Files
elna/source/Lexer.mod

862 lines
40 KiB
Modula-2

IMPLEMENTATION MODULE Lexer;
FROM FIO IMPORT ReadNBytes;
FROM SYSTEM IMPORT ADR, TSIZE;
FROM DynamicStrings IMPORT String, InitStringCharStar, KillString;
FROM StringConvert IMPORT StringToInteger;
FROM Storage IMPORT DEALLOCATE, ALLOCATE;
FROM Strings IMPORT Length;
FROM MemUtils IMPORT MemCopy, MemZero;
FROM StrCase IMPORT Lower;
CONST
CHUNK_SIZE = 85536;
TYPE
(*
* Classification table assigns each possible character to a group (class). All
* characters of the same group a handled equivalently.
*
* Classification:
*)
TransitionClass = (
transitionClassInvalid,
transitionClassDigit,
transitionClassAlpha,
transitionClassSpace,
transitionClassColon,
transitionClassEquals,
transitionClassLeftParen,
transitionClassRightParen,
transitionClassAsterisk,
transitionClassUnderscore,
transitionClassSingle,
transitionClassHex,
transitionClassZero,
transitionClassX,
transitionClassEof,
transitionClassDot,
transitionClassMinus,
transitionClassSingleQuote,
transitionClassDoubleQuote,
transitionClassGreater,
transitionClassLess,
transitionClassOther
);
TransitionState = (
transitionStateStart,
transitionStateColon,
transitionStateIdentifier,
transitionStateDecimal,
transitionStateGreater,
transitionStateMinus,
transitionStateLeftParen,
transitionStateLess,
transitionStateDot,
transitionStateComment,
transitionStateClosingComment,
transitionStateCharacter,
transitionStateString,
transitionStateLeadingZero,
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = PROCEDURE(PLexer, PLexerToken);
Transition = RECORD
action: TransitionAction;
next_state: TransitionState
END;
TransitionClasses = ARRAY[1..22] OF Transition;
VAR
classification: ARRAY[1..128] OF TransitionClass;
transitions: ARRAY[1..16] OF TransitionClasses;
PROCEDURE initialize_classification();
VAR
i: CARDINAL;
BEGIN
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
classification[3] := transitionClassInvalid; (* STX *)
classification[4] := transitionClassInvalid; (* ETX *)
classification[5] := transitionClassInvalid; (* EOT *)
classification[6] := transitionClassInvalid; (* EMQ *)
classification[7] := transitionClassInvalid; (* ACK *)
classification[8] := transitionClassInvalid; (* BEL *)
classification[9] := transitionClassInvalid; (* BS *)
classification[10] := transitionClassSpace; (* HT *)
classification[11] := transitionClassSpace; (* LF *)
classification[12] := transitionClassInvalid; (* VT *)
classification[13] := transitionClassInvalid; (* FF *)
classification[14] := transitionClassSpace; (* CR *)
classification[15] := transitionClassInvalid; (* SO *)
classification[16] := transitionClassInvalid; (* SI *)
classification[17] := transitionClassInvalid; (* DLE *)
classification[18] := transitionClassInvalid; (* DC1 *)
classification[19] := transitionClassInvalid; (* DC2 *)
classification[20] := transitionClassInvalid; (* DC3 *)
classification[21] := transitionClassInvalid; (* DC4 *)
classification[22] := transitionClassInvalid; (* NAK *)
classification[23] := transitionClassInvalid; (* SYN *)
classification[24] := transitionClassInvalid; (* ETB *)
classification[25] := transitionClassInvalid; (* CAN *)
classification[26] := transitionClassInvalid; (* EM *)
classification[27] := transitionClassInvalid; (* SUB *)
classification[28] := transitionClassInvalid; (* ESC *)
classification[29] := transitionClassInvalid; (* FS *)
classification[30] := transitionClassInvalid; (* GS *)
classification[31] := transitionClassInvalid; (* RS *)
classification[32] := transitionClassInvalid; (* US *)
classification[33] := transitionClassSpace; (* Space *)
classification[34] := transitionClassSingle; (* ! *)
classification[35] := transitionClassDoubleQuote; (* " *)
classification[36] := transitionClassOther; (* # *)
classification[37] := transitionClassOther; (* $ *)
classification[38] := transitionClassSingle; (* % *)
classification[39] := transitionClassSingle; (* AND *)
classification[40] := transitionClassSingleQuote; (* ' *)
classification[41] := transitionClassLeftParen; (* ( *)
classification[42] := transitionClassRightParen; (* ) *)
classification[43] := transitionClassAsterisk; (* * *)
classification[44] := transitionClassSingle; (* + *)
classification[45] := transitionClassSingle; (* , *)
classification[46] := transitionClassMinus; (* - *)
classification[47] := transitionClassDot; (* . *)
classification[48] := transitionClassSingle; (* / *)
classification[49] := transitionClassZero; (* 0 *)
classification[50] := transitionClassDigit; (* 1 *)
classification[51] := transitionClassDigit; (* 2 *)
classification[52] := transitionClassDigit; (* 3 *)
classification[53] := transitionClassDigit; (* 4 *)
classification[54] := transitionClassDigit; (* 5 *)
classification[55] := transitionClassDigit; (* 6 *)
classification[56] := transitionClassDigit; (* 7 *)
classification[57] := transitionClassDigit; (* 8 *)
classification[58] := transitionClassDigit; (* 9 *)
classification[59] := transitionClassColon; (* : *)
classification[60] := transitionClassSingle; (* ; *)
classification[61] := transitionClassLess; (* < *)
classification[62] := transitionClassEquals; (* = *)
classification[63] := transitionClassGreater; (* > *)
classification[64] := transitionClassOther; (* ? *)
classification[65] := transitionClassSingle; (* @ *)
classification[66] := transitionClassAlpha; (* A *)
classification[67] := transitionClassAlpha; (* B *)
classification[68] := transitionClassAlpha; (* C *)
classification[69] := transitionClassAlpha; (* D *)
classification[70] := transitionClassAlpha; (* E *)
classification[71] := transitionClassAlpha; (* F *)
classification[72] := transitionClassAlpha; (* G *)
classification[73] := transitionClassAlpha; (* H *)
classification[74] := transitionClassAlpha; (* I *)
classification[75] := transitionClassAlpha; (* J *)
classification[76] := transitionClassAlpha; (* K *)
classification[77] := transitionClassAlpha; (* L *)
classification[78] := transitionClassAlpha; (* M *)
classification[79] := transitionClassAlpha; (* N *)
classification[80] := transitionClassAlpha; (* O *)
classification[81] := transitionClassAlpha; (* P *)
classification[82] := transitionClassAlpha; (* Q *)
classification[83] := transitionClassAlpha; (* R *)
classification[84] := transitionClassAlpha; (* S *)
classification[85] := transitionClassAlpha; (* T *)
classification[86] := transitionClassAlpha; (* U *)
classification[87] := transitionClassAlpha; (* V *)
classification[88] := transitionClassAlpha; (* W *)
classification[89] := transitionClassAlpha; (* X *)
classification[90] := transitionClassAlpha; (* Y *)
classification[91] := transitionClassAlpha; (* Z *)
classification[92] := transitionClassSingle; (* [ *)
classification[93] := transitionClassOther; (* \ *)
classification[94] := transitionClassSingle; (* ] *)
classification[95] := transitionClassSingle; (* ^ *)
classification[96] := transitionClassUnderscore; (* _ *)
classification[97] := transitionClassOther; (* ` *)
classification[98] := transitionClassHex; (* a *)
classification[99] := transitionClassHex; (* b *)
classification[100] := transitionClassHex; (* c *)
classification[101] := transitionClassHex; (* d *)
classification[102] := transitionClassHex; (* e *)
classification[103] := transitionClassHex; (* f *)
classification[104] := transitionClassAlpha; (* g *)
classification[105] := transitionClassAlpha; (* h *)
classification[106] := transitionClassAlpha; (* i *)
classification[107] := transitionClassAlpha; (* j *)
classification[108] := transitionClassAlpha; (* k *)
classification[109] := transitionClassAlpha; (* l *)
classification[110] := transitionClassAlpha; (* m *)
classification[111] := transitionClassAlpha; (* n *)
classification[112] := transitionClassAlpha; (* o *)
classification[113] := transitionClassAlpha; (* p *)
classification[114] := transitionClassAlpha; (* q *)
classification[115] := transitionClassAlpha; (* r *)
classification[116] := transitionClassAlpha; (* s *)
classification[117] := transitionClassAlpha; (* t *)
classification[118] := transitionClassAlpha; (* u *)
classification[119] := transitionClassAlpha; (* v *)
classification[120] := transitionClassAlpha; (* w *)
classification[121] := transitionClassX; (* x *)
classification[122] := transitionClassAlpha; (* y *)
classification[123] := transitionClassAlpha; (* z *)
classification[124] := transitionClassOther; (* { *)
classification[125] := transitionClassSingle; (* | *)
classification[126] := transitionClassOther; (* } *)
classification[127] := transitionClassSingle; (* ~ *)
classification[128] := transitionClassInvalid; (* DEL *)
i := 129;
WHILE i <= 256 DO
classification[i] := transitionClassOther;
INC(i)
END
END initialize_classification;
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: BufferPosition; token_end: PLexerBuffer): BOOLEAN;
VAR
result: BOOLEAN;
index: CARDINAL;
keyword_length: CARDINAL;
continue: BOOLEAN;
BEGIN
index := 0;
result := TRUE;
keyword_length := Length(keyword);
continue := (index < keyword_length) AND (token_start.iterator <> token_end);
WHILE continue AND result DO
result := (keyword[index] = token_start.iterator^) OR (Lower(keyword[index]) = token_start.iterator^);
INC(token_start.iterator);
INC(index);
continue := (index < keyword_length) AND (token_start.iterator <> token_end)
END;
result := result AND (index = Length(keyword));
RETURN result AND (token_start.iterator = token_end)
END compare_keyword;
(* Reached the end of file. *)
PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken);
BEGIN
token^.kind := lexerKindEof
END transition_action_eof;
PROCEDURE increment(position: PBufferPosition);
BEGIN
INC(position^.iterator)
END increment;
(* Add the character to the token currently read and advance to the next character. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken);
BEGIN
increment(ADR(lexer^.current))
END transition_action_accumulate;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.start.iterator^ = ':' THEN
token^.kind := lexerKindColon
END;
IF lexer^.start.iterator^ = '>' THEN
token^.kind := lexerKindGreaterThan
END;
IF lexer^.start.iterator^ = '<' THEN
token^.kind := lexerKindLessThan
END;
IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindLeftParen
END;
IF lexer^.start.iterator^ = '-' THEN
token^.kind := lexerKindMinus
END;
IF lexer^.start.iterator^ = '.' THEN
token^.kind := lexerKindDot
END
END transition_action_finalize;
(* An action for tokens containing multiple characters. *)
PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.start.iterator^ = '<' THEN
IF lexer^.current.iterator^ = '>' THEN
token^.kind := lexerKindNotEqual
END;
IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindLessEqual
END
END;
IF (lexer^.start.iterator^ = '>') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindGreaterEqual
END;
IF (lexer^.start.iterator^ = '.') AND (lexer^.current.iterator^ = '.') THEN
token^.kind := lexerKindRange
END;
IF (lexer^.start.iterator^ = ':') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindAssignment
END;
IF (lexer^.start.iterator^ = '-') AND (lexer^.current.iterator^ = '>') THEN
token^.kind := lexerKindArrow
END;
increment(ADR(lexer^.current))
END transition_action_composite;
(* Skip a space. *)
PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken);
BEGIN
increment(ADR(lexer^.start));
IF ORD(lexer^.start.iterator^) = 10 THEN
INC(lexer^.start.location.line);
lexer^.start.location.column := 1
END;
lexer^.current := lexer^.start
END transition_action_skip;
(* Delimited string action. *)
PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken);
VAR
text_length: CARDINAL;
BEGIN
IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindComment
END;
IF lexer^.start.iterator^ = '"' THEN
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter
END;
IF lexer^.start.iterator^ = "'" THEN
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString
END;
increment(ADR(lexer^.current))
END transition_action_delimited;
(* Finalize keyword OR identifier. *)
PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken);
BEGIN
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current.iterator;
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProgram
END;
IF compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImport
END;
IF compare_keyword('CONST', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindConst
END;
IF compare_keyword('VAR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindVar
END;
IF compare_keyword('IF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindIf
END;
IF compare_keyword('THEN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindThen
END;
IF compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElsif
END;
IF compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElse
END;
IF compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindWhile
END;
IF compare_keyword('DO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDo
END;
IF compare_keyword('proc', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProc
END;
IF compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBegin
END;
IF compare_keyword('END', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindEnd
END;
IF compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindType
END;
IF compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindRecord
END;
IF compare_keyword('UNION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindUnion
END;
IF compare_keyword('NIL', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindNull
END;
IF compare_keyword('AND', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindAnd
END;
IF compare_keyword('OR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOr
END;
IF compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindReturn
END;
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDefinition
END;
IF compare_keyword('TO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindTo
END;
IF compare_keyword('CASE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindCase
END;
IF compare_keyword('OF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOf
END;
IF compare_keyword('FROM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindFrom
END;
IF compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindModule
END;
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImplementation
END;
IF compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindPointer
END;
IF compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindArray
END;
IF compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean;
token^.booleanKind := TRUE
END;
IF compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean;
token^.booleanKind := FALSE
END
END transition_action_key_id;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.current.iterator^ = '&' THEN
token^.kind := lexerKindAnd
END;
IF lexer^.current.iterator^ = ';' THEN
token^.kind := lexerKindSemicolon
END;
IF lexer^.current.iterator^ = ',' THEN
token^.kind := lexerKindComma
END;
IF lexer^.current.iterator^ = '~' THEN
token^.kind := lexerKindTilde
END;
IF lexer^.current.iterator^ = ')' THEN
token^.kind := lexerKindRightParen
END;
IF lexer^.current.iterator^ = '[' THEN
token^.kind := lexerKindLeftSquare
END;
IF lexer^.current.iterator^ = ']' THEN
token^.kind := lexerKindRightSquare
END;
IF lexer^.current.iterator^ = '^' THEN
token^.kind := lexerKindHat
END;
IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindEqual
END;
IF lexer^.current.iterator^ = '+' THEN
token^.kind := lexerKindPlus
END;
IF lexer^.current.iterator^ = '*' THEN
token^.kind := lexerKindAsterisk
END;
IF lexer^.current.iterator^ = '/' THEN
token^.kind := lexerKindDivision
END;
IF lexer^.current.iterator^ = '%' THEN
token^.kind := lexerKindRemainder
END;
IF lexer^.current.iterator^ = '@' THEN
token^.kind := lexerKindAt
END;
IF lexer^.current.iterator^ = '|' THEN
token^.kind := lexerKindPipe
END;
increment(ADR(lexer^.current.iterator))
END transition_action_single;
(* Handle an integer literal. *)
PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken);
VAR
buffer: String;
integer_length: CARDINAL;
found: BOOLEAN;
BEGIN
token^.kind := lexerKindInteger;
integer_length := lexer^.current.iterator;
DEC(integer_length, lexer^.start.iterator);
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer)
END transition_action_integer;
PROCEDURE set_default_transition(current_state: TransitionState; default_action: TransitionAction; next_state: TransitionState);
VAR
default_transition: Transition;
BEGIN
default_transition.action := default_action;
default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassDigit) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassAlpha) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassSpace) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassColon) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassEquals) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassLeftParen) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassRightParen) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassAsterisk) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassUnderscore) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassSingle) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassHex) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassZero) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassX) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassEof) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassDot) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassMinus) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassSingleQuote) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassDoubleQuote) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassGreater) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassLess) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassOther) + 1] := default_transition
END set_default_transition;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the amount of states and n is
* the amount of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a word long.
* - The least significant byte of the word is a row number (beginning with 0).
* It specifies the target state. "ff" means that this is an end state and no
* transition is possible.
* - The next byte is the action that should be performed when transitioning.
* For the meaning of actions see labels in the lex_next function, which
* handles each action.
*)
PROCEDURE initialize_transitions();
BEGIN
(* Start state. *)
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].action := NIL;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateDecimal;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].action := transition_action_skip;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].next_state := transitionStateStart;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].next_state := transitionStateColon;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].next_state := transitionStateLeftParen;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateLeadingZero;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].action := transition_action_eof;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].next_state := transitionStateDot;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].next_state := transitionStateMinus;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].next_state := transitionStateCharacter;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].next_state := transitionStateString;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateGreater;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].next_state := transitionStateLess;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].action := NIL;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].next_state := transitionStateEnd;
(* Colon state. *)
set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite;
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
(* Identifier state. *)
set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd);
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].next_state := transitionStateIdentifier;
(* Decimal state. *)
set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].action := NIL;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].next_state := transitionStateDecimalSuffix;
(* Greater state. *)
set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite;
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
(* Minus state. *)
set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].action := transition_action_composite;
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateEnd;
(* Left paren state. *)
set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateComment;
(* Less state. *)
set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateEnd;
(* Hexadecimal after 0x. *)
set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].action := transition_action_composite;
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].next_state := transitionStateEnd;
(* Comment. *)
set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateClosingComment;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].action := NIL;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
(* Closing comment. *)
set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].action := NIL;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].action := transition_action_delimited;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateClosingComment;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].action := NIL;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
(* Character. *)
set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter);
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].action := NIL;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].action := NIL;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].action := transition_action_delimited;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].next_state := transitionStateEnd;
(* String. *)
set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString);
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].action := NIL;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].action := NIL;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].action := transition_action_delimited;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].next_state := transitionStateEnd;
(* Leading zero. *)
set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd;
(* Digit with a character suffix. *)
set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
END initialize_transitions;
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
BEGIN
lexer^.input := input;
lexer^.length := 0;
ALLOCATE(lexer^.buffer, CHUNK_SIZE);
MemZero(lexer^.buffer, CHUNK_SIZE);
lexer^.size := CHUNK_SIZE
END lexer_initialize;
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
VAR
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
index1: CARDINAL;
index2: CARDINAL;
BEGIN
lexer^.current := lexer^.start;
current_state := transitionStateStart;
WHILE current_state <> transitionStateEnd DO
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
index1 := ORD(current_state);
INC(index1);
index2 := ORD(current_class);
INC(index2);
current_transition := transitions[index1][index2];
IF current_transition.action <> NIL THEN
current_transition.action(lexer, ADR(result))
END;
current_state := current_transition.next_state
END;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
RETURN result
END lexer_current;
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
VAR
result: LexerToken;
BEGIN
IF lexer^.length = 0 THEN
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current.location.column := 1;
lexer^.current.location.line := 1;
lexer^.current.iterator := lexer^.buffer
END;
lexer^.start := lexer^.current;
result := lexer_current(lexer);
RETURN result
END lexer_lex;
PROCEDURE lexer_destroy(lexer: PLexer);
BEGIN
DEALLOCATE(lexer^.buffer, lexer^.size)
END lexer_destroy;
BEGIN
initialize_classification();
initialize_transitions()
END Lexer.