elna/source/Lexer.mod

787 lines
37 KiB
Modula-2

IMPLEMENTATION MODULE Lexer;
FROM FIO IMPORT ReadNBytes;
FROM SYSTEM IMPORT ADR;
FROM Storage IMPORT DEALLOCATE, ALLOCATE;
FROM Strings IMPORT Length;
FROM MemUtils IMPORT MemCopy, MemZero;
FROM StrCase IMPORT Lower;
CONST
CHUNK_SIZE = 65536;
TYPE
(*
* Classification table assigns each possible character to a group (class). All
* characters of the same group a handled equivalently.
*
* Classification:
*)
TransitionClass = (
transitionClassInvalid,
transitionClassDigit,
transitionClassAlpha,
transitionClassSpace,
transitionClassColon,
transitionClassEquals,
transitionClassLeftParen,
transitionClassRightParen,
transitionClassAsterisk,
transitionClassUnderscore,
transitionClassSingle,
transitionClassHex,
transitionClassZero,
transitionClassX,
transitionClassEof,
transitionClassDot,
transitionClassMinus,
transitionClassSingleQuote,
transitionClassDoubleQuote,
transitionClassGreater,
transitionClassLess,
transitionClassOther
);
TransitionState = (
transitionStateStart,
transitionStateColon,
transitionStateIdentifier,
transitionStateDecimal,
transitionStateGreater,
transitionStateMinus,
transitionStateLeftParen,
transitionStateLess,
transitionStateDot,
transitionStateComment,
transitionStateClosingComment,
transitionStateCharacter,
transitionStateString,
transitionStateLeadingZero,
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = PROCEDURE(PLexer, PLexerToken);
Transition = RECORD
Action: TransitionAction;
NextState: TransitionState
END;
TransitionClasses = ARRAY[1..22] OF Transition;
VAR
classification: ARRAY[1..128] OF TransitionClass;
transitions: ARRAY[1..16] OF TransitionClasses;
PROCEDURE initialize_classification();
BEGIN
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
classification[3] := transitionClassInvalid; (* STX *)
classification[4] := transitionClassInvalid; (* ETX *)
classification[5] := transitionClassInvalid; (* EOT *)
classification[6] := transitionClassInvalid; (* EMQ *)
classification[7] := transitionClassInvalid; (* ACK *)
classification[8] := transitionClassInvalid; (* BEL *)
classification[9] := transitionClassInvalid; (* BS *)
classification[10] := transitionClassSpace; (* HT *)
classification[11] := transitionClassSpace; (* LF *)
classification[12] := transitionClassInvalid; (* VT *)
classification[13] := transitionClassInvalid; (* FF *)
classification[14] := transitionClassSpace; (* CR *)
classification[15] := transitionClassInvalid; (* SO *)
classification[16] := transitionClassInvalid; (* SI *)
classification[17] := transitionClassInvalid; (* DLE *)
classification[18] := transitionClassInvalid; (* DC1 *)
classification[19] := transitionClassInvalid; (* DC2 *)
classification[20] := transitionClassInvalid; (* DC3 *)
classification[21] := transitionClassInvalid; (* DC4 *)
classification[22] := transitionClassInvalid; (* NAK *)
classification[23] := transitionClassInvalid; (* SYN *)
classification[24] := transitionClassInvalid; (* ETB *)
classification[25] := transitionClassInvalid; (* CAN *)
classification[26] := transitionClassInvalid; (* EM *)
classification[27] := transitionClassInvalid; (* SUB *)
classification[28] := transitionClassInvalid; (* ESC *)
classification[29] := transitionClassInvalid; (* FS *)
classification[30] := transitionClassInvalid; (* GS *)
classification[31] := transitionClassInvalid; (* RS *)
classification[32] := transitionClassInvalid; (* US *)
classification[33] := transitionClassSpace; (* Space *)
classification[34] := transitionClassSingle; (* ! *)
classification[35] := transitionClassDoubleQuote; (* " *)
classification[36] := transitionClassOther; (* # *)
classification[37] := transitionClassOther; (* $ *)
classification[38] := transitionClassSingle; (* % *)
classification[39] := transitionClassSingle; (* AND *)
classification[40] := transitionClassSingleQuote; (* ' *)
classification[41] := transitionClassLeftParen; (* ( *)
classification[42] := transitionClassRightParen; (* ) *)
classification[43] := transitionClassAsterisk; (* * *)
classification[44] := transitionClassSingle; (* + *)
classification[45] := transitionClassSingle; (* , *)
classification[46] := transitionClassMinus; (* - *)
classification[47] := transitionClassDot; (* . *)
classification[48] := transitionClassSingle; (* / *)
classification[49] := transitionClassZero; (* 0 *)
classification[50] := transitionClassDigit; (* 1 *)
classification[51] := transitionClassDigit; (* 2 *)
classification[52] := transitionClassDigit; (* 3 *)
classification[53] := transitionClassDigit; (* 4 *)
classification[54] := transitionClassDigit; (* 5 *)
classification[55] := transitionClassDigit; (* 6 *)
classification[56] := transitionClassDigit; (* 7 *)
classification[57] := transitionClassDigit; (* 8 *)
classification[58] := transitionClassDigit; (* 9 *)
classification[59] := transitionClassColon; (* : *)
classification[60] := transitionClassSingle; (* ; *)
classification[61] := transitionClassLess; (* < *)
classification[62] := transitionClassEquals; (* = *)
classification[63] := transitionClassGreater; (* > *)
classification[64] := transitionClassOther; (* ? *)
classification[65] := transitionClassSingle; (* @ *)
classification[66] := transitionClassAlpha; (* A *)
classification[67] := transitionClassAlpha; (* B *)
classification[68] := transitionClassAlpha; (* C *)
classification[69] := transitionClassAlpha; (* D *)
classification[70] := transitionClassAlpha; (* E *)
classification[71] := transitionClassAlpha; (* F *)
classification[72] := transitionClassAlpha; (* G *)
classification[73] := transitionClassAlpha; (* H *)
classification[74] := transitionClassAlpha; (* I *)
classification[75] := transitionClassAlpha; (* J *)
classification[76] := transitionClassAlpha; (* K *)
classification[77] := transitionClassAlpha; (* L *)
classification[78] := transitionClassAlpha; (* M *)
classification[79] := transitionClassAlpha; (* N *)
classification[80] := transitionClassAlpha; (* O *)
classification[81] := transitionClassAlpha; (* P *)
classification[82] := transitionClassAlpha; (* Q *)
classification[83] := transitionClassAlpha; (* R *)
classification[84] := transitionClassAlpha; (* S *)
classification[85] := transitionClassAlpha; (* T *)
classification[86] := transitionClassAlpha; (* U *)
classification[87] := transitionClassAlpha; (* V *)
classification[88] := transitionClassAlpha; (* W *)
classification[89] := transitionClassAlpha; (* X *)
classification[90] := transitionClassAlpha; (* Y *)
classification[91] := transitionClassAlpha; (* Z *)
classification[92] := transitionClassSingle; (* [ *)
classification[93] := transitionClassOther; (* \ *)
classification[94] := transitionClassSingle; (* ] *)
classification[95] := transitionClassSingle; (* ^ *)
classification[96] := transitionClassUnderscore; (* _ *)
classification[97] := transitionClassOther; (* ` *)
classification[98] := transitionClassHex; (* a *)
classification[99] := transitionClassHex; (* b *)
classification[100] := transitionClassHex; (* c *)
classification[101] := transitionClassHex; (* d *)
classification[102] := transitionClassHex; (* e *)
classification[103] := transitionClassHex; (* f *)
classification[104] := transitionClassAlpha; (* g *)
classification[105] := transitionClassAlpha; (* h *)
classification[106] := transitionClassAlpha; (* i *)
classification[107] := transitionClassAlpha; (* j *)
classification[108] := transitionClassAlpha; (* k *)
classification[109] := transitionClassAlpha; (* l *)
classification[110] := transitionClassAlpha; (* m *)
classification[111] := transitionClassAlpha; (* n *)
classification[112] := transitionClassAlpha; (* o *)
classification[113] := transitionClassAlpha; (* p *)
classification[114] := transitionClassAlpha; (* q *)
classification[115] := transitionClassAlpha; (* r *)
classification[116] := transitionClassAlpha; (* s *)
classification[117] := transitionClassAlpha; (* t *)
classification[118] := transitionClassAlpha; (* u *)
classification[119] := transitionClassAlpha; (* v *)
classification[120] := transitionClassAlpha; (* w *)
classification[121] := transitionClassX; (* x *)
classification[122] := transitionClassAlpha; (* y *)
classification[123] := transitionClassAlpha; (* z *)
classification[124] := transitionClassOther; (* { *)
classification[125] := transitionClassSingle; (* | *)
classification[126] := transitionClassOther; (* } *)
classification[127] := transitionClassSingle; (* ~ *)
classification[128] := transitionClassInvalid (* DEL *)
END initialize_classification;
PROCEDURE compare_keyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
VAR
Result: BOOLEAN;
Index: CARDINAL;
BEGIN
Index := 0;
Result := TRUE;
WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND Result DO
Result := (Keyword[Index] = TokenStart^) OR (Lower(Keyword[Index]) = TokenStart^);
INC(TokenStart);
INC(Index)
END;
Result := (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND Result;
RETURN Result
END compare_keyword;
(* Reached the end of file. *)
PROCEDURE transition_action_eof(lexer: PLexer; AToken: PLexerToken);
BEGIN
AToken^.Kind := lexerKindEof
END transition_action_eof;
(* Add the character to the token currently read and advance to the next character. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; AToken: PLexerToken);
BEGIN
INC(lexer^.Current)
END transition_action_accumulate;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
PROCEDURE transition_action_finalize(lexer: PLexer; AToken: PLexerToken);
BEGIN
IF lexer^.Start^ = ':' THEN
AToken^.Kind := lexerKindColon
END;
IF lexer^.Start^ = '>' THEN
AToken^.Kind := lexerKindGreaterThan
END;
IF lexer^.Start^ = '<' THEN
AToken^.Kind := lexerKindLessThan
END;
IF lexer^.Start^ = '(' THEN
AToken^.Kind := lexerKindLeftParen
END;
IF lexer^.Start^ = '-' THEN
AToken^.Kind := lexerKindLeftParen
END;
IF lexer^.Start^ = '.' THEN
AToken^.Kind := lexerKindDot
END
END transition_action_finalize;
(* An action for tokens containing multiple characters. *)
PROCEDURE transition_action_composite(lexer: PLexer; AToken: PLexerToken);
BEGIN
IF lexer^.Start^ = '<' THEN
IF lexer^.Current^ = '>' THEN
AToken^.Kind := lexerKindNotEqual
END;
IF lexer^.Current^ = '=' THEN
AToken^.Kind := lexerKindLessEqual
END
END;
IF (lexer^.Start^ = '>') AND (lexer^.Current^ = '=') THEN
AToken^.Kind := lexerKindGreaterEqual
END;
IF (lexer^.Start^ = '.') AND (lexer^.Current^ = '.') THEN
AToken^.Kind := lexerKindRange
END;
IF (lexer^.Start^ = ':') AND (lexer^.Current^ = '=') THEN
AToken^.Kind := lexerKindAssignment
END;
INC(lexer^.Current)
END transition_action_composite;
(* Skip a space. *)
PROCEDURE transition_action_skip(lexer: PLexer; AToken: PLexerToken);
BEGIN
INC(lexer^.Current);
INC(lexer^.Start)
END transition_action_skip;
(* Delimited string action. *)
PROCEDURE transition_action_delimited(lexer: PLexer; AToken: PLexerToken);
BEGIN
IF lexer^.Start^ = '(' THEN
AToken^.Kind := lexerKindComment
END;
IF lexer^.Start^ = '"' THEN
AToken^.Kind := lexerKindCharacter
END;
IF lexer^.Start^ = "'" THEN
AToken^.Kind := lexerKindString
END;
INC(lexer^.Current)
END transition_action_delimited;
(* Finalize keyword OR identifier. *)
PROCEDURE transition_action_key_id(lexer: PLexer; AToken: PLexerToken);
BEGIN
AToken^.Kind := lexerKindIdentifier;
AToken^.identifierKind[1] := lexer^.Current - lexer^.Start;
MemCopy(lexer^.Start, ORD(AToken^.identifierKind[1]), ADR(AToken^.identifierKind[2]));
IF compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindProgram
END;
IF compare_keyword('IMPORT', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindImport
END;
IF compare_keyword('CONST', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindConst
END;
IF compare_keyword('VAR', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindVar
END;
IF compare_keyword('IF', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindIf
END;
IF compare_keyword('THEN', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindThen
END;
IF compare_keyword('ELSIF', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindElsif
END;
IF compare_keyword('ELSE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindElse
END;
IF compare_keyword('WHILE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindWhile
END;
IF compare_keyword('DO', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindDo
END;
IF compare_keyword('proc', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindProc
END;
IF compare_keyword('BEGIN', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindBegin
END;
IF compare_keyword('END', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindEnd
END;
IF compare_keyword('TYPE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindType
END;
IF compare_keyword('RECORD', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindRecord
END;
IF compare_keyword('UNION', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindUnion
END;
IF compare_keyword('NIL', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindNull
END;
IF compare_keyword('AND', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindAnd
END;
IF compare_keyword('OR', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindOr
END;
IF compare_keyword('RETURN', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindReturn
END;
IF compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindDefinition
END;
IF compare_keyword('TO', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindTo
END;
IF compare_keyword('CASE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindCase
END;
IF compare_keyword('OF', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindOf
END;
IF compare_keyword('FROM', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindFrom
END;
IF compare_keyword('MODULE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindModule
END;
IF compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindImplementation
END;
IF compare_keyword('POINTER', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindPointer
END;
IF compare_keyword('ARRAY', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindArray
END;
IF compare_keyword('TRUE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindBoolean;
AToken^.booleanKind := TRUE
END;
IF compare_keyword('FALSE', lexer^.Start, lexer^.Current) THEN
AToken^.Kind := lexerKindBoolean;
AToken^.booleanKind := FALSE
END
END transition_action_key_id;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
PROCEDURE transition_action_single(lexer: PLexer; AToken: PLexerToken);
BEGIN
IF lexer^.Current^ = '&' THEN
AToken^.Kind := lexerKindAnd
END;
IF lexer^.Current^ = ';' THEN
AToken^.Kind := lexerKindSemicolon
END;
IF lexer^.Current^ = ',' THEN
AToken^.Kind := lexerKindComma
END;
IF lexer^.Current^ = ',' THEN
AToken^.Kind := lexerKindComma
END;
IF lexer^.Current^ = ')' THEN
AToken^.Kind := lexerKindRightParen
END;
IF lexer^.Current^ = '[' THEN
AToken^.Kind := lexerKindLeftSquare
END;
IF lexer^.Current^ = ']' THEN
AToken^.Kind := lexerKindRightSquare
END;
IF lexer^.Current^ = '^' THEN
AToken^.Kind := lexerKindHat
END;
IF lexer^.Current^ = '=' THEN
AToken^.Kind := lexerKindEqual
END;
IF lexer^.Current^ = '+' THEN
AToken^.Kind := lexerKindPlus
END;
IF lexer^.Current^ = '/' THEN
AToken^.Kind := lexerKindDivision
END;
IF lexer^.Current^ = '%' THEN
AToken^.Kind := lexerKindRemainder
END;
IF lexer^.Current^ = '@' THEN
AToken^.Kind := lexerKindAt
END;
IF lexer^.Current^ = '|' THEN
AToken^.Kind := lexerKindPipe
END;
INC(lexer^.Current)
END transition_action_single;
(* Handle an integer literal. *)
PROCEDURE transition_action_integer(lexer: PLexer; AToken: PLexerToken);
BEGIN
AToken^.Kind := lexerKindInteger
END transition_action_integer;
PROCEDURE set_default_transition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState);
VAR
DefaultTransition: Transition;
BEGIN
DefaultTransition.Action := DefaultAction;
DefaultTransition.NextState := NextState;
transitions[ORD(CurrentState) + 1][ORD(transitionClassInvalid) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassDigit) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassAlpha) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassSpace) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassColon) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassEquals) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassLeftParen) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassRightParen) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassAsterisk) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassUnderscore) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassSingle) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassHex) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassZero) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassX) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassEof) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassDot) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassMinus) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassSingleQuote) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassDoubleQuote) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassGreater) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassLess) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassOther) + 1] := DefaultTransition
END set_default_transition;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the amount of states and n is
* the amount of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a word long.
* - The least significant byte of the word is a row number (beginning with 0).
* It specifies the target state. "ff" means that this is an end state and no
* transition is possible.
* - The next byte is the action that should be performed when transitioning.
* For the meaning of actions see labels in the lex_next function, which
* handles each action.
*)
PROCEDURE initialize_transitions();
BEGIN
(* Start state. *)
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := NIL;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateCharacter;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := NIL;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd;
(* Colon state. *)
set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
(* Identifier state. *)
set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd);
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
(* Decimal state. *)
set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := NIL;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix;
(* Greater state. *)
set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
(* Minus state. *)
set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
(* Left paren state. *)
set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment;
(* Less state. *)
set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
(* Hexadecimal after 0x. *)
set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd;
(* Comment. *)
set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := NIL;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
(* Closing comment. *)
set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := NIL;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := NIL;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
(* Character. *)
set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter);
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := NIL;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := NIL;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd;
(* String. *)
set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString);
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := NIL;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := NIL;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd;
(* Leading zero. *)
set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := NIL;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd;
(* Digit with a character suffix. *)
set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := NIL;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd
END initialize_transitions;
PROCEDURE lexer_initialize(lexer: PLexer; Input: File);
BEGIN
lexer^.Input := Input;
lexer^.Length := 0;
ALLOCATE(lexer^.Buffer, CHUNK_SIZE);
MemZero(lexer^.Buffer, CHUNK_SIZE);
lexer^.Size := CHUNK_SIZE
END lexer_initialize;
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
VAR
CurrentClass: TransitionClass;
CurrentState: TransitionState;
CurrentTransition: Transition;
Result: LexerToken;
BEGIN
lexer^.Current := lexer^.Start;
CurrentState := transitionStateStart;
WHILE CurrentState <> transitionStateEnd DO
CurrentClass := classification[ORD(lexer^.Current^) + 1];
CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
IF CurrentTransition.Action <> NIL THEN
CurrentTransition.Action(lexer, ADR(Result))
END;
CurrentState := CurrentTransition.NextState
END;
RETURN Result
END lexer_current;
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
VAR
Result: LexerToken;
BEGIN
IF lexer^.Length = 0 THEN
lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
lexer^.Current := lexer^.Buffer
END;
lexer^.Start := lexer^.Current;
Result := lexer_current(lexer);
RETURN Result
END lexer_lex;
PROCEDURE lexer_destroy(lexer: PLexer);
BEGIN
DEALLOCATE(lexer^.Buffer, lexer^.Size)
END lexer_destroy;
BEGIN
initialize_classification();
initialize_transitions()
END Lexer.