723 lines
35 KiB
Modula-2
723 lines
35 KiB
Modula-2
IMPLEMENTATION MODULE Lexer;
|
|
|
|
FROM FIO IMPORT ReadNBytes;
|
|
FROM SYSTEM IMPORT ADR;
|
|
|
|
FROM Storage IMPORT DEALLOCATE, ALLOCATE;
|
|
FROM Strings IMPORT Length;
|
|
FROM MemUtils IMPORT MemZero;
|
|
|
|
CONST
  (* 64 KiB. NOTE(review): not referenced in the visible part of this module;
   * presumably the size of one input read - confirm against the reading code. *)
  ChunkSize = 64 * 1024;
|
|
|
|
TYPE
|
|
(*
 * Classification table assigns each possible character to a group (class). All
 * characters of the same group are handled equivalently.
 *
 * The classes below are listed in the order used to index the columns of the
 * transition table, so the order of the members must not be changed.
 *)
TransitionClass = (
  transitionClassInvalid,     (* Control characters except NUL, HT, LF and CR; DEL. *)
  transitionClassDigit,       (* 1 to 9. *)
  transitionClassAlpha,       (* A to Z, g to w, y and z. *)
  transitionClassSpace,       (* HT, LF, CR and the space character. *)
  transitionClassColon,       (* Colon. *)
  transitionClassEquals,      (* Equals sign. *)
  transitionClassLeftParen,   (* Opening parenthesis. *)
  transitionClassRightParen,  (* Closing parenthesis. *)
  transitionClassAsterisk,    (* Asterisk. *)
  transitionClassUnderscore,  (* Underscore. *)
  transitionClassSingle,      (* ! % & + , / ; @ [ ] ^ | ~ *)
  transitionClassHex,         (* a to f. *)
  transitionClassZero,        (* 0. *)
  transitionClassX,           (* x. *)
  transitionClassEof,         (* NUL. *)
  transitionClassDot,         (* Dot. *)
  transitionClassMinus,       (* Minus sign. *)
  transitionClassSingleQuote, (* Single quote. *)
  transitionClassDoubleQuote, (* Double quote. *)
  transitionClassGreater,     (* Greater-than sign. *)
  transitionClassLess,        (* Less-than sign. *)
  transitionClassOther        (* # $ ? \ ` { } *)
);
|
|
(*
 * States of the lexer state machine. The ordinal of a state selects the row
 * of the transition table, so the order of the members must not be changed.
 *)
TransitionState = (
  transitionStateStart,          (* Initial state, nothing has been read yet. *)
  transitionStateColon,          (* A colon has been read. *)
  transitionStateIdentifier,     (* Inside an identifier or a keyword. *)
  transitionStateDecimal,        (* Inside a number starting with 1 to 9. *)
  transitionStateGreater,        (* A greater-than sign has been read. *)
  transitionStateMinus,          (* A minus sign has been read. *)
  transitionStateLeftParen,      (* An opening parenthesis has been read. *)
  transitionStateLess,           (* A less-than sign has been read. *)
  transitionStateDot,            (* A dot has been read. *)
  transitionStateComment,        (* Inside a comment. *)
  transitionStateClosingComment, (* Inside a comment, right after an asterisk. *)
  transitionStateCharacter,      (* After an opening single quote. *)
  transitionStateString,         (* After an opening double quote. *)
  transitionStateLeadingZero,    (* A zero has been read as the first character. *)
  transitionStateDecimalSuffix,  (* Entered from the decimal state on a letter. *)
  transitionStateEnd             (* Final state, no further transition. *)
);
|
|
(* Procedure stored in a table cell; it receives the lexer and the token being
 * built. *)
TransitionAction = PROCEDURE(PLexer, PLexerToken);

(* One cell of the transition table: the action belonging to the transition and
 * the state the machine is in afterwards. *)
Transition = RECORD
  Action: TransitionAction;
  NextState: TransitionState
END;
|
|
|
|
VAR
  (* Transition table. The first index is the ordinal of the current state, the
   * second index is the ordinal of the class of the current character. *)
  Transitions: ARRAY[0..MAX(TransitionState)] OF ARRAY[0..MAX(TransitionClass)] OF Transition;

  (* Class of every 7-bit character. The index is the character code plus one,
   * so NUL is stored at index 1 and DEL at index 128. *)
  Classification: ARRAY[1..128] OF TransitionClass;
|
|
|
|
(*
 * Fills the Classification table. The entry for a character is stored at the
 * index equal to its character code plus one.
 *)
PROCEDURE InitializeClassification();
VAR
  Slot: CARDINAL;
BEGIN
  (* Start with every entry invalid. What remains invalid afterwards are the
   * control characters (except NUL, HT, LF, CR) and DEL. *)
  FOR Slot := 1 TO 128 DO
    Classification[Slot] := transitionClassInvalid
  END;

  Classification[1] := transitionClassEof; (* NUL *)

  (* Whitespace. *)
  Classification[10] := transitionClassSpace; (* HT *)
  Classification[11] := transitionClassSpace; (* LF *)
  Classification[14] := transitionClassSpace; (* CR *)
  Classification[33] := transitionClassSpace; (* Space *)

  (* Digits. Zero has a class of its own. *)
  Classification[49] := transitionClassZero; (* 0 *)
  FOR Slot := 50 TO 58 DO (* 1 to 9 *)
    Classification[Slot] := transitionClassDigit
  END;

  (* Letters and the underscore. Lowercase a to f and x have classes of their
   * own; uppercase letters are all plain letters. *)
  FOR Slot := 66 TO 91 DO (* A to Z *)
    Classification[Slot] := transitionClassAlpha
  END;
  FOR Slot := 98 TO 103 DO (* a to f *)
    Classification[Slot] := transitionClassHex
  END;
  FOR Slot := 104 TO 123 DO (* g to z *)
    Classification[Slot] := transitionClassAlpha
  END;
  Classification[121] := transitionClassX; (* x, overrides the range above *)
  Classification[96] := transitionClassUnderscore; (* _ *)

  (* Punctuation with a class of its own. *)
  Classification[35] := transitionClassDoubleQuote; (* double quote *)
  Classification[40] := transitionClassSingleQuote; (* single quote *)
  Classification[41] := transitionClassLeftParen; (* opening parenthesis *)
  Classification[42] := transitionClassRightParen; (* closing parenthesis *)
  Classification[43] := transitionClassAsterisk; (* asterisk *)
  Classification[46] := transitionClassMinus; (* - *)
  Classification[47] := transitionClassDot; (* . *)
  Classification[59] := transitionClassColon; (* : *)
  Classification[61] := transitionClassLess; (* < *)
  Classification[62] := transitionClassEquals; (* = *)
  Classification[63] := transitionClassGreater; (* > *)

  (* Characters of the "single" class. *)
  Classification[34] := transitionClassSingle; (* ! *)
  Classification[38] := transitionClassSingle; (* % *)
  Classification[39] := transitionClassSingle; (* & *)
  Classification[44] := transitionClassSingle; (* + *)
  Classification[45] := transitionClassSingle; (* , *)
  Classification[48] := transitionClassSingle; (* / *)
  Classification[60] := transitionClassSingle; (* ; *)
  Classification[65] := transitionClassSingle; (* @ *)
  Classification[92] := transitionClassSingle; (* [ *)
  Classification[94] := transitionClassSingle; (* ] *)
  Classification[95] := transitionClassSingle; (* ^ *)
  Classification[125] := transitionClassSingle; (* | *)
  Classification[127] := transitionClassSingle; (* ~ *)

  (* Remaining printable characters. *)
  Classification[36] := transitionClassOther; (* # *)
  Classification[37] := transitionClassOther; (* $ *)
  Classification[64] := transitionClassOther; (* ? *)
  Classification[93] := transitionClassOther; (* \ *)
  Classification[97] := transitionClassOther; (* ` *)
  Classification[124] := transitionClassOther; (* { *)
  Classification[126] := transitionClassOther (* } *)
END InitializeClassification;
|
|
|
|
(*
 * Compares a keyword with the characters between TokenStart (inclusive) and
 * TokenEnd (exclusive).
 *
 * Returns TRUE only if both have the same length and all characters are equal.
 *)
PROCEDURE CompareKeyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
VAR
  Result: BOOLEAN;
  Index: CARDINAL;
  KeywordLength: CARDINAL;
BEGIN
  Index := 0;
  Result := TRUE;
  (* The keyword does not change, so its length is determined only once
   * instead of on every iteration. *)
  KeywordLength := Length(Keyword);

  WHILE (Index < KeywordLength) AND (TokenStart <> TokenEnd) AND Result DO
    Result := Keyword[Index] = TokenStart^;
    INC(TokenStart);
    INC(Index)
  END;
  (* Both sequences have to be exhausted, otherwise one is a prefix of the
   * other. *)
  RETURN (Index = KeywordLength) AND (TokenStart = TokenEnd) AND Result
END CompareKeyword;
|
|
|
|
(* Reached the end of file: marks the token as the end of file token. The lexer
 * position is left untouched. *)
PROCEDURE TransitionActionEof(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  WITH AToken^ DO
    Kind := lexerKindEof
  END
END TransitionActionEof;
|
|
|
|
(* Add the character to the token currently read and advance to the next
 * character. The token itself is not modified. *)
PROCEDURE TransitionActionAccumulate(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  WITH ALexer^ DO
    INC(Current)
  END
END TransitionActionAccumulate;
|
|
|
|
(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The kind of the token is derived from its first character. If the first
 * character is none of the characters handled below, the token kind is left
 * unchanged. *)
PROCEDURE TransitionActionFinalize(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  IF ALexer^.Start^ = ':' THEN
    AToken^.Kind := lexerKindColon
  ELSIF ALexer^.Start^ = '>' THEN
    AToken^.Kind := lexerKindGreaterThan
  ELSIF ALexer^.Start^ = '<' THEN
    AToken^.Kind := lexerKindLessThan
  ELSIF ALexer^.Start^ = '(' THEN
    AToken^.Kind := lexerKindLeftParen
  ELSIF ALexer^.Start^ = '-' THEN
    (* A lone minus sign used to be reported as a left parenthesis.
     * NOTE(review): lexerKindMinus is expected to be declared next to
     * lexerKindPlus in the definition module - confirm. *)
    AToken^.Kind := lexerKindMinus
  ELSIF ALexer^.Start^ = '.' THEN
    AToken^.Kind := lexerKindDot
  END
END TransitionActionFinalize;
|
|
|
|
(* An action for tokens containing multiple characters.
 *
 * The token kind is chosen from the first character of the token and the
 * current character. The current character is consumed in any case, even when
 * the pair is not recognized and the kind is left unchanged.
 *
 * NOTE(review): the minus state routes "->" to this action, but there is no
 * branch for that pair, so the token kind is not set for it - confirm. *)
PROCEDURE TransitionActionComposite(ALexer: PLexer; AToken: PLexerToken);
VAR
  First: CHAR;
  Second: CHAR;
BEGIN
  First := ALexer^.Start^;
  Second := ALexer^.Current^;

  IF (First = '<') AND (Second = '>') THEN
    AToken^.Kind := lexerKindNotEqual
  ELSIF (First = '<') AND (Second = '=') THEN
    AToken^.Kind := lexerKindLessEqual
  ELSIF (First = '>') AND (Second = '=') THEN
    AToken^.Kind := lexerKindGreaterEqual
  ELSIF (First = '.') AND (Second = '.') THEN
    AToken^.Kind := lexerKindRange
  ELSIF (First = ':') AND (Second = '=') THEN
    AToken^.Kind := lexerKindAssignment
  END;
  INC(ALexer^.Current)
END TransitionActionComposite;
|
|
|
|
(* Skip a space: both the beginning of the token and the current position are
 * moved past the character, so it does not become a part of any token. *)
PROCEDURE TransitionActionSkip(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  WITH ALexer^ DO
    INC(Start);
    INC(Current)
  END
END TransitionActionSkip;
|
|
|
|
(* Delimited token action. Finishes a token enclosed in delimiters (a comment,
 * a character or a string) and consumes the closing delimiter.
 *
 * The token kind is derived from the opening delimiter, the first character of
 * the token. The single quote opens a character and the double quote opens a
 * string, matching the states the start state enters for these delimiters. *)
PROCEDURE TransitionActionDelimited(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  IF ALexer^.Start^ = '(' THEN
    AToken^.Kind := lexerKindComment
  ELSIF ALexer^.Start^ = "'" THEN
    AToken^.Kind := lexerKindCharacter
  ELSIF ALexer^.Start^ = '"' THEN
    AToken^.Kind := lexerKindString
  END;
  INC(ALexer^.Current)
END TransitionActionDelimited;
|
|
|
|
(* Finalize keyword or identifier.
 *
 * The characters between the beginning of the token and the current position
 * are compared with every known keyword. If none matches, the token is an
 * identifier. TRUE and FALSE produce a boolean token carrying the value. The
 * lexer position is not changed. *)
PROCEDURE TransitionActionKeyId(ALexer: PLexer; AToken: PLexerToken);
VAR
  WordStart: PLexerBuffer;
  WordEnd: PLexerBuffer;
BEGIN
  WordStart := ALexer^.Start;
  WordEnd := ALexer^.Current;

  IF CompareKeyword('PROGRAM', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindProgram
  ELSIF CompareKeyword('IMPORT', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindImport
  ELSIF CompareKeyword('CONST', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindConst
  ELSIF CompareKeyword('VAR', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindVar
  ELSIF CompareKeyword('IF', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindIf
  ELSIF CompareKeyword('THEN', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindThen
  ELSIF CompareKeyword('ELSIF', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindElsif
  ELSIF CompareKeyword('ELSE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindElse
  ELSIF CompareKeyword('WHILE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindWhile
  ELSIF CompareKeyword('DO', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindDo
  ELSIF CompareKeyword('PROCEDURE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindProc
  ELSIF CompareKeyword('BEGIN', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindBegin
  ELSIF CompareKeyword('END', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindEnd
  ELSIF CompareKeyword('TYPE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindType
  ELSIF CompareKeyword('RECORD', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindRecord
  ELSIF CompareKeyword('UNION', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindUnion
  ELSIF CompareKeyword('NIL', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindNull
  ELSIF CompareKeyword('AND', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindAnd
  ELSIF CompareKeyword('OR', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindOr
  ELSIF CompareKeyword('RETURN', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindReturn
  ELSIF CompareKeyword('DEFINITION', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindDefinition
  ELSIF CompareKeyword('TO', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindTo
  ELSIF CompareKeyword('CASE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindCase
  ELSIF CompareKeyword('OF', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindOf
  ELSIF CompareKeyword('FROM', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindFrom
  ELSIF CompareKeyword('MODULE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindModule
  ELSIF CompareKeyword('IMPLEMENTATION', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindImplementation
  ELSIF CompareKeyword('TRUE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindBoolean;
    AToken^.booleanKind := TRUE
  ELSIF CompareKeyword('FALSE', WordStart, WordEnd) THEN
    AToken^.Kind := lexerKindBoolean;
    AToken^.booleanKind := FALSE
  ELSE
    AToken^.Kind := lexerKindIdentifier
  END
END TransitionActionKeyId;
|
|
|
|
(* Action for tokens containing only one character. The character cannot be
 * followed by other characters forming a composite token.
 *
 * The token kind is chosen from the current character, which is consumed
 * afterwards.
 *
 * NOTE(review): the start state also routes the asterisk, "!" and "~" to this
 * action, but the CASE statement has no label for them - confirm what is
 * intended for these characters. *)
PROCEDURE TransitionActionSingle(ALexer: PLexer; AToken: PLexerToken);
VAR
  Symbol: CHAR;
BEGIN
  Symbol := ALexer^.Current^;

  CASE Symbol OF
    (* Operators. *)
    '+': AToken^.Kind := lexerKindPlus |
    '/': AToken^.Kind := lexerKindDivision |
    '%': AToken^.Kind := lexerKindRemainder |
    '=': AToken^.Kind := lexerKindEqual |
    '&': AToken^.Kind := lexerKindAnd |
    '@': AToken^.Kind := lexerKindAt |
    '^': AToken^.Kind := lexerKindHat |
    '|': AToken^.Kind := lexerKindPipe |
    (* Punctuation. *)
    ';': AToken^.Kind := lexerKindSemicolon |
    ',': AToken^.Kind := lexerKindComma |
    ')': AToken^.Kind := lexerKindRightParen |
    '[': AToken^.Kind := lexerKindLeftSquare |
    ']': AToken^.Kind := lexerKindRightSquare
  END;
  INC(ALexer^.Current)
END TransitionActionSingle;
|
|
|
|
(* Handle an integer literal: marks the token as an integer. The lexer position
 * is not changed. *)
PROCEDURE TransitionActionInteger(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  WITH AToken^ DO
    Kind := lexerKindInteger
  END
END TransitionActionInteger;
|
|
|
|
(*
 * Fills the complete row of the transition table that belongs to CurrentState:
 * for every character class the action is set to DefaultAction and the target
 * state to NextState. Individual cells can be overwritten afterwards.
 *)
PROCEDURE SetDefaultTransition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState);
VAR
  Row: CARDINAL;
  Column: CARDINAL;
BEGIN
  Row := ORD(CurrentState);

  (* transitionClassInvalid is the first and transitionClassOther the last
   * member of TransitionClass, so the loop visits every class. *)
  FOR Column := ORD(transitionClassInvalid) TO ORD(transitionClassOther) DO
    Transitions[Row][Column].Action := DefaultAction;
    Transitions[Row][Column].NextState := NextState
  END
END SetDefaultTransition;
|
|
|
|
(*
|
|
* The transition table describes transitions from one state to another, given
|
|
* a symbol (character class).
|
|
*
|
|
* The table has m rows and n columns, where m is the amount of states and n is
|
|
* the amount of classes. So given the current state and a classified character
|
|
* the table can be used to look up the next state.
|
|
*
|
|
* Each cell is a Transition record.
* - NextState specifies the target state. transitionStateEnd means that this
*   is an end state and no transition is possible.
* - Action is the procedure that should be performed when transitioning. Some
*   cells store NIL instead of a procedure. For the meaning of the actions see
*   the TransitionAction procedures in this module.
|
|
*)
|
|
PROCEDURE InitializeTransitions();
|
|
BEGIN
|
|
(* Start state. *)
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].Action := NIL;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].NextState := transitionStateDecimal;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].Action := TransitionActionSkip;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].NextState := transitionStateStart;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].NextState := transitionStateColon;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].Action := TransitionActionSingle;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].NextState := transitionStateLeftParen;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].Action := TransitionActionSingle;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].Action := TransitionActionSingle;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].Action := TransitionActionSingle;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].NextState := transitionStateLeadingZero;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].Action := TransitionActionEof;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].NextState := transitionStateDot;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].NextState := transitionStateMinus;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].NextState := transitionStateCharacter;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].NextState := transitionStateString;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].NextState := transitionStateGreater;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].NextState := transitionStateLess;
|
|
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].Action := NIL;
|
|
Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].NextState := transitionStateEnd;
|
|
|
|
(* Colon state. *)
|
|
SetDefaultTransition(transitionStateColon, TransitionActionFinalize, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].Action := TransitionActionComposite;
|
|
Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
|
|
|
|
(* Identifier state. *)
|
|
SetDefaultTransition(transitionStateIdentifier, TransitionActionKeyId, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].NextState := transitionStateIdentifier;
|
|
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].NextState := transitionStateIdentifier;
|
|
|
|
(* Decimal state. *)
|
|
SetDefaultTransition(transitionStateDecimal, TransitionActionInteger, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].NextState := transitionStateDecimal;
|
|
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].NextState := transitionStateDecimalSuffix;
|
|
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].Action := NIL;
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].NextState := transitionStateDecimalSuffix;
|
|
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].NextState := transitionStateDecimal;
|
|
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].NextState := transitionStateDecimalSuffix;
|
|
|
|
(* Greater state. *)
|
|
SetDefaultTransition(transitionStateGreater, TransitionActionFinalize, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].Action := TransitionActionComposite;
|
|
Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
|
|
|
|
(* Minus state. *)
|
|
SetDefaultTransition(transitionStateMinus, TransitionActionFinalize, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].Action := TransitionActionComposite;
|
|
Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].NextState := transitionStateEnd;
|
|
|
|
(* Left paren state. *)
|
|
SetDefaultTransition(transitionStateLeftParen, TransitionActionFinalize, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].NextState := transitionStateComment;
|
|
|
|
(* Less state. *)
|
|
SetDefaultTransition(transitionStateLess, TransitionActionFinalize, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].Action := TransitionActionComposite;
|
|
Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].Action := TransitionActionComposite;
|
|
Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].NextState := transitionStateEnd;
|
|
|
|
(* Dot state. *)
|
|
SetDefaultTransition(transitionStateDot, TransitionActionFinalize, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].Action := TransitionActionComposite;
|
|
Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].NextState := transitionStateEnd;
|
|
|
|
(* Comment. *)
|
|
SetDefaultTransition(transitionStateComment, TransitionActionAccumulate, transitionStateComment);
|
|
|
|
Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment;
|
|
|
|
Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].Action := NIL;
|
|
Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].NextState := transitionStateEnd;
|
|
|
|
(* Closing comment. *)
|
|
SetDefaultTransition(transitionStateClosingComment, TransitionActionAccumulate, transitionStateComment);
|
|
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].Action := NIL;
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].Action := TransitionActionDelimited;
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate;
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment;
|
|
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].Action := NIL;
|
|
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].NextState := transitionStateEnd;
|
|
|
|
(* Character. *)
|
|
SetDefaultTransition(transitionStateCharacter, TransitionActionAccumulate, transitionStateCharacter);
|
|
|
|
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].Action := NIL;
|
|
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].Action := NIL;
|
|
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].Action := TransitionActionDelimited;
|
|
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].NextState := transitionStateEnd;
|
|
|
|
(* String. *)
|
|
SetDefaultTransition(transitionStateString, TransitionActionAccumulate, transitionStateString);
|
|
|
|
Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].Action := NIL;
|
|
Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].Action := NIL;
|
|
Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].Action := TransitionActionDelimited;
|
|
Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].NextState := transitionStateEnd;
|
|
|
|
(* Leading zero. *)
|
|
SetDefaultTransition(transitionStateLeadingZero, TransitionActionInteger, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].Action := NIL;
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].Action := NIL;
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].Action := NIL;
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].Action := NIL;
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].Action := NIL;
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].Action := NIL;
|
|
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].NextState := transitionStateEnd;
|
|
|
|
(* Digit with a character suffix. *)
|
|
SetDefaultTransition(transitionStateDecimalSuffix, TransitionActionInteger, transitionStateEnd);
|
|
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].Action := NIL;
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].Action := NIL;
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].Action := NIL;
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].Action := NIL;
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].NextState := transitionStateEnd;
|
|
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].Action := NIL;
|
|
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].NextState := transitionStateEnd
|
|
END InitializeTransitions;
|
|
|
|
(*
 * Prepares a lexer record for scanning the given input file.
 *
 * A buffer of ChunkSize bytes is allocated, filled with zeros and its size is
 * remembered in the record. Nothing is read from Input here; Length is set
 * to 0, which is the condition LexerLex tests before reading.
 *)
PROCEDURE LexerInitialize(ALexer: PLexer; Input: File);
BEGIN
  (* Set up the scan buffer and record how large it is. *)
  ALLOCATE(ALexer^.Buffer, ChunkSize);
  ALexer^.Size := ChunkSize;
  MemZero(ALexer^.Buffer, ChunkSize);

  (* No input has been consumed yet. *)
  ALexer^.Length := 0;
  ALexer^.Input := Input
END LexerInitialize;
|
|
|
|
(*
 * Scans the token that begins at ALexer^.Start and returns it.
 *
 * The scan position (Current) is reset to Start and the state machine is run
 * from transitionStateStart until it reaches transitionStateEnd. On every
 * step the character under Current is classified, the matching entry of
 * Transitions is looked up, its action (if any) is invoked with the lexer and
 * a pointer to the result token, and the machine moves to the entry's next
 * state. This loop does not move Current itself; presumably the actions do
 * that (confirm against the TransitionAction* procedures).
 *
 * The result token's Kind is preset to lexerKindTrait before the scan starts.
 *
 * Classification only covers character codes 0..127 (indices 1..128). Bytes
 * with a code of 128 or above are treated as transitionClassInvalid instead
 * of indexing past the end of the table.
 *)
PROCEDURE LexerCurrent(ALexer: PLexer): LexerToken;
VAR
  CharCode: CARDINAL;
  CurrentClass: TransitionClass;
  CurrentState: TransitionState;
  CurrentTransition: Transition;
  Result: LexerToken;
BEGIN
  ALexer^.Current := ALexer^.Start;
  Result.Kind := lexerKindTrait;
  CurrentState := transitionStateStart;

  WHILE CurrentState <> transitionStateEnd DO
    CharCode := ORD(ALexer^.Current^);

    (* The table is indexed from 1, hence the + 1; guard its upper bound. *)
    IF CharCode < 128 THEN
      CurrentClass := Classification[CharCode + 1]
    ELSE
      CurrentClass := transitionClassInvalid
    END;

    CurrentTransition := Transitions[ORD(CurrentState)][ORD(CurrentClass)];
    IF CurrentTransition.Action <> NIL THEN
      CurrentTransition.Action(ALexer, ADR(Result))
    END;
    CurrentState := CurrentTransition.NextState
  END;
  RETURN Result
END LexerCurrent;
|
|
|
|
(*
 * Returns the next token from the lexer's input.
 *
 * While Length is still 0 the buffer is filled from the input file and the
 * scan position is reset to the start of the buffer. The current position
 * then becomes the start of the next token, which is scanned by LexerCurrent.
 *
 * At most ChunkSize - 1 bytes are read. LexerInitialize allocates ChunkSize
 * bytes and zero-fills them, and a NUL byte is classified as end of input, so
 * leaving the last byte untouched guarantees the scan always meets a
 * terminator inside the buffer. Reading the full ChunkSize would overwrite it
 * for inputs of ChunkSize bytes or more.
 *
 * NOTE(review): once Length is non-zero this procedure never reads again, so
 * input beyond the first chunk is not consumed here.
 *)
PROCEDURE LexerLex(ALexer: PLexer): LexerToken;
BEGIN
  IF ALexer^.Length = 0 THEN
    ALexer^.Length := ReadNBytes(ALexer^.Input, ChunkSize - 1, ALexer^.Buffer);
    ALexer^.Current := ALexer^.Buffer
  END;
  ALexer^.Start := ALexer^.Current;

  RETURN LexerCurrent(ALexer)
END LexerLex;
|
|
|
|
(*
 * Releases the lexer's buffer.
 *
 * The buffer is handed back to the storage allocator together with the size
 * recorded in the lexer. No other field of the record is touched and the
 * input file is not closed here.
 *)
PROCEDURE LexerDestroy(ALexer: PLexer);
BEGIN
  WITH ALexer^ DO
    DEALLOCATE(Buffer, Size)
  END
END LexerDestroy;
|
|
|
|
(*
 * Module initialisation body: fills the Classification table and then the
 * Transitions table by calling their two initialisation procedures, so both
 * are populated before any lexer procedure of this module is used.
 *)
BEGIN
|
|
InitializeClassification();
|
|
InitializeTransitions()
|
|
END Lexer.
|