elna/source/Lexer.elna

815 lines
37 KiB
Plaintext

implementation module Lexer;
from FIO import ReadNBytes;
from SYSTEM import ADR;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
const
CHUNK_SIZE = 65536;
type
(*
* Classification table assigns each possible character to a group (class). All
* characters of the same group a handled equivalently.
*
* Classification:
*)
TransitionClass = (
transitionClassInvalid,
transitionClassDigit,
transitionClassAlpha,
transitionClassSpace,
transitionClassColon,
transitionClassEquals,
transitionClassLeftParen,
transitionClassRightParen,
transitionClassAsterisk,
transitionClassUnderscore,
transitionClassSingle,
transitionClassHex,
transitionClassZero,
transitionClassX,
transitionClassEof,
transitionClassDot,
transitionClassMinus,
transitionClassSingleQuote,
transitionClassDoubleQuote,
transitionClassGreater,
transitionClassLess,
transitionClassOther
);
TransitionState = (
transitionStateStart,
transitionStateColon,
transitionStateIdentifier,
transitionStateDecimal,
transitionStateGreater,
transitionStateMinus,
transitionStateLeftParen,
transitionStateLess,
transitionStateDot,
transitionStateComment,
transitionStateClosingComment,
transitionStateCharacter,
transitionStateString,
transitionStateLeadingZero,
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = proc(PLexer, PLexerToken);
Transition = record
Action: TransitionAction;
NextState: TransitionState
end;
TransitionClasses = [22]Transition;
var
classification: [128]TransitionClass;
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: CARDINAL;
begin
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
classification[3] := transitionClassInvalid; (* STX *)
classification[4] := transitionClassInvalid; (* ETX *)
classification[5] := transitionClassInvalid; (* EOT *)
classification[6] := transitionClassInvalid; (* EMQ *)
classification[7] := transitionClassInvalid; (* ACK *)
classification[8] := transitionClassInvalid; (* BEL *)
classification[9] := transitionClassInvalid; (* BS *)
classification[10] := transitionClassSpace; (* HT *)
classification[11] := transitionClassSpace; (* LF *)
classification[12] := transitionClassInvalid; (* VT *)
classification[13] := transitionClassInvalid; (* FF *)
classification[14] := transitionClassSpace; (* CR *)
classification[15] := transitionClassInvalid; (* SO *)
classification[16] := transitionClassInvalid; (* SI *)
classification[17] := transitionClassInvalid; (* DLE *)
classification[18] := transitionClassInvalid; (* DC1 *)
classification[19] := transitionClassInvalid; (* DC2 *)
classification[20] := transitionClassInvalid; (* DC3 *)
classification[21] := transitionClassInvalid; (* DC4 *)
classification[22] := transitionClassInvalid; (* NAK *)
classification[23] := transitionClassInvalid; (* SYN *)
classification[24] := transitionClassInvalid; (* ETB *)
classification[25] := transitionClassInvalid; (* CAN *)
classification[26] := transitionClassInvalid; (* EM *)
classification[27] := transitionClassInvalid; (* SUB *)
classification[28] := transitionClassInvalid; (* ESC *)
classification[29] := transitionClassInvalid; (* FS *)
classification[30] := transitionClassInvalid; (* GS *)
classification[31] := transitionClassInvalid; (* RS *)
classification[32] := transitionClassInvalid; (* US *)
classification[33] := transitionClassSpace; (* Space *)
classification[34] := transitionClassSingle; (* ! *)
classification[35] := transitionClassDoubleQuote; (* " *)
classification[36] := transitionClassOther; (* # *)
classification[37] := transitionClassOther; (* $ *)
classification[38] := transitionClassSingle; (* % *)
classification[39] := transitionClassSingle; (* & *)
classification[40] := transitionClassSingleQuote; (* ' *)
classification[41] := transitionClassLeftParen; (* ( *)
classification[42] := transitionClassRightParen; (* ) *)
classification[43] := transitionClassAsterisk; (* * *)
classification[44] := transitionClassSingle; (* + *)
classification[45] := transitionClassSingle; (* , *)
classification[46] := transitionClassMinus; (* - *)
classification[47] := transitionClassDot; (* . *)
classification[48] := transitionClassSingle; (* / *)
classification[49] := transitionClassZero; (* 0 *)
classification[50] := transitionClassDigit; (* 1 *)
classification[51] := transitionClassDigit; (* 2 *)
classification[52] := transitionClassDigit; (* 3 *)
classification[53] := transitionClassDigit; (* 4 *)
classification[54] := transitionClassDigit; (* 5 *)
classification[55] := transitionClassDigit; (* 6 *)
classification[56] := transitionClassDigit; (* 7 *)
classification[57] := transitionClassDigit; (* 8 *)
classification[58] := transitionClassDigit; (* 9 *)
classification[59] := transitionClassColon; (* : *)
classification[60] := transitionClassSingle; (* ; *)
classification[61] := transitionClassLess; (* < *)
classification[62] := transitionClassEquals; (* = *)
classification[63] := transitionClassGreater; (* > *)
classification[64] := transitionClassOther; (* ? *)
classification[65] := transitionClassSingle; (* @ *)
classification[66] := transitionClassAlpha; (* A *)
classification[67] := transitionClassAlpha; (* B *)
classification[68] := transitionClassAlpha; (* C *)
classification[69] := transitionClassAlpha; (* D *)
classification[70] := transitionClassAlpha; (* E *)
classification[71] := transitionClassAlpha; (* F *)
classification[72] := transitionClassAlpha; (* G *)
classification[73] := transitionClassAlpha; (* H *)
classification[74] := transitionClassAlpha; (* I *)
classification[75] := transitionClassAlpha; (* J *)
classification[76] := transitionClassAlpha; (* K *)
classification[77] := transitionClassAlpha; (* L *)
classification[78] := transitionClassAlpha; (* M *)
classification[79] := transitionClassAlpha; (* N *)
classification[80] := transitionClassAlpha; (* O *)
classification[81] := transitionClassAlpha; (* P *)
classification[82] := transitionClassAlpha; (* Q *)
classification[83] := transitionClassAlpha; (* R *)
classification[84] := transitionClassAlpha; (* S *)
classification[85] := transitionClassAlpha; (* T *)
classification[86] := transitionClassAlpha; (* U *)
classification[87] := transitionClassAlpha; (* V *)
classification[88] := transitionClassAlpha; (* W *)
classification[89] := transitionClassAlpha; (* X *)
classification[90] := transitionClassAlpha; (* Y *)
classification[91] := transitionClassAlpha; (* Z *)
classification[92] := transitionClassSingle; (* [ *)
classification[93] := transitionClassOther; (* \ *)
classification[94] := transitionClassSingle; (* ] *)
classification[95] := transitionClassSingle; (* ^ *)
classification[96] := transitionClassUnderscore; (* _ *)
classification[97] := transitionClassOther; (* ` *)
classification[98] := transitionClassHex; (* a *)
classification[99] := transitionClassHex; (* b *)
classification[100] := transitionClassHex; (* c *)
classification[101] := transitionClassHex; (* d *)
classification[102] := transitionClassHex; (* e *)
classification[103] := transitionClassHex; (* f *)
classification[104] := transitionClassAlpha; (* g *)
classification[105] := transitionClassAlpha; (* h *)
classification[106] := transitionClassAlpha; (* i *)
classification[107] := transitionClassAlpha; (* j *)
classification[108] := transitionClassAlpha; (* k *)
classification[109] := transitionClassAlpha; (* l *)
classification[110] := transitionClassAlpha; (* m *)
classification[111] := transitionClassAlpha; (* n *)
classification[112] := transitionClassAlpha; (* o *)
classification[113] := transitionClassAlpha; (* p *)
classification[114] := transitionClassAlpha; (* q *)
classification[115] := transitionClassAlpha; (* r *)
classification[116] := transitionClassAlpha; (* s *)
classification[117] := transitionClassAlpha; (* t *)
classification[118] := transitionClassAlpha; (* u *)
classification[119] := transitionClassAlpha; (* v *)
classification[120] := transitionClassAlpha; (* w *)
classification[121] := transitionClassX; (* x *)
classification[122] := transitionClassAlpha; (* y *)
classification[123] := transitionClassAlpha; (* z *)
classification[124] := transitionClassOther; (* { *)
classification[125] := transitionClassSingle; (* | *)
classification[126] := transitionClassOther; (* } *)
classification[127] := transitionClassSingle; (* ~ *)
classification[128] := transitionClassInvalid; (* DEL *)
i := 129;
while i <= 256 do
classification[i] := transitionClassOther;
i := i + 1
end
end;
proc compare_keyword(Keyword: ARRAY OF CHAR, TokenStart: PLexerBuffer, TokenEnd: PLexerBuffer) -> BOOLEAN;
var
result: BOOLEAN;
index: CARDINAL;
begin
index := 0;
result := true;
while (index < Length(Keyword)) & (TokenStart <> TokenEnd) & result DO
result := (Keyword[index] = TokenStart^) or (Lower(Keyword[index]) = TokenStart^);
INC(TokenStart);
INC(index)
end;
result := (index = Length(Keyword)) & (TokenStart = TokenEnd) & result;
return result
end;
(* Reached the end of file. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindEof
end;
(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.Current)
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
begin
if lexer^.Start^ = ':' then
token^.kind := lexerKindColon
end;
if lexer^.Start^ = '>' then
token^.kind := lexerKindGreaterThan
end;
if lexer^.Start^ = '<' then
token^.kind := lexerKindLessThan
end;
if lexer^.Start^ = '(' then
token^.kind := lexerKindLeftParen
end;
if lexer^.Start^ = '-' then
token^.kind := lexerKindLeftParen
end;
if lexer^.Start^ = '.' then
token^.kind := lexerKindDot
end
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
begin
if lexer^.Start^ = '<' then
if lexer^.Current^ = '>' then
token^.kind := lexerKindNotEqual
end;
if lexer^.Current^ = '=' then
token^.kind := lexerKindLessEqual
end
end;
if (lexer^.Start^ = '>') & (lexer^.Current^ = '=') then
token^.kind := lexerKindGreaterEqual
end;
if (lexer^.Start^ = '.') & (lexer^.Current^ = '.') then
token^.kind := lexerKindRange
end;
if (lexer^.Start^ = ':') & (lexer^.Current^ = '=') then
token^.kind := lexerKindAssignment
end;
if (lexer^.Start^ = '-') & (lexer^.Current^ = '>') then
token^.kind := lexerKindArrow
end;
INC(lexer^.Current)
end;
(* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.Current);
INC(lexer^.Start)
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
begin
if lexer^.Start^ = '(' then
token^.kind := lexerKindComment
end;
if lexer^.Start^ = '"' then
token^.kind := lexerKindCharacter
end;
if lexer^.Start^ = "'" then
token^.kind := lexerKindString
end;
INC(lexer^.Current)
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.Current - lexer^.Start;
MemCopy(lexer^.Start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindProgram
end;
if compare_keyword('IMPORT', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindImport
end;
if compare_keyword('CONST', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindConst
end;
if compare_keyword('VAR', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindVar
end;
if compare_keyword('IF', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindIf
end;
if compare_keyword('THEN', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindThen
end;
if compare_keyword('ELSIF', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindElsif
end;
if compare_keyword('ELSE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindElse
end;
if compare_keyword('WHILE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindWhile
end;
if compare_keyword('DO', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindProc
end;
if compare_keyword('BEGIN', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindBegin
end;
if compare_keyword('END', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindEnd
end;
if compare_keyword('TYPE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindType
end;
if compare_keyword('RECORD', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindRecord
end;
if compare_keyword('UNION', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindNull
end;
if compare_keyword('AND', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindAnd
end;
if compare_keyword('OR', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindOr
end;
if compare_keyword('RETURN', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindReturn
end;
if compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindDefinition
end;
if compare_keyword('TO', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindFrom
end;
if compare_keyword('MODULE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindModule
end;
if compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindImplementation
end;
if compare_keyword('POINTER', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.Start, lexer^.Current) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
begin
if lexer^.Current^ = '&' then
token^.kind := lexerKindAnd
end;
if lexer^.Current^ = ';' then
token^.kind := lexerKindSemicolon
end;
if lexer^.Current^ = ',' then
token^.kind := lexerKindComma
end;
if lexer^.Current^ = ',' then
token^.kind := lexerKindComma
end;
if lexer^.Current^ = ')' then
token^.kind := lexerKindRightParen
end;
if lexer^.Current^ = '[' then
token^.kind := lexerKindLeftSquare
end;
if lexer^.Current^ = ']' then
token^.kind := lexerKindRightSquare
end;
if lexer^.Current^ = '^' then
token^.kind := lexerKindHat
end;
if lexer^.Current^ = '=' then
token^.kind := lexerKindEqual
end;
if lexer^.Current^ = '+' then
token^.kind := lexerKindPlus
end;
if lexer^.Current^ = '/' then
token^.kind := lexerKindDivision
end;
if lexer^.Current^ = '%' then
token^.kind := lexerKindRemainder
end;
if lexer^.Current^ = '@' then
token^.kind := lexerKindAt
end;
if lexer^.Current^ = '|' then
token^.kind := lexerKindPipe
end;
INC(lexer^.Current)
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindInteger
end;
proc set_default_transition(CurrentState: TransitionState, DefaultAction: TransitionAction, NextState: TransitionState);
var
DefaultTransition: Transition;
begin
DefaultTransition.Action := DefaultAction;
DefaultTransition.NextState := NextState;
transitions[ORD(CurrentState) + 1][ORD(transitionClassInvalid) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassDigit) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassAlpha) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassSpace) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassColon) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassEquals) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassLeftParen) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassRightParen) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassAsterisk) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassUnderscore) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassSingle) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassHex) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassZero) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassX) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassEof) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassDot) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassMinus) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassSingleQuote) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassDoubleQuote) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassGreater) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassLess) + 1] := DefaultTransition;
transitions[ORD(CurrentState) + 1][ORD(transitionClassOther) + 1] := DefaultTransition
end;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the amount of states and n is
* the amount of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a word long.
* - The least significant byte of the word is a row number (beginning with 0).
* It specifies the target state. "ff" means that this is an end state and no
* transition is possible.
* - The next byte is the action that should be performed when transitioning.
* For the meaning of actions see labels in the lex_next function, which
* handles each action.
*)
proc initialize_transitions();
begin
(* Start state. *)
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateCharacter;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := nil;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd;
(* Colon state. *)
set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
(* Identifier state. *)
set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd);
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
(* Decimal state. *)
set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := nil;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix;
(* Greater state. *)
set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
(* Minus state. *)
set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
(* Left paren state. *)
set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment;
(* Less state. *)
set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
(* Hexadecimal after 0x. *)
set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd;
(* Comment. *)
set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
(* Closing comment. *)
set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
(* Character. *)
set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter);
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd;
(* String. *)
set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString);
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd;
(* Leading zero. *)
set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd;
(* Digit with a character suffix. *)
set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd
end;
proc lexer_initialize(lexer: PLexer, Input: File);
begin
lexer^.Input := Input;
lexer^.Length := 0;
ALLOCATE(lexer^.Buffer, CHUNK_SIZE);
MemZero(lexer^.Buffer, CHUNK_SIZE);
lexer^.Size := CHUNK_SIZE
end;
proc lexer_current(lexer: PLexer) -> LexerToken;
var
CurrentClass: TransitionClass;
CurrentState: TransitionState;
CurrentTransition: Transition;
result: LexerToken;
begin
lexer^.Current := lexer^.Start;
CurrentState := transitionStateStart;
while CurrentState <> transitionStateEnd DO
CurrentClass := classification[ORD(lexer^.Current^) + 1];
CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
if CurrentTransition.Action <> nil then
CurrentTransition.Action(lexer, ADR(result))
end;
CurrentState := CurrentTransition.NextState
end;
return result
end;
proc lexer_lex(lexer: PLexer) -> LexerToken;
var
result: LexerToken;
begin
if lexer^.Length = 0 then
lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
lexer^.Current := lexer^.Buffer
end;
lexer^.Start := lexer^.Current;
result := lexer_current(lexer);
return result
end;
proc lexer_destroy(lexer: PLexer);
begin
DEALLOCATE(lexer^.Buffer, lexer^.Size)
end;
begin
initialize_classification();
initialize_transitions()
end Lexer.