815 lines
37 KiB
Plaintext
815 lines
37 KiB
Plaintext
implementation module Lexer;
|
|
|
|
from FIO import ReadNBytes;
|
|
from SYSTEM import ADR;
|
|
|
|
from Storage import DEALLOCATE, ALLOCATE;
|
|
from Strings import Length;
|
|
from MemUtils import MemCopy, MemZero;
|
|
from StrCase import Lower;
|
|
|
|
const
|
|
CHUNK_SIZE = 65536;
|
|
|
|
type
|
|
(*
|
|
* Classification table assigns each possible character to a group (class). All
|
|
* characters of the same group a handled equivalently.
|
|
*
|
|
* Classification:
|
|
*)
|
|
TransitionClass = (
|
|
transitionClassInvalid,
|
|
transitionClassDigit,
|
|
transitionClassAlpha,
|
|
transitionClassSpace,
|
|
transitionClassColon,
|
|
transitionClassEquals,
|
|
transitionClassLeftParen,
|
|
transitionClassRightParen,
|
|
transitionClassAsterisk,
|
|
transitionClassUnderscore,
|
|
transitionClassSingle,
|
|
transitionClassHex,
|
|
transitionClassZero,
|
|
transitionClassX,
|
|
transitionClassEof,
|
|
transitionClassDot,
|
|
transitionClassMinus,
|
|
transitionClassSingleQuote,
|
|
transitionClassDoubleQuote,
|
|
transitionClassGreater,
|
|
transitionClassLess,
|
|
transitionClassOther
|
|
);
|
|
TransitionState = (
|
|
transitionStateStart,
|
|
transitionStateColon,
|
|
transitionStateIdentifier,
|
|
transitionStateDecimal,
|
|
transitionStateGreater,
|
|
transitionStateMinus,
|
|
transitionStateLeftParen,
|
|
transitionStateLess,
|
|
transitionStateDot,
|
|
transitionStateComment,
|
|
transitionStateClosingComment,
|
|
transitionStateCharacter,
|
|
transitionStateString,
|
|
transitionStateLeadingZero,
|
|
transitionStateDecimalSuffix,
|
|
transitionStateEnd
|
|
);
|
|
TransitionAction = proc(PLexer, PLexerToken);
|
|
Transition = record
|
|
Action: TransitionAction;
|
|
NextState: TransitionState
|
|
end;
|
|
TransitionClasses = [22]Transition;
|
|
|
|
var
|
|
classification: [128]TransitionClass;
|
|
transitions: [16]TransitionClasses;
|
|
|
|
proc initialize_classification();
|
|
var
|
|
i: CARDINAL;
|
|
begin
|
|
classification[1] := transitionClassEof; (* NUL *)
|
|
classification[2] := transitionClassInvalid; (* SOH *)
|
|
classification[3] := transitionClassInvalid; (* STX *)
|
|
classification[4] := transitionClassInvalid; (* ETX *)
|
|
classification[5] := transitionClassInvalid; (* EOT *)
|
|
classification[6] := transitionClassInvalid; (* EMQ *)
|
|
classification[7] := transitionClassInvalid; (* ACK *)
|
|
classification[8] := transitionClassInvalid; (* BEL *)
|
|
classification[9] := transitionClassInvalid; (* BS *)
|
|
classification[10] := transitionClassSpace; (* HT *)
|
|
classification[11] := transitionClassSpace; (* LF *)
|
|
classification[12] := transitionClassInvalid; (* VT *)
|
|
classification[13] := transitionClassInvalid; (* FF *)
|
|
classification[14] := transitionClassSpace; (* CR *)
|
|
classification[15] := transitionClassInvalid; (* SO *)
|
|
classification[16] := transitionClassInvalid; (* SI *)
|
|
classification[17] := transitionClassInvalid; (* DLE *)
|
|
classification[18] := transitionClassInvalid; (* DC1 *)
|
|
classification[19] := transitionClassInvalid; (* DC2 *)
|
|
classification[20] := transitionClassInvalid; (* DC3 *)
|
|
classification[21] := transitionClassInvalid; (* DC4 *)
|
|
classification[22] := transitionClassInvalid; (* NAK *)
|
|
classification[23] := transitionClassInvalid; (* SYN *)
|
|
classification[24] := transitionClassInvalid; (* ETB *)
|
|
classification[25] := transitionClassInvalid; (* CAN *)
|
|
classification[26] := transitionClassInvalid; (* EM *)
|
|
classification[27] := transitionClassInvalid; (* SUB *)
|
|
classification[28] := transitionClassInvalid; (* ESC *)
|
|
classification[29] := transitionClassInvalid; (* FS *)
|
|
classification[30] := transitionClassInvalid; (* GS *)
|
|
classification[31] := transitionClassInvalid; (* RS *)
|
|
classification[32] := transitionClassInvalid; (* US *)
|
|
classification[33] := transitionClassSpace; (* Space *)
|
|
classification[34] := transitionClassSingle; (* ! *)
|
|
classification[35] := transitionClassDoubleQuote; (* " *)
|
|
classification[36] := transitionClassOther; (* # *)
|
|
classification[37] := transitionClassOther; (* $ *)
|
|
classification[38] := transitionClassSingle; (* % *)
|
|
classification[39] := transitionClassSingle; (* & *)
|
|
classification[40] := transitionClassSingleQuote; (* ' *)
|
|
classification[41] := transitionClassLeftParen; (* ( *)
|
|
classification[42] := transitionClassRightParen; (* ) *)
|
|
classification[43] := transitionClassAsterisk; (* * *)
|
|
classification[44] := transitionClassSingle; (* + *)
|
|
classification[45] := transitionClassSingle; (* , *)
|
|
classification[46] := transitionClassMinus; (* - *)
|
|
classification[47] := transitionClassDot; (* . *)
|
|
classification[48] := transitionClassSingle; (* / *)
|
|
classification[49] := transitionClassZero; (* 0 *)
|
|
classification[50] := transitionClassDigit; (* 1 *)
|
|
classification[51] := transitionClassDigit; (* 2 *)
|
|
classification[52] := transitionClassDigit; (* 3 *)
|
|
classification[53] := transitionClassDigit; (* 4 *)
|
|
classification[54] := transitionClassDigit; (* 5 *)
|
|
classification[55] := transitionClassDigit; (* 6 *)
|
|
classification[56] := transitionClassDigit; (* 7 *)
|
|
classification[57] := transitionClassDigit; (* 8 *)
|
|
classification[58] := transitionClassDigit; (* 9 *)
|
|
classification[59] := transitionClassColon; (* : *)
|
|
classification[60] := transitionClassSingle; (* ; *)
|
|
classification[61] := transitionClassLess; (* < *)
|
|
classification[62] := transitionClassEquals; (* = *)
|
|
classification[63] := transitionClassGreater; (* > *)
|
|
classification[64] := transitionClassOther; (* ? *)
|
|
classification[65] := transitionClassSingle; (* @ *)
|
|
classification[66] := transitionClassAlpha; (* A *)
|
|
classification[67] := transitionClassAlpha; (* B *)
|
|
classification[68] := transitionClassAlpha; (* C *)
|
|
classification[69] := transitionClassAlpha; (* D *)
|
|
classification[70] := transitionClassAlpha; (* E *)
|
|
classification[71] := transitionClassAlpha; (* F *)
|
|
classification[72] := transitionClassAlpha; (* G *)
|
|
classification[73] := transitionClassAlpha; (* H *)
|
|
classification[74] := transitionClassAlpha; (* I *)
|
|
classification[75] := transitionClassAlpha; (* J *)
|
|
classification[76] := transitionClassAlpha; (* K *)
|
|
classification[77] := transitionClassAlpha; (* L *)
|
|
classification[78] := transitionClassAlpha; (* M *)
|
|
classification[79] := transitionClassAlpha; (* N *)
|
|
classification[80] := transitionClassAlpha; (* O *)
|
|
classification[81] := transitionClassAlpha; (* P *)
|
|
classification[82] := transitionClassAlpha; (* Q *)
|
|
classification[83] := transitionClassAlpha; (* R *)
|
|
classification[84] := transitionClassAlpha; (* S *)
|
|
classification[85] := transitionClassAlpha; (* T *)
|
|
classification[86] := transitionClassAlpha; (* U *)
|
|
classification[87] := transitionClassAlpha; (* V *)
|
|
classification[88] := transitionClassAlpha; (* W *)
|
|
classification[89] := transitionClassAlpha; (* X *)
|
|
classification[90] := transitionClassAlpha; (* Y *)
|
|
classification[91] := transitionClassAlpha; (* Z *)
|
|
classification[92] := transitionClassSingle; (* [ *)
|
|
classification[93] := transitionClassOther; (* \ *)
|
|
classification[94] := transitionClassSingle; (* ] *)
|
|
classification[95] := transitionClassSingle; (* ^ *)
|
|
classification[96] := transitionClassUnderscore; (* _ *)
|
|
classification[97] := transitionClassOther; (* ` *)
|
|
classification[98] := transitionClassHex; (* a *)
|
|
classification[99] := transitionClassHex; (* b *)
|
|
classification[100] := transitionClassHex; (* c *)
|
|
classification[101] := transitionClassHex; (* d *)
|
|
classification[102] := transitionClassHex; (* e *)
|
|
classification[103] := transitionClassHex; (* f *)
|
|
classification[104] := transitionClassAlpha; (* g *)
|
|
classification[105] := transitionClassAlpha; (* h *)
|
|
classification[106] := transitionClassAlpha; (* i *)
|
|
classification[107] := transitionClassAlpha; (* j *)
|
|
classification[108] := transitionClassAlpha; (* k *)
|
|
classification[109] := transitionClassAlpha; (* l *)
|
|
classification[110] := transitionClassAlpha; (* m *)
|
|
classification[111] := transitionClassAlpha; (* n *)
|
|
classification[112] := transitionClassAlpha; (* o *)
|
|
classification[113] := transitionClassAlpha; (* p *)
|
|
classification[114] := transitionClassAlpha; (* q *)
|
|
classification[115] := transitionClassAlpha; (* r *)
|
|
classification[116] := transitionClassAlpha; (* s *)
|
|
classification[117] := transitionClassAlpha; (* t *)
|
|
classification[118] := transitionClassAlpha; (* u *)
|
|
classification[119] := transitionClassAlpha; (* v *)
|
|
classification[120] := transitionClassAlpha; (* w *)
|
|
classification[121] := transitionClassX; (* x *)
|
|
classification[122] := transitionClassAlpha; (* y *)
|
|
classification[123] := transitionClassAlpha; (* z *)
|
|
classification[124] := transitionClassOther; (* { *)
|
|
classification[125] := transitionClassSingle; (* | *)
|
|
classification[126] := transitionClassOther; (* } *)
|
|
classification[127] := transitionClassSingle; (* ~ *)
|
|
classification[128] := transitionClassInvalid; (* DEL *)
|
|
|
|
i := 129;
|
|
while i <= 256 do
|
|
classification[i] := transitionClassOther;
|
|
i := i + 1
|
|
end
|
|
end;
|
|
|
|
proc compare_keyword(Keyword: ARRAY OF CHAR, TokenStart: PLexerBuffer, TokenEnd: PLexerBuffer) -> BOOLEAN;
|
|
var
|
|
result: BOOLEAN;
|
|
index: CARDINAL;
|
|
begin
|
|
index := 0;
|
|
result := true;
|
|
|
|
while (index < Length(Keyword)) & (TokenStart <> TokenEnd) & result DO
|
|
result := (Keyword[index] = TokenStart^) or (Lower(Keyword[index]) = TokenStart^);
|
|
INC(TokenStart);
|
|
INC(index)
|
|
end;
|
|
result := (index = Length(Keyword)) & (TokenStart = TokenEnd) & result;
|
|
return result
|
|
end;
|
|
|
|
(* Reached the end of file. *)
|
|
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
token^.kind := lexerKindEof
|
|
end;
|
|
|
|
(* Add the character to the token currently read and advance to the next character. *)
|
|
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
INC(lexer^.Current)
|
|
end;
|
|
|
|
(* The current character is not a part of the token. Finish the token already
|
|
* read. Don't advance to the next character. *)
|
|
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
if lexer^.Start^ = ':' then
|
|
token^.kind := lexerKindColon
|
|
end;
|
|
if lexer^.Start^ = '>' then
|
|
token^.kind := lexerKindGreaterThan
|
|
end;
|
|
if lexer^.Start^ = '<' then
|
|
token^.kind := lexerKindLessThan
|
|
end;
|
|
if lexer^.Start^ = '(' then
|
|
token^.kind := lexerKindLeftParen
|
|
end;
|
|
if lexer^.Start^ = '-' then
|
|
token^.kind := lexerKindLeftParen
|
|
end;
|
|
if lexer^.Start^ = '.' then
|
|
token^.kind := lexerKindDot
|
|
end
|
|
end;
|
|
|
|
(* An action for tokens containing multiple characters. *)
|
|
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
if lexer^.Start^ = '<' then
|
|
if lexer^.Current^ = '>' then
|
|
token^.kind := lexerKindNotEqual
|
|
end;
|
|
if lexer^.Current^ = '=' then
|
|
token^.kind := lexerKindLessEqual
|
|
end
|
|
end;
|
|
if (lexer^.Start^ = '>') & (lexer^.Current^ = '=') then
|
|
token^.kind := lexerKindGreaterEqual
|
|
end;
|
|
if (lexer^.Start^ = '.') & (lexer^.Current^ = '.') then
|
|
token^.kind := lexerKindRange
|
|
end;
|
|
if (lexer^.Start^ = ':') & (lexer^.Current^ = '=') then
|
|
token^.kind := lexerKindAssignment
|
|
end;
|
|
if (lexer^.Start^ = '-') & (lexer^.Current^ = '>') then
|
|
token^.kind := lexerKindArrow
|
|
end;
|
|
INC(lexer^.Current)
|
|
end;
|
|
|
|
(* Skip a space. *)
|
|
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
INC(lexer^.Current);
|
|
INC(lexer^.Start)
|
|
end;
|
|
|
|
(* Delimited string action. *)
|
|
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
if lexer^.Start^ = '(' then
|
|
token^.kind := lexerKindComment
|
|
end;
|
|
if lexer^.Start^ = '"' then
|
|
token^.kind := lexerKindCharacter
|
|
end;
|
|
if lexer^.Start^ = "'" then
|
|
token^.kind := lexerKindString
|
|
end;
|
|
INC(lexer^.Current)
|
|
end;
|
|
|
|
(* Finalize keyword or identifier. *)
|
|
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
token^.kind := lexerKindIdentifier;
|
|
|
|
token^.identifierKind[1] := lexer^.Current - lexer^.Start;
|
|
MemCopy(lexer^.Start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
|
|
|
|
if compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindProgram
|
|
end;
|
|
if compare_keyword('IMPORT', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindImport
|
|
end;
|
|
if compare_keyword('CONST', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindConst
|
|
end;
|
|
if compare_keyword('VAR', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindVar
|
|
end;
|
|
if compare_keyword('IF', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindIf
|
|
end;
|
|
if compare_keyword('THEN', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindThen
|
|
end;
|
|
if compare_keyword('ELSIF', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindElsif
|
|
end;
|
|
if compare_keyword('ELSE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindElse
|
|
end;
|
|
if compare_keyword('WHILE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindWhile
|
|
end;
|
|
if compare_keyword('DO', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindDo
|
|
end;
|
|
if compare_keyword('proc', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindProc
|
|
end;
|
|
if compare_keyword('BEGIN', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindBegin
|
|
end;
|
|
if compare_keyword('END', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindEnd
|
|
end;
|
|
if compare_keyword('TYPE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindType
|
|
end;
|
|
if compare_keyword('RECORD', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindRecord
|
|
end;
|
|
if compare_keyword('UNION', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindUnion
|
|
end;
|
|
if compare_keyword('NIL', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindNull
|
|
end;
|
|
if compare_keyword('AND', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindAnd
|
|
end;
|
|
if compare_keyword('OR', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindOr
|
|
end;
|
|
if compare_keyword('RETURN', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindReturn
|
|
end;
|
|
if compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindDefinition
|
|
end;
|
|
if compare_keyword('TO', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindTo
|
|
end;
|
|
if compare_keyword('CASE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindCase
|
|
end;
|
|
if compare_keyword('OF', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindOf
|
|
end;
|
|
if compare_keyword('FROM', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindFrom
|
|
end;
|
|
if compare_keyword('MODULE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindModule
|
|
end;
|
|
if compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindImplementation
|
|
end;
|
|
if compare_keyword('POINTER', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindPointer
|
|
end;
|
|
if compare_keyword('ARRAY', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindArray
|
|
end;
|
|
if compare_keyword('TRUE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindBoolean;
|
|
token^.booleanKind := true
|
|
end;
|
|
if compare_keyword('FALSE', lexer^.Start, lexer^.Current) then
|
|
token^.kind := lexerKindBoolean;
|
|
token^.booleanKind := false
|
|
end
|
|
end;
|
|
|
|
(* Action for tokens containing only one character. The character cannot be
|
|
* followed by other characters forming a composite token. *)
|
|
proc transition_action_single(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
if lexer^.Current^ = '&' then
|
|
token^.kind := lexerKindAnd
|
|
end;
|
|
if lexer^.Current^ = ';' then
|
|
token^.kind := lexerKindSemicolon
|
|
end;
|
|
if lexer^.Current^ = ',' then
|
|
token^.kind := lexerKindComma
|
|
end;
|
|
if lexer^.Current^ = ',' then
|
|
token^.kind := lexerKindComma
|
|
end;
|
|
if lexer^.Current^ = ')' then
|
|
token^.kind := lexerKindRightParen
|
|
end;
|
|
if lexer^.Current^ = '[' then
|
|
token^.kind := lexerKindLeftSquare
|
|
end;
|
|
if lexer^.Current^ = ']' then
|
|
token^.kind := lexerKindRightSquare
|
|
end;
|
|
if lexer^.Current^ = '^' then
|
|
token^.kind := lexerKindHat
|
|
end;
|
|
if lexer^.Current^ = '=' then
|
|
token^.kind := lexerKindEqual
|
|
end;
|
|
if lexer^.Current^ = '+' then
|
|
token^.kind := lexerKindPlus
|
|
end;
|
|
if lexer^.Current^ = '/' then
|
|
token^.kind := lexerKindDivision
|
|
end;
|
|
if lexer^.Current^ = '%' then
|
|
token^.kind := lexerKindRemainder
|
|
end;
|
|
if lexer^.Current^ = '@' then
|
|
token^.kind := lexerKindAt
|
|
end;
|
|
if lexer^.Current^ = '|' then
|
|
token^.kind := lexerKindPipe
|
|
end;
|
|
INC(lexer^.Current)
|
|
end;
|
|
|
|
(* Handle an integer literal. *)
|
|
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
|
|
begin
|
|
token^.kind := lexerKindInteger
|
|
end;
|
|
|
|
proc set_default_transition(CurrentState: TransitionState, DefaultAction: TransitionAction, NextState: TransitionState);
|
|
var
|
|
DefaultTransition: Transition;
|
|
begin
|
|
DefaultTransition.Action := DefaultAction;
|
|
DefaultTransition.NextState := NextState;
|
|
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassInvalid) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassDigit) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassAlpha) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassSpace) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassColon) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassEquals) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassLeftParen) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassRightParen) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassAsterisk) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassUnderscore) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassSingle) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassHex) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassZero) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassX) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassEof) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassDot) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassMinus) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassSingleQuote) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassDoubleQuote) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassGreater) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassLess) + 1] := DefaultTransition;
|
|
transitions[ORD(CurrentState) + 1][ORD(transitionClassOther) + 1] := DefaultTransition
|
|
end;
|
|
|
|
(*
|
|
* The transition table describes transitions from one state to another, given
|
|
* a symbol (character class).
|
|
*
|
|
* The table has m rows and n columns, where m is the amount of states and n is
|
|
* the amount of classes. So given the current state and a classified character
|
|
* the table can be used to look up the next state.
|
|
*
|
|
* Each cell is a word long.
|
|
* - The least significant byte of the word is a row number (beginning with 0).
|
|
* It specifies the target state. "ff" means that this is an end state and no
|
|
* transition is possible.
|
|
* - The next byte is the action that should be performed when transitioning.
|
|
* For the meaning of actions see labels in the lex_next function, which
|
|
* handles each action.
|
|
*)
|
|
proc initialize_transitions();
|
|
begin
|
|
(* Start state. *)
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateCharacter;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess;
|
|
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := nil;
|
|
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Colon state. *)
|
|
set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
|
|
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Identifier state. *)
|
|
set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier;
|
|
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
|
|
|
|
(* Decimal state. *)
|
|
set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
|
|
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix;
|
|
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := nil;
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix;
|
|
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal;
|
|
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix;
|
|
|
|
(* Greater state. *)
|
|
set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
|
|
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Minus state. *)
|
|
set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
|
|
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Left paren state. *)
|
|
set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment;
|
|
|
|
(* Less state. *)
|
|
set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
|
|
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
|
|
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Hexadecimal after 0x. *)
|
|
set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite;
|
|
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Comment. *)
|
|
set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment);
|
|
|
|
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
|
|
|
|
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := nil;
|
|
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Closing comment. *)
|
|
set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment);
|
|
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited;
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
|
|
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := nil;
|
|
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Character. *)
|
|
set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter);
|
|
|
|
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
|
|
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := nil;
|
|
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited;
|
|
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd;
|
|
|
|
(* String. *)
|
|
set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString);
|
|
|
|
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
|
|
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := nil;
|
|
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited;
|
|
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Leading zero. *)
|
|
set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := nil;
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := nil;
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := nil;
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := nil;
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := nil;
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := nil;
|
|
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd;
|
|
|
|
(* Digit with a character suffix. *)
|
|
set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);
|
|
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := nil;
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := nil;
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := nil;
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := nil;
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
|
|
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := nil;
|
|
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd
|
|
end;
|
|
|
|
proc lexer_initialize(lexer: PLexer, Input: File);
|
|
begin
|
|
lexer^.Input := Input;
|
|
lexer^.Length := 0;
|
|
|
|
ALLOCATE(lexer^.Buffer, CHUNK_SIZE);
|
|
MemZero(lexer^.Buffer, CHUNK_SIZE);
|
|
lexer^.Size := CHUNK_SIZE
|
|
end;
|
|
|
|
proc lexer_current(lexer: PLexer) -> LexerToken;
|
|
var
|
|
CurrentClass: TransitionClass;
|
|
CurrentState: TransitionState;
|
|
CurrentTransition: Transition;
|
|
result: LexerToken;
|
|
begin
|
|
lexer^.Current := lexer^.Start;
|
|
CurrentState := transitionStateStart;
|
|
|
|
while CurrentState <> transitionStateEnd DO
|
|
CurrentClass := classification[ORD(lexer^.Current^) + 1];
|
|
|
|
CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
|
|
if CurrentTransition.Action <> nil then
|
|
CurrentTransition.Action(lexer, ADR(result))
|
|
end;
|
|
CurrentState := CurrentTransition.NextState
|
|
end;
|
|
return result
|
|
end;
|
|
|
|
proc lexer_lex(lexer: PLexer) -> LexerToken;
|
|
var
|
|
result: LexerToken;
|
|
begin
|
|
if lexer^.Length = 0 then
|
|
lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
|
|
lexer^.Current := lexer^.Buffer
|
|
end;
|
|
lexer^.Start := lexer^.Current;
|
|
|
|
result := lexer_current(lexer);
|
|
return result
|
|
end;
|
|
|
|
proc lexer_destroy(lexer: PLexer);
|
|
begin
|
|
DEALLOCATE(lexer^.Buffer, lexer^.Size)
|
|
end;
|
|
|
|
begin
|
|
initialize_classification();
|
|
initialize_transitions()
|
|
end Lexer.
|