elna/source/Lexer.elna

843 lines
38 KiB
Plaintext

module;
from FIO import ReadNBytes, StdErr;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
const
CHUNK_SIZE = 65536;
type
(*
* Classification table assigns each possible character to a group (class). All
* characters of the same group a handled equivalently.
*
* Classification:
*)
TransitionClass = (
transitionClassInvalid,
transitionClassDigit,
transitionClassAlpha,
transitionClassSpace,
transitionClassColon,
transitionClassEquals,
transitionClassLeftParen,
transitionClassRightParen,
transitionClassAsterisk,
transitionClassUnderscore,
transitionClassSingle,
transitionClassHex,
transitionClassZero,
transitionClassX,
transitionClassEof,
transitionClassDot,
transitionClassMinus,
transitionClassSingleQuote,
transitionClassDoubleQuote,
transitionClassGreater,
transitionClassLess,
transitionClassOther
);
TransitionState = (
transitionStateStart,
transitionStateColon,
transitionStateIdentifier,
transitionStateDecimal,
transitionStateGreater,
transitionStateMinus,
transitionStateLeftParen,
transitionStateLess,
transitionStateDot,
transitionStateComment,
transitionStateClosingComment,
transitionStateCharacter,
transitionStateString,
transitionStateLeadingZero,
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = proc(PLexer, PLexerToken);
Transition = record
action: TransitionAction;
next_state: TransitionState
end;
TransitionClasses = [22]Transition;
var
classification: [128]TransitionClass;
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: CARDINAL;
begin
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
classification[3] := transitionClassInvalid; (* STX *)
classification[4] := transitionClassInvalid; (* ETX *)
classification[5] := transitionClassInvalid; (* EOT *)
classification[6] := transitionClassInvalid; (* EMQ *)
classification[7] := transitionClassInvalid; (* ACK *)
classification[8] := transitionClassInvalid; (* BEL *)
classification[9] := transitionClassInvalid; (* BS *)
classification[10] := transitionClassSpace; (* HT *)
classification[11] := transitionClassSpace; (* LF *)
classification[12] := transitionClassInvalid; (* VT *)
classification[13] := transitionClassInvalid; (* FF *)
classification[14] := transitionClassSpace; (* CR *)
classification[15] := transitionClassInvalid; (* SO *)
classification[16] := transitionClassInvalid; (* SI *)
classification[17] := transitionClassInvalid; (* DLE *)
classification[18] := transitionClassInvalid; (* DC1 *)
classification[19] := transitionClassInvalid; (* DC2 *)
classification[20] := transitionClassInvalid; (* DC3 *)
classification[21] := transitionClassInvalid; (* DC4 *)
classification[22] := transitionClassInvalid; (* NAK *)
classification[23] := transitionClassInvalid; (* SYN *)
classification[24] := transitionClassInvalid; (* ETB *)
classification[25] := transitionClassInvalid; (* CAN *)
classification[26] := transitionClassInvalid; (* EM *)
classification[27] := transitionClassInvalid; (* SUB *)
classification[28] := transitionClassInvalid; (* ESC *)
classification[29] := transitionClassInvalid; (* FS *)
classification[30] := transitionClassInvalid; (* GS *)
classification[31] := transitionClassInvalid; (* RS *)
classification[32] := transitionClassInvalid; (* US *)
classification[33] := transitionClassSpace; (* Space *)
classification[34] := transitionClassSingle; (* ! *)
classification[35] := transitionClassDoubleQuote; (* " *)
classification[36] := transitionClassOther; (* # *)
classification[37] := transitionClassOther; (* $ *)
classification[38] := transitionClassSingle; (* % *)
classification[39] := transitionClassSingle; (* & *)
classification[40] := transitionClassSingleQuote; (* ' *)
classification[41] := transitionClassLeftParen; (* ( *)
classification[42] := transitionClassRightParen; (* ) *)
classification[43] := transitionClassAsterisk; (* * *)
classification[44] := transitionClassSingle; (* + *)
classification[45] := transitionClassSingle; (* , *)
classification[46] := transitionClassMinus; (* - *)
classification[47] := transitionClassDot; (* . *)
classification[48] := transitionClassSingle; (* / *)
classification[49] := transitionClassZero; (* 0 *)
classification[50] := transitionClassDigit; (* 1 *)
classification[51] := transitionClassDigit; (* 2 *)
classification[52] := transitionClassDigit; (* 3 *)
classification[53] := transitionClassDigit; (* 4 *)
classification[54] := transitionClassDigit; (* 5 *)
classification[55] := transitionClassDigit; (* 6 *)
classification[56] := transitionClassDigit; (* 7 *)
classification[57] := transitionClassDigit; (* 8 *)
classification[58] := transitionClassDigit; (* 9 *)
classification[59] := transitionClassColon; (* : *)
classification[60] := transitionClassSingle; (* ; *)
classification[61] := transitionClassLess; (* < *)
classification[62] := transitionClassEquals; (* = *)
classification[63] := transitionClassGreater; (* > *)
classification[64] := transitionClassOther; (* ? *)
classification[65] := transitionClassSingle; (* @ *)
classification[66] := transitionClassAlpha; (* A *)
classification[67] := transitionClassAlpha; (* B *)
classification[68] := transitionClassAlpha; (* C *)
classification[69] := transitionClassAlpha; (* D *)
classification[70] := transitionClassAlpha; (* E *)
classification[71] := transitionClassAlpha; (* F *)
classification[72] := transitionClassAlpha; (* G *)
classification[73] := transitionClassAlpha; (* H *)
classification[74] := transitionClassAlpha; (* I *)
classification[75] := transitionClassAlpha; (* J *)
classification[76] := transitionClassAlpha; (* K *)
classification[77] := transitionClassAlpha; (* L *)
classification[78] := transitionClassAlpha; (* M *)
classification[79] := transitionClassAlpha; (* N *)
classification[80] := transitionClassAlpha; (* O *)
classification[81] := transitionClassAlpha; (* P *)
classification[82] := transitionClassAlpha; (* Q *)
classification[83] := transitionClassAlpha; (* R *)
classification[84] := transitionClassAlpha; (* S *)
classification[85] := transitionClassAlpha; (* T *)
classification[86] := transitionClassAlpha; (* U *)
classification[87] := transitionClassAlpha; (* V *)
classification[88] := transitionClassAlpha; (* W *)
classification[89] := transitionClassAlpha; (* X *)
classification[90] := transitionClassAlpha; (* Y *)
classification[91] := transitionClassAlpha; (* Z *)
classification[92] := transitionClassSingle; (* [ *)
classification[93] := transitionClassOther; (* \ *)
classification[94] := transitionClassSingle; (* ] *)
classification[95] := transitionClassSingle; (* ^ *)
classification[96] := transitionClassUnderscore; (* _ *)
classification[97] := transitionClassOther; (* ` *)
classification[98] := transitionClassHex; (* a *)
classification[99] := transitionClassHex; (* b *)
classification[100] := transitionClassHex; (* c *)
classification[101] := transitionClassHex; (* d *)
classification[102] := transitionClassHex; (* e *)
classification[103] := transitionClassHex; (* f *)
classification[104] := transitionClassAlpha; (* g *)
classification[105] := transitionClassAlpha; (* h *)
classification[106] := transitionClassAlpha; (* i *)
classification[107] := transitionClassAlpha; (* j *)
classification[108] := transitionClassAlpha; (* k *)
classification[109] := transitionClassAlpha; (* l *)
classification[110] := transitionClassAlpha; (* m *)
classification[111] := transitionClassAlpha; (* n *)
classification[112] := transitionClassAlpha; (* o *)
classification[113] := transitionClassAlpha; (* p *)
classification[114] := transitionClassAlpha; (* q *)
classification[115] := transitionClassAlpha; (* r *)
classification[116] := transitionClassAlpha; (* s *)
classification[117] := transitionClassAlpha; (* t *)
classification[118] := transitionClassAlpha; (* u *)
classification[119] := transitionClassAlpha; (* v *)
classification[120] := transitionClassAlpha; (* w *)
classification[121] := transitionClassX; (* x *)
classification[122] := transitionClassAlpha; (* y *)
classification[123] := transitionClassAlpha; (* z *)
classification[124] := transitionClassOther; (* { *)
classification[125] := transitionClassSingle; (* | *)
classification[126] := transitionClassOther; (* } *)
classification[127] := transitionClassSingle; (* ~ *)
classification[128] := transitionClassInvalid; (* DEL *)
i := 129;
while i <= 256 do
classification[i] := transitionClassOther;
i := i + 1
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: PLexerBuffer, token_end: PLexerBuffer) -> BOOLEAN;
var
result: BOOLEAN;
index: CARDINAL;
begin
index := 0;
result := true;
while (index < Length(keyword)) & (token_start <> token_end) & result DO
result := (keyword[index] = token_start^) or (Lower(keyword[index]) = token_start^);
INC(token_start);
INC(index)
end;
result := (index = Length(keyword)) & (token_start = token_end) & result;
return result
end;
(* Reached the end of file. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindEof
end;
(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.current)
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
begin
if lexer^.start^ = ':' then
token^.kind := lexerKindColon
end;
if lexer^.start^ = '>' then
token^.kind := lexerKindGreaterThan
end;
if lexer^.start^ = '<' then
token^.kind := lexerKindLessThan
end;
if lexer^.start^ = '(' then
token^.kind := lexerKindLeftParen
end;
if lexer^.start^ = '-' then
token^.kind := lexerKindLeftParen
end;
if lexer^.start^ = '.' then
token^.kind := lexerKindDot
end
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
begin
if lexer^.start^ = '<' then
if lexer^.current^ = '>' then
token^.kind := lexerKindNotEqual
end;
if lexer^.current^ = '=' then
token^.kind := lexerKindLessEqual
end
end;
if (lexer^.start^ = '>') & (lexer^.current^ = '=') then
token^.kind := lexerKindGreaterEqual
end;
if (lexer^.start^ = '.') & (lexer^.current^ = '.') then
token^.kind := lexerKindRange
end;
if (lexer^.start^ = ':') & (lexer^.current^ = '=') then
token^.kind := lexerKindAssignment
end;
if (lexer^.start^ = '-') & (lexer^.current^ = '>') then
token^.kind := lexerKindArrow
end;
INC(lexer^.current)
end;
(* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.current);
INC(lexer^.start)
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
var
text_length: CARDINAL;
begin
if lexer^.start^ = '(' then
token^.kind := lexerKindComment
end;
if lexer^.start^ = '"' then
text_length := lexer^.current - lexer^.start;
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter
end;
if lexer^.start^ = "'" then
text_length := lexer^.current - lexer^.start;
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString
end;
INC(lexer^.current)
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current - lexer^.start;
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('PROGRAM', lexer^.start, lexer^.current) then
token^.kind := lexerKindProgram
end;
if compare_keyword('IMPORT', lexer^.start, lexer^.current) then
token^.kind := lexerKindImport
end;
if compare_keyword('CONST', lexer^.start, lexer^.current) then
token^.kind := lexerKindConst
end;
if compare_keyword('VAR', lexer^.start, lexer^.current) then
token^.kind := lexerKindVar
end;
if compare_keyword('IF', lexer^.start, lexer^.current) then
token^.kind := lexerKindIf
end;
if compare_keyword('THEN', lexer^.start, lexer^.current) then
token^.kind := lexerKindThen
end;
if compare_keyword('ELSIF', lexer^.start, lexer^.current) then
token^.kind := lexerKindElsif
end;
if compare_keyword('ELSE', lexer^.start, lexer^.current) then
token^.kind := lexerKindElse
end;
if compare_keyword('WHILE', lexer^.start, lexer^.current) then
token^.kind := lexerKindWhile
end;
if compare_keyword('DO', lexer^.start, lexer^.current) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current) then
token^.kind := lexerKindProc
end;
if compare_keyword('BEGIN', lexer^.start, lexer^.current) then
token^.kind := lexerKindBegin
end;
if compare_keyword('END', lexer^.start, lexer^.current) then
token^.kind := lexerKindEnd
end;
if compare_keyword('TYPE', lexer^.start, lexer^.current) then
token^.kind := lexerKindType
end;
if compare_keyword('RECORD', lexer^.start, lexer^.current) then
token^.kind := lexerKindRecord
end;
if compare_keyword('UNION', lexer^.start, lexer^.current) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current) then
token^.kind := lexerKindNull
end;
if compare_keyword('AND', lexer^.start, lexer^.current) then
token^.kind := lexerKindAnd
end;
if compare_keyword('OR', lexer^.start, lexer^.current) then
token^.kind := lexerKindOr
end;
if compare_keyword('RETURN', lexer^.start, lexer^.current) then
token^.kind := lexerKindReturn
end;
if compare_keyword('DEFINITION', lexer^.start, lexer^.current) then
token^.kind := lexerKindDefinition
end;
if compare_keyword('TO', lexer^.start, lexer^.current) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current) then
token^.kind := lexerKindFrom
end;
if compare_keyword('MODULE', lexer^.start, lexer^.current) then
token^.kind := lexerKindModule
end;
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) then
token^.kind := lexerKindImplementation
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
begin
if lexer^.current^ = '&' then
token^.kind := lexerKindAnd
end;
if lexer^.current^ = ';' then
token^.kind := lexerKindSemicolon
end;
if lexer^.current^ = ',' then
token^.kind := lexerKindComma
end;
if lexer^.current^ = '~' then
token^.kind := lexerKindTilde
end;
if lexer^.current^ = ')' then
token^.kind := lexerKindRightParen
end;
if lexer^.current^ = '[' then
token^.kind := lexerKindLeftSquare
end;
if lexer^.current^ = ']' then
token^.kind := lexerKindRightSquare
end;
if lexer^.current^ = '^' then
token^.kind := lexerKindHat
end;
if lexer^.current^ = '=' then
token^.kind := lexerKindEqual
end;
if lexer^.current^ = '+' then
token^.kind := lexerKindPlus
end;
if lexer^.current^ = '/' then
token^.kind := lexerKindDivision
end;
if lexer^.current^ = '%' then
token^.kind := lexerKindRemainder
end;
if lexer^.current^ = '@' then
token^.kind := lexerKindAt
end;
if lexer^.current^ = '|' then
token^.kind := lexerKindPipe
end;
INC(lexer^.current)
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
var
buffer: String;
integer_length: CARDINAL;
found: BOOLEAN;
begin
token^.kind := lexerKindInteger;
integer_length := lexer^.current - lexer^.start;
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer)
end;
proc set_default_transition(current_state: TransitionState, DefaultAction: TransitionAction, next_state: TransitionState);
var
default_transition: Transition;
begin
default_transition.action := DefaultAction;
default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassDigit) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassAlpha) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassSpace) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassColon) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassEquals) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassLeftParen) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassRightParen) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassAsterisk) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassUnderscore) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassSingle) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassHex) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassZero) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassX) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassEof) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassDot) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassMinus) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassSingleQuote) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassDoubleQuote) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassGreater) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassLess) + 1] := default_transition;
transitions[ORD(current_state) + 1][ORD(transitionClassOther) + 1] := default_transition
end;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the amount of states and n is
* the amount of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a word long.
* - The least significant byte of the word is a row number (beginning with 0).
* It specifies the target state. "ff" means that this is an end state and no
* transition is possible.
* - The next byte is the action that should be performed when transitioning.
* For the meaning of actions see labels in the lex_next function, which
* handles each action.
*)
proc initialize_transitions();
begin
(* Start state. *)
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].action := nil;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateDecimal;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].action := transition_action_skip;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].next_state := transitionStateStart;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].next_state := transitionStateColon;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].next_state := transitionStateLeftParen;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateLeadingZero;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].action := transition_action_eof;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].next_state := transitionStateDot;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].next_state := transitionStateMinus;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].next_state := transitionStateCharacter;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].next_state := transitionStateString;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateGreater;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].next_state := transitionStateLess;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].action := nil;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].next_state := transitionStateEnd;
(* Colon state. *)
set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite;
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
(* Identifier state. *)
set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd);
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].next_state := transitionStateIdentifier;
(* Decimal state. *)
set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].action := nil;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].next_state := transitionStateDecimalSuffix;
(* Greater state. *)
set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite;
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
(* Minus state. *)
set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].action := transition_action_composite;
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateEnd;
(* Left paren state. *)
set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateComment;
(* Less state. *)
set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateEnd;
(* Hexadecimal after 0x. *)
set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].action := transition_action_composite;
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].next_state := transitionStateEnd;
(* Comment. *)
set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateClosingComment;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].action := nil;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
(* Closing comment. *)
set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].action := nil;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].action := transition_action_delimited;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateClosingComment;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].action := nil;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
(* Character. *)
set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter);
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].action := nil;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].action := nil;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].action := transition_action_delimited;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].next_state := transitionStateEnd;
(* String. *)
set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString);
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].action := nil;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].action := nil;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].action := transition_action_delimited;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].next_state := transitionStateEnd;
(* Leading zero. *)
set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd;
(* Digit with a character suffix. *)
set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
end;
proc lexer_initialize(lexer: PLexer, input: File);
begin
lexer^.input := input;
lexer^.length := 0;
ALLOCATE(lexer^.buffer, CHUNK_SIZE);
MemZero(lexer^.buffer, CHUNK_SIZE);
lexer^.size := CHUNK_SIZE
end;
proc lexer_current(lexer: PLexer) -> LexerToken;
var
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
begin
lexer^.current := lexer^.start;
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
current_class := classification[ORD(lexer^.current^) + 1];
current_transition := transitions[ORD(current_state) + 1][ORD(current_class) + 1];
if current_transition.action <> nil then
current_transition.action(lexer, ADR(result))
end;
current_state := current_transition.next_state
end;
return result
end;
proc lexer_lex(lexer: PLexer) -> LexerToken;
var
result: LexerToken;
begin
if lexer^.length = 0 then
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current := lexer^.buffer
end;
lexer^.start := lexer^.current;
result := lexer_current(lexer);
return result
end;
proc lexer_destroy(lexer: PLexer);
begin
DEALLOCATE(lexer^.buffer, lexer^.size)
end;
begin
initialize_classification();
initialize_transitions()
end.