implementation module Lexer; from FIO import ReadNBytes; from SYSTEM import ADR; from Storage import DEALLOCATE, ALLOCATE; from Strings import Length; from MemUtils import MemCopy, MemZero; from StrCase import Lower; const CHUNK_SIZE = 65536; type (* * Classification table assigns each possible character to a group (class). All * characters of the same group a handled equivalently. * * Classification: *) TransitionClass = ( transitionClassInvalid, transitionClassDigit, transitionClassAlpha, transitionClassSpace, transitionClassColon, transitionClassEquals, transitionClassLeftParen, transitionClassRightParen, transitionClassAsterisk, transitionClassUnderscore, transitionClassSingle, transitionClassHex, transitionClassZero, transitionClassX, transitionClassEof, transitionClassDot, transitionClassMinus, transitionClassSingleQuote, transitionClassDoubleQuote, transitionClassGreater, transitionClassLess, transitionClassOther ); TransitionState = ( transitionStateStart, transitionStateColon, transitionStateIdentifier, transitionStateDecimal, transitionStateGreater, transitionStateMinus, transitionStateLeftParen, transitionStateLess, transitionStateDot, transitionStateComment, transitionStateClosingComment, transitionStateCharacter, transitionStateString, transitionStateLeadingZero, transitionStateDecimalSuffix, transitionStateEnd ); TransitionAction = proc(PLexer, PLexerToken); Transition = record Action: TransitionAction; NextState: TransitionState end; TransitionClasses = [22]Transition; var classification: [128]TransitionClass; transitions: [16]TransitionClasses; proc initialize_classification(); var i: CARDINAL; begin classification[1] := transitionClassEof; (* NUL *) classification[2] := transitionClassInvalid; (* SOH *) classification[3] := transitionClassInvalid; (* STX *) classification[4] := transitionClassInvalid; (* ETX *) classification[5] := transitionClassInvalid; (* EOT *) classification[6] := transitionClassInvalid; (* EMQ *) classification[7] := transitionClassInvalid; (* ACK *) classification[8] := transitionClassInvalid; (* BEL *) classification[9] := transitionClassInvalid; (* BS *) classification[10] := transitionClassSpace; (* HT *) classification[11] := transitionClassSpace; (* LF *) classification[12] := transitionClassInvalid; (* VT *) classification[13] := transitionClassInvalid; (* FF *) classification[14] := transitionClassSpace; (* CR *) classification[15] := transitionClassInvalid; (* SO *) classification[16] := transitionClassInvalid; (* SI *) classification[17] := transitionClassInvalid; (* DLE *) classification[18] := transitionClassInvalid; (* DC1 *) classification[19] := transitionClassInvalid; (* DC2 *) classification[20] := transitionClassInvalid; (* DC3 *) classification[21] := transitionClassInvalid; (* DC4 *) classification[22] := transitionClassInvalid; (* NAK *) classification[23] := transitionClassInvalid; (* SYN *) classification[24] := transitionClassInvalid; (* ETB *) classification[25] := transitionClassInvalid; (* CAN *) classification[26] := transitionClassInvalid; (* EM *) classification[27] := transitionClassInvalid; (* SUB *) classification[28] := transitionClassInvalid; (* ESC *) classification[29] := transitionClassInvalid; (* FS *) classification[30] := transitionClassInvalid; (* GS *) classification[31] := transitionClassInvalid; (* RS *) classification[32] := transitionClassInvalid; (* US *) classification[33] := transitionClassSpace; (* Space *) classification[34] := transitionClassSingle; (* ! *) classification[35] := transitionClassDoubleQuote; (* " *) classification[36] := transitionClassOther; (* # *) classification[37] := transitionClassOther; (* $ *) classification[38] := transitionClassSingle; (* % *) classification[39] := transitionClassSingle; (* & *) classification[40] := transitionClassSingleQuote; (* ' *) classification[41] := transitionClassLeftParen; (* ( *) classification[42] := transitionClassRightParen; (* ) *) classification[43] := transitionClassAsterisk; (* * *) classification[44] := transitionClassSingle; (* + *) classification[45] := transitionClassSingle; (* , *) classification[46] := transitionClassMinus; (* - *) classification[47] := transitionClassDot; (* . *) classification[48] := transitionClassSingle; (* / *) classification[49] := transitionClassZero; (* 0 *) classification[50] := transitionClassDigit; (* 1 *) classification[51] := transitionClassDigit; (* 2 *) classification[52] := transitionClassDigit; (* 3 *) classification[53] := transitionClassDigit; (* 4 *) classification[54] := transitionClassDigit; (* 5 *) classification[55] := transitionClassDigit; (* 6 *) classification[56] := transitionClassDigit; (* 7 *) classification[57] := transitionClassDigit; (* 8 *) classification[58] := transitionClassDigit; (* 9 *) classification[59] := transitionClassColon; (* : *) classification[60] := transitionClassSingle; (* ; *) classification[61] := transitionClassLess; (* < *) classification[62] := transitionClassEquals; (* = *) classification[63] := transitionClassGreater; (* > *) classification[64] := transitionClassOther; (* ? *) classification[65] := transitionClassSingle; (* @ *) classification[66] := transitionClassAlpha; (* A *) classification[67] := transitionClassAlpha; (* B *) classification[68] := transitionClassAlpha; (* C *) classification[69] := transitionClassAlpha; (* D *) classification[70] := transitionClassAlpha; (* E *) classification[71] := transitionClassAlpha; (* F *) classification[72] := transitionClassAlpha; (* G *) classification[73] := transitionClassAlpha; (* H *) classification[74] := transitionClassAlpha; (* I *) classification[75] := transitionClassAlpha; (* J *) classification[76] := transitionClassAlpha; (* K *) classification[77] := transitionClassAlpha; (* L *) classification[78] := transitionClassAlpha; (* M *) classification[79] := transitionClassAlpha; (* N *) classification[80] := transitionClassAlpha; (* O *) classification[81] := transitionClassAlpha; (* P *) classification[82] := transitionClassAlpha; (* Q *) classification[83] := transitionClassAlpha; (* R *) classification[84] := transitionClassAlpha; (* S *) classification[85] := transitionClassAlpha; (* T *) classification[86] := transitionClassAlpha; (* U *) classification[87] := transitionClassAlpha; (* V *) classification[88] := transitionClassAlpha; (* W *) classification[89] := transitionClassAlpha; (* X *) classification[90] := transitionClassAlpha; (* Y *) classification[91] := transitionClassAlpha; (* Z *) classification[92] := transitionClassSingle; (* [ *) classification[93] := transitionClassOther; (* \ *) classification[94] := transitionClassSingle; (* ] *) classification[95] := transitionClassSingle; (* ^ *) classification[96] := transitionClassUnderscore; (* _ *) classification[97] := transitionClassOther; (* ` *) classification[98] := transitionClassHex; (* a *) classification[99] := transitionClassHex; (* b *) classification[100] := transitionClassHex; (* c *) classification[101] := transitionClassHex; (* d *) classification[102] := transitionClassHex; (* e *) classification[103] := transitionClassHex; (* f *) classification[104] := transitionClassAlpha; (* g *) classification[105] := transitionClassAlpha; (* h *) classification[106] := transitionClassAlpha; (* i *) classification[107] := transitionClassAlpha; (* j *) classification[108] := transitionClassAlpha; (* k *) classification[109] := transitionClassAlpha; (* l *) classification[110] := transitionClassAlpha; (* m *) classification[111] := transitionClassAlpha; (* n *) classification[112] := transitionClassAlpha; (* o *) classification[113] := transitionClassAlpha; (* p *) classification[114] := transitionClassAlpha; (* q *) classification[115] := transitionClassAlpha; (* r *) classification[116] := transitionClassAlpha; (* s *) classification[117] := transitionClassAlpha; (* t *) classification[118] := transitionClassAlpha; (* u *) classification[119] := transitionClassAlpha; (* v *) classification[120] := transitionClassAlpha; (* w *) classification[121] := transitionClassX; (* x *) classification[122] := transitionClassAlpha; (* y *) classification[123] := transitionClassAlpha; (* z *) classification[124] := transitionClassOther; (* { *) classification[125] := transitionClassSingle; (* | *) classification[126] := transitionClassOther; (* } *) classification[127] := transitionClassSingle; (* ~ *) classification[128] := transitionClassInvalid; (* DEL *) i := 129; while i <= 256 do classification[i] := transitionClassOther; i := i + 1 end end; proc compare_keyword(Keyword: ARRAY OF CHAR, TokenStart: PLexerBuffer, TokenEnd: PLexerBuffer) -> BOOLEAN; var result: BOOLEAN; index: CARDINAL; begin index := 0; result := true; while (index < Length(Keyword)) & (TokenStart <> TokenEnd) & result DO result := (Keyword[index] = TokenStart^) or (Lower(Keyword[index]) = TokenStart^); INC(TokenStart); INC(index) end; result := (index = Length(Keyword)) & (TokenStart = TokenEnd) & result; return result end; (* Reached the end of file. *) proc transition_action_eof(lexer: PLexer, token: PLexerToken); begin token^.kind := lexerKindEof end; (* Add the character to the token currently read and advance to the next character. *) proc transition_action_accumulate(lexer: PLexer, token: PLexerToken); begin INC(lexer^.Current) end; (* The current character is not a part of the token. Finish the token already * read. Don't advance to the next character. *) proc transition_action_finalize(lexer: PLexer, token: PLexerToken); begin if lexer^.Start^ = ':' then token^.kind := lexerKindColon end; if lexer^.Start^ = '>' then token^.kind := lexerKindGreaterThan end; if lexer^.Start^ = '<' then token^.kind := lexerKindLessThan end; if lexer^.Start^ = '(' then token^.kind := lexerKindLeftParen end; if lexer^.Start^ = '-' then token^.kind := lexerKindLeftParen end; if lexer^.Start^ = '.' then token^.kind := lexerKindDot end end; (* An action for tokens containing multiple characters. *) proc transition_action_composite(lexer: PLexer, token: PLexerToken); begin if lexer^.Start^ = '<' then if lexer^.Current^ = '>' then token^.kind := lexerKindNotEqual end; if lexer^.Current^ = '=' then token^.kind := lexerKindLessEqual end end; if (lexer^.Start^ = '>') & (lexer^.Current^ = '=') then token^.kind := lexerKindGreaterEqual end; if (lexer^.Start^ = '.') & (lexer^.Current^ = '.') then token^.kind := lexerKindRange end; if (lexer^.Start^ = ':') & (lexer^.Current^ = '=') then token^.kind := lexerKindAssignment end; if (lexer^.Start^ = '-') & (lexer^.Current^ = '>') then token^.kind := lexerKindArrow end; INC(lexer^.Current) end; (* Skip a space. *) proc transition_action_skip(lexer: PLexer, token: PLexerToken); begin INC(lexer^.Current); INC(lexer^.Start) end; (* Delimited string action. *) proc transition_action_delimited(lexer: PLexer, token: PLexerToken); begin if lexer^.Start^ = '(' then token^.kind := lexerKindComment end; if lexer^.Start^ = '"' then token^.kind := lexerKindCharacter end; if lexer^.Start^ = "'" then token^.kind := lexerKindString end; INC(lexer^.Current) end; (* Finalize keyword or identifier. *) proc transition_action_key_id(lexer: PLexer, token: PLexerToken); begin token^.kind := lexerKindIdentifier; token^.identifierKind[1] := lexer^.Current - lexer^.Start; MemCopy(lexer^.Start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); if compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) then token^.kind := lexerKindProgram end; if compare_keyword('IMPORT', lexer^.Start, lexer^.Current) then token^.kind := lexerKindImport end; if compare_keyword('CONST', lexer^.Start, lexer^.Current) then token^.kind := lexerKindConst end; if compare_keyword('VAR', lexer^.Start, lexer^.Current) then token^.kind := lexerKindVar end; if compare_keyword('IF', lexer^.Start, lexer^.Current) then token^.kind := lexerKindIf end; if compare_keyword('THEN', lexer^.Start, lexer^.Current) then token^.kind := lexerKindThen end; if compare_keyword('ELSIF', lexer^.Start, lexer^.Current) then token^.kind := lexerKindElsif end; if compare_keyword('ELSE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindElse end; if compare_keyword('WHILE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindWhile end; if compare_keyword('DO', lexer^.Start, lexer^.Current) then token^.kind := lexerKindDo end; if compare_keyword('proc', lexer^.Start, lexer^.Current) then token^.kind := lexerKindProc end; if compare_keyword('BEGIN', lexer^.Start, lexer^.Current) then token^.kind := lexerKindBegin end; if compare_keyword('END', lexer^.Start, lexer^.Current) then token^.kind := lexerKindEnd end; if compare_keyword('TYPE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindType end; if compare_keyword('RECORD', lexer^.Start, lexer^.Current) then token^.kind := lexerKindRecord end; if compare_keyword('UNION', lexer^.Start, lexer^.Current) then token^.kind := lexerKindUnion end; if compare_keyword('NIL', lexer^.Start, lexer^.Current) then token^.kind := lexerKindNull end; if compare_keyword('AND', lexer^.Start, lexer^.Current) then token^.kind := lexerKindAnd end; if compare_keyword('OR', lexer^.Start, lexer^.Current) then token^.kind := lexerKindOr end; if compare_keyword('RETURN', lexer^.Start, lexer^.Current) then token^.kind := lexerKindReturn end; if compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) then token^.kind := lexerKindDefinition end; if compare_keyword('TO', lexer^.Start, lexer^.Current) then token^.kind := lexerKindTo end; if compare_keyword('CASE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindCase end; if compare_keyword('OF', lexer^.Start, lexer^.Current) then token^.kind := lexerKindOf end; if compare_keyword('FROM', lexer^.Start, lexer^.Current) then token^.kind := lexerKindFrom end; if compare_keyword('MODULE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindModule end; if compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) then token^.kind := lexerKindImplementation end; if compare_keyword('POINTER', lexer^.Start, lexer^.Current) then token^.kind := lexerKindPointer end; if compare_keyword('ARRAY', lexer^.Start, lexer^.Current) then token^.kind := lexerKindArray end; if compare_keyword('TRUE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindBoolean; token^.booleanKind := true end; if compare_keyword('FALSE', lexer^.Start, lexer^.Current) then token^.kind := lexerKindBoolean; token^.booleanKind := false end end; (* Action for tokens containing only one character. The character cannot be * followed by other characters forming a composite token. *) proc transition_action_single(lexer: PLexer, token: PLexerToken); begin if lexer^.Current^ = '&' then token^.kind := lexerKindAnd end; if lexer^.Current^ = ';' then token^.kind := lexerKindSemicolon end; if lexer^.Current^ = ',' then token^.kind := lexerKindComma end; if lexer^.Current^ = ',' then token^.kind := lexerKindComma end; if lexer^.Current^ = ')' then token^.kind := lexerKindRightParen end; if lexer^.Current^ = '[' then token^.kind := lexerKindLeftSquare end; if lexer^.Current^ = ']' then token^.kind := lexerKindRightSquare end; if lexer^.Current^ = '^' then token^.kind := lexerKindHat end; if lexer^.Current^ = '=' then token^.kind := lexerKindEqual end; if lexer^.Current^ = '+' then token^.kind := lexerKindPlus end; if lexer^.Current^ = '/' then token^.kind := lexerKindDivision end; if lexer^.Current^ = '%' then token^.kind := lexerKindRemainder end; if lexer^.Current^ = '@' then token^.kind := lexerKindAt end; if lexer^.Current^ = '|' then token^.kind := lexerKindPipe end; INC(lexer^.Current) end; (* Handle an integer literal. *) proc transition_action_integer(lexer: PLexer, token: PLexerToken); begin token^.kind := lexerKindInteger end; proc set_default_transition(CurrentState: TransitionState, DefaultAction: TransitionAction, NextState: TransitionState); var DefaultTransition: Transition; begin DefaultTransition.Action := DefaultAction; DefaultTransition.NextState := NextState; transitions[ORD(CurrentState) + 1][ORD(transitionClassInvalid) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDigit) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassAlpha) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSpace) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassColon) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassEquals) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassLeftParen) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassRightParen) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassAsterisk) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassUnderscore) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSingle) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassHex) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassZero) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassX) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassEof) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDot) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassMinus) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSingleQuote) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDoubleQuote) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassGreater) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassLess) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassOther) + 1] := DefaultTransition end; (* * The transition table describes transitions from one state to another, given * a symbol (character class). * * The table has m rows and n columns, where m is the amount of states and n is * the amount of classes. So given the current state and a classified character * the table can be used to look up the next state. * * Each cell is a word long. * - The least significant byte of the word is a row number (beginning with 0). * It specifies the target state. "ff" means that this is an end state and no * transition is possible. * - The next byte is the action that should be performed when transitioning. * For the meaning of actions see labels in the lex_next function, which * handles each action. *) proc initialize_transitions(); begin (* Start state. *) transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := nil; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateCharacter; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := nil; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd; (* Colon state. *) set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; (* Identifier state. *) set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd); transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier; (* Decimal state. *) set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := nil; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix; (* Greater state. *) set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; (* Minus state. *) set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite; transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd; (* Left paren state. *) set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment; (* Less state. *) set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd; (* Hexadecimal after 0x. *) set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite; transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd; (* Comment. *) set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment); transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := nil; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; (* Closing comment. *) set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment); transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := nil; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := nil; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; (* Character. *) set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter); transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := nil; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := nil; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd; (* String. *) set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString); transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := nil; transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := nil; transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd; (* Leading zero. *) set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := nil; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := nil; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := nil; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := nil; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := nil; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := nil; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd; (* Digit with a character suffix. *) set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := nil; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := nil; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := nil; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := nil; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := nil; transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd end; proc lexer_initialize(lexer: PLexer, Input: File); begin lexer^.Input := Input; lexer^.Length := 0; ALLOCATE(lexer^.Buffer, CHUNK_SIZE); MemZero(lexer^.Buffer, CHUNK_SIZE); lexer^.Size := CHUNK_SIZE end; proc lexer_current(lexer: PLexer) -> LexerToken; var CurrentClass: TransitionClass; CurrentState: TransitionState; CurrentTransition: Transition; result: LexerToken; begin lexer^.Current := lexer^.Start; CurrentState := transitionStateStart; while CurrentState <> transitionStateEnd DO CurrentClass := classification[ORD(lexer^.Current^) + 1]; CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1]; if CurrentTransition.Action <> nil then CurrentTransition.Action(lexer, ADR(result)) end; CurrentState := CurrentTransition.NextState end; return result end; proc lexer_lex(lexer: PLexer) -> LexerToken; var result: LexerToken; begin if lexer^.Length = 0 then lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer); lexer^.Current := lexer^.Buffer end; lexer^.Start := lexer^.Current; result := lexer_current(lexer); return result end; proc lexer_destroy(lexer: PLexer); begin DEALLOCATE(lexer^.Buffer, lexer^.Size) end; begin initialize_classification(); initialize_transitions() end Lexer.