implementation module Lexer;

from FIO import ReadNBytes;
from SYSTEM import ADR;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemZero;
from StrCase import Lower;

const
  ChunkSize = 65536;

type
  (*
   * The classification table assigns every possible character to a group
   * (class). All characters of the same group are handled equivalently.
   *
   * Classification:
   *)
  TransitionClass = (
    transitionClassInvalid,
    transitionClassDigit,
    transitionClassAlpha,
    transitionClassSpace,
    transitionClassColon,
    transitionClassEquals,
    transitionClassLeftParen,
    transitionClassRightParen,
    transitionClassAsterisk,
    transitionClassUnderscore,
    transitionClassSingle,
    transitionClassHex,
    transitionClassZero,
    transitionClassX,
    transitionClassEof,
    transitionClassDot,
    transitionClassMinus,
    transitionClassSingleQuote,
    transitionClassDoubleQuote,
    transitionClassGreater,
    transitionClassLess,
    transitionClassOther
  );
  (* States of the tokenizing state machine. *)
  TransitionState = (
    transitionStateStart,
    transitionStateColon,
    transitionStateIdentifier,
    transitionStateDecimal,
    transitionStateGreater,
    transitionStateMinus,
    transitionStateLeftParen,
    transitionStateLess,
    transitionStateDot,
    transitionStateComment,
    transitionStateClosingComment,
    transitionStateCharacter,
    transitionStateString,
    transitionStateLeadingZero,
    transitionStateDecimalSuffix,
    transitionStateEnd
  );
  TransitionAction = proc(PLexer, PLexerToken);
  Transition = record
    Action: TransitionAction;
    NextState: TransitionState
  end;

var
  (* Indexed by the character code plus one. *)
  Classification: ARRAY[1..128] OF TransitionClass;
  (* Indexed by ORD of the state, then by ORD of the character class. *)
  Transitions: ARRAY[0..15] OF ARRAY[0..21] OF Transition;

(* Assign Class to every character whose code lies in First..Last. *)
proc ClassifyRange(First: CARDINAL; Last: CARDINAL; Class: TransitionClass);
var
  Index: CARDINAL;
begin
  (* The table is one-based, character codes are zero-based. *)
  Index := First + 1;
  while Index <= Last + 1 DO
    Classification[Index] := Class;
    INC(Index)
  end
END ClassifyRange;

(* Assign Class to a single character. *)
proc ClassifyCharacter(Character: CHAR; Class: TransitionClass);
begin
  Classification[ORD(Character) + 1] := Class
END ClassifyCharacter;

(* Fill the classification table for all 128 ASCII codes. *)
proc InitializeClassification();
begin
  (* Whatever is not overridden below (control characters, DEL) is invalid. *)
  ClassifyRange(0, 127, transitionClassInvalid);

  (* NUL is classified as the end of input. *)
  ClassifyRange(0, 0, transitionClassEof);

  (* White space: HT, LF, CR and the blank. *)
  ClassifyRange(9, 10, transitionClassSpace);
  ClassifyRange(13, 13, transitionClassSpace);
  ClassifyCharacter(' ', transitionClassSpace);

  (* Digits; zero has a class of its own. *)
  ClassifyCharacter('0', transitionClassZero);
  ClassifyRange(ORD('1'), ORD('9'), transitionClassDigit);

  (* Letters; lowercase a-f and x have classes of their own. *)
  ClassifyRange(ORD('A'), ORD('Z'), transitionClassAlpha);
  ClassifyRange(ORD('a'), ORD('f'), transitionClassHex);
  ClassifyRange(ORD('g'), ORD('z'), transitionClassAlpha);
  ClassifyCharacter('x', transitionClassX);
  ClassifyCharacter('_', transitionClassUnderscore);

  (* Characters that have a dedicated class. *)
  ClassifyCharacter('"', transitionClassDoubleQuote);
  ClassifyCharacter("'", transitionClassSingleQuote);
  ClassifyCharacter('(', transitionClassLeftParen);
  ClassifyCharacter(')', transitionClassRightParen);
  ClassifyCharacter('*', transitionClassAsterisk);
  ClassifyCharacter('-', transitionClassMinus);
  ClassifyCharacter('.', transitionClassDot);
  ClassifyCharacter(':', transitionClassColon);
  ClassifyCharacter('<', transitionClassLess);
  ClassifyCharacter('=', transitionClassEquals);
  ClassifyCharacter('>', transitionClassGreater);

  (* Characters sharing the "single" class. *)
  ClassifyCharacter('!', transitionClassSingle);
  ClassifyCharacter('%', transitionClassSingle);
  ClassifyCharacter('&', transitionClassSingle);
  ClassifyCharacter('+', transitionClassSingle);
  ClassifyCharacter(',', transitionClassSingle);
  ClassifyCharacter('/', transitionClassSingle);
  ClassifyCharacter(';', transitionClassSingle);
  ClassifyCharacter('@', transitionClassSingle);
  ClassifyCharacter('[', transitionClassSingle);
  ClassifyCharacter(']', transitionClassSingle);
  ClassifyCharacter('^', transitionClassSingle);
  ClassifyCharacter('|', transitionClassSingle);
  ClassifyCharacter('~', transitionClassSingle);

  (* Characters sharing the "other" class; code 92 is the backslash. *)
  ClassifyCharacter('#', transitionClassOther);
  ClassifyCharacter('$', transitionClassOther);
  ClassifyCharacter('?', transitionClassOther);
  ClassifyRange(92, 92, transitionClassOther);
  ClassifyCharacter('`', transitionClassOther);
  ClassifyCharacter('{', transitionClassOther);
  ClassifyCharacter('}', transitionClassOther)
END InitializeClassification;

(*
 * Compare the token in the half-open range TokenStart..TokenEnd with Keyword.
 * Each token character has to equal the keyword character at the same
 * position or the result of Lower applied to that keyword character, and both
 * have to have the same length.
 *)
proc CompareKeyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
var
  Matches: BOOLEAN;
  Index: CARDINAL;
  KeywordLength: CARDINAL;
begin
  KeywordLength := Length(Keyword);
  Index := 0;
  Matches := TRUE;

  while Matches AND (Index < KeywordLength) AND (TokenStart <> TokenEnd) DO
    Matches := (TokenStart^ = Keyword[Index]) OR (TokenStart^ = Lower(Keyword[Index]));
    INC(TokenStart);
    INC(Index)
  end;
  (* Both the keyword and the token must be consumed completely. *)
  Matches := Matches AND (Index = KeywordLength) AND (TokenStart = TokenEnd);

  return Matches
END CompareKeyword;

(* Reached the end of file.
*)
proc TransitionActionEof(ALexer: PLexer; AToken: PLexerToken);
begin
  AToken^.Kind := lexerKindEof
END TransitionActionEof;

(* Add the character to the token currently read and advance to the next
 * character. *)
proc TransitionActionAccumulate(ALexer: PLexer; AToken: PLexerToken);
begin
  INC(ALexer^.Current)
END TransitionActionAccumulate;

(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is derived from the first character of the token. *)
proc TransitionActionFinalize(ALexer: PLexer; AToken: PLexerToken);
begin
  if ALexer^.Start^ = ':' then
    AToken^.Kind := lexerKindColon
  end;
  if ALexer^.Start^ = '>' then
    AToken^.Kind := lexerKindGreaterThan
  end;
  if ALexer^.Start^ = '<' then
    AToken^.Kind := lexerKindLessThan
  end;
  if ALexer^.Start^ = '(' then
    AToken^.Kind := lexerKindLeftParen
  end;
  (* A lone minus used to be reported as a left parenthesis.
   * NOTE(review): lexerKindMinus is assumed to be declared next to the other
   * token kinds in the definition module - confirm. *)
  if ALexer^.Start^ = '-' then
    AToken^.Kind := lexerKindMinus
  end;
  if ALexer^.Start^ = '.' then
    AToken^.Kind := lexerKindDot
  end
END TransitionActionFinalize;

(* An action for tokens containing multiple characters. The kind is chosen
 * from the first character of the token and the current character, then the
 * current character is consumed.
 *
 * NOTE(review): the minus state also routes "->" to this action, but no kind
 * is assigned for that pair here - confirm which kind it should get. *)
proc TransitionActionComposite(ALexer: PLexer; AToken: PLexerToken);
begin
  if ALexer^.Start^ = '<' then
    if ALexer^.Current^ = '>' then
      AToken^.Kind := lexerKindNotEqual
    end;
    if ALexer^.Current^ = '=' then
      AToken^.Kind := lexerKindLessEqual
    end
  end;
  if (ALexer^.Start^ = '>') AND (ALexer^.Current^ = '=') then
    AToken^.Kind := lexerKindGreaterEqual
  end;
  if (ALexer^.Start^ = '.') AND (ALexer^.Current^ = '.') then
    AToken^.Kind := lexerKindRange
  end;
  if (ALexer^.Start^ = ':') AND (ALexer^.Current^ = '=') then
    AToken^.Kind := lexerKindAssignment
  end;
  INC(ALexer^.Current)
END TransitionActionComposite;

(* Skip a space. Both the token start and the current position move past it. *)
proc TransitionActionSkip(ALexer: PLexer; AToken: PLexerToken);
begin
  INC(ALexer^.Current);
  INC(ALexer^.Start)
END TransitionActionSkip;

(* Delimited string action.
*)
(* Finishes a token enclosed in delimiters: the kind is chosen from the opening
 * delimiter and the closing delimiter is consumed.
 *
 * The start state sends a single quote to the character state and a double
 * quote to the string state, so the kinds are assigned accordingly (they used
 * to be swapped). *)
proc TransitionActionDelimited(ALexer: PLexer; AToken: PLexerToken);
begin
  if ALexer^.Start^ = '(' then
    AToken^.Kind := lexerKindComment
  end;
  if ALexer^.Start^ = '"' then
    AToken^.Kind := lexerKindString
  end;
  if ALexer^.Start^ = "'" then
    AToken^.Kind := lexerKindCharacter
  end;
  INC(ALexer^.Current)
END TransitionActionDelimited;

(* Finalize keyword or identifier. The token is an identifier unless it
 * matches one of the keywords below; the current character is not consumed.
 *
 * NOTE(review): 'proc' is the only lowercase entry, so unlike the other
 * keywords it matches the lowercase spelling only - confirm this is intended. *)
proc TransitionActionKeyId(ALexer: PLexer; AToken: PLexerToken);
begin
  AToken^.Kind := lexerKindIdentifier;

  if CompareKeyword('PROGRAM', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindProgram
  end;
  if CompareKeyword('IMPORT', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindImport
  end;
  if CompareKeyword('CONST', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindConst
  end;
  if CompareKeyword('VAR', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindVar
  end;
  if CompareKeyword('IF', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindIf
  end;
  if CompareKeyword('THEN', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindThen
  end;
  if CompareKeyword('ELSIF', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindElsif
  end;
  if CompareKeyword('ELSE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindElse
  end;
  if CompareKeyword('WHILE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindWhile
  end;
  if CompareKeyword('DO', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindDo
  end;
  if CompareKeyword('proc', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindProc
  end;
  if CompareKeyword('BEGIN', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindBegin
  end;
  if CompareKeyword('END', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindEnd
  end;
  if CompareKeyword('TYPE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindType
  end;
  if CompareKeyword('RECORD', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindRecord
  end;
  if CompareKeyword('UNION', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindUnion
  end;
  if CompareKeyword('NIL', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindNull
  end;
  if CompareKeyword('AND', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindAnd
  end;
  if CompareKeyword('OR', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindOr
  end;
  if CompareKeyword('RETURN', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindReturn
  end;
  if CompareKeyword('DEFINITION', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindDefinition
  end;
  if CompareKeyword('TO', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindTo
  end;
  if CompareKeyword('CASE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindCase
  end;
  if CompareKeyword('OF', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindOf
  end;
  if CompareKeyword('FROM', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindFrom
  end;
  if CompareKeyword('MODULE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindModule
  end;
  if CompareKeyword('IMPLEMENTATION', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindImplementation
  end;
  if CompareKeyword('POINTER', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindPointer
  end;
  if CompareKeyword('ARRAY', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindArray
  end;
  (* Boolean literals additionally carry their value. *)
  if CompareKeyword('TRUE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindBoolean;
    AToken^.booleanKind := TRUE
  end;
  if CompareKeyword('FALSE', ALexer^.Start, ALexer^.Current) then
    AToken^.Kind := lexerKindBoolean;
    AToken^.booleanKind := FALSE
  end
END TransitionActionKeyId;

(* Action for tokens containing only one character. The character cannot be
 * followed by other characters forming a composite token.
*)
(* The kind is chosen from the current character, which is then consumed.
 *
 * NOTE(review): the start state also routes '*', '!' and '~' to this action,
 * but no kind is assigned for them here - confirm which kinds they should
 * get. *)
proc TransitionActionSingle(ALexer: PLexer; AToken: PLexerToken);
begin
  if ALexer^.Current^ = '&' then
    AToken^.Kind := lexerKindAnd
  end;
  if ALexer^.Current^ = ';' then
    AToken^.Kind := lexerKindSemicolon
  end;
  if ALexer^.Current^ = ',' then
    AToken^.Kind := lexerKindComma
  end;
  if ALexer^.Current^ = ')' then
    AToken^.Kind := lexerKindRightParen
  end;
  if ALexer^.Current^ = '[' then
    AToken^.Kind := lexerKindLeftSquare
  end;
  if ALexer^.Current^ = ']' then
    AToken^.Kind := lexerKindRightSquare
  end;
  if ALexer^.Current^ = '^' then
    AToken^.Kind := lexerKindHat
  end;
  if ALexer^.Current^ = '=' then
    AToken^.Kind := lexerKindEqual
  end;
  if ALexer^.Current^ = '+' then
    AToken^.Kind := lexerKindPlus
  end;
  if ALexer^.Current^ = '/' then
    AToken^.Kind := lexerKindDivision
  end;
  if ALexer^.Current^ = '%' then
    AToken^.Kind := lexerKindRemainder
  end;
  if ALexer^.Current^ = '@' then
    AToken^.Kind := lexerKindAt
  end;
  if ALexer^.Current^ = '|' then
    AToken^.Kind := lexerKindPipe
  end;
  INC(ALexer^.Current)
END TransitionActionSingle;

(* Handle an integer literal.
*)
proc TransitionActionInteger(ALexer: PLexer; AToken: PLexerToken);
begin
  AToken^.Kind := lexerKindInteger
END TransitionActionInteger;

(* Make every character class of CurrentState run DefaultAction and continue
 * in NextState. Individual classes can be overridden afterwards. *)
proc SetDefaultTransition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState);
var
  DefaultTransition: Transition;
  ClassIndex: CARDINAL;
begin
  DefaultTransition.Action := DefaultAction;
  DefaultTransition.NextState := NextState;

  (* transitionClassOther is the last class of the enumeration. *)
  ClassIndex := 0;
  while ClassIndex <= ORD(transitionClassOther) DO
    Transitions[ORD(CurrentState)][ClassIndex] := DefaultTransition;
    INC(ClassIndex)
  end
END SetDefaultTransition;

(* Set the transition taken from CurrentState on CurrentClass. *)
proc SetTransition(CurrentState: TransitionState; CurrentClass: TransitionClass; ClassAction: TransitionAction; NextState: TransitionState);
begin
  Transitions[ORD(CurrentState)][ORD(CurrentClass)].Action := ClassAction;
  Transitions[ORD(CurrentState)][ORD(CurrentClass)].NextState := NextState
END SetTransition;

(* CurrentClass ends the scan in CurrentState without running any action. *)
proc RejectTransition(CurrentState: TransitionState; CurrentClass: TransitionClass);
begin
  Transitions[ORD(CurrentState)][ORD(CurrentClass)].Action := NIL;
  Transitions[ORD(CurrentState)][ORD(CurrentClass)].NextState := transitionStateEnd
END RejectTransition;

(*
 * The transition table describes transitions from one state to another, given
 * a symbol (character class).
 *
 * The table has m rows and n columns, where m is the number of states and n is
 * the number of classes. So given the current state and a classified character
 * the table can be used to look up the action to run and the next state.
 *
 * Each cell is a Transition record:
 * - NextState is the target state. transitionStateEnd stops the scan of the
 *   current token.
 * - Action is the procedure called when taking the transition. NIL means that
 *   no action is run.
 *)
proc InitializeTransitions();
begin
  (* Start state. *)
  RejectTransition(transitionStateStart, transitionClassInvalid);
  SetTransition(transitionStateStart, transitionClassDigit,
    TransitionActionAccumulate, transitionStateDecimal);
  SetTransition(transitionStateStart, transitionClassAlpha,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateStart, transitionClassSpace,
    TransitionActionSkip, transitionStateStart);
  SetTransition(transitionStateStart, transitionClassColon,
    TransitionActionAccumulate, transitionStateColon);
  SetTransition(transitionStateStart, transitionClassEquals,
    TransitionActionSingle, transitionStateEnd);
  SetTransition(transitionStateStart, transitionClassLeftParen,
    TransitionActionAccumulate, transitionStateLeftParen);
  SetTransition(transitionStateStart, transitionClassRightParen,
    TransitionActionSingle, transitionStateEnd);
  SetTransition(transitionStateStart, transitionClassAsterisk,
    TransitionActionSingle, transitionStateEnd);
  SetTransition(transitionStateStart, transitionClassUnderscore,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateStart, transitionClassSingle,
    TransitionActionSingle, transitionStateEnd);
  SetTransition(transitionStateStart, transitionClassHex,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateStart, transitionClassZero,
    TransitionActionAccumulate, transitionStateLeadingZero);
  SetTransition(transitionStateStart, transitionClassX,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateStart, transitionClassEof,
    TransitionActionEof, transitionStateEnd);
  SetTransition(transitionStateStart, transitionClassDot,
    TransitionActionAccumulate, transitionStateDot);
  SetTransition(transitionStateStart, transitionClassMinus,
    TransitionActionAccumulate, transitionStateMinus);
  SetTransition(transitionStateStart, transitionClassSingleQuote,
    TransitionActionAccumulate, transitionStateCharacter);
  SetTransition(transitionStateStart, transitionClassDoubleQuote,
    TransitionActionAccumulate, transitionStateString);
  SetTransition(transitionStateStart, transitionClassGreater,
    TransitionActionAccumulate, transitionStateGreater);
  SetTransition(transitionStateStart, transitionClassLess,
    TransitionActionAccumulate, transitionStateLess);
  RejectTransition(transitionStateStart, transitionClassOther);

  (* Colon state. *)
  SetDefaultTransition(transitionStateColon, TransitionActionFinalize, transitionStateEnd);
  SetTransition(transitionStateColon, transitionClassEquals,
    TransitionActionComposite, transitionStateEnd);

  (* Identifier state. *)
  SetDefaultTransition(transitionStateIdentifier, TransitionActionKeyId, transitionStateEnd);
  SetTransition(transitionStateIdentifier, transitionClassDigit,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateIdentifier, transitionClassAlpha,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateIdentifier, transitionClassUnderscore,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateIdentifier, transitionClassHex,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateIdentifier, transitionClassZero,
    TransitionActionAccumulate, transitionStateIdentifier);
  SetTransition(transitionStateIdentifier, transitionClassX,
    TransitionActionAccumulate, transitionStateIdentifier);

  (* Decimal state. *)
  SetDefaultTransition(transitionStateDecimal, TransitionActionInteger, transitionStateEnd);
  SetTransition(transitionStateDecimal, transitionClassDigit,
    TransitionActionAccumulate, transitionStateDecimal);
  SetTransition(transitionStateDecimal, transitionClassAlpha,
    TransitionActionAccumulate, transitionStateDecimalSuffix);
  RejectTransition(transitionStateDecimal, transitionClassUnderscore);
  SetTransition(transitionStateDecimal, transitionClassHex,
    TransitionActionAccumulate, transitionStateDecimalSuffix);
  SetTransition(transitionStateDecimal, transitionClassZero,
    TransitionActionAccumulate, transitionStateDecimal);
  SetTransition(transitionStateDecimal, transitionClassX,
    TransitionActionAccumulate, transitionStateDecimalSuffix);

  (* Greater state. *)
  SetDefaultTransition(transitionStateGreater, TransitionActionFinalize, transitionStateEnd);
  SetTransition(transitionStateGreater, transitionClassEquals,
    TransitionActionComposite, transitionStateEnd);

  (* Minus state. *)
  SetDefaultTransition(transitionStateMinus, TransitionActionFinalize, transitionStateEnd);
  SetTransition(transitionStateMinus, transitionClassGreater,
    TransitionActionComposite, transitionStateEnd);

  (* Left paren state. *)
  SetDefaultTransition(transitionStateLeftParen, TransitionActionFinalize, transitionStateEnd);
  SetTransition(transitionStateLeftParen, transitionClassAsterisk,
    TransitionActionAccumulate, transitionStateComment);

  (* Less state. *)
  SetDefaultTransition(transitionStateLess, TransitionActionFinalize, transitionStateEnd);
  SetTransition(transitionStateLess, transitionClassEquals,
    TransitionActionComposite, transitionStateEnd);
  SetTransition(transitionStateLess, transitionClassGreater,
    TransitionActionComposite, transitionStateEnd);

  (* Dot state. *)
  SetDefaultTransition(transitionStateDot, TransitionActionFinalize, transitionStateEnd);
  SetTransition(transitionStateDot, transitionClassDot,
    TransitionActionComposite, transitionStateEnd);

  (* Comment. *)
  SetDefaultTransition(transitionStateComment, TransitionActionAccumulate, transitionStateComment);
  SetTransition(transitionStateComment, transitionClassAsterisk,
    TransitionActionAccumulate, transitionStateClosingComment);
  RejectTransition(transitionStateComment, transitionClassEof);

  (* Closing comment. *)
  SetDefaultTransition(transitionStateClosingComment, TransitionActionAccumulate, transitionStateComment);
  RejectTransition(transitionStateClosingComment, transitionClassInvalid);
  SetTransition(transitionStateClosingComment, transitionClassRightParen,
    TransitionActionDelimited, transitionStateEnd);
  SetTransition(transitionStateClosingComment, transitionClassAsterisk,
    TransitionActionAccumulate, transitionStateClosingComment);
  RejectTransition(transitionStateClosingComment, transitionClassEof);

  (* Character. *)
  SetDefaultTransition(transitionStateCharacter, TransitionActionAccumulate, transitionStateCharacter);
  RejectTransition(transitionStateCharacter, transitionClassInvalid);
  RejectTransition(transitionStateCharacter, transitionClassEof);
  SetTransition(transitionStateCharacter, transitionClassSingleQuote,
    TransitionActionDelimited, transitionStateEnd);

  (* String. *)
  SetDefaultTransition(transitionStateString, TransitionActionAccumulate, transitionStateString);
  RejectTransition(transitionStateString, transitionClassInvalid);
  RejectTransition(transitionStateString, transitionClassEof);
  SetTransition(transitionStateString, transitionClassDoubleQuote,
    TransitionActionDelimited, transitionStateEnd);

  (* Leading zero. *)
  SetDefaultTransition(transitionStateLeadingZero, TransitionActionInteger, transitionStateEnd);
  RejectTransition(transitionStateLeadingZero, transitionClassDigit);
  RejectTransition(transitionStateLeadingZero, transitionClassAlpha);
  RejectTransition(transitionStateLeadingZero, transitionClassUnderscore);
  RejectTransition(transitionStateLeadingZero, transitionClassHex);
  RejectTransition(transitionStateLeadingZero, transitionClassZero);
  RejectTransition(transitionStateLeadingZero, transitionClassX);

  (* Digit with a character suffix. *)
  SetDefaultTransition(transitionStateDecimalSuffix, TransitionActionInteger, transitionStateEnd);
  RejectTransition(transitionStateDecimalSuffix, transitionClassAlpha);
  RejectTransition(transitionStateDecimalSuffix, transitionClassDigit);
  RejectTransition(transitionStateDecimalSuffix, transitionClassHex);
  RejectTransition(transitionStateDecimalSuffix, transitionClassZero);
  RejectTransition(transitionStateDecimalSuffix, transitionClassX)
END InitializeTransitions;

(* Prepare ALexer for reading from Input: allocate a zero-filled buffer of
 * ChunkSize bytes. Nothing is read yet. *)
proc LexerInitialize(ALexer: PLexer; Input: File);
begin
  ALexer^.Input := Input;
  ALexer^.Length := 0;

  ALLOCATE(ALexer^.Buffer, ChunkSize);
  MemZero(ALexer^.Buffer, ChunkSize);
  ALexer^.Size := ChunkSize
END LexerInitialize;

(* Scan the token beginning at ALexer^.Start and return it. The start position
 * is left untouched, ALexer^.Current ends up where the scan stopped.
 *
 * NOTE(review): when the scan stops on a transition without an action, the
 * returned token is never written to - confirm how callers detect this. *)
proc LexerCurrent(ALexer: PLexer): LexerToken;
var
  CurrentClass: TransitionClass;
  CurrentState: TransitionState;
  CurrentTransition: Transition;
  Result: LexerToken;
begin
  ALexer^.Current := ALexer^.Start;
  CurrentState := transitionStateStart;

  while CurrentState <> transitionStateEnd DO
    (* The classification table covers the codes 0..127 only. Bytes above
     * that range used to index past its end; they are now treated like the
     * other characters that have no meaning of their own. *)
    CurrentClass := transitionClassOther;
    if ORD(ALexer^.Current^) < 128 then
      CurrentClass := Classification[ORD(ALexer^.Current^) + 1]
    end;

    CurrentTransition := Transitions[ORD(CurrentState)][ORD(CurrentClass)];
    if CurrentTransition.Action <> NIL then
      CurrentTransition.Action(ALexer, ADR(Result))
    end;
    CurrentState := CurrentTransition.NextState
  end;
  return Result
END LexerCurrent;

(* Return the next token, continuing after the previously returned one. The
 * input is read into the buffer on the first call.
 *
 * NOTE(review): only a single chunk is ever read, so longer inputs are cut
 * off - confirm whether refilling the buffer is needed. *)
proc LexerLex(ALexer: PLexer): LexerToken;
var
  Result: LexerToken;
begin
  if ALexer^.Length = 0 then
    (* Read one byte less than the buffer holds: the buffer is zero-filled and
     * NUL is classified as end of input, so the last byte has to stay zero
     * for the scan to stop inside the buffer. *)
    ALexer^.Length := ReadNBytes(ALexer^.Input, ChunkSize - 1, ALexer^.Buffer);
    ALexer^.Current := ALexer^.Buffer
  end;
  ALexer^.Start := ALexer^.Current;

  Result := LexerCurrent(ALexer);
  return Result
END LexerLex;

(* Release the buffer allocated by LexerInitialize. *)
proc LexerDestroy(ALexer: PLexer);
begin
  DEALLOCATE(ALexer^.Buffer, ALexer^.Size)
END LexerDestroy;

BEGIN
  InitializeClassification();
  InitializeTransitions()
END Lexer.