IMPLEMENTATION MODULE Lexer;

FROM FIO IMPORT ReadNBytes;
FROM SYSTEM IMPORT ADR;

FROM Storage IMPORT DEALLOCATE, ALLOCATE;
FROM Strings IMPORT Length;
FROM MemUtils IMPORT MemCopy, MemZero;
FROM StrCase IMPORT Lower;

CONST
  CHUNK_SIZE = 65536;

TYPE
  (*
   * Classification table assigns each possible character to a group (class). All
   * characters of the same group are handled equivalently.
   *
   * Classification:
   *)
  TransitionClass = (
    transitionClassInvalid,
    transitionClassDigit,
    transitionClassAlpha,
    transitionClassSpace,
    transitionClassColon,
    transitionClassEquals,
    transitionClassLeftParen,
    transitionClassRightParen,
    transitionClassAsterisk,
    transitionClassUnderscore,
    transitionClassSingle,
    transitionClassHex,
    transitionClassZero,
    transitionClassX,
    transitionClassEof,
    transitionClassDot,
    transitionClassMinus,
    transitionClassSingleQuote,
    transitionClassDoubleQuote,
    transitionClassGreater,
    transitionClassLess,
    transitionClassOther
  );
  TransitionState = (
    transitionStateStart,
    transitionStateColon,
    transitionStateIdentifier,
    transitionStateDecimal,
    transitionStateGreater,
    transitionStateMinus,
    transitionStateLeftParen,
    transitionStateLess,
    transitionStateDot,
    transitionStateComment,
    transitionStateClosingComment,
    transitionStateCharacter,
    transitionStateString,
    transitionStateLeadingZero,
    transitionStateDecimalSuffix,
    transitionStateEnd
  );
  TransitionAction = PROCEDURE(PLexer, PLexerToken);
  Transition = RECORD
    Action: TransitionAction;
    NextState: TransitionState
  END;
  (* One row of the transition table: a cell for each of the 22 classes. *)
  TransitionClasses = ARRAY[1..22] OF Transition;

VAR
  (* Indexed by character code + 1; covers the codes 0 to 127 only. *)
  classification: ARRAY[1..128] OF TransitionClass;
  (* Indexed by ORD(state) + 1, then by ORD(class) + 1. *)
  transitions: ARRAY[1..16] OF TransitionClasses;

(* Assigns CharacterClass to every character code from First to Last inclusive. *)
PROCEDURE classify_range(First: CHAR; Last: CHAR; CharacterClass: TransitionClass);
VAR
  Code: CARDINAL;
BEGIN
  FOR Code := ORD(First) TO ORD(Last) DO
    classification[Code + 1] := CharacterClass
  END
END classify_range;

(*
 * Fills the classification table. The entry for a character lives at the
 * index "character code + 1", because the table starts at 1.
 *)
PROCEDURE initialize_classification();
BEGIN
  (* Default: control characters and DEL stay invalid unless set below. *)
  classify_range(CHR(0), CHR(127), transitionClassInvalid);

  classification[1] := transitionClassEof; (* NUL *)
  classification[10] := transitionClassSpace; (* HT *)
  classification[11] := transitionClassSpace; (* LF *)
  classification[14] := transitionClassSpace; (* CR *)

  classification[ORD(' ') + 1] := transitionClassSpace;
  classification[ORD('!') + 1] := transitionClassSingle;
  classification[ORD('"') + 1] := transitionClassDoubleQuote;
  classification[ORD('#') + 1] := transitionClassOther;
  classification[ORD('$') + 1] := transitionClassOther;
  classification[ORD('%') + 1] := transitionClassSingle;
  classification[ORD('&') + 1] := transitionClassSingle;
  classification[ORD("'") + 1] := transitionClassSingleQuote;
  classification[ORD('(') + 1] := transitionClassLeftParen;
  classification[ORD(')') + 1] := transitionClassRightParen;
  classification[ORD('*') + 1] := transitionClassAsterisk;
  classification[ORD('+') + 1] := transitionClassSingle;
  classification[ORD(',') + 1] := transitionClassSingle;
  classification[ORD('-') + 1] := transitionClassMinus;
  classification[ORD('.') + 1] := transitionClassDot;
  classification[ORD('/') + 1] := transitionClassSingle;

  (* Zero has its own class so that a leading zero can be told apart. *)
  classification[ORD('0') + 1] := transitionClassZero;
  classify_range('1', '9', transitionClassDigit);

  classification[ORD(':') + 1] := transitionClassColon;
  classification[ORD(';') + 1] := transitionClassSingle;
  classification[ORD('<') + 1] := transitionClassLess;
  classification[ORD('=') + 1] := transitionClassEquals;
  classification[ORD('>') + 1] := transitionClassGreater;
  classification[ORD('?') + 1] := transitionClassOther;
  classification[ORD('@') + 1] := transitionClassSingle;

  classify_range('A', 'Z', transitionClassAlpha);

  classification[ORD('[') + 1] := transitionClassSingle;
  classification[93] := transitionClassOther; (* Backslash. *)
  classification[ORD(']') + 1] := transitionClassSingle;
  classification[ORD('^') + 1] := transitionClassSingle;
  classification[ORD('_') + 1] := transitionClassUnderscore;
  classification[ORD('`') + 1] := transitionClassOther;

  (* Only the lower case letters a to f and x get the special classes. *)
  classify_range('a', 'f', transitionClassHex);
  classify_range('g', 'z', transitionClassAlpha);
  classification[ORD('x') + 1] := transitionClassX;

  classification[ORD('{') + 1] := transitionClassOther;
  classification[ORD('|') + 1] := transitionClassSingle;
  classification[ORD('}') + 1] := transitionClassOther;
  classification[ORD('~') + 1] := transitionClassSingle
END initialize_classification;

(*
 * Compares Keyword with the characters between TokenStart (inclusive) and
 * TokenEnd (exclusive).
 *
 * A token character matches when it is equal to the keyword character at the
 * same position or to the Lower() form of that keyword character. The result
 * is TRUE only if every character matches and both texts have the same length.
 *)
PROCEDURE compare_keyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
VAR
  Result: BOOLEAN;
  Index: CARDINAL;
  KeywordLength: CARDINAL;
BEGIN
  Index := 0;
  Result := TRUE;
  (* The keyword is not modified, so its length is computed only once. *)
  KeywordLength := Length(Keyword);

  WHILE (Index < KeywordLength) AND (TokenStart <> TokenEnd) AND Result DO
    Result := (Keyword[Index] = TokenStart^) OR (Lower(Keyword[Index]) = TokenStart^);
    INC(TokenStart);
    INC(Index)
  END;
  (* Both the keyword and the token have to be consumed completely. *)
  Result := (Index = KeywordLength) AND (TokenStart = TokenEnd) AND Result;
  RETURN Result
END compare_keyword;

(* Reached the end of file.
*)
PROCEDURE transition_action_eof(lexer: PLexer; AToken: PLexerToken);
BEGIN
  AToken^.Kind := lexerKindEof
END transition_action_eof;

(* Add the character to the token currently read and advance to the next character. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; AToken: PLexerToken);
BEGIN
  INC(lexer^.Current)
END transition_action_accumulate;

(*
 * The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is derived from the first character of the token. If that
 * character is none of the ones listed below, the kind is left unchanged.
 *)
PROCEDURE transition_action_finalize(lexer: PLexer; AToken: PLexerToken);
BEGIN
  IF lexer^.Start^ = ':' THEN
    AToken^.Kind := lexerKindColon
  ELSIF lexer^.Start^ = '>' THEN
    AToken^.Kind := lexerKindGreaterThan
  ELSIF lexer^.Start^ = '<' THEN
    AToken^.Kind := lexerKindLessThan
  ELSIF lexer^.Start^ = '(' THEN
    AToken^.Kind := lexerKindLeftParen
  ELSIF lexer^.Start^ = '-' THEN
    (* A lone minus used to be reported as a left parenthesis.
       NOTE(review): lexerKindMinus is expected to be declared next to the
       other token kinds in the definition module - confirm. *)
    AToken^.Kind := lexerKindMinus
  ELSIF lexer^.Start^ = '.' THEN
    AToken^.Kind := lexerKindDot
  END
END transition_action_finalize;

(*
 * An action for tokens containing multiple characters.
 *
 * The kind is chosen from the first character of the token and the current
 * character; afterwards the current character is consumed.
 *
 * NOTE(review): the minus state routes a minus followed by a greater-than
 * sign to this action, but no branch below sets a kind for that pair, so
 * AToken^.Kind keeps whatever value it had before.
 *)
PROCEDURE transition_action_composite(lexer: PLexer; AToken: PLexerToken);
BEGIN
  IF lexer^.Start^ = '<' THEN
    IF lexer^.Current^ = '>' THEN
      AToken^.Kind := lexerKindNotEqual
    END;
    IF lexer^.Current^ = '=' THEN
      AToken^.Kind := lexerKindLessEqual
    END
  END;
  IF (lexer^.Start^ = '>') AND (lexer^.Current^ = '=') THEN
    AToken^.Kind := lexerKindGreaterEqual
  END;
  IF (lexer^.Start^ = '.') AND (lexer^.Current^ = '.') THEN
    AToken^.Kind := lexerKindRange
  END;
  IF (lexer^.Start^ = ':') AND (lexer^.Current^ = '=') THEN
    AToken^.Kind := lexerKindAssignment
  END;
  INC(lexer^.Current)
END transition_action_composite;

(* Skip a space: both the token start and the current position move forward. *)
PROCEDURE transition_action_skip(lexer: PLexer; AToken: PLexerToken);
BEGIN
  INC(lexer^.Current);
  INC(lexer^.Start)
END transition_action_skip;

(* Delimited string action.
*) PROCEDURE transition_action_delimited(lexer: PLexer; AToken: PLexerToken); BEGIN IF lexer^.Start^ = '(' THEN AToken^.Kind := lexerKindComment END; IF lexer^.Start^ = '"' THEN AToken^.Kind := lexerKindCharacter END; IF lexer^.Start^ = "'" THEN AToken^.Kind := lexerKindString END; INC(lexer^.Current) END transition_action_delimited; (* Finalize keyword OR identifier. *) PROCEDURE transition_action_key_id(lexer: PLexer; AToken: PLexerToken); BEGIN AToken^.Kind := lexerKindIdentifier; AToken^.identifierKind[1] := lexer^.Current - lexer^.Start; MemCopy(lexer^.Start, ORD(AToken^.identifierKind[1]), ADR(AToken^.identifierKind[2])); IF compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindProgram END; IF compare_keyword('IMPORT', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindImport END; IF compare_keyword('CONST', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindConst END; IF compare_keyword('VAR', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindVar END; IF compare_keyword('IF', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindIf END; IF compare_keyword('THEN', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindThen END; IF compare_keyword('ELSIF', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindElsif END; IF compare_keyword('ELSE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindElse END; IF compare_keyword('WHILE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindWhile END; IF compare_keyword('DO', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindDo END; IF compare_keyword('proc', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindProc END; IF compare_keyword('BEGIN', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindBegin END; IF compare_keyword('END', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindEnd END; IF compare_keyword('TYPE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindType END; 
IF compare_keyword('RECORD', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindRecord END; IF compare_keyword('UNION', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindUnion END; IF compare_keyword('NIL', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindNull END; IF compare_keyword('AND', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindAnd END; IF compare_keyword('OR', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindOr END; IF compare_keyword('RETURN', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindReturn END; IF compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindDefinition END; IF compare_keyword('TO', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindTo END; IF compare_keyword('CASE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindCase END; IF compare_keyword('OF', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindOf END; IF compare_keyword('FROM', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindFrom END; IF compare_keyword('MODULE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindModule END; IF compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindImplementation END; IF compare_keyword('POINTER', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindPointer END; IF compare_keyword('ARRAY', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindArray END; IF compare_keyword('TRUE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindBoolean; AToken^.booleanKind := TRUE END; IF compare_keyword('FALSE', lexer^.Start, lexer^.Current) THEN AToken^.Kind := lexerKindBoolean; AToken^.booleanKind := FALSE END END transition_action_key_id; (* Action for tokens containing only one character. The character cannot be * followed by other characters forming a composite token. 
*) PROCEDURE transition_action_single(lexer: PLexer; AToken: PLexerToken); BEGIN IF lexer^.Current^ = '&' THEN AToken^.Kind := lexerKindAnd END; IF lexer^.Current^ = ';' THEN AToken^.Kind := lexerKindSemicolon END; IF lexer^.Current^ = ',' THEN AToken^.Kind := lexerKindComma END; IF lexer^.Current^ = ',' THEN AToken^.Kind := lexerKindComma END; IF lexer^.Current^ = ')' THEN AToken^.Kind := lexerKindRightParen END; IF lexer^.Current^ = '[' THEN AToken^.Kind := lexerKindLeftSquare END; IF lexer^.Current^ = ']' THEN AToken^.Kind := lexerKindRightSquare END; IF lexer^.Current^ = '^' THEN AToken^.Kind := lexerKindHat END; IF lexer^.Current^ = '=' THEN AToken^.Kind := lexerKindEqual END; IF lexer^.Current^ = '+' THEN AToken^.Kind := lexerKindPlus END; IF lexer^.Current^ = '/' THEN AToken^.Kind := lexerKindDivision END; IF lexer^.Current^ = '%' THEN AToken^.Kind := lexerKindRemainder END; IF lexer^.Current^ = '@' THEN AToken^.Kind := lexerKindAt END; IF lexer^.Current^ = '|' THEN AToken^.Kind := lexerKindPipe END; INC(lexer^.Current) END transition_action_single; (* Handle an integer literal. 
*) PROCEDURE transition_action_integer(lexer: PLexer; AToken: PLexerToken); BEGIN AToken^.Kind := lexerKindInteger END transition_action_integer; PROCEDURE set_default_transition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState); VAR DefaultTransition: Transition; BEGIN DefaultTransition.Action := DefaultAction; DefaultTransition.NextState := NextState; transitions[ORD(CurrentState) + 1][ORD(transitionClassInvalid) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDigit) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassAlpha) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSpace) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassColon) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassEquals) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassLeftParen) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassRightParen) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassAsterisk) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassUnderscore) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSingle) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassHex) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassZero) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassX) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassEof) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDot) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassMinus) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSingleQuote) + 1] := DefaultTransition; 
transitions[ORD(CurrentState) + 1][ORD(transitionClassDoubleQuote) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassGreater) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassLess) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassOther) + 1] := DefaultTransition END set_default_transition; (* * The transition table describes transitions from one state to another, given * a symbol (character class). * * The table has m rows and n columns, where m is the amount of states and n is * the amount of classes. So given the current state and a classified character * the table can be used to look up the next state. * * Each cell is a word long. * - The least significant byte of the word is a row number (beginning with 0). * It specifies the target state. "ff" means that this is an end state and no * transition is possible. * - The next byte is the action that should be performed when transitioning. * For the meaning of actions see labels in the lex_next function, which * handles each action. *) PROCEDURE initialize_transitions(); BEGIN (* Start state. 
*) transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd; 
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := 
transitionStateCharacter; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := NIL; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd; (* Colon state. *) set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; (* Identifier state. 
*) set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd); transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier; (* Decimal state. 
*) set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := NIL; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix; (* Greater state. *) set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; (* Minus state. 
*) set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite; transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd; (* Left paren state. *) set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment; (* Less state. *) set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd; (* Hexadecimal after 0x. *) set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite; transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd; (* Comment. 
*) set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment); transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; (* Closing comment. *) set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment); transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; (* Character. 
*) set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter); transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd; (* String. *) set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString); transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd; (* Leading zero. 
*) set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd; (* Digit with a character suffix. 
*)
  set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd
END initialize_transitions;

(*
 * Prepares a lexer for reading from Input.
 *
 * Stores the input file, marks the buffer as not yet filled (Length = 0),
 * allocates a buffer of CHUNK_SIZE bytes, fills it with zeros and records
 * the allocated size in Size so that lexer_destroy can release it.
 *)
PROCEDURE lexer_initialize(lexer: PLexer; Input: File);
BEGIN
  lexer^.Input := Input;
  lexer^.Length := 0;

  ALLOCATE(lexer^.Buffer, CHUNK_SIZE);
  MemZero(lexer^.Buffer, CHUNK_SIZE);
  lexer^.Size := CHUNK_SIZE
END lexer_initialize;

(*
 * Runs the transition table over the input beginning at lexer^.Start and
 * returns the token produced by the transition actions.
 *
 * Current is reset to Start, then the state machine is driven from
 * transitionStateStart until it reaches transitionStateEnd. On every step
 * the byte under Current is classified, the transition for the pair
 * (state, class) is looked up and its action, if any, is called with the
 * lexer and the address of the result token.
 *
 * Result is written only by the actions; if no action runs before the end
 * state is reached the returned token is whatever the actions left in it.
 *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
VAR
  CharCode: CARDINAL;
  CurrentClass: TransitionClass;
  CurrentState: TransitionState;
  CurrentTransition: Transition;
  Result: LexerToken;
BEGIN
  lexer^.Current := lexer^.Start;
  CurrentState := transitionStateStart;

  WHILE CurrentState <> transitionStateEnd DO
    CharCode := ORD(lexer^.Current^);

    (* The classification table has 128 entries (codes 0..127 stored at
       indices 1..128). Larger byte values would index past its end, so
       they are classified explicitly.
       NOTE(review): transitionClassInvalid is chosen as the conservative
       class for such bytes; confirm this is the intended handling. *)
    IF CharCode < 128 THEN
      CurrentClass := classification[CharCode + 1]
    ELSE
      CurrentClass := transitionClassInvalid
    END;

    CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
    IF CurrentTransition.Action <> NIL THEN
      CurrentTransition.Action(lexer, ADR(Result))
    END;
    CurrentState := CurrentTransition.NextState
  END;
  RETURN Result
END lexer_current;

(*
 * Returns the next token of the input.
 *
 * While nothing has been read yet (Length = 0) the buffer is filled from
 * the input file and Current is placed at its beginning. The token then
 * starts at the current position.
 *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
BEGIN
  IF lexer^.Length = 0 THEN
    (* Read one byte less than the buffer holds. The buffer was zero-filled
       by lexer_initialize, so the untouched last byte stays NUL, which the
       classification table maps to transitionClassEof. Without it an input
       of Size bytes or more would leave the state machine without a
       terminator inside the buffer.
       NOTE(review): only this single chunk is read; longer input is
       truncated at Size - 1 bytes. *)
    lexer^.Length := ReadNBytes(lexer^.Input, lexer^.Size - 1, lexer^.Buffer);
    lexer^.Current := lexer^.Buffer
  END;
  lexer^.Start := lexer^.Current;

  RETURN lexer_current(lexer)
END lexer_lex;

(*
 * Releases the buffer allocated by lexer_initialize, using the size
 * recorded in lexer^.Size.
 *)
PROCEDURE lexer_destroy(lexer: PLexer);
BEGIN
  DEALLOCATE(lexer^.Buffer, lexer^.Size)
END lexer_destroy;

(* Module initialisation: fill the classification and transition tables. *)
BEGIN
  initialize_classification();
  initialize_transitions()
END Lexer.