IMPLEMENTATION MODULE Lexer;

FROM FIO IMPORT ReadNBytes, StdErr;
FROM SYSTEM IMPORT ADR, TSIZE;
FROM DynamicStrings IMPORT String, InitStringCharStar, KillString;
FROM StringConvert IMPORT StringToInteger;
FROM Storage IMPORT DEALLOCATE, ALLOCATE;
FROM Strings IMPORT Length;
FROM MemUtils IMPORT MemCopy, MemZero;
FROM StrCase IMPORT Lower;

CONST
  (* Size in bytes of the input buffer allocated by lexer_initialize. *)
  CHUNK_SIZE = 65536;

TYPE
  (*
   * Classification table assigns each possible character to a group (class).
   * All characters of the same group are handled equivalently.
   *
   * Classification:
   *)
  TransitionClass = (
    transitionClassInvalid,
    transitionClassDigit,
    transitionClassAlpha,
    transitionClassSpace,
    transitionClassColon,
    transitionClassEquals,
    transitionClassLeftParen,
    transitionClassRightParen,
    transitionClassAsterisk,
    transitionClassUnderscore,
    transitionClassSingle,
    transitionClassHex,
    transitionClassZero,
    transitionClassX,
    transitionClassEof,
    transitionClassDot,
    transitionClassMinus,
    transitionClassSingleQuote,
    transitionClassDoubleQuote,
    transitionClassGreater,
    transitionClassLess,
    transitionClassOther
  );
  (* States of the lexer automaton. transitionStateEnd terminates a token. *)
  TransitionState = (
    transitionStateStart,
    transitionStateColon,
    transitionStateIdentifier,
    transitionStateDecimal,
    transitionStateGreater,
    transitionStateMinus,
    transitionStateLeftParen,
    transitionStateLess,
    transitionStateDot,
    transitionStateComment,
    transitionStateClosingComment,
    transitionStateCharacter,
    transitionStateString,
    transitionStateLeadingZero,
    transitionStateDecimalSuffix,
    transitionStateEnd
  );
  (* Procedure executed when a transition is taken. *)
  TransitionAction = PROCEDURE(PLexer, PLexerToken);
  (* One cell of the transition table: what to do and where to go next. *)
  Transition = RECORD
    Action: TransitionAction;
    NextState: TransitionState
  END;
  (* One row of the transition table, indexed by ORD(class) + 1. *)
  TransitionClasses = ARRAY[1..22] OF Transition;

VAR
  (*
   * Indexed by ORD(character) + 1. It has to cover all 256 character codes:
   * initialize_classification fills entries 129..256 and the lexer looks up
   * arbitrary input bytes. The array used to be declared with only 128
   * elements, which made both the initialization and the lookups of
   * non-ASCII bytes run past its end.
   *)
  classification: ARRAY[1..256] OF TransitionClass;
  (* Indexed by ORD(state) + 1, then by ORD(class) + 1. *)
  transitions: ARRAY[1..16] OF TransitionClasses;

(* Assigns Class to classification[First] .. classification[Last], inclusive. *)
PROCEDURE classify_range(First: CARDINAL; Last: CARDINAL; Class: TransitionClass);
VAR
  index: CARDINAL;
BEGIN
  index := First;
  WHILE index <= Last DO
    classification[index] := Class;
    INC(index)
  END
END classify_range;

(*
 * Fills the classification table. The index of an entry is the character
 * code plus one, so entry 1 describes NUL and entry 128 describes DEL.
 * Everything above the ASCII range is classified as "other".
 *)
PROCEDURE initialize_classification();
BEGIN
  (* Control characters. *)
  classification[1] := transitionClassEof; (* NUL *)
  classify_range(2, 9, transitionClassInvalid); (* SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS *)
  classification[10] := transitionClassSpace; (* HT *)
  classification[11] := transitionClassSpace; (* LF *)
  classification[12] := transitionClassInvalid; (* VT *)
  classification[13] := transitionClassInvalid; (* FF *)
  classification[14] := transitionClassSpace; (* CR *)
  classify_range(15, 32, transitionClassInvalid); (* SO up to US *)

  (* Space and punctuation. *)
  classification[33] := transitionClassSpace; (* Space *)
  classification[34] := transitionClassSingle; (* ! *)
  classification[35] := transitionClassDoubleQuote; (* " *)
  classification[36] := transitionClassOther; (* # *)
  classification[37] := transitionClassOther; (* $ *)
  classification[38] := transitionClassSingle; (* % *)
  classification[39] := transitionClassSingle; (* & *)
  classification[40] := transitionClassSingleQuote; (* ' *)
  classification[41] := transitionClassLeftParen; (* ( *)
  classification[42] := transitionClassRightParen; (* ) *)
  classification[43] := transitionClassAsterisk; (* * *)
  classification[44] := transitionClassSingle; (* + *)
  classification[45] := transitionClassSingle; (* , *)
  classification[46] := transitionClassMinus; (* - *)
  classification[47] := transitionClassDot; (* . *)
  classification[48] := transitionClassSingle; (* / *)

  (* Digits. Zero has its own class because it starts a different state. *)
  classification[49] := transitionClassZero; (* 0 *)
  classify_range(50, 58, transitionClassDigit); (* 1 up to 9 *)

  classification[59] := transitionClassColon; (* : *)
  classification[60] := transitionClassSingle; (* ; *)
  classification[61] := transitionClassLess; (* < *)
  classification[62] := transitionClassEquals; (* = *)
  classification[63] := transitionClassGreater; (* > *)
  classification[64] := transitionClassOther; (* ? *)
  classification[65] := transitionClassSingle; (* @ *)

  (* Upper case letters. *)
  classify_range(66, 91, transitionClassAlpha); (* A up to Z *)

  classification[92] := transitionClassSingle; (* [ *)
  classification[93] := transitionClassOther; (* \ *)
  classification[94] := transitionClassSingle; (* ] *)
  classification[95] := transitionClassSingle; (* ^ *)
  classification[96] := transitionClassUnderscore; (* _ *)
  classification[97] := transitionClassOther; (* ` *)

  (* Lower case letters. Only the lower case a-f and x get special classes. *)
  classify_range(98, 103, transitionClassHex); (* a up to f *)
  classify_range(104, 120, transitionClassAlpha); (* g up to w *)
  classification[121] := transitionClassX; (* x *)
  classify_range(122, 123, transitionClassAlpha); (* y, z *)

  classification[124] := transitionClassOther; (* { *)
  classification[125] := transitionClassSingle; (* | *)
  classification[126] := transitionClassOther; (* } *)
  classification[127] := transitionClassSingle; (* ~ *)
  classification[128] := transitionClassInvalid; (* DEL *)

  (* Character codes 128..255. *)
  classify_range(129, 256, transitionClassOther)
END initialize_classification;

(*
 * Compares the token delimited by TokenStart (inclusive) and TokenEnd
 * (exclusive) with Keyword. A token character matches if it is equal to the
 * keyword character as written or to Lower() of that keyword character.
 * Returns TRUE only if the whole token matches the whole keyword.
 *)
PROCEDURE compare_keyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
VAR
  result: BOOLEAN;
  index: CARDINAL;
  keyword_length: CARDINAL;
BEGIN
  index := 0;
  result := TRUE;
  (* The keyword does not change, so its length is computed only once. *)
  keyword_length := Length(Keyword);

  WHILE (index < keyword_length) AND (TokenStart <> TokenEnd) AND result DO
    result := (Keyword[index] = TokenStart^) OR (Lower(Keyword[index]) = TokenStart^);
    INC(TokenStart);
    INC(index)
  END;
  (* Both the keyword and the token have to be consumed completely. *)
  result := (index = keyword_length) AND (TokenStart = TokenEnd) AND result;
  RETURN result
END compare_keyword;

(* Reached the end
of file. *)
(* Marks the token as the end-of-file token. The position is not changed. *)
PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken);
BEGIN
  token^.kind := lexerKindEof
END transition_action_eof;

(* Add the character to the token currently read and advance to the next character. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken);
BEGIN
  INC(lexer^.Current)
END transition_action_accumulate;

(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is derived from the first character of the token; for any
 * other first character the kind is left untouched. *)
PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken);
VAR
  first: CHAR;
BEGIN
  first := lexer^.Start^;

  IF first = ':' THEN
    token^.kind := lexerKindColon
  ELSIF first = '>' THEN
    token^.kind := lexerKindGreaterThan
  ELSIF first = '<' THEN
    token^.kind := lexerKindLessThan
  ELSIF first = '(' THEN
    token^.kind := lexerKindLeftParen
  ELSIF first = '-' THEN
    (* A lone minus used to be reported as lexerKindLeftParen.
     * NOTE(review): assumes lexerKindMinus is declared in the definition
     * module next to lexerKindPlus - confirm. *)
    token^.kind := lexerKindMinus
  ELSIF first = '.' THEN
    token^.kind := lexerKindDot
  END
END transition_action_finalize;

(* An action for tokens containing multiple characters.
 *
 * The kind is chosen from the first character of the token and the current
 * character; the current character is then consumed as a part of the token. *)
PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken);
VAR
  first: CHAR;
  second: CHAR;
BEGIN
  first := lexer^.Start^;
  second := lexer^.Current^;

  IF (first = '<') AND (second = '>') THEN
    token^.kind := lexerKindNotEqual
  ELSIF (first = '<') AND (second = '=') THEN
    token^.kind := lexerKindLessEqual
  ELSIF (first = '>') AND (second = '=') THEN
    token^.kind := lexerKindGreaterEqual
  ELSIF (first = '.') AND (second = '.') THEN
    token^.kind := lexerKindRange
  ELSIF (first = ':') AND (second = '=') THEN
    token^.kind := lexerKindAssignment
  ELSIF (first = '-') AND (second = '>') THEN
    token^.kind := lexerKindArrow
  END;
  INC(lexer^.Current)
END transition_action_composite;

(* Skip a space. Both the token start and the current position move forward,
 * so the skipped character does not become a part of any token. *)
PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken);
BEGIN
  INC(lexer^.Current);
  INC(lexer^.Start)
END transition_action_skip;

(* Delimited string action.
*)
(* Finishes a token enclosed in delimiters (a comment, a character or a string
 * literal) and consumes the closing delimiter. The kind is chosen by the
 * opening delimiter: the transition table sends a single quote to the
 * character state and a double quote to the string state, so the kinds below
 * follow the same mapping (they used to be swapped). *)
PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken);
BEGIN
  IF lexer^.Start^ = '(' THEN
    token^.kind := lexerKindComment
  ELSIF lexer^.Start^ = "'" THEN
    token^.kind := lexerKindCharacter
  ELSIF lexer^.Start^ = '"' THEN
    token^.kind := lexerKindString
  END;
  INC(lexer^.Current)
END transition_action_delimited;

(* Finalize keyword OR identifier.
 *
 * The token text is stored in identifierKind: the first element receives the
 * length and the text itself follows from the second element on. Afterwards
 * the text is compared with the known keywords; if one of them matches, the
 * kind is replaced. The keywords are mutually exclusive, so the search stops
 * at the first match.
 *
 * NOTE(review): the length is not checked against the size of identifierKind
 * here - confirm that over-long identifiers cannot reach this point. *)
PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken);
BEGIN
  token^.kind := lexerKindIdentifier;

  token^.identifierKind[1] := lexer^.Current - lexer^.Start;
  MemCopy(lexer^.Start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));

  IF compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindProgram
  ELSIF compare_keyword('IMPORT', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindImport
  ELSIF compare_keyword('CONST', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindConst
  ELSIF compare_keyword('VAR', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindVar
  ELSIF compare_keyword('IF', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindIf
  ELSIF compare_keyword('THEN', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindThen
  ELSIF compare_keyword('ELSIF', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindElsif
  ELSIF compare_keyword('ELSE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindElse
  ELSIF compare_keyword('WHILE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindWhile
  ELSIF compare_keyword('DO', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindDo
  ELSIF compare_keyword('proc', lexer^.Start, lexer^.Current) THEN
    (* Written in lower case, so only the lower case spelling matches. *)
    token^.kind := lexerKindProc
  ELSIF compare_keyword('BEGIN', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindBegin
  ELSIF compare_keyword('END', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindEnd
  ELSIF compare_keyword('TYPE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindType
  ELSIF compare_keyword('RECORD', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindRecord
  ELSIF compare_keyword('UNION', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindUnion
  ELSIF compare_keyword('NIL', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindNull
  ELSIF compare_keyword('AND', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindAnd
  ELSIF compare_keyword('OR', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindOr
  ELSIF compare_keyword('RETURN', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindReturn
  ELSIF compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindDefinition
  ELSIF compare_keyword('TO', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindTo
  ELSIF compare_keyword('CASE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindCase
  ELSIF compare_keyword('OF', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindOf
  ELSIF compare_keyword('FROM', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindFrom
  ELSIF compare_keyword('MODULE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindModule
  ELSIF compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindImplementation
  ELSIF compare_keyword('POINTER', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindPointer
  ELSIF compare_keyword('ARRAY', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindArray
  ELSIF compare_keyword('TRUE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindBoolean;
    token^.booleanKind := TRUE
  ELSIF compare_keyword('FALSE', lexer^.Start, lexer^.Current) THEN
    token^.kind := lexerKindBoolean;
    token^.booleanKind := FALSE
  END
END transition_action_key_id;

(* Action for tokens containing only one character. The character cannot be
 * followed by other characters forming a composite token.
*)
(* Chooses the token kind from the current character and consumes it. A
 * character without a branch below leaves the kind untouched.
 *
 * NOTE(review): the tables also route the asterisk, the exclamation mark and
 * the tilde to this action, but none of them has a branch here - confirm
 * which kinds they are supposed to produce. *)
PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken);
VAR
  symbol: CHAR;
BEGIN
  symbol := lexer^.Current^;

  IF symbol = '&' THEN
    token^.kind := lexerKindAnd
  ELSIF symbol = ';' THEN
    token^.kind := lexerKindSemicolon
  ELSIF symbol = ',' THEN
    token^.kind := lexerKindComma
  ELSIF symbol = ')' THEN
    token^.kind := lexerKindRightParen
  ELSIF symbol = '[' THEN
    token^.kind := lexerKindLeftSquare
  ELSIF symbol = ']' THEN
    token^.kind := lexerKindRightSquare
  ELSIF symbol = '^' THEN
    token^.kind := lexerKindHat
  ELSIF symbol = '=' THEN
    token^.kind := lexerKindEqual
  ELSIF symbol = '+' THEN
    token^.kind := lexerKindPlus
  ELSIF symbol = '/' THEN
    token^.kind := lexerKindDivision
  ELSIF symbol = '%' THEN
    token^.kind := lexerKindRemainder
  ELSIF symbol = '@' THEN
    token^.kind := lexerKindAt
  ELSIF symbol = '|' THEN
    token^.kind := lexerKindPipe
  END;
  INC(lexer^.Current)
END transition_action_single;

(* Handle an integer literal.
*) PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken); VAR buffer: String; integer_length: CARDINAL; found: BOOLEAN; BEGIN token^.kind := lexerKindInteger; integer_length := lexer^.Current - lexer^.Start; MemZero(ADR(token^.identifierKind), TSIZE(Identifier)); MemCopy(lexer^.Start, integer_length, ADR(token^.identifierKind[1])); buffer := InitStringCharStar(ADR(token^.identifierKind[1])); token^.integerKind := StringToInteger(buffer, 10, found); buffer := KillString(buffer) END transition_action_integer; PROCEDURE set_default_transition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState); VAR DefaultTransition: Transition; BEGIN DefaultTransition.Action := DefaultAction; DefaultTransition.NextState := NextState; transitions[ORD(CurrentState) + 1][ORD(transitionClassInvalid) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDigit) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassAlpha) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSpace) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassColon) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassEquals) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassLeftParen) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassRightParen) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassAsterisk) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassUnderscore) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSingle) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassHex) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassZero) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 
1][ORD(transitionClassX) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassEof) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDot) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassMinus) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassSingleQuote) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassDoubleQuote) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassGreater) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassLess) + 1] := DefaultTransition; transitions[ORD(CurrentState) + 1][ORD(transitionClassOther) + 1] := DefaultTransition END set_default_transition; (* * The transition table describes transitions from one state to another, given * a symbol (character class). * * The table has m rows and n columns, where m is the amount of states and n is * the amount of classes. So given the current state and a classified character * the table can be used to look up the next state. * * Each cell is a word long. * - The least significant byte of the word is a row number (beginning with 0). * It specifies the target state. "ff" means that this is an end state and no * transition is possible. * - The next byte is the action that should be performed when transitioning. * For the meaning of actions see labels in the lex_next function, which * handles each action. *) PROCEDURE initialize_transitions(); BEGIN (* Start state. 
*) transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd; 
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := 
transitionStateCharacter; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := NIL; transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd; (* Colon state. *) set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; (* Identifier state. 
*) set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd); transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier; (* Decimal state. 
*) set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := NIL; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix; (* Greater state. *) set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; (* Minus state. 
*) set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite; transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd; (* Left paren state. *) set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment; (* Less state. *) set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite; transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd; (* Hexadecimal after 0x. *) set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd); transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite; transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd; (* Comment. 
*) set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment); transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; (* Closing comment. *) set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment); transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; (* Character. 
*) set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter); transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd; (* String. *) set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString); transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := NIL; transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := NIL; transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited; transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd; (* Leading zero. 
*) set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd); transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := NIL; transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd; (* Digit with a character suffix. 
*)
  set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;

  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := NIL;
  transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd
END initialize_transitions;

(*
 * Prepares a lexer for reading from Input.
 *
 * Allocates a zero-filled buffer of CHUNK_SIZE bytes and records its size.
 * Length is set to 0, which lexer_lex uses as the "nothing read yet" marker.
 * Start and Current are pointed at the (all NUL) buffer so that the lexer is
 * in a defined state even before the first call to lexer_lex.
 *
 * The buffer must be released with lexer_destroy.
 *)
PROCEDURE lexer_initialize(lexer: PLexer; Input: File);
BEGIN
  lexer^.Input := Input;
  lexer^.Length := 0;

  ALLOCATE(lexer^.Buffer, CHUNK_SIZE);
  MemZero(lexer^.Buffer, CHUNK_SIZE);
  lexer^.Size := CHUNK_SIZE;

  lexer^.Current := lexer^.Buffer;
  lexer^.Start := lexer^.Current
END lexer_initialize;

(*
 * Runs the state machine from lexer^.Start and returns the resulting token.
 *
 * Each step classifies the byte under lexer^.Current, looks up the transition
 * for the current state and that class, invokes the transition action (if
 * any) with the lexer and a pointer to the result token, and moves to the
 * next state. The loop stops when the end state is reached.
 *
 * NOTE(review): result is only written by transition actions. If the end
 * state is reached through transitions without an action, the returned token
 * is undefined. Confirm callers can cope with that.
 *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
CONST
  (* Number of entries in the classification table (ARRAY[1..128]). *)
  CLASSIFIED_COUNT = 128;
VAR
  CurrentClass: TransitionClass;
  CurrentState: TransitionState;
  CurrentTransition: Transition;
  code: CARDINAL;
  result: LexerToken;
BEGIN
  lexer^.Current := lexer^.Start;
  CurrentState := transitionStateStart;

  WHILE CurrentState <> transitionStateEnd DO
    code := ORD(lexer^.Current^);

    (* The classification table covers only the first 128 character codes.
       Anything above that would index past the table, so it is treated as
       an invalid character instead. *)
    IF code < CLASSIFIED_COUNT THEN
      CurrentClass := classification[code + 1]
    ELSE
      CurrentClass := transitionClassInvalid
    END;

    CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
    IF CurrentTransition.Action <> NIL THEN
      CurrentTransition.Action(lexer, ADR(result))
    END;
    CurrentState := CurrentTransition.NextState
  END;
  RETURN result
END lexer_current;

(*
 * Reads the input on first use and returns the token that starts at the
 * current position.
 *
 * While lexer^.Length is 0 the input is read into the buffer and Current is
 * reset to the beginning of the buffer. At most CHUNK_SIZE - 1 bytes are
 * read: the buffer was zero-filled by lexer_initialize, so its last byte
 * stays NUL, which the classification table maps to the end-of-input class.
 * Without that terminator the state machine would run past the buffer for
 * inputs of CHUNK_SIZE bytes or more.
 *
 * NOTE(review): the buffer is not refilled once Length is non-zero, so input
 * beyond the first chunk is not lexed.
 *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
BEGIN
  IF lexer^.Length = 0 THEN
    lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE - 1, lexer^.Buffer);
    lexer^.Current := lexer^.Buffer
  END;
  lexer^.Start := lexer^.Current;

  RETURN lexer_current(lexer)
END lexer_lex;

(*
 * Releases the buffer allocated by lexer_initialize.
 *)
PROCEDURE lexer_destroy(lexer: PLexer);
BEGIN
  DEALLOCATE(lexer^.Buffer, lexer^.Size)
END lexer_destroy;

(* Module initialisation: build the lookup tables used by lexer_current. *)
BEGIN
  initialize_classification();
  initialize_transitions()
END Lexer.