(* This Source Code Form is subject to the terms of the Mozilla Public License,
   v. 2.0. If a copy of the MPL was not distributed with this file, You can
   obtain one at https://mozilla.org/MPL/2.0/. *)
module;

import cstdio, cstring, common;

const
  (* NOTE(review): 85536 is not a power of two - confirm that 65536 was not
     intended. The constant is not used in the visible part of the file. *)
  CHUNK_SIZE := 85536;

type
  (*
   * Classification table assigns each possible character to a group (class).
   * All characters of the same group are handled equivalently.
   *
   * Classification:
   *)
  TransitionClass = (
    invalid,
    digit,
    alpha,
    space,
    colon,
    equals,
    left_paren,
    right_paren,
    asterisk,
    underscore,
    single,
    hex,
    zero,
    x,
    eof,
    dot,
    minus,
    single_quote,
    double_quote,
    greater,
    less,
    other
  );
  (* States of the lexer automaton. *)
  TransitionState = (
    start,
    colon,
    identifier,
    decimal,
    greater,
    minus,
    left_paren,
    less,
    dot,
    comment,
    closing_comment,
    character,
    string,
    leading_zero,
    decimal_suffix,
    finish
  );
  (* A token together with its payload and source range. *)
  LexerToken = record
    kind: LexerKind;
    value: union
      booleanKind: Bool;
      identifierKind: Identifier;
      integerKind: Int;
      stringKind: String
    end;
    start_location: TextLocation;
    end_location: TextLocation
  end;
  (* Action executed when a transition is taken. *)
  TransitionAction = proc(^Lexer, ^LexerToken);
  (* One cell of the transition table: what to do and where to go next. *)
  Transition = record
    action: TransitionAction;
    next_state: TransitionState
  end;
  (* One table row: a transition for each of the 22 character classes. *)
  TransitionClasses = [22]Transition;

  BufferPosition* = record
    iterator: ^Char;
    location: TextLocation
  end;
  Lexer* = record
    input: ^FILE;
    buffer: ^Char;
    size: Word;
    length: Word;
    start: BufferPosition;
    current: BufferPosition
  end;
  LexerKind* = (
    eof,
    identifier,
    _if,
    _then,
    _else,
    _elsif,
    _while,
    _do,
    _proc,
    _begin,
    _end,
    _xor,
    _const,
    _var,
    _case,
    _of,
    _type,
    _record,
    _union,
    pipe,
    to,
    boolean,
    null,
    and,
    _or,
    tilde,
    _return,
    _defer,
    range,
    left_paren,
    right_paren,
    left_square,
    right_square,
    greater_equal,
    less_equal,
    greater_than,
    less_than,
    not_equal,
    equal,
    semicolon,
    dot,
    comma,
    plus,
    minus,
    asterisk,
    division,
    remainder,
    assignment,
    colon,
    hat,
    at,
    comment,
    integer,
    word,
    character,
    string,
    from,
    pointer,
    array,
    arrow,
    _program,
    _module,
    _import
  );

var
  (* Character class by character code; the slot index is the code plus one
     (slot 1 is NUL). It has 256 slots because initialize_classification fills
     slots 129 to 256 in addition to the 7-bit ASCII range. *)
  classification: [256]TransitionClass;
  (* One row per TransitionState. *)
  transitions: [16]TransitionClasses;

(*
 * Fills the classification table. Slots 1 to 128 describe the 7-bit ASCII
 * characters one by one, all remaining slots up to 256 are set to "other".
 *)
proc initialize_classification();
var
  i: Word;
begin
  classification[1] := TransitionClass.eof; (* NUL *)
  classification[2] := TransitionClass.invalid; (* SOH *)
  classification[3] := TransitionClass.invalid; (* STX *)
  classification[4] := TransitionClass.invalid; (* ETX *)
  classification[5] := TransitionClass.invalid; (* EOT *)
  classification[6] := TransitionClass.invalid; (* ENQ *)
  classification[7] := TransitionClass.invalid; (* ACK *)
  classification[8] := TransitionClass.invalid; (* BEL *)
  classification[9] := TransitionClass.invalid; (* BS *)
  classification[10] := TransitionClass.space; (* HT *)
  classification[11] := TransitionClass.space; (* LF *)
  classification[12] := TransitionClass.invalid; (* VT *)
  classification[13] := TransitionClass.invalid; (* FF *)
  classification[14] := TransitionClass.space; (* CR *)
  classification[15] := TransitionClass.invalid; (* SO *)
  classification[16] := TransitionClass.invalid; (* SI *)
  classification[17] := TransitionClass.invalid; (* DLE *)
  classification[18] := TransitionClass.invalid; (* DC1 *)
  classification[19] := TransitionClass.invalid; (* DC2 *)
  classification[20] := TransitionClass.invalid; (* DC3 *)
  classification[21] := TransitionClass.invalid; (* DC4 *)
  classification[22] := TransitionClass.invalid; (* NAK *)
  classification[23] := TransitionClass.invalid; (* SYN *)
  classification[24] := TransitionClass.invalid; (* ETB *)
  classification[25] := TransitionClass.invalid; (* CAN *)
  classification[26] := TransitionClass.invalid; (* EM *)
  classification[27] := TransitionClass.invalid; (* SUB *)
  classification[28] := TransitionClass.invalid; (* ESC *)
  classification[29] := TransitionClass.invalid; (* FS *)
  classification[30] := TransitionClass.invalid; (* GS *)
  classification[31] := TransitionClass.invalid; (* RS *)
  classification[32] := TransitionClass.invalid; (* US *)
  classification[33] := TransitionClass.space; (* Space *)
  classification[34] := TransitionClass.single; (* ! *)
  classification[35] := TransitionClass.double_quote; (* " *)
  classification[36] := TransitionClass.other; (* # *)
  classification[37] := TransitionClass.other; (* $ *)
  classification[38] := TransitionClass.single; (* % *)
  classification[39] := TransitionClass.single; (* & *)
  classification[40] := TransitionClass.single_quote; (* ' *)
  classification[41] := TransitionClass.left_paren; (* ( *)
  classification[42] := TransitionClass.right_paren; (* ) *)
  classification[43] := TransitionClass.asterisk; (* * *)
  classification[44] := TransitionClass.single; (* + *)
  classification[45] := TransitionClass.single; (* , *)
  classification[46] := TransitionClass.minus; (* - *)
  classification[47] := TransitionClass.dot; (* . *)
  classification[48] := TransitionClass.single; (* / *)
  classification[49] := TransitionClass.zero; (* 0 *)
  classification[50] := TransitionClass.digit; (* 1 *)
  classification[51] := TransitionClass.digit; (* 2 *)
  classification[52] := TransitionClass.digit; (* 3 *)
  classification[53] := TransitionClass.digit; (* 4 *)
  classification[54] := TransitionClass.digit; (* 5 *)
  classification[55] := TransitionClass.digit; (* 6 *)
  classification[56] := TransitionClass.digit; (* 7 *)
  classification[57] := TransitionClass.digit; (* 8 *)
  classification[58] := TransitionClass.digit; (* 9 *)
  classification[59] := TransitionClass.colon; (* : *)
  classification[60] := TransitionClass.single; (* ; *)
  classification[61] := TransitionClass.less; (* < *)
  classification[62] := TransitionClass.equals; (* = *)
  classification[63] := TransitionClass.greater; (* > *)
  classification[64] := TransitionClass.other; (* ? *)
  classification[65] := TransitionClass.single; (* @ *)
  classification[66] := TransitionClass.alpha; (* A *)
  classification[67] := TransitionClass.alpha; (* B *)
  classification[68] := TransitionClass.alpha; (* C *)
  classification[69] := TransitionClass.alpha; (* D *)
  classification[70] := TransitionClass.alpha; (* E *)
  classification[71] := TransitionClass.alpha; (* F *)
  classification[72] := TransitionClass.alpha; (* G *)
  classification[73] := TransitionClass.alpha; (* H *)
  classification[74] := TransitionClass.alpha; (* I *)
  classification[75] := TransitionClass.alpha; (* J *)
  classification[76] := TransitionClass.alpha; (* K *)
  classification[77] := TransitionClass.alpha; (* L *)
  classification[78] := TransitionClass.alpha; (* M *)
  classification[79] := TransitionClass.alpha; (* N *)
  classification[80] := TransitionClass.alpha; (* O *)
  classification[81] := TransitionClass.alpha; (* P *)
  classification[82] := TransitionClass.alpha; (* Q *)
  classification[83] := TransitionClass.alpha; (* R *)
  classification[84] := TransitionClass.alpha; (* S *)
  classification[85] := TransitionClass.alpha; (* T *)
  classification[86] := TransitionClass.alpha; (* U *)
  classification[87] := TransitionClass.alpha; (* V *)
  classification[88] := TransitionClass.alpha; (* W *)
  classification[89] := TransitionClass.alpha; (* X *)
  classification[90] := TransitionClass.alpha; (* Y *)
  classification[91] := TransitionClass.alpha; (* Z *)
  classification[92] := TransitionClass.single; (* [ *)
  classification[93] := TransitionClass.other; (* \ *)
  classification[94] := TransitionClass.single; (* ] *)
  classification[95] := TransitionClass.single; (* ^ *)
  classification[96] := TransitionClass.underscore; (* _ *)
  classification[97] := TransitionClass.other; (* ` *)
  classification[98] := TransitionClass.hex; (* a *)
  classification[99] := TransitionClass.hex; (* b *)
  classification[100] := TransitionClass.hex; (* c *)
  classification[101] := TransitionClass.hex; (* d *)
  classification[102] := TransitionClass.hex; (* e *)
  classification[103] := TransitionClass.hex; (* f *)
  classification[104] := TransitionClass.alpha; (* g *)
  classification[105] := TransitionClass.alpha; (* h *)
  classification[106] := TransitionClass.alpha; (* i *)
  classification[107] := TransitionClass.alpha; (* j *)
  classification[108] := TransitionClass.alpha; (* k *)
  classification[109] := TransitionClass.alpha; (* l *)
  classification[110] := TransitionClass.alpha; (* m *)
  classification[111] := TransitionClass.alpha; (* n *)
  classification[112] := TransitionClass.alpha; (* o *)
  classification[113] := TransitionClass.alpha; (* p *)
  classification[114] := TransitionClass.alpha; (* q *)
  classification[115] := TransitionClass.alpha; (* r *)
  classification[116] := TransitionClass.alpha; (* s *)
  classification[117] := TransitionClass.alpha; (* t *)
  classification[118] := TransitionClass.alpha; (* u *)
  classification[119] := TransitionClass.alpha; (* v *)
  classification[120] := TransitionClass.alpha; (* w *)
  classification[121] := TransitionClass.x; (* x *)
  classification[122] := TransitionClass.alpha; (* y *)
  classification[123] := TransitionClass.alpha; (* z *)
  classification[124] := TransitionClass.other; (* { *)
  classification[125] := TransitionClass.single; (* | *)
  classification[126] := TransitionClass.other; (* } *)
  classification[127] := TransitionClass.single; (* ~ *)
  classification[128] := TransitionClass.invalid; (* DEL *)

  (* Everything above the 7-bit ASCII range. *)
  i := 129u;
  while i <= 256u do
    classification[i] := TransitionClass.other;
    i := i + 1u
  end
end;

(*
 * Compares the keyword with the text between token_start.iterator and
 * token_end. A keyword character matches the same character or its Lower()
 * form, so keywords written in upper case are accepted in either case.
 *
 * Returns true only if both the keyword and the token text were consumed
 * completely and every character matched.
 *)
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var
  result: Bool;
  index: Word;
  keyword_length: Word;
  continue: Bool;
begin
  index := 0u;
  result := true;
  keyword_length := Length(keyword);
  continue := (index < keyword_length) & (token_start.iterator <> token_end);

  while continue & result do
    result := (keyword[index] = token_start.iterator^)
      or (Lower(keyword[index]) = token_start.iterator^);
    (* token_start is a by-value copy, advancing it doesn't affect the caller. *)
    token_start.iterator := token_start.iterator + 1;
    index := index + 1u;
    continue := (index < keyword_length) & (token_start.iterator <> token_end)
  end;
  (* The token must not be a proper prefix of the keyword... *)
  result := result & (index = keyword_length);

  (* ...and the keyword must not be a proper prefix of the token. *)
  return result & (token_start.iterator = token_end)
end;

(* Reached the end of file. *)
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin
  token^.kind := LexerKind.eof
end;

(* Moves the position one character forward. The location is left untouched. *)
proc increment(position: ^BufferPosition);
begin
  position^.iterator := position^.iterator + 1
end;

(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin
  increment(@lexer^.current)
end;

(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character. The token kind is derived from
 * the first character of the token. *)
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin
  if lexer^.start.iterator^ = ':' then
    token^.kind := LexerKind.colon
  elsif lexer^.start.iterator^ = '>' then
    token^.kind := LexerKind.greater_than
  elsif lexer^.start.iterator^ = '<' then
    token^.kind := LexerKind.less_than
  elsif lexer^.start.iterator^ = '(' then
    token^.kind := LexerKind.left_paren
  elsif lexer^.start.iterator^ = '-' then
    token^.kind := LexerKind.minus
  elsif lexer^.start.iterator^ = '.' then
    token^.kind := LexerKind.dot
  end
end;

(* An action for tokens containing multiple characters.
*)
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin
  (* The first character of the token selects the family, the current
     character decides which two-character token it is. *)
  if lexer^.start.iterator^ = '<' then
    if lexer^.current.iterator^ = '>' then
      token^.kind := LexerKind.not_equal
    elsif lexer^.current.iterator^ = '=' then
      token^.kind := LexerKind.less_equal
    end
  elsif (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
    token^.kind := LexerKind.greater_equal
  elsif (lexer^.start.iterator^ = '.') & (lexer^.current.iterator^ = '.') then
    token^.kind := LexerKind.range
  elsif (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
    token^.kind := LexerKind.assignment
  elsif (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then
    token^.kind := LexerKind.arrow
  end;
  (* The second character belongs to the token, consume it. *)
  increment(@lexer^.current)
end;

(* Skip a space. If the skipped character is a line feed, the start location
 * moves to the first column of the next line. *)
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin
  (* Look at the character being skipped before stepping over it, otherwise a
     line feed that directly follows a token would never be counted. *)
  if lexer^.start.iterator^ = '\n' then
    lexer^.start.location.line := lexer^.start.location.line + 1u;
    lexer^.start.location.column := 1u
  end;
  increment(@lexer^.start);
  lexer^.current := lexer^.start
end;

(* Delimited string action. Finishes a comment, a string or a character
 * literal. The first character of the token tells which one it is. For string
 * and character literals the text from the opening up to and including the
 * closing delimiter is copied into a newly allocated buffer. *)
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var
  text_length: Word;
begin
  if lexer^.start.iterator^ = '(' then
    token^.kind := LexerKind.comment
  end;
  if lexer^.start.iterator^ = '"' then
    text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
    token^.stringKind := String(malloc(text_length), text_length);
    (* Copy into the allocated buffer, not over the pointer field itself. *)
    memcpy(token^.stringKind.ptr, lexer^.start.iterator, text_length);

    (* Double quotes delimit strings. *)
    token^.kind := LexerKind.string
  end;
  if lexer^.start.iterator^ = "'" then
    text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
    token^.stringKind := String(malloc(text_length), text_length);
    memcpy(token^.stringKind.ptr, lexer^.start.iterator, text_length);

    (* Single quotes delimit character literals. *)
    token^.kind := LexerKind.character
  end;
  (* Consume the closing delimiter. *)
  increment(@lexer^.current)
end;

(* Finalize keyword or identifier.
*)
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
var
  token_end: ^Char;
begin
  token_end := lexer^.current.iterator;

  (* Treat the text as an identifier first: the length goes into the first
     element, the characters follow from the second one on. *)
  token^.kind := LexerKind.identifier;
  token^.identifierKind[1] := cast(token_end - lexer^.start.iterator: Char);
  memcpy(@token^.identifierKind[2], lexer^.start.iterator, ORD(token^.identifierKind[1]));

  (* The text can match at most one keyword, so the first hit decides. *)
  if compare_keyword("program", lexer^.start, token_end) then
    token^.kind := LexerKind._program
  elsif compare_keyword("import", lexer^.start, token_end) then
    token^.kind := LexerKind._import
  elsif compare_keyword("const", lexer^.start, token_end) then
    token^.kind := LexerKind._const
  elsif compare_keyword("var", lexer^.start, token_end) then
    token^.kind := LexerKind._var
  elsif compare_keyword("if", lexer^.start, token_end) then
    token^.kind := LexerKind._if
  elsif compare_keyword("then", lexer^.start, token_end) then
    token^.kind := LexerKind._then
  elsif compare_keyword("elsif", lexer^.start, token_end) then
    token^.kind := LexerKind._elsif
  elsif compare_keyword("else", lexer^.start, token_end) then
    token^.kind := LexerKind._else
  elsif compare_keyword("while", lexer^.start, token_end) then
    token^.kind := LexerKind._while
  elsif compare_keyword("do", lexer^.start, token_end) then
    token^.kind := LexerKind._do
  elsif compare_keyword("proc", lexer^.start, token_end) then
    token^.kind := LexerKind._proc
  elsif compare_keyword("begin", lexer^.start, token_end) then
    token^.kind := LexerKind._begin
  elsif compare_keyword("end", lexer^.start, token_end) then
    token^.kind := LexerKind._end
  elsif compare_keyword("type", lexer^.start, token_end) then
    token^.kind := LexerKind._type
  elsif compare_keyword("record", lexer^.start, token_end) then
    token^.kind := LexerKind._record
  elsif compare_keyword("union", lexer^.start, token_end) then
    token^.kind := LexerKind._union
  elsif compare_keyword("NIL", lexer^.start, token_end) then
    token^.kind := LexerKind.null
  elsif compare_keyword("or", lexer^.start, token_end) then
    token^.kind := LexerKind._or
  elsif compare_keyword("return", lexer^.start, token_end) then
    token^.kind := LexerKind._return
  elsif compare_keyword("defer", lexer^.start, token_end) then
    token^.kind := LexerKind._defer
  elsif compare_keyword("TO", lexer^.start, token_end) then
    token^.kind := LexerKind.to
  elsif compare_keyword("CASE", lexer^.start, token_end) then
    token^.kind := LexerKind._case
  elsif compare_keyword("OF", lexer^.start, token_end) then
    token^.kind := LexerKind._of
  elsif compare_keyword("FROM", lexer^.start, token_end) then
    token^.kind := LexerKind.from
  elsif compare_keyword("module", lexer^.start, token_end) then
    token^.kind := LexerKind._module
  elsif compare_keyword("xor", lexer^.start, token_end) then
    token^.kind := LexerKind._xor
  elsif compare_keyword("POINTER", lexer^.start, token_end) then
    token^.kind := LexerKind.pointer
  elsif compare_keyword("ARRAY", lexer^.start, token_end) then
    token^.kind := LexerKind.array
  elsif compare_keyword("TRUE", lexer^.start, token_end) then
    token^.kind := LexerKind.boolean;
    token^.booleanKind := true
  elsif compare_keyword("FALSE", lexer^.start, token_end) then
    token^.kind := LexerKind.boolean;
    token^.booleanKind := false
  end
end;

(* Action for tokens containing only one character. The character cannot be
 * followed by other characters forming a composite token.
*)
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin
  (* The token kind is chosen by the current character. Characters without a
     branch below leave the kind untouched. *)
  if lexer^.current.iterator^ = '&' then
    token^.kind := LexerKind.and
  elsif lexer^.current.iterator^ = ';' then
    token^.kind := LexerKind.semicolon
  elsif lexer^.current.iterator^ = ',' then
    token^.kind := LexerKind.comma
  elsif lexer^.current.iterator^ = '~' then
    token^.kind := LexerKind.tilde
  elsif lexer^.current.iterator^ = ')' then
    token^.kind := LexerKind.right_paren
  elsif lexer^.current.iterator^ = '[' then
    token^.kind := LexerKind.left_square
  elsif lexer^.current.iterator^ = ']' then
    token^.kind := LexerKind.right_square
  elsif lexer^.current.iterator^ = '^' then
    token^.kind := LexerKind.hat
  elsif lexer^.current.iterator^ = '=' then
    token^.kind := LexerKind.equal
  elsif lexer^.current.iterator^ = '+' then
    token^.kind := LexerKind.plus
  elsif lexer^.current.iterator^ = '*' then
    token^.kind := LexerKind.asterisk
  elsif lexer^.current.iterator^ = '/' then
    token^.kind := LexerKind.division
  elsif lexer^.current.iterator^ = '%' then
    token^.kind := LexerKind.remainder
  elsif lexer^.current.iterator^ = '@' then
    token^.kind := LexerKind.at
  elsif lexer^.current.iterator^ = '|' then
    token^.kind := LexerKind.pipe
  end;
  (* increment expects a pointer to the whole buffer position, not to its
     iterator field. *)
  increment(@lexer^.current)
end;

(* Handle an integer literal.
*)
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var
  digits: String;
  digit_count: Int;
  found: Bool;
begin
  digit_count := lexer^.current.iterator - lexer^.start.iterator;

  (* The identifier storage of the token serves as a zero-filled scratch buffer
     for the digits.
     NOTE(review): digit_count is not checked against #size(Identifier). *)
  memset(@token^.identifierKind, 0, #size(Identifier));
  memcpy(@token^.identifierKind[1], lexer^.start.iterator, digit_count);
  digits := InitStringCharStar(@token^.identifierKind[1]);

  token^.kind := LexerKind.integer;
  (* "found" receives the conversion status, it is not inspected here. *)
  token^.integerKind := StringToInteger(digits, 10, found);
  digits := KillString(digits)
end;

(* Fills the whole transition table row of current_state with the same action
 * and next state. Returns the index of that row in the transitions table so the
 * caller can overwrite single cells afterwards. *)
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
var
  default_transition: Transition;
  state_index: Int;
  class_index: Int;
begin
  default_transition.action := default_action;
  default_transition.next_state := next_state;
  state_index := cast(current_state: Int) + 1;

  (* A row has one cell per TransitionClass member, 22 in total, from
     "invalid" in the first cell to "other" in the last one. *)
  class_index := 1;
  while class_index <= 22 do
    transitions[state_index][class_index] := default_transition;
    class_index := class_index + 1
  end;
  return state_index
end;

(*
 * The transition table describes transitions from one state to another, given
 * a symbol (character class).
 *
 * The table has m rows and n columns, where m is the amount of states and n is
 * the amount of classes. So given the current state and a classified character
 * the table can be used to look up the next state.
 *
 * Each cell is a word long.
 * - The least significant byte of the word is a row number (beginning with 0).
 *   It specifies the target state. "ff" means that this is an end state and no
 *   transition is possible.
 * - The next byte is the action that should be performed when transitioning.
 *   For the meaning of actions see labels in the lex_next function, which
 *   handles each action.
 *)
proc initialize_transitions();
var
  state_index: Int;
begin
  (* Start state.
*) state_index := cast(TransitionState.start: Int) + 1; transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip; transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start; transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon; transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single; transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren; transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single; transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single; transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; 
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single; transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero; transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof; transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot; transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus; transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character; transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string; 
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater; transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less; transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish; (* Colon state. *) state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish); transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; (* Identifier state. *) state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish); transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := 
transition_action_accumulate; transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier; transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; (* Decimal state. *) state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish); transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix; transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal; transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix; (* Greater state. 
*) state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish); transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; (* Minus state. *) state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish); transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; (* Left paren state. *) state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish); transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment; (* Less state. *) state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish); transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; (* Hexadecimal after 0x. *) state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish); transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite; transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish; (* Comment. 
*) state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment); transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; (* Closing comment. *) state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment); transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited; transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; (* Character. 
*) state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character); transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited; transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish; (* String. *) state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string); transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited; transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish; (* Leading zero. 
*) state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish); transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish; (* Digit with a character suffix. 
*)
  state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish);

  transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
  transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;

  transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
  transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;

  transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
  transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;

  transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
  transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;

  transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
  transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish
end;

(*
 * Prepares a lexer for reading from the given input file.
 *
 * A buffer of CHUNK_SIZE bytes is allocated and filled with zeros, and its
 * size is recorded. No input is read here: length is set to 0, which is the
 * condition lexer_lex checks before it loads the input.
 *)
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin
  (* Buffer first: allocate, remember the capacity, clear the memory. *)
  lexer^.buffer := malloc(CHUNK_SIZE);
  lexer^.size := CHUNK_SIZE;
  memset(lexer^.buffer, 0, CHUNK_SIZE);

  (* Then the input bookkeeping; nothing has been read so far. *)
  lexer^.length := 0;
  lexer^.input := input
end;

(* Returns the last read token.
*)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
  current_class: TransitionClass;
  current_state: TransitionState;
  current_transition: Transition;
  result: LexerToken;
  index1: Word;
  index2: Word;
begin
  (* Always rescan from the token start, so calling this procedure twice in
     a row walks over the same input again. *)
  lexer^.current := lexer^.start;
  current_state := TransitionState.start;

  while current_state <> TransitionState.finish do
    (* The tables are indexed starting at 1, hence the + 1u on each lookup. *)
    index1 := cast(lexer^.current.iterator^: Word) + 1u;

    (* The classification table has 128 entries only. A byte outside of the
       7-bit ASCII range would index past the end of the table, so it is
       classified as invalid instead of being looked up. *)
    if index1 > 128u then
      current_class := TransitionClass.invalid
    else
      current_class := classification[index1]
    end;

    index1 := cast(current_state: Word) + 1u;
    index2 := cast(current_class: Word) + 1u;
    current_transition := transitions[index1][index2];

    (* A nil action means there is nothing to run for this transition; only
       the state changes. The actions get a pointer to the result token. *)
    if current_transition.action <> nil then
      current_transition.action(lexer, @result)
    end;
    current_state := current_transition.next_state
  end;
  result.start_location := lexer^.start.location;
  result.end_location := lexer^.current.location;

  return result
end;

(*
 * Read and return the next token.
 *
 * While lexer^.length is 0 the input is loaded into the buffer and the
 * current position is reset to the buffer start, line 1, column 1. After
 * that the token start is moved to the current position and the token is
 * scanned by lexer_current.
 *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var
  result: LexerToken;
begin
  if lexer^.length = 0 then
    (* Read one byte less than the buffer holds. lexer_make fills the buffer
       with zeros and NUL is classified as eof, so the byte left untouched
       keeps an end marker after the data even if the input is at least as
       long as the buffer. Assumes ReadNBytes returns the number of bytes
       that were read - TODO confirm. *)
    lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE - 1, lexer^.buffer);
    lexer^.current.location.column := 1;
    lexer^.current.location.line := 1;
    lexer^.current.iterator := lexer^.buffer
  end;
  lexer^.start := lexer^.current;

  result := lexer_current(lexer);
  return result
end;

(*
 * Releases the buffer of the lexer. The input file is not closed here and
 * the buffer pointer is left as it is.
 *)
proc lexer_destroy*(lexer: ^Lexer);
begin
  free(lexer^.buffer)
end;

(*
 * Fills the classification and the transition tables.
 *
 * NOTE(review): this procedure is not exported and nothing in the visible
 * part of the file calls it. Confirm that the tables are initialized before
 * the first token is read.
 *)
proc lexer_initialize();
begin
  initialize_classification();
  initialize_transitions()
end;

end.