diff options
Diffstat (limited to 'source/lexer.elna')
| -rw-r--r-- | source/lexer.elna | 952 |
1 files changed, 0 insertions, 952 deletions
diff --git a/source/lexer.elna b/source/lexer.elna deleted file mode 100644 index d5f529b..0000000 --- a/source/lexer.elna +++ /dev/null @@ -1,952 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -import cstdio, cstring, cctype, cstdlib, common; - -const - CHUNK_SIZE := 85536u; - -type - (* - * Classification table assigns each possible character to a group (class). All - * characters of the same group are handled equivalently. - * - * Classification: - *) - TransitionClass = ( - invalid, - digit, - alpha, - space, - colon, - equals, - left_paren, - right_paren, - asterisk, - underscore, - single, - hex, - zero, - x, - eof, - dot, - minus, - single_quote, - double_quote, - greater, - less, - other - ); - TransitionState = ( - start, - colon, - identifier, - decimal, - greater, - minus, - left_paren, - less, - dot, - comment, - closing_comment, - character, - string, - leading_zero, - decimal_suffix, - finish - ); - LexerToken = record - kind: LexerKind; - value: union - booleanKind: Bool; - identifierKind: Identifier; - integerKind: Int; - stringKind: String - end; - start_location: TextLocation; - end_location: TextLocation - end; - TransitionAction = proc(^Lexer, ^LexerToken); - Transition = record - action: TransitionAction; - next_state: TransitionState - end; - TransitionClasses = [22]Transition; - - BufferPosition* = record - iterator: ^Char; - location: TextLocation - end; - Lexer* = record - input: ^FILE; - buffer: ^Char; - size: Word; - length: Word; - start: BufferPosition; - current: BufferPosition - end; - LexerKind* = ( - unknown, - identifier, - _if, - _then, - _else, - _elsif, - _while, - _do, - _proc, - _begin, - _end, - _extern, - _const, - _var, - _case, - _of, - _type, - _record, - _union, - pipe, - to, - boolean, - null, - and, - _or, - _xor, - not, - _return, - _cast, - 
shift_left, - shift_right, - left_paren, - right_paren, - left_square, - right_square, - greater_equal, - less_equal, - greater_than, - less_than, - not_equal, - equal, - semicolon, - dot, - comma, - plus, - minus, - multiplication, - division, - remainder, - assignment, - colon, - hat, - at, - comment, - integer, - word, - character, - string, - _defer, - exclamation, - arrow, - trait, - _program, - _module, - _import - ); - -var - classification: [128]TransitionClass; - transitions: [16]TransitionClasses; - -proc initialize_classification(); -var - i: Word; -begin - classification[1] := TransitionClass.eof; (* NUL *) - classification[2] := TransitionClass.invalid; (* SOH *) - classification[3] := TransitionClass.invalid; (* STX *) - classification[4] := TransitionClass.invalid; (* ETX *) - classification[5] := TransitionClass.invalid; (* EOT *) - classification[6] := TransitionClass.invalid; (* EMQ *) - classification[7] := TransitionClass.invalid; (* ACK *) - classification[8] := TransitionClass.invalid; (* BEL *) - classification[9] := TransitionClass.invalid; (* BS *) - classification[10] := TransitionClass.space; (* HT *) - classification[11] := TransitionClass.space; (* LF *) - classification[12] := TransitionClass.invalid; (* VT *) - classification[13] := TransitionClass.invalid; (* FF *) - classification[14] := TransitionClass.space; (* CR *) - classification[15] := TransitionClass.invalid; (* SO *) - classification[16] := TransitionClass.invalid; (* SI *) - classification[17] := TransitionClass.invalid; (* DLE *) - classification[18] := TransitionClass.invalid; (* DC1 *) - classification[19] := TransitionClass.invalid; (* DC2 *) - classification[20] := TransitionClass.invalid; (* DC3 *) - classification[21] := TransitionClass.invalid; (* DC4 *) - classification[22] := TransitionClass.invalid; (* NAK *) - classification[23] := TransitionClass.invalid; (* SYN *) - classification[24] := TransitionClass.invalid; (* ETB *) - classification[25] := 
TransitionClass.invalid; (* CAN *) - classification[26] := TransitionClass.invalid; (* EM *) - classification[27] := TransitionClass.invalid; (* SUB *) - classification[28] := TransitionClass.invalid; (* ESC *) - classification[29] := TransitionClass.invalid; (* FS *) - classification[30] := TransitionClass.invalid; (* GS *) - classification[31] := TransitionClass.invalid; (* RS *) - classification[32] := TransitionClass.invalid; (* US *) - classification[33] := TransitionClass.space; (* Space *) - classification[34] := TransitionClass.single; (* ! *) - classification[35] := TransitionClass.double_quote; (* " *) - classification[36] := TransitionClass.other; (* # *) - classification[37] := TransitionClass.other; (* $ *) - classification[38] := TransitionClass.single; (* % *) - classification[39] := TransitionClass.single; (* & *) - classification[40] := TransitionClass.single_quote; (* ' *) - classification[41] := TransitionClass.left_paren; (* ( *) - classification[42] := TransitionClass.right_paren; (* ) *) - classification[43] := TransitionClass.asterisk; (* * *) - classification[44] := TransitionClass.single; (* + *) - classification[45] := TransitionClass.single; (* , *) - classification[46] := TransitionClass.minus; (* - *) - classification[47] := TransitionClass.dot; (* . 
*) - classification[48] := TransitionClass.single; (* / *) - classification[49] := TransitionClass.zero; (* 0 *) - classification[50] := TransitionClass.digit; (* 1 *) - classification[51] := TransitionClass.digit; (* 2 *) - classification[52] := TransitionClass.digit; (* 3 *) - classification[53] := TransitionClass.digit; (* 4 *) - classification[54] := TransitionClass.digit; (* 5 *) - classification[55] := TransitionClass.digit; (* 6 *) - classification[56] := TransitionClass.digit; (* 7 *) - classification[57] := TransitionClass.digit; (* 8 *) - classification[58] := TransitionClass.digit; (* 9 *) - classification[59] := TransitionClass.colon; (* : *) - classification[60] := TransitionClass.single; (* ; *) - classification[61] := TransitionClass.less; (* < *) - classification[62] := TransitionClass.equals; (* = *) - classification[63] := TransitionClass.greater; (* > *) - classification[64] := TransitionClass.other; (* ? *) - classification[65] := TransitionClass.single; (* @ *) - classification[66] := TransitionClass.alpha; (* A *) - classification[67] := TransitionClass.alpha; (* B *) - classification[68] := TransitionClass.alpha; (* C *) - classification[69] := TransitionClass.alpha; (* D *) - classification[70] := TransitionClass.alpha; (* E *) - classification[71] := TransitionClass.alpha; (* F *) - classification[72] := TransitionClass.alpha; (* G *) - classification[73] := TransitionClass.alpha; (* H *) - classification[74] := TransitionClass.alpha; (* I *) - classification[75] := TransitionClass.alpha; (* J *) - classification[76] := TransitionClass.alpha; (* K *) - classification[77] := TransitionClass.alpha; (* L *) - classification[78] := TransitionClass.alpha; (* M *) - classification[79] := TransitionClass.alpha; (* N *) - classification[80] := TransitionClass.alpha; (* O *) - classification[81] := TransitionClass.alpha; (* P *) - classification[82] := TransitionClass.alpha; (* Q *) - classification[83] := TransitionClass.alpha; (* R *) - 
classification[84] := TransitionClass.alpha; (* S *) - classification[85] := TransitionClass.alpha; (* T *) - classification[86] := TransitionClass.alpha; (* U *) - classification[87] := TransitionClass.alpha; (* V *) - classification[88] := TransitionClass.alpha; (* W *) - classification[89] := TransitionClass.alpha; (* X *) - classification[90] := TransitionClass.alpha; (* Y *) - classification[91] := TransitionClass.alpha; (* Z *) - classification[92] := TransitionClass.single; (* [ *) - classification[93] := TransitionClass.other; (* \ *) - classification[94] := TransitionClass.single; (* ] *) - classification[95] := TransitionClass.single; (* ^ *) - classification[96] := TransitionClass.underscore; (* _ *) - classification[97] := TransitionClass.other; (* ` *) - classification[98] := TransitionClass.hex; (* a *) - classification[99] := TransitionClass.hex; (* b *) - classification[100] := TransitionClass.hex; (* c *) - classification[101] := TransitionClass.hex; (* d *) - classification[102] := TransitionClass.hex; (* e *) - classification[103] := TransitionClass.hex; (* f *) - classification[104] := TransitionClass.alpha; (* g *) - classification[105] := TransitionClass.alpha; (* h *) - classification[106] := TransitionClass.alpha; (* i *) - classification[107] := TransitionClass.alpha; (* j *) - classification[108] := TransitionClass.alpha; (* k *) - classification[109] := TransitionClass.alpha; (* l *) - classification[110] := TransitionClass.alpha; (* m *) - classification[111] := TransitionClass.alpha; (* n *) - classification[112] := TransitionClass.alpha; (* o *) - classification[113] := TransitionClass.alpha; (* p *) - classification[114] := TransitionClass.alpha; (* q *) - classification[115] := TransitionClass.alpha; (* r *) - classification[116] := TransitionClass.alpha; (* s *) - classification[117] := TransitionClass.alpha; (* t *) - classification[118] := TransitionClass.alpha; (* u *) - classification[119] := TransitionClass.alpha; (* v *) - 
classification[120] := TransitionClass.alpha; (* w *) - classification[121] := TransitionClass.x; (* x *) - classification[122] := TransitionClass.alpha; (* y *) - classification[123] := TransitionClass.alpha; (* z *) - classification[124] := TransitionClass.other; (* { *) - classification[125] := TransitionClass.single; (* | *) - classification[126] := TransitionClass.other; (* } *) - classification[127] := TransitionClass.single; (* ~ *) - classification[128] := TransitionClass.invalid; (* DEL *) - - i := 129u; - while i <= 256u do - classification[i] := TransitionClass.other; - i := i + 1u - end -end; - -proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool; -var - result: Bool; - index: Word; - continue: Bool; -begin - index := 0u; - result := true; - continue := (index < keyword.length) & (token_start.iterator <> token_end); - - while continue & result do - result := keyword[index] = token_start.iterator^ - or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^; - token_start.iterator := token_start.iterator + 1; - index := index + 1u; - continue := (index < keyword.length) & (token_start.iterator <> token_end) - end; - result := result & index = keyword.length; - - return result & (token_start.iterator = token_end) -end; - -(* Reached the end of file. *) -proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken); -begin - token^.kind := LexerKind.unknown -end; - -proc increment(position: ^BufferPosition); -begin - position^.iterator := position^.iterator + 1 -end; - -(* Add the character to the token currently read and advance to the next character. *) -proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken); -begin - increment(@lexer^.current) -end; - -(* The current character is not a part of the token. Finish the token already - * read. Don't advance to the next character. 
*) -proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken); -begin - if lexer^.start.iterator^ = ':' then - token^.kind := LexerKind.colon - end; - if lexer^.start.iterator^ = '>' then - token^.kind := LexerKind.greater_than - end; - if lexer^.start.iterator^ = '<' then - token^.kind := LexerKind.less_than - end; - if lexer^.start.iterator^ = '(' then - token^.kind := LexerKind.left_paren - end; - if lexer^.start.iterator^ = '-' then - token^.kind := LexerKind.minus - end; - if lexer^.start.iterator^ = '.' then - token^.kind := LexerKind.dot - end -end; - -(* An action for tokens containing multiple characters. *) -proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken); -begin - if lexer^.start.iterator^ = '<' then - if lexer^.current.iterator^ = '>' then - token^.kind := LexerKind.not_equal - end; - if lexer^.current.iterator^ = '=' then - token^.kind := LexerKind.less_equal - end - end; - if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then - token^.kind := LexerKind.greater_equal - end; - if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then - token^.kind := LexerKind.assignment - end; - if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then - token^.kind := LexerKind.arrow - end; - increment(@lexer^.current) -end; - -(* Skip a space. *) -proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken); -begin - increment(@lexer^.start); - - if lexer^.start.iterator^ = '\n' then - lexer^.start.location.line := lexer^.start.location.line + 1u; - lexer^.start.location.column := 1u - end; - lexer^.current := lexer^.start -end; - -(* Delimited string action. 
*) -proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken); -var - text_length: Word; -begin - if lexer^.start.iterator^ = '(' then - token^.kind := LexerKind.comment - end; - if lexer^.start.iterator^ = '"' then - text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); - - token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); - memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); - - token^.kind := LexerKind.character - end; - if lexer^.start.iterator^ = '\'' then - text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); - - token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); - memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); - - token^.kind := LexerKind.string - end; - increment(@lexer^.current) -end; - -(* Finalize keyword or identifier. *) -proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken); -begin - token^.kind := LexerKind.identifier; - - token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char); - memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word)); - - if compare_keyword("program", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._program - end; - if compare_keyword("import", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._import - end; - if compare_keyword("const", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._const - end; - if compare_keyword("var", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._var - end; - if compare_keyword("if", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._if - end; - if compare_keyword("then", lexer^.start, lexer^.current.iterator) then - token^.kind := 
LexerKind._then - end; - if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._elsif - end; - if compare_keyword("else", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._else - end; - if compare_keyword("while", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._while - end; - if compare_keyword("do", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._do - end; - if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._proc - end; - if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._begin - end; - if compare_keyword("end", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._end - end; - if compare_keyword("type", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._type - end; - if compare_keyword("record", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._record - end; - if compare_keyword("union", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._union - end; - if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.null - end; - if compare_keyword("or", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._or - end; - if compare_keyword("return", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._return - end; - if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._defer - end; - if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.to - end; - if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._case - end; - if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._of - end; - if compare_keyword("module", lexer^.start, 
lexer^.current.iterator) then - token^.kind := LexerKind._module - end; - if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._xor - end; - if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.boolean; - token^.value.booleanKind := true - end; - if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.boolean; - token^.value.booleanKind := false - end -end; - -(* Action for tokens containing only one character. The character cannot be - * followed by other characters forming a composite token. *) -proc transition_action_single(lexer: ^Lexer, token: ^LexerToken); -begin - if lexer^.current.iterator^ = '&' then - token^.kind := LexerKind.and - end; - if lexer^.current.iterator^ = ';' then - token^.kind := LexerKind.semicolon - end; - if lexer^.current.iterator^ = ',' then - token^.kind := LexerKind.comma - end; - if lexer^.current.iterator^ = '~' then - token^.kind := LexerKind.not - end; - if lexer^.current.iterator^ = ')' then - token^.kind := LexerKind.right_paren - end; - if lexer^.current.iterator^ = '[' then - token^.kind := LexerKind.left_square - end; - if lexer^.current.iterator^ = ']' then - token^.kind := LexerKind.right_square - end; - if lexer^.current.iterator^ = '^' then - token^.kind := LexerKind.hat - end; - if lexer^.current.iterator^ = '=' then - token^.kind := LexerKind.equal - end; - if lexer^.current.iterator^ = '+' then - token^.kind := LexerKind.plus - end; - if lexer^.current.iterator^ = '*' then - token^.kind := LexerKind.multiplication - end; - if lexer^.current.iterator^ = '/' then - token^.kind := LexerKind.division - end; - if lexer^.current.iterator^ = '%' then - token^.kind := LexerKind.remainder - end; - if lexer^.current.iterator^ = '@' then - token^.kind := LexerKind.at - end; - if lexer^.current.iterator^ = '|' then - token^.kind := LexerKind.pipe - end; - increment(@lexer^.current) -end; - -(* Handle 
an integer literal. *) -proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken); -var - buffer: String; - integer_length: Word; - found: Bool; -begin - token^.kind := LexerKind.integer; - - integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word); - memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier)); - memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length); - - token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0'; - token^.value.integerKind := atoi(@token^.value.identifierKind[2]) -end; - -proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int; -var - default_transition: Transition; - state_index: Int; -begin - default_transition.action := default_action; - default_transition.next_state := next_state; - state_index := cast(current_state: Int) + 1; - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition; - 
transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition; - - return state_index -end; - -(* - * The transition table describes transitions from one state to another, given - * a symbol (character class). - * - * The table has m rows and n columns, where m is the amount of states and n is - * the amount of classes. So given the current state and a classified character - * the table can be used to look up the next state. - * - * Each cell is a word long. - * - The least significant byte of the word is a row number (beginning with 0). - * It specifies the target state. "ff" means that this is an end state and no - * transition is possible. - * - The next byte is the action that should be performed when transitioning. - * For the meaning of actions see labels in the lex_next function, which - * handles each action. - *) -proc initialize_transitions(); -var - state_index: Int; -begin - (* Start state. 
*) - state_index := cast(TransitionState.start: Int) + 1; - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip; - transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start; - - transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon; - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren; - - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish; - - 
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot; - - transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus; - - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character; - - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := 
transition_action_accumulate; - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string; - - transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater; - - transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less; - - transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish; - - (* Colon state. *) - state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - (* Identifier state. 
*) - state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; - - (* Decimal state. 
*) - state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix; - - (* Greater state. *) - state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - (* Minus state. 
*) - state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; - - (* Left paren state. *) - state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment; - - (* Less state. *) - state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; - - (* Hexadecimal after 0x. *) - state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish; - - (* Comment. 
*) - state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment); - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - (* Closing comment. *) - state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment); - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited; - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - (* Character. 
*) - state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character); - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited; - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish; - - (* String. *) - state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string); - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited; - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish; - - (* Leading zero. 
*) - state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish; - - (* Digit with a character suffix. 
*) - state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish -end; - -(* Prepares a lexer for reading from input: stores the stream, resets length to zero, allocates a buffer of CHUNK_SIZE bytes, fills it with zero bytes and records its size. The start and current positions are not assigned here. *) proc lexer_make*(lexer: ^Lexer, input: ^FILE); -begin - lexer^.input := input; - lexer^.length := 0u; - - (* NOTE(review): the malloc result is passed to memset without a nil check - confirm that an allocation failure cannot reach this point. *) lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char); - memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE); - lexer^.size := CHUNK_SIZE -end; - -(* Returns the last read token. 
*)
(*
 * Runs the transition table starting at lexer^.start and returns the
 * resulting token.
 *
 * Each step classifies the character under lexer^.current.iterator, looks up
 * the transition for the (state, class) pair, calls its action when it is not
 * nil and moves to the transition's next state. The loop stops when the
 * finish state is reached. Both tables are indexed with the ordinal value
 * plus one.
 *
 * The returned token carries the locations of lexer^.start and
 * lexer^.current as its start and end location.
 *
 * NOTE(review): this procedure never advances lexer^.current itself;
 * presumably the transition actions do - confirm against their definitions.
 * NOTE(review): the character is cast to Word before indexing
 * classification; whether the table covers every resulting index is not
 * visible here.
 *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
  current_class: TransitionClass;
  current_state: TransitionState;
  current_transition: Transition;
  result: LexerToken;
  index1: Word;
  index2: Word;
begin
  lexer^.current := lexer^.start;
  current_state := TransitionState.start;

  (* Transitions with a nil action never write to the token, so give the
     kind a defined value instead of returning whatever was on the stack. *)
  result.kind := LexerKind.unknown;

  while current_state <> TransitionState.finish do
    index1 := cast(lexer^.current.iterator^: Word) + 1u;
    current_class := classification[index1];

    index1 := cast(current_state: Word) + 1u;
    index2 := cast(current_class: Word) + 1u;

    current_transition := transitions[index1][index2];
    if current_transition.action <> nil then
      current_transition.action(lexer, @result)
    end;
    current_state := current_transition.next_state
  end;
  result.start_location := lexer^.start.location;
  result.end_location := lexer^.current.location;

  return result
end;

(*
 * Reads and returns the next token.
 *
 * While lexer^.length is zero the input is read into the buffer and the
 * current position is reset to the beginning of the buffer at line 1,
 * column 1. The token is then scanned from the current position.
 *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
begin
  if lexer^.length = 0u then
    (* fread returns the number of complete items read. Asking for a single
       item of CHUNK_SIZE bytes yielded 0 for every input shorter than one
       chunk, so length stayed zero and each call rewound the cursor to the
       start of the buffer. Reading byte-sized items makes the return value
       the byte count.

       At most CHUNK_SIZE - 1 bytes are requested so that the last byte
       zeroed by lexer_make is preserved; presumably the zero byte is what
       the classification table maps to eof - confirm. *)
    lexer^.length := fread(cast(lexer^.buffer: Pointer), 1u, CHUNK_SIZE - 1u, lexer^.input);
    lexer^.current.location.column := 1u;
    lexer^.current.location.line := 1u;
    lexer^.current.iterator := lexer^.buffer
  end;
  lexer^.start := lexer^.current;

  return lexer_current(lexer)
end;

(* Releases the buffer of the lexer. The input stream is not closed here. *)
proc lexer_destroy*(lexer: ^Lexer);
begin
  free(cast(lexer^.buffer: Pointer))
end;

(* Calls initialize_classification and then initialize_transitions. This
   procedure is not exported from the module. *)
proc lexer_initialize();
begin
  initialize_classification();
  initialize_transitions()
end;

end.
