summaryrefslogtreecommitdiff
path: root/source/lexer.elna
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2026-02-02 12:10:40 +0100
committerEugen Wissner <belka@caraus.de>2026-02-04 18:17:47 +0100
commit8e89d33c214e3dad03a86057c2c45a0dc7608ad7 (patch)
tree6d6b52362c6e04a8466a3f393093157f49162588 /source/lexer.elna
parent39197fe88ab23bf452e00569ef36b556b787a421 (diff)
downloadelna-8e89d33c214e3dad03a86057c2c45a0dc7608ad7.tar.gz
Split up the rakefile
Diffstat (limited to 'source/lexer.elna')
-rw-r--r--source/lexer.elna952
1 files changed, 0 insertions, 952 deletions
diff --git a/source/lexer.elna b/source/lexer.elna
deleted file mode 100644
index d5f529b..0000000
--- a/source/lexer.elna
+++ /dev/null
@@ -1,952 +0,0 @@
-(* This Source Code Form is subject to the terms of the Mozilla Public License,
- v. 2.0. If a copy of the MPL was not distributed with this file, You can
- obtain one at https://mozilla.org/MPL/2.0/. *)
-module;
-
-import cstdio, cstring, cctype, cstdlib, common;
-
-const
- (* NOTE(review): not referenced in the visible part of this file; presumably
- * the size of one chunk of source text read into the lexer buffer. TODO confirm. *)
- CHUNK_SIZE := 85536u;
-
-type
- (*
- * Classification table assigns each possible character to a group (class). All
- * characters of the same group are handled equivalently.
- *
- * The classes are listed below. The notes after each member name the
- * characters that initialize_classification assigns to it.
- *)
- TransitionClass = (
- invalid, (* Control characters other than HT, LF and CR, and DEL. *)
- digit, (* 1 to 9. *)
- alpha, (* Letters that are neither a to f nor x. *)
- space, (* HT, LF, CR and the space character. *)
- colon,
- equals,
- left_paren,
- right_paren,
- asterisk,
- underscore,
- single, (* Characters handled by transition_action_single, e.g. ; , + / *)
- hex, (* Lower-case a to f. *)
- zero, (* The digit 0. *)
- x, (* Lower-case x. *)
- eof, (* NUL. *)
- dot,
- minus,
- single_quote,
- double_quote,
- greater,
- less,
- other (* #, $, ?, \, `, {, } and every code above DEL. *)
- );
- (*
- * States of the lexer state machine. The ordinal of a state plus one is used
- * as the row index into the transitions table.
- *)
- TransitionState = (
- start,
- colon,
- identifier,
- decimal,
- greater,
- minus,
- left_paren,
- less,
- dot,
- comment,
- closing_comment,
- character,
- string,
- leading_zero,
- decimal_suffix,
- finish
- );
- (*
- * A token filled in by the transition actions.
- *
- * kind tells what was recognized. The value union carries the payload:
- * booleanKind for boolean tokens, identifierKind for identifiers (element 1
- * holds the length, the text starts at element 2), integerKind for integer
- * tokens and stringKind for the text of quoted tokens.
- *
- * NOTE(review): start_location and end_location are not written in the
- * visible part of this file; presumably they delimit the token in the
- * source text. TODO confirm.
- *)
- LexerToken = record
- kind: LexerKind;
- value: union
- booleanKind: Bool;
- identifierKind: Identifier;
- integerKind: Int;
- stringKind: String
- end;
- start_location: TextLocation;
- end_location: TextLocation
- end;
- (* Procedure attached to a transition; it receives the lexer and the token being built. *)
- TransitionAction = proc(^Lexer, ^LexerToken);
- (* One cell of the transition table: the action to run and the state to enter. *)
- Transition = record
- action: TransitionAction;
- next_state: TransitionState
- end;
- (* One row of the transition table: a transition for each of the 22 character classes. *)
- TransitionClasses = [22]Transition;
-
- (* A position in the input: a pointer to a character together with its text location. *)
- BufferPosition* = record
- iterator: ^Char;
- location: TextLocation
- end;
- (*
- * Lexer state.
- *
- * start marks the beginning of the token being read and current the character
- * that is looked at next; the transition actions advance both.
- *
- * NOTE(review): input, buffer, size and length are not used in the visible
- * part of this file; presumably they describe the source file and the
- * buffer its contents are read into. TODO confirm.
- *)
- Lexer* = record
- input: ^FILE;
- buffer: ^Char;
- size: Word;
- length: Word;
- start: BufferPosition;
- current: BufferPosition
- end;
- (*
- * Kinds of tokens stored in LexerToken.kind.
- *
- * transition_action_eof reports the end of input as unknown. Keywords are
- * told apart from plain identifiers in transition_action_key_id.
- *)
- LexerKind* = (
- unknown,
- identifier,
- _if,
- _then,
- _else,
- _elsif,
- _while,
- _do,
- _proc,
- _begin,
- _end,
- _extern,
- _const,
- _var,
- _case,
- _of,
- _type,
- _record,
- _union,
- pipe,
- to,
- boolean,
- null,
- and,
- _or,
- _xor,
- not,
- _return,
- _cast,
- shift_left,
- shift_right,
- left_paren,
- right_paren,
- left_square,
- right_square,
- greater_equal,
- less_equal,
- greater_than,
- less_than,
- not_equal,
- equal,
- semicolon,
- dot,
- comma,
- plus,
- minus,
- multiplication,
- division,
- remainder,
- assignment,
- colon,
- hat,
- at,
- comment,
- integer,
- word,
- character,
- string,
- _defer,
- exclamation,
- arrow,
- trait,
- _program,
- _module,
- _import
- );
-
-var
-  (* Class of every character code: the entry at index c + 1 describes the
-   * character with code c. The table needs 256 entries because
-   * initialize_classification fills the indices 1 through 256. *)
-  classification: [256]TransitionClass;
-  (* Transition table with one row per TransitionState value. *)
-  transitions: [16]TransitionClasses;
-
-(* Fills the classification table. The entry at index c + 1 receives the class
- * of the character with code c.
- *
- * NOTE(review): indices up to 256 are written, so the table is expected to
- * have at least 256 entries. *)
-proc initialize_classification();
-var
-  code: Word;
-begin
-  (* NUL marks the end of the input. *)
-  classification[1] := TransitionClass.eof;
-
-  (* SOH through US are invalid, apart from the whitespace characters which
-   * are overridden right after the loop. *)
-  code := 2u;
-  while code <= 32u do
-    classification[code] := TransitionClass.invalid;
-    code := code + 1u
-  end;
-  classification[10] := TransitionClass.space; (* HT *)
-  classification[11] := TransitionClass.space; (* LF *)
-  classification[14] := TransitionClass.space; (* CR *)
-
-  (* The space character and the punctuation preceding the digits. *)
-  classification[33] := TransitionClass.space; (* Space *)
-  classification[34] := TransitionClass.single; (* ! *)
-  classification[35] := TransitionClass.double_quote; (* " *)
-  classification[36] := TransitionClass.other; (* # *)
-  classification[37] := TransitionClass.other; (* $ *)
-  classification[38] := TransitionClass.single; (* % *)
-  classification[39] := TransitionClass.single; (* & *)
-  classification[40] := TransitionClass.single_quote; (* ' *)
-  classification[41] := TransitionClass.left_paren; (* ( *)
-  classification[42] := TransitionClass.right_paren; (* ) *)
-  classification[43] := TransitionClass.asterisk; (* * *)
-  classification[44] := TransitionClass.single; (* + *)
-  classification[45] := TransitionClass.single; (* , *)
-  classification[46] := TransitionClass.minus; (* - *)
-  classification[47] := TransitionClass.dot; (* . *)
-  classification[48] := TransitionClass.single; (* / *)
-
-  (* Digits; zero has a class of its own. *)
-  classification[49] := TransitionClass.zero; (* 0 *)
-  code := 50u;
-  while code <= 58u do
-    classification[code] := TransitionClass.digit; (* 1 to 9 *)
-    code := code + 1u
-  end;
-
-  (* Punctuation between the digits and the upper-case letters. *)
-  classification[59] := TransitionClass.colon; (* : *)
-  classification[60] := TransitionClass.single; (* ; *)
-  classification[61] := TransitionClass.less; (* < *)
-  classification[62] := TransitionClass.equals; (* = *)
-  classification[63] := TransitionClass.greater; (* > *)
-  classification[64] := TransitionClass.other; (* ? *)
-  classification[65] := TransitionClass.single; (* @ *)
-
-  (* Upper-case letters. *)
-  code := 66u;
-  while code <= 91u do
-    classification[code] := TransitionClass.alpha; (* A to Z *)
-    code := code + 1u
-  end;
-
-  (* Punctuation between the upper-case and the lower-case letters. *)
-  classification[92] := TransitionClass.single; (* [ *)
-  classification[93] := TransitionClass.other; (* \ *)
-  classification[94] := TransitionClass.single; (* ] *)
-  classification[95] := TransitionClass.single; (* ^ *)
-  classification[96] := TransitionClass.underscore; (* _ *)
-  classification[97] := TransitionClass.other; (* ` *)
-
-  (* Lower-case letters: a to f are hexadecimal digits, x has a class of its
-   * own, the rest are ordinary letters. *)
-  code := 98u;
-  while code <= 103u do
-    classification[code] := TransitionClass.hex; (* a to f *)
-    code := code + 1u
-  end;
-  while code <= 123u do
-    classification[code] := TransitionClass.alpha; (* g to z *)
-    code := code + 1u
-  end;
-  classification[121] := TransitionClass.x; (* x *)
-
-  (* Remaining ASCII characters. *)
-  classification[124] := TransitionClass.other; (* { *)
-  classification[125] := TransitionClass.single; (* | *)
-  classification[126] := TransitionClass.other; (* } *)
-  classification[127] := TransitionClass.single; (* ~ *)
-  classification[128] := TransitionClass.invalid; (* DEL *)
-
-  (* Everything above DEL. *)
-  code := 129u;
-  while code <= 256u do
-    classification[code] := TransitionClass.other;
-    code := code + 1u
-  end
-end;
-
-(* Tells whether the text between token_start.iterator (inclusive) and
- * token_end (exclusive) spells the given keyword.
- *
- * A character of the text matches if it equals the keyword character or the
- * result of tolower applied to the keyword character. Both the keyword and
- * the text have to be consumed completely for the comparison to succeed.
- *
- * NOTE(review): keyword is indexed starting at 0 here, whereas the arrays in
- * this file are indexed starting at 1; confirm the index base of String. *)
-proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
-var
-  matched: Bool;
-  position: Word;
-  expected: Char;
-  actual: Char;
-begin
-  position := 0u;
-  matched := true;
-
-  (* Stop at the first mismatch or as soon as either side is exhausted. *)
-  while matched & (position < keyword.length) & (token_start.iterator <> token_end) do
-    expected := keyword[position];
-    actual := token_start.iterator^;
-
-    matched := expected = actual
-      or cast(tolower(cast(expected: Int)): Char) = actual;
-
-    token_start.iterator := token_start.iterator + 1;
-    position := position + 1u
-  end;
-
-  return matched & (position = keyword.length) & (token_start.iterator = token_end)
-end;
-
-(* Reached the end of file. The token kind is set to unknown; the lexer
- * positions are left untouched. *)
-proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
-begin
- token^.kind := LexerKind.unknown
-end;
-
-(* Moves the iterator of the given position one character forward. The
- * location stored in the position is not changed. *)
-proc increment(position: ^BufferPosition);
-begin
- position^.iterator := position^.iterator + 1
-end;
-
-(* Add the character to the token currently read and advance to the next
- * character. Only lexer^.current moves; the token itself is not modified. *)
-proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
-begin
- increment(@lexer^.current)
-end;
-
-(* Finishes a one-character token whose first character could also have
- * started a longer token. The character under lexer^.current does not belong
- * to the token and is not consumed.
- *
- * The kind is derived from the first character of the token; if it is none
- * of the characters below, the token is left unchanged. *)
-proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
-var
-  first: Char;
-begin
-  first := lexer^.start.iterator^;
-
-  if first = ':' then
-    token^.kind := LexerKind.colon
-  elsif first = '>' then
-    token^.kind := LexerKind.greater_than
-  elsif first = '<' then
-    token^.kind := LexerKind.less_than
-  elsif first = '(' then
-    token^.kind := LexerKind.left_paren
-  elsif first = '-' then
-    token^.kind := LexerKind.minus
-  elsif first = '.' then
-    token^.kind := LexerKind.dot
-  end
-end;
-
-(* Finishes a token made of two characters: the one at lexer^.start and the
- * one at lexer^.current. The second character is consumed in every case,
- * also when the pair is not one of the combinations below and the token is
- * left unchanged. *)
-proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
-var
-  first: Char;
-  second: Char;
-begin
-  first := lexer^.start.iterator^;
-  second := lexer^.current.iterator^;
-
-  if (first = '<') & (second = '>') then
-    token^.kind := LexerKind.not_equal
-  elsif (first = '<') & (second = '=') then
-    token^.kind := LexerKind.less_equal
-  elsif (first = '>') & (second = '=') then
-    token^.kind := LexerKind.greater_equal
-  elsif (first = ':') & (second = '=') then
-    token^.kind := LexerKind.assignment
-  elsif (first = '-') & (second = '>') then
-    token^.kind := LexerKind.arrow
-  end;
-
-  increment(@lexer^.current)
-end;
-
-(* Skip a whitespace character.
- *
- * The character at lexer^.start is dropped: the start position moves one
- * character forward and the current position is reset to it. If the dropped
- * character is a line feed, the line number is increased and the column is
- * reset to 1. The token is not modified. *)
-proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
-begin
-  (* Look at the character being skipped before moving past it. Testing after
-   * the increment would inspect the following character instead, so a line
-   * feed directly after a token would never be counted. *)
-  if lexer^.start.iterator^ = '\n' then
-    lexer^.start.location.line := lexer^.start.location.line + 1u;
-    lexer^.start.location.column := 1u
-  end;
-  increment(@lexer^.start);
-
-  lexer^.current := lexer^.start
-end;
-
-(* Finishes a token enclosed in delimiters: a comment, a string or a
- * character literal. The closing delimiter under lexer^.current is consumed.
- *
- * The kind is chosen by the opening delimiter at lexer^.start. Double quotes
- * produce a string token and single quotes a character token, matching the
- * states the start state enters for these delimiters. For both, the text
- * including its delimiters is copied into freshly allocated memory and
- * stored in token^.value.stringKind.
- *
- * NOTE(review): the result of malloc is not checked. *)
-proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
-var
-  text_length: Word;
-begin
-  if lexer^.start.iterator^ = '(' then
-    token^.kind := LexerKind.comment
-  end;
-  if lexer^.start.iterator^ = '"' then
-    (* current still points at the closing delimiter, hence the + 1. *)
-    text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
-
-    token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
-    memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
-
-    token^.kind := LexerKind.string
-  end;
-  if lexer^.start.iterator^ = '\'' then
-    (* current still points at the closing delimiter, hence the + 1. *)
-    text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
-
-    token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
-    memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
-
-    token^.kind := LexerKind.character
-  end;
-  increment(@lexer^.current)
-end;
-
-(* Finishes an identifier or a keyword. The character under lexer^.current
- * does not belong to the token and is not consumed.
- *
- * The token text is stored in token^.value.identifierKind: element 1 receives
- * the length and the characters follow from element 2 on. The kind is
- * identifier unless the text is one of the keywords below. For TRUE and
- * FALSE the boolean value is stored in the token, replacing the text. *)
-proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
-var
-  token_end: ^Char;
-begin
-  token_end := lexer^.current.iterator;
-  token^.kind := LexerKind.identifier;
-
-  token^.value.identifierKind[1] := cast(token_end - lexer^.start.iterator: Char);
-  memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word));
-
-  (* No text matches two different keywords, so the first match decides. *)
-  if compare_keyword("program", lexer^.start, token_end) then
-    token^.kind := LexerKind._program
-  elsif compare_keyword("import", lexer^.start, token_end) then
-    token^.kind := LexerKind._import
-  elsif compare_keyword("const", lexer^.start, token_end) then
-    token^.kind := LexerKind._const
-  elsif compare_keyword("var", lexer^.start, token_end) then
-    token^.kind := LexerKind._var
-  elsif compare_keyword("if", lexer^.start, token_end) then
-    token^.kind := LexerKind._if
-  elsif compare_keyword("then", lexer^.start, token_end) then
-    token^.kind := LexerKind._then
-  elsif compare_keyword("elsif", lexer^.start, token_end) then
-    token^.kind := LexerKind._elsif
-  elsif compare_keyword("else", lexer^.start, token_end) then
-    token^.kind := LexerKind._else
-  elsif compare_keyword("while", lexer^.start, token_end) then
-    token^.kind := LexerKind._while
-  elsif compare_keyword("do", lexer^.start, token_end) then
-    token^.kind := LexerKind._do
-  elsif compare_keyword("proc", lexer^.start, token_end) then
-    token^.kind := LexerKind._proc
-  elsif compare_keyword("begin", lexer^.start, token_end) then
-    token^.kind := LexerKind._begin
-  elsif compare_keyword("end", lexer^.start, token_end) then
-    token^.kind := LexerKind._end
-  elsif compare_keyword("type", lexer^.start, token_end) then
-    token^.kind := LexerKind._type
-  elsif compare_keyword("record", lexer^.start, token_end) then
-    token^.kind := LexerKind._record
-  elsif compare_keyword("union", lexer^.start, token_end) then
-    token^.kind := LexerKind._union
-  elsif compare_keyword("NIL", lexer^.start, token_end) then
-    token^.kind := LexerKind.null
-  elsif compare_keyword("or", lexer^.start, token_end) then
-    token^.kind := LexerKind._or
-  elsif compare_keyword("return", lexer^.start, token_end) then
-    token^.kind := LexerKind._return
-  elsif compare_keyword("defer", lexer^.start, token_end) then
-    token^.kind := LexerKind._defer
-  elsif compare_keyword("TO", lexer^.start, token_end) then
-    token^.kind := LexerKind.to
-  elsif compare_keyword("CASE", lexer^.start, token_end) then
-    token^.kind := LexerKind._case
-  elsif compare_keyword("OF", lexer^.start, token_end) then
-    token^.kind := LexerKind._of
-  elsif compare_keyword("module", lexer^.start, token_end) then
-    token^.kind := LexerKind._module
-  elsif compare_keyword("xor", lexer^.start, token_end) then
-    token^.kind := LexerKind._xor
-  elsif compare_keyword("TRUE", lexer^.start, token_end) then
-    token^.kind := LexerKind.boolean;
-    token^.value.booleanKind := true
-  elsif compare_keyword("FALSE", lexer^.start, token_end) then
-    token^.kind := LexerKind.boolean;
-    token^.value.booleanKind := false
-  end
-end;
-
-(* Finishes a token consisting of exactly one character that cannot start a
- * longer token. The kind is derived from the character under lexer^.current,
- * which is consumed in every case; for a character not listed below the
- * token is left unchanged. *)
-proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
-var
-  symbol: Char;
-begin
-  symbol := lexer^.current.iterator^;
-
-  if symbol = '&' then
-    token^.kind := LexerKind.and
-  elsif symbol = ';' then
-    token^.kind := LexerKind.semicolon
-  elsif symbol = ',' then
-    token^.kind := LexerKind.comma
-  elsif symbol = '~' then
-    token^.kind := LexerKind.not
-  elsif symbol = ')' then
-    token^.kind := LexerKind.right_paren
-  elsif symbol = '[' then
-    token^.kind := LexerKind.left_square
-  elsif symbol = ']' then
-    token^.kind := LexerKind.right_square
-  elsif symbol = '^' then
-    token^.kind := LexerKind.hat
-  elsif symbol = '=' then
-    token^.kind := LexerKind.equal
-  elsif symbol = '+' then
-    token^.kind := LexerKind.plus
-  elsif symbol = '*' then
-    token^.kind := LexerKind.multiplication
-  elsif symbol = '/' then
-    token^.kind := LexerKind.division
-  elsif symbol = '%' then
-    token^.kind := LexerKind.remainder
-  elsif symbol = '@' then
-    token^.kind := LexerKind.at
-  elsif symbol = '|' then
-    token^.kind := LexerKind.pipe
-  end;
-
-  increment(@lexer^.current)
-end;
-
-(* Handle an integer literal. The character under lexer^.current does not
- * belong to the literal and is not consumed.
- *
- * The digits between lexer^.start and lexer^.current are copied into the
- * identifier member of the token value, which serves as a scratch buffer,
- * and are then converted with atoi. The result is stored in integerKind,
- * which shares its storage with the scratch buffer.
- *
- * NOTE(review): a literal that does not fit into an Identifier is not
- * guarded against. *)
-proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
-var
-  integer_length: Word;
-begin
-  token^.kind := LexerKind.integer;
-
-  integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word);
-  memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier));
-  memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length);
-
-  (* The digits start at element 1, so the terminator goes right behind the
-   * last digit and the conversion starts at element 1 as well. Deriving the
-   * terminator position from the first digit and converting from element 2
-   * would drop the leading digit. *)
-  token^.value.identifierKind[integer_length + 1u] := '\0';
-  token^.value.integerKind := atoi(@token^.value.identifierKind[1])
-end;
-
-(* Fills the whole row of the transition table belonging to current_state
- * with the same transition: default_action as the action and next_state as
- * the target. Callers overwrite single cells afterwards where a class needs
- * different handling.
- *
- * Returns the index of the row in the transitions table. *)
-proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
-var
-  default_transition: Transition;
-  state_index: Int;
-  class_index: Int;
-  last_class_index: Int;
-begin
-  default_transition.action := default_action;
-  default_transition.next_state := next_state;
-  state_index := cast(current_state: Int) + 1;
-
-  (* Walk over all classes, from the first (invalid) to the last (other). *)
-  class_index := cast(TransitionClass.invalid: Int) + 1;
-  last_class_index := cast(TransitionClass.other: Int) + 1;
-
-  while class_index <= last_class_index do
-    transitions[state_index][class_index] := default_transition;
-    class_index := class_index + 1
-  end;
-
-  return state_index
-end;
-
-(*
- * The transition table describes transitions from one state to another, given
- * a symbol (character class).
- *
- * The table has m rows and n columns, where m is the amount of states and n is
- * the amount of classes. So given the current state and a classified character
- * the table can be used to look up the next state.
- *
- * Each cell is a Transition record.
- * - next_state is the target state. TransitionState.finish is presumably the
- * end state, from which no further transition is made.
- * - action is the procedure that should be performed when transitioning; nil
- * means that no action is attached to the transition. For the meaning of
- * the actions see the transition_action_ procedures above.
- *)
-proc initialize_transitions();
-var
- state_index: Int;
-begin
- (* Start state. *)
- state_index := cast(TransitionState.start: Int) + 1;
-
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
-
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip;
- transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start;
-
- transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon;
-
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single;
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren;
-
- transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single;
- transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single;
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single;
- transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero;
-
- transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof;
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot;
-
- transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus;
-
- transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character;
-
- transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string;
-
- transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater;
-
- transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less;
-
- transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish;
-
- (* Colon state. *)
- state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
-
- (* Identifier state. *)
- state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier;
-
- transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
-
- (* Decimal state. *)
- state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
-
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix;
-
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix;
-
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal;
-
- transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix;
-
- (* Greater state. *)
- state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
-
- (* Minus state. *)
- state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
- transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
-
- (* Left paren state. *)
- state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment;
-
- (* Less state. *)
- state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
- transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
- transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
-
- (* Dot state. *)
- state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite;
- transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish;
-
- (* Comment. *)
- state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment);
-
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
-
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
-
- (* Closing comment. *)
- state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment);
-
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited;
- transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
- transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
-
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
-
- (* Character. *)
- state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character);
-
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited;
- transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish;
-
- (* String. *)
- state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string);
-
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited;
- transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish;
-
- (* Leading zero. *)
- state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish;
-
- (* Digit with a character suffix. *)
- state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish);
-
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
-
- transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
- transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish
-end;
-
-(*
- * Prepares a lexer for reading from the given input file.
- *
- * Allocates a buffer of CHUNK_SIZE bytes, fills it with zeros and attaches it
- * to the lexer together with its size. lexer^.length starts at zero; lexer_lex
- * checks it to decide whether the buffer still has to be filled.
- *
- * NOTE(review): the result of malloc is not checked before it is passed to
- * memset.
- *)
-proc lexer_make*(lexer: ^Lexer, input: ^FILE);
-begin
- lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char);
- lexer^.size := CHUNK_SIZE;
- memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE);
-
- lexer^.length := 0u;
- lexer^.input := input
-end;
-
-(*
- * Scans one token beginning at the saved start position of the lexer.
- *
- * The read position is rewound to lexer^.start and the transition table is
- * followed step by step until the finish state is reached. Each step
- * classifies the character under the read position, looks up the transition
- * for the active state and that class, and runs the action of the transition
- * if it has one. The returned token carries the start position and the final
- * read position as its location range.
- *)
-proc lexer_current*(lexer: ^Lexer) -> LexerToken;
-var
- scanned: LexerToken;
- active_state: TransitionState;
- symbol_class: TransitionClass;
- chosen: Transition;
-begin
- active_state := TransitionState.start;
- lexer^.current := lexer^.start;
-
- while active_state <> TransitionState.finish do
-   (* Every cast value is shifted by one before it is used as a table index. *)
-   symbol_class := classification[cast(lexer^.current.iterator^: Word) + 1u];
-   chosen := transitions[cast(active_state: Word) + 1u][cast(symbol_class: Word) + 1u];
-
-   (* The action runs before the state changes; a nil action means there is
-      nothing to do for this step. *)
-   if chosen.action <> nil then
-     chosen.action(lexer, @scanned)
-   end;
-   active_state := chosen.next_state
- end;
- scanned.start_location := lexer^.start.location;
- scanned.end_location := lexer^.current.location;
-
- return scanned
-end;
-
-(*
- * Reads and returns the next token.
- *
- * While lexer^.length is zero the buffer is filled from the input file and the
- * read position is reset to the beginning of the buffer at line 1, column 1.
- * The token is then scanned by lexer_current, starting at the current read
- * position.
- *)
-proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
-var
- result: LexerToken;
-begin
- if lexer^.length = 0u then
-   (* Ask for CHUNK_SIZE items of one byte each, so that fread reports the
-      number of bytes actually read. With the item size and the item count the
-      other way round fread returns the number of complete chunks, which is 0
-      for any input shorter than CHUNK_SIZE; the length would then stay zero
-      and every call would reset the read position to the start of the
-      buffer. *)
-   lexer^.length := fread(cast(lexer^.buffer: Pointer), 1u, CHUNK_SIZE, lexer^.input);
-   lexer^.current.location.column := 1u;
-   lexer^.current.location.line := 1u;
-   lexer^.current.iterator := lexer^.buffer
- end;
- lexer^.start := lexer^.current;
-
- result := lexer_current(lexer);
- return result
-end;
-
-(*
- * Releases the buffer attached to the lexer. The lexer record itself and the
- * input file are left untouched.
- *)
-proc lexer_destroy*(lexer: ^Lexer);
-var
- storage: Pointer;
-begin
- storage := cast(lexer^.buffer: Pointer);
- free(storage)
-end;
-
-(*
- * Runs initialize_classification followed by initialize_transitions.
- * Presumably these fill the classification and transitions tables that
- * lexer_current consults, so this has to run before the first token is read.
- *)
-proc lexer_initialize();
-begin
- initialize_classification();
- initialize_transitions()
-end;
-
-end.