summaryrefslogtreecommitdiff
path: root/boot/stage13.elna
diff options
context:
space:
mode:
Diffstat (limited to 'boot/stage13.elna')
-rw-r--r--boot/stage13.elna845
1 files changed, 573 insertions, 272 deletions
diff --git a/boot/stage13.elna b/boot/stage13.elna
index 6bdb58d..66f6593 100644
--- a/boot/stage13.elna
+++ b/boot/stage13.elna
@@ -4,6 +4,8 @@
(* Stage 13 compiler. *)
+(* - Multiline comments. *)
+
const
symbol_builtin_name_int := "Int";
symbol_builtin_name_word := "Word";
@@ -286,40 +288,6 @@ begin
return lhs or rhs
end;
-(* Reads the next token. *)
-
-(* Returns token length in a0. *)
-proc _read_token();
-var
- current_token_position: Word;
- token_length: Word;
- current_character: Word;
- is_dot: Word;
- is_alnum_result: Word;
-begin
- current_token_position := source_code_position;
- token_length := 0;
-
- .read_token_loop;
- (* Current character. *)
- current_character := _load_byte(current_token_position);
-
- (* First we try to read a derictive. *)
- (* A derictive can contain a dot and characters. *)
- is_dot := current_character = '.';
- is_alnum_result := _is_alnum(current_character);
-
- if is_dot or is_alnum_result then
- (* Advance the source code position and token length. *)
- token_length := token_length + 1;
- current_token_position := current_token_position + 1;
-
- goto .read_token_loop;
- end;
-
- return token_length
-end;
-
(* Parameters: *)
(* lhs - First pointer. *)
(* rhs - Second pointer. *)
@@ -402,7 +370,7 @@ var
begin
_write_z("\tli t0, \0");
- integer_token := _read_token();
+ integer_token := _lexer_read_token();
_write_token(integer_token);
_advance_token(integer_token);
@@ -515,7 +483,7 @@ begin
_compile_term();
(* Load the left expression from the stack; *)
- _write_z("\tlw t1, 24(sp)\n\0");
+ _write_z("\tlw t1, 64(sp)\n\0");
end;
proc _compile_expression();
@@ -531,7 +499,7 @@ begin
(* It is a binary expression. *)
(* Save the value of the left expression on the stack. *)
- _write_z("sw t0, 24(sp)\n\0");
+ _write_z("sw t0, 64(sp)\n\0");
(* Skip surrounding whitespace in front of the operator. *)
_advance_token(1);
@@ -668,7 +636,7 @@ var
argument_count: Word;
stack_offset: Word;
begin
- name_length := _read_token();
+ name_length := _lexer_read_token();
name := source_code_position;
argument_count := 0;
@@ -731,10 +699,10 @@ proc _compile_goto();
var
next_token: Word;
begin
- _advance_token(5);
+ _advance_token(6);
- next_token := _read_token();
- _write_z("\tj \0");
+ next_token := _lexer_read_token();
+ _write_z("\tj .\0");
_write_token(next_token);
_advance_token(next_token);
@@ -758,7 +726,7 @@ var
begin
_write_z("\tla t0, \0");
- name := _read_token();
+ name := _lexer_read_token();
_write_token(name);
_advance_token(name);
@@ -770,7 +738,7 @@ var
name_token: Word;
lookup_result: Word;
begin
- name_token := _read_token();
+ name_token := _lexer_read_token();
lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token);
if lookup_result <> 0 then
@@ -804,7 +772,7 @@ begin
_advance_token(7);
_compile_expression();
- _write_z("mv a0, t0\n\0");
+ _write_z("\tmv a0, t0\n\0");
end;
(* Writes a label, .Ln, where n is a unique number. *)
@@ -874,7 +842,7 @@ var
begin
(* Skip the dot. *)
_advance_token(1);
- label_token := _read_token();
+ label_token := _lexer_read_token();
_write_c('.');
_write_s(source_code_position, label_token);
_write_z(":\n\0");
@@ -968,7 +936,7 @@ proc _read_type_expression();
var
type_name: Word;
begin
- type_name := _read_token();
+ type_name := _lexer_read_token();
_advance_token(type_name);
end;
@@ -1044,7 +1012,7 @@ var
begin
(* Read the parameter name. *)
name_position := source_code_position;
- name_length := _read_token();
+ name_length := _lexer_read_token();
_advance_token(name_length);
(* Skip colon and space in front of the type expression. *)
@@ -1099,7 +1067,7 @@ begin
name_position := source_code_position;
(* Read and skip variable name, colon and the space *)
- name_length := _read_token();
+ name_length := _lexer_read_token();
_advance_token(name_length + 2);
_read_type_expression();
@@ -1142,7 +1110,7 @@ begin
(* Clear local symbol table. *)
_store_word(0, @symbol_table_local);
- name_length := _read_token();
+ name_length := _lexer_read_token();
(* Write .type _procedure_name, @function. *)
_write_z(".type \0");
@@ -1177,18 +1145,11 @@ end;
(* Skips the comment at the current source position. *)
proc _skip_comment();
+var
+ new_position: Word;
begin
- .skip_comment_loop;
-
- (* Check for newline character. *)
- if _load_byte(source_code_position) <> '\n' then
- (* Advance the input string by one byte. *)
- _advance_token(1);
-
- goto .skip_comment_loop;
- end;
- (* Skip the newline. *)
- _advance_token(1);
+ new_position := _lexer_read_token();
+ _advance_token(new_position + 1);
end;
(* Skip newlines and comments. *)
@@ -1267,7 +1228,7 @@ begin
(* Skip @. *)
_advance_token(1);
_write_z("\n\t.word \0");
- current_byte := _read_token();
+ current_byte := _lexer_read_token();
_write_token(current_byte);
_advance_token(current_byte);
@@ -1275,7 +1236,7 @@ begin
end;
if _is_digit(current_byte) = 1 then
_write_z("\n\t.word \0");
- current_byte := _read_token();
+ current_byte := _lexer_read_token();
_write_token(current_byte);
_advance_token(1);
@@ -1305,7 +1266,7 @@ proc _compile_constant_declaration();
var
name_length: Word;
begin
- name_length := _read_token();
+ name_length := _lexer_read_token();
_write_z(".type \0");
_write_token(name_length);
@@ -1352,7 +1313,7 @@ proc _compile_variable_declaration();
var
name_length: Word;
begin
- name_length := _read_token();
+ name_length := _lexer_read_token();
_write_z(".type \0");
_write_token(name_length);
@@ -1567,66 +1528,250 @@ end;
(* Classification table assigns each possible character to a group (class). All *)
(* characters of the same group are handled equivalently. *)
-(* Classification: *)
-
-(* TransitionClass = ( *)
-(* transitionClassInvalid = 1, *)
-(* transitionClassDigit = 2, *)
-(* transitionClassAlpha = 3, *)
-(* transitionClassSpace = 4, *)
-(* transitionClassColon = 5, *)
-(* transitionClassEquals = 6, *)
-(* transitionClassLeftParen = 7, *)
-(* transitionClassRightParen = 8, *)
-(* transitionClassAsterisk = 9, *)
-(* transitionClassUnderscore = 10, *)
-(* transitionClassSingle = 11, *)
-(* transitionClassHex = 12, *)
-(* transitionClassZero = 13, *)
-(* transitionClassX = 14, *)
-(* transitionClassEof = 15, *)
-(* transitionClassDot = 16, *)
-(* transitionClassMinus = 17, *)
-(* transitionClassSingleQuote = 18, *)
-(* transitionClassDoubleQuote = 19, *)
-(* transitionClassGreater = 20, *)
-(* transitionClassLess = 21, *)
-(* transitionClassOther = 22 *)
-(* ); *)
-(* TransitionState = ( *)
-(* transitionStateStart = 1, *)
-(* transitionStateColon = 2, *)
-(* transitionStateIdentifier = 3, *)
-(* transitionStateDecimal = 4, *)
-(* transitionStateGreater = 5, *)
-(* transitionStateMinus = 6, *)
-(* transitionStateLeftParen = 7, *)
-(* transitionStateLess = 8, *)
-(* transitionStateDot = 9, *)
-(* transitionStateComment = 10, *)
-(* transitionStateClosingComment = 11, *)
-(* transitionStateCharacter = 12, *)
-(* transitionStateString = 13, *)
-(* transitionStateLeadingZero = 14, *)
-(* transitionStateDecimalSuffix = 15, *)
-(* transitionStateEnd = 16 *)
-(* ); *)
(* Transition = record *)
(* action: TransitionAction; *)
(* next_state: TransitionState *)
(* end; *)
-(* TransitionAction = ( *)
-(* none = 1, *)
-(* accumulate = 2, *)
-(* skip = 3, *)
-(* single = 4, *)
-(* eof = 5, *)
-(* finalize = 6, *)
-(* composite = 7, *)
-(* key_id = 8, *)
-(* integer = 9, *)
-(* delimited = 10 *)
-(* ); *)
+
+(* Character classes. Each procedure below returns the 1-based number of one class. *)
+(* The classification table yields one of these numbers per character, and the *)
+(* number is the column index passed to _lexer_get_transition. *)
+proc _lexer_class_invalid();
+begin
+ return 1
+end;
+
+proc _lexer_class_digit();
+begin
+ return 2
+end;
+
+proc _lexer_class_alpha();
+begin
+ return 3
+end;
+
+proc _lexer_class_space();
+begin
+ return 4
+end;
+
+proc _lexer_class_colon();
+begin
+ return 5
+end;
+
+proc _lexer_class_equals();
+begin
+ return 6
+end;
+
+proc _lexer_class_left_paren();
+begin
+ return 7
+end;
+
+proc _lexer_class_right_paren();
+begin
+ return 8
+end;
+
+proc _lexer_class_asterisk();
+begin
+ return 9
+end;
+
+proc _lexer_class_underscore();
+begin
+ return 10
+end;
+
+proc _lexer_class_single();
+begin
+ return 11
+end;
+
+proc _lexer_class_hex();
+begin
+ return 12
+end;
+
+proc _lexer_class_zero();
+begin
+ return 13
+end;
+
+proc _lexer_class_x();
+begin
+ return 14
+end;
+
+proc _lexer_class_eof();
+begin
+ return 15
+end;
+
+proc _lexer_class_dot();
+begin
+ return 16
+end;
+
+proc _lexer_class_minus();
+begin
+ return 17
+end;
+
+proc _lexer_class_single_quote();
+begin
+ return 18
+end;
+
+proc _lexer_class_double_quote();
+begin
+ return 19
+end;
+
+proc _lexer_class_greater();
+begin
+ return 20
+end;
+
+proc _lexer_class_less();
+begin
+ return 21
+end;
+
+proc _lexer_class_other();
+begin
+ return 22
+end;
+
+(* Lexer states. Each procedure below returns the 1-based number of one state. *)
+(* The number is the row index passed to _lexer_get_transition. *)
+(* _lexer_advance_token stops once the state returned by _lexer_state_end is reached. *)
+proc _lexer_state_start();
+begin
+ return 1
+end;
+
+proc _lexer_state_colon();
+begin
+ return 2
+end;
+
+proc _lexer_state_identifier();
+begin
+ return 3
+end;
+
+proc _lexer_state_decimal();
+begin
+ return 4
+end;
+
+proc _lexer_state_greater();
+begin
+ return 5
+end;
+
+proc _lexer_state_minus();
+begin
+ return 6
+end;
+
+proc _lexer_state_left_paren();
+begin
+ return 7
+end;
+
+proc _lexer_state_less();
+begin
+ return 8
+end;
+
+proc _lexer_state_dot();
+begin
+ return 9
+end;
+
+proc _lexer_state_comment();
+begin
+ return 10
+end;
+
+proc _lexer_state_closing_comment();
+begin
+ return 11
+end;
+
+proc _lexer_state_character();
+begin
+ return 12
+end;
+
+proc _lexer_state_string();
+begin
+ return 13
+end;
+
+proc _lexer_state_leading_zero();
+begin
+ return 14
+end;
+
+proc _lexer_state_decimal_suffix();
+begin
+ return 15
+end;
+
+proc _lexer_state_end();
+begin
+ return 16
+end;
+
+(* Transition actions. Each procedure below returns the number of one action. *)
+(* The number is stored in a transition table entry and compared against in *)
+(* _lexer_execute_action. *)
+proc _lexer_action_none();
+begin
+ return 1
+end;
+
+proc _lexer_action_accumulate();
+begin
+ return 2
+end;
+
+proc _lexer_action_skip();
+begin
+ return 3
+end;
+
+proc _lexer_action_single();
+begin
+ return 4
+end;
+
+proc _lexer_action_eof();
+begin
+ return 5
+end;
+
+proc _lexer_action_finalize();
+begin
+ return 6
+end;
+
+proc _lexer_action_composite();
+begin
+ return 7
+end;
+
+proc _lexer_action_key_id();
+begin
+ return 8
+end;
+
+proc _lexer_action_integer();
+begin
+ return 9
+end;
+
+proc _lexer_action_delimited();
+begin
+ return 10
+end;
(* Assigns some value at the given array index. *)
@@ -1645,7 +1790,19 @@ begin
_store_word(data, target);
end;
-proc _create_classification();
+(* Reads one word from an array of words. *)
+
+(* Parameters: *)
+(* array - Address of the first array element. *)
+(* index - Element index, beginning with 1. Each element is 4 bytes long. *)
+
+(* Returns the word loaded from the computed address. *)
+proc _get_at(array: Word, index: Word);
+var
+ target: Word;
+begin
+ target := index + -1;
+ target := target * 4;
+ target := array + target;
+
+ return _load_word(target)
+end;
+
+(* Initializes the array with character classes. *)
+proc _lexer_classifications();
var
code: Word;
begin
@@ -1790,66 +1947,71 @@ begin
end;
end;
-(* Parameters: *)
-(* current_state - Current state (first index into transitions table). *)
-(* transition - Transition (second index into transitions table). *)
-(* action - Action to assign. *)
-(* next_state - Next state to assign. *)
-proc _set_transition(current_state: Word, transition: Word, action: Word, next_state: Word);
+proc _lexer_get_transition(current_state: Word, character_class: Word);
var
transition_table: Word;
row_position: Word;
- state_position: Word;
+ column_position: Word;
target: Word;
begin
- (* Transitions start at offset in classification array. *)
- transition_table := @classification + 256
-
(* Each state is 8 bytes long (2 words: action and next state). *)
- (* There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. *)
-
+ (* There are 22 character classes, so a transition row is 8 * 22 = 176 bytes long. *)
row_position := current_state + -1;
- row_position := row_position * 128;
+ row_position := row_position * 176;
- state_position := transition + -1;
- state_position := state_position * 8;
+ column_position := character_class + -1;
+ column_position := column_position * 8;
- target := transition_table + row_position;
- target := target + state_position;
+ target := _lexer_get_transition_table() + row_position;
+
+ return target + column_position
+end;
+
+(* Parameters: *)
+(* current_state - First index into transitions table. *)
+(* character_class - Second index into transitions table. *)
+(* action - Action to assign. *)
+(* next_state - Next state to assign. *)
+proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word);
+var
+ transition: Word;
+begin
+ transition := _lexer_get_transition(current_state, character_class);
- _store_word(action, target);
- target := target + 4;
- _store_word(next_state, target);
+ _lexer_transition_set_action(transition, action);
+ _lexer_transition_set_state(transition, next_state);
end;
+(* Sets the same action and next state for all character classes in one transition row. *)
+
(* Parameters: *)
(* current_state - Current state (Transition state enumeration). *)
(* default_action - Default action (Callback). *)
(* next_state - Next state (Transition state enumeration). *)
-proc _set_default_transition(current_state: Word, default_action: Word, next_state: Word);
-begin
- _set_transition(current_state, 1, default_action, next_state);
- _set_transition(current_state, 2, default_action, next_state);
- _set_transition(current_state, 3, default_action, next_state);
- _set_transition(current_state, 4, default_action, next_state);
- _set_transition(current_state, 5, default_action, next_state);
- _set_transition(current_state, 6, default_action, next_state);
- _set_transition(current_state, 7, default_action, next_state);
- _set_transition(current_state, 8, default_action, next_state);
- _set_transition(current_state, 9, default_action, next_state);
- _set_transition(current_state, 10, default_action, next_state);
- _set_transition(current_state, 11, default_action, next_state);
- _set_transition(current_state, 12, default_action, next_state);
- _set_transition(current_state, 13, default_action, next_state);
- _set_transition(current_state, 14, default_action, next_state);
- _set_transition(current_state, 15, default_action, next_state);
- _set_transition(current_state, 16, default_action, next_state);
- _set_transition(current_state, 17, default_action, next_state);
- _set_transition(current_state, 18, default_action, next_state);
- _set_transition(current_state, 19, default_action, next_state);
- _set_transition(current_state, 20, default_action, next_state);
- _set_transition(current_state, 21, default_action, next_state);
- _set_transition(current_state, 22, default_action, next_state);
+proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word);
+begin
+ (* One call per character class, 22 in total, so every cell of the row is written. *)
+ _lexer_set_transition(current_state, _lexer_class_invalid(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_digit(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_alpha(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_space(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_colon(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_equals(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_left_paren(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_right_paren(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_asterisk(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_underscore(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_single(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_hex(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_zero(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_x(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_eof(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_dot(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_minus(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_single_quote(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_double_quote(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_greater(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_less(), default_action, next_state);
+ _lexer_set_transition(current_state, _lexer_class_other(), default_action, next_state);
end;
@@ -1859,142 +2021,163 @@ end;
(* The table has m rows and n columns, where m is the amount of states and n is *)
(* the amount of classes. So given the current state and a classified character *)
(* the table can be used to look up the next state. *)
-
-(* Each cell is a word long. *)
-(* - The least significant byte of the word is a row number (beginning with 0). *)
-(* It specifies the target state. "ff" means that this is an end state and no *)
-(* transition is possible. *)
-(* - The next byte is the action that should be performed when transitioning. *)
-(* For the meaning of actions see labels in the lex_next function, which *)
-(* handles each action. *)
-proc _create_transitions();
+proc _lexer_transitions();
begin
(* Start state. *)
- _set_transition(1, 1, 1, 16);
- _set_transition(1, 2, 2, 4);
- _set_transition(1, 3, 2, 3);
- _set_transition(1, 4, 3, 1);
- _set_transition(1, 5, 2, 5);
- _set_transition(1, 6, 4, 16);
- _set_transition(1, 7, 2, 7);
- _set_transition(1, 8, 4, 16);
- _set_transition(1, 9, 4, 16);
- _set_transition(1, 10, 2, 3);
- _set_transition(1, 11, 4, 16);
- _set_transition(1, 12, 2, 3);
- _set_transition(1, 13, 2, 14);
- _set_transition(1, 14, 2, 3);
- _set_transition(1, 15, 5, 16);
- _set_transition(1, 16, 2, 9);
- _set_transition(1, 17, 2, 6);
- _set_transition(1, 18, 2, 12);
- _set_transition(1, 19, 2, 13);
- _set_transition(1, 20, 2, 5);
- _set_transition(1, 21, 2, 8);
- _set_transition(1, 22, 1, 16);
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_greater());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_accumulate(), _lexer_state_dot());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_greater(), _lexer_action_accumulate(), _lexer_state_greater());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_less(), _lexer_action_accumulate(), _lexer_state_less());
+ _lexer_set_transition(_lexer_state_start(), _lexer_class_other(), _lexer_action_none(), _lexer_state_end());
(* Colon state. *)
- _set_default_transition(2, 6, 16);
- _set_transition(2, 6, 7, 16);
+ _lexer_default_transition(_lexer_state_colon(), _lexer_action_finalize(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_colon(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end());
(* Identifier state. *)
- _set_default_transition(3, 8, 16);
- _set_transition(3, 2, 2, 3);
- _set_transition(3, 3, 2, 3);
- _set_transition(3, 10, 2, 3);
- _set_transition(3, 12, 2, 3);
- _set_transition(3, 13, 2, 3);
- _set_transition(3, 14, 2, 3);
+ _lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_identifier(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier());
+ _lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier());
(* Decimal state. *)
- _set_default_transition(4, 9, 16);
- _set_transition(4, 2, 2, 4);
- _set_transition(4, 3, 2, 15);
- _set_transition(4, 10, 1, 16);
- _set_transition(4, 12, 2, 15);
- _set_transition(4, 13, 2, 4);
- _set_transition(4, 14, 2, 15);
+ _lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal());
+ _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_decimal_suffix());
+ _lexer_set_transition(_lexer_state_decimal(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_decimal_suffix());
+ _lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal());
+ _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_decimal_suffix());
(* Greater state. *)
- _set_default_transition(5, 6, 16);
- _set_transition(5, 6, 7, 16);
+ _lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_greater(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end());
(* Minus state. *)
- _set_default_transition(6, 6, 16);
- _set_transition(6, 20, 7, 16);
+ _lexer_default_transition(_lexer_state_minus(), _lexer_action_finalize(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_minus(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end());
(* Left paren state. *)
- _set_default_transition(7, 6, 16);
- _set_transition(7, 9, 2, 10);
+ _lexer_default_transition(_lexer_state_left_paren(), _lexer_action_finalize(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_left_paren(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_comment());
(* Less state. *)
- _set_default_transition(8, 6, 16);
- _set_transition(8, 6, 7, 16);
- _set_transition(8, 20, 7, 16);
+ _lexer_default_transition(_lexer_state_less(), _lexer_action_finalize(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_less(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_less(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end());
(* Dot state. *)
- _set_default_transition(9, 6, 16);
- _set_transition(9, 16, 7, 16);
+ _lexer_default_transition(_lexer_state_dot(), _lexer_action_finalize(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_dot(), _lexer_class_dot(), _lexer_action_composite(), _lexer_state_end());
(* Comment. *)
- _set_default_transition(10, 2, 10);
- _set_transition(10, 9, 2, 11);
- _set_transition(10, 15, 1, 16);
+ _lexer_default_transition(_lexer_state_comment(), _lexer_action_accumulate(), _lexer_state_comment());
+ _lexer_set_transition(_lexer_state_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment());
+ _lexer_set_transition(_lexer_state_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());
(* Closing comment. *)
- _set_default_transition(11, 2, 10);
- _set_transition(11, 1, 1, 16);
- _set_transition(11, 8, 10, 16);
- _set_transition(11, 9, 2, 11);
- _set_transition(11, 15, 1, 16);
+ _lexer_default_transition(_lexer_state_closing_comment(), _lexer_action_accumulate(), _lexer_state_comment());
+ _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_right_paren(), _lexer_action_delimited(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment());
+ _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());
(* Character. *)
- _set_default_transition(12, 2, 12);
- _set_transition(12, 1, 1, 16);
- _set_transition(12, 15, 1, 16);
- _set_transition(12, 18, 10, 16);
+ _lexer_default_transition(_lexer_state_character(), _lexer_action_accumulate(), _lexer_state_character());
+ _lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end());
(* String. *)
- _set_default_transition(13, 2, 13);
- _set_transition(13, 1, 1, 16);
- _set_transition(13, 15, 1, 16);
- _set_transition(13, 19, 10, 16);
+ _lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string());
+ _lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end());
(* Leading zero. *)
- _set_default_transition(14, 9, 16);
- _set_transition(14, 2, 1, 16);
- _set_transition(14, 3, 1, 16);
- _set_transition(14, 10, 1, 16);
- _set_transition(14, 12, 1, 16);
- _set_transition(14, 13, 1, 16);
- _set_transition(14, 14, 1, 16);
+ _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end());
(* Digit with a character suffix. *)
- _set_default_transition(15, 9, 16);
- _set_transition(15, 3, 1, 16);
- _set_transition(15, 2, 1, 16);
- _set_transition(15, 12, 1, 16);
- _set_transition(15, 13, 1, 16);
- _set_transition(15, 14, 1, 16);
+ _lexer_default_transition(_lexer_state_decimal_suffix(), _lexer_action_integer(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end());
+ _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end());
end;
-proc _lexer_get_state();
-var
- offset: Word;
- size: Word;
+(* Transition table is saved after character classification table. *)
+(* Each character entry is 1 word long and there are 256 characters. *)
+(* 1024 = 256 * 4 *)
+proc _lexer_get_transition_table();
begin
- (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *)
- offset := @classification;
- size := 16 * 22;
- offset := offset + 256;
+ return @classification + 1024
+end;
+
+(* Returns the address of the word that holds the current lexer state. *)
+(* Lexer state is saved after the transition tables. *)
+(* Each transition table entry is 8 bytes long. The table has 16 rows (transition states) *)
+(* and 22 columns (character classes), so 2816 = 8 * 16 * 22. *)
+proc _lexer_global_state();
+begin
+ return _lexer_get_transition_table() + 2816
+end;
+
+(* Gets the pointer to the word that holds the token start position. *)
+(* That word is located 4 bytes after the lexer state word. *)
+proc _lexer_global_start();
+begin
+ return _lexer_global_state() + 4
+end;
+
+(* Gets the pointer to the word that holds the token end position. *)
+(* That word is located 4 bytes after the token start word. *)
+proc _lexer_global_end();
+begin
+ return _lexer_global_start() + 4
+end;
+
+(* Reads the action of a transition. *)
+
+(* Parameters: *)
+(* transition - Address of a transition entry. The action is its first word. *)
+proc _lexer_transition_get_action(transition: Word);
+begin
+ return _load_word(transition)
+end;
+
+(* Writes the action of a transition. *)
+
+(* Parameters: *)
+(* transition - Address of a transition entry. The action is its first word. *)
+(* action - Action number to store. *)
+proc _lexer_transition_set_action(transition: Word, action: Word);
+begin
+ _store_word(action, transition);
+end;
- return offset + size
+(* Reads the next state of a transition. *)
+
+(* Parameters: *)
+(* transition - Address of a transition entry. The next state is its second word. *)
+proc _lexer_transition_get_state(transition: Word);
+begin
+ return _load_word(transition + 4)
end;
-(* Gets pointer to the current source text. *)
-proc _lexer_get_current();
+proc _lexer_transition_set_state(transition: Word, state: Word);
begin
- return _lexer_get_state() + 4
+ _store_word(state, transition + 4);
end;
(* Resets the lexer state for reading the next token. *)
@@ -2004,22 +2187,140 @@ var
current: Word;
begin
(* Transition start state is 1. *)
- state := _lexer_get_state();
- _store_word(1, state);
+ state := _lexer_global_state();
+ _store_word(_lexer_state_start(), state);
- (* Text pointer to the beginning of the currently read token. *)
- current := _lexer_get_current();
+ current := _lexer_global_start();
_store_word(source_code_position, current);
- (* Initial length of the token is 0. *)
- _store_word(0, source_code_position + 4);
+ current := _lexer_global_end();
+ _store_word(source_code_position, current);
end;
(* One time lexer initialization. *)
proc _lexer_initialize();
begin
- _create_classification();
- _create_transitions();
+ _lexer_classifications();
+ _lexer_transitions();
+end;
+
+(* Looks up the transition for the current lexer state and the character *)
+(* found at the token end position. *)
+
+(* Returns the address of the transition table entry. *)
+proc _lexer_next_transition();
+var
+ state: Word;
+ symbol: Word;
+ symbol_class: Word;
+begin
+ (* Current state number. *)
+ state := _lexer_global_state();
+ state := _load_word(state);
+
+ (* Byte at the token end position. *)
+ symbol := _lexer_global_end();
+ symbol := _load_word(symbol);
+ symbol := _load_byte(symbol);
+
+ (* The classification array is indexed beginning with 1. *)
+ symbol_class := _get_at(@classification, symbol + 1);
+
+ return _lexer_get_transition(state, symbol_class)
+end;
+
+(* Performs the action of a transition by moving the token start and end positions. *)
+
+(* Parameters: *)
+(* action_to_perform - Action number, one of the _lexer_action_* values. *)
+proc _lexer_execute_action(action_to_perform: Word);
+var
+ pointer_start: Word;
+ pointer_end: Word;
+ position_start: Word;
+ position_end: Word;
+begin
+ (* Token start and token end are kept in two separate words. *)
+ pointer_start := _lexer_global_start();
+ position_start := _load_word(pointer_start);
+ pointer_end := _lexer_global_end();
+ position_end := _load_word(pointer_end);
+
+ if action_to_perform = _lexer_action_none() then
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_accumulate() then
+ (* The current character belongs to the token. *)
+ _store_word(position_end + 1, pointer_end);
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_skip() then
+ (* Both positions move forward, so the skipped character is outside the token. *)
+ _store_word(position_start + 1, pointer_start);
+ _store_word(position_end + 1, pointer_end);
+ goto .action_to_perform_end;
+ end;
+ (* The following four actions do not move any position yet. *)
+ if action_to_perform = _lexer_action_single() then
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_eof() then
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_finalize() then
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_composite() then
+ goto .action_to_perform_end;
+ end;
+ (* The remaining actions advance the token end by one. *)
+ if action_to_perform = _lexer_action_key_id() then
+ _store_word(position_end + 1, pointer_end);
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_integer() then
+ _store_word(position_end + 1, pointer_end);
+ goto .action_to_perform_end;
+ end;
+ if action_to_perform = _lexer_action_delimited() then
+ _store_word(position_end + 1, pointer_end);
+ goto .action_to_perform_end;
+ end;
+
+ .action_to_perform_end;
+end;
+
+(* Takes one step of the lexer: stores the next state and performs the action *)
+(* of the transition selected by _lexer_next_transition. *)
+
+(* Returns the state the lexer moved to. *)
+proc _lexer_execute_transition();
+var
+ transition: Word;
+ action: Word;
+ target_state: Word;
+ state_pointer: Word;
+begin
+ transition := _lexer_next_transition();
+ action := _lexer_transition_get_action(transition);
+ target_state := _lexer_transition_get_state(transition);
+
+ (* The new state is saved before the action runs. *)
+ state_pointer := _lexer_global_state();
+ _store_word(target_state, state_pointer);
+
+ _lexer_execute_action(action);
+
+ return target_state
+end;
+
+(* Executes transitions one after another until the lexer reaches the end state. *)
+proc _lexer_advance_token();
+var
+ executed_transition: Word;
+begin
+ .lexer_advance_token_loop;
+ (* _lexer_execute_transition returns the state it moved to. *)
+ executed_transition := _lexer_execute_transition();
+
+ if executed_transition <> _lexer_state_end() then
+ goto .lexer_advance_token_loop;
+ end;
+end;
+
+(* Reads the next token. *)
+
+(* Returns token length in a0. *)
+proc _lexer_read_token();
+var
+ new_position: Word;
+ token_end: Word;
+begin
+ (* Restart the lexer at source_code_position and run it until the end state. *)
+ _lexer_reset();
+ _lexer_advance_token();
+
+ (* Distance between the token end position and source_code_position. *)
+ new_position := _lexer_global_end();
+ token_end := _load_word(new_position);
+ token_end := token_end + -source_code_position;
+
+ (* The returned value is that distance minus one. *)
+ return token_end + -1
end;
(* Entry point. *)