summaryrefslogtreecommitdiff
path: root/boot/stage13.elna
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2025-09-23 22:22:38 +0200
committerEugen Wissner <belka@caraus.de>2025-09-23 22:22:38 +0200
commit0cc41f2d838630f5117d57e1491ffd4a6d613832 (patch)
tree119f3f76ca5c6a0cdd817575e8df565519fd6a9c /boot/stage13.elna
parent6e9086aa26a37ef8d89dd54b773e614a80efe720 (diff)
downloadelna-0cc41f2d838630f5117d57e1491ffd4a6d613832.tar.gz
Implement elsif for if-statements
Diffstat (limited to 'boot/stage13.elna')
-rw-r--r--boot/stage13.elna849
1 files changed, 685 insertions, 164 deletions
diff --git a/boot/stage13.elna b/boot/stage13.elna
index 66f6593..925a1cd 100644
--- a/boot/stage13.elna
+++ b/boot/stage13.elna
@@ -5,6 +5,7 @@
(* Stage 13 compiler. *)
(* - Multiline comments. *)
+(* - elsif conditions. *)
const
symbol_builtin_name_int := "Int";
@@ -114,7 +115,7 @@ end;
(* Returns the number of bytes read into the buffer in a0. *)
proc _read_file(buffer: Word, size: Word);
begin
- _syscall(0, buffer, size, 0, 0, 0, 63);
+ return _syscall(0, buffer, size, 0, 0, 0, 63)
end;
(* Writes to the standard output. *)
@@ -346,12 +347,6 @@ begin
return destination
end;
-(* Advances the token stream by a0 bytes. *)
-proc _advance_token(count: Word);
-begin
- source_code_position := source_code_position + count;
-end;
-
(* Prints the current token. *)
(* Parameters: *)
@@ -367,12 +362,13 @@ end;
proc _compile_integer_literal();
var
integer_token: Word;
+ token_kind: Word;
begin
_write_z("\tli t0, \0");
- integer_token := _lexer_read_token();
+ integer_token := _lexer_read_token(@token_kind);
_write_token(integer_token);
- _advance_token(integer_token);
+ _lexer_skip_token();
_write_c('\n');
end;
@@ -382,16 +378,16 @@ var
character: Word;
begin
_write_z("\tli t0, '\0");
- _advance_token(1);
+ source_code_position := source_code_position + 1;
character := _load_byte(source_code_position);
if character = '\\' then
_write_c('\\');
- _advance_token(1);
+ source_code_position := source_code_position + 1;
end;
_write_s(source_code_position, 1);
_write_s("'\n", 2);
- _advance_token(2);
+ source_code_position := source_code_position + 2;
end;
proc _compile_variable_expression();
@@ -403,14 +399,14 @@ end;
proc _compile_address_expression();
begin
(* Skip the "@" sign. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_designator();
end;
proc _compile_negate_expression();
begin
(* Skip the "-" sign. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_term();
_write_z("\tneg t0, t0\n\0");
@@ -419,7 +415,7 @@ end;
proc _compile_not_expression();
begin
(* Skip the "~" sign. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_term();
_write_z("\tnot t0, t0\n\0");
@@ -433,7 +429,8 @@ begin
length := _string_length(source_code_position);
offset := _add_string(source_code_position);
- _advance_token(length + 2);
+ source_code_position := source_code_position + length;
+ source_code_position := source_code_position + 2;
_write_z("\tla t0, strings\n\0");
_write_z("\tli t1, \0");
@@ -479,7 +476,7 @@ end;
proc _compile_binary_rhs();
begin
(* Skip the whitespace after the binary operator. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_term();
(* Load the left expression from the stack; *)
@@ -502,11 +499,11 @@ begin
_write_z("sw t0, 64(sp)\n\0");
(* Skip surrounding whitespace in front of the operator. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
current_character := _load_byte(source_code_position);
if current_character = '+' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -515,7 +512,7 @@ begin
goto .compile_expression_end;
end;
if current_character = '*' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -524,7 +521,7 @@ begin
goto .compile_expression_end;
end;
if current_character = '&' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -533,7 +530,7 @@ begin
goto .compile_expression_end;
end;
if current_character = 'o' then
- _advance_token(2);
+ source_code_position := source_code_position + 2;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -542,7 +539,7 @@ begin
goto .compile_expression_end;
end;
if current_character = 'x' then
- _advance_token(3);
+ source_code_position := source_code_position + 3;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -551,7 +548,7 @@ begin
goto .compile_expression_end;
end;
if current_character = '=' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -560,7 +557,7 @@ begin
goto .compile_expression_end;
end;
if current_character = '%' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -569,7 +566,7 @@ begin
goto .compile_expression_end;
end;
if current_character = '/' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -578,11 +575,11 @@ begin
goto .compile_expression_end;
end;
if current_character = '<' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
current_character := _load_byte(source_code_position);
if current_character = '>' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -591,7 +588,7 @@ begin
goto .compile_expression_end;
end;
if current_character = '=' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -607,10 +604,10 @@ begin
goto .compile_expression_end;
end;
if current_character = '>' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
current_character := _load_byte(source_code_position);
if current_character = '=' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_compile_binary_rhs();
(* Execute the operation. *)
@@ -621,7 +618,7 @@ begin
_compile_binary_rhs();
(* Execute the operation. *)
- _write_z("\tslt t0, t1, t0\n\0");
+ _write_z("\tslt t0, t0, t1\n\0");
goto .compile_expression_end;
end;
@@ -635,16 +632,21 @@ var
name: Word;
argument_count: Word;
stack_offset: Word;
+ token_kind: Word;
begin
- name_length := _lexer_read_token();
- name := source_code_position;
+ name_length := _lexer_read_token(@token_kind);
+ name := _lexer_global_start();
+ name := _load_word(name);
+ name_length := _lexer_global_end();
+ name_length := _load_word(name_length) + -name;
argument_count := 0;
(* Skip the identifier and left paren. *)
- _advance_token(name_length + 1);
+ _lexer_skip_token();
+ source_code_position := source_code_position + 1;
if _load_byte(source_code_position) = ')' then
- goto .compile_call_finalize
+ goto .compile_call_finalize;
end;
.compile_call_loop;
_compile_expression();
@@ -664,7 +666,7 @@ begin
if _load_byte(source_code_position) <> ',' then
goto .compile_call_finalize;
end;
- _advance_token(2);
+ source_code_position := source_code_position + 2;
goto .compile_call_loop;
.compile_call_finalize;
@@ -692,43 +694,47 @@ begin
_write_s(name, name_length);
(* Skip the right paren. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
end;
proc _compile_goto();
var
next_token: Word;
+ token_kind: Word;
begin
- _advance_token(6);
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
+
+ source_code_position := source_code_position + 2;
- next_token := _lexer_read_token();
+ next_token := _lexer_read_token(@token_kind);
_write_z("\tj .\0");
_write_token(next_token);
- _advance_token(next_token);
+ _lexer_skip_token();
end;
-proc _compile_local_designator(symbol: Word, name_length: Word);
+proc _compile_local_designator(symbol: Word);
var
variable_offset: Word;
begin
-
_write_z("\taddi t0, sp, \0");
variable_offset := _parameter_info_get_offset(symbol);
_write_i(variable_offset);
_write_c('\n');
- _advance_token(name_length);
+ _lexer_skip_token();
end;
proc _compile_global_designator();
var
name: Word;
+ token_kind: Word;
begin
_write_z("\tla t0, \0");
- name := _lexer_read_token();
+ name := _lexer_read_token(@token_kind);
_write_token(name);
- _advance_token(name);
+ _lexer_skip_token();
_write_c('\n');
end;
@@ -737,12 +743,18 @@ proc _compile_designator();
var
name_token: Word;
lookup_result: Word;
+ token_kind: Word;
+ name: Word;
begin
- name_token := _lexer_read_token();
- lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token);
+ name_token := _lexer_read_token(@token_kind);
+ name := _lexer_global_start();
+ name := _load_word(name);
+ name_token := _lexer_global_end();
+ name_token := _load_word(name_token) + -name;
+ lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
if lookup_result <> 0 then
- _compile_local_designator(lookup_result, name_token);
+ _compile_local_designator(lookup_result);
goto .compile_designator_end;
end;
_compile_global_designator();
@@ -758,7 +770,7 @@ begin
_write_z("\tsw t0, 60(sp)\n\0");
(* Skip the assignment sign (:=) with surrounding whitespaces. *)
- _advance_token(4);
+ source_code_position := source_code_position + 4;
(* Compile the assignment. *)
_compile_expression();
@@ -767,9 +779,13 @@ begin
end;
proc _compile_return_statement();
+var
+ token_kind: Word;
begin
(* Skip "return" keyword and whitespace after it. *)
- _advance_token(7);
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
+ source_code_position := source_code_position + 1;
_compile_expression();
_write_z("\tmv a0, t0\n\0");
@@ -789,13 +805,18 @@ proc _compile_if();
var
after_end_label: Word;
condition_label: Word;
+ token_kind: Word;
begin
(* Skip "if ". *)
- _advance_token(3);
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
+ source_code_position := source_code_position + 1;
+
(* Compile condition. *)
_compile_expression();
(* Skip " then" with newline. *)
- _advance_token(6);
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
after_end_label := label_counter;
label_counter := label_counter + 1;
@@ -817,20 +838,53 @@ begin
_write_label(condition_label);
_write_z(":\n\0");
- if _memcmp(source_code_position, "end", 3) = 0 then
+ .compile_if_loop;
+
+ _lexer_read_token(@token_kind);
+ if token_kind = _lexer_token_kind_end() then
goto .compile_if_end;
end;
- if _memcmp(source_code_position, "else", 3) = 0 then
- goto .compile_if_else
+ if token_kind = _lexer_token_kind_else() then
+ goto .compile_if_else;
+ end;
+ if token_kind = _lexer_token_kind_elsif() then
+ goto .compile_if_elsif;
end;
+ .compile_if_elsif;
+ _lexer_skip_token();
+ source_code_position := source_code_position + 1;
+
+ (* Compile condition. *)
+ _compile_expression();
+ (* Skip " then" with newline. *)
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
+
+ (* condition_label is the label in front of the next elsif condition or end. *)
+ condition_label := label_counter;
+ label_counter := label_counter + 1;
+
+ _write_z("\tbeqz t0, \0");
+ _write_label(condition_label);
+ _write_c('\n');
+
+ _compile_procedure_body();
+
+ _write_z("\tj \0");
+ _write_label(after_end_label);
+ _write_c('\n');
+
+ _write_label(condition_label);
+ _write_z(":\n\0");
+
+ goto .compile_if_loop;
+
.compile_if_else;
- (* Skip "else" and newline. *)
- _advance_token(5);
+ _lexer_skip_token();
_compile_procedure_body();
.compile_if_end;
- (* Skip "end". *)
- _advance_token(3);
+ _lexer_skip_token();
_write_label(after_end_label);
_write_z(":\n\0");
@@ -839,74 +893,77 @@ end;
proc _compile_label_declaration();
var
label_token: Word;
+ token_kind: Word;
+ name: Word;
begin
(* Skip the dot. *)
- _advance_token(1);
- label_token := _lexer_read_token();
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
+ label_token := _lexer_read_token(@token_kind);
+ name := _lexer_global_start();
+ name := _load_word(name);
_write_c('.');
- _write_s(source_code_position, label_token);
+ _write_s(name, label_token);
_write_z(":\n\0");
- _advance_token(label_token);
+ _lexer_skip_token();
end;
proc _compile_statement();
var
current_byte: Word;
+ token_kind: Word;
begin
- _skip_spaces();
- current_byte := _load_byte(source_code_position);
+ _lexer_read_token(@token_kind);
- (* This is a call if the statement starts with an underscore. *)
- if current_byte = '_' then
- _compile_call();
- goto .compile_statement_semicolon;
- end;
- if _memcmp(source_code_position, "goto ", 5) = 0 then
+ if token_kind = _lexer_token_kind_goto() then
_compile_goto();
goto .compile_statement_semicolon;
end;
- if _memcmp(source_code_position, "if ", 3) = 0 then
+ if token_kind = _lexer_token_kind_if() then
_compile_if();
goto .compile_statement_semicolon;
end;
- if _memcmp(source_code_position, "return ", 7) = 0 then
+ if token_kind = _lexer_token_kind_return() then
_compile_return_statement();
- _write_c('\n');
-
- goto .compile_statement_end;
+ goto .compile_statement_semicolon;
end;
- if current_byte = '.' then
+ if token_kind = _lexer_token_kind_dot() then
_compile_label_declaration();
+ goto .compile_statement_semicolon;
+ end;
+ if token_kind = _lexer_token_kind_identifier() then
+ current_byte := _lexer_global_start();
+ current_byte := _load_word(current_byte);
+ current_byte := _load_byte(current_byte);
+ (* This is a call if the statement starts with an underscore. *)
+ if current_byte = '_' then
+ _compile_call();
+ else
+ _compile_assignment();
+ end;
goto .compile_statement_semicolon;
end;
- _compile_assignment();
- goto .compile_statement_semicolon;
.compile_statement_semicolon;
- _advance_token(2);
_write_c('\n');
-
- .compile_statement_end;
end;
proc _compile_procedure_body();
var
- lhs: Word;
- rhs: Word;
+ token_kind: Word;
begin
.compile_procedure_body_loop;
- _skip_empty_lines();
- _skip_spaces();
- lhs := _memcmp(source_code_position, "end", 3) = 0;
- rhs := _memcmp(source_code_position, "else", 4) = 0;
- lhs := lhs or rhs;
+ _skip_empty_lines();
+ _compile_statement();
+ _lexer_read_token(@token_kind);
- if lhs = 0 then
- _compile_statement();
+ if token_kind = _lexer_token_kind_semicolon() then
+ _lexer_skip_token();
goto .compile_procedure_body_loop;
end;
+ _skip_empty_lines();
end;
(* Writes a register name to the standard output. *)
@@ -927,7 +984,7 @@ var
begin
current_byte := _load_byte(source_code_position);
if current_byte = '\t' then
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_skip_spaces();
end;
end;
@@ -935,9 +992,10 @@ end;
proc _read_type_expression();
var
type_name: Word;
+ token_kind: Word;
begin
- type_name := _lexer_read_token();
- _advance_token(type_name);
+ type_name := _lexer_read_token(@token_kind);
+ _lexer_skip_token();
end;
(* Parameters: *)
@@ -1009,14 +1067,15 @@ var
name_length: Word;
info: Word;
name_position: Word;
+ token_kind: Word;
begin
(* Read the parameter name. *)
name_position := source_code_position;
- name_length := _lexer_read_token();
- _advance_token(name_length);
+ name_length := _lexer_read_token(@token_kind);
+ _lexer_skip_token();
(* Skip colon and space in front of the type expression. *)
- _advance_token(2);
+ source_code_position := source_code_position + 2;
_read_type_expression();
@@ -1038,7 +1097,7 @@ var
parameter_counter: Word;
begin
(* Skip open paren. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
parameter_counter := 0;
.compile_procedure_prologue_skip;
@@ -1047,12 +1106,12 @@ begin
parameter_counter := parameter_counter + 1;
if _load_byte(source_code_position) = ',' then
- _advance_token(2);
+ source_code_position := source_code_position + 2;
goto .compile_procedure_prologue_skip;
end;
end;
(* Skip close paren. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
end;
(* Parameters: *)
@@ -1062,13 +1121,15 @@ var
name_length: Word;
info: Word;
name_position: Word;
+ token_kind: Word;
begin
_skip_spaces();
name_position := source_code_position;
(* Read and skip variable name, colon and the space *)
- name_length := _lexer_read_token();
- _advance_token(name_length + 2);
+ name_length := _lexer_read_token(@token_kind);
+ _lexer_skip_token(name_length);
+ source_code_position := source_code_position + 2;
_read_type_expression();
@@ -1076,7 +1137,7 @@ begin
_symbol_table_enter(@symbol_table_local, name_position, name_length, info);
(* Skip semicolon and newline after the variable declaration *)
- _advance_token(2);
+ source_code_position := source_code_position + 2;
end;
proc _read_procedure_temporaries();
@@ -1086,7 +1147,7 @@ begin
if _memcmp(source_code_position, "var", 3) <> 0 then
goto .read_local_variables_end;
end;
- _advance_token(4);
+ source_code_position := source_code_position + 4;
temporary_counter := 0;
.read_local_variables_loop;
@@ -1104,13 +1165,14 @@ end;
proc _compile_procedure();
var
name_length: Word;
+ token_kind: Word;
begin
(* Skip "proc ". *)
- _advance_token(5);
+ source_code_position := source_code_position + 5;
(* Clear local symbol table. *)
_store_word(0, @symbol_table_local);
- name_length := _lexer_read_token();
+ name_length := _lexer_read_token(@token_kind);
(* Write .type _procedure_name, @function. *)
_write_z(".type \0");
@@ -1123,16 +1185,16 @@ begin
_write_z(":\n\0");
(* Skip procedure name. *)
- _advance_token(name_length);
+ _lexer_skip_token();
_write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0");
_read_procedure_parameters();
(* Skip semicolon and newline. *)
- _advance_token(2);
+ source_code_position := source_code_position + 2;
_read_procedure_temporaries();
(* Skip semicolon, "begin" and newline. *)
- _advance_token(6);
+ source_code_position := source_code_position + 6;
_compile_procedure_body();
@@ -1140,16 +1202,17 @@ begin
_write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0");
(* Skip the "end" keyword, semicolon and newline. *)
- _advance_token(5);
+ source_code_position := source_code_position + 5;
end;
(* Skips the token at the current position and the one character after it. *)
proc _skip_comment();
var
- new_position: Word;
+ token_kind: Word;
begin
- new_position := _lexer_read_token();
- _advance_token(new_position + 1);
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
+ source_code_position := source_code_position + 1;
end;
(* Skip newlines and comments. *)
@@ -1176,7 +1239,7 @@ begin
current_byte := _load_byte(current_position + 1);
if current_byte = '*' then
- goto .skip_empty_lines_comment
+ goto .skip_empty_lines_comment;
end;
goto .skip_empty_lines_end;
@@ -1191,7 +1254,7 @@ begin
.skip_empty_lines_tab;
current_position := current_position + 1;
- goto .skip_empty_lines_loop
+ goto .skip_empty_lines_loop;
.skip_empty_lines_end;
end;
@@ -1200,6 +1263,7 @@ proc _compile_global_initializer();
var
current_byte: Word;
length: Word;
+ token_kind: Word;
begin
current_byte := _load_byte(source_code_position);
@@ -1211,13 +1275,14 @@ begin
_write_i();
(* Skip the quoted string. *)
- _advance_token(length + 2);
+ source_code_position := source_code_position + length;
+ source_code_position := source_code_position + 2;
goto .compile_global_initializer_end;
end;
if current_byte = 'S' then
(* Skip "S(". *)
- _advance_token(2);
+ source_code_position := source_code_position + 2;
if _load_byte(source_code_position) = ')' then
goto .compile_global_initializer_closing;
@@ -1226,19 +1291,19 @@ begin
end;
if current_byte = '@' then
(* Skip @. *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
_write_z("\n\t.word \0");
- current_byte := _lexer_read_token();
+ current_byte := _lexer_read_token(@token_kind);
_write_token(current_byte);
- _advance_token(current_byte);
+ _lexer_skip_token();
goto .compile_global_initializer_end;
end;
if _is_digit(current_byte) = 1 then
_write_z("\n\t.word \0");
- current_byte := _lexer_read_token();
+ current_byte := _lexer_read_token(@token_kind);
_write_token(current_byte);
- _advance_token(1);
+ source_code_position := source_code_position + 1;
goto .compile_global_initializer_end;
end;
@@ -1248,14 +1313,14 @@ begin
if _load_byte(source_code_position) <> ')' then
(* Skip comma and whitespace after it. *)
- _advance_token(2);
+ source_code_position := source_code_position + 2;
goto .compile_global_initializer_loop;
end;
.compile_global_initializer_closing;
(* Skip ")" *)
- _advance_token(1);
+ source_code_position := source_code_position + 1;
goto .compile_global_initializer_end;
@@ -1265,8 +1330,9 @@ end;
proc _compile_constant_declaration();
var
name_length: Word;
+ token_kind: Word;
begin
- name_length := _lexer_read_token();
+ name_length := _lexer_read_token(@token_kind);
_write_z(".type \0");
_write_token(name_length);
@@ -1276,22 +1342,26 @@ begin
_write_c(':');
(* Skip the constant name with assignment sign and surrounding whitespaces. *)
- _advance_token(name_length + 4);
+ _lexer_skip_token();
+ source_code_position := source_code_position + 4;
_compile_global_initializer();
(* Skip semicolon and newline. *)
- _advance_token(2);
+ source_code_position := source_code_position + 2;
_write_c('\n');
end;
proc _compile_const_part();
+var
+ token_kind: Word;
begin
_skip_empty_lines();
+ _lexer_read_token(@token_kind);
- if _memcmp(source_code_position, "const\0", 5) <> 0 then
+ if token_kind <> _lexer_token_kind_const() then
goto .compile_const_part_end;
end;
(* Skip "const" with the newline after it. *)
- _advance_token(6);
+ _lexer_skip_token();
_write_z(".section .rodata # Compiled from const section.\n\n\0");
.compile_const_part_loop;
@@ -1300,8 +1370,7 @@ begin
(* If the character at the line beginning is not indentation, *)
(* it is probably the next code section. *)
if _load_byte(source_code_position) = '\t' then
- _advance_token(1);
-
+ source_code_position := source_code_position + 1;
_compile_constant_declaration();
goto .compile_const_part_loop;
end;
@@ -1312,8 +1381,9 @@ end;
proc _compile_variable_declaration();
var
name_length: Word;
+ token_kind: Word;
begin
- name_length := _lexer_read_token();
+ name_length := _lexer_read_token(@token_kind);
_write_z(".type \0");
_write_token(name_length);
@@ -1323,7 +1393,9 @@ begin
_write_c(':');
(* Skip the variable name and colon with space before the type. *)
- _advance_token(name_length + 2);
+ _lexer_skip_token();
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
_read_type_expression();
if _load_byte(source_code_position) <> ' ' then
@@ -1331,32 +1403,34 @@ begin
_write_z(" .zero 81920\0");
else
(* Skip the assignment sign with surrounding whitespaces. *)
- _advance_token(4);
+ source_code_position := source_code_position + 4;
_compile_global_initializer();
end;
(* Skip semicolon and newline. *)
- _advance_token(2);
+ _lexer_read_token(@token_kind);
+ _lexer_skip_token();
_write_c('\n');
end;
proc _compile_var_part();
var
- current_character: Word;
+ token_kind: Word;
begin
- if _memcmp(source_code_position, "var\0", 3) <> 0 then
+ _lexer_read_token(@token_kind);
+
+ if token_kind <> _lexer_token_kind_var() then
goto .compile_var_part_end;
end;
(* Skip "var" and newline. *)
- _advance_token(4);
+ _lexer_skip_token();
_write_z(".section .data\n\0");
.compile_var_part_loop;
_skip_empty_lines();
- current_character := _load_byte(source_code_position);
+ _lexer_read_token(@token_kind);
- if current_character = '\t' then
- _advance_token(1);
+ if token_kind = _lexer_token_kind_identifier() then
_compile_variable_declaration();
goto .compile_var_part_loop;
end;
@@ -2222,14 +2296,445 @@ begin
return _lexer_get_transition(current_state, character_class)
end;
-proc _lexer_execute_action(action_to_perform: Word);
+(* Token kind codes. *)
+(* Each _lexer_token_kind_* procedure returns the constant integer that stands *)
+(* for one kind of token. Callers compare a token kind against the procedure *)
+(* result instead of spelling out the number. *)
+
+(* Kind code for identifiers. _lexer_classify_keyword uses it as the default *)
+(* when a token matches none of the keywords it knows. *)
+proc _lexer_token_kind_identifier();
+begin
+ return 1
+end;
+
+proc _lexer_token_kind_const();
+begin
+ return 2
+end;
+
+proc _lexer_token_kind_var();
+begin
+ return 3
+end;
+
+proc _lexer_token_kind_proc();
+begin
+ return 4
+end;
+
+proc _lexer_token_kind_type();
+begin
+ return 5
+end;
+
+proc _lexer_token_kind_begin();
+begin
+ return 6
+end;
+
+proc _lexer_token_kind_end();
+begin
+ return 7
+end;
+
+proc _lexer_token_kind_if();
+begin
+ return 8
+end;
+
+proc _lexer_token_kind_then();
+begin
+ return 9
+end;
+
+proc _lexer_token_kind_else();
+begin
+ return 10
+end;
+
+proc _lexer_token_kind_elsif();
+begin
+ return 11
+end;
+
+proc _lexer_token_kind_while();
+begin
+ return 12
+end;
+
+proc _lexer_token_kind_do();
+begin
+ return 13
+end;
+
+proc _lexer_token_kind_extern();
+begin
+ return 14
+end;
+
+proc _lexer_token_kind_record();
+begin
+ return 15
+end;
+
+proc _lexer_token_kind_union();
+begin
+ return 16
+end;
+
+proc _lexer_token_kind_true();
+begin
+ return 17
+end;
+
+proc _lexer_token_kind_false();
+begin
+ return 18
+end;
+
+proc _lexer_token_kind_nil();
+begin
+ return 19
+end;
+
+proc _lexer_token_kind_and();
+begin
+ return 20
+end;
+
+proc _lexer_token_kind_or();
+begin
+ return 21
+end;
+
+proc _lexer_token_kind_xor();
+begin
+ return 22
+end;
+
+proc _lexer_token_kind_pipe();
+begin
+ return 23
+end;
+
+proc _lexer_token_kind_not();
+begin
+ return 24
+end;
+
+(* Kind code for the "return" keyword. *)
+(* The code is 64 because every value from 1 to 63 is already returned by *)
+(* another _lexer_token_kind_* procedure. The previous value 24 was shared *)
+(* with _lexer_token_kind_not, which made the two kinds indistinguishable. *)
+proc _lexer_token_kind_return();
+begin
+ return 64
+end;
+
+proc _lexer_token_kind_module();
+begin
+ return 25
+end;
+
+proc _lexer_token_kind_program();
+begin
+ return 26
+end;
+
+proc _lexer_token_kind_import();
+begin
+ return 27
+end;
+
+proc _lexer_token_kind_cast();
+begin
+ return 28
+end;
+
+proc _lexer_token_kind_defer();
+begin
+ return 29
+end;
+
+proc _lexer_token_kind_case();
+begin
+ return 30
+end;
+
+proc _lexer_token_kind_of();
+begin
+ return 31
+end;
+
+proc _lexer_token_kind_trait();
+begin
+ return 32
+end;
+
+proc _lexer_token_kind_left_paren();
+begin
+ return 33
+end;
+
+proc _lexer_token_kind_right_paren();
+begin
+ return 34
+end;
+
+proc _lexer_token_kind_left_square();
+begin
+ return 35
+end;
+
+proc _lexer_token_kind_right_square();
+begin
+ return 36
+end;
+
+proc _lexer_token_kind_shift_left();
+begin
+ return 37
+end;
+
+proc _lexer_token_kind_shift_right();
+begin
+ return 38
+end;
+
+proc _lexer_token_kind_greater_equal();
+begin
+ return 39
+end;
+
+proc _lexer_token_kind_less_equal();
+begin
+ return 40
+end;
+
+proc _lexer_token_kind_greater_than();
+begin
+ return 41
+end;
+
+proc _lexer_token_kind_less_than();
+begin
+ return 42
+end;
+
+proc _lexer_token_kind_not_equal();
+begin
+ return 43
+end;
+
+proc _lexer_token_kind_equals();
+begin
+ return 44
+end;
+
+proc _lexer_token_kind_semicolon();
+begin
+ return 45
+end;
+
+proc _lexer_token_kind_dot();
+begin
+ return 46
+end;
+
+proc _lexer_token_kind_comma();
+begin
+ return 47
+end;
+
+proc _lexer_token_kind_plus();
+begin
+ return 48
+end;
+
+proc _lexer_token_kind_arrow();
+begin
+ return 49
+end;
+
+proc _lexer_token_kind_minus();
+begin
+ return 50
+end;
+
+proc _lexer_token_kind_multiplication();
+begin
+ return 51
+end;
+
+proc _lexer_token_kind_division();
+begin
+ return 52
+end;
+
+proc _lexer_token_kind_remainder();
+begin
+ return 53
+end;
+
+proc _lexer_token_kind_assignment();
+begin
+ return 54
+end;
+
+proc _lexer_token_kind_colon();
+begin
+ return 55
+end;
+
+proc _lexer_token_kind_hat();
+begin
+ return 56
+end;
+
+proc _lexer_token_kind_at();
+begin
+ return 57
+end;
+
+proc _lexer_token_kind_exclamation();
+begin
+ return 58
+end;
+
+proc _lexer_token_kind_string();
+begin
+ return 59
+end;
+
+proc _lexer_token_kind_character();
+begin
+ return 60
+end;
+
+proc _lexer_token_kind_integer();
+begin
+ return 61
+end;
+
+proc _lexer_token_kind_word();
+begin
+ return 62
+end;
+
+proc _lexer_token_kind_goto();
+begin
+ return 63
+end;
+
+(* Checks whether two byte sequences are equal. *)
+(* Parameters: *)
+(* lhs_pointer - Address of the first sequence. *)
+(* lhs_length - Length of the first sequence in bytes. *)
+(* rhs_pointer - Address of the second sequence. *)
+(* rhs_length - Length of the second sequence in bytes. *)
+(* Returns 0 if the lengths differ. Otherwise returns the value of the *)
+(* comparison "_memcmp(...) = 0"; callers test the result against 1. *)
+proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word);
+var
+ result: Word;
+begin
+ result := 0;
+
+ (* The contents are compared only when both lengths are the same. *)
+ if lhs_length = rhs_length then
+ result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0;
+ end;
+ return result
+end;
+
+(* Decides whether a scanned word is a keyword or an identifier. *)
+(* Parameters: *)
+(* position_start - Address of the first byte of the word. *)
+(* position_end - Address right after the last byte of the word. *)
+(* Returns the token kind of the matching keyword. The recognized keywords are *)
+(* const, var, proc, type, begin, end, return, goto, if, while, then, else and *)
+(* elsif. Any other word yields _lexer_token_kind_identifier(). *)
+proc _lexer_classify_keyword(position_start: Word, position_end: Word);
+var
+ result: Word;
+ token_length: Word;
+begin
+ (* Start with the identifier kind; it stays if no keyword below matches. *)
+ result := _lexer_token_kind_identifier();
+ (* The length is end minus start, written as adding the negated start. *)
+ token_length := position_end + -position_start;
+
+ if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then
+ result := _lexer_token_kind_const();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then
+ result := _lexer_token_kind_var();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then
+ result := _lexer_token_kind_proc();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then
+ result := _lexer_token_kind_type();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then
+ result := _lexer_token_kind_begin();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then
+ result := _lexer_token_kind_end();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then
+ result := _lexer_token_kind_return();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then
+ result := _lexer_token_kind_goto();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then
+ result := _lexer_token_kind_if();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then
+ result := _lexer_token_kind_while();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then
+ result := _lexer_token_kind_then();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then
+ result := _lexer_token_kind_else();
+ goto .lexer_classify_keyword_end;
+ end;
+ if _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then
+ result := _lexer_token_kind_elsif();
+ goto .lexer_classify_keyword_end;
+ end;
+ .lexer_classify_keyword_end;
+ return result
+end;
+
+(* Classifies a token by its first byte; _lexer_execute_action calls it for *)
+(* the finalize action. *)
+(* Parameters: *)
+(* start_position - Address of the first byte of the token. *)
+(* Returns the colon kind for ':' and the dot kind for '.'. *)
+(* Returns 0 for any other byte. *)
+proc _lexer_classify_finalize(start_position: Word);
+var
+ character: Word;
+ result: Word;
+begin
+ result := 0;
+ character := _load_byte(start_position);
+
+ if character = ':' then
+ result := _lexer_token_kind_colon();
+ goto .lexer_classify_finalize_result;
+ end;
+ if character = '.' then
+ result := _lexer_token_kind_dot();
+ goto .lexer_classify_finalize_result;
+ end;
+ .lexer_classify_finalize_result;
+ return result
+end;
+
+(* Classifies a token by its first byte; _lexer_execute_action calls it for *)
+(* the single action. *)
+(* Parameters: *)
+(* start_position - Address of the first byte of the token. *)
+(* Returns the semicolon kind for ';'. Returns 0 for any other byte. *)
+proc _lexer_classify_single(start_position: Word);
+var
+ character: Word;
+ result: Word;
+begin
+ result := 0;
+ character := _load_byte(start_position);
+
+ if character = ';' then
+ result := _lexer_token_kind_semicolon();
+ end;
+ return result
+end;
+
+proc _lexer_execute_action(action_to_perform: Word, kind: Word);
var
pointer_start: Word;
pointer_end: Word;
position_start: Word;
position_end: Word;
+ intermediate: Word;
begin
- pointer_start := _lexer_global_end();
+ pointer_start := _lexer_global_start();
position_start := _load_word(pointer_start);
pointer_end := _lexer_global_end();
position_end := _load_word(pointer_end);
@@ -2247,34 +2752,40 @@ begin
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_single() then
+ _store_word(position_end + 1, pointer_end);
+
+ intermediate := _lexer_classify_single(position_start);
+ _store_word(intermediate, kind);
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_eof() then
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_finalize() then
+ intermediate := _lexer_classify_finalize(position_start);
+ _store_word(intermediate, kind);
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_composite() then
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_key_id() then
- _store_word(position_end + 1, pointer_end);
+ intermediate := _lexer_classify_keyword(position_start, position_end);
+ _store_word(intermediate, kind);
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_integer() then
- _store_word(position_end + 1, pointer_end);
goto .action_to_perform_end;
end;
if action_to_perform = _lexer_action_delimited() then
- _store_word(position_end + 1, pointer_end);
+ (* _store_word(position_end + 1, pointer_end); *)
goto .action_to_perform_end;
end;
.action_to_perform_end;
end;
-proc _lexer_execute_transition();
+proc _lexer_execute_transition(kind: Word);
var
next_transition: Word;
next_state: Word;
@@ -2288,50 +2799,60 @@ begin
global_state := _lexer_global_state();
_store_word(next_state, global_state);
- _lexer_execute_action(action_to_perform);
+ _lexer_execute_action(action_to_perform, kind);
return next_state
end;
-proc _lexer_advance_token();
-var
- executed_transition: Word;
+proc _lexer_advance_token(kind: Word);
begin
- .lexer_advance_token_loop;
- executed_transition := _lexer_execute_transition();
-
- if executed_transition <> _lexer_state_end() then
- goto .lexer_advance_token_loop;
+ if _lexer_execute_transition(kind) <> _lexer_state_end() then
+ _lexer_advance_token(kind);
end;
end;
(* Reads the next token. *)
(* Returns token length in a0. *)
-proc _lexer_read_token();
+proc _lexer_read_token(kind: Word);
var
new_position: Word;
- token_end: Word;
begin
_lexer_reset();
- _lexer_advance_token();
+ _lexer_advance_token(kind);
new_position := _lexer_global_end();
- token_end := _load_word(new_position);
- token_end := token_end + -source_code_position;
+ return _load_word(new_position) + -source_code_position
+end;
- return token_end + -1
+(* Advances the token stream past the last read token. *)
+(* Sets source_code_position to the word stored at the address returned by *)
+(* _lexer_global_end(). It takes no parameters and is meant to be called *)
+(* after _lexer_read_token(), which runs the lexer over the next token. *)
+proc _lexer_skip_token();
+var
+ new_position: Word;
+begin
+ new_position := _lexer_global_end();
+ source_code_position := _load_word(new_position);
+end;
(* Entry point. *)
proc _start();
+var
+ last_read: Word;
+ offset: Wort;
begin
_lexer_initialize();
_symbol_table_build();
(* Read the source from the standard input. *)
+ offset := @source_code;
+
+ .start_read;
+ (* The second argument is the buffer size. When modifying it, update the source_code definition as well. *)
- _read_file(@source_code, 81920);
+ last_read := _read_file(offset, 81920);
+ if last_read > 0 then
+ offset := offset + last_read;
+ goto .start_read;
+ end;
_compile();
_exit(0);