From 0cc41f2d838630f5117d57e1491ffd4a6d613832 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Tue, 23 Sep 2025 22:22:38 +0200 Subject: [PATCH] Implement elsif for if-statements --- boot/stage12.elna | 13 +- boot/stage13.elna | 855 +++++++++++++++++++++----- boot/stage14.elna | 1470 +++++++++++++++++++++++++++++---------------- 3 files changed, 1661 insertions(+), 677 deletions(-) diff --git a/boot/stage12.elna b/boot/stage12.elna index 68f5592..1cf4969 100644 --- a/boot/stage12.elna +++ b/boot/stage12.elna @@ -109,7 +109,7 @@ end; (* Returns the amount of bytes written in a0. *) proc _read_file(v88: Word, v84: Word); begin - _syscall(0, v88, v84, 0, 0, 0, 63); + return _syscall(0, v88, v84, 0, 0, 0, 63) end; (* Writes to the standard output. *) @@ -610,7 +610,7 @@ begin _compile_binary_rhs(); (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\0"); + _write_z("\tslt t0, t0, t1\n\0"); goto .compile_expression_end; end; @@ -1929,8 +1929,15 @@ begin _symbol_table_build(); (* Read the source from the standard input. *) + v4 := @source_code; + + .start_read; (* Second argument is buffer size. Modifying update the source_code definition. *) - _read_file(@source_code, 81920); + v0 := _read_file(v4, 81920); + if v0 > 0 then + v4 := v4 + v0; + goto .start_read; + end; _compile(); _exit(0); diff --git a/boot/stage13.elna b/boot/stage13.elna index 66f6593..925a1cd 100644 --- a/boot/stage13.elna +++ b/boot/stage13.elna @@ -5,6 +5,7 @@ (* Stage 13 compiler. *) (* - Multiline comments. *) +(* - elsif conditions. *) const symbol_builtin_name_int := "Int"; @@ -114,7 +115,7 @@ end; (* Returns the amount of bytes written in a0. *) proc _read_file(buffer: Word, size: Word); begin - _syscall(0, buffer, size, 0, 0, 0, 63); + return _syscall(0, buffer, size, 0, 0, 0, 63) end; (* Writes to the standard output. *) @@ -346,12 +347,6 @@ begin return destination end; -(* Advances the token stream by a0 bytes. 
*) -proc _advance_token(count: Word); -begin - source_code_position := source_code_position + count; -end; - (* Prints the current token. *) (* Parameters: *) @@ -367,12 +362,13 @@ end; proc _compile_integer_literal(); var integer_token: Word; + token_kind: Word; begin _write_z("\tli t0, \0"); - integer_token := _lexer_read_token(); + integer_token := _lexer_read_token(@token_kind); _write_token(integer_token); - _advance_token(integer_token); + _lexer_skip_token(); _write_c('\n'); end; @@ -382,16 +378,16 @@ var character: Word; begin _write_z("\tli t0, '\0"); - _advance_token(1); + source_code_position := source_code_position + 1; character := _load_byte(source_code_position); if character = '\\' then _write_c('\\'); - _advance_token(1); + source_code_position := source_code_position + 1; end; _write_s(source_code_position, 1); _write_s("'\n", 2); - _advance_token(2); + source_code_position := source_code_position + 2; end; proc _compile_variable_expression(); @@ -403,14 +399,14 @@ end; proc _compile_address_expression(); begin (* Skip the "@" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_designator(); end; proc _compile_negate_expression(); begin (* Skip the "-" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); _write_z("\tneg t0, t0\n\0"); @@ -419,7 +415,7 @@ end; proc _compile_not_expression(); begin (* Skip the "~" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); _write_z("\tnot t0, t0\n\0"); @@ -433,7 +429,8 @@ begin length := _string_length(source_code_position); offset := _add_string(source_code_position); - _advance_token(length + 2); + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; _write_z("\tla t0, strings\n\0"); _write_z("\tli t1, \0"); @@ -479,7 +476,7 @@ end; proc _compile_binary_rhs(); begin (* Skip the whitespace after the binary operator. 
*) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); (* Load the left expression from the stack; *) @@ -502,11 +499,11 @@ begin _write_z("sw t0, 64(sp)\n\0"); (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '+' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -515,7 +512,7 @@ begin goto .compile_expression_end; end; if current_character = '*' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -524,7 +521,7 @@ begin goto .compile_expression_end; end; if current_character = '&' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -533,7 +530,7 @@ begin goto .compile_expression_end; end; if current_character = 'o' then - _advance_token(2); + source_code_position := source_code_position + 2; _compile_binary_rhs(); (* Execute the operation. *) @@ -542,7 +539,7 @@ begin goto .compile_expression_end; end; if current_character = 'x' then - _advance_token(3); + source_code_position := source_code_position + 3; _compile_binary_rhs(); (* Execute the operation. *) @@ -551,7 +548,7 @@ begin goto .compile_expression_end; end; if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -560,7 +557,7 @@ begin goto .compile_expression_end; end; if current_character = '%' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. 
*) @@ -569,7 +566,7 @@ begin goto .compile_expression_end; end; if current_character = '/' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -578,11 +575,11 @@ begin goto .compile_expression_end; end; if current_character = '<' then - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '>' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -591,7 +588,7 @@ begin goto .compile_expression_end; end; if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -607,10 +604,10 @@ begin goto .compile_expression_end; end; if current_character = '>' then - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -621,7 +618,7 @@ begin _compile_binary_rhs(); (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\0"); + _write_z("\tslt t0, t0, t1\n\0"); goto .compile_expression_end; end; @@ -635,16 +632,21 @@ var name: Word; argument_count: Word; stack_offset: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); - name := source_code_position; + name_length := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_length := _lexer_global_end(); + name_length := _load_word(name_length) + -name; argument_count := 0; (* Skip the identifier and left paren. 
*) - _advance_token(name_length + 1); + _lexer_skip_token(); + source_code_position := source_code_position + 1; if _load_byte(source_code_position) = ')' then - goto .compile_call_finalize + goto .compile_call_finalize; end; .compile_call_loop; _compile_expression(); @@ -664,7 +666,7 @@ begin if _load_byte(source_code_position) <> ',' then goto .compile_call_finalize; end; - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_call_loop; .compile_call_finalize; @@ -692,43 +694,47 @@ begin _write_s(name, name_length); (* Skip the right paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; end; proc _compile_goto(); var next_token: Word; + token_kind: Word; begin - _advance_token(6); + _lexer_read_token(@token_kind); + _lexer_skip_token(); - next_token := _lexer_read_token(); + source_code_position := source_code_position + 2; + + next_token := _lexer_read_token(@token_kind); _write_z("\tj .\0"); _write_token(next_token); - _advance_token(next_token); + _lexer_skip_token(); end; -proc _compile_local_designator(symbol: Word, name_length: Word); +proc _compile_local_designator(symbol: Word); var variable_offset: Word; begin - _write_z("\taddi t0, sp, \0"); variable_offset := _parameter_info_get_offset(symbol); _write_i(variable_offset); _write_c('\n'); - _advance_token(name_length); + _lexer_skip_token(); end; proc _compile_global_designator(); var name: Word; + token_kind: Word; begin _write_z("\tla t0, \0"); - name := _lexer_read_token(); + name := _lexer_read_token(@token_kind); _write_token(name); - _advance_token(name); + _lexer_skip_token(); _write_c('\n'); end; @@ -737,12 +743,18 @@ proc _compile_designator(); var name_token: Word; lookup_result: Word; + token_kind: Word; + name: Word; begin - name_token := _lexer_read_token(); - lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token); + name_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); 
+ name := _load_word(name); + name_token := _lexer_global_end(); + name_token := _load_word(name_token) + -name; + lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); if lookup_result <> 0 then - _compile_local_designator(lookup_result, name_token); + _compile_local_designator(lookup_result); goto .compile_designator_end; end; _compile_global_designator(); @@ -758,7 +770,7 @@ begin _write_z("\tsw t0, 60(sp)\n\0"); (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); + source_code_position := source_code_position + 4; (* Compile the assignment. *) _compile_expression(); @@ -767,9 +779,13 @@ begin end; proc _compile_return_statement(); +var + token_kind: Word; begin (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; _compile_expression(); _write_z("\tmv a0, t0\n\0"); @@ -789,13 +805,18 @@ proc _compile_if(); var after_end_label: Word; condition_label: Word; + token_kind: Word; begin (* Skip "if ". *) - _advance_token(3); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; + (* Compile condition. *) _compile_expression(); (* Skip " then" with newline. 
*) - _advance_token(6); + _lexer_read_token(@token_kind); + _lexer_skip_token(); after_end_label := label_counter; label_counter := label_counter + 1; @@ -817,20 +838,53 @@ begin _write_label(condition_label); _write_z(":\n\0"); - if _memcmp(source_code_position, "end", 3) = 0 then + .compile_if_loop; + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_end() then goto .compile_if_end; end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else + if token_kind = _lexer_token_kind_else() then + goto .compile_if_else; end; + if token_kind = _lexer_token_kind_elsif() then + goto .compile_if_elsif; + end; + .compile_if_elsif; + _lexer_skip_token(); + source_code_position := source_code_position + 1; + + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* condition_label is the label in front of the next elsif condition or end. *) + condition_label := label_counter; + label_counter := label_counter + 1; + + _write_z("\tbeqz t0, \0"); + _write_label(condition_label); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(after_end_label); + _write_c('\n'); + + _write_label(condition_label); + _write_z(":\n\0"); + + goto .compile_if_loop; + .compile_if_else; - (* Skip "else" and newline. *) - _advance_token(5); + _lexer_skip_token(); _compile_procedure_body(); .compile_if_end; - (* Skip "end". *) - _advance_token(3); + _lexer_skip_token(); _write_label(after_end_label); _write_z(":\n\0"); @@ -839,74 +893,77 @@ end; proc _compile_label_declaration(); var label_token: Word; + token_kind: Word; + name: Word; begin (* Skip the dot. 
*) - _advance_token(1); - label_token := _lexer_read_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + label_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); _write_c('.'); - _write_s(source_code_position, label_token); + _write_s(name, label_token); _write_z(":\n\0"); - _advance_token(label_token); + _lexer_skip_token(); end; proc _compile_statement(); var current_byte: Word; + token_kind: Word; begin - _skip_spaces(); - current_byte := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - (* This is a call if the statement starts with an underscore. *) - if current_byte = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "goto ", 5) = 0 then + if token_kind = _lexer_token_kind_goto() then _compile_goto(); goto .compile_statement_semicolon; end; - if _memcmp(source_code_position, "if ", 3) = 0 then + if token_kind = _lexer_token_kind_if() then _compile_if(); goto .compile_statement_semicolon; end; - if _memcmp(source_code_position, "return ", 7) = 0 then + if token_kind = _lexer_token_kind_return() then _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - end; - if current_byte = '.' then - _compile_label_declaration(); - goto .compile_statement_semicolon; end; - _compile_assignment(); - goto .compile_statement_semicolon; + if token_kind = _lexer_token_kind_dot() then + _compile_label_declaration(); + goto .compile_statement_semicolon; + end; + if token_kind = _lexer_token_kind_identifier() then + current_byte := _lexer_global_start(); + current_byte := _load_word(current_byte); + current_byte := _load_byte(current_byte); + + (* This is a call if the statement starts with an underscore. 
*) + if current_byte = '_' then + _compile_call(); + else + _compile_assignment(); + end; + goto .compile_statement_semicolon; + end; .compile_statement_semicolon; - _advance_token(2); _write_c('\n'); - - .compile_statement_end; end; proc _compile_procedure_body(); var - lhs: Word; - rhs: Word; + token_kind: Word; begin .compile_procedure_body_loop; + _skip_empty_lines(); - _skip_spaces(); + _compile_statement(); + _lexer_read_token(@token_kind); - lhs := _memcmp(source_code_position, "end", 3) = 0; - rhs := _memcmp(source_code_position, "else", 4) = 0; - lhs := lhs or rhs; - - if lhs = 0 then - _compile_statement(); + if token_kind = _lexer_token_kind_semicolon() then + _lexer_skip_token(); goto .compile_procedure_body_loop; end; + _skip_empty_lines(); end; (* Writes a regster name to the standard output. *) @@ -927,7 +984,7 @@ var begin current_byte := _load_byte(source_code_position); if current_byte = '\t' then - _advance_token(1); + source_code_position := source_code_position + 1; _skip_spaces(); end; end; @@ -935,9 +992,10 @@ end; proc _read_type_expression(); var type_name: Word; + token_kind: Word; begin - type_name := _lexer_read_token(); - _advance_token(type_name); + type_name := _lexer_read_token(@token_kind); + _lexer_skip_token(); end; (* Parameters: *) @@ -1009,14 +1067,15 @@ var name_length: Word; info: Word; name_position: Word; + token_kind: Word; begin (* Read the parameter name. *) name_position := source_code_position; - name_length := _lexer_read_token(); - _advance_token(name_length); + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); (* Skip colon and space in front of the type expression. *) - _advance_token(2); + source_code_position := source_code_position + 2; _read_type_expression(); @@ -1038,7 +1097,7 @@ var parameter_counter: Word; begin (* Skip open paren. 
*) - _advance_token(1); + source_code_position := source_code_position + 1; parameter_counter := 0; .compile_procedure_prologue_skip; @@ -1047,12 +1106,12 @@ begin parameter_counter := parameter_counter + 1; if _load_byte(source_code_position) = ',' then - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_procedure_prologue_skip; end; end; (* Skip close paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; end; (* Parameters: *) @@ -1062,13 +1121,15 @@ var name_length: Word; info: Word; name_position: Word; + token_kind: Word; begin _skip_spaces(); name_position := source_code_position; (* Read and skip variable name, colon and the space *) - name_length := _lexer_read_token(); - _advance_token(name_length + 2); + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 2; _read_type_expression(); @@ -1076,7 +1137,7 @@ begin _symbol_table_enter(@symbol_table_local, name_position, name_length, info); (* Skip semicolon and newline after the variable declaration *) - _advance_token(2); + source_code_position := source_code_position + 2; end; proc _read_procedure_temporaries(); @@ -1086,7 +1147,7 @@ begin if _memcmp(source_code_position, "var", 3) <> 0 then goto .read_local_variables_end; end; - _advance_token(4); + source_code_position := source_code_position + 4; temporary_counter := 0; .read_local_variables_loop; @@ -1104,13 +1165,14 @@ end; proc _compile_procedure(); var name_length: Word; + token_kind: Word; begin (* Skip "proc ". *) - _advance_token(5); + source_code_position := source_code_position + 5; (* Clear local symbol table. *) _store_word(0, @symbol_table_local); - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); (* Write .type _procedure_name, @function. *) _write_z(".type \0"); @@ -1123,16 +1185,16 @@ begin _write_z(":\n\0"); (* Skip procedure name. 
*) - _advance_token(name_length); + _lexer_skip_token(); _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); _read_procedure_parameters(); (* Skip semicolon and newline. *) - _advance_token(2); + source_code_position := source_code_position + 2; _read_procedure_temporaries(); (* Skip semicolon, "begin" and newline. *) - _advance_token(6); + source_code_position := source_code_position + 6; _compile_procedure_body(); @@ -1140,16 +1202,17 @@ begin _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); + source_code_position := source_code_position + 5; end; (* Prints and skips a line. *) proc _skip_comment(); var - new_position: Word; + token_kind: Word; begin - new_position := _lexer_read_token(); - _advance_token(new_position + 1); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; end; (* Skip newlines and comments. *) @@ -1176,7 +1239,7 @@ begin current_byte := _load_byte(current_position + 1); if current_byte = '*' then - goto .skip_empty_lines_comment + goto .skip_empty_lines_comment; end; goto .skip_empty_lines_end; @@ -1191,7 +1254,7 @@ begin .skip_empty_lines_tab; current_position := current_position + 1; - goto .skip_empty_lines_loop + goto .skip_empty_lines_loop; .skip_empty_lines_end; end; @@ -1200,6 +1263,7 @@ proc _compile_global_initializer(); var current_byte: Word; length: Word; + token_kind: Word; begin current_byte := _load_byte(source_code_position); @@ -1211,13 +1275,14 @@ begin _write_i(); (* Skip the quoted string. *) - _advance_token(length + 2); + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; goto .compile_global_initializer_end; end; if current_byte = 'S' then (* Skip "S(". 
*) - _advance_token(2); + source_code_position := source_code_position + 2; if _load_byte(source_code_position) = ')' then goto .compile_global_initializer_closing; @@ -1226,19 +1291,19 @@ begin end; if current_byte = '@' then (* Skip @. *) - _advance_token(1); + source_code_position := source_code_position + 1; _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(); + current_byte := _lexer_read_token(@token_kind); _write_token(current_byte); - _advance_token(current_byte); + _lexer_skip_token(); goto .compile_global_initializer_end; end; if _is_digit(current_byte) = 1 then _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(); + current_byte := _lexer_read_token(@token_kind); _write_token(current_byte); - _advance_token(1); + source_code_position := source_code_position + 1; goto .compile_global_initializer_end; end; @@ -1248,14 +1313,14 @@ begin if _load_byte(source_code_position) <> ')' then (* Skip comma and whitespace after it. *) - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_global_initializer_loop; end; .compile_global_initializer_closing; (* Skip ")" *) - _advance_token(1); + source_code_position := source_code_position + 1; goto .compile_global_initializer_end; @@ -1265,8 +1330,9 @@ end; proc _compile_constant_declaration(); var name_length: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); _write_z(".type \0"); _write_token(name_length); @@ -1276,22 +1342,26 @@ begin _write_c(':'); (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(name_length + 4); + _lexer_skip_token(); + source_code_position := source_code_position + 4; _compile_global_initializer(); (* Skip semicolon and newline. 
*) - _advance_token(2); + source_code_position := source_code_position + 2; _write_c('\n'); end; proc _compile_const_part(); +var + token_kind: Word; begin _skip_empty_lines(); + _lexer_read_token(@token_kind); - if _memcmp(source_code_position, "const\0", 5) <> 0 then + if token_kind <> _lexer_token_kind_const() then goto .compile_const_part_end; end; (* Skip "const" with the newline after it. *) - _advance_token(6); + _lexer_skip_token(); _write_z(".section .rodata # Compiled from const section.\n\n\0"); .compile_const_part_loop; @@ -1300,8 +1370,7 @@ begin (* If the character at the line beginning is not indentation, *) (* it is probably the next code section. *) if _load_byte(source_code_position) = '\t' then - _advance_token(1); - + source_code_position := source_code_position + 1; _compile_constant_declaration(); goto .compile_const_part_loop; end; @@ -1312,8 +1381,9 @@ end; proc _compile_variable_declaration(); var name_length: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); _write_z(".type \0"); _write_token(name_length); @@ -1323,7 +1393,9 @@ begin _write_c(':'); (* Skip the variable name and colon with space before the type. *) - _advance_token(name_length + 2); + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); _read_type_expression(); if _load_byte(source_code_position) <> ' ' then @@ -1331,32 +1403,34 @@ begin _write_z(" .zero 81920\0"); else (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); + source_code_position := source_code_position + 4; _compile_global_initializer(); end; (* Skip semicolon and newline. 
*) - _advance_token(2); + _lexer_read_token(@token_kind); + _lexer_skip_token(); _write_c('\n'); end; proc _compile_var_part(); var - current_character: Word; + token_kind: Word; begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_var() then goto .compile_var_part_end; end; (* Skip "var" and newline. *) - _advance_token(4); + _lexer_skip_token(); _write_z(".section .data\n\0"); .compile_var_part_loop; _skip_empty_lines(); - current_character := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - if current_character = '\t' then - _advance_token(1); + if token_kind = _lexer_token_kind_identifier() then _compile_variable_declaration(); goto .compile_var_part_loop; end; @@ -2222,14 +2296,445 @@ begin return _lexer_get_transition(current_state, character_class) end; -proc _lexer_execute_action(action_to_perform: Word); +proc _lexer_token_kind_identifier(); +begin + return 1 +end; + +proc _lexer_token_kind_const(); +begin + return 2 +end; + +proc _lexer_token_kind_var(); +begin + return 3 +end; + +proc _lexer_token_kind_proc(); +begin + return 4 +end; + +proc _lexer_token_kind_type(); +begin + return 5 +end; + +proc _lexer_token_kind_begin(); +begin + return 6 +end; + +proc _lexer_token_kind_end(); +begin + return 7 +end; + +proc _lexer_token_kind_if(); +begin + return 8 +end; + +proc _lexer_token_kind_then(); +begin + return 9 +end; + +proc _lexer_token_kind_else(); +begin + return 10 +end; + +proc _lexer_token_kind_elsif(); +begin + return 11 +end; + +proc _lexer_token_kind_while(); +begin + return 12 +end; + +proc _lexer_token_kind_do(); +begin + return 13 +end; + +proc _lexer_token_kind_extern(); +begin + return 14 +end; + +proc _lexer_token_kind_record(); +begin + return 15 +end; + +proc _lexer_token_kind_union(); +begin + return 16 +end; + +proc _lexer_token_kind_true(); +begin + return 17 +end; + +proc _lexer_token_kind_false(); +begin + return 18 +end; + +proc 
_lexer_token_kind_nil(); +begin + return 19 +end; + +proc _lexer_token_kind_and(); +begin + return 20 +end; + +proc _lexer_token_kind_or(); +begin + return 21 +end; + +proc _lexer_token_kind_xor(); +begin + return 22 +end; + +proc _lexer_token_kind_pipe(); +begin + return 23 +end; + +proc _lexer_token_kind_not(); +begin + return 24 +end; + +proc _lexer_token_kind_return(); +begin + return 64 +end; + +proc _lexer_token_kind_module(); +begin + return 25 +end; + +proc _lexer_token_kind_program(); +begin + return 26 +end; + +proc _lexer_token_kind_import(); +begin + return 27 +end; + +proc _lexer_token_kind_cast(); +begin + return 28 +end; + +proc _lexer_token_kind_defer(); +begin + return 29 +end; + +proc _lexer_token_kind_case(); +begin + return 30 +end; + +proc _lexer_token_kind_of(); +begin + return 31 +end; + +proc _lexer_token_kind_trait(); +begin + return 32 +end; + +proc _lexer_token_kind_left_paren(); +begin + return 33 +end; + +proc _lexer_token_kind_right_paren(); +begin + return 34 +end; + +proc _lexer_token_kind_left_square(); +begin + return 35 +end; + +proc _lexer_token_kind_right_square(); +begin + return 36 +end; + +proc _lexer_token_kind_shift_left(); +begin + return 37 +end; + +proc _lexer_token_kind_shift_right(); +begin + return 38 +end; + +proc _lexer_token_kind_greater_equal(); +begin + return 39 +end; + +proc _lexer_token_kind_less_equal(); +begin + return 40 +end; + +proc _lexer_token_kind_greater_than(); +begin + return 41 +end; + +proc _lexer_token_kind_less_than(); +begin + return 42 +end; + +proc _lexer_token_kind_not_equal(); +begin + return 43 +end; + +proc _lexer_token_kind_equals(); +begin + return 44 +end; + +proc _lexer_token_kind_semicolon(); +begin + return 45 +end; + +proc _lexer_token_kind_dot(); +begin + return 46 +end; + +proc _lexer_token_kind_comma(); +begin + return 47 +end; + +proc _lexer_token_kind_plus(); +begin + return 48 +end; + +proc _lexer_token_kind_arrow(); +begin + return 49 +end; + +proc _lexer_token_kind_minus(); 
+begin + return 50 +end; + +proc _lexer_token_kind_multiplication(); +begin + return 51 +end; + +proc _lexer_token_kind_division(); +begin + return 52 +end; + +proc _lexer_token_kind_remainder(); +begin + return 53 +end; + +proc _lexer_token_kind_assignment(); +begin + return 54 +end; + +proc _lexer_token_kind_colon(); +begin + return 55 +end; + +proc _lexer_token_kind_hat(); +begin + return 56 +end; + +proc _lexer_token_kind_at(); +begin + return 57 +end; + +proc _lexer_token_kind_exclamation(); +begin + return 58 +end; + +proc _lexer_token_kind_string(); +begin + return 59 +end; + +proc _lexer_token_kind_character(); +begin + return 60 +end; + +proc _lexer_token_kind_integer(); +begin + return 61 +end; + +proc _lexer_token_kind_word(); +begin + return 62 +end; + +proc _lexer_token_kind_goto(); +begin + return 63 +end; + +proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0; + end; + return result +end; + +proc _lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + token_length: Word; +begin + result := _lexer_token_kind_identifier(); + token_length := position_end + -position_start; + + if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then + result := _lexer_token_kind_const(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then + result := _lexer_token_kind_var(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then + result := _lexer_token_kind_proc(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then + result := _lexer_token_kind_type(); + goto .lexer_classify_keyword_end; + end; + if 
_lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then + result := _lexer_token_kind_begin(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then + result := _lexer_token_kind_end(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then + result := _lexer_token_kind_return(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then + result := _lexer_token_kind_goto(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then + result := _lexer_token_kind_if(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then + result := _lexer_token_kind_while(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then + result := _lexer_token_kind_then(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then + result := _lexer_token_kind_else(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then + result := _lexer_token_kind_elsif(); + goto .lexer_classify_keyword_end; + end; + .lexer_classify_keyword_end; + return result +end; + +proc _lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := _lexer_token_kind_colon(); + goto .lexer_classify_finalize_result; + end; + if character = '.' 
then + result := _lexer_token_kind_dot(); + goto .lexer_classify_finalize_result; + end; + .lexer_classify_finalize_result; + return result +end; + +proc _lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := _lexer_token_kind_semicolon(); + end; + return result +end; + +proc _lexer_execute_action(action_to_perform: Word, kind: Word); var pointer_start: Word; pointer_end: Word; position_start: Word; position_end: Word; + intermediate: Word; begin - pointer_start := _lexer_global_end(); + pointer_start := _lexer_global_start(); position_start := _load_word(pointer_start); pointer_end := _lexer_global_end(); position_end := _load_word(pointer_end); @@ -2247,34 +2752,40 @@ begin goto .action_to_perform_end; end; if action_to_perform = _lexer_action_single() then + _store_word(position_end + 1, pointer_end); + + intermediate := _lexer_classify_single(position_start); + _store_word(intermediate, kind); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_eof() then goto .action_to_perform_end; end; if action_to_perform = _lexer_action_finalize() then + intermediate := _lexer_classify_finalize(position_start); + _store_word(intermediate, kind); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_composite() then goto .action_to_perform_end; end; if action_to_perform = _lexer_action_key_id() then - _store_word(position_end + 1, pointer_end); + intermediate := _lexer_classify_keyword(position_start, position_end); + _store_word(intermediate, kind); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_integer() then - _store_word(position_end + 1, pointer_end); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_delimited() then - _store_word(position_end + 1, pointer_end); + (* _store_word(position_end + 1, pointer_end); *) goto .action_to_perform_end; end; 
.action_to_perform_end; end; -proc _lexer_execute_transition(); +proc _lexer_execute_transition(kind: Word); var next_transition: Word; next_state: Word; @@ -2288,50 +2799,60 @@ begin global_state := _lexer_global_state(); _store_word(next_state, global_state); - _lexer_execute_action(action_to_perform); + _lexer_execute_action(action_to_perform, kind); return next_state end; -proc _lexer_advance_token(); -var - executed_transition: Word; +proc _lexer_advance_token(kind: Word); begin - .lexer_advance_token_loop; - executed_transition := _lexer_execute_transition(); - - if executed_transition <> _lexer_state_end() then - goto .lexer_advance_token_loop; + if _lexer_execute_transition(kind) <> _lexer_state_end() then + _lexer_advance_token(kind); end; end; (* Reads the next token. *) (* Returns token length in a0. *) -proc _lexer_read_token(); +proc _lexer_read_token(kind: Word); var new_position: Word; - token_end: Word; begin _lexer_reset(); - _lexer_advance_token(); + _lexer_advance_token(kind); new_position := _lexer_global_end(); - token_end := _load_word(new_position); - token_end := token_end + -source_code_position; + return _load_word(new_position) + -source_code_position +end; - return token_end + -1 +(* Advances the token stream past the last read token. *) +proc _lexer_skip_token(); +var + new_position: Word; +begin + new_position := _lexer_global_end(); + source_code_position := _load_word(new_position); end; (* Entry point. *) proc _start(); +var + last_read: Word; + offset: Word; begin _lexer_initialize(); _symbol_table_build(); (* Read the source from the standard input. *) + offset := @source_code; + + .start_read; (* Second argument is buffer size. Modifying update the source_code definition. 
*) - _read_file(@source_code, 81920); + last_read := _read_file(offset, 81920); + if last_read > 0 then + offset := offset + last_read; + goto .start_read; + end; _compile(); _exit(0); diff --git a/boot/stage14.elna b/boot/stage14.elna index 66f6593..fd3d2d3 100644 --- a/boot/stage14.elna +++ b/boot/stage14.elna @@ -1,10 +1,10 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) +(* + * This Source Code Form is subject to the terms of the Mozilla Public License, + * v. 2.0. If a copy of the MPL was not distributed with this file, You can + * obtain one at https://mozilla.org/MPL/2.0/. + *) -(* Stage 13 compiler. *) - -(* - Multiline comments. *) +(* Stage 14 compiler. *) const symbol_builtin_name_int := "Int"; @@ -13,24 +13,26 @@ const symbol_builtin_name_char := "Char"; symbol_builtin_name_bool := "Bool"; - (* Every type info starts with a word describing what type it is. *) + (* Every type info starts with a word describing what type it is. - (* PRIMITIVE_TYPE = 1 *) + PRIMITIVE_TYPE = 1 + + Primitive types have only type size. *) - (* Primitive types have only type size. *) symbol_builtin_type_int := S(1, 4); symbol_builtin_type_word := S(1, 4); symbol_builtin_type_pointer := S(1, 4); symbol_builtin_type_char := S(1, 1); symbol_builtin_type_bool := S(1, 1); - (* Info objects start with a word describing its type. *) + (* Info objects start with a word describing its type. - (* INFO_TYPE = 1 *) - (* INFO_PARAMETER = 2 *) - (* INFO_TEMPORARY = 3 *) + INFO_TYPE = 1 + INFO_PARAMETER = 2 + INFO_TEMPORARY = 3 + + Type info has the type it belongs to. *) - (* Type info has the type it belongs to. 
*) symbol_type_info_int := S(1, @symbol_builtin_type_int); symbol_type_info_word := S(1, @symbol_builtin_type_word); symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); @@ -51,13 +53,15 @@ var source_code_position: Pointer := @source_code; memory_free_pointer: Word := @memory; -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. *) - -(* Parameters: *) -(* string - String token pointer. *) - -(* Returns the length in a0. *) +(** + * Calculates and returns the string token length between quotes, including the + * escaping slash characters. + * + * Parameters: + * string - String token pointer. + * + * Returns the length in a0. + *) proc _string_length(string: Word); var counter: Word; @@ -76,12 +80,14 @@ begin return counter end; -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* string - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. *) +(** + * Adds a string to the global, read-only string storage. + * + * Parameters: + * string - String token. + * + * Returns the offset from the beginning of the storage to the new string in a0. + *) proc _add_string(string: Word); var contents: Word; @@ -107,33 +113,41 @@ begin return result end; -(* Reads standard input into a buffer. *) -(* buffer - Buffer pointer. *) -(* size - Buffer size. *) - -(* Returns the amount of bytes written in a0. *) +(** + * Reads standard input into a buffer. + * + * Parameters: + * buffer - Buffer pointer. + * size - Buffer size. + * + * Returns the amount of bytes written in a0. + *) proc _read_file(buffer: Word, size: Word); begin - _syscall(0, buffer, size, 0, 0, 0, 63); + return _syscall(0, buffer, size, 0, 0, 0, 63) end; -(* Writes to the standard output. *) - -(* Parameters: *) -(* buffer - Buffer. *) -(* size - Buffer length. *) +(** + * Writes to the standard output. + * + * Parameters: + * buffer - Buffer. 
+ * size - Buffer length. + *) proc _write_s(buffer: Word, size: Word); begin _syscall(1, buffer, size, 0, 0, 0, 64); end; -(* Writes a number to a string buffer. *) - -(* Parameters: *) -(* number - Whole number. *) -(* output_buffer - Buffer pointer. *) - -(* Sets a0 to the length of the written number. *) +(** + * Writes a number to a string buffer. + * + * Parameters: + * number - Whole number. + * output_buffer - Buffer pointer. + * + * Sets a0 to the length of the written number. + *) proc _print_i(number: Word, output_buffer: Word); var local_buffer: Word; @@ -159,8 +173,7 @@ begin if number <> 0 then goto .print_i_digit10; - end; - if is_negative = 1 then + elsif is_negative = 1 then _store_byte('-', local_buffer); local_buffer := local_buffer + -1; end; @@ -171,10 +184,12 @@ begin return result end; -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* number - Whole number. *) +(** + * Writes a number to the standard output. + * + * Parameters: + * number - Whole number. + *) proc _write_i(number: Word); var local_buffer: Word; @@ -184,19 +199,23 @@ begin _write_s(@local_buffer, length); end; -(* Writes a character from a0 into the standard output. *) - -(* Parameters: *) -(* character - Character to write. *) +(** + * Writes a character from a0 into the standard output. + * + * Parameters: + * character - Character to write. + *) proc _write_c(character: Word); begin _write_s(@character, 1); end; -(* Write null terminated string. *) - -(* Parameters: *) -(* string - String. *) +(** + * Write null terminated string. + * + * Parameters: + * string - String. + *) proc _write_z(string: Word); var next_byte: Word; @@ -238,12 +257,14 @@ begin return lhs & rhs end; -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Paramters: *) -(* character - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. 
*) +(** + * Detects if the passed character is a 7-bit alpha character or an underscore. + * + * Parameters: + * character - Tested character. + * + * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. + *) proc _is_alpha(character: Word); var is_upper_result: Word; @@ -259,13 +280,15 @@ begin return is_alpha_result or is_underscore end; -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* character - Exemined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) +(** + * Detects whether the passed character is a digit + * (a value between 0 and 9). + * + * Parameters: + * character - Examined value. + * + * Sets a0 to 1 if it is a digit, to 0 otherwise. + *) proc _is_digit(character: Word); var lhs: Word; @@ -288,12 +311,14 @@ begin return lhs or rhs end; -(* Parameters: *) -(* lhs - First pointer. *) -(* rhs - Second pointer. *) -(* count - The length to compare. *) - -(* Returns 0 if memory regions are equal. *) +(** + * Parameters: + * lhs - First pointer. + * rhs - Second pointer. + * count - The length to compare. + * + * Returns 0 if memory regions are equal. + *) proc _memcmp(lhs: Word, rhs: Word, count: Word); var lhs_byte: Word; @@ -320,14 +345,16 @@ begin return result end; -(* Copies memory. *) - -(* Parameters: *) -(* destination - Destination. *) -(* source - Source. *) -(* count - Size. *) - -(* Returns the destination. *) +(** + * Copies memory. + * + * Parameters: + * destination - Destination. + * source - Source. + * count - Size. + * + * Returns the destination. + *) proc _memcpy(destination: Word, source: Word, count: Word); var current_byte: Word; @@ -346,18 +373,14 @@ begin return destination end; -(* Advances the token stream by a0 bytes. *) -proc _advance_token(count: Word); -begin - source_code_position := source_code_position + count; -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* length - Token length. 
*) - -(* Returns a0 unchanged. *) +(** + * Prints the current token. + * + * Parameters: + * length - Token length. + * + * Returns a0 unchanged. + *) proc _write_token(length: Word); begin _write_s(source_code_position, length); @@ -367,12 +390,13 @@ end; proc _compile_integer_literal(); var integer_token: Word; + token_kind: Word; begin _write_z("\tli t0, \0"); - integer_token := _lexer_read_token(); + integer_token := _lexer_read_token(@token_kind); _write_token(integer_token); - _advance_token(integer_token); + _lexer_skip_token(); _write_c('\n'); end; @@ -382,16 +406,16 @@ var character: Word; begin _write_z("\tli t0, '\0"); - _advance_token(1); + source_code_position := source_code_position + 1; character := _load_byte(source_code_position); if character = '\\' then _write_c('\\'); - _advance_token(1); + source_code_position := source_code_position + 1; end; _write_s(source_code_position, 1); _write_s("'\n", 2); - _advance_token(2); + source_code_position := source_code_position + 2; end; proc _compile_variable_expression(); @@ -403,14 +427,14 @@ end; proc _compile_address_expression(); begin (* Skip the "@" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_designator(); end; proc _compile_negate_expression(); begin (* Skip the "-" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); _write_z("\tneg t0, t0\n\0"); @@ -419,7 +443,7 @@ end; proc _compile_not_expression(); begin (* Skip the "~" sign. 
*) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); _write_z("\tnot t0, t0\n\0"); @@ -433,7 +457,8 @@ begin length := _string_length(source_code_position); offset := _add_string(source_code_position); - _advance_token(length + 2); + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; _write_z("\tla t0, strings\n\0"); _write_z("\tli t1, \0"); @@ -451,27 +476,20 @@ begin if current_character = '\'' then _compile_character_literal(); - end; - if current_character = '@' then + elsif current_character = '@' then _compile_address_expression(); - end; - if current_character = '-' then + elsif current_character = '-' then _compile_negate_expression(); - end; - if current_character = '~' then + elsif current_character = '~' then _compile_not_expression(); - end; - if current_character = '"' then + elsif current_character = '"' then _compile_string_literal(); - end; - if current_character = '_' then + elsif current_character = '_' then _compile_call(); _write_z("\nmv t0, a0\n\0"); - end; - if _is_digit(current_character) = 1 then + elsif _is_digit(current_character) = 1 then _compile_integer_literal(); - end; - if _is_lower(current_character) = 1 then + elsif _is_lower(current_character) = 1 then _compile_variable_expression(); end; end; @@ -479,7 +497,7 @@ end; proc _compile_binary_rhs(); begin (* Skip the whitespace after the binary operator. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); (* Load the left expression from the stack; *) @@ -502,128 +520,94 @@ begin _write_z("sw t0, 64(sp)\n\0"); (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '+' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. 
*) _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = '*' then - _advance_token(1); + elsif current_character = '*' then + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = '&' then - _advance_token(1); + elsif current_character = '&' then + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = 'o' then - _advance_token(2); + elsif current_character = 'o' then + source_code_position := source_code_position + 2; _compile_binary_rhs(); (* Execute the operation. *) _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = 'x' then - _advance_token(3); + elsif current_character = 'x' then + source_code_position := source_code_position + 3; _compile_binary_rhs(); (* Execute the operation. *) _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = '=' then - _advance_token(1); + elsif current_character = '=' then + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '%' then - _advance_token(1); + elsif current_character = '%' then + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("rem t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '/' then - _advance_token(1); + elsif current_character = '/' then + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. 
*) _write_z("div t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '<' then - _advance_token(1); + elsif current_character = '<' then + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '>' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '=' then - _advance_token(1); + elsif current_character = '=' then + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + else + _compile_binary_rhs(); - goto .compile_expression_end; + (* Execute the operation. *) + _write_z("slt t0, t1, t0\n\0"); end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '>' then - _advance_token(1); + elsif current_character = '>' then + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + else + _compile_binary_rhs(); - goto .compile_expression_end; + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\n\0"); end; - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tslt t0, t1, t0\n\0"); - - goto .compile_expression_end; end; .compile_expression_end; @@ -635,16 +619,21 @@ var name: Word; argument_count: Word; stack_offset: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); - name := source_code_position; + name_length := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_length := _lexer_global_end(); + name_length := _load_word(name_length) + -name; argument_count := 0; (* Skip the identifier and left paren. *) - _advance_token(name_length + 1); + _lexer_skip_token(); + source_code_position := source_code_position + 1; if _load_byte(source_code_position) = ')' then - goto .compile_call_finalize + goto .compile_call_finalize; end; .compile_call_loop; _compile_expression(); @@ -664,7 +653,7 @@ begin if _load_byte(source_code_position) <> ',' then goto .compile_call_finalize; end; - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_call_loop; .compile_call_finalize; @@ -692,43 +681,47 @@ begin _write_s(name, name_length); (* Skip the right paren. 
*) - _advance_token(1); + source_code_position := source_code_position + 1; end; proc _compile_goto(); var next_token: Word; + token_kind: Word; begin - _advance_token(6); + _lexer_read_token(@token_kind); + _lexer_skip_token(); - next_token := _lexer_read_token(); + source_code_position := source_code_position + 2; + + next_token := _lexer_read_token(@token_kind); _write_z("\tj .\0"); _write_token(next_token); - _advance_token(next_token); + _lexer_skip_token(); end; -proc _compile_local_designator(symbol: Word, name_length: Word); +proc _compile_local_designator(symbol: Word); var variable_offset: Word; begin - _write_z("\taddi t0, sp, \0"); variable_offset := _parameter_info_get_offset(symbol); _write_i(variable_offset); _write_c('\n'); - _advance_token(name_length); + _lexer_skip_token(); end; proc _compile_global_designator(); var name: Word; + token_kind: Word; begin _write_z("\tla t0, \0"); - name := _lexer_read_token(); + name := _lexer_read_token(@token_kind); _write_token(name); - _advance_token(name); + _lexer_skip_token(); _write_c('\n'); end; @@ -737,12 +730,18 @@ proc _compile_designator(); var name_token: Word; lookup_result: Word; + token_kind: Word; + name: Word; begin - name_token := _lexer_read_token(); - lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token); + name_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_token := _lexer_global_end(); + name_token := _load_word(name_token) + -name; + lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); if lookup_result <> 0 then - _compile_local_designator(lookup_result, name_token); + _compile_local_designator(lookup_result); goto .compile_designator_end; end; _compile_global_designator(); @@ -758,7 +757,7 @@ begin _write_z("\tsw t0, 60(sp)\n\0"); (* Skip the assignment sign (:=) with surrounding whitespaces. 
*) - _advance_token(4); + source_code_position := source_code_position + 4; (* Compile the assignment. *) _compile_expression(); @@ -767,18 +766,24 @@ begin end; proc _compile_return_statement(); +var + token_kind: Word; begin (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; _compile_expression(); _write_z("\tmv a0, t0\n\0"); end; -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* counter - Label counter. *) +(** + * Writes a label, .Ln, where n is a unique number. + * + * Parameters: + * counter - Label counter. + *) proc _write_label(counter: Word); begin _write_z(".L\0"); @@ -789,13 +794,18 @@ proc _compile_if(); var after_end_label: Word; condition_label: Word; + token_kind: Word; begin (* Skip "if ". *) - _advance_token(3); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; + (* Compile condition. *) _compile_expression(); (* Skip " then" with newline. *) - _advance_token(6); + _lexer_read_token(@token_kind); + _lexer_skip_token(); after_end_label := label_counter; label_counter := label_counter + 1; @@ -817,20 +827,42 @@ begin _write_label(condition_label); _write_z(":\n\0"); - if _memcmp(source_code_position, "end", 3) = 0 then - goto .compile_if_end; - end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else - end; - .compile_if_else; - (* Skip "else" and newline. *) - _advance_token(5); - _compile_procedure_body(); + .compile_if_loop; - .compile_if_end; - (* Skip "end". *) - _advance_token(3); + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_else() then + _lexer_skip_token(); + _compile_procedure_body(); + elsif token_kind = _lexer_token_kind_elsif() then + _lexer_skip_token(); + source_code_position := source_code_position + 1; + + (* Compile condition. 
*) + _compile_expression(); + (* Skip " then" with newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* condition_label is the label in front of the next elsif condition or end. *) + condition_label := label_counter; + label_counter := label_counter + 1; + + _write_z("\tbeqz t0, \0"); + _write_label(condition_label); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(after_end_label); + _write_c('\n'); + + _write_label(condition_label); + _write_z(":\n\0"); + + goto .compile_if_loop; + end; + _lexer_skip_token(); _write_label(after_end_label); _write_z(":\n\0"); @@ -839,81 +871,74 @@ end; proc _compile_label_declaration(); var label_token: Word; + token_kind: Word; + name: Word; begin (* Skip the dot. *) - _advance_token(1); - label_token := _lexer_read_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + label_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); _write_c('.'); - _write_s(source_code_position, label_token); + _write_s(name, label_token); _write_z(":\n\0"); - _advance_token(label_token); + _lexer_skip_token(); end; proc _compile_statement(); var current_byte: Word; + token_kind: Word; begin - _skip_spaces(); - current_byte := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - (* This is a call if the statement starts with an underscore. 
*) - if current_byte = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "goto ", 5) = 0 then + if token_kind = _lexer_token_kind_goto() then _compile_goto(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "if ", 3) = 0 then + elsif token_kind = _lexer_token_kind_if() then _compile_if(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "return ", 7) = 0 then + elsif token_kind = _lexer_token_kind_return() then _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - end; - if current_byte = '.' then + elsif token_kind = _lexer_token_kind_dot() then _compile_label_declaration(); + elsif token_kind = _lexer_token_kind_identifier() then + current_byte := _lexer_global_start(); + current_byte := _load_word(current_byte); + current_byte := _load_byte(current_byte); - goto .compile_statement_semicolon; + (* This is a call if the statement starts with an underscore. *) + if current_byte = '_' then + _compile_call(); + else + _compile_assignment(); + end; end; - _compile_assignment(); - goto .compile_statement_semicolon; - - .compile_statement_semicolon; - _advance_token(2); _write_c('\n'); - - .compile_statement_end; end; proc _compile_procedure_body(); var - lhs: Word; - rhs: Word; + token_kind: Word; begin - .compile_procedure_body_loop; _skip_empty_lines(); - _skip_spaces(); + _compile_statement(); + _lexer_read_token(@token_kind); - lhs := _memcmp(source_code_position, "end", 3) = 0; - rhs := _memcmp(source_code_position, "else", 4) = 0; - lhs := lhs or rhs; - - if lhs = 0 then - _compile_statement(); - goto .compile_procedure_body_loop; + if token_kind = _lexer_token_kind_semicolon() then + _lexer_skip_token(); + _compile_procedure_body(); + else + _skip_empty_lines(); end; end; -(* Writes a regster name to the standard output. *) - -(* Parameters: *) -(* register_character - Register character. 
*) -(* register_number - Register number. *) +(** + * Writes a register name to the standard output. + * + * Parameters: + * register_character - Register character. + * register_number - Register number. + *) proc _write_register(register_character: Word, register_number: Word); begin _write_c(register_character); @@ -927,7 +952,7 @@ var begin current_byte := _load_byte(source_code_position); if current_byte = '\t' then - _advance_token(1); + source_code_position := source_code_position + 1; _skip_spaces(); end; end; @@ -935,14 +960,17 @@ end; proc _read_type_expression(); var type_name: Word; + token_kind: Word; begin - type_name := _lexer_read_token(); - _advance_token(type_name); + type_name := _lexer_read_token(@token_kind); + _lexer_skip_token(); end; -(* Parameters: *) - -(* parameter_index - Parameter index. *) +(** + * Parameters: + * + * parameter_index - Parameter index. + *) proc _parameter_info_create(parameter_index: Word); var offset: Word; @@ -971,9 +999,11 @@ begin return _load_word(info) end; -(* Parameters: *) - -(* temporary_index - Parameter index. *) +(** + * Parameters: + * + * temporary_index - Temporary index. + *) proc _temporary_info_create(temporary_index: Word); var offset: Word; @@ -1001,22 +1031,25 @@ begin return _load_word(info) end; -(* Parameters: *) - -(* parameter_index - Parameter index. *) +(** + * Parameters: + * + * parameter_index - Parameter index. + *) proc _read_procedure_parameter(parameter_index: Word); var name_length: Word; info: Word; name_position: Word; + token_kind: Word; begin (* Read the parameter name. *) name_position := source_code_position; - name_length := _lexer_read_token(); - _advance_token(name_length); + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); (* Skip colon and space in front of the type expression. 
*) - _advance_token(2); + source_code_position := source_code_position + 2; _read_type_expression(); @@ -1038,7 +1071,7 @@ var parameter_counter: Word; begin (* Skip open paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; parameter_counter := 0; .compile_procedure_prologue_skip; @@ -1047,28 +1080,32 @@ begin parameter_counter := parameter_counter + 1; if _load_byte(source_code_position) = ',' then - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_procedure_prologue_skip; end; end; (* Skip close paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; end; -(* Parameters: *) -(* variable_index - Variable index. *) +(** + * Parameters: + * variable_index - Variable index. + *) proc _read_procedure_temporary(variable_index: Word); var name_length: Word; info: Word; name_position: Word; + token_kind: Word; begin _skip_spaces(); name_position := source_code_position; (* Read and skip variable name, colon and the space *) - name_length := _lexer_read_token(); - _advance_token(name_length + 2); + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 2; _read_type_expression(); @@ -1076,41 +1113,38 @@ begin _symbol_table_enter(@symbol_table_local, name_position, name_length, info); (* Skip semicolon and newline after the variable declaration *) - _advance_token(2); + source_code_position := source_code_position + 2; end; proc _read_procedure_temporaries(); var temporary_counter: Word; begin - if _memcmp(source_code_position, "var", 3) <> 0 then - goto .read_local_variables_end; + if _memcmp(source_code_position, "var", 3) = 0 then + source_code_position := source_code_position + 4; + temporary_counter := 0; + + .read_local_variables_loop; + if _memcmp(source_code_position, "begin", 5) <> 0 then + _read_procedure_temporary(temporary_counter); + + temporary_counter := temporary_counter + 1; + goto 
.read_local_variables_loop; + end; end; - _advance_token(4); - temporary_counter := 0; - - .read_local_variables_loop; - if _memcmp(source_code_position, "begin", 5) = 0 then - goto .read_local_variables_end; - end; - _read_procedure_temporary(temporary_counter); - - temporary_counter := temporary_counter + 1; - goto .read_local_variables_loop; - - .read_local_variables_end; end; proc _compile_procedure(); var name_length: Word; + token_kind: Word; begin (* Skip "proc ". *) - _advance_token(5); + source_code_position := source_code_position + 5; (* Clear local symbol table. *) _store_word(0, @symbol_table_local); - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); (* Write .type _procedure_name, @function. *) _write_z(".type \0"); @@ -1123,16 +1157,16 @@ begin _write_z(":\n\0"); (* Skip procedure name. *) - _advance_token(name_length); + _lexer_skip_token(); _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); _read_procedure_parameters(); (* Skip semicolon and newline. *) - _advance_token(2); + source_code_position := source_code_position + 2; _read_procedure_temporaries(); (* Skip semicolon, "begin" and newline. *) - _advance_token(6); + source_code_position := source_code_position + 6; _compile_procedure_body(); @@ -1140,19 +1174,24 @@ begin _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); + source_code_position := source_code_position + 5; end; -(* Prints and skips a line. *) +(** + * Prints and skips a line. + *) proc _skip_comment(); var - new_position: Word; + token_kind: Word; begin - new_position := _lexer_read_token(); - _advance_token(new_position + 1); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; end; -(* Skip newlines and comments. *) +(** + * Skip newlines and comments. 
+ *) proc _skip_empty_lines(); var current_position: Word; @@ -1165,41 +1204,27 @@ begin current_byte := _load_byte(current_position); if current_byte = '\n' then - goto .skip_empty_lines_newline; + source_code_position := current_position + 1; + _skip_empty_lines(); + elsif current_byte = '\t' then + current_position := current_position + 1; + goto .skip_empty_lines_loop; + elsif current_byte = '(' then + current_byte := _load_byte(current_position + 1); + + if current_byte = '*' then + source_code_position := current_position; + _skip_comment(); + _skip_empty_lines(); + end; end; - if current_byte = '\t' then - goto .skip_empty_lines_tab; - end; - if current_byte <> '(' then - goto .skip_empty_lines_end; - end; - current_byte := _load_byte(current_position + 1); - - if current_byte = '*' then - goto .skip_empty_lines_comment - end; - goto .skip_empty_lines_end; - - .skip_empty_lines_comment; - source_code_position := current_position; - _skip_comment(); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_newline; - source_code_position := current_position + 1; - goto .skip_empty_lines_rerun; - - .skip_empty_lines_tab; - current_position := current_position + 1; - goto .skip_empty_lines_loop - - .skip_empty_lines_end; end; proc _compile_global_initializer(); var current_byte: Word; length: Word; + token_kind: Word; begin current_byte := _load_byte(source_code_position); @@ -1211,51 +1236,48 @@ begin _write_i(); (* Skip the quoted string. *) - _advance_token(length + 2); + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; goto .compile_global_initializer_end; - end; - if current_byte = 'S' then + elsif current_byte = 'S' then (* Skip "S(". 
*) - _advance_token(2); + source_code_position := source_code_position + 2; if _load_byte(source_code_position) = ')' then goto .compile_global_initializer_closing; end; goto .compile_global_initializer_loop; - end; - if current_byte = '@' then + elsif current_byte = '@' then (* Skip @. *) - _advance_token(1); + source_code_position := source_code_position + 1; _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(); + current_byte := _lexer_read_token(@token_kind); _write_token(current_byte); - _advance_token(current_byte); + _lexer_skip_token(); + + goto .compile_global_initializer_end; + elsif _is_digit(current_byte) = 1 then + _write_z("\n\t.word \0"); + current_byte := _lexer_read_token(@token_kind); + _write_token(current_byte); + source_code_position := source_code_position + 1; goto .compile_global_initializer_end; end; - if _is_digit(current_byte) = 1 then - _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(); - _write_token(current_byte); - _advance_token(1); - - goto .compile_global_initializer_end; - end; - .compile_global_initializer_loop; _compile_global_initializer(); if _load_byte(source_code_position) <> ')' then (* Skip comma and whitespace after it. *) - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_global_initializer_loop; end; .compile_global_initializer_closing; (* Skip ")" *) - _advance_token(1); + source_code_position := source_code_position + 1; goto .compile_global_initializer_end; @@ -1265,8 +1287,9 @@ end; proc _compile_constant_declaration(); var name_length: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); _write_z(".type \0"); _write_token(name_length); @@ -1276,32 +1299,35 @@ begin _write_c(':'); (* Skip the constant name with assignment sign and surrounding whitespaces. 
*) - _advance_token(name_length + 4); + _lexer_skip_token(); + source_code_position := source_code_position + 4; _compile_global_initializer(); (* Skip semicolon and newline. *) - _advance_token(2); + source_code_position := source_code_position + 2; _write_c('\n'); end; proc _compile_const_part(); +var + token_kind: Word; begin _skip_empty_lines(); + _lexer_read_token(@token_kind); - if _memcmp(source_code_position, "const\0", 5) <> 0 then + if token_kind <> _lexer_token_kind_const() then goto .compile_const_part_end; end; (* Skip "const" with the newline after it. *) - _advance_token(6); + _lexer_skip_token(); _write_z(".section .rodata # Compiled from const section.\n\n\0"); .compile_const_part_loop; _skip_empty_lines(); - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section. *) + (* If the character at the line beginning is not indentation, + it is probably the next code section. *) if _load_byte(source_code_position) = '\t' then - _advance_token(1); - + source_code_position := source_code_position + 1; _compile_constant_declaration(); goto .compile_const_part_loop; end; @@ -1312,8 +1338,9 @@ end; proc _compile_variable_declaration(); var name_length: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); _write_z(".type \0"); _write_token(name_length); @@ -1323,7 +1350,9 @@ begin _write_c(':'); (* Skip the variable name and colon with space before the type. *) - _advance_token(name_length + 2); + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); _read_type_expression(); if _load_byte(source_code_position) <> ' ' then @@ -1331,32 +1360,34 @@ begin _write_z(" .zero 81920\0"); else (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); + source_code_position := source_code_position + 4; _compile_global_initializer(); end; (* Skip semicolon and newline. 
*) - _advance_token(2); + _lexer_read_token(@token_kind); + _lexer_skip_token(); _write_c('\n'); end; proc _compile_var_part(); var - current_character: Word; + token_kind: Word; begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_var() then goto .compile_var_part_end; end; (* Skip "var" and newline. *) - _advance_token(4); + _lexer_skip_token(); _write_z(".section .data\n\0"); .compile_var_part_loop; _skip_empty_lines(); - current_character := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - if current_character = '\t' then - _advance_token(1); + if token_kind = _lexer_token_kind_identifier() then _compile_variable_declaration(); goto .compile_var_part_loop; end; @@ -1418,23 +1449,24 @@ begin _write_c('\n'); end; -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) +(** + * Terminates the program. + *) proc _exit(); begin _syscall(0, 0, 0, 0, 0, 0, 93); end; -(* Looks for a symbol in the given symbol table. *) - -(* Parameters: *) -(* symbol_table - Symbol table. *) -(* symbol_name - Symbol name pointer. *) -(* name_length - Symbol name length. *) - -(* Returns the symbol pointer or 0 in a0. *) +(** + * Looks for a symbol in the given symbol table. + * + * Parameters: + * symbol_table - Symbol table. + * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * + * Returns the symbol pointer or 0 in a0. + *) proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); var result: Word; @@ -1480,13 +1512,15 @@ begin return result end; -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* symbol_table - Symbol table. *) -(* symbol_name - Symbol name pointer. *) -(* name_length - Symbol name length. *) -(* symbol - Symbol pointer. *) +(** + * Inserts a symbol into the table. + * + * Parameters: + * symbol_table - Symbol table. 
+ * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * symbol - Symbol pointer. + *) proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); var table_length: Word; @@ -1525,14 +1559,15 @@ begin end; -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group a handled equivalently. *) - -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) - +(** + * Classification table assigns each possible character to a group (class). All + * characters of the same group are handled equivalently. + * + * Transition = record + * action: TransitionAction; + * next_state: TransitionState + * end; + *) proc _lexer_class_invalid(); begin return 1 @@ -1773,12 +1808,14 @@ begin return 10 end; -(* Assigns some value to at array index. *) - -(* Parameters: *) -(* array - Array pointer. *) -(* index - Index (word offset into the array). *) -(* data - Data to assign. *) +(** + * Assigns a value at the given array index. + * + * Parameters: + * array - Array pointer. + * index - Index (word offset into the array). + * data - Data to assign. + *) proc _assign_at(array: Word, index: Word, data: Word); var target: Word; @@ -1954,8 +1991,8 @@ var column_position: Word; target: Word; begin - (* Each state is 8 bytes long (2 words: action and next state). *) - (* There are 22 character classes, so a transition row 8 * 22 = 176 bytes long. *) + (* Each state is 8 bytes long (2 words: action and next state). + There are 22 character classes, so a transition row is 8 * 22 = 176 bytes long. *) row_position := current_state + -1; row_position := row_position * 176; @@ -1967,11 +2004,13 @@ begin return target + column_position end; -(* Parameters: *) -(* current_state - First index into transitions table. *) -(* character_class - Second index into transitions table. *) -(* action - Action to assign.
*) -(* next_state - Next state to assign. *) +(** + * Parameters: + * current_state - First index into transitions table. + * character_class - Second index into transitions table. + * action - Action to assign. + * next_state - Next state to assign. + *) proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); var transition: Word; @@ -1982,12 +2021,14 @@ begin _lexer_transition_set_state(transition, next_state); end; -(* Sets same action and state transition for all character classes in one transition row. *) - -(* Parameters: *) -(* current_state - Current state (Transition state enumeration). *) -(* default_action - Default action (Callback). *) -(* next_state - Next state (Transition state enumeration). *) +(** + * Sets same action and state transition for all character classes in one transition row. + * + * Parameters: + * current_state - Current state (Transition state enumeration). + * default_action - Default action (Callback). + * next_state - Next state (Transition state enumeration). + *) proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); begin _lexer_set_transition(current_state, _lexer_class_invalid(), default_action, next_state); @@ -2015,12 +2056,14 @@ begin end; -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) +(** + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. 
+ *) proc _lexer_transitions(); begin (* Start state. *) @@ -2132,29 +2175,37 @@ begin _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); end; -(* Transition table is saved after character classification table. *) -(* Each character entry is 1 word long and there are 256 characters. *) -(* 1024 = 256 * 4 *) +(** + * Transition table is saved after character classification table. + * Each character entry is 1 word long and there are 256 characters. + * 1024 = 256 * 4 + *) proc _lexer_get_transition_table(); begin return @classification + 1024 end; -(* Lexer state is saved after the transition tables. *) -(* Each transition table entry is 8 bytes long. The table has 16 rows (transition states) *) -(* and 22 columns (character classes), so 2816 = 8 * 16 * 22. *) +(** + * Lexer state is saved after the transition tables. + * Each transition table entry is 8 bytes long. The table has 16 rows (transition states) + * and 22 columns (character classes), so 2816 = 8 * 16 * 22. + *) proc _lexer_global_state(); begin return _lexer_get_transition_table() + 2816 end; -(* Gets pointer to the token start. *) +(** + * Gets pointer to the token start. + *) proc _lexer_global_start(); begin return _lexer_global_state() + 4 end; -(* Gets pointer to the token end. *) +(** + * Gets pointer to the token end. + *) proc _lexer_global_end(); begin return _lexer_global_start() + 4 @@ -2180,7 +2231,9 @@ begin _store_word(state, transition + 4); end; -(* Resets the lexer state for reading the next token. *) +(** + * Resets the lexer state for reading the next token. + *) proc _lexer_reset(); var state: Word; @@ -2197,7 +2250,9 @@ begin _store_word(source_code_position, current); end; -(* One time lexer initialization. *) +(** + * One time lexer initialization. 
+ *) proc _lexer_initialize(); begin _lexer_classifications(); @@ -2222,59 +2277,445 @@ begin return _lexer_get_transition(current_state, character_class) end; -proc _lexer_execute_action(action_to_perform: Word); +proc _lexer_token_kind_identifier(); +begin + return 1 +end; + +proc _lexer_token_kind_const(); +begin + return 2 +end; + +proc _lexer_token_kind_var(); +begin + return 3 +end; + +proc _lexer_token_kind_proc(); +begin + return 4 +end; + +proc _lexer_token_kind_type(); +begin + return 5 +end; + +proc _lexer_token_kind_begin(); +begin + return 6 +end; + +proc _lexer_token_kind_end(); +begin + return 7 +end; + +proc _lexer_token_kind_if(); +begin + return 8 +end; + +proc _lexer_token_kind_then(); +begin + return 9 +end; + +proc _lexer_token_kind_else(); +begin + return 10 +end; + +proc _lexer_token_kind_elsif(); +begin + return 11 +end; + +proc _lexer_token_kind_while(); +begin + return 12 +end; + +proc _lexer_token_kind_do(); +begin + return 13 +end; + +proc _lexer_token_kind_extern(); +begin + return 14 +end; + +proc _lexer_token_kind_record(); +begin + return 15 +end; + +proc _lexer_token_kind_union(); +begin + return 16 +end; + +proc _lexer_token_kind_true(); +begin + return 17 +end; + +proc _lexer_token_kind_false(); +begin + return 18 +end; + +proc _lexer_token_kind_nil(); +begin + return 19 +end; + +proc _lexer_token_kind_and(); +begin + return 20 +end; + +proc _lexer_token_kind_or(); +begin + return 21 +end; + +proc _lexer_token_kind_xor(); +begin + return 22 +end; + +proc _lexer_token_kind_pipe(); +begin + return 23 +end; + +proc _lexer_token_kind_not(); +begin + return 24 +end; + +(* Kept distinct from _lexer_token_kind_not (24); 64 is the first value after _lexer_token_kind_goto (63). *) +proc _lexer_token_kind_return(); +begin + return 64 +end; + +proc _lexer_token_kind_module(); +begin + return 25 +end; + +proc _lexer_token_kind_program(); +begin + return 26 +end; + +proc _lexer_token_kind_import(); +begin + return 27 +end; + +proc _lexer_token_kind_cast(); +begin + return 28 +end; + +proc _lexer_token_kind_defer(); +begin + return 29 +end; + +proc
_lexer_token_kind_case(); +begin + return 30 +end; + +proc _lexer_token_kind_of(); +begin + return 31 +end; + +proc _lexer_token_kind_trait(); +begin + return 32 +end; + +proc _lexer_token_kind_left_paren(); +begin + return 33 +end; + +proc _lexer_token_kind_right_paren(); +begin + return 34 +end; + +proc _lexer_token_kind_left_square(); +begin + return 35 +end; + +proc _lexer_token_kind_right_square(); +begin + return 36 +end; + +proc _lexer_token_kind_shift_left(); +begin + return 37 +end; + +proc _lexer_token_kind_shift_right(); +begin + return 38 +end; + +proc _lexer_token_kind_greater_equal(); +begin + return 39 +end; + +proc _lexer_token_kind_less_equal(); +begin + return 40 +end; + +proc _lexer_token_kind_greater_than(); +begin + return 41 +end; + +proc _lexer_token_kind_less_than(); +begin + return 42 +end; + +proc _lexer_token_kind_not_equal(); +begin + return 43 +end; + +proc _lexer_token_kind_equals(); +begin + return 44 +end; + +proc _lexer_token_kind_semicolon(); +begin + return 45 +end; + +proc _lexer_token_kind_dot(); +begin + return 46 +end; + +proc _lexer_token_kind_comma(); +begin + return 47 +end; + +proc _lexer_token_kind_plus(); +begin + return 48 +end; + +proc _lexer_token_kind_arrow(); +begin + return 49 +end; + +proc _lexer_token_kind_minus(); +begin + return 50 +end; + +proc _lexer_token_kind_multiplication(); +begin + return 51 +end; + +proc _lexer_token_kind_division(); +begin + return 52 +end; + +proc _lexer_token_kind_remainder(); +begin + return 53 +end; + +proc _lexer_token_kind_assignment(); +begin + return 54 +end; + +proc _lexer_token_kind_colon(); +begin + return 55 +end; + +proc _lexer_token_kind_hat(); +begin + return 56 +end; + +proc _lexer_token_kind_at(); +begin + return 57 +end; + +proc _lexer_token_kind_exclamation(); +begin + return 58 +end; + +proc _lexer_token_kind_string(); +begin + return 59 +end; + +proc _lexer_token_kind_character(); +begin + return 60 +end; + +proc _lexer_token_kind_integer(); +begin + return 61 
+end; + +proc _lexer_token_kind_word(); +begin + return 62 +end; + +proc _lexer_token_kind_goto(); +begin + return 63 +end; + +proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0; + end; + return result +end; + +proc _lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + token_length: Word; +begin + result := _lexer_token_kind_identifier(); + token_length := position_end + -position_start; + + if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then + result := _lexer_token_kind_const(); + elsif _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then + result := _lexer_token_kind_var(); + elsif _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then + result := _lexer_token_kind_proc(); + elsif _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then + result := _lexer_token_kind_type(); + elsif _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then + result := _lexer_token_kind_begin(); + elsif _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then + result := _lexer_token_kind_end(); + elsif _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then + result := _lexer_token_kind_return(); + elsif _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then + result := _lexer_token_kind_goto(); + elsif _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then + result := _lexer_token_kind_if(); + elsif _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then + result := _lexer_token_kind_while(); + elsif _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then + result := _lexer_token_kind_then(); + elsif _lexer_compare_keyword(position_start, token_length, "else", 
4) = 1 then + result := _lexer_token_kind_else(); + elsif _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then + result := _lexer_token_kind_elsif(); + end; + return result +end; + +proc _lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := _lexer_token_kind_colon(); + elsif character = '.' then + result := _lexer_token_kind_dot(); + end; + return result +end; + +proc _lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := _lexer_token_kind_semicolon(); + end; + return result +end; + +proc _lexer_execute_action(action_to_perform: Word, kind: Word); var pointer_start: Word; pointer_end: Word; position_start: Word; position_end: Word; + intermediate: Word; begin - pointer_start := _lexer_global_end(); + pointer_start := _lexer_global_start(); position_start := _load_word(pointer_start); pointer_end := _lexer_global_end(); position_end := _load_word(pointer_end); if action_to_perform = _lexer_action_none() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_accumulate() then + elsif action_to_perform = _lexer_action_accumulate() then _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_skip() then + elsif action_to_perform = _lexer_action_skip() then _store_word(position_start + 1, pointer_start); _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_single() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_eof() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_finalize() then - goto .action_to_perform_end; - end; - if action_to_perform = 
_lexer_action_composite() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_key_id() then + elsif action_to_perform = _lexer_action_single() then _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_integer() then - _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_delimited() then - _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - .action_to_perform_end; + intermediate := _lexer_classify_single(position_start); + _store_word(intermediate, kind); + elsif action_to_perform = _lexer_action_eof() then + elsif action_to_perform = _lexer_action_finalize() then + intermediate := _lexer_classify_finalize(position_start); + _store_word(intermediate, kind); + elsif action_to_perform = _lexer_action_composite() then + elsif action_to_perform = _lexer_action_key_id() then + intermediate := _lexer_classify_keyword(position_start, position_end); + _store_word(intermediate, kind); + elsif action_to_perform = _lexer_action_integer() then + elsif action_to_perform = _lexer_action_delimited() then + end; end; -proc _lexer_execute_transition(); +proc _lexer_execute_transition(kind: Word); var next_transition: Word; next_state: Word; @@ -2288,50 +2728,66 @@ begin global_state := _lexer_global_state(); _store_word(next_state, global_state); - _lexer_execute_action(action_to_perform); + _lexer_execute_action(action_to_perform, kind); return next_state end; -proc _lexer_advance_token(); -var - executed_transition: Word; +proc _lexer_advance_token(kind: Word); begin - .lexer_advance_token_loop; - executed_transition := _lexer_execute_transition(); - - if executed_transition <> _lexer_state_end() then - goto .lexer_advance_token_loop; + if _lexer_execute_transition(kind) <> _lexer_state_end() then + _lexer_advance_token(kind); end; end; -(* Reads the next token. 
*) - -(* Returns token length in a0. *) -proc _lexer_read_token(); +(** + * Reads the next token. + * + * Returns token length in a0. + *) +proc _lexer_read_token(kind: Word); var new_position: Word; - token_end: Word; begin _lexer_reset(); - _lexer_advance_token(); + _lexer_advance_token(kind); new_position := _lexer_global_end(); - token_end := _load_word(new_position); - token_end := token_end + -source_code_position; - - return token_end + -1 + return _load_word(new_position) + -source_code_position end; -(* Entry point. *) +(** + * Advances the token stream past the last read token. + *) +proc _lexer_skip_token(); +var + new_position: Word; +begin + new_position := _lexer_global_end(); + source_code_position := _load_word(new_position); +end; + +(** + * Entry point. + *) proc _start(); +var + last_read: Word; + offset: Word; begin _lexer_initialize(); _symbol_table_build(); (* Read the source from the standard input. *) + offset := @source_code; + + .start_read; (* Second argument is buffer size. Modifying update the source_code definition. *) - _read_file(@source_code, 81920); + last_read := _read_file(offset, 81920); + if last_read > 0 then + offset := offset + last_read; + goto .start_read; + end; _compile(); _exit(0);