diff options
| author | Eugen Wissner <belka@caraus.de> | 2025-09-23 22:22:38 +0200 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2025-09-23 22:22:38 +0200 |
| commit | 0cc41f2d838630f5117d57e1491ffd4a6d613832 (patch) | |
| tree | 119f3f76ca5c6a0cdd817575e8df565519fd6a9c /boot/stage13.elna | |
| parent | 6e9086aa26a37ef8d89dd54b773e614a80efe720 (diff) | |
| download | elna-0cc41f2d838630f5117d57e1491ffd4a6d613832.tar.gz | |
Implement elsif for if-statements
Diffstat (limited to 'boot/stage13.elna')
| -rw-r--r-- | boot/stage13.elna | 849 |
1 files changed, 685 insertions, 164 deletions
diff --git a/boot/stage13.elna b/boot/stage13.elna index 66f6593..925a1cd 100644 --- a/boot/stage13.elna +++ b/boot/stage13.elna @@ -5,6 +5,7 @@ (* Stage 13 compiler. *) (* - Multiline comments. *) +(* - elsif conditions. *) const symbol_builtin_name_int := "Int"; @@ -114,7 +115,7 @@ end; (* Returns the amount of bytes written in a0. *) proc _read_file(buffer: Word, size: Word); begin - _syscall(0, buffer, size, 0, 0, 0, 63); + return _syscall(0, buffer, size, 0, 0, 0, 63) end; (* Writes to the standard output. *) @@ -346,12 +347,6 @@ begin return destination end; -(* Advances the token stream by a0 bytes. *) -proc _advance_token(count: Word); -begin - source_code_position := source_code_position + count; -end; - (* Prints the current token. *) (* Parameters: *) @@ -367,12 +362,13 @@ end; proc _compile_integer_literal(); var integer_token: Word; + token_kind: Word; begin _write_z("\tli t0, \0"); - integer_token := _lexer_read_token(); + integer_token := _lexer_read_token(@token_kind); _write_token(integer_token); - _advance_token(integer_token); + _lexer_skip_token(); _write_c('\n'); end; @@ -382,16 +378,16 @@ var character: Word; begin _write_z("\tli t0, '\0"); - _advance_token(1); + source_code_position := source_code_position + 1; character := _load_byte(source_code_position); if character = '\\' then _write_c('\\'); - _advance_token(1); + source_code_position := source_code_position + 1; end; _write_s(source_code_position, 1); _write_s("'\n", 2); - _advance_token(2); + source_code_position := source_code_position + 2; end; proc _compile_variable_expression(); @@ -403,14 +399,14 @@ end; proc _compile_address_expression(); begin (* Skip the "@" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_designator(); end; proc _compile_negate_expression(); begin (* Skip the "-" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); _write_z("\tneg t0, t0\n\0"); @@ -419,7 +415,7 @@ end; proc _compile_not_expression(); begin (* Skip the "~" sign. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); _write_z("\tnot t0, t0\n\0"); @@ -433,7 +429,8 @@ begin length := _string_length(source_code_position); offset := _add_string(source_code_position); - _advance_token(length + 2); + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; _write_z("\tla t0, strings\n\0"); _write_z("\tli t1, \0"); @@ -479,7 +476,7 @@ end; proc _compile_binary_rhs(); begin (* Skip the whitespace after the binary operator. *) - _advance_token(1); + source_code_position := source_code_position + 1; _compile_term(); (* Load the left expression from the stack; *) @@ -502,11 +499,11 @@ begin _write_z("sw t0, 64(sp)\n\0"); (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '+' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -515,7 +512,7 @@ begin goto .compile_expression_end; end; if current_character = '*' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -524,7 +521,7 @@ begin goto .compile_expression_end; end; if current_character = '&' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -533,7 +530,7 @@ begin goto .compile_expression_end; end; if current_character = 'o' then - _advance_token(2); + source_code_position := source_code_position + 2; _compile_binary_rhs(); (* Execute the operation. *) @@ -542,7 +539,7 @@ begin goto .compile_expression_end; end; if current_character = 'x' then - _advance_token(3); + source_code_position := source_code_position + 3; _compile_binary_rhs(); (* Execute the operation. *) @@ -551,7 +548,7 @@ begin goto .compile_expression_end; end; if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -560,7 +557,7 @@ begin goto .compile_expression_end; end; if current_character = '%' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -569,7 +566,7 @@ begin goto .compile_expression_end; end; if current_character = '/' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -578,11 +575,11 @@ begin goto .compile_expression_end; end; if current_character = '<' then - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '>' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -591,7 +588,7 @@ begin goto .compile_expression_end; end; if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -607,10 +604,10 @@ begin goto .compile_expression_end; end; if current_character = '>' then - _advance_token(1); + source_code_position := source_code_position + 1; current_character := _load_byte(source_code_position); if current_character = '=' then - _advance_token(1); + source_code_position := source_code_position + 1; _compile_binary_rhs(); (* Execute the operation. *) @@ -621,7 +618,7 @@ begin _compile_binary_rhs(); (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\0"); + _write_z("\tslt t0, t0, t1\n\0"); goto .compile_expression_end; end; @@ -635,16 +632,21 @@ var name: Word; argument_count: Word; stack_offset: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); - name := source_code_position; + name_length := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_length := _lexer_global_end(); + name_length := _load_word(name_length) + -name; argument_count := 0; (* Skip the identifier and left paren. *) - _advance_token(name_length + 1); + _lexer_skip_token(); + source_code_position := source_code_position + 1; if _load_byte(source_code_position) = ')' then - goto .compile_call_finalize + goto .compile_call_finalize; end; .compile_call_loop; _compile_expression(); @@ -664,7 +666,7 @@ begin if _load_byte(source_code_position) <> ',' then goto .compile_call_finalize; end; - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_call_loop; .compile_call_finalize; @@ -692,43 +694,47 @@ begin _write_s(name, name_length); (* Skip the right paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; end; proc _compile_goto(); var next_token: Word; + token_kind: Word; begin - _advance_token(6); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + source_code_position := source_code_position + 2; - next_token := _lexer_read_token(); + next_token := _lexer_read_token(@token_kind); _write_z("\tj .\0"); _write_token(next_token); - _advance_token(next_token); + _lexer_skip_token(); end; -proc _compile_local_designator(symbol: Word, name_length: Word); +proc _compile_local_designator(symbol: Word); var variable_offset: Word; begin - _write_z("\taddi t0, sp, \0"); variable_offset := _parameter_info_get_offset(symbol); _write_i(variable_offset); _write_c('\n'); - _advance_token(name_length); + _lexer_skip_token(); end; proc _compile_global_designator(); var name: Word; + token_kind: Word; begin _write_z("\tla t0, \0"); - name := _lexer_read_token(); + name := _lexer_read_token(@token_kind); _write_token(name); - _advance_token(name); + _lexer_skip_token(); _write_c('\n'); end; @@ -737,12 +743,18 @@ proc _compile_designator(); var name_token: Word; lookup_result: Word; + token_kind: Word; + name: Word; begin - name_token := _lexer_read_token(); - lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token); + name_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_token := _lexer_global_end(); + name_token := _load_word(name_token) + -name; + lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); if lookup_result <> 0 then - _compile_local_designator(lookup_result, name_token); + _compile_local_designator(lookup_result); goto .compile_designator_end; end; _compile_global_designator(); @@ -758,7 +770,7 @@ begin _write_z("\tsw t0, 60(sp)\n\0"); (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); + source_code_position := source_code_position + 4; (* Compile the assignment. *) _compile_expression(); @@ -767,9 +779,13 @@ begin end; proc _compile_return_statement(); +var + token_kind: Word; begin (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; _compile_expression(); _write_z("\tmv a0, t0\n\0"); @@ -789,13 +805,18 @@ proc _compile_if(); var after_end_label: Word; condition_label: Word; + token_kind: Word; begin (* Skip "if ". *) - _advance_token(3); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; + (* Compile condition. *) _compile_expression(); (* Skip " then" with newline. *) - _advance_token(6); + _lexer_read_token(@token_kind); + _lexer_skip_token(); after_end_label := label_counter; label_counter := label_counter + 1; @@ -817,20 +838,53 @@ begin _write_label(condition_label); _write_z(":\n\0"); - if _memcmp(source_code_position, "end", 3) = 0 then + .compile_if_loop; + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_end() then goto .compile_if_end; end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else + if token_kind = _lexer_token_kind_else() then + goto .compile_if_else; + end; + if token_kind = _lexer_token_kind_elsif() then + goto .compile_if_elsif; end; + .compile_if_elsif; + _lexer_skip_token(); + source_code_position := source_code_position + 1; + + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* condition_label is the label in front of the next elsif condition or end. *) + condition_label := label_counter; + label_counter := label_counter + 1; + + _write_z("\tbeqz t0, \0"); + _write_label(condition_label); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(after_end_label); + _write_c('\n'); + + _write_label(condition_label); + _write_z(":\n\0"); + + goto .compile_if_loop; + .compile_if_else; - (* Skip "else" and newline. *) - _advance_token(5); + _lexer_skip_token(); _compile_procedure_body(); .compile_if_end; - (* Skip "end". *) - _advance_token(3); + _lexer_skip_token(); _write_label(after_end_label); _write_z(":\n\0"); @@ -839,74 +893,77 @@ end; proc _compile_label_declaration(); var label_token: Word; + token_kind: Word; + name: Word; begin (* Skip the dot. *) - _advance_token(1); - label_token := _lexer_read_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + label_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); _write_c('.'); - _write_s(source_code_position, label_token); + _write_s(name, label_token); _write_z(":\n\0"); - _advance_token(label_token); + _lexer_skip_token(); end; proc _compile_statement(); var current_byte: Word; + token_kind: Word; begin - _skip_spaces(); - current_byte := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - (* This is a call if the statement starts with an underscore. *) - if current_byte = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "goto ", 5) = 0 then + if token_kind = _lexer_token_kind_goto() then _compile_goto(); goto .compile_statement_semicolon; end; - if _memcmp(source_code_position, "if ", 3) = 0 then + if token_kind = _lexer_token_kind_if() then _compile_if(); goto .compile_statement_semicolon; end; - if _memcmp(source_code_position, "return ", 7) = 0 then + if token_kind = _lexer_token_kind_return() then _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; + goto .compile_statement_semicolon; end; - if current_byte = '.' then + if token_kind = _lexer_token_kind_dot() then _compile_label_declaration(); + goto .compile_statement_semicolon; + end; + if token_kind = _lexer_token_kind_identifier() then + current_byte := _lexer_global_start(); + current_byte := _load_word(current_byte); + current_byte := _load_byte(current_byte); + (* This is a call if the statement starts with an underscore. *) + if current_byte = '_' then + _compile_call(); + else + _compile_assignment(); + end; goto .compile_statement_semicolon; end; - _compile_assignment(); - goto .compile_statement_semicolon; .compile_statement_semicolon; - _advance_token(2); _write_c('\n'); - - .compile_statement_end; end; proc _compile_procedure_body(); var - lhs: Word; - rhs: Word; + token_kind: Word; begin .compile_procedure_body_loop; - _skip_empty_lines(); - _skip_spaces(); - lhs := _memcmp(source_code_position, "end", 3) = 0; - rhs := _memcmp(source_code_position, "else", 4) = 0; - lhs := lhs or rhs; + _skip_empty_lines(); + _compile_statement(); + _lexer_read_token(@token_kind); - if lhs = 0 then - _compile_statement(); + if token_kind = _lexer_token_kind_semicolon() then + _lexer_skip_token(); goto .compile_procedure_body_loop; end; + _skip_empty_lines(); end; (* Writes a regster name to the standard output. *) @@ -927,7 +984,7 @@ var begin current_byte := _load_byte(source_code_position); if current_byte = '\t' then - _advance_token(1); + source_code_position := source_code_position + 1; _skip_spaces(); end; end; @@ -935,9 +992,10 @@ end; proc _read_type_expression(); var type_name: Word; + token_kind: Word; begin - type_name := _lexer_read_token(); - _advance_token(type_name); + type_name := _lexer_read_token(@token_kind); + _lexer_skip_token(); end; (* Parameters: *) @@ -1009,14 +1067,15 @@ var name_length: Word; info: Word; name_position: Word; + token_kind: Word; begin (* Read the parameter name. *) name_position := source_code_position; - name_length := _lexer_read_token(); - _advance_token(name_length); + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); (* Skip colon and space in front of the type expression. *) - _advance_token(2); + source_code_position := source_code_position + 2; _read_type_expression(); @@ -1038,7 +1097,7 @@ var parameter_counter: Word; begin (* Skip open paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; parameter_counter := 0; .compile_procedure_prologue_skip; @@ -1047,12 +1106,12 @@ begin parameter_counter := parameter_counter + 1; if _load_byte(source_code_position) = ',' then - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_procedure_prologue_skip; end; end; (* Skip close paren. *) - _advance_token(1); + source_code_position := source_code_position + 1; end; (* Parameters: *) @@ -1062,13 +1121,15 @@ var name_length: Word; info: Word; name_position: Word; + token_kind: Word; begin _skip_spaces(); name_position := source_code_position; (* Read and skip variable name, colon and the space *) - name_length := _lexer_read_token(); - _advance_token(name_length + 2); + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(name_length); + source_code_position := source_code_position + 2; _read_type_expression(); @@ -1076,7 +1137,7 @@ begin _symbol_table_enter(@symbol_table_local, name_position, name_length, info); (* Skip semicolon and newline after the variable declaration *) - _advance_token(2); + source_code_position := source_code_position + 2; end; proc _read_procedure_temporaries(); @@ -1086,7 +1147,7 @@ begin if _memcmp(source_code_position, "var", 3) <> 0 then goto .read_local_variables_end; end; - _advance_token(4); + source_code_position := source_code_position + 4; temporary_counter := 0; .read_local_variables_loop; @@ -1104,13 +1165,14 @@ end; proc _compile_procedure(); var name_length: Word; + token_kind: Word; begin (* Skip "proc ". *) - _advance_token(5); + source_code_position := source_code_position + 5; (* Clear local symbol table. *) _store_word(0, @symbol_table_local); - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); (* Write .type _procedure_name, @function. *) _write_z(".type \0"); @@ -1123,16 +1185,16 @@ begin _write_z(":\n\0"); (* Skip procedure name. *) - _advance_token(name_length); + _lexer_skip_token(); _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); _read_procedure_parameters(); (* Skip semicolon and newline. *) - _advance_token(2); + source_code_position := source_code_position + 2; _read_procedure_temporaries(); (* Skip semicolon, "begin" and newline. *) - _advance_token(6); + source_code_position := source_code_position + 6; _compile_procedure_body(); @@ -1140,16 +1202,17 @@ begin _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); + source_code_position := source_code_position + 5; end; (* Prints and skips a line. *) proc _skip_comment(); var - new_position: Word; + token_kind: Word; begin - new_position := _lexer_read_token(); - _advance_token(new_position + 1); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; end; (* Skip newlines and comments. *) @@ -1176,7 +1239,7 @@ begin current_byte := _load_byte(current_position + 1); if current_byte = '*' then - goto .skip_empty_lines_comment + goto .skip_empty_lines_comment; end; goto .skip_empty_lines_end; @@ -1191,7 +1254,7 @@ begin .skip_empty_lines_tab; current_position := current_position + 1; - goto .skip_empty_lines_loop + goto .skip_empty_lines_loop; .skip_empty_lines_end; end; @@ -1200,6 +1263,7 @@ proc _compile_global_initializer(); var current_byte: Word; length: Word; + token_kind: Word; begin current_byte := _load_byte(source_code_position); @@ -1211,13 +1275,14 @@ begin _write_i(); (* Skip the quoted string. *) - _advance_token(length + 2); + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; goto .compile_global_initializer_end; end; if current_byte = 'S' then (* Skip "S(". *) - _advance_token(2); + source_code_position := source_code_position + 2; if _load_byte(source_code_position) = ')' then goto .compile_global_initializer_closing; @@ -1226,19 +1291,19 @@ begin end; if current_byte = '@' then (* Skip @. *) - _advance_token(1); + source_code_position := source_code_position + 1; _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(); + current_byte := _lexer_read_token(@token_kind); _write_token(current_byte); - _advance_token(current_byte); + _lexer_skip_token(); goto .compile_global_initializer_end; end; if _is_digit(current_byte) = 1 then _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(); + current_byte := _lexer_read_token(@token_kind); _write_token(current_byte); - _advance_token(1); + source_code_position := source_code_position + 1; goto .compile_global_initializer_end; end; @@ -1248,14 +1313,14 @@ begin if _load_byte(source_code_position) <> ')' then (* Skip comma and whitespace after it. *) - _advance_token(2); + source_code_position := source_code_position + 2; goto .compile_global_initializer_loop; end; .compile_global_initializer_closing; (* Skip ")" *) - _advance_token(1); + source_code_position := source_code_position + 1; goto .compile_global_initializer_end; @@ -1265,8 +1330,9 @@ end; proc _compile_constant_declaration(); var name_length: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); _write_z(".type \0"); _write_token(name_length); @@ -1276,22 +1342,26 @@ begin _write_c(':'); (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(name_length + 4); + _lexer_skip_token(); + source_code_position := source_code_position + 4; _compile_global_initializer(); (* Skip semicolon and newline. *) - _advance_token(2); + source_code_position := source_code_position + 2; _write_c('\n'); end; proc _compile_const_part(); +var + token_kind: Word; begin _skip_empty_lines(); + _lexer_read_token(@token_kind); - if _memcmp(source_code_position, "const\0", 5) <> 0 then + if token_kind <> _lexer_token_kind_const() then goto .compile_const_part_end; end; (* Skip "const" with the newline after it. *) - _advance_token(6); + _lexer_skip_token(); _write_z(".section .rodata # Compiled from const section.\n\n\0"); .compile_const_part_loop; @@ -1300,8 +1370,7 @@ begin (* If the character at the line beginning is not indentation, *) (* it is probably the next code section. *) if _load_byte(source_code_position) = '\t' then - _advance_token(1); - + source_code_position := source_code_position + 1; _compile_constant_declaration(); goto .compile_const_part_loop; end; @@ -1312,8 +1381,9 @@ end; proc _compile_variable_declaration(); var name_length: Word; + token_kind: Word; begin - name_length := _lexer_read_token(); + name_length := _lexer_read_token(@token_kind); _write_z(".type \0"); _write_token(name_length); @@ -1323,7 +1393,9 @@ begin _write_c(':'); (* Skip the variable name and colon with space before the type. *) - _advance_token(name_length + 2); + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); _read_type_expression(); if _load_byte(source_code_position) <> ' ' then @@ -1331,32 +1403,34 @@ begin _write_z(" .zero 81920\0"); else (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); + source_code_position := source_code_position + 4; _compile_global_initializer(); end; (* Skip semicolon and newline. *) - _advance_token(2); + _lexer_read_token(@token_kind); + _lexer_skip_token(); _write_c('\n'); end; proc _compile_var_part(); var - current_character: Word; + token_kind: Word; begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_var() then goto .compile_var_part_end; end; (* Skip "var" and newline. *) - _advance_token(4); + _lexer_skip_token(); _write_z(".section .data\n\0"); .compile_var_part_loop; _skip_empty_lines(); - current_character := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - if current_character = '\t' then - _advance_token(1); + if token_kind = _lexer_token_kind_identifier() then _compile_variable_declaration(); goto .compile_var_part_loop; end; @@ -2222,14 +2296,445 @@ begin return _lexer_get_transition(current_state, character_class) end; -proc _lexer_execute_action(action_to_perform: Word); +proc _lexer_token_kind_identifier(); +begin + return 1 +end; + +proc _lexer_token_kind_const(); +begin + return 2 +end; + +proc _lexer_token_kind_var(); +begin + return 3 +end; + +proc _lexer_token_kind_proc(); +begin + return 4 +end; + +proc _lexer_token_kind_type(); +begin + return 5 +end; + +proc _lexer_token_kind_begin(); +begin + return 6 +end; + +proc _lexer_token_kind_end(); +begin + return 7 +end; + +proc _lexer_token_kind_if(); +begin + return 8 +end; + +proc _lexer_token_kind_then(); +begin + return 9 +end; + +proc _lexer_token_kind_else(); +begin + return 10 +end; + +proc _lexer_token_kind_elsif(); +begin + return 11 +end; + +proc _lexer_token_kind_while(); +begin + return 12 +end; + +proc _lexer_token_kind_do(); +begin + return 13 +end; + +proc _lexer_token_kind_extern(); +begin + return 14 +end; + +proc _lexer_token_kind_record(); +begin + return 15 +end; + +proc _lexer_token_kind_union(); +begin + return 16 +end; + +proc _lexer_token_kind_true(); +begin + return 17 +end; + +proc _lexer_token_kind_false(); +begin + return 18 +end; + +proc _lexer_token_kind_nil(); +begin + return 19 +end; + +proc _lexer_token_kind_and(); +begin + return 20 +end; + +proc _lexer_token_kind_or(); +begin + return 21 +end; + +proc _lexer_token_kind_xor(); +begin + return 22 +end; + +proc _lexer_token_kind_pipe(); +begin + return 23 +end; + +proc _lexer_token_kind_not(); +begin + return 24 +end; + +proc _lexer_token_kind_return(); +begin + return 24 +end; + +proc _lexer_token_kind_module(); +begin + return 25 +end; + +proc _lexer_token_kind_program(); +begin + return 26 +end; + +proc _lexer_token_kind_import(); +begin + return 27 +end; + +proc _lexer_token_kind_cast(); +begin + return 28 +end; + +proc _lexer_token_kind_defer(); +begin + return 29 +end; + +proc _lexer_token_kind_case(); +begin + return 30 +end; + +proc _lexer_token_kind_of(); +begin + return 31 +end; + +proc _lexer_token_kind_trait(); +begin + return 32 +end; + +proc _lexer_token_kind_left_paren(); +begin + return 33 +end; + +proc _lexer_token_kind_right_paren(); +begin + return 34 +end; + +proc _lexer_token_kind_left_square(); +begin + return 35 +end; + +proc _lexer_token_kind_right_square(); +begin + return 36 +end; + +proc _lexer_token_kind_shift_left(); +begin + return 37 +end; + +proc _lexer_token_kind_shift_right(); +begin + return 38 +end; + +proc _lexer_token_kind_greater_equal(); +begin + return 39 +end; + +proc _lexer_token_kind_less_equal(); +begin + return 40 +end; + +proc _lexer_token_kind_greater_than(); +begin + return 41 +end; + +proc _lexer_token_kind_less_than(); +begin + return 42 +end; + +proc _lexer_token_kind_not_equal(); +begin + return 43 +end; + +proc _lexer_token_kind_equals(); +begin + return 44 +end; + +proc _lexer_token_kind_semicolon(); +begin + return 45 +end; + +proc _lexer_token_kind_dot(); +begin + return 46 +end; + +proc _lexer_token_kind_comma(); +begin + return 47 +end; + +proc _lexer_token_kind_plus(); +begin + return 48 +end; + +proc _lexer_token_kind_arrow(); +begin + return 49 +end; + +proc _lexer_token_kind_minus(); +begin + return 50 +end; + +proc _lexer_token_kind_multiplication(); +begin + return 51 +end; + +proc _lexer_token_kind_division(); +begin + return 52 +end; + +proc _lexer_token_kind_remainder(); +begin + return 53 +end; + +proc _lexer_token_kind_assignment(); +begin + return 54 +end; + +proc _lexer_token_kind_colon(); +begin + return 55 +end; + +proc _lexer_token_kind_hat(); +begin + return 56 +end; + +proc _lexer_token_kind_at(); +begin + return 57 +end; + +proc _lexer_token_kind_exclamation(); +begin + return 58 +end; + +proc _lexer_token_kind_string(); +begin + return 59 +end; + +proc _lexer_token_kind_character(); +begin + return 60 +end; + +proc _lexer_token_kind_integer(); +begin + return 61 +end; + +proc _lexer_token_kind_word(); +begin + return 62 +end; + +proc _lexer_token_kind_goto(); +begin + return 63 +end; + +proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0; + end; + return result +end; + +proc _lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + token_length: Word; +begin + result := _lexer_token_kind_identifier(); + token_length := position_end + -position_start; + + if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then + result := _lexer_token_kind_const(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then + result := _lexer_token_kind_var(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then + result := _lexer_token_kind_proc(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then + result := _lexer_token_kind_type(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then + result := _lexer_token_kind_begin(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then + result := _lexer_token_kind_end(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then + result := _lexer_token_kind_return(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then + result := _lexer_token_kind_goto(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then + result := _lexer_token_kind_if(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then + result := _lexer_token_kind_while(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then + result := _lexer_token_kind_then(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then + result := _lexer_token_kind_else(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then + result := _lexer_token_kind_elsif(); + goto .lexer_classify_keyword_end; + end; + .lexer_classify_keyword_end; + return result +end; + +proc _lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := _lexer_token_kind_colon(); + goto .lexer_classify_finalize_result; + end; + if character = '.' then + result := _lexer_token_kind_dot(); + goto .lexer_classify_finalize_result; + end; + .lexer_classify_finalize_result; + return result +end; + +proc _lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := _lexer_token_kind_semicolon(); + end; + return result +end; + +proc _lexer_execute_action(action_to_perform: Word, kind: Word); var pointer_start: Word; pointer_end: Word; position_start: Word; position_end: Word; + intermediate: Word; begin - pointer_start := _lexer_global_end(); + pointer_start := _lexer_global_start(); position_start := _load_word(pointer_start); pointer_end := _lexer_global_end(); position_end := _load_word(pointer_end); @@ -2247,34 +2752,40 @@ begin goto .action_to_perform_end; end; if action_to_perform = _lexer_action_single() then + _store_word(position_end + 1, pointer_end); + + intermediate := _lexer_classify_single(position_start); + _store_word(intermediate, kind); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_eof() then goto .action_to_perform_end; end; if action_to_perform = _lexer_action_finalize() then + intermediate := _lexer_classify_finalize(position_start); + _store_word(intermediate, kind); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_composite() then goto .action_to_perform_end; end; if action_to_perform = _lexer_action_key_id() then - _store_word(position_end + 1, pointer_end); + intermediate := _lexer_classify_keyword(position_start, position_end); + _store_word(intermediate, kind); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_integer() then - _store_word(position_end + 1, pointer_end); goto .action_to_perform_end; end; if action_to_perform = _lexer_action_delimited() then - _store_word(position_end + 1, pointer_end); + (* _store_word(position_end + 1, pointer_end); *) goto .action_to_perform_end; end; .action_to_perform_end; end; -proc _lexer_execute_transition(); +proc _lexer_execute_transition(kind: Word); var next_transition: Word; next_state: Word; @@ -2288,50 +2799,60 @@ begin global_state := _lexer_global_state(); _store_word(next_state, global_state); - _lexer_execute_action(action_to_perform); + _lexer_execute_action(action_to_perform, kind); return next_state end; -proc _lexer_advance_token(); -var - executed_transition: Word; +proc _lexer_advance_token(kind: Word); begin - .lexer_advance_token_loop; - executed_transition := _lexer_execute_transition(); - - if executed_transition <> _lexer_state_end() then - goto .lexer_advance_token_loop; + if _lexer_execute_transition(kind) <> _lexer_state_end() then + _lexer_advance_token(kind); end; end; (* Reads the next token. *) (* Returns token length in a0. *) -proc _lexer_read_token(); +proc _lexer_read_token(kind: Word); var new_position: Word; - token_end: Word; begin _lexer_reset(); - _lexer_advance_token(); + _lexer_advance_token(kind); new_position := _lexer_global_end(); - token_end := _load_word(new_position); - token_end := token_end + -source_code_position; + return _load_word(new_position) + -source_code_position +end; - return token_end + -1 +(* Advances the token stream past the last read token. *) +proc _lexer_skip_token(); +var + new_position: Word; +begin + new_position := _lexer_global_end(); + source_code_position := _load_word(new_position); end; (* Entry point. *) proc _start(); +var + last_read: Word; + offset: Wort; begin _lexer_initialize(); _symbol_table_build(); (* Read the source from the standard input. *) + offset := @source_code; + + .start_read; (* Second argument is buffer size. Modifying update the source_code definition. *) - _read_file(@source_code, 81920); + last_read := _read_file(offset, 81920); + if last_read > 0 then + offset := offset + last_read; + goto .start_read; + end; _compile(); _exit(0); |
