From fe431a4e9d7a82c93346d3f5688ac80577254b82 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Wed, 15 Oct 2025 23:55:36 +0200 Subject: [PATCH] Start parsing statements --- boot/stage14.elna | 6 +- boot/stage15.elna | 640 +++++++++++++++++++++++++++++++++------------- 2 files changed, 470 insertions(+), 176 deletions(-) diff --git a/boot/stage14.elna b/boot/stage14.elna index 9a6e18b..1afb501 100644 --- a/boot/stage14.elna +++ b/boot/stage14.elna @@ -1598,8 +1598,8 @@ begin _lexer_read_token(@token_kind); if token_kind <> _lexer_token_kind_assignment() then - (* Else we assume this is a zeroed 102400 bytes big array. *) - _write_z(" .zero 102400\0") + (* Else we assume this is a zeroed 204800 bytes big array. *) + _write_z(" .zero 204800\0") else (* Skip the assignment sign with surrounding whitespaces. *) _lexer_skip_token(); @@ -3038,7 +3038,7 @@ begin .start_read; (* Second argument is buffer size. Modifying update the source_code definition. *) - last_read := _read_file(offset, 102400); + last_read := _read_file(offset, 204800); if last_read > 0 then offset := offset + last_read; goto .start_read diff --git a/boot/stage15.elna b/boot/stage15.elna index 3d05d1b..32f2ca1 100644 --- a/boot/stage15.elna +++ b/boot/stage15.elna @@ -135,7 +135,13 @@ type variable_expression, field_access_expression, dereference_expression, - unary_expression + unary_expression, + binary_expression, + call, + goto_statement, + label_declaration, + return_statement, + assign_statement ); const @@ -554,7 +560,7 @@ begin integer_token := _lexer_global_get_start(); integer_length := _lexer_global_get_end(); - integer_length := integer_length - integer_token; + integer_length := integer_length - integer_token; _lexer_skip_token(); _node_set_kind(result, NodeKind.integer_literal); @@ -785,13 +791,7 @@ begin elsif token_kind = LexerTokenKind.string then parser_node := _parse_string_literal() elsif token_kind = LexerTokenKind.identifier then - current_character := _lexer_global_get_start(); - current_character := _load_byte(current_character); - - (* This is a call if the statement starts with an underscore. *) - if current_character <> '_' then - parser_node := _parse_variable_expression() - end + parser_node := _parse_variable_expression() end; return parser_node end; @@ -840,6 +840,8 @@ begin simple_expression := _parse_dereference_expression(simple_expression) elsif token_kind = LexerTokenKind.dot then simple_expression := _parse_field_access_expression(simple_expression) + elsif token_kind = LexerTokenKind.left_paren then + simple_expression := _parse_call(simple_expression) end; return simple_expression end; @@ -961,177 +963,288 @@ begin end end; -proc _compile_binary_rhs(); -var - parser_node: Word; -begin - (* Save the value of the left expression on the stack. *) - _write_z("\tsw t0, 64(sp)\n\0"); - parser_node := _parse_unary_expression(); - _compile_unary_expression(parser_node); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 64(sp)\n\0") +proc _binary_expression_size(); + return 16 end; -proc _compile_expression(); -var - token_kind: Word; - current_byte: Word; - parser_node: Word; +proc _binary_expression_get_lhs(this: Word); begin - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.identifier then - current_byte := _lexer_global_get_start(); - current_byte := _load_byte(current_byte); + this := this + 4; + return this^ +end; - if current_byte = '_' then - _compile_call(); - _write_z("\tmv t0, a0\n\0"); - goto compile_expression_end - end - end; - parser_node := _parse_unary_expression(); - _compile_unary_expression(parser_node); +proc _binary_expression_set_lhs(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; +proc _binary_expression_get_rhs(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _binary_expression_set_rhs(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _binary_expression_get_operator(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _binary_expression_set_operator(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _parse_binary_expression(); +var + lhs_node: Word; + rhs_node: Word; + token_kind: Word; + memory_size: Word; + result: Word; +begin + lhs_node := _parse_unary_expression(); + rhs_node := 0; _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.plus then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tadd t0, t0, t1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.minus then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tsub t0, t1, t0\n\0"); + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.multiplication then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tmul t0, t0, t1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.and then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind._or then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tor t0, t0, t1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind._xor then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.equals then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.remainder then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\trem t0, t1, t0\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.division then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tdiv t0, t1, t0\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.less_than then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.greater_than then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.less_equal then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.not_equal then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0") + rhs_node := _parse_unary_expression() elsif token_kind = LexerTokenKind.greater_equal then _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0") + rhs_node := _parse_unary_expression() end; + if rhs_node <> 0 then + memory_size := _binary_expression_size(); + result := _allocate(memory_size); - .compile_expression_end; + _node_set_kind(result, NodeKind.binary_expression); + _binary_expression_set_lhs(result, lhs_node); + _binary_expression_set_rhs(result, rhs_node); + _binary_expression_set_operator(result, token_kind) + else + result := lhs_node + end; + return result end; -proc _compile_call(); +proc _compile_binary_expression(parser_node: Word); +var + token_kind: Word; + expression_kind: Word; + operand_node: Word; +begin + expression_kind := _node_get_kind(parser_node); + + if expression_kind <> NodeKind.binary_expression then + _compile_unary_expression(parser_node) + else + token_kind := _binary_expression_get_operator(parser_node); + + operand_node := _binary_expression_get_lhs(parser_node); + _compile_unary_expression(operand_node); + (* Save the value of the left expression on the stack. *) + _write_z("\tsw t0, 64(sp)\n\0"); + + operand_node := _binary_expression_get_rhs(parser_node); + _compile_unary_expression(operand_node); + (* Load the left expression from the stack; *) + _write_z("\tlw t1, 64(sp)\n\0"); + + if token_kind = LexerTokenKind.plus then + _write_z("\tadd t0, t0, t1\n\0") + elsif token_kind = LexerTokenKind.minus then + _write_z("\tsub t0, t1, t0\n\0"); + elsif token_kind = LexerTokenKind.multiplication then + _write_z("\tmul t0, t0, t1\n\0") + elsif token_kind = LexerTokenKind.and then + _write_z("\tand t0, t0, t1\n\0") + elsif token_kind = LexerTokenKind._or then + _write_z("\tor t0, t0, t1\n\0") + elsif token_kind = LexerTokenKind._xor then + _write_z("\txor t0, t0, t1\n\0") + elsif token_kind = LexerTokenKind.equals then + _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0") + elsif token_kind = LexerTokenKind.remainder then + _write_z("\trem t0, t1, t0\n\0") + elsif token_kind = LexerTokenKind.division then + _write_z("\tdiv t0, t1, t0\n\0") + elsif token_kind = LexerTokenKind.less_than then + _write_z("\tslt t0, t1, t0\n\0") + elsif token_kind = LexerTokenKind.greater_than then + _write_z("\tslt t0, t0, t1\n\0") + elsif token_kind = LexerTokenKind.less_equal then + _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0") + elsif token_kind = LexerTokenKind.not_equal then + _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0") + elsif token_kind = LexerTokenKind.greater_equal then + _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0") + end + end +end; + +proc _compile_expression(); +var + parser_node: Word; +begin + parser_node := _parse_binary_expression(); + _compile_binary_expression(parser_node) +end; + +(* 4 bytes node kind + 4 byte pointer to variable expression + 4 * 7 for arguments. *) +proc _call_size(); + return 44 +end; + +proc _call_get_name(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _call_set_name(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _call_get_argument(this: Word, n: Word); +begin + n := n * 4; + this := this + 8; + this := this + n; + return this^ +end; + +proc _call_set_argument(this: Word, n: Word, value: Word); +begin + n := n * 4; + this := this + 8; + this := this + n; + this^ := value +end; + +proc _parse_call(callee: Word); +var + parsed_expression: Word; + result: Word; + argument_number: Word; + token_kind: Word; + call_size: Word; +begin + call_size := _call_size(); + result := _allocate(call_size); + _node_set_kind(result, NodeKind.call); + _statement_set_next(result, 0); + + argument_number := 1; + _call_set_name(result, callee); + + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _lexer_read_token(@token_kind); + + if token_kind = LexerTokenKind.right_paren then + _lexer_skip_token(); + goto parse_call_end + end; + + .parse_call_loop; + parsed_expression := _parse_binary_expression(); + _call_set_argument(result, argument_number, parsed_expression); + argument_number := argument_number + 1; + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + if token_kind = LexerTokenKind.comma then + goto parse_call_loop + end; + + .parse_call_end; + (* Set the trailing argument to nil. *) + _call_set_argument(result, argument_number, 0); + + return result +end; + +proc _compile_call(parsed_call: Word); var name_length: Word; name: Word; argument_count: Word; stack_offset: Word; - token_kind: Word; + parsed_expression: Word; begin - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - name; + parsed_expression := _call_get_name(parsed_call); + name := _variable_expression_get_name(parsed_expression); + name_length := _variable_expression_get_length(parsed_expression); argument_count := 0; - (* Skip the identifier and left paren. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.right_paren then - goto compile_call_finalize - end; .compile_call_loop; - _compile_expression(); - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - stack_offset := argument_count * 4; - _write_i(116 - stack_offset); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - argument_count := argument_count + 1; - - _lexer_read_token(@token_kind); - - if token_kind <> LexerTokenKind.comma then + parsed_expression := _call_get_argument(parsed_call, argument_count + 1); + if parsed_expression = 0 then goto compile_call_finalize + else + _compile_binary_expression(parsed_expression); + + (* Save the argument on the stack. *) + _write_z("\tsw t0, \0"); + + stack_offset := argument_count * 4; + _write_i(116 - stack_offset); + _write_z("(sp)\n\0"); + + argument_count := argument_count + 1; + goto compile_call_loop end; - _lexer_skip_token(); - goto compile_call_loop; .compile_call_finalize; + (* Load the argument from the stack. *) if argument_count <> 0 then (* Decrement the argument counter. *) @@ -1151,37 +1264,159 @@ begin goto compile_call_finalize end; - .compile_call_end; _write_z("\tcall \0"); _write_s(name, name_length); - _write_c('\n'); - - (* Skip the right paren. *) - _lexer_read_token(@token_kind); - _lexer_skip_token() + _write_c('\n') end; -proc _compile_goto(); +(** + * All statements are chained into a list. Next contains a pointer to the next + * statement in the statement list. + *) +proc _statement_get_next(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _statement_set_next(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _goto_statement_size(); + return 16 +end; + +proc _goto_statement_get_label(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _goto_statement_set_label(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _goto_statement_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _goto_statement_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _parse_goto_statement(); var - next_token: Word; - next_length: Word; token_kind: Word; + label_name: Word; + label_length: Word; + statement_size: Word; + result: Word; begin _lexer_skip_token(); _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.dot then - _lexer_skip_token(); - _lexer_read_token(@token_kind) - end; - next_token := _lexer_global_get_start(); - next_length := _lexer_global_get_end(); - next_length := next_length - next_token; + label_name := _lexer_global_get_start(); + label_length := _lexer_global_get_end() - label_name; + _lexer_skip_token(); + + statement_size := _goto_statement_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.goto_statement); + _statement_set_next(result, 0); + _goto_statement_set_label(result, label_name); + _goto_statement_set_length(result, label_length); + + return result +end; + +proc _compile_goto_statement(parser_node: Word); +var + label_name: Word; + label_length: Word; +begin + label_name := _goto_statement_get_label(parser_node); + label_length := _goto_statement_get_length(parser_node); _write_z("\tj .\0"); + _write_s(label_name, label_length); + _write_c('\n') +end; - _write_s(next_token, next_length); - _lexer_skip_token() +proc _label_declaration_size(); + return 16 +end; + +proc _label_declaration_get_label(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _label_declaration_set_label(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _label_declaration_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _label_declaration_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _parse_label_declaration(); +var + token_kind: Word; + label_name: Word; + label_length: Word; + statement_size: Word; + result: Word; +begin + _lexer_skip_token(); + _lexer_read_token(@token_kind); + + label_name := _lexer_global_get_start(); + label_length := _lexer_global_get_end() - label_name; + _lexer_skip_token(); + + statement_size := _label_declaration_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.label_declaration); + _statement_set_next(result, 0); + _goto_statement_set_label(result, label_name); + _goto_statement_set_length(result, label_length); + + return result +end; + +proc _compile_label_declaration(parser_node: Word); +var + label_name: Word; + label_length: Word; +begin + label_name := _goto_statement_get_label(parser_node); + label_length := _goto_statement_get_length(parser_node); + + _write_c('.'); + _write_s(label_name, label_length); + _write_z(":\n\0"); end; proc _compile_local_designator(symbol: Word); @@ -1341,12 +1576,44 @@ begin elsif node_kind = NodeKind.field_access_expression then _compile_enumeration_value(parser_node); is_address := 0 + elsif node_kind = NodeKind.call then + _compile_call(parser_node); + _write_z("\tmv t0, a0\n\0"); + is_address := 0 else is_address := _compile_simple_expression(parser_node) end; return is_address end; +proc _assignment_statement_size(); + return 16 +end; + +proc _assignment_statement_get_assignee(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _assignment_statement_set_assignee(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _assignment_statement_get_assignment(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _assignment_statement_set_assignment(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + proc _compile_assignment(); var token_kind: Word; @@ -1368,15 +1635,52 @@ begin _write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0") end; -proc _compile_return_statement(); +proc _return_statement_size(); + return 12 +end; + +proc _return_statement_get_returned(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _return_statement_set_returned(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _parse_return_statement(); var token_kind: Word; + returned: Word; + label_length: Word; + statement_size: Word; + result: Word; begin (* Skip "return" keyword and whitespace after it. *) - _lexer_read_token(@token_kind); _lexer_skip_token(); + _lexer_read_token(@token_kind); - _compile_expression(); + returned := _parse_binary_expression(); + + statement_size := _return_statement_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.return_statement); + _statement_set_next(result, 0); + _return_statement_set_returned(result, returned); + + return result +end; + +proc _compile_return_statement(parser_node: Word); +var + return_expression: Word; +begin + return_expression := _return_statement_get_returned(parser_node); + _compile_binary_expression(return_expression); _write_z("\tmv a0, t0\n\0") end; @@ -1453,38 +1757,24 @@ begin _write_z(":\n\0") end; -proc _compile_label_declaration(); -var - label_token: Word; - token_kind: Word; - name: Word; -begin - (* Skip the dot. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - label_token := _lexer_global_get_end(); - label_token := label_token - name; - _write_c('.'); - _write_s(name, label_token); - _write_z(":\n\0"); - _lexer_skip_token() -end; - proc _compile_statement(); var current_byte: Word; token_kind: Word; + parsed_call: Word; begin _lexer_read_token(@token_kind); if token_kind = LexerTokenKind._goto then - _compile_goto() + parsed_call := _parse_goto_statement(); + _compile_goto_statement(parsed_call) elsif token_kind = LexerTokenKind._if then _compile_if() elsif token_kind = LexerTokenKind._return then - _compile_return_statement() + parsed_call := _parse_return_statement(); + _compile_return_statement(parsed_call) elsif token_kind = LexerTokenKind.dot then + parsed_call := _parse_goto_statement(); _compile_label_declaration() elsif token_kind = LexerTokenKind.identifier then current_byte := _lexer_global_get_start(); @@ -1492,7 +1782,9 @@ begin (* This is a call if the statement starts with an underscore. *) if current_byte = '_' then - _compile_call() + parsed_call := _parse_variable_expression(); + parsed_call := _parse_call(parsed_call); + _compile_call(parsed_call) else _compile_assignment() end @@ -1866,6 +2158,7 @@ var name_pointer: Word; name_length: Word; token_kind: Word; + parser_node: Word; begin (* Skip "proc ". *) _lexer_read_token(@token_kind); @@ -1905,7 +2198,8 @@ begin _lexer_skip_token(); _compile_statement_list() elsif token_kind = LexerTokenKind._return then - _compile_return_statement() + parser_node := _parse_return_statement(parser_node); + _compile_return_statement(parser_node) end; (* Write the epilogue. *) @@ -2143,8 +2437,8 @@ begin _lexer_read_token(@token_kind); if token_kind <> LexerTokenKind.assignment then - (* Else we assume this is a zeroed 102400 bytes big array. *) - _write_z(" .zero 102400\0") + (* Else we assume this is a zeroed 204800 bytes big array. *) + _write_z(" .zero 204800\0") else (* Skip the assignment sign with surrounding whitespaces. *) _lexer_skip_token(); @@ -3130,7 +3424,7 @@ begin .start_read; (* Second argument is buffer size. Modifying update the source_code definition. *) - last_read := _read_file(offset, 102400); + last_read := _read_file(offset, 204800); if last_read > 0 then offset := offset + last_read; goto start_read