From d25c2d41d1203b285b59484a7c9e50c307dde7c9 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Wed, 8 Oct 2025 12:06:07 +0200 Subject: [PATCH] Extract simple expression parser --- boot/stage15.elna | 320 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 259 insertions(+), 61 deletions(-) diff --git a/boot/stage15.elna b/boot/stage15.elna index 6200fbd..012628c 100644 --- a/boot/stage15.elna +++ b/boot/stage15.elna @@ -128,10 +128,13 @@ type _goto, eof ); - NodeKind := ( + NodeKind = ( integer_literal, string_literal, - character_literal + character_literal, + variable_expression, + field_access_expression, + dereference_expression ); const @@ -623,21 +626,68 @@ begin _write_c('\n'); end; -proc _compile_variable_expression(); +proc _variable_expression_size(); + return 12 +end; + +proc _variable_expression_get_name(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _variable_expression_set_name(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _variable_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _variable_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _parse_variable_expression(); var name: Word; - lookup_result: Word; name_token: Word; + result: Word; begin name := _lexer_global_get_start(); name_token := _lexer_global_get_end() - name; - lookup_result := _symbol_table_lookup(@symbol_table_global, name, name_token); + _lexer_skip_token(); + result := memory_free_pointer; + memory_free_pointer := memory_free_pointer + _variable_expression_size(); + + _node_set_kind(result, NodeKind.variable_expression); + _variable_expression_set_name(result, name); + _variable_expression_set_length(result, name_token); + + return result +end; + +proc _compile_variable_expression(variable_expression: Word); +var + name: Word; + name_token: Word; + lookup_result: Word; +begin + name := _variable_expression_get_name(variable_expression); + name_token := _variable_expression_get_length(variable_expression); + + lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); if lookup_result <> 0 then - _compile_enumeration_value(lookup_result) + _compile_local_designator(lookup_result) else - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0") + _compile_global_designator(variable_expression) end end; @@ -645,9 +695,12 @@ end; * Compiled take address expression, starting with an "@" sign. *) proc _compile_address_expression(); +var + designator: Word; begin _lexer_skip_token(); - _compile_designator() + designator := _parse_designator(); + _compile_designator(designator) end; (** @@ -706,6 +759,7 @@ begin length := _string_length(token_start); _lexer_skip_token(); + _node_set_kind(result, NodeKind.string_literal); _string_literal_node_set_value(result, token_start); _string_literal_node_set_length(result, length); @@ -731,6 +785,99 @@ begin _write_z("\tadd t0, t0, t1\n\0") end; +proc _parse_simple_expression(); +var + current_character: Word; + parser_node: Word; + token_kind: Word; +begin + parser_node := 0; + _lexer_read_token(@token_kind); + + if token_kind = LexerTokenKind.character then + parser_node := _parse_character_literal() + elsif token_kind = LexerTokenKind.integer then + parser_node := _parse_integer_literal() + elsif token_kind = LexerTokenKind.string then + parser_node := _parse_string_literal() + elsif token_kind = LexerTokenKind.identifier then + current_character := _lexer_global_get_start(); + current_character := _load_byte(current_character); + + (* This is a call if the statement starts with an underscore. *) + if current_character <> '_' then + parser_node := _parse_variable_expression() + end + end; + return parser_node +end; + +proc _dereference_expression_size(); + return 8 +end; + +proc _dereference_expression_get_pointer(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _dereference_expression_set_pointer(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _parse_dereference_expression(simple_expression: Word); +var + result: Word; +begin + result := memory_free_pointer; + memory_free_pointer := memory_free_pointer + _dereference_expression_size(); + + _node_set_kind(result, NodeKind.dereference_expression); + _dereference_expression_set_pointer(result, simple_expression); + _lexer_skip_token(); + + return result +end; + +proc _parse_designator(); +var + simple_expression: Word; + token_kind: Word; +begin + simple_expression := _parse_simple_expression(); + + _lexer_read_token(@token_kind); + + if token_kind = LexerTokenKind.hat then + simple_expression := _parse_dereference_expression(simple_expression) + elsif token_kind = LexerTokenKind.dot then + simple_expression := _parse_field_access_expression(simple_expression) + end; + return simple_expression +end; + +proc _compile_simple_expression(parser_node: Word); +var + is_address: Word; +begin + is_address := 0; + + if _node_get_kind(parser_node) = NodeKind.character_literal then + _compile_character_literal(parser_node) + elsif _node_get_kind(parser_node) = NodeKind.string_literal then + _compile_string_literal(parser_node) + elsif _node_get_kind(parser_node) = NodeKind.integer_literal then + _compile_integer_literal(parser_node) + else + _compile_variable_expression(parser_node); + is_address := 1 + end; + return is_address +end; + proc _compile_term(); var current_character: Word; @@ -739,15 +886,11 @@ var begin _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.character then - parser_node := _parse_character_literal(); - _compile_character_literal(parser_node) - elsif token_kind = LexerTokenKind.string then - parser_node := _parse_string_literal(); - _compile_string_literal(parser_node) - elsif token_kind = LexerTokenKind.integer then - parser_node := _parse_integer_literal(); - _compile_integer_literal(parser_node) + parser_node := _parse_designator(); + if parser_node <> 0 then + if _compile_designator(parser_node) = 1 then + _write_z("\tlw t0, (t0) # Designator is an address.\n\0") + end elsif token_kind = LexerTokenKind.at then _compile_address_expression() elsif token_kind = LexerTokenKind.minus then @@ -755,16 +898,8 @@ begin elsif token_kind = LexerTokenKind.not then _compile_not_expression() elsif token_kind = LexerTokenKind.identifier then - current_character := _lexer_global_get_start(); - current_character := _load_byte(current_character); - - (* This is a call if the statement starts with an underscore. *) - if current_character = '_' then - _compile_call(); - _write_z("\tmv t0, a0\n\0") - else - _compile_variable_expression() - end + _compile_call(); + _write_z("\tmv t0, a0\n\0") end end; @@ -979,28 +1114,64 @@ begin _write_z("\taddi t0, sp, \0"); variable_offset := _parameter_info_get_offset(symbol); _write_i(variable_offset); - _write_c('\n'); - _lexer_skip_token() + _write_c('\n') end; -proc _compile_global_designator(); +proc _compile_global_designator(variable_expression: Word); var name: Word; - token_kind: Word; token_length: Word; begin _write_z("\tla t0, \0"); - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - token_length := _lexer_global_get_end() - name; - _write_s(name, token_length); - _lexer_skip_token(); + name := _variable_expression_get_name(variable_expression); + token_length := _variable_expression_get_length(variable_expression); + _write_s(name, token_length); _write_c('\n') end; -proc _compile_enumeration_value(symbol: Word); +proc _field_access_expression_size(); + return 16 +end; + +proc _field_access_expression_get_aggregate(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _field_access_expression_set_aggregate(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _field_access_expression_get_field(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _field_access_expression_set_field(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _field_access_expression_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _field_access_expression_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _compile_enumeration_value(field_access_expression: Word); var enumeration_type: Word; members: Word; @@ -1011,20 +1182,22 @@ var member_name: Word; member_length: Word; counter: Word; + symbol: Word; begin + symbol := _field_access_expression_get_aggregate(field_access_expression); + value_name := _variable_expression_get_name(symbol); + name_length := _variable_expression_get_length(symbol); + + symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length); + enumeration_type := _type_info_get_type(symbol); members := _enumeration_type_get_members(enumeration_type); members_length := _enumeration_type_get_length(enumeration_type); - (* Skip enumeration type name and dot. Read the enumeration value. *) - _lexer_skip_token(); - _lexer_read_token(@token_type); - _lexer_skip_token(); _lexer_read_token(@token_type); - value_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() - value_name; - _lexer_skip_token(); + value_name := _field_access_expression_get_field(field_access_expression); + name_length := _field_access_expression_get_length(field_access_expression); counter := 1; .compile_enumeration_value_members; @@ -1044,35 +1217,60 @@ begin end end; -proc _compile_designator(); +proc _parse_field_access_expression(aggregate: Word); +var + token_kind: Word; + name: Word; + name_token: Word; + result: Word; +begin + (* Skip dot. Read the enumeration value. *) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + + name := _lexer_global_get_start(); + name_token := _lexer_global_get_end() - name; + result := memory_free_pointer; + _lexer_skip_token(); + memory_free_pointer := memory_free_pointer + _field_access_expression_size(); + + _node_set_kind(result, NodeKind.field_access_expression); + _field_access_expression_set_aggregate(result, aggregate); + _field_access_expression_set_field(result, name); + _field_access_expression_set_length(result, name_token); + + return result +end; + +proc _compile_designator(parser_node: Word); var name_token: Word; lookup_result: Word; token_kind: Word; - name: Word; + parser_node: Word; + is_address: Word; begin - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - name_token := _lexer_global_get_end() - name; - lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); + is_address := 1; - if lookup_result <> 0 then - _compile_local_designator(lookup_result) - else - _compile_global_designator() - end; - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.hat then - _lexer_skip_token(); + if _node_get_kind(parser_node) = NodeKind.dereference_expression then + parser_node := _dereference_expression_get_pointer(parser_node); + _compile_simple_expression(parser_node); _write_z("\tlw t0, (t0)\n\0") - end + elsif _node_get_kind(parser_node) = NodeKind.field_access_expression then + _compile_enumeration_value(parser_node); + is_address := 0 + else + is_address := _compile_simple_expression(parser_node) + end; + return is_address end; proc _compile_assignment(); var token_kind: Word; begin - _compile_designator(); + token_kind := _parse_designator(); + _compile_designator(token_kind); (* Save the assignee address on the stack. *) _write_z("\tsw t0, 60(sp)\n\0");