Extract simple expression parser

This commit is contained in:
2025-10-08 12:06:07 +02:00
parent af9ad5b712
commit d25c2d41d1

View File

@@ -128,10 +128,13 @@ type
_goto, _goto,
eof eof
); );
NodeKind := ( NodeKind = (
integer_literal, integer_literal,
string_literal, string_literal,
character_literal character_literal,
variable_expression,
field_access_expression,
dereference_expression
); );
const const
@@ -623,21 +626,68 @@ begin
_write_c('\n'); _write_c('\n');
end; end;
(**
 * _variable_expression_size: returns the constant 12, the amount by which
 * _parse_variable_expression advances memory_free_pointer for one node.
 * Presumably bytes (slots at 4 and 8 are used by the accessors) - TODO confirm.
 *
 * NOTE(review): this span comes from a two-column diff rendering; the header
 * line also carries the removed text.
 *)
proc _compile_variable_expression(); proc _variable_expression_size();
return 12
end;
(**
 * Returns the word stored at address `this` + 4 of a variable expression
 * node. _parse_variable_expression stores the lexer token start there.
 *)
proc _variable_expression_get_name(this: Word);
begin
this := this + 4;
return this^
end;
(**
 * Stores `value` at address `this` + 4 of a variable expression node,
 * the slot read back by _variable_expression_get_name.
 *)
proc _variable_expression_set_name(this: Word, value: Word);
begin
this := this + 4;
this^ := value
end;
(**
 * Returns the word stored at address `this` + 8 of a variable expression
 * node. _parse_variable_expression stores the token length (end - start)
 * there.
 *)
proc _variable_expression_get_length(this: Word);
begin
this := this + 8;
return this^
end;
(**
 * Stores `value` at address `this` + 8 of a variable expression node,
 * the slot read back by _variable_expression_get_length.
 *)
proc _variable_expression_set_length(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
(**
 * Builds a variable expression node from the current lexer token.
 *
 * The token start and its length (end - start) are captured and the token
 * is skipped. The node is taken at memory_free_pointer, which is advanced
 * by _variable_expression_size(); kind, name and length are then filled in.
 * Returns the node address.
 *
 * NOTE(review): this span comes from a two-column diff rendering, so several
 * lines carry removed text next to added text; the description follows the
 * added text.
 *)
proc _parse_variable_expression();
var var
name: Word; name: Word;
lookup_result: Word;
name_token: Word; name_token: Word;
result: Word;
begin begin
name := _lexer_global_get_start(); name := _lexer_global_get_start();
name_token := _lexer_global_get_end() - name; name_token := _lexer_global_get_end() - name;
lookup_result := _symbol_table_lookup(@symbol_table_global, name, name_token); _lexer_skip_token();
result := memory_free_pointer;
memory_free_pointer := memory_free_pointer + _variable_expression_size();
_node_set_kind(result, NodeKind.variable_expression);
_variable_expression_set_name(result, name);
_variable_expression_set_length(result, name_token);
return result
end;
(**
 * Compiles a previously parsed variable expression node.
 *
 * The name and length stored in the node are looked up in
 * symbol_table_local. A non-zero lookup result is handed to
 * _compile_local_designator; otherwise the node itself is handed to
 * _compile_global_designator.
 *
 * NOTE(review): the tail of this span comes from a two-column diff
 * rendering (removed text next to added text); the description follows the
 * added text.
 *)
proc _compile_variable_expression(variable_expression: Word);
var
name: Word;
name_token: Word;
lookup_result: Word;
begin
name := _variable_expression_get_name(variable_expression);
name_token := _variable_expression_get_length(variable_expression);
lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
if lookup_result <> 0 then if lookup_result <> 0 then
_compile_enumeration_value(lookup_result) _compile_local_designator(lookup_result)
else else
_compile_designator(); _compile_global_designator(variable_expression)
_write_z("\tlw t0, (t0)\n\0")
end end
end; end;
@@ -645,9 +695,12 @@ end;
* Compiled take address expression, starting with an "@" sign. * Compiled take address expression, starting with an "@" sign.
*) *)
proc _compile_address_expression(); proc _compile_address_expression();
var
designator: Word;
begin begin
(* Skip the "@" token; in the added text the designator that follows is
   parsed into a node first and that node is then compiled. The value
   returned by _compile_designator is not used here.
   NOTE(review): two-column diff rendering, removed text shares lines. *)
_lexer_skip_token(); _lexer_skip_token();
_compile_designator() designator := _parse_designator();
_compile_designator(designator)
end; end;
(** (**
@@ -706,6 +759,7 @@ begin
length := _string_length(token_start); length := _string_length(token_start);
_lexer_skip_token(); _lexer_skip_token();
_node_set_kind(result, NodeKind.string_literal);
_string_literal_node_set_value(result, token_start); _string_literal_node_set_value(result, token_start);
_string_literal_node_set_length(result, length); _string_literal_node_set_length(result, length);
@@ -731,6 +785,99 @@ begin
_write_z("\tadd t0, t0, t1\n\0") _write_z("\tadd t0, t0, t1\n\0")
end; end;
(**
 * Parses a literal or a variable reference at the current lexer token.
 *
 * Character, integer and string tokens go to their literal parsers. An
 * identifier is parsed as a variable expression unless its first byte is
 * an underscore. Returns the node produced, or 0 when none of these cases
 * applies (parser_node keeps its initial value).
 *)
proc _parse_simple_expression();
var
current_character: Word;
parser_node: Word;
token_kind: Word;
begin
(* 0 signals "nothing parsed" to the caller. *)
parser_node := 0;
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.character then
parser_node := _parse_character_literal()
elsif token_kind = LexerTokenKind.integer then
parser_node := _parse_integer_literal()
elsif token_kind = LexerTokenKind.string then
parser_node := _parse_string_literal()
elsif token_kind = LexerTokenKind.identifier then
(* Look at the first byte of the identifier text. *)
current_character := _lexer_global_get_start();
current_character := _load_byte(current_character);
(* This is a call if the statement starts with an underscore. *)
if current_character <> '_' then
parser_node := _parse_variable_expression()
end
end;
return parser_node
end;
(**
 * Returns the constant 8, the amount by which
 * _parse_dereference_expression advances memory_free_pointer for one node.
 * Presumably bytes - TODO confirm.
 *)
proc _dereference_expression_size();
return 8
end;
(**
 * Returns the word stored at address `this` + 4 of a dereference
 * expression node. _parse_dereference_expression stores the node of the
 * dereferenced expression there.
 *)
proc _dereference_expression_get_pointer(this: Word);
begin
this := this + 4;
return this^
end;
(**
 * Stores `value` at address `this` + 4 of a dereference expression node,
 * the slot read back by _dereference_expression_get_pointer.
 *)
proc _dereference_expression_set_pointer(this: Word, value: Word);
begin
this := this + 4;
this^ := value
end;
(**
 * Wraps an already parsed expression node in a dereference expression node.
 *
 * The node is taken at memory_free_pointer, which is advanced by
 * _dereference_expression_size(). Its kind is set and `simple_expression`
 * is stored as the pointer operand. Returns the new node address.
 *)
proc _parse_dereference_expression(simple_expression: Word);
var
result: Word;
begin
result := memory_free_pointer;
memory_free_pointer := memory_free_pointer + _dereference_expression_size();
_node_set_kind(result, NodeKind.dereference_expression);
_dereference_expression_set_pointer(result, simple_expression);
(* Skip the current token; _parse_designator calls this procedure when that
   token is a hat, so presumably this consumes the hat - verify. *)
_lexer_skip_token();
return result
end;
(**
 * Parses a simple expression optionally followed by one postfix: a hat
 * (dereference) or a dot (field access).
 *
 * Returns the wrapping node when a postfix follows, otherwise whatever
 * _parse_simple_expression returned (which may be 0). Only a single postfix
 * is examined; there is no loop.
 *)
proc _parse_designator();
var
simple_expression: Word;
token_kind: Word;
begin
simple_expression := _parse_simple_expression();
(* Inspect the token that follows the simple expression. *)
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.hat then
simple_expression := _parse_dereference_expression(simple_expression)
elsif token_kind = LexerTokenKind.dot then
simple_expression := _parse_field_access_expression(simple_expression)
end;
return simple_expression
end;
(**
 * Compiles a node produced by _parse_simple_expression, dispatching on the
 * node kind.
 *
 * Returns 0 for character, string and integer literals. Any other kind is
 * passed to _compile_variable_expression and 1 is returned. _compile_term
 * emits an additional "lw t0, (t0)" when the designator result is 1.
 *)
proc _compile_simple_expression(parser_node: Word);
var
is_address: Word;
begin
is_address := 0;
if _node_get_kind(parser_node) = NodeKind.character_literal then
_compile_character_literal(parser_node)
elsif _node_get_kind(parser_node) = NodeKind.string_literal then
_compile_string_literal(parser_node)
elsif _node_get_kind(parser_node) = NodeKind.integer_literal then
_compile_integer_literal(parser_node)
else
(* Every non-literal kind is treated as a variable expression here. *)
_compile_variable_expression(parser_node);
is_address := 1
end;
return is_address
end;
proc _compile_term(); proc _compile_term();
var var
current_character: Word; current_character: Word;
@@ -739,15 +886,11 @@ var
begin begin
_lexer_read_token(@token_kind); _lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.character then parser_node := _parse_designator();
parser_node := _parse_character_literal(); if parser_node <> 0 then
_compile_character_literal(parser_node) if _compile_designator(parser_node) = 1 then
elsif token_kind = LexerTokenKind.string then _write_z("\tlw t0, (t0) # Designator is an address.\n\0")
parser_node := _parse_string_literal(); end
_compile_string_literal(parser_node)
elsif token_kind = LexerTokenKind.integer then
parser_node := _parse_integer_literal();
_compile_integer_literal(parser_node)
elsif token_kind = LexerTokenKind.at then elsif token_kind = LexerTokenKind.at then
_compile_address_expression() _compile_address_expression()
elsif token_kind = LexerTokenKind.minus then elsif token_kind = LexerTokenKind.minus then
@@ -755,16 +898,8 @@ begin
elsif token_kind = LexerTokenKind.not then elsif token_kind = LexerTokenKind.not then
_compile_not_expression() _compile_not_expression()
elsif token_kind = LexerTokenKind.identifier then elsif token_kind = LexerTokenKind.identifier then
current_character := _lexer_global_get_start(); _compile_call();
current_character := _load_byte(current_character); _write_z("\tmv t0, a0\n\0")
(* This is a call if the statement starts with an underscore. *)
if current_character = '_' then
_compile_call();
_write_z("\tmv t0, a0\n\0")
else
_compile_variable_expression()
end
end end
end; end;
@@ -979,28 +1114,64 @@ begin
_write_z("\taddi t0, sp, \0"); _write_z("\taddi t0, sp, \0");
variable_offset := _parameter_info_get_offset(symbol); variable_offset := _parameter_info_get_offset(symbol);
_write_i(variable_offset); _write_i(variable_offset);
_write_c('\n'); _write_c('\n')
_lexer_skip_token()
end; end;
(**
 * Writes "\tla t0, " followed by a name and a newline to the output.
 *
 * In the added text the name and its length come from the
 * `variable_expression` node instead of the lexer.
 *
 * NOTE(review): this span comes from a two-column diff rendering (removed
 * text next to added text); the description follows the added text.
 *)
proc _compile_global_designator(); proc _compile_global_designator(variable_expression: Word);
var var
name: Word; name: Word;
token_kind: Word;
token_length: Word; token_length: Word;
begin begin
_write_z("\tla t0, \0"); _write_z("\tla t0, \0");
_lexer_read_token(@token_kind); name := _variable_expression_get_name(variable_expression);
name := _lexer_global_get_start(); token_length := _variable_expression_get_length(variable_expression);
token_length := _lexer_global_get_end() - name;
_write_s(name, token_length);
_lexer_skip_token();
_write_s(name, token_length);
_write_c('\n') _write_c('\n')
end; end;
(**
 * _field_access_expression_size: returns the constant 16, the amount by
 * which _parse_field_access_expression advances memory_free_pointer for one
 * node. Presumably bytes (slots at 4, 8 and 12 are used by the accessors) -
 * TODO confirm.
 *
 * NOTE(review): this span comes from a two-column diff rendering; the header
 * line also carries the removed text.
 *)
proc _compile_enumeration_value(symbol: Word); proc _field_access_expression_size();
return 16
end;
(**
 * Returns the word stored at address `this` + 4 of a field access
 * expression node. _parse_field_access_expression stores the node of the
 * expression that precedes the dot there.
 *)
proc _field_access_expression_get_aggregate(this: Word);
begin
this := this + 4;
return this^
end;
(**
 * Stores `value` at address `this` + 4 of a field access expression node,
 * the slot read back by _field_access_expression_get_aggregate.
 *)
proc _field_access_expression_set_aggregate(this: Word, value: Word);
begin
this := this + 4;
this^ := value
end;
(**
 * Returns the word stored at address `this` + 8 of a field access
 * expression node. _parse_field_access_expression stores the token start of
 * the field name there.
 *)
proc _field_access_expression_get_field(this: Word);
begin
this := this + 8;
return this^
end;
(**
 * Stores `value` at address `this` + 8 of a field access expression node,
 * the slot read back by _field_access_expression_get_field.
 *)
proc _field_access_expression_set_field(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
(**
 * Returns the word stored at address `this` + 12 of a field access
 * expression node. _parse_field_access_expression stores the length of the
 * field name token (end - start) there.
 *)
proc _field_access_expression_get_length(this: Word);
begin
this := this + 12;
return this^
end;
(**
 * Stores `value` at address `this` + 12 of a field access expression node,
 * the slot read back by _field_access_expression_get_length.
 *)
proc _field_access_expression_set_length(this: Word, value: Word);
begin
this := this + 12;
this^ := value
end;
proc _compile_enumeration_value(field_access_expression: Word);
var var
enumeration_type: Word; enumeration_type: Word;
members: Word; members: Word;
@@ -1011,20 +1182,22 @@ var
member_name: Word; member_name: Word;
member_length: Word; member_length: Word;
counter: Word; counter: Word;
symbol: Word;
begin begin
symbol := _field_access_expression_get_aggregate(field_access_expression);
value_name := _variable_expression_get_name(symbol);
name_length := _variable_expression_get_length(symbol);
symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length);
enumeration_type := _type_info_get_type(symbol); enumeration_type := _type_info_get_type(symbol);
members := _enumeration_type_get_members(enumeration_type); members := _enumeration_type_get_members(enumeration_type);
members_length := _enumeration_type_get_length(enumeration_type); members_length := _enumeration_type_get_length(enumeration_type);
(* Skip enumeration type name and dot. Read the enumeration value. *)
_lexer_skip_token();
_lexer_read_token(@token_type);
_lexer_skip_token();
_lexer_read_token(@token_type); _lexer_read_token(@token_type);
value_name := _lexer_global_get_start(); value_name := _field_access_expression_get_field(field_access_expression);
name_length := _lexer_global_get_end() - value_name; name_length := _field_access_expression_get_length(field_access_expression);
_lexer_skip_token();
counter := 1; counter := 1;
.compile_enumeration_value_members; .compile_enumeration_value_members;
@@ -1044,35 +1217,60 @@ begin
end end
end; end;
(**
 * _parse_field_access_expression: builds a field access expression node for
 * `aggregate` followed by a dot and a name.
 *
 * The current token is skipped, then the start and length of the next token
 * are captured as the field name and that token is skipped too. The node is
 * taken at memory_free_pointer, which is advanced by
 * _field_access_expression_size(). Returns the node address.
 *
 * NOTE(review): this span comes from a two-column diff rendering; the header
 * line also carries the removed text.
 *)
proc _compile_designator(); proc _parse_field_access_expression(aggregate: Word);
var
token_kind: Word;
name: Word;
name_token: Word;
result: Word;
begin
(* Skip dot. Read the enumeration value. *)
_lexer_skip_token();
_lexer_read_token(@token_kind);
name := _lexer_global_get_start();
name_token := _lexer_global_get_end() - name;
(* The node starts at the current free pointer; the pointer itself is
   advanced two statements below. *)
result := memory_free_pointer;
_lexer_skip_token();
memory_free_pointer := memory_free_pointer + _field_access_expression_size();
_node_set_kind(result, NodeKind.field_access_expression);
_field_access_expression_set_aggregate(result, aggregate);
_field_access_expression_set_field(result, name);
_field_access_expression_set_length(result, name_token);
return result
end;
(**
 * Compiles a node produced by _parse_designator and returns is_address.
 *
 * In the added text:
 * - dereference expression: the pointer operand is compiled as a simple
 *   expression, "lw t0, (t0)" is emitted, and 1 is returned;
 * - field access expression: handed to _compile_enumeration_value, 0 is
 *   returned;
 * - anything else: the result of _compile_simple_expression is returned.
 *
 * NOTE(review): this span comes from a two-column diff rendering (removed
 * text next to added text); the description follows the added text.
 * NOTE(review): the added text declares a local `parser_node` although
 * `parser_node` is also the parameter name - confirm this is intended.
 * NOTE(review): name_token, lookup_result and token_kind look unused by the
 * added body - verify.
 *)
proc _compile_designator(parser_node: Word);
var var
name_token: Word; name_token: Word;
lookup_result: Word; lookup_result: Word;
token_kind: Word; token_kind: Word;
name: Word; parser_node: Word;
is_address: Word;
begin begin
_lexer_read_token(@token_kind); is_address := 1;
name := _lexer_global_get_start();
name_token := _lexer_global_get_end() - name;
lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
if lookup_result <> 0 then if _node_get_kind(parser_node) = NodeKind.dereference_expression then
_compile_local_designator(lookup_result) parser_node := _dereference_expression_get_pointer(parser_node);
else _compile_simple_expression(parser_node);
_compile_global_designator()
end;
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.hat then
_lexer_skip_token();
_write_z("\tlw t0, (t0)\n\0") _write_z("\tlw t0, (t0)\n\0")
end elsif _node_get_kind(parser_node) = NodeKind.field_access_expression then
_compile_enumeration_value(parser_node);
is_address := 0
else
is_address := _compile_simple_expression(parser_node)
end;
return is_address
end; end;
proc _compile_assignment(); proc _compile_assignment();
var var
token_kind: Word; token_kind: Word;
begin begin
_compile_designator(); token_kind := _parse_designator();
_compile_designator(token_kind);
(* Save the assignee address on the stack. *) (* Save the assignee address on the stack. *)
_write_z("\tsw t0, 60(sp)\n\0"); _write_z("\tsw t0, 60(sp)\n\0");