Extract simple expression parser

This commit is contained in:
2025-10-08 12:06:07 +02:00
parent af9ad5b712
commit d25c2d41d1

View File

@@ -128,10 +128,13 @@ type
_goto,
eof
);
(*
 * Tags for parser nodes. The parse procedures store one of these with
 * _node_set_kind and the compile procedures branch on _node_get_kind.
 * NOTE(review): the declaration header and the character_literal member each
 * appear twice below (":=" versus "=", with and without a trailing comma).
 * That looks like removed and added diff lines shown together - keep only one
 * of each after checking the committed file.
 *)
NodeKind := (
NodeKind = (
integer_literal,
string_literal,
character_literal
character_literal,
variable_expression,
field_access_expression,
dereference_expression
);
const
@@ -623,21 +626,68 @@ begin
_write_c('\n');
end;
proc _compile_variable_expression();
(**
 * Returns the size of a variable expression node (12), the amount by which
 * _parse_variable_expression advances memory_free_pointer when it allocates
 * one. The accessors use offsets 4 (name) and 8 (length); presumably offset 0
 * holds the node kind - TODO confirm against _node_set_kind.
 *)
proc _variable_expression_size();
return 12
end;
(**
 * Reads the name field of a variable expression node.
 *
 * this: Address of the node.
 *
 * Returns the word stored at offset 4 from the node address; the parser
 * stores the start address of the identifier token there.
 *)
proc _variable_expression_get_name(this: Word);
begin
(* The parameter is reused as the field address before it is dereferenced. *)
this := this + 4;
return this^
end;
(**
 * Writes the name field of a variable expression node.
 *
 * this: Address of the node.
 * value: Word to store at offset 4; the parser passes the start address of
 *        the identifier token.
 *)
proc _variable_expression_set_name(this: Word, value: Word);
begin
(* The parameter is reused as the field address before the store. *)
this := this + 4;
this^ := value
end;
(**
 * Reads the length field of a variable expression node.
 *
 * this: Address of the node.
 *
 * Returns the word stored at offset 8 from the node address; the parser
 * stores the identifier length (token end minus token start) there.
 *)
proc _variable_expression_get_length(this: Word);
begin
(* The parameter is reused as the field address before it is dereferenced. *)
this := this + 8;
return this^
end;
(**
 * Writes the length field of a variable expression node.
 *
 * this: Address of the node.
 * value: Word to store at offset 8; the parser passes the identifier length.
 *)
proc _variable_expression_set_length(this: Word, value: Word);
begin
(* The parameter is reused as the field address before the store. *)
this := this + 8;
this^ := value
end;
(**
 * Parses the identifier token currently held by the global lexer into a
 * variable expression node.
 *
 * The node is taken from memory_free_pointer, which is advanced by the node
 * size. It records the start address and the length of the identifier, and
 * the token is skipped afterwards.
 *
 * No symbol table lookup happens here: the previous lookup result was never
 * read, so the lookup is left to the compile step.
 *
 * Returns the address of the new node.
 *)
proc _parse_variable_expression();
var
  name: Word;
  name_length: Word;
  result: Word;
begin
  (* Capture the token bounds before the lexer moves past the identifier. *)
  name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() - name;
  _lexer_skip_token();

  (* Allocate the node by advancing the free-memory pointer. *)
  result := memory_free_pointer;
  memory_free_pointer := memory_free_pointer + _variable_expression_size();

  _node_set_kind(result, NodeKind.variable_expression);
  _variable_expression_set_name(result, name);
  _variable_expression_set_length(result, name_length);

  return result
end;
(**
 * Emits code for a variable expression node.
 *
 * The name and length stored in the node are looked up in symbol_table_local;
 * the branches below distinguish a non-zero lookup result from a zero one.
 *
 * variable_expression: Address of the variable expression node.
 *
 * NOTE(review): each branch lists alternative statements back to back with no
 * separator (for example _compile_enumeration_value directly followed by
 * _compile_local_designator). That looks like removed and added diff lines
 * shown together - confirm which ones belong to this revision.
 *)
proc _compile_variable_expression(variable_expression: Word);
var
name: Word;
name_token: Word;
lookup_result: Word;
begin
name := _variable_expression_get_name(variable_expression);
name_token := _variable_expression_get_length(variable_expression);
lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
if lookup_result <> 0 then
_compile_enumeration_value(lookup_result)
_compile_local_designator(lookup_result)
else
_compile_designator();
_write_z("\tlw t0, (t0)\n\0")
_compile_global_designator(variable_expression)
end
end;
@@ -645,9 +695,12 @@ end;
* Compiles a take-address expression, which starts with an "@" sign.
*)
proc _compile_address_expression();
var
designator: Word;
begin
(* Step over the "@" token before handling the designator behind it. *)
_lexer_skip_token();
(* NOTE(review): the parameterless _compile_designator call right below is not
   separated from the next statement; it looks like a removed diff line shown
   next to its replacement (parse first, then compile the node) - confirm. *)
_compile_designator()
designator := _parse_designator();
_compile_designator(designator)
end;
(**
@@ -706,6 +759,7 @@ begin
length := _string_length(token_start);
_lexer_skip_token();
_node_set_kind(result, NodeKind.string_literal);
_string_literal_node_set_value(result, token_start);
_string_literal_node_set_length(result, length);
@@ -731,6 +785,99 @@ begin
_write_z("\tadd t0, t0, t1\n\0")
end;
(**
 * Parses a literal or a plain variable reference from the current token.
 *
 * Character, integer and string tokens are handed to their literal parsers.
 * An identifier is parsed as a variable expression unless its first byte is
 * an underscore, which marks a call and is left unparsed here.
 *
 * Returns the address of the parsed node, or 0 when nothing was parsed.
 *)
proc _parse_simple_expression();
var
  first_byte: Word;
  parsed_node: Word;
  lexeme_kind: Word;
begin
  _lexer_read_token(@lexeme_kind);
  parsed_node := 0;

  if lexeme_kind = LexerTokenKind.character then
    parsed_node := _parse_character_literal()
  elsif lexeme_kind = LexerTokenKind.integer then
    parsed_node := _parse_integer_literal()
  elsif lexeme_kind = LexerTokenKind.string then
    parsed_node := _parse_string_literal()
  elsif lexeme_kind = LexerTokenKind.identifier then
    (* Fetch the first byte of the identifier text. *)
    first_byte := _lexer_global_get_start();
    first_byte := _load_byte(first_byte);

    (* A leading underscore marks a call rather than a variable. *)
    if first_byte <> '_' then
      parsed_node := _parse_variable_expression()
    end
  end;
  return parsed_node
end;
(**
 * Returns the size of a dereference expression node (8), the amount by which
 * _parse_dereference_expression advances memory_free_pointer. The accessors
 * use offset 4 for the pointer field; presumably offset 0 holds the node
 * kind - TODO confirm against _node_set_kind.
 *)
proc _dereference_expression_size();
return 8
end;
(**
 * Reads the pointer field of a dereference expression node.
 *
 * this: Address of the node.
 *
 * Returns the word stored at offset 4 from the node address; the parser
 * stores the address of the dereferenced expression node there.
 *)
proc _dereference_expression_get_pointer(this: Word);
begin
(* The parameter is reused as the field address before it is dereferenced. *)
this := this + 4;
return this^
end;
(**
 * Writes the pointer field of a dereference expression node.
 *
 * this: Address of the node.
 * value: Word to store at offset 4; the parser passes the address of the
 *        expression node being dereferenced.
 *)
proc _dereference_expression_set_pointer(this: Word, value: Word);
begin
(* The parameter is reused as the field address before the store. *)
this := this + 4;
this^ := value
end;
(**
 * Wraps an already parsed simple expression in a dereference expression node.
 *
 * simple_expression: Address of the node being dereferenced.
 *
 * Returns the address of the new dereference expression node.
 *)
proc _parse_dereference_expression(simple_expression: Word);
var
result: Word;
begin
(* Allocate the node by advancing the free-memory pointer by the node size. *)
result := memory_free_pointer;
memory_free_pointer := memory_free_pointer + _dereference_expression_size();
_node_set_kind(result, NodeKind.dereference_expression);
_dereference_expression_set_pointer(result, simple_expression);
(* Presumably steps over the "hat" token seen by _parse_designator - TODO confirm. *)
_lexer_skip_token();
return result
end;
(**
 * Parses a designator: a simple expression optionally followed by a single
 * suffix.
 *
 * When the next token is a "hat" the expression is wrapped in a dereference
 * node; when it is a "dot" it is wrapped in a field access node. With any
 * other token the simple expression result is returned unchanged.
 *
 * Returns the node address produced by the last parser that ran.
 *)
proc _parse_designator();
var
  designator: Word;
  following_token: Word;
begin
  designator := _parse_simple_expression();

  (* Inspect the token that follows the simple expression. *)
  _lexer_read_token(@following_token);

  if following_token = LexerTokenKind.hat then
    designator := _parse_dereference_expression(designator)
  elsif following_token = LexerTokenKind.dot then
    designator := _parse_field_access_expression(designator)
  end;
  return designator
end;
(**
 * Emits code for a node produced by the simple expression parser.
 *
 * parser_node: Address of the node to compile.
 *
 * Returns 0 after compiling a character, string or integer literal and 1
 * after compiling any other node as a variable expression.
 *)
proc _compile_simple_expression(parser_node: Word);
var
  node_kind: Word;
  is_address: Word;
begin
  (* Read the tag once and dispatch on it. *)
  node_kind := _node_get_kind(parser_node);
  is_address := 0;

  if node_kind = NodeKind.character_literal then
    _compile_character_literal(parser_node)
  elsif node_kind = NodeKind.string_literal then
    _compile_string_literal(parser_node)
  elsif node_kind = NodeKind.integer_literal then
    _compile_integer_literal(parser_node)
  else
    (* Everything that is not a literal is compiled as a variable. *)
    _compile_variable_expression(parser_node);
    is_address := 1
  end;
  return is_address
end;
proc _compile_term();
var
current_character: Word;
@@ -739,15 +886,11 @@ var
begin
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.character then
parser_node := _parse_character_literal();
_compile_character_literal(parser_node)
elsif token_kind = LexerTokenKind.string then
parser_node := _parse_string_literal();
_compile_string_literal(parser_node)
elsif token_kind = LexerTokenKind.integer then
parser_node := _parse_integer_literal();
_compile_integer_literal(parser_node)
parser_node := _parse_designator();
if parser_node <> 0 then
if _compile_designator(parser_node) = 1 then
_write_z("\tlw t0, (t0) # Designator is an address.\n\0")
end
elsif token_kind = LexerTokenKind.at then
_compile_address_expression()
elsif token_kind = LexerTokenKind.minus then
@@ -755,16 +898,8 @@ begin
elsif token_kind = LexerTokenKind.not then
_compile_not_expression()
elsif token_kind = LexerTokenKind.identifier then
current_character := _lexer_global_get_start();
current_character := _load_byte(current_character);
(* This is a call if the statement starts with an underscore. *)
if current_character = '_' then
_compile_call();
_write_z("\tmv t0, a0\n\0")
else
_compile_variable_expression()
end
_compile_call();
_write_z("\tmv t0, a0\n\0")
end
end;
@@ -979,28 +1114,64 @@ begin
_write_z("\taddi t0, sp, \0");
variable_offset := _parameter_info_get_offset(symbol);
_write_i(variable_offset);
_write_c('\n');
_lexer_skip_token()
_write_c('\n')
end;
proc _compile_global_designator();
(**
 * Emits an "la t0, <name>" line for the name stored in a variable
 * expression node.
 *
 * variable_expression: Address of the variable expression node.
 *
 * NOTE(review): the body takes the name both from the lexer (reading and
 * skipping a token) and from the node, and writes it twice. That looks like
 * removed and added diff lines shown together - confirm that only the
 * node-based lines belong to this revision.
 *)
proc _compile_global_designator(variable_expression: Word);
var
name: Word;
token_kind: Word;
token_length: Word;
begin
_write_z("\tla t0, \0");
_lexer_read_token(@token_kind);
name := _lexer_global_get_start();
token_length := _lexer_global_get_end() - name;
_write_s(name, token_length);
_lexer_skip_token();
name := _variable_expression_get_name(variable_expression);
token_length := _variable_expression_get_length(variable_expression);
_write_s(name, token_length);
_write_c('\n')
end;
proc _compile_enumeration_value(symbol: Word);
(**
 * Returns the size of a field access expression node (16), the amount by
 * which _parse_field_access_expression advances memory_free_pointer. The
 * accessors use offsets 4 (aggregate), 8 (field) and 12 (length); presumably
 * offset 0 holds the node kind - TODO confirm against _node_set_kind.
 *)
proc _field_access_expression_size();
return 16
end;
(**
 * Reads the aggregate field of a field access expression node.
 *
 * this: Address of the node.
 *
 * Returns the word stored at offset 4 from the node address; the parser
 * stores the address of the node on the left of the dot there.
 *)
proc _field_access_expression_get_aggregate(this: Word);
begin
(* The parameter is reused as the field address before it is dereferenced. *)
this := this + 4;
return this^
end;
(**
 * Writes the aggregate field of a field access expression node.
 *
 * this: Address of the node.
 * value: Word to store at offset 4; the parser passes the address of the
 *        node on the left of the dot.
 *)
proc _field_access_expression_set_aggregate(this: Word, value: Word);
begin
(* The parameter is reused as the field address before the store. *)
this := this + 4;
this^ := value
end;
(**
 * Reads the field-name field of a field access expression node.
 *
 * this: Address of the node.
 *
 * Returns the word stored at offset 8 from the node address; the parser
 * stores the start address of the token after the dot there.
 *)
proc _field_access_expression_get_field(this: Word);
begin
(* The parameter is reused as the field address before it is dereferenced. *)
this := this + 8;
return this^
end;
(**
 * Writes the field-name field of a field access expression node.
 *
 * this: Address of the node.
 * value: Word to store at offset 8; the parser passes the start address of
 *        the token after the dot.
 *)
proc _field_access_expression_set_field(this: Word, value: Word);
begin
(* The parameter is reused as the field address before the store. *)
this := this + 8;
this^ := value
end;
(**
 * Reads the length field of a field access expression node.
 *
 * this: Address of the node.
 *
 * Returns the word stored at offset 12 from the node address; the parser
 * stores the length of the token after the dot there.
 *)
proc _field_access_expression_get_length(this: Word);
begin
(* The parameter is reused as the field address before it is dereferenced. *)
this := this + 12;
return this^
end;
(**
 * Writes the length field of a field access expression node.
 *
 * this: Address of the node.
 * value: Word to store at offset 12; the parser passes the length of the
 *        token after the dot.
 *)
proc _field_access_expression_set_length(this: Word, value: Word);
begin
(* The parameter is reused as the field address before the store. *)
this := this + 12;
this^ := value
end;
proc _compile_enumeration_value(field_access_expression: Word);
var
enumeration_type: Word;
members: Word;
@@ -1011,20 +1182,22 @@ var
member_name: Word;
member_length: Word;
counter: Word;
symbol: Word;
begin
symbol := _field_access_expression_get_aggregate(field_access_expression);
value_name := _variable_expression_get_name(symbol);
name_length := _variable_expression_get_length(symbol);
symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length);
enumeration_type := _type_info_get_type(symbol);
members := _enumeration_type_get_members(enumeration_type);
members_length := _enumeration_type_get_length(enumeration_type);
(* Skip enumeration type name and dot. Read the enumeration value. *)
_lexer_skip_token();
_lexer_read_token(@token_type);
_lexer_skip_token();
_lexer_read_token(@token_type);
value_name := _lexer_global_get_start();
name_length := _lexer_global_get_end() - value_name;
_lexer_skip_token();
value_name := _field_access_expression_get_field(field_access_expression);
name_length := _field_access_expression_get_length(field_access_expression);
counter := 1;
.compile_enumeration_value_members;
@@ -1044,35 +1217,60 @@ begin
end
end;
proc _compile_designator();
(**
 * Parses the ".name" suffix of a designator into a field access expression
 * node.
 *
 * aggregate: Address of the node on the left of the dot.
 *
 * Returns the address of the new node, which records the aggregate together
 * with the start address and length of the token after the dot.
 *)
proc _parse_field_access_expression(aggregate: Word);
var
token_kind: Word;
name: Word;
name_token: Word;
result: Word;
begin
(* Skip dot. Read the enumeration value. *)
_lexer_skip_token();
_lexer_read_token(@token_kind);
(* name_token holds the token length, not a token: end minus start. *)
name := _lexer_global_get_start();
name_token := _lexer_global_get_end() - name;
(* Allocate the node by advancing the free-memory pointer by the node size. *)
result := memory_free_pointer;
_lexer_skip_token();
memory_free_pointer := memory_free_pointer + _field_access_expression_size();
_node_set_kind(result, NodeKind.field_access_expression);
_field_access_expression_set_aggregate(result, aggregate);
_field_access_expression_set_field(result, name);
_field_access_expression_set_length(result, name_token);
return result
end;
(**
 * Emits code for a designator node.
 *
 * parser_node: Address of the node returned by _parse_designator.
 *
 * Returns is_address; _compile_term emits an extra "lw t0, (t0)" when the
 * result is 1.
 *
 * NOTE(review): this body does not read as one consistent procedure. It
 * declares a local with the same name as the parameter, still reads tokens
 * from the lexer, calls _compile_global_designator without an argument, and
 * has an "end" directly followed by "elsif". It looks like removed and added
 * diff lines shown together - reconcile against the committed file.
 *)
proc _compile_designator(parser_node: Word);
var
name_token: Word;
lookup_result: Word;
token_kind: Word;
name: Word;
parser_node: Word;
is_address: Word;
begin
_lexer_read_token(@token_kind);
name := _lexer_global_get_start();
name_token := _lexer_global_get_end() - name;
lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
is_address := 1;
if lookup_result <> 0 then
_compile_local_designator(lookup_result)
else
_compile_global_designator()
end;
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.hat then
_lexer_skip_token();
if _node_get_kind(parser_node) = NodeKind.dereference_expression then
parser_node := _dereference_expression_get_pointer(parser_node);
_compile_simple_expression(parser_node);
_write_z("\tlw t0, (t0)\n\0")
end
elsif _node_get_kind(parser_node) = NodeKind.field_access_expression then
_compile_enumeration_value(parser_node);
is_address := 0
else
is_address := _compile_simple_expression(parser_node)
end;
return is_address
end;
proc _compile_assignment();
var
token_kind: Word;
begin
_compile_designator();
token_kind := _parse_designator();
_compile_designator(token_kind);
(* Save the assignee address on the stack. *)
_write_z("\tsw t0, 60(sp)\n\0");