Start parsing statements

commit fe431a4e9d
parent c6531ba398
2025-10-15 23:55:36 +02:00
2 changed files with 470 additions and 176 deletions


@@ -1598,8 +1598,8 @@ begin
_lexer_read_token(@token_kind);
if token_kind <> _lexer_token_kind_assignment() then
(* Else we assume this is a zeroed array of 102400 bytes. *)
_write_z(" .zero 102400\0")
(* Else we assume this is a zeroed array of 204800 bytes. *)
_write_z(" .zero 204800\0")
else
(* Skip the assignment sign with surrounding whitespaces. *)
_lexer_skip_token();
@@ -3038,7 +3038,7 @@ begin
.start_read;
(* Second argument is the buffer size. When modifying it, update the source_code definition as well. *)
last_read := _read_file(offset, 102400);
last_read := _read_file(offset, 204800);
if last_read > 0 then
offset := offset + last_read;
goto .start_read


@@ -135,7 +135,13 @@ type
variable_expression,
field_access_expression,
dereference_expression,
unary_expression
unary_expression,
binary_expression,
call,
goto_statement,
label_declaration,
return_statement,
assign_statement
);
const
@@ -785,13 +791,7 @@ begin
elsif token_kind = LexerTokenKind.string then
parser_node := _parse_string_literal()
elsif token_kind = LexerTokenKind.identifier then
current_character := _lexer_global_get_start();
current_character := _load_byte(current_character);
(* This is a call if the statement starts with an underscore. *)
if current_character <> '_' then
parser_node := _parse_variable_expression()
end
end;
return parser_node
end;
@@ -840,6 +840,8 @@ begin
simple_expression := _parse_dereference_expression(simple_expression)
elsif token_kind = LexerTokenKind.dot then
simple_expression := _parse_field_access_expression(simple_expression)
elsif token_kind = LexerTokenKind.left_paren then
simple_expression := _parse_call(simple_expression)
end;
return simple_expression
end;
@@ -961,177 +963,288 @@ begin
end
end;
proc _compile_binary_rhs();
var
parser_node: Word;
begin
(* Save the value of the left expression on the stack. *)
_write_z("\tsw t0, 64(sp)\n\0");
parser_node := _parse_unary_expression();
_compile_unary_expression(parser_node);
(* Load the left expression from the stack. *)
_write_z("\tlw t1, 64(sp)\n\0")
proc _binary_expression_size();
return 16
end;
proc _compile_expression();
proc _binary_expression_get_lhs(this: Word);
begin
this := this + 4;
return this^
end;
proc _binary_expression_set_lhs(this: Word, value: Word);
begin
this := this + 4;
this^ := value
end;
proc _binary_expression_get_rhs(this: Word);
begin
this := this + 8;
return this^
end;
proc _binary_expression_set_rhs(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
proc _binary_expression_get_operator(this: Word);
begin
this := this + 12;
return this^
end;
proc _binary_expression_set_operator(this: Word, value: Word);
begin
this := this + 12;
this^ := value
end;
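(**
 * Illustrative sketch, not part of the commit: the binary expression node
 * layout implied by _binary_expression_size and the accessors above.
 *   offset  0: node kind (NodeKind.binary_expression)
 *   offset  4: left-hand side node
 *   offset  8: right-hand side node
 *   offset 12: operator token kind
 *)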
proc _parse_binary_expression();
var
lhs_node: Word;
rhs_node: Word;
token_kind: Word;
current_byte: Word;
parser_node: Word;
memory_size: Word;
result: Word;
begin
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.identifier then
current_byte := _lexer_global_get_start();
current_byte := _load_byte(current_byte);
if current_byte = '_' then
_compile_call();
_write_z("\tmv t0, a0\n\0");
goto compile_expression_end
end
end;
parser_node := _parse_unary_expression();
_compile_unary_expression(parser_node);
lhs_node := _parse_unary_expression();
rhs_node := 0;
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.plus then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tadd t0, t0, t1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.minus then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tsub t0, t1, t0\n\0");
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.multiplication then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tmul t0, t0, t1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.and then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tand t0, t0, t1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind._or then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tor t0, t0, t1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind._xor then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\txor t0, t0, t1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.equals then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.remainder then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\trem t0, t1, t0\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.division then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tdiv t0, t1, t0\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.less_than then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tslt t0, t1, t0\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.greater_than then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tslt t0, t0, t1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.less_equal then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.not_equal then
_lexer_skip_token();
_compile_binary_rhs();
(* Execute the operation. *)
_write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0")
rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.greater_equal then
_lexer_skip_token();
_compile_binary_rhs();
rhs_node := _parse_unary_expression()
end;
if rhs_node <> 0 then
memory_size := _binary_expression_size();
result := _allocate(memory_size);
(* Build the binary expression node. *)
_node_set_kind(result, NodeKind.binary_expression);
_binary_expression_set_lhs(result, lhs_node);
_binary_expression_set_rhs(result, rhs_node);
_binary_expression_set_operator(result, token_kind)
else
result := lhs_node
end;
return result
end;
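(**
 * Illustrative note, not part of the commit: _parse_binary_expression consumes
 * at most one operator, so the result is either a plain unary expression or a
 * single binary_expression node; no operator precedence handling happens here.
 *)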
proc _compile_binary_expression(parser_node: Word);
var
token_kind: Word;
expression_kind: Word;
operand_node: Word;
begin
expression_kind := _node_get_kind(parser_node);
if expression_kind <> NodeKind.binary_expression then
_compile_unary_expression(parser_node)
else
token_kind := _binary_expression_get_operator(parser_node);
operand_node := _binary_expression_get_lhs(parser_node);
_compile_unary_expression(operand_node);
(* Save the value of the left expression on the stack. *)
_write_z("\tsw t0, 64(sp)\n\0");
operand_node := _binary_expression_get_rhs(parser_node);
_compile_unary_expression(operand_node);
(* Load the left expression from the stack. *)
_write_z("\tlw t1, 64(sp)\n\0");
if token_kind = LexerTokenKind.plus then
_write_z("\tadd t0, t0, t1\n\0")
elsif token_kind = LexerTokenKind.minus then
_write_z("\tsub t0, t1, t0\n\0");
elsif token_kind = LexerTokenKind.multiplication then
_write_z("\tmul t0, t0, t1\n\0")
elsif token_kind = LexerTokenKind.and then
_write_z("\tand t0, t0, t1\n\0")
elsif token_kind = LexerTokenKind._or then
_write_z("\tor t0, t0, t1\n\0")
elsif token_kind = LexerTokenKind._xor then
_write_z("\txor t0, t0, t1\n\0")
elsif token_kind = LexerTokenKind.equals then
_write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0")
elsif token_kind = LexerTokenKind.remainder then
_write_z("\trem t0, t1, t0\n\0")
elsif token_kind = LexerTokenKind.division then
_write_z("\tdiv t0, t1, t0\n\0")
elsif token_kind = LexerTokenKind.less_than then
_write_z("\tslt t0, t1, t0\n\0")
elsif token_kind = LexerTokenKind.greater_than then
_write_z("\tslt t0, t0, t1\n\0")
elsif token_kind = LexerTokenKind.less_equal then
_write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0")
elsif token_kind = LexerTokenKind.not_equal then
_write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0")
elsif token_kind = LexerTokenKind.greater_equal then
_write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0")
end
end
end;
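(**
 * Illustrative note, not part of the commit: for a source expression "a - b"
 * the procedure above emits roughly
 *     ...code leaving a in t0...
 *     sw t0, 64(sp)
 *     ...code leaving b in t0...
 *     lw t1, 64(sp)
 *     sub t0, t1, t0
 * so t0 ends up holding a - b.
 *)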
.compile_expression_end;
proc _compile_expression();
var
parser_node: Word;
begin
parser_node := _parse_binary_expression();
_compile_binary_expression(parser_node)
end;
proc _compile_call();
(* 4 bytes node kind + 4 bytes next pointer + 4 bytes pointer to the callee variable expression + 4 * 8 for the arguments and a trailing nil. *)
proc _call_size();
return 44
end;
proc _call_get_name(this: Word);
begin
this := this + 8;
return this^
end;
proc _call_set_name(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
proc _call_get_argument(this: Word, n: Word);
begin
n := n * 4;
this := this + 8;
this := this + n;
return this^
end;
proc _call_set_argument(this: Word, n: Word, value: Word);
begin
n := n * 4;
this := this + 8;
this := this + n;
this^ := value
end;
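(**
 * Illustrative sketch, not part of the commit: the call node layout implied by
 * _call_size and the accessors above.
 *   offset  0: node kind (NodeKind.call)
 *   offset  4: next statement pointer
 *   offset  8: callee variable expression
 *   offset 12 .. 40: arguments starting at 1, followed by a trailing nil slot
 *)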
proc _parse_call(callee: Word);
var
parsed_expression: Word;
result: Word;
argument_number: Word;
token_kind: Word;
call_size: Word;
begin
call_size := _call_size();
result := _allocate(call_size);
_node_set_kind(result, NodeKind.call);
_statement_set_next(result, 0);
argument_number := 1;
_call_set_name(result, callee);
_lexer_read_token(@token_kind);
_lexer_skip_token();
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.right_paren then
_lexer_skip_token();
goto parse_call_end
end;
.parse_call_loop;
parsed_expression := _parse_binary_expression();
_call_set_argument(result, argument_number, parsed_expression);
argument_number := argument_number + 1;
_lexer_read_token(@token_kind);
_lexer_skip_token();
if token_kind = LexerTokenKind.comma then
goto parse_call_loop
end;
.parse_call_end;
(* Set the trailing argument to nil. *)
_call_set_argument(result, argument_number, 0);
return result
end;
proc _compile_call(parsed_call: Word);
var
name_length: Word;
name: Word;
argument_count: Word;
stack_offset: Word;
token_kind: Word;
parsed_expression: Word;
begin
_lexer_read_token(@token_kind);
name := _lexer_global_get_start();
name_length := _lexer_global_get_end();
name_length := name_length - name;
parsed_expression := _call_get_name(parsed_call);
name := _variable_expression_get_name(parsed_expression);
name_length := _variable_expression_get_length(parsed_expression);
argument_count := 0;
(* Skip the identifier and left paren. *)
_lexer_skip_token();
_lexer_read_token(@token_kind);
_lexer_skip_token();
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.right_paren then
goto compile_call_finalize
end;
.compile_call_loop;
_compile_expression();
parsed_expression := _call_get_argument(parsed_call, argument_count + 1);
if parsed_expression = 0 then
goto compile_call_finalize
else
_compile_binary_expression(parsed_expression);
(* Save the argument on the stack. *)
_write_z("\tsw t0, \0");
(* Calculate the stack offset: 116 - (4 * argument_count). *)
stack_offset := argument_count * 4;
_write_i(116 - stack_offset);
_write_z("(sp)\n\0");
(* Add one to the argument counter. *)
argument_count := argument_count + 1;
_lexer_read_token(@token_kind);
if token_kind <> LexerTokenKind.comma then
goto compile_call_finalize
goto compile_call_loop
end;
_lexer_skip_token();
goto compile_call_loop;
.compile_call_finalize;
(* Load the argument from the stack. *)
if argument_count <> 0 then
(* Decrement the argument counter. *)
@@ -1151,37 +1264,159 @@ begin
goto compile_call_finalize
end;
.compile_call_end;
_write_z("\tcall \0");
_write_s(name, name_length);
_write_c('\n');
(* Skip the right paren. *)
_lexer_read_token(@token_kind);
_lexer_skip_token()
_write_c('\n')
end;
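(**
 * Illustrative note, not part of the commit: each compiled argument is spilled
 * to the stack at offset 116 - (4 * argument_count) from sp before the next
 * argument is evaluated, so argument 0 lands at 116(sp), argument 1 at 112(sp),
 * and so on, until the finalize block reloads them ahead of the call.
 *)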
proc _compile_goto();
(**
* All statements are chained into a list. Next contains a pointer to the next
* statement in the statement list.
*)
proc _statement_get_next(this: Word);
begin
this := this + 4;
return this^
end;
proc _statement_set_next(this: Word, value: Word);
begin
this := this + 4;
this^ := value
end;
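(**
 * Illustrative note, not part of the commit: every statement node therefore
 * shares the same header, 4 bytes of node kind followed by a 4 byte next
 * pointer, and statement-specific fields start at offset 8, as the goto,
 * label, return and assignment accessors below assume.
 *)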
proc _goto_statement_size();
return 16
end;
proc _goto_statement_get_label(this: Word);
begin
this := this + 8;
return this^
end;
proc _goto_statement_set_label(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
proc _goto_statement_get_length(this: Word);
begin
this := this + 12;
return this^
end;
proc _goto_statement_set_length(this: Word, value: Word);
begin
this := this + 12;
this^ := value
end;
proc _parse_goto_statement();
var
next_token: Word;
next_length: Word;
token_kind: Word;
label_name: Word;
label_length: Word;
statement_size: Word;
result: Word;
begin
_lexer_skip_token();
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind.dot then
label_name := _lexer_global_get_start();
label_length := _lexer_global_get_end() - label_name;
_lexer_skip_token();
_lexer_read_token(@token_kind)
statement_size := _goto_statement_size();
result := _allocate(statement_size);
_node_set_kind(result, NodeKind.goto_statement);
_statement_set_next(result, 0);
_goto_statement_set_label(result, label_name);
_goto_statement_set_length(result, label_length);
return result
end;
next_token := _lexer_global_get_start();
next_length := _lexer_global_get_end();
next_length := next_length - next_token;
proc _compile_goto_statement(parser_node: Word);
var
label_name: Word;
label_length: Word;
begin
label_name := _goto_statement_get_label(parser_node);
label_length := _goto_statement_get_length(parser_node);
_write_z("\tj .\0");
_write_s(label_name, label_length);
_write_c('\n')
end;
_write_s(next_token, next_length);
_lexer_skip_token()
proc _label_declaration_size();
return 16
end;
proc _label_declaration_get_label(this: Word);
begin
this := this + 8;
return this^
end;
proc _label_declaration_set_label(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
proc _label_declaration_get_length(this: Word);
begin
this := this + 12;
return this^
end;
proc _label_declaration_set_length(this: Word, value: Word);
begin
this := this + 12;
this^ := value
end;
proc _parse_label_declaration();
var
token_kind: Word;
label_name: Word;
label_length: Word;
statement_size: Word;
result: Word;
begin
_lexer_skip_token();
_lexer_read_token(@token_kind);
label_name := _lexer_global_get_start();
label_length := _lexer_global_get_end() - label_name;
_lexer_skip_token();
statement_size := _label_declaration_size();
result := _allocate(statement_size);
_node_set_kind(result, NodeKind.label_declaration);
_statement_set_next(result, 0);
_goto_statement_set_label(result, label_name);
_goto_statement_set_length(result, label_length);
return result
end;
proc _compile_label_declaration(parser_node: Word);
var
label_name: Word;
label_length: Word;
begin
label_name := _goto_statement_get_label(parser_node);
label_length := _goto_statement_get_length(parser_node);
_write_c('.');
_write_s(label_name, label_length);
_write_z(":\n\0");
end;
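(**
 * Illustrative note, not part of the commit: for a label named "start_read"
 * the two procedures above emit roughly
 *     j .start_read
 * for the goto statement and
 *     .start_read:
 * for the label declaration.
 *)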
proc _compile_local_designator(symbol: Word);
@@ -1341,12 +1576,44 @@ begin
elsif node_kind = NodeKind.field_access_expression then
_compile_enumeration_value(parser_node);
is_address := 0
elsif node_kind = NodeKind.call then
_compile_call(parser_node);
_write_z("\tmv t0, a0\n\0");
is_address := 0
else
is_address := _compile_simple_expression(parser_node)
end;
return is_address
end;
proc _assignment_statement_size();
return 16
end;
proc _assignment_statement_get_assignee(this: Word);
begin
this := this + 8;
return this^
end;
proc _assignment_statement_set_assignee(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
proc _assignment_statement_get_assignment(this: Word);
begin
this := this + 12;
return this^
end;
proc _assignment_statement_set_assignment(this: Word, value: Word);
begin
this := this + 12;
this^ := value
end;
proc _compile_assignment();
var
token_kind: Word;
@@ -1368,15 +1635,52 @@ begin
_write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0")
end;
proc _compile_return_statement();
proc _return_statement_size();
return 12
end;
proc _return_statement_get_returned(this: Word);
begin
this := this + 8;
return this^
end;
proc _return_statement_set_returned(this: Word, value: Word);
begin
this := this + 8;
this^ := value
end;
proc _parse_return_statement();
var
token_kind: Word;
returned: Word;
label_length: Word;
statement_size: Word;
result: Word;
begin
(* Skip "return" keyword and whitespace after it. *)
_lexer_read_token(@token_kind);
_lexer_skip_token();
_lexer_read_token(@token_kind);
_compile_expression();
returned := _parse_binary_expression();
statement_size := _return_statement_size();
result := _allocate(statement_size);
_node_set_kind(result, NodeKind.return_statement);
_statement_set_next(result, 0);
_return_statement_set_returned(result, returned);
return result
end;
proc _compile_return_statement(parser_node: Word);
var
return_expression: Word;
begin
return_expression := _return_statement_get_returned(parser_node);
_compile_binary_expression(return_expression);
_write_z("\tmv a0, t0\n\0")
end;
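(**
 * Illustrative note, not part of the commit: the returned expression is
 * evaluated into t0 by _compile_binary_expression and then moved into a0,
 * the register that carries a function result in the RISC-V calling
 * convention targeted by the generated assembly.
 *)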
@@ -1453,38 +1757,24 @@ begin
_write_z(":\n\0")
end;
proc _compile_label_declaration();
var
label_token: Word;
token_kind: Word;
name: Word;
begin
(* Skip the dot. *)
_lexer_skip_token();
_lexer_read_token(@token_kind);
name := _lexer_global_get_start();
label_token := _lexer_global_get_end();
label_token := label_token - name;
_write_c('.');
_write_s(name, label_token);
_write_z(":\n\0");
_lexer_skip_token()
end;
proc _compile_statement();
var
current_byte: Word;
token_kind: Word;
parsed_call: Word;
begin
_lexer_read_token(@token_kind);
if token_kind = LexerTokenKind._goto then
_compile_goto()
parsed_call := _parse_goto_statement();
_compile_goto_statement(parsed_call)
elsif token_kind = LexerTokenKind._if then
_compile_if()
elsif token_kind = LexerTokenKind._return then
_compile_return_statement()
parsed_call := _parse_return_statement();
_compile_return_statement(parsed_call)
elsif token_kind = LexerTokenKind.dot then
parsed_call := _parse_goto_statement();
_compile_label_declaration()
elsif token_kind = LexerTokenKind.identifier then
current_byte := _lexer_global_get_start();
@@ -1492,7 +1782,9 @@ begin
(* This is a call if the statement starts with an underscore. *)
if current_byte = '_' then
_compile_call()
parsed_call := _parse_variable_expression();
parsed_call := _parse_call(parsed_call);
_compile_call(parsed_call)
else
_compile_assignment()
end
@@ -1866,6 +2158,7 @@ var
name_pointer: Word;
name_length: Word;
token_kind: Word;
parser_node: Word;
begin
(* Skip "proc ". *)
_lexer_read_token(@token_kind);
@@ -1905,7 +2198,8 @@ begin
_lexer_skip_token();
_compile_statement_list()
elsif token_kind = LexerTokenKind._return then
_compile_return_statement()
parser_node := _parse_return_statement();
_compile_return_statement(parser_node)
end;
(* Write the epilogue. *)
@@ -2143,8 +2437,8 @@ begin
_lexer_read_token(@token_kind);
if token_kind <> LexerTokenKind.assignment then
(* Else we assume this is a zeroed array of 102400 bytes. *)
_write_z(" .zero 102400\0")
(* Else we assume this is a zeroed array of 204800 bytes. *)
_write_z(" .zero 204800\0")
else
(* Skip the assignment sign with surrounding whitespaces. *)
_lexer_skip_token();
@@ -3130,7 +3424,7 @@ begin
.start_read;
(* Second argument is the buffer size. When modifying it, update the source_code definition as well. *)
last_read := _read_file(offset, 102400);
last_read := _read_file(offset, 204800);
if last_read > 0 then
offset := offset + last_read;
goto start_read