Support simple variable assignment

This commit is contained in:
2025-09-01 09:57:18 +02:00
parent 627975775c
commit 44fa140769
8 changed files with 1454 additions and 505 deletions

View File

@@ -2,14 +2,13 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage3 compiler.
# Stage4 compiler.
#
# - Procedures with none or one argument.
# - Goto statements.
# - Character and integer literals.
# - Passing local variables to procedures.
# - Local variables should have the format: v00,
# where 00 is its offset from the sp register.
# - Taking the value of local and global variables. Variables that don't begin
# with "v" are considered global.
# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable;
# 7 words on the stack, 28 - 56, are reserved for procedure arguments (caller side).
# - Take address unary operation "@".
.section .rodata
@@ -38,10 +37,10 @@ keyword_begin: .ascii "begin"
keyword_var: .ascii "var"
.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n"
.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n"
.type asm_type_directive, @object
asm_type_directive: .string ".type "
@@ -64,23 +63,26 @@ asm_li: .string "\tli "
.type asm_lw, @object
asm_lw: .string "\tlw "
.type asm_la, @object
asm_la: .string "\tla "
.type asm_sw, @object
asm_sw: .string "\tsw "
.type asm_mv, @object
asm_mv: .string "mv "
.type asm_addi, @object
asm_addi: .string "\taddi "
.type asm_t0, @object
asm_t0: .string "t0"
.type asm_a0, @object
asm_a0: .string "a0"
.type asm_t1, @object
asm_t1: .string "t1"
.type asm_comma, @object
asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "(sp)"
asm_sp: .string "sp"
.section .bss
@@ -257,7 +259,7 @@ begin
_is_upper();
sw a0, 4(sp)
_is_lower(v00);
_is_lower(v0);
lw t0, 0(sp)
xori t1, t0, '_'
@@ -292,7 +294,7 @@ begin
_is_alpha();
sw a0, 0(sp)
_is_digit(v04);
_is_digit(v4);
lw a1, 0(sp)
or a0, a0, a1
@@ -413,12 +415,7 @@ end;
proc _write_token();
begin
sw a0, 0(sp)
la a0, source_code_position
lw a0, (a0)
lw a1, 0(sp)
_write_s();
_write_s(source_code_position, v0);
lw a0, 0(sp)
end;
@@ -488,14 +485,9 @@ end;
proc _compile_integer_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
_write_z(@asm_li);
_write_z(@asm_t0);
_write_z(@asm_comma);
_read_token();
_write_token();
@@ -506,14 +498,9 @@ end;
proc _compile_character_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
_write_z(@asm_li);
_write_z(@asm_t0);
_write_z(@asm_comma);
_write_c('\'');
_advance_token(1);
@@ -542,43 +529,45 @@ end;
proc _compile_variable_expression();
begin
la a0, asm_lw
_write_z();
_compile_designator();
la a0, asm_a0
_write_z();
_write_z(@asm_lw);
_write_z(@asm_t0);
_write_z(@asm_comma);
la a0, asm_comma
_write_z();
_advance_token(1);
_read_token();
_write_token();
_advance_token();
la a0, asm_sp
_write_z();
_write_c('(');
_write_z(@asm_t0);
_write_c(')');
_write_c('\n');
end;
proc _compile_address_expression();
begin
# Compiles the take-address expression "@designator".
# The designator compiler emits an instruction that places the address of
# the variable into t0, so nothing else has to be generated here.
# Skip the "@" sign.
_advance_token(1);
_compile_designator();
end;
proc _compile_expression();
begin
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
sw a0, 0(sp)
li t1, '\''
beq a0, t1, .compile_expression_character_literal
li t1, 'v'
beq a0, t1, .compile_expression_variable
li t1, '@'
beq a0, t1, .compile_expression_address
_is_digit();
_is_digit(v0);
bnez a0, .compile_expression_integer_literal
goto .compile_expression_end;
goto .compile_expression_variable;
.compile_expression_character_literal:
_compile_character_literal();
@@ -588,9 +577,13 @@ begin
_compile_integer_literal();
goto .compile_expression_end;
.compile_expression_address:
_compile_address_expression();
goto .compile_expression_end;
.compile_expression_variable:
_compile_variable_expression();
goto .compile_expression_end;;
goto .compile_expression_end;
.compile_expression_end:
end;
@@ -604,9 +597,7 @@ begin
_read_token();
sw a0, 0(sp)
la t0, source_code_position
lw t0, (t0)
sw t0, 4(sp)
v4 := source_code_position
sw zero, 8(sp)
@@ -625,25 +616,21 @@ begin
_compile_expression();
# Save the argument on the stack.
la a0, asm_sw
_write_z();
_write_z(@asm_sw);
_write_z(@asm_t0);
_write_z(@asm_comma);
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
# Calculate the stack offset: 52 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
li t1, 52
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('(');
_write_z(@asm_sp);
_write_c(')')
_write_c('\n');
@@ -673,36 +660,34 @@ begin
addi t0, t0, -1
sw t0, 8(sp)
la a0, asm_lw
_write_z();
_write_z(@asm_lw);
_write_c('a');
lw a0, 8(sp)
_write_i();
la a0, asm_comma
_write_z();
_write_z(@asm_comma);
# Calculate the stack offset: 20 - (4 * argument_counter)
# Calculate the stack offset: 52 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
li t1, 52
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('(');
_write_z(@asm_sp);
_write_c(')');
_write_c('\n');
goto .compile_call_finalize;
.compile_call_end:
la a0, asm_call
_write_z();
_write_z(@asm_call);
_write_s(v04, v00);
_write_s(v4, v0);
# Skip the right paren.
_advance_token(1);
@@ -715,13 +700,111 @@ begin
_read_token();
sw a0, 0(sp)
la a0, asm_j
_write_z();
_write_z(@asm_j);
_write_token(v00);
_write_token(v0);
_advance_token();
end;
proc _compile_local_designator();
begin
# Compiles a local variable designator of the form v<offset> and emits
# an addi instruction with t0 as destination, sp as base and the digits
# following "v" as immediate, i.e. t0 receives the address of the variable.
# Skip "v" in the local variable name.
_advance_token(1);
# Emit the fixed part of the instruction up to the immediate operand.
_write_z(@asm_addi);
_write_z(@asm_t0);
_write_z(@asm_comma);
_write_z(@asm_sp);
_write_z(@asm_comma);
# Read local variable stack offset and save it.
# The token start goes to 0(sp) and the value returned by _read_token in a0
# (presumably the token length - confirm against _read_token) to 4(sp).
v0 := source_code_position;
_read_token();
sw a0, 4(sp)
# Both calls are made without explicit arguments and therefore operate on
# the value that is currently in a0.
_write_token();
_advance_token();
_write_c('\n');
end;
proc _compile_global_designator();
begin
# Compiles a global variable designator and emits an la instruction that
# loads the address of the symbol named by the current token into t0.
_write_z(@asm_la);
_write_z(@asm_t0);
_write_z(@asm_comma);
# The current token is used verbatim as the symbol name.
# The calls below take no explicit arguments and operate on the value
# _read_token leaves in a0 (presumably the token length - confirm).
_read_token();
_write_token();
_advance_token();
_write_c('\n');
end;
proc _compile_designator();
begin
# Compiles a variable designator by dispatching on the first character at
# the current source position: "v" selects a local variable, anything else
# is treated as a global one. Both branches emit code that leaves the
# address of the variable in t0.
# Load the byte source_code_position points to into a0.
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, 'v'
beq a0, t1, .compile_designator_local
goto .compile_designator_global;
.compile_designator_local:
_compile_local_designator();
goto .compile_designator_end;
.compile_designator_global:
_compile_global_designator();
goto .compile_designator_end;
.compile_designator_end:
end;
proc _compile_assignment();
begin
# Compiles a simple assignment "designator := expression".
# Generated code: compute the assignee address into t0, spill it to 20(sp),
# evaluate the expression into t0, reload the address into t1 and store
# t0 through t1.
# NOTE(review): the spill slot is the fixed offset 20 from sp in the frame
# of the generated procedure, the same location a local named v20 would
# designate - confirm that no compiled procedure uses v20.
_compile_designator();
# Save the assignee address on the stack.
_write_z(@asm_sw);
_write_z(@asm_t0);
_write_z(@asm_comma);
_write_i(20);
_write_c('(');
_write_z(@asm_sp);
_write_c(')');
_write_c('\n');
# Skip the assignment sign (:=) with surrounding whitespace.
# Exactly four characters are skipped: space, colon, equals sign, space.
_advance_token(4);
# Compile the assigned expression; its code is emitted between the spill
# above and the reload below.
_compile_expression();
# Reload the assignee address into t1.
_write_z(@asm_lw);
_write_z(@asm_t1);
_write_z(@asm_comma);
_write_i(20);
_write_c('(');
_write_z(@asm_sp);
_write_c(')');
_write_c('\n');
# Store t0 to the address held in t1.
# No newline is written after this instruction here; presumably the caller
# terminates the line - TODO confirm.
_write_z(@asm_sw);
_write_z(@asm_t0);
_write_z(@asm_comma);
_write_c('(');
_write_z(@asm_t1);
_write_c(')');
end;
proc _compile_statement();
begin
# This is a call if the statement starts with an underscore.
@@ -737,6 +820,9 @@ begin
li t1, 'g'
beq t0, t1, .compile_statement_goto
li t1, 'v'
beq t0, t1, .compile_statement_assignment
_compile_line();
goto .compile_statement_end;
@@ -752,6 +838,12 @@ begin
goto .compile_statement_semicolon;
.compile_statement_assignment:
_advance_token(1);
_compile_assignment();
goto .compile_statement_semicolon;
.compile_statement_semicolon:
_advance_token(2);
@@ -789,30 +881,27 @@ begin
la a0, asm_type_directive
_write_z();
_write_token(v00);
_write_token(v0);
la a0, asm_type_function
_write_z();
# Write procedure label, _procedure_name:
_write_token(v00);
_write_token(v0);
la a0, asm_colon
_write_z();
_write_z(@asm_colon);
# Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp)
addi a0, a0, 10
_advance_token();
la a0, asm_prologue
_write_z();
_write_z(@asm_prologue);
_compile_procedure_body();
# Write the epilogue.
la a0, asm_epilogue
_write_z();
_write_z(@asm_epilogue);
# Skip the "end" keyword, semicolon and newline.
_advance_token(5);
@@ -882,37 +971,22 @@ begin
li t1, '#'
beq t0, t1, .compile_comment
la a0, source_code_position
lw a0, (a0)
la a1, keyword_section
li a2, 8 # ".section" length.
_memcmp();
# 8 is ".section" length.
_memcmp(source_code_position, @keyword_section, 8);
beqz a0, .compile_section
la a0, source_code_position
lw a0, (a0)
la a1, keyword_type
li a2, 5 # ".type" length.
_memcmp();
# 5 is ".type" length.
_memcmp(source_code_position, @keyword_type, 5);
beqz a0, .compile_type
la a0, source_code_position
lw a0, (a0)
la a1, keyword_proc
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
_memcmp();
# 5 is "proc " length. Space is needed to distinguish from "procedure".
_memcmp(source_code_position, @keyword_proc, 5);
beqz a0, .compile_procedure
la a0, source_code_position
lw a0, (a0)
la a1, keyword_global
li a2, 6 # ".globl" length.
_memcmp();
# 6 is ".globl" length.
_memcmp(source_code_position, @keyword_global, 6);
beqz a0, .compile_global
# Not a known token, exit.
goto .compile_end;
@@ -959,9 +1033,8 @@ end;
proc _start();
begin
# Read the source from the standard input.
la a0, source_code
li a1, 81920 # Buffer size.
_read_file();
# Second argument is the buffer size. When modifying it, update the source_code definition as well.
_read_file(@source_code, 81920);
_compile();
_exit(0);