Implement string literals

This commit is contained in:
2025-09-04 22:07:01 +02:00
parent 4b42c59649
commit 4888252274
2 changed files with 132 additions and 132 deletions

View File

@@ -1,18 +1,9 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage4 compiler.
# Stage6 compiler.
#
# - Stack size increased to 128 bytes per procedure.
# 7 words on the stack, 92 - 120, are reserved for procedure arguments (caller side).
# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side).
# The first parameter is in 88, the second in 84 and so forth.
# - Unary negate operation, e.g. -5.
# - Unary logical not operation "~".
# - Binary addition "+" and multiplication "*".
# - Binary logical operations: & (and), or and xor.
# - Binary comparison operations: =, <, <=, >, >=, <>.
.section .rodata
@@ -23,7 +14,7 @@ keyword_section: .ascii ".section"
keyword_type: .ascii ".type"
.type keyword_ret, @object
keyword_ret: .ascii "ret"
keyword_ret: .ascii "\tret"
.type keyword_global, @object
keyword_global: .ascii ".globl"
@@ -97,6 +88,9 @@ asm_or: .string "\tor "
.type asm_xor, @object
asm_xor: .string "\txor "
.type asm_xori, @object
asm_xori: .string "\txori "
.type asm_sub, @object
asm_sub: .string "\tsub "
@@ -109,6 +103,9 @@ asm_snez: .string "\tsnez "
.type asm_slt, @object
asm_slt: .string "\tslt "
.type asm_mv, @object
asm_mv: .string "\tmv "
.type asm_comma, @object
asm_comma: .string ", "
@@ -202,7 +199,7 @@ begin
_memcpy();
lw a0, 0(sp)
return v0
end;
# Writes a number to the standard output.
@@ -211,8 +208,7 @@ end;
# a0 - Whole number.
proc _write_i();
begin
addi a1, sp, 0
_print_i();
_print_i(v88, @v0);
mv a1, a0
addi a0, sp, 0
@@ -223,10 +219,7 @@ end;
# Writes a character from a0 into the standard output.
proc _write_c();
begin
sb a0, 0(sp)
addi a0, sp, 0
li a1, 1
_write_s();
_write_s(@v88, 1);
end;
# Write null terminated string.
@@ -235,22 +228,17 @@ end;
# a0 - String.
proc _write_z();
begin
sw a0, 0(sp)
.write_z_loop:
# Check for 0 character.
lw a0, 88(sp)
lb a0, (a0)
beqz a0, .write_z_end
# Print a character.
lw a0, 0(sp)
lb a0, (a0)
_write_c();
# Advance the input string by one byte.
lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
v88 := v88 + 1;
goto .write_z_loop;
@@ -260,21 +248,21 @@ end;
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_upper();
begin
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
v0 := v88 >= 'A';
v4 := v88 <= 'Z';
return v0 & v4
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
end;
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower();
begin
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
v0 := v88 >= 'a';
v4 := v88 <= 'z';
return v0 & v4
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
end;
# Detects if the passed character is a 7-bit alpha character or an underscore.
@@ -310,12 +298,10 @@ end;
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
v0 := v88 >= '0';
v4 := v88 <= '9';
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
return v0 & v4
end;
proc _is_alnum();
@@ -336,12 +322,13 @@ end;
# Returns token length in a0.
proc _read_token();
begin
la t0, source_code_position # Token pointer.
lw t0, (t0)
sw t0, 0(sp) # Current token position.
sw zero, 4(sp) # Token length.
# Current token position.
v0 := source_code_position;
# Token length.
v4 := 0;
.read_token_loop:
lw t0, 0(sp)
lb t0, (t0) # Current character.
# First we try to read a directive.
@@ -358,18 +345,13 @@ begin
.read_token_next:
# Advance the source code position and token length.
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
lw t0, 0(sp)
addi t0, t0, 1
sw t0, 0(sp)
v4 := v4 + 1;
v0 := v0 + 1;
goto .read_token_loop;
.read_token_end:
lw a0, 4(sp)
return v4
end;
# a0 - First pointer.
@@ -445,9 +427,8 @@ end;
# Returns a0 unchanged.
proc _write_token();
begin
sw a0, 0(sp)
_write_s(source_code_position, v0);
lw a0, 0(sp)
_write_s(source_code_position, v88);
return v88
end;
proc _compile_section();
@@ -889,16 +870,18 @@ begin
_write_z(@asm_slt);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1);
_write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1);
_write_c('\n');
# Execute the operation.
_write_z(@asm_not);
_write_z(@asm_xori);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_i(1);
_write_c('\n');
goto .compile_expression_end;
@@ -933,16 +916,18 @@ begin
_write_z(@asm_slt);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1);
_write_z(@asm_comma);
_write_register('t', 0);
_write_c('\n');
# Execute the operation.
_write_z(@asm_not);
_write_z(@asm_xori);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_i(1);
_write_c('\n');
goto .compile_expression_end;
@@ -960,12 +945,10 @@ begin
_read_token();
sw a0, 0(sp)
v4 := source_code_position;
sw zero, 8(sp)
v8 := 0;
# Skip the identifier and left paren.
addi a0, a0, 1
_advance_token();
_advance_token(v0 + 1);
la t0, source_code_position
lw t0, (t0)
@@ -983,12 +966,9 @@ begin
_write_z(@asm_comma);
# Calculate the stack offset: 116 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 116
sub a0, t1, t0
_write_i();
v12 := v8 * 4;
v12 := 116 + -v12;
_write_i(v12);
_write_c('(');
_write_z(@asm_sp);
@@ -997,9 +977,7 @@ begin
_write_c('\n');
# Add one to the argument counter.
lw t0, 8(sp)
addi t0, t0, 1
sw t0, 8(sp)
v8 := v8 + 1;
la t0, source_code_position
lw t0, (t0)
@@ -1018,25 +996,19 @@ begin
beqz t0, .compile_call_end
# Decrement the argument counter.
lw t0, 8(sp)
addi t0, t0, -1
sw t0, 8(sp)
v8 := v8 + -1;
_write_z(@asm_lw);
_write_c('a');
lw a0, 8(sp)
_write_i();
_write_i(v8);
_write_z(@asm_comma);
# Calculate the stack offset: 116 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 116
sub a0, t1, t0
_write_i();
v12 := v8 * 4;
v12 := 116 + -v12;
_write_i(v12);
_write_c('(');
_write_z(@asm_sp);
@@ -1048,7 +1020,6 @@ begin
.compile_call_end:
_write_z(@asm_call);
_write_s(v4, v0);
# Skip the right paren.
@@ -1080,14 +1051,9 @@ begin
_write_z(@asm_comma);
# Read local variable stack offset and save it.
v0 := source_code_position;
_read_token();
sw a0, 4(sp)
_write_token();
_advance_token();
_write_c('\n');
end;
@@ -1103,7 +1069,6 @@ begin
_advance_token();
_write_c('\n');
end;
proc _compile_designator();
@@ -1167,6 +1132,19 @@ begin
_write_c(')');
end;
# Compiles a "return <expression>" statement.
#
# Advances past the "return" keyword, compiles the expression that follows and
# then emits "\tmv a0, t0" so the expression value ends up in a0.
# No trailing newline is written here; the call site in _compile_statement
# writes it after this procedure returns.
proc _compile_return_statement();
begin
# Skip "return" keyword and whitespace after it (6 + 1 = 7 characters).
_advance_token(7);
# Generate code for the returned expression. The mv emitted below assumes the
# generated code leaves the value in t0 - NOTE(review): confirm against
# _compile_expression.
_compile_expression();
# Emit "\tmv a0, t0": copy the expression value into a0.
_write_z(@asm_mv);
_write_register('a', 0);
_write_z(@asm_comma);
_write_register('t', 0);
end;
proc _compile_statement();
begin
# This is a call if the statement starts with an underscore.
@@ -1185,6 +1163,10 @@ begin
li t1, 'v'
beq t0, t1, .compile_statement_assignment
# keyword_ret contains "\tret", so it's 4 bytes long.
_memcmp(source_code_position, @keyword_ret, 4);
beqz a0, .compile_statement_return
_compile_line();
goto .compile_statement_end;
@@ -1206,9 +1188,15 @@ begin
goto .compile_statement_semicolon;
.compile_statement_return:
_advance_token(1);
_compile_return_statement();
_write_c('\n');
goto .compile_statement_end;
.compile_statement_semicolon:
_advance_token(2);
_write_c('\n');
.compile_statement_end:
@@ -1235,48 +1223,33 @@ end;
# a1 - Register number.
proc _write_register();
begin
sw a0, 0(sp)
sw a1, 4(sp)
_write_c();
lw a0, 4(sp)
li t0, '0'
add a0, a0, t0
_write_c();
_write_c(v88);
v84 := v84 + '0';
_write_c(v84);
end;
proc _compile_procedure_prologue();
begin
_write_z(@asm_prologue);
v0 := 0;
.compile_procedure_prologue_loop:
_write_z(@asm_sw);
li a0, 'a'
lw a1, 0(sp)
_write_register();
_write_register('a', v0);
_write_z(@asm_comma);
# Calculate the stack offset: 88 - (4 * parameter_counter)
lw t0, 0(sp)
li t1, 4
mul t0, t0, t1
li t1, 88
sub a0, t1, t0
_write_i();
v4 := v0 * 4;
v4 := 88 + -v4;
_write_i(v4);
_write_c('(');
_write_z(@asm_sp);
_write_c(')');
_write_c('\n');
v0 := v0 + 1;
lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
li t0, 8
bne a0, t0, .compile_procedure_prologue_loop
@@ -1295,18 +1268,14 @@ begin
_write_z();
_write_token(v0);
_write_z(@asm_type_function);
# Write procedure label, _procedure_name:
_write_token(v0);
_write_z(@asm_colon);
# Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp)
addi a0, a0, 10
_advance_token();
_advance_token(v0 + 10);
_compile_procedure_prologue();
_compile_procedure_body();
@@ -1334,9 +1303,6 @@ begin
# Read the symbol type.
_read_token();
la t0, source_code_position
lw t0, (t0)
sw t0, 12(sp)
# Print the symbol type and newline.
addi a0, a0, 1