Implement string literals

This commit is contained in:
2025-09-04 22:07:01 +02:00
parent 4b42c59649
commit 4888252274
2 changed files with 132 additions and 132 deletions

View File

@@ -2,7 +2,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can # v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/. # obtain one at https://mozilla.org/MPL/2.0/.
# Stage4 compiler. # Stage5 compiler.
# #
# - Stack size increased to 128 bytes per procedure. # - Stack size increased to 128 bytes per procedure.
# 7 words on the stack, 92 - 120, are reserved for procedure arguments (caller side). # 7 words on the stack, 92 - 120, are reserved for procedure arguments (caller side).
@@ -13,6 +13,7 @@
# - Binary addition "+" and multiplication "*". # - Binary addition "+" and multiplication "*".
# - Binary logical operations: & (and), or and xor. # - Binary logical operations: & (and), or and xor.
# - Binary comparison operations: =, <, <=, >, >=, <>. # - Binary comparison operations: =, <, <=, >, >=, <>.
# - Return statement.
.section .rodata .section .rodata
@@ -23,7 +24,7 @@ keyword_section: .ascii ".section"
keyword_type: .ascii ".type" keyword_type: .ascii ".type"
.type keyword_ret, @object .type keyword_ret, @object
keyword_ret: .ascii "ret" keyword_ret: .ascii "\tret"
.type keyword_global, @object .type keyword_global, @object
keyword_global: .ascii ".globl" keyword_global: .ascii ".globl"
@@ -97,6 +98,9 @@ asm_or: .string "\tor "
.type asm_xor, @object .type asm_xor, @object
asm_xor: .string "\txor " asm_xor: .string "\txor "
.type asm_xori, @object
asm_xori: .string "\txori "
.type asm_sub, @object .type asm_sub, @object
asm_sub: .string "\tsub " asm_sub: .string "\tsub "
@@ -109,6 +113,9 @@ asm_snez: .string "\tsnez "
.type asm_slt, @object .type asm_slt, @object
asm_slt: .string "\tslt " asm_slt: .string "\tslt "
.type asm_mv, @object
asm_mv: .string "\tmv "
.type asm_comma, @object .type asm_comma, @object
asm_comma: .string ", " asm_comma: .string ", "
@@ -889,16 +896,18 @@ begin
_write_z(@asm_slt); _write_z(@asm_slt);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 1);
_write_z(@asm_comma);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1);
_write_c('\n'); _write_c('\n');
# Execute the operation. # Execute the operation.
_write_z(@asm_not); _write_z(@asm_xori);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma);
_write_i(1);
_write_c('\n'); _write_c('\n');
goto .compile_expression_end; goto .compile_expression_end;
@@ -933,16 +942,18 @@ begin
_write_z(@asm_slt); _write_z(@asm_slt);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1); _write_register('t', 1);
_write_z(@asm_comma);
_write_register('t', 0);
_write_c('\n'); _write_c('\n');
# Execute the operation. # Execute the operation.
_write_z(@asm_not); _write_z(@asm_xori);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma);
_write_i(1);
_write_c('\n'); _write_c('\n');
goto .compile_expression_end; goto .compile_expression_end;
@@ -1167,6 +1178,19 @@ begin
_write_c(')'); _write_c(')');
end; end;
# Compiles a "return <expression>" statement.
#
# Presumably expects source_code_position to point at the "return" keyword
# (the caller skips one leading character before calling this procedure).
# Writes the code for the expression followed by "\tmv a0, t0".
# The terminating newline is not written here.
proc _compile_return_statement();
begin
# Skip "return" keyword and whitespace after it.
# 6 characters of the keyword plus 1 whitespace character give 7.
_advance_token(7);
# Compile the returned expression; the generated code presumably leaves
# its result in t0, since t0 is what gets copied below.
_compile_expression();
# Write the move of the expression result into a0: "\tmv a0, t0".
_write_z(@asm_mv);
_write_register('a', 0);
_write_z(@asm_comma);
_write_register('t', 0);
end;
proc _compile_statement(); proc _compile_statement();
begin begin
# This is a call if the statement starts with an underscore. # This is a call if the statement starts with an underscore.
@@ -1185,6 +1209,10 @@ begin
li t1, 'v' li t1, 'v'
beq t0, t1, .compile_statement_assignment beq t0, t1, .compile_statement_assignment
# keyword_ret contains "\tret", so it's 4 bytes long.
_memcmp(source_code_position, @keyword_ret, 4);
beqz a0, .compile_statement_return
_compile_line(); _compile_line();
goto .compile_statement_end; goto .compile_statement_end;
@@ -1206,9 +1234,15 @@ begin
goto .compile_statement_semicolon; goto .compile_statement_semicolon;
.compile_statement_return:
_advance_token(1);
_compile_return_statement();
_write_c('\n');
goto .compile_statement_end;
.compile_statement_semicolon: .compile_statement_semicolon:
_advance_token(2); _advance_token(2);
_write_c('\n'); _write_c('\n');
.compile_statement_end: .compile_statement_end:

View File

@@ -1,18 +1,9 @@
# This Source Code Form is subject to the terms of the Mozilla Public License, # This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can # v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/. # obtain one at https://mozilla.org/MPL/2.0/.
# Stage4 compiler. # Stage6 compiler.
# #
# - Stack size increased to 128 bytes per procedure.
# 7 words on the stack, 92 - 120, are reserved for procedure arguments (caller side).
# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side).
# The first parameter is in 88, the second in 84 and so forth.
# - Unary negate operation, e.g. -5.
# - Unary logical not operation "~".
# - Binary addition "+" and multiplication "*".
# - Binary logical operations: & (and), or and xor.
# - Binary comparison operations: =, <, <=, >, >=, <>.
.section .rodata .section .rodata
@@ -23,7 +14,7 @@ keyword_section: .ascii ".section"
keyword_type: .ascii ".type" keyword_type: .ascii ".type"
.type keyword_ret, @object .type keyword_ret, @object
keyword_ret: .ascii "ret" keyword_ret: .ascii "\tret"
.type keyword_global, @object .type keyword_global, @object
keyword_global: .ascii ".globl" keyword_global: .ascii ".globl"
@@ -97,6 +88,9 @@ asm_or: .string "\tor "
.type asm_xor, @object .type asm_xor, @object
asm_xor: .string "\txor " asm_xor: .string "\txor "
.type asm_xori, @object
asm_xori: .string "\txori "
.type asm_sub, @object .type asm_sub, @object
asm_sub: .string "\tsub " asm_sub: .string "\tsub "
@@ -109,6 +103,9 @@ asm_snez: .string "\tsnez "
.type asm_slt, @object .type asm_slt, @object
asm_slt: .string "\tslt " asm_slt: .string "\tslt "
.type asm_mv, @object
asm_mv: .string "\tmv "
.type asm_comma, @object .type asm_comma, @object
asm_comma: .string ", " asm_comma: .string ", "
@@ -202,7 +199,7 @@ begin
_memcpy(); _memcpy();
lw a0, 0(sp) return v0
end; end;
# Writes a number to the standard output. # Writes a number to the standard output.
@@ -211,8 +208,7 @@ end;
# a0 - Whole number. # a0 - Whole number.
proc _write_i(); proc _write_i();
begin begin
addi a1, sp, 0 _print_i(v88, @v0);
_print_i();
mv a1, a0 mv a1, a0
addi a0, sp, 0 addi a0, sp, 0
@@ -223,10 +219,7 @@ end;
# Writes a character from a0 into the standard output. # Writes a character from a0 into the standard output.
proc _write_c(); proc _write_c();
begin begin
sb a0, 0(sp) _write_s(@v88, 1);
addi a0, sp, 0
li a1, 1
_write_s();
end; end;
# Write null terminated string. # Write null terminated string.
@@ -235,22 +228,17 @@ end;
# a0 - String. # a0 - String.
proc _write_z(); proc _write_z();
begin begin
sw a0, 0(sp)
.write_z_loop: .write_z_loop:
# Check for 0 character. # Check for 0 character.
lw a0, 88(sp)
lb a0, (a0) lb a0, (a0)
beqz a0, .write_z_end beqz a0, .write_z_end
# Print a character. # Print a character.
lw a0, 0(sp)
lb a0, (a0)
_write_c(); _write_c();
# Advance the input string by one byte. # Advance the input string by one byte.
lw a0, 0(sp) v88 := v88 + 1;
addi a0, a0, 1
sw a0, 0(sp)
goto .write_z_loop; goto .write_z_loop;
@@ -260,21 +248,21 @@ end;
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. # Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_upper(); proc _is_upper();
begin begin
li t0, 'A' - 1 v0 := v88 >= 'A';
sltu t1, t0, a0 # t1 = a0 >= 'A' v4 := v88 <= 'Z';
return v0 & v4
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
end; end;
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. # Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower(); proc _is_lower();
begin begin
li t0, 'a' - 1 v0 := v88 >= 'a';
sltu t2, t0, a0 # t2 = a0 >= 'a' v4 := v88 <= 'z';
return v0 & v4
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
end; end;
# Detects if the passed character is a 7-bit alpha character or an underscore. # Detects if the passed character is a 7-bit alpha character or an underscore.
@@ -310,12 +298,10 @@ end;
# Sets a0 to 1 if it is a digit, to 0 otherwise. # Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit(); proc _is_digit();
begin begin
li t0, '0' - 1 v0 := v88 >= '0';
sltu t1, t0, a0 # t1 = a0 >= '0' v4 := v88 <= '9';
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' return v0 & v4
and a0, t1, t2
end; end;
proc _is_alnum(); proc _is_alnum();
@@ -336,12 +322,13 @@ end;
# Returns token length in a0. # Returns token length in a0.
proc _read_token(); proc _read_token();
begin begin
la t0, source_code_position # Token pointer. # Current token position.
lw t0, (t0) v0 := source_code_position;
sw t0, 0(sp) # Current token position. # Token length.
sw zero, 4(sp) # Token length. v4 := 0;
.read_token_loop: .read_token_loop:
lw t0, 0(sp)
lb t0, (t0) # Current character. lb t0, (t0) # Current character.
# First we try to read a directive. # First we try to read a directive.
@@ -358,18 +345,13 @@ begin
.read_token_next: .read_token_next:
# Advance the source code position and token length. # Advance the source code position and token length.
lw t0, 4(sp) v4 := v4 + 1;
addi t0, t0, 1 v0 := v0 + 1;
sw t0, 4(sp)
lw t0, 0(sp)
addi t0, t0, 1
sw t0, 0(sp)
goto .read_token_loop; goto .read_token_loop;
.read_token_end: .read_token_end:
lw a0, 4(sp) return v4
end; end;
# a0 - First pointer. # a0 - First pointer.
@@ -445,9 +427,8 @@ end;
# Returns a0 unchanged. # Returns a0 unchanged.
proc _write_token(); proc _write_token();
begin begin
sw a0, 0(sp) _write_s(source_code_position, v88);
_write_s(source_code_position, v0); return v88
lw a0, 0(sp)
end; end;
proc _compile_section(); proc _compile_section();
@@ -889,16 +870,18 @@ begin
_write_z(@asm_slt); _write_z(@asm_slt);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 1);
_write_z(@asm_comma);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1);
_write_c('\n'); _write_c('\n');
# Execute the operation. # Execute the operation.
_write_z(@asm_not); _write_z(@asm_xori);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma);
_write_i(1);
_write_c('\n'); _write_c('\n');
goto .compile_expression_end; goto .compile_expression_end;
@@ -933,16 +916,18 @@ begin
_write_z(@asm_slt); _write_z(@asm_slt);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1); _write_register('t', 1);
_write_z(@asm_comma);
_write_register('t', 0);
_write_c('\n'); _write_c('\n');
# Execute the operation. # Execute the operation.
_write_z(@asm_not); _write_z(@asm_xori);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma); _write_z(@asm_comma);
_write_register('t', 0); _write_register('t', 0);
_write_z(@asm_comma);
_write_i(1);
_write_c('\n'); _write_c('\n');
goto .compile_expression_end; goto .compile_expression_end;
@@ -960,12 +945,10 @@ begin
_read_token(); _read_token();
sw a0, 0(sp) sw a0, 0(sp)
v4 := source_code_position; v4 := source_code_position;
v8 := 0;
sw zero, 8(sp)
# Skip the identifier and left paren. # Skip the identifier and left paren.
addi a0, a0, 1 _advance_token(v0 + 1);
_advance_token();
la t0, source_code_position la t0, source_code_position
lw t0, (t0) lw t0, (t0)
@@ -983,12 +966,9 @@ begin
_write_z(@asm_comma); _write_z(@asm_comma);
# Calculate the stack offset: 116 - (4 * argument_counter) # Calculate the stack offset: 116 - (4 * argument_counter)
lw t0, 8(sp) v12 := v8 * 4;
li t1, 4 v12 := 116 + -v12;
mul t0, t0, t1 _write_i(v12);
li t1, 116
sub a0, t1, t0
_write_i();
_write_c('('); _write_c('(');
_write_z(@asm_sp); _write_z(@asm_sp);
@@ -997,9 +977,7 @@ begin
_write_c('\n'); _write_c('\n');
# Add one to the argument counter. # Add one to the argument counter.
lw t0, 8(sp) v8 := v8 + 1;
addi t0, t0, 1
sw t0, 8(sp)
la t0, source_code_position la t0, source_code_position
lw t0, (t0) lw t0, (t0)
@@ -1018,25 +996,19 @@ begin
beqz t0, .compile_call_end beqz t0, .compile_call_end
# Decrement the argument counter. # Decrement the argument counter.
lw t0, 8(sp) v8 := v8 + -1;
addi t0, t0, -1
sw t0, 8(sp)
_write_z(@asm_lw); _write_z(@asm_lw);
_write_c('a'); _write_c('a');
lw a0, 8(sp) _write_i(v8);
_write_i();
_write_z(@asm_comma); _write_z(@asm_comma);
# Calculate the stack offset: 116 - (4 * argument_counter) # Calculate the stack offset: 116 - (4 * argument_counter)
lw t0, 8(sp) v12 := v8 * 4;
li t1, 4 v12 := 116 + -v12;
mul t0, t0, t1 _write_i(v12);
li t1, 116
sub a0, t1, t0
_write_i();
_write_c('('); _write_c('(');
_write_z(@asm_sp); _write_z(@asm_sp);
@@ -1048,7 +1020,6 @@ begin
.compile_call_end: .compile_call_end:
_write_z(@asm_call); _write_z(@asm_call);
_write_s(v4, v0); _write_s(v4, v0);
# Skip the right paren. # Skip the right paren.
@@ -1080,14 +1051,9 @@ begin
_write_z(@asm_comma); _write_z(@asm_comma);
# Read local variable stack offset and save it. # Read local variable stack offset and save it.
v0 := source_code_position;
_read_token(); _read_token();
sw a0, 4(sp)
_write_token(); _write_token();
_advance_token(); _advance_token();
_write_c('\n'); _write_c('\n');
end; end;
@@ -1103,7 +1069,6 @@ begin
_advance_token(); _advance_token();
_write_c('\n'); _write_c('\n');
end; end;
proc _compile_designator(); proc _compile_designator();
@@ -1167,6 +1132,19 @@ begin
_write_c(')'); _write_c(')');
end; end;
# Compiles a "return <expression>" statement.
#
# Presumably expects source_code_position to point at the "return" keyword
# (the caller skips one leading character before calling this procedure).
# Writes the code for the expression followed by "\tmv a0, t0".
# The terminating newline is not written here.
proc _compile_return_statement();
begin
# Skip "return" keyword and whitespace after it.
# 6 characters of the keyword plus 1 whitespace character give 7.
_advance_token(7);
# Compile the returned expression; the generated code presumably leaves
# its result in t0, since t0 is what gets copied below.
_compile_expression();
# Write the move of the expression result into a0: "\tmv a0, t0".
_write_z(@asm_mv);
_write_register('a', 0);
_write_z(@asm_comma);
_write_register('t', 0);
end;
proc _compile_statement(); proc _compile_statement();
begin begin
# This is a call if the statement starts with an underscore. # This is a call if the statement starts with an underscore.
@@ -1185,6 +1163,10 @@ begin
li t1, 'v' li t1, 'v'
beq t0, t1, .compile_statement_assignment beq t0, t1, .compile_statement_assignment
# keyword_ret contains "\tret", so it's 4 bytes long.
_memcmp(source_code_position, @keyword_ret, 4);
beqz a0, .compile_statement_return
_compile_line(); _compile_line();
goto .compile_statement_end; goto .compile_statement_end;
@@ -1206,9 +1188,15 @@ begin
goto .compile_statement_semicolon; goto .compile_statement_semicolon;
.compile_statement_return:
_advance_token(1);
_compile_return_statement();
_write_c('\n');
goto .compile_statement_end;
.compile_statement_semicolon: .compile_statement_semicolon:
_advance_token(2); _advance_token(2);
_write_c('\n'); _write_c('\n');
.compile_statement_end: .compile_statement_end:
@@ -1235,48 +1223,33 @@ end;
# a1 - Register number. # a1 - Register number.
proc _write_register(); proc _write_register();
begin begin
sw a0, 0(sp) _write_c(v88);
sw a1, 4(sp) v84 := v84 + '0';
_write_c(v84);
_write_c();
lw a0, 4(sp)
li t0, '0'
add a0, a0, t0
_write_c();
end; end;
proc _compile_procedure_prologue(); proc _compile_procedure_prologue();
begin begin
_write_z(@asm_prologue); _write_z(@asm_prologue);
v0 := 0; v0 := 0;
.compile_procedure_prologue_loop: .compile_procedure_prologue_loop:
_write_z(@asm_sw); _write_z(@asm_sw);
_write_register('a', v0);
li a0, 'a'
lw a1, 0(sp)
_write_register();
_write_z(@asm_comma); _write_z(@asm_comma);
# Calculate the stack offset: 88 - (4 * parameter_counter) # Calculate the stack offset: 88 - (4 * parameter_counter)
lw t0, 0(sp) v4 := v0 * 4;
li t1, 4 v4 := 88 + -v4;
mul t0, t0, t1 _write_i(v4);
li t1, 88
sub a0, t1, t0
_write_i();
_write_c('('); _write_c('(');
_write_z(@asm_sp); _write_z(@asm_sp);
_write_c(')'); _write_c(')');
_write_c('\n'); _write_c('\n');
v0 := v0 + 1;
lw a0, 0(sp) lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
li t0, 8 li t0, 8
bne a0, t0, .compile_procedure_prologue_loop bne a0, t0, .compile_procedure_prologue_loop
@@ -1295,18 +1268,14 @@ begin
_write_z(); _write_z();
_write_token(v0); _write_token(v0);
_write_z(@asm_type_function); _write_z(@asm_type_function);
# Write procedure label, _procedure_name: # Write procedure label, _procedure_name:
_write_token(v0); _write_token(v0);
_write_z(@asm_colon); _write_z(@asm_colon);
# Skip the function name and trailing parens, semicolon, "begin" and newline. # Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp) _advance_token(v0 + 10);
addi a0, a0, 10
_advance_token();
_compile_procedure_prologue(); _compile_procedure_prologue();
_compile_procedure_body(); _compile_procedure_body();
@@ -1334,9 +1303,6 @@ begin
# Read the symbol type. # Read the symbol type.
_read_token(); _read_token();
la t0, source_code_position
lw t0, (t0)
sw t0, 12(sp)
# Print the symbol type and newline. # Print the symbol type and newline.
addi a0, a0, 1 addi a0, a0, 1