From 48882522746873d48b563ec66321ef99a51badbc Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 4 Sep 2025 22:07:01 +0200 Subject: [PATCH] Implement string literals --- boot/stage5.elna | 52 ++++++++++-- boot/stage6.elna | 212 ++++++++++++++++++++--------------------------- 2 files changed, 132 insertions(+), 132 deletions(-) diff --git a/boot/stage5.elna b/boot/stage5.elna index 3aed6c3..88b407f 100644 --- a/boot/stage5.elna +++ b/boot/stage5.elna @@ -2,7 +2,7 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -# Stage4 compiler. +# Stage5 compiler. # # - Stack size increased to 128 bytes per procedure. # 7 words on the stack, 92 - 120, are reversed for procedure arguments (caller side). @@ -13,6 +13,7 @@ # - Binary addition "+" and multiplication "*". # - Binary logical operations: & (and), or and xor. # - Binary comparison operations: =, <, <=, >, >=, <>. +# - Return statement. .section .rodata @@ -23,7 +24,7 @@ keyword_section: .ascii ".section" keyword_type: .ascii ".type" .type keyword_ret, @object -keyword_ret: .ascii "ret" +keyword_ret: .ascii "\tret" .type keyword_global, @object keyword_global: .ascii ".globl" @@ -97,6 +98,9 @@ asm_or: .string "\tor " .type asm_xor, @object asm_xor: .string "\txor " +.type asm_xori, @object +asm_xori: .string "\txori " + .type asm_sub, @object asm_sub: .string "\tsub " @@ -109,6 +113,9 @@ asm_snez: .string "\tsnez " .type asm_slt, @object asm_slt: .string "\tslt " +.type asm_mv, @object +asm_mv: .string "\tmv " + .type asm_comma, @object asm_comma: .string ", " @@ -889,16 +896,18 @@ begin _write_z(@asm_slt); _write_register('t', 0); _write_z(@asm_comma); - _write_register('t', 1); - _write_z(@asm_comma); _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); _write_c('\n'); # Execute the operation. 
- _write_z(@asm_not); + _write_z(@asm_xori); _write_register('t', 0); _write_z(@asm_comma); _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); _write_c('\n'); goto .compile_expression_end; @@ -933,16 +942,18 @@ begin _write_z(@asm_slt); _write_register('t', 0); _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); _write_register('t', 1); + _write_z(@asm_comma); + _write_register('t', 0); _write_c('\n'); # Execute the operation. - _write_z(@asm_not); + _write_z(@asm_xori); _write_register('t', 0); _write_z(@asm_comma); _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); _write_c('\n'); goto .compile_expression_end; @@ -1167,6 +1178,19 @@ begin _write_c(')'); end; +proc _compile_return_statement(); +begin + # Skip "return" keyword and whitespace after it. + _advance_token(7); + _compile_expression(); + + _write_z(@asm_mv); + _write_register('a', 0); + _write_z(@asm_comma); + _write_register('t', 0); + +end; + proc _compile_statement(); begin # This is a call if the statement starts with an underscore. @@ -1185,6 +1209,10 @@ begin li t1, 'v' beq t0, t1, .compile_statement_assignment + # keyword_ret contains "\tret", so it's 4 bytes long. + _memcmp(source_code_position, @keyword_ret, 4); + beqz a0, .compile_statement_return + _compile_line(); goto .compile_statement_end; @@ -1206,9 +1234,15 @@ begin goto .compile_statement_semicolon; +.compile_statement_return: + _advance_token(1); + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + .compile_statement_semicolon: _advance_token(2); - _write_c('\n'); .compile_statement_end: diff --git a/boot/stage6.elna b/boot/stage6.elna index 3aed6c3..f412cc6 100644 --- a/boot/stage6.elna +++ b/boot/stage6.elna @@ -1,18 +1,9 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, +# This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. 
If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -# Stage4 compiler. +# Stage6 compiler. # -# - Stack size increased to 128 bytes per procedure. -# 7 words on the stack, 92 - 120, are reversed for procedure arguments (caller side). -# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side). -# The first parameter is in 88, the second in 84 and so forth. -# - Unary negate operation, e.g. -5. -# - Unary locical not operation "~". -# - Binary addition "+" and multiplication "*". -# - Binary logical operations: & (and), or and xor. -# - Binary comparison operations: =, <, <=, >, >=, <>. .section .rodata @@ -23,7 +14,7 @@ keyword_section: .ascii ".section" keyword_type: .ascii ".type" .type keyword_ret, @object -keyword_ret: .ascii "ret" +keyword_ret: .ascii "\tret" .type keyword_global, @object keyword_global: .ascii ".globl" @@ -97,6 +88,9 @@ asm_or: .string "\tor " .type asm_xor, @object asm_xor: .string "\txor " +.type asm_xori, @object +asm_xori: .string "\txori " + .type asm_sub, @object asm_sub: .string "\tsub " @@ -109,6 +103,9 @@ asm_snez: .string "\tsnez " .type asm_slt, @object asm_slt: .string "\tslt " +.type asm_mv, @object +asm_mv: .string "\tmv " + .type asm_comma, @object asm_comma: .string ", " @@ -202,7 +199,7 @@ begin _memcpy(); - lw a0, 0(sp) + return v0 end; # Writes a number to the standard output. @@ -211,8 +208,7 @@ end; # a0 - Whole number. proc _write_i(); begin - addi a1, sp, 0 - _print_i(); + _print_i(v88, @v0); mv a1, a0 addi a0, sp, 0 @@ -223,10 +219,7 @@ end; # Writes a character from a0 into the standard output. proc _write_c(); begin - sb a0, 0(sp) - addi a0, sp, 0 - li a1, 1 - _write_s(); + _write_s(@v88, 1); end; # Write null terminated string. @@ -235,22 +228,17 @@ end; # a0 - String. proc _write_z(); begin - sw a0, 0(sp) - .write_z_loop: # Check for 0 character. + lw a0, 88(sp) lb a0, (a0) beqz a0, .write_z_end # Print a character. 
- lw a0, 0(sp) - lb a0, (a0) _write_c(); # Advance the input string by one byte. - lw a0, 0(sp) - addi a0, a0, 1 - sw a0, 0(sp) + v88 := v88 + 1; goto .write_z_loop; @@ -260,21 +248,21 @@ end; # Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. proc _is_upper(); begin - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' end; # Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. proc _is_lower(); begin - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' end; # Detects if the passed character is a 7-bit alpha character or an underscore. @@ -310,12 +298,10 @@ end; # Sets a0 to 1 if it is a digit, to 0 otherwise. proc _is_digit(); begin - li t0, '0' - 1 - sltu t1, t0, a0 # t1 = a0 >= '0' + v0 := v88 >= '0'; + v4 := v88 <= '9'; - sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - - and a0, t1, t2 + return v0 & v4 end; proc _is_alnum(); @@ -336,12 +322,13 @@ end; # Returns token length in a0. proc _read_token(); begin - la t0, source_code_position # Token pointer. - lw t0, (t0) - sw t0, 0(sp) # Current token position. - sw zero, 4(sp) # Token length. + # Current token position. + v0 := source_code_position; + # Token length. + v4 := 0; .read_token_loop: + lw t0, 0(sp) lb t0, (t0) # Current character. # First we try to read a derictive. @@ -358,18 +345,13 @@ begin .read_token_next: # Advance the source code position and token length. - lw t0, 4(sp) - addi t0, t0, 1 - sw t0, 4(sp) - - lw t0, 0(sp) - addi t0, t0, 1 - sw t0, 0(sp) + v4 := v4 + 1; + v0 := v0 + 1; goto .read_token_loop; .read_token_end: - lw a0, 4(sp) + return v4 end; # a0 - First pointer. @@ -445,9 +427,8 @@ end; # Returns a0 unchanged. 
proc _write_token(); begin - sw a0, 0(sp) - _write_s(source_code_position, v0); - lw a0, 0(sp) + _write_s(source_code_position, v88); + return v88 end; proc _compile_section(); @@ -889,16 +870,18 @@ begin _write_z(@asm_slt); _write_register('t', 0); _write_z(@asm_comma); - _write_register('t', 1); - _write_z(@asm_comma); _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); _write_c('\n'); # Execute the operation. - _write_z(@asm_not); + _write_z(@asm_xori); _write_register('t', 0); _write_z(@asm_comma); _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); _write_c('\n'); goto .compile_expression_end; @@ -933,16 +916,18 @@ begin _write_z(@asm_slt); _write_register('t', 0); _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); _write_register('t', 1); + _write_z(@asm_comma); + _write_register('t', 0); _write_c('\n'); # Execute the operation. - _write_z(@asm_not); + _write_z(@asm_xori); _write_register('t', 0); _write_z(@asm_comma); _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); _write_c('\n'); goto .compile_expression_end; @@ -960,12 +945,10 @@ begin _read_token(); sw a0, 0(sp) v4 := source_code_position; - - sw zero, 8(sp) + v8 := 0; # Skip the identifier and left paren. - addi a0, a0, 1 - _advance_token(); + _advance_token(v0 + 1); la t0, source_code_position lw t0, (t0) @@ -983,12 +966,9 @@ begin _write_z(@asm_comma); # Calculate the stack offset: 116 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 116 - sub a0, t1, t0 - _write_i(); + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); _write_c('('); _write_z(@asm_sp); @@ -997,9 +977,7 @@ begin _write_c('\n'); # Add one to the argument counter. - lw t0, 8(sp) - addi t0, t0, 1 - sw t0, 8(sp) + v8 := v8 + 1; la t0, source_code_position lw t0, (t0) @@ -1018,25 +996,19 @@ begin beqz t0, .compile_call_end # Decrement the argument counter. 
- lw t0, 8(sp) - addi t0, t0, -1 - sw t0, 8(sp) + v8 := v8 + -1; _write_z(@asm_lw); _write_c('a'); - lw a0, 8(sp) - _write_i(); + _write_i(v8); _write_z(@asm_comma); # Calculate the stack offset: 116 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 116 - sub a0, t1, t0 - _write_i(); + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); _write_c('('); _write_z(@asm_sp); @@ -1048,7 +1020,6 @@ begin .compile_call_end: _write_z(@asm_call); - _write_s(v4, v0); # Skip the right paren. @@ -1080,14 +1051,9 @@ begin _write_z(@asm_comma); # Read local variable stack offset and save it. - v0 := source_code_position; - _read_token(); - sw a0, 4(sp) - _write_token(); _advance_token(); - _write_c('\n'); end; @@ -1103,7 +1069,6 @@ begin _advance_token(); _write_c('\n'); - end; proc _compile_designator(); @@ -1167,6 +1132,19 @@ begin _write_c(')'); end; +proc _compile_return_statement(); +begin + # Skip "return" keyword and whitespace after it. + _advance_token(7); + _compile_expression(); + + _write_z(@asm_mv); + _write_register('a', 0); + _write_z(@asm_comma); + _write_register('t', 0); + +end; + proc _compile_statement(); begin # This is a call if the statement starts with an underscore. @@ -1185,6 +1163,10 @@ begin li t1, 'v' beq t0, t1, .compile_statement_assignment + # keyword_ret contains "\tret", so it's 4 bytes long. + _memcmp(source_code_position, @keyword_ret, 4); + beqz a0, .compile_statement_return + _compile_line(); goto .compile_statement_end; @@ -1206,9 +1188,15 @@ begin goto .compile_statement_semicolon; +.compile_statement_return: + _advance_token(1); + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + .compile_statement_semicolon: _advance_token(2); - _write_c('\n'); .compile_statement_end: @@ -1235,48 +1223,33 @@ end; # a1 - Register number. 
proc _write_register(); begin - sw a0, 0(sp) - sw a1, 4(sp) - - _write_c(); - - lw a0, 4(sp) - li t0, '0' - add a0, a0, t0 - _write_c(); + _write_c(v88); + v84 := v84 + '0'; + _write_c(v84); end; proc _compile_procedure_prologue(); begin _write_z(@asm_prologue); - v0 := 0; .compile_procedure_prologue_loop: _write_z(@asm_sw); - - li a0, 'a' - lw a1, 0(sp) - _write_register(); - + _write_register('a', v0); _write_z(@asm_comma); # Calculate the stack offset: 88 - (4 * parameter_counter) - lw t0, 0(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 88 - sub a0, t1, t0 - _write_i(); + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); _write_c('('); _write_z(@asm_sp); _write_c(')'); _write_c('\n'); + v0 := v0 + 1; lw a0, 0(sp) - addi a0, a0, 1 - sw a0, 0(sp) li t0, 8 bne a0, t0, .compile_procedure_prologue_loop @@ -1295,18 +1268,14 @@ begin _write_z(); _write_token(v0); - _write_z(@asm_type_function); # Write procedure label, _procedure_name: _write_token(v0); - _write_z(@asm_colon); # Skip the function name and trailing parens, semicolon, "begin" and newline. - lw a0, 0(sp) - addi a0, a0, 10 - _advance_token(); + _advance_token(v0 + 10); _compile_procedure_prologue(); _compile_procedure_body(); @@ -1334,9 +1303,6 @@ begin # Read the symbol type. _read_token(); - la t0, source_code_position - lw t0, (t0) - sw t0, 12(sp) # Print the symbol type and newline. addi a0, a0, 1