From e0ac57dc1d27831b374b7540256d4e4285284492 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Fri, 5 Sep 2025 17:17:28 +0200 Subject: Add string literals --- boot/stage6.elna | 211 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 190 insertions(+), 21 deletions(-) (limited to 'boot/stage6.elna') diff --git a/boot/stage6.elna b/boot/stage6.elna index f412cc6..7d426f9 100644 --- a/boot/stage6.elna +++ b/boot/stage6.elna @@ -1,9 +1,10 @@ -# ihis Source Code Form is subject to the terms of the Mozilla Public License, +# This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -# Stage6 compiler. +# Stage 6 compiler. # +# - String literals. .section .rodata @@ -43,6 +44,9 @@ asm_type_directive: .string ".type " .type asm_type_function, @object asm_type_function: .string ", @function\n" +.type asm_type_object, @object +asm_type_object: .string ", @object\n" + .type asm_colon, @object asm_colon: .string ":\n" @@ -112,19 +116,110 @@ asm_comma: .string ", " .type asm_sp, @object asm_sp: .string "sp" +.type asm_rodata, @object +asm_rodata: .string ".section .rodata\n" + +.type asm_strings, @object +asm_strings: .string "strings" + +.type asm_ascii, @object +asm_ascii: .string " .ascii " + .section .bss # When modifying also change the read size in the entry point procedure. .type source_code, @object source_code: .zero 81920 +.type compiler_strings, @object +compiler_strings: .zero 8192 + .section .data +.type compiler_strings_position, @object +compiler_strings_position: .word compiler_strings + +.type compiler_strings_length, @object +compiler_strings_length: .word 0 + .type source_code_position, @object source_code_position: .word source_code .section .text +# Calculates and returns the string token length between quotes, including the +# escaping slash characters. 
+# +# Parameters: +# a0 - String token pointer. +# +# Returns the length in a0. +proc _string_length(); +begin + # Reset the counter. + v0 := 0; + +.string_length_loop: + v88 := v88 + 1; + + lw t0, 88(sp) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .string_length_end + + v0 := v0 + 1; + goto .string_length_loop; + +.string_length_end: + return v0 +end; + +# Adds a string to the global, read-only string storage. +# +# Parameters: +# a0 - String token. +# +# Returns the offset from the beginning of the storage to the new string in a0. +proc _add_string(); +begin + v0 := v88 + 1; + v4 := compiler_strings_length; + +.add_string_loop: + lw t0, 0(sp) + lb t1, (t0) + li t2, '"' + + beq t1, t2, .add_string_end + + la t2, compiler_strings_position + lw t3, (t2) + sb t1, (t3) + + addi t3, t3, 1 + sw t3, (t2) + + addi t0, t0, 1 + sw t0, 0(sp) + + li t2, '\\' + bne t1, t2, .add_string_increment + + goto .add_string_loop; + +.add_string_increment: + la t2, compiler_strings_length + lw t4, (t2) + addi t4, t4, 1 + sw t4, (t2) + + goto .add_string_loop; + +.add_string_end: + return v4 +end; + # Reads standard input into a buffer. # a0 - Buffer pointer. # a1 - Buffer size. 
@@ -595,6 +690,37 @@ begin end; +proc _compile_string_literal(); +begin + _string_length(source_code_position); + sw a0, 0(sp) + + _add_string(source_code_position); + sw a0, 4(sp) + + _advance_token(v0 + 2); + + _write_z(@asm_la); + _write_register('t', 0); + _write_z(@asm_comma); + _write_z(@asm_strings); + _write_c('\n'); + + _write_z(@asm_li); + _write_register('t', 1); + _write_z(@asm_comma); + _write_i(v4); + _write_c('\n'); + + _write_z(@asm_add); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); +end; + proc _compile_term(); begin la t0, source_code_position @@ -614,6 +740,9 @@ begin li t1, '~' beq a0, t1, .compile_term_not + li t1, '"' + beq a0, t1, .compile_term_string_literal + _is_digit(v0); bnez a0, .compile_term_integer_literal @@ -639,6 +768,10 @@ begin _compile_not_expression(); goto .compile_term_end; +.compile_term_string_literal: + _compile_string_literal(); + goto .compile_term_end; + .compile_term_variable: _compile_variable_expression(); goto .compile_term_end; @@ -1336,63 +1469,99 @@ begin end; # Process the source code and print the generated code. -proc _compile(); +proc _compile_module(); begin -.compile_loop: +.compile_module_loop: _skip_newlines(); la t0, source_code_position lw t0, (t0) lb t0, (t0) - beqz t0, .compile_end + beqz t0, .compile_module_end li t1, '#' - beq t0, t1, .compile_comment + beq t0, t1, .compile_module_comment # 8 is ".section" length. _memcmp(source_code_position, @keyword_section, 8); - beqz a0, .compile_section + beqz a0, .compile_module_section # 5 is ".type" length. _memcmp(source_code_position, @keyword_type, 5); - beqz a0, .compile_type + beqz a0, .compile_module_type # 5 is "proc " length. Space is needed to distinguish from "procedure". _memcmp(source_code_position, @keyword_proc, 5); - beqz a0, .compile_procedure + beqz a0, .compile_module_procedure # 6 is ".globl" length. 
_memcmp(source_code_position, @keyword_global, 6); - beqz a0, .compile_global + beqz a0, .compile_module_global # Not a known token, exit. - goto .compile_end; + goto .compile_module_end; -.compile_section: +.compile_module_section: _compile_section(); - goto .compile_loop; + goto .compile_module_loop; -.compile_type: +.compile_module_type: _compile_type(); - goto .compile_loop; + goto .compile_module_loop; -.compile_global: +.compile_module_global: _compile_line(); - goto .compile_loop; + goto .compile_module_loop; -.compile_comment: +.compile_module_comment: _skip_comment(); - goto .compile_loop; + goto .compile_module_loop; -.compile_procedure: +.compile_module_procedure: _compile_procedure(); - goto .compile_loop; + goto .compile_module_loop; + +.compile_module_end: +end; + +proc _compile(); +begin + _compile_module(); + + _write_z(@asm_rodata); + _write_z(@asm_type_directive); + _write_z(@asm_strings); + _write_z(@asm_type_object); + _write_z(@asm_strings); + _write_c(':'); + _write_z(@asm_ascii); + _write_c('"'); + + la t0, compiler_strings + sw t0, 0(sp) + +.compile_loop: + lw t0, 0(sp) + la t1, compiler_strings_position + lw t1, (t1) + bge t0, t1, .compile_end + + lb a0, (t0) + + addi t0, t0, 1 + sw t0, 0(sp) + + _write_c(); + + j .compile_loop .compile_end: + _write_c('"'); + _write_c('\n'); end; # Terminates the program. a0 contains the return code. -- cgit v1.2.3