summaryrefslogtreecommitdiff
path: root/boot/stage6.elna
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2025-09-05 17:17:28 +0200
committerEugen Wissner <belka@caraus.de>2025-09-05 17:17:28 +0200
commite0ac57dc1d27831b374b7540256d4e4285284492 (patch)
tree8d09810e283a79fd80a4c45a823d1906983c6376 /boot/stage6.elna
parent48882522746873d48b563ec66321ef99a51badbc (diff)
downloadelna-e0ac57dc1d27831b374b7540256d4e4285284492.tar.gz
Add string literals
Diffstat (limited to 'boot/stage6.elna')
-rw-r--r--boot/stage6.elna211
1 files changed, 190 insertions, 21 deletions
diff --git a/boot/stage6.elna b/boot/stage6.elna
index f412cc6..7d426f9 100644
--- a/boot/stage6.elna
+++ b/boot/stage6.elna
@@ -1,9 +1,10 @@
-# ihis Source Code Form is subject to the terms of the Mozilla Public License,
+# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
-# Stage6 compiler.
+# Stage 6 compiler.
#
+# - String literals.
.section .rodata
@@ -43,6 +44,9 @@ asm_type_directive: .string ".type "
.type asm_type_function, @object
asm_type_function: .string ", @function\n"
+# Tail of a ".type" directive that marks the named symbol as a data object.
+.type asm_type_object, @object
+asm_type_object: .string ", @object\n"
+
.type asm_colon, @object
asm_colon: .string ":\n"
@@ -112,19 +116,110 @@ asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "sp"
+# Directive that switches the generated assembly to the read-only data section.
+.type asm_rodata, @object
+asm_rodata: .string ".section .rodata\n"
+
+# Name of the label under which all collected string literals are emitted.
+.type asm_strings, @object
+asm_strings: .string "strings"
+
+# Directive prefix written before the quoted contents of the string storage.
+.type asm_ascii, @object
+asm_ascii: .string " .ascii "
+
.section .bss
# When modifying also change the read size in the entry point procedure.
.type source_code, @object
source_code: .zero 81920
+# Storage for the text of all string literals met during compilation.
+# _add_string appends to it without checking this 8192 byte limit.
+.type compiler_strings, @object
+compiler_strings: .zero 8192
+
.section .data
+# Points to the next free byte in compiler_strings.
+.type compiler_strings_position, @object
+compiler_strings_position: .word compiler_strings
+
+# Number of characters stored so far; _add_string does not count backslashes.
+.type compiler_strings_length, @object
+compiler_strings_length: .word 0
+
.type source_code_position, @object
source_code_position: .word source_code
.section .text
+# Calculates and returns the string token length between quotes, including the
+# escaping slash characters.
+#
+# The scan stops at the first '"' that follows the opening quote. A backslash
+# gets no special treatment here, so an escaped quote also ends the scan.
+# There is no check for the end of the input; an unterminated literal makes
+# the loop read past the token.
+#
+# Parameters:
+# a0 - String token pointer. It has to point to the opening quote, since the
+#      pointer is advanced once before the first character is examined.
+#
+# Returns the length in a0.
+proc _string_length();
+begin
+ # Reset the counter.
+ v0 := 0;
+
+.string_length_loop:
+ # v88 is the stack slot at 88(sp), which is read back right below. It is
+ # used as the token pointer here (presumably the saved a0, to be confirmed
+ # against the procedure prologue).
+ v88 := v88 + 1;
+
+ lw t0, 88(sp)
+ lb t0, (t0)
+
+ # The closing quote ends the token and is not counted.
+ li t1, '"'
+ beq t0, t1, .string_length_end
+
+ v0 := v0 + 1;
+ goto .string_length_loop;
+
+.string_length_end:
+ return v0
+end;
+
+# Adds a string to the global, read-only string storage.
+#
+# The characters between the quotes are copied byte for byte, backslashes
+# included, to compiler_strings_position, which is advanced accordingly.
+# compiler_strings_length is increased once per resulting character: a
+# backslash is not counted, the character it escapes always is. This keeps
+# the length equal to the number of bytes the assembler produces for
+# two-character escapes, an escaped backslash included.
+#
+# Copying stops at the first quote after the opening one, also if that quote
+# is preceded by a backslash; this is the same stop condition _string_length
+# uses. The size of the storage is not checked.
+#
+# Parameters:
+# a0 - String token.
+#
+# Returns the offset from the beginning of the storage to the new string in a0.
+proc _add_string();
+begin
+ # Skip the opening quote.
+ v0 := v88 + 1;
+ # Remember the length before anything is appended, it is the return value.
+ v4 := compiler_strings_length;
+
+.add_string_loop:
+ lw t0, 0(sp)
+ lb t1, (t0)
+ li t2, '"'
+
+ beq t1, t2, .add_string_end
+
+ # Append the current byte to the storage and move the write position.
+ la t2, compiler_strings_position
+ lw t3, (t2)
+ sb t1, (t3)
+
+ addi t3, t3, 1
+ sw t3, (t2)
+
+ # Move to the next source character.
+ addi t0, t0, 1
+ sw t0, 0(sp)
+
+ li t2, '\\'
+ bne t1, t2, .add_string_increment
+
+ # A backslash is not counted itself. The character it escapes is copied
+ # right away and counted below, whatever it is. Going through the loop
+ # head instead would leave the second backslash of an escaped backslash
+ # uncounted as well and shift the offsets of all following strings.
+ lb t1, (t0)
+ li t2, '"'
+
+ # Same stop condition as in the loop head.
+ beq t1, t2, .add_string_end
+
+ la t2, compiler_strings_position
+ lw t3, (t2)
+ sb t1, (t3)
+
+ addi t3, t3, 1
+ sw t3, (t2)
+
+ addi t0, t0, 1
+ sw t0, 0(sp)
+
+.add_string_increment:
+ la t2, compiler_strings_length
+ lw t4, (t2)
+ addi t4, t4, 1
+ sw t4, (t2)
+
+ goto .add_string_loop;
+
+.add_string_end:
+ return v4
+end;
+
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
@@ -595,6 +690,37 @@ begin
end;
+# Compiles a string literal starting at the current source position.
+#
+# The literal is appended to the compiler string storage and the source
+# position is advanced by the token length (presumably that is what
+# _advance_token does, its definition is not part of this excerpt). The
+# generated code adds the offset of the literal to the address of the
+# "strings" label; judging by the names asm_la, asm_li and asm_add the result
+# ends up in t0.
+#
+# Stack slots:
+# 0(sp) - Token length between the quotes.
+# 4(sp) - Offset of the literal in the string storage.
+proc _compile_string_literal();
+begin
+ _string_length(source_code_position);
+ sw a0, 0(sp)
+
+ _add_string(source_code_position);
+ sw a0, 4(sp)
+
+ # The two extra characters are the opening and the closing quote.
+ _advance_token(v0 + 2);
+
+ # First instruction: t0 and the "strings" label.
+ _write_z(@asm_la);
+ _write_register('t', 0);
+ _write_z(@asm_comma);
+ _write_z(@asm_strings);
+ _write_c('\n');
+
+ # Second instruction: t1 and the offset returned by _add_string.
+ _write_z(@asm_li);
+ _write_register('t', 1);
+ _write_z(@asm_comma);
+ _write_i(v4);
+ _write_c('\n');
+
+ # Third instruction: t0, t0, t1.
+ _write_z(@asm_add);
+ _write_register('t', 0);
+ _write_z(@asm_comma);
+ _write_register('t', 0);
+ _write_z(@asm_comma);
+ _write_register('t', 1);
+ _write_c('\n');
+end;
+
proc _compile_term();
begin
la t0, source_code_position
@@ -614,6 +740,9 @@ begin
li t1, '~'
beq a0, t1, .compile_term_not
+ li t1, '"'
+ beq a0, t1, .compile_term_string_literal
+
_is_digit(v0);
bnez a0, .compile_term_integer_literal
@@ -639,6 +768,10 @@ begin
_compile_not_expression();
goto .compile_term_end;
+.compile_term_string_literal:
+ _compile_string_literal();
+ goto .compile_term_end;
+
.compile_term_variable:
_compile_variable_expression();
goto .compile_term_end;
@@ -1336,63 +1469,99 @@ begin
end;
# Process the source code and print the generated code.
+#
+# Top-level constructs are dispatched on their first characters: comments,
+# ".section", ".type", "proc " and ".globl". The loop ends at a zero byte or at
+# the first construct that is none of these.
-proc _compile();
+proc _compile_module();
begin
-.compile_loop:
+.compile_module_loop:
_skip_newlines();
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
- beqz t0, .compile_end
+ beqz t0, .compile_module_end
li t1, '#'
- beq t0, t1, .compile_comment
+ beq t0, t1, .compile_module_comment
# 8 is ".section" length.
_memcmp(source_code_position, @keyword_section, 8);
- beqz a0, .compile_section
+ beqz a0, .compile_module_section
# 5 is ".type" length.
_memcmp(source_code_position, @keyword_type, 5);
- beqz a0, .compile_type
+ beqz a0, .compile_module_type
# 5 is "proc " length. Space is needed to distinguish from "procedure".
_memcmp(source_code_position, @keyword_proc, 5);
- beqz a0, .compile_procedure
+ beqz a0, .compile_module_procedure
# 6 is ".globl" length.
_memcmp(source_code_position, @keyword_global, 6);
- beqz a0, .compile_global
+ beqz a0, .compile_module_global
# Not a known token, exit.
- goto .compile_end;
+ goto .compile_module_end;
-.compile_section:
+.compile_module_section:
_compile_section();
- goto .compile_loop;
+ goto .compile_module_loop;
-.compile_type:
+.compile_module_type:
_compile_type();
- goto .compile_loop;
+ goto .compile_module_loop;
-.compile_global:
+.compile_module_global:
+ # A ".globl" line is handed to _compile_line as a whole.
_compile_line();
- goto .compile_loop;
+ goto .compile_module_loop;
-.compile_comment:
+.compile_module_comment:
_skip_comment();
- goto .compile_loop;
+ goto .compile_module_loop;
-.compile_procedure:
+.compile_module_procedure:
_compile_procedure();
- goto .compile_loop;
+ goto .compile_module_loop;
+
+.compile_module_end:
+end;
+
+# Compiles the whole input and appends the collected string literals.
+#
+# After the module has been processed, a read-only data section is written
+# that defines the object "strings" as a single .ascii directive holding the
+# raw contents of compiler_strings, escape sequences left as they were in the
+# source.
+proc _compile();
+begin
+ _compile_module();
+
+ # Section header, ".type strings, @object" and the "strings:" label.
+ _write_z(@asm_rodata);
+ _write_z(@asm_type_directive);
+ _write_z(@asm_strings);
+ _write_z(@asm_type_object);
+ _write_z(@asm_strings);
+ _write_c(':');
+ _write_z(@asm_ascii);
+ _write_c('"');
+
+ # 0(sp) is the read pointer into the string storage.
+ la t0, compiler_strings
+ sw t0, 0(sp)
+
+.compile_loop:
+ # Stop when the read pointer reaches the storage write position.
+ lw t0, 0(sp)
+ la t1, compiler_strings_position
+ lw t1, (t1)
+ bge t0, t1, .compile_end
+
+ # The byte is loaded into a0 by hand, hence the empty argument list of the
+ # _write_c call below.
+ lb a0, (t0)
+
+ addi t0, t0, 1
+ sw t0, 0(sp)
+
+ _write_c();
+
+ j .compile_loop
.compile_end:
+ _write_c('"');
+ _write_c('\n');
end;
# Terminates the program. a0 contains the return code.