Add string literals

This commit is contained in:
2025-09-05 17:17:28 +02:00
parent 4888252274
commit e0ac57dc1d
11 changed files with 2723 additions and 67 deletions

View File

@@ -1,9 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
source 'https://rubygems.org'
gem 'term-ansicolor', '~> 1.2'
gem 'rake', '~> 13.2'

View File

@@ -1,22 +0,0 @@
GEM
remote: https://rubygems.org/
specs:
bigdecimal (3.1.9)
rake (13.2.1)
sync (0.5.0)
term-ansicolor (1.11.2)
tins (~> 1.0)
tins (1.38.0)
bigdecimal
sync
PLATFORMS
ruby
x86_64-linux
DEPENDENCIES
rake (~> 13.2)
term-ansicolor (~> 1.2)
BUNDLED WITH
2.6.7

View File

@@ -6,8 +6,6 @@
require 'open3'
require 'rake/clean'
require 'shellwords'
SYSROOT = '../eugenios/build/sysroot'
QEMU = 'qemu-riscv32'
STAGES = Dir.glob('boot/stage*.elna').collect { |stage| File.basename stage, '.elna' }.sort
CLEAN.include 'build/boot', 'build/valid'
@@ -19,6 +17,10 @@ def compile(input, output)
sh ENV.fetch('CC', 'gcc'), '-nostdlib', '-o', output, input
end
# Builds the command line used to execute a compiled stage binary.
#
# The optional QEMU environment variable holds an emulator command that is put
# in front of the executable, e.g. QEMU="qemu-riscv32 -L '/path/to/sysroot'".
# It is parsed with shell quoting rules, so an argument that contains spaces
# can be quoted. When QEMU is unset or empty the executable is run directly.
#
# @param exe [String] path to the executable
# @return [Array<String>] the command followed by its arguments
# @raise [ArgumentError] if QEMU contains an unmatched quote
def run(exe)
  Shellwords.split(ENV.fetch('QEMU', '')) << exe
end
task default: :boot
desc 'Final stage'
@@ -31,9 +33,8 @@ task boot: "boot/#{STAGES.last}.elna" do |t|
source = groupped['.elna']
cat_arguments = ['cat', source]
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
diff_arguments = ['diff', '-Nur', '--text', expected, '-']
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
Open3.pipeline(cat_arguments, run(exe), diff_arguments)
end
desc 'Convert previous stage language into the current stage language'
@@ -60,8 +61,7 @@ STAGES.each do |stage|
exe, source = t.prerequisites
cat_arguments = ['cat', source]
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, run(exe))
IO.copy_stream last_stdout, t.name
end
@@ -70,8 +70,7 @@ STAGES.each do |stage|
exe, source = t.prerequisites
cat_arguments = ['cat', source]
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, run(exe))
IO.copy_stream last_stdout, t.name
end
@@ -85,8 +84,7 @@ file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/val
source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' }
cat_arguments = ['cat', *source]
compiler_arguments = [QEMU, '-L', SYSROOT, *exe]
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, run(exe.first))
IO.copy_stream last_stdout, t.name
end

View File

@@ -1,9 +1,10 @@
# ihis Source Code Form is subject to the terms of the Mozilla Public License,
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage 6 compiler.
#
# - String literals.
.section .rodata
@@ -43,6 +44,9 @@ asm_type_directive: .string ".type "
.type asm_type_function, @object
asm_type_function: .string ", @function\n"
.type asm_type_object, @object
asm_type_object: .string ", @object\n"
.type asm_colon, @object
asm_colon: .string ":\n"
@@ -112,19 +116,110 @@ asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "sp"
.type asm_rodata, @object
asm_rodata: .string ".section .rodata\n"
.type asm_strings, @object
asm_strings: .string "strings"
.type asm_ascii, @object
asm_ascii: .string " .ascii "
.section .bss
# source_code is the 81920-byte buffer the compiler scans. When modifying its
# size also change the read size in the entry point procedure.
# compiler_strings is an 8192-byte buffer that _add_string fills with the
# bytes of the string literals found in the source.
.type source_code, @object
source_code: .zero 81920
.type compiler_strings, @object
compiler_strings: .zero 8192
.section .data
# compiler_strings_position is the write cursor into compiler_strings; it
# starts at the beginning of the buffer.
# compiler_strings_length is the counter _add_string reads to get the offset
# of a new string; it is not incremented for backslash bytes.
# source_code_position is the read cursor into source_code; it starts at the
# beginning of the buffer.
.type compiler_strings_position, @object
compiler_strings_position: .word compiler_strings
.type compiler_strings_length, @object
compiler_strings_length: .word 0
.type source_code_position, @object
source_code_position: .word source_code
.section .text
# Calculates and returns the string token length between quotes, including the
# escaping slash characters.
#
# The pointer is advanced before the first byte is examined, so it is expected
# to address the opening quote. Counting stops at the first '"' byte.
#
# NOTE(review): a '"' that follows a backslash also stops the scan, and there
# is no check for the end of the source - confirm that escaped quotes and
# unterminated strings are not expected here.
#
# Parameters:
# a0 - String token pointer.
#
# Returns the length in a0.
proc _string_length();
begin
# Reset the counter.
v0 := 0;
.string_length_loop:
# Move the token pointer kept in v88 (the word at 88(sp)) to the next byte.
# NOTE(review): presumably the a0 argument is saved in that slot - confirm.
v88 := v88 + 1;
lw t0, 88(sp)
lb t0, (t0)
# The closing quote ends the token and is not counted.
li t1, '"'
beq t0, t1, .string_length_end
v0 := v0 + 1;
goto .string_length_loop;
.string_length_end:
return v0
end;
# Adds a string to the global, read-only string storage.
#
# The bytes following the opening quote are copied unchanged, backslashes
# included, to the location compiler_strings_position points to, and that
# cursor is advanced. compiler_strings_length is incremented for every copied
# byte except backslashes, presumably because the assembler turns an escape
# sequence into a single byte.
#
# NOTE(review): every backslash byte skips the increment, so an escaped
# backslash (two backslash bytes) adds nothing to the length - confirm this is
# intended. The copy stops at the first '"' byte even when it follows a
# backslash, and nothing here checks the cursor against the storage size.
#
# Parameters:
# a0 - String token.
#
# Returns the offset from the beginning of the storage to the new string in a0.
proc _add_string();
begin
# v0 is the read cursor: the token pointer moved past the opening quote.
# NOTE(review): presumably the a0 argument is saved in v88 - confirm.
v0 := v88 + 1;
# Remember the length before the insertion; this is the returned offset.
v4 := compiler_strings_length;
.add_string_loop:
# t1 is the current source byte; the closing quote ends the copy.
lw t0, 0(sp)
lb t1, (t0)
li t2, '"'
beq t1, t2, .add_string_end
# Store the byte at the write cursor and advance the cursor.
la t2, compiler_strings_position
lw t3, (t2)
sb t1, (t3)
addi t3, t3, 1
sw t3, (t2)
# Advance the read cursor.
addi t0, t0, 1
sw t0, 0(sp)
# A backslash is stored but does not count towards the length.
li t2, '\\'
bne t1, t2, .add_string_increment
goto .add_string_loop;
.add_string_increment:
la t2, compiler_strings_length
lw t4, (t2)
addi t4, t4, 1
sw t4, (t2)
goto .add_string_loop;
.add_string_end:
return v4
end;
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
@@ -595,6 +690,37 @@ begin
end;
# Compiles a string literal term.
#
# The literal at source_code_position is added to the string storage and code
# is written that computes its address in t0 as the "strings" label plus the
# offset of the literal:
#
#   la t0, strings
#   li t1, <offset>
#   add t0, t0, t1
#
# NOTE(review): the mnemonics come from asm_la, asm_li and asm_add, which are
# defined outside this view - confirm the exact emitted text.
proc _compile_string_literal();
begin
# v0 receives the length of the literal between the quotes.
_string_length(source_code_position);
sw a0, 0(sp)
# v4 receives the offset of the literal inside the string storage.
_add_string(source_code_position);
sw a0, 4(sp)
# Step over the literal: its length plus the two quotes.
_advance_token(v0 + 2);
_write_z(@asm_la);
_write_register('t', 0);
_write_z(@asm_comma);
_write_z(@asm_strings);
_write_c('\n');
_write_z(@asm_li);
_write_register('t', 1);
_write_z(@asm_comma);
_write_i(v4);
_write_c('\n');
_write_z(@asm_add);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 0);
_write_z(@asm_comma);
_write_register('t', 1);
_write_c('\n');
end;
proc _compile_term();
begin
la t0, source_code_position
@@ -614,6 +740,9 @@ begin
li t1, '~'
beq a0, t1, .compile_term_not
li t1, '"'
beq a0, t1, .compile_term_string_literal
_is_digit(v0);
bnez a0, .compile_term_integer_literal
@@ -639,6 +768,10 @@ begin
_compile_not_expression();
goto .compile_term_end;
.compile_term_string_literal:
_compile_string_literal();
goto .compile_term_end;
.compile_term_variable:
_compile_variable_expression();
goto .compile_term_end;
@@ -1336,63 +1469,99 @@ begin
end;
# Process the source code and print the generated code.
#
# Loops over the top-level constructs starting at source_code_position and
# hands each one to its handler: comments, ".section", ".type", "proc " and
# ".globl". The loop ends at a zero byte or at the first unknown token.
proc _compile();
proc _compile_module();
begin
.compile_loop:
.compile_module_loop:
_skip_newlines();
# Look at the current source byte; a zero byte ends the loop.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
beqz t0, .compile_end
beqz t0, .compile_module_end
# A line starting with '#' is a comment.
li t1, '#'
beq t0, t1, .compile_comment
beq t0, t1, .compile_module_comment
# 8 is ".section" length.
_memcmp(source_code_position, @keyword_section, 8);
beqz a0, .compile_section
beqz a0, .compile_module_section
# 5 is ".type" length.
_memcmp(source_code_position, @keyword_type, 5);
beqz a0, .compile_type
beqz a0, .compile_module_type
# 5 is "proc " length. Space is needed to distinguish from "procedure".
_memcmp(source_code_position, @keyword_proc, 5);
beqz a0, .compile_procedure
beqz a0, .compile_module_procedure
# 6 is ".globl" length.
_memcmp(source_code_position, @keyword_global, 6);
beqz a0, .compile_global
beqz a0, .compile_module_global
# Not a known token, exit.
goto .compile_end;
goto .compile_module_end;
.compile_section:
.compile_module_section:
_compile_section();
goto .compile_loop;
goto .compile_module_loop;
.compile_type:
.compile_module_type:
_compile_type();
goto .compile_loop;
goto .compile_module_loop;
.compile_global:
.compile_module_global:
# A ".globl" line is handled by _compile_line.
_compile_line();
goto .compile_loop;
goto .compile_module_loop;
.compile_comment:
.compile_module_comment:
_skip_comment();
goto .compile_loop;
goto .compile_module_loop;
.compile_procedure:
.compile_module_procedure:
_compile_procedure();
goto .compile_loop;
goto .compile_module_loop;
.compile_module_end:
end;
# Compiles the module and then writes the collected string literals as one
# read-only object:
#
#   .section .rodata
#   .type strings, @object
#   strings: .ascii "<bytes stored in compiler_strings>"
proc _compile();
begin
_compile_module();
_write_z(@asm_rodata);
_write_z(@asm_type_directive);
_write_z(@asm_strings);
_write_z(@asm_type_object);
_write_z(@asm_strings);
_write_c(':');
_write_z(@asm_ascii);
_write_c('"');
# The word at 0(sp) is the read cursor, starting at the beginning of the
# string storage.
la t0, compiler_strings
sw t0, 0(sp)
.compile_loop:
# Stop when the cursor reaches the write position of the storage.
lw t0, 0(sp)
la t1, compiler_strings_position
lw t1, (t1)
bge t0, t1, .compile_end
# Load the next byte into a0 and store the advanced cursor before the call.
# NOTE(review): presumably _write_c takes the character in a0 - confirm.
lb a0, (t0)
addi t0, t0, 1
sw t0, 0(sp)
_write_c();
j .compile_loop
.compile_end:
_write_c('"');
_write_c('\n');
end;
# Terminates the program. a0 contains the return code.

1260
boot/stage7.elna Normal file

File diff suppressed because it is too large Load Diff

1260
boot/stage8.elna Normal file

File diff suppressed because it is too large Load Diff