From 3f11d63a0f86191f010bc0093ee8616c154d9a1b Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 8 May 2025 00:13:07 +0200 Subject: [PATCH] Add builtin symbols --- boot/definitions.inc | 9 ++ boot/stage1.s | 265 +++++++++++++++++++------------------------ boot/stage2.elna | 63 ++-------- boot/symbol.s | 104 +++++++++++++++++ rakelib/cross.rake | 14 +-- rakelib/stage.rake | 10 +- tools/init.c | 204 --------------------------------- 7 files changed, 247 insertions(+), 422 deletions(-) create mode 100644 boot/symbol.s delete mode 100644 tools/init.c diff --git a/boot/definitions.inc b/boot/definitions.inc index 97f6601..42a7943 100644 --- a/boot/definitions.inc +++ b/boot/definitions.inc @@ -2,6 +2,10 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +# +# Tokens. +# + # The constant should match the index in the keywords array in tokenizer.s. .equ TOKEN_PROGRAM, 1 @@ -52,3 +56,8 @@ .equ TOKEN_ASSIGN, 43 .equ TOKEN_INTEGER, 44 + +# +# Symbols. +# +.equ TYPE_PRIMITIVE, 1 diff --git a/boot/stage1.s b/boot/stage1.s index 525da11..b39f5bc 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -411,14 +411,6 @@ _build_expression: call _tokenize_next sw a0, 20(sp) - /* DEBUG - lw a0, 32(sp) - lw a1, 28(sp) - call _write_error - lw a0, 28(sp) - li a1, 8 - call _write_error */ - lw a0, 24(sp) li t0, TOKEN_MINUS @@ -754,74 +746,6 @@ _compile_call: addi sp, sp, 32 ret -# Skips the spaces till the next non space character. -.type _skip_spaces, @function -_skip_spaces: -.Lspace_loop_do: - lbu t0, (s1) # t0 = Current character. - - li t1, ' ' - beq t0, t1, .Lspace_loop_repeat - li t1, '\t' - beq t0, t1, .Lspace_loop_repeat - li t1, '\n' - beq t0, t1, .Lspace_loop_repeat - li t1, '\r' - beq t0, t1, .Lspace_loop_repeat - - j .Lspace_loop_end -.Lspace_loop_repeat: - addi s1, s1, 1 - j .Lspace_loop_do - -.Lspace_loop_end: - ret - -# Parameters: -# a0 - Line length. 
-.type _skip_comment, @function -_skip_comment: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - # Check whether this is a comment. - li t0, 0x2a28 # (* - sw t0, 4(sp) - addi a0, sp, 4 - mv a1, s1 - li a2, 2 - call _memcmp - bnez a0, .Lskip_comment_end - - addi s1, s1, 2 # Skip (*. - - li t0, 0x292a # *) - sw t0, 4(sp) - -.Lskip_comment_loop: - addi a0, sp, 4 - mv a1, s1 - li a2, 2 - call _memcmp - beqz a0, .Lskip_comment_close - - addi s1, s1, 1 - - j .Lskip_comment_loop - -.Lskip_comment_close: - addi s1, s1, 2 # Skip *). - -.Lskip_comment_end: - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - # Walks through the procedure definitions. .type _compile_procedure_section, @function _compile_procedure_section: @@ -832,9 +756,6 @@ _compile_procedure_section: addi s0, sp, 32 .Lcompile_procedure_section_loop: - call _skip_spaces - call _skip_comment - mv a0, s1 addi a1, sp, 4 call _tokenize_next @@ -1083,28 +1004,9 @@ _compile_procedure: call _tokenize_next mv s1, a0 - # .type identifier, @function - la a0, asm_type - li a1, ASM_TYPE_SIZE - call _write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call _write_out - - la a0, asm_type_function - li a1, ASM_TYPE_FUNCTION_SIZE - call _write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call _write_out - - li t0, 0x0a3a # :\n - sw t0, 12(sp) - addi a0, sp, 12 - li a1, 2 - call _write_out + lw a0, 16(sp) + lw a1, 20(sp) + call _write_procedure_head # Skip all declarations until we find the "begin" keyword, denoting the # beginning of the procedure body. @@ -1128,21 +1030,24 @@ _compile_procedure: call _write_out # Generate the body of the procedure. -.Lcompile_procedure_body: - li t0, 0x0a646e65 # end\n - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 4 - call _memcmp + call _compile_statements + mv s1, a0 # Skip end. - beqz a0, .Lcompile_procedure_end - - call _compile_statement - j .Lcompile_procedure_body - -.Lcompile_procedure_end: - add s1, s1, 4 # Skip end\n. 
+ /* DEBUG + sw a0, 8(sp) + lw a1, 12(sp) + li a2, TOKEN_END + sub a1, a1, a2 + seqz a1, a1 + seqz a0, a0 + addi a0, a0, '0' + addi a1, a1, '0' + sb a0, 4(sp) + sb a1, 5(sp) + addi a0, sp, 4 + li a1, 2 + call _write_error + lw a0, 8(sp) */ # Generate the procedure epilogue with a predefined stack size. la a0, epilogue @@ -1288,21 +1193,8 @@ _compile_if: li a0, '\n' call _put_char -.Lcompile_if_loop: - mv a0, s1 - addi a1, sp, 0 - call _tokenize_next - - lw t0, 0(sp) - li t1, TOKEN_END - beq t0, t1, .Lcompile_if_end - - call _compile_statement - - j .Lcompile_if_loop - -.Lcompile_if_end: - mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next. + call _compile_statements + mv s1, a0 # Skip end. # Write the label prefix. addi a0, sp, 20 @@ -1328,6 +1220,95 @@ _compile_if: addi sp, sp, 32 ret +# Writes: +# .type identifier, @function +# identifier: +# +# Parameters: +# a0 - Identifier length. +# a1 - Identifier pointer. +.type _write_procedure_head, @function +_write_procedure_head: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 16(sp) + sw a1, 20(sp) + + # .type identifier, @function + la a0, asm_type + li a1, ASM_TYPE_SIZE + call _write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call _write_out + + la a0, asm_type_function + li a1, ASM_TYPE_FUNCTION_SIZE + call _write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call _write_out + + li t0, 0x0a3a # :\n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 2 + call _write_out + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Compiles a list of statements delimited by semicolons. +# +# Sets a0 to the end of the token finishing the list +# (should be the "end" token in a valid program). +.type _compile_statements, @function +_compile_statements: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Generate the body of the procedure. 
+ mv a0, s1 + addi a1, sp, 0 + call _tokenize_next + lw t0, 0(sp) + li t1, TOKEN_END + + beq t0, t1, .Lcompile_statements_end + +.Lcompile_statements_body: + call _compile_statement + + mv a0, s1 + addi a1, sp, 0 + call _tokenize_next + lw t0, 0(sp) + li t1, TOKEN_SEMICOLON + + bne t0, t1, .Lcompile_statements_end + mv s1, a0 + + j .Lcompile_statements_body + +.Lcompile_statements_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + # Checks for the type of the current statement and compiles it. .type _compile_statement, @function _compile_statement: @@ -1357,7 +1338,7 @@ _compile_statement: li t1, TOKEN_DOT beq t0, t1, .Lcompile_statement_label - j .Lcompile_statement_empty # Else. + unimp # Else. .Lcompile_statement_if: call _compile_if @@ -1379,10 +1360,6 @@ _compile_statement: call _compile_identifier j .Lcompile_statement_end -.Lcompile_statement_empty: - addi s1, s1, 1 - j .Lcompile_statement_end - .Lcompile_statement_end: # Epilogue. lw ra, 28(sp) @@ -1422,23 +1399,14 @@ _compile_entry_point: li a1, ASM_START_SIZE call _write_out - addi s1, s1, 6 # Skip begin\n. - - # Generate the body of the procedure. -.Lcompile_entry_point_body: mv a0, s1 addi a1, sp, 4 call _tokenize_next + mv s1, a0 # Skip begin. - lw t0, 4(sp) - li t1, TOKEN_END - beq t0, t1, .Lcompile_entry_point_end - - call _compile_statement - j .Lcompile_entry_point_body - -.Lcompile_entry_point_end: - mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call. + # Generate the body of the procedure. + call _compile_statements + mv s1, a0 # Skip end. la a0, asm_exit li a1, ASM_EXIT_SIZE @@ -1481,6 +1449,7 @@ _start: call _read_file li s2, 1 + call symbol_table_build call _compile # Call exit. diff --git a/boot/stage2.elna b/boot/stage2.elna index 62bd307..695f52a 100644 --- a/boot/stage2.elna +++ b/boot/stage2.elna @@ -8,7 +8,6 @@ const var source_code: [81920]Byte -(* Ignores the import. 
*) proc _compile_import() var loca0: Word begin @@ -61,7 +60,7 @@ begin loca24 := _token_compare(loca12, loca20, @loca16); if loca24 = 0 then goto .L_build_binary_expression_minus - end + end; loca16 := 0x2a; loca24 := _token_compare(loca12, loca20, @loca16); @@ -181,7 +180,7 @@ begin _put_char(0x0a); goto .Lcompile_identifier_expression_end - end + end; loca8 := 0x6120616c; _write_out(@loca8, 4); @@ -409,7 +408,7 @@ begin loca0 := _front(loca8) = 0x2c; if loca0 = 0 then goto .Lcompile_call_paren - end + end; loca12 := loca12 + 1; @@ -567,7 +566,7 @@ begin loca0 := _front(loca0); loca4 := loca4 + 1; - if loca0 = 0x3d then + if loca0 = 0x3d then goto .Ltoken_character_single end end; @@ -605,43 +604,6 @@ begin .Lspace_loop_end end -proc _skip_comment(loca84: Word) -var - loca0: ^Byte - loca4: Word - loca8: Int -begin - loca0 := _current(); - - loca4 := 0x2a28; - loca8 := _memcmp(loca0, @loca4, 2); - if loca8 = 0 then - goto .Lskip_comment_continue - end; - goto .Lskip_comment_end; - - .Lskip_comment_continue; - _advance(2); - - loca4 := 0x292a; - - .Lskip_comment_loop; - loca0 := _current(); - loca8 := _memcmp(loca0, @loca4, 2); - if loca8 = 0 then - goto .Lskip_comment_close - end; - - _advance(1); - - goto .Lskip_comment_loop; - - .Lskip_comment_close; - _advance(2); - - .Lskip_comment_end -end - proc _compile_assembly(loca84: Word) var loca0: ^Byte begin @@ -748,9 +710,11 @@ begin .Lcompile_variable_section_item; _skip_spaces(); loca4 := _current(); - loca0 := _front(loca4); - if _is_lower(loca0) = 0 then + loca0 := 0x636f7270; + loca0 := _memcmp(@loca0, loca4, 4); + + if loca0 = 0 then goto .Lcompile_variable_section_end end; _compile_variable(); @@ -1096,7 +1060,7 @@ proc _compile_return() begin _advance(6); _skip_spaces(); - _build_binary_expression(); + _build_binary_expression() end proc _compile_if() @@ -1164,10 +1128,6 @@ begin loca16 := _current(); loca0 := _front(loca16); - if loca0 = 0x28 then - goto .Lcompile_line_comment - end; - loca16 := _current(); 
loca12 := 0x676f7270; loca4 := _memcmp(loca16, @loca12, 4); @@ -1308,10 +1268,6 @@ begin _compile_program(); goto .Lcompile_line_section; - .Lcompile_line_comment; - _skip_comment(loca84); - goto .Lcompile_line_section; - .Lcompile_line_empty; _advance(1); goto .Lcompile_line_section; @@ -1325,7 +1281,6 @@ begin .Lcompile_line_end; _skip_spaces(); - _skip_comment(); return loca8 end diff --git a/boot/symbol.s b/boot/symbol.s new file mode 100644 index 0000000..66409aa --- /dev/null +++ b/boot/symbol.s @@ -0,0 +1,104 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +.global symbol_table_build + +.include "boot/definitions.inc" + +.equ SYMBOL_PRIME, 1543 + +.section .rodata + +.type symbol_builtin_name_int, @object +symbol_builtin_name_int: .ascii "Int" +.type symbol_builtin_name_word, @object +symbol_builtin_name_word: .ascii "Word" +.type symbol_builtin_name_byte, @object +symbol_builtin_name_byte: .ascii "Byte" +.type symbol_builtin_name_char, @object +symbol_builtin_name_char: .ascii "Char" + +# Every type info starts with a word describing what type it is. + +# Primitive types have only type size. +.type symbol_builtin_type_int, @object +symbol_builtin_type_int: .word TYPE_PRIMITIVE + .word 4 +.type symbol_builtin_type_word, @object +symbol_builtin_type_word: .word TYPE_PRIMITIVE + .word 4 +.type symbol_builtin_type_byte, @object +symbol_builtin_type_byte: .word TYPE_PRIMITIVE + .word 1 +.type symbol_builtin_type_char, @object +symbol_builtin_type_char: .word TYPE_PRIMITIVE + .word 1 + +.section .bss + +# The first word of the symbol table is its length. +# Then a list of type infos follows: +# +# record +# name: String +# info: ^TypeInfo +# end +.type symbol_table, @object +symbol_table: .zero SYMBOL_PRIME + +.section .text + +# Build the initial symbols. 
+# +# Sets a0 to the pointer to the global symbol table. +.type symbol_table_build, @function +symbol_table_build: + la a0, symbol_table + addi t0, a0, 4 + + li t1, 3 # Length of the word "Int". + sw t1, 0(t0) + la t1, symbol_builtin_name_int + sw t1, 4(t0) + la t1, symbol_builtin_type_int + sw t1, 8(t0) + lw t1, 0(a0) + addi t1, t1, 1 + sw t1, 0(a0) + addi t0, t0, 12 + + li t1, 4 # Length of the word "Word". + sw t1, 0(t0) + la t1, symbol_builtin_name_word + sw t1, 4(t0) + la t1, symbol_builtin_type_word + sw t1, 8(t0) + lw t1, 0(a0) + addi t1, t1, 1 + sw t1, 0(a0) + addi t0, t0, 12 + + li t1, 4 # Length of the word "Byte". + sw t1, 0(t0) + la t1, symbol_builtin_name_byte + sw t1, 4(t0) + la t1, symbol_builtin_type_byte + sw t1, 8(t0) + lw t1, 0(a0) + addi t1, t1, 1 + sw t1, 0(a0) + addi t0, t0, 12 + + li t1, 4 # Length of the word "Char". + sw t1, 0(t0) + la t1, symbol_builtin_name_char + sw t1, 4(t0) + la t1, symbol_builtin_type_char + sw t1, 8(t0) + lw t1, 0(a0) + addi t1, t1, 1 + sw t1, 0(a0) + addi t0, t0, 12 + + ret diff --git a/rakelib/cross.rake b/rakelib/cross.rake index f90bb84..b390590 100644 --- a/rakelib/cross.rake +++ b/rakelib/cross.rake @@ -309,17 +309,6 @@ namespace :cross do sh env, 'make', '-j', Etc.nprocessors.to_s, chdir: cwd.to_path sh env, 'make', 'install', chdir: cwd.to_path end - - task :init, [:target] do |_, args| - options = find_build_target GCC_VERSION, args - env = { - 'PATH' => "#{options.rootfs.realpath + 'bin'}:#{ENV['PATH']}" - } - sh env, 'riscv32-unknown-linux-gnu-gcc', - '-ffreestanding', '-static', - '-o', (options.tools + 'init').to_path, - 'tools/init.c' - end end desc 'Build cross toolchain' @@ -329,7 +318,6 @@ task cross: [ 'cross:headers', 'cross:kernel', 'cross:glibc', - 'cross:gcc2', - 'cross:init' + 'cross:gcc2' ] do end diff --git a/rakelib/stage.rake b/rakelib/stage.rake index 80f704d..6f61cae 100644 --- a/rakelib/stage.rake +++ b/rakelib/stage.rake @@ -21,16 +21,20 @@ def assemble_stage(output, compiler, source) end end 
+library = [] + Dir.glob('boot/*.s').each do |assembly_source| - target_object = Pathname.new('build/boot') + Pathname.new(assembly_source).basename.sub_ext('.o') + source_basename = Pathname.new(assembly_source).basename + target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o') file target_object.to_s => [assembly_source, 'build/boot'] do |t| sh CROSS_GCC, '-c', '-o', t.name, assembly_source end + library << assembly_source unless source_basename.to_s.start_with? 'stage' end desc 'Initial stage' -file 'build/boot/stage1' => ['build/boot/tokenizer.o', 'build/boot/stage1.o', 'build/boot/common-boot.o'] do |t| +file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t| sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites end @@ -43,7 +47,7 @@ file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t| end ['build/boot/stage2a', 'build/boot/stage2b'].each do |exe| - file exe => [exe.ext('.s'), 'build/boot/common-boot.o'] do |t| + file exe => [exe.ext('.s'), *library] do |t| sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites end end diff --git a/tools/init.c b/tools/init.c deleted file mode 100644 index f463bcd..0000000 --- a/tools/init.c +++ /dev/null @@ -1,204 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define FILENAME_BUFFER_SIZE 256 - -size_t read_command(int descriptor, char *command_buffer) -{ - ssize_t bytes_read = 0; - size_t read_so_far = 0; - - while ((bytes_read = read(descriptor, command_buffer + read_so_far, FILENAME_BUFFER_SIZE - read_so_far - 1)) > 0) - { - read_so_far += bytes_read; - if (read_so_far >= FILENAME_BUFFER_SIZE - 1) - { - break; - } - } - command_buffer[read_so_far] = 0; - return read_so_far; -} - -enum status -{ - status_success, - status_failure, - status_warning, - status_fatal -}; - -unsigned int make_path(char *destination, const char *directory, const char *filename, const char *extension) -{ - unsigned int i = 0; - - for (; i < 
FILENAME_BUFFER_SIZE; i++) - { - if (directory[i] == 0) - { - break; - } - destination[i] = directory[i]; - } - for (int j = 0; i < FILENAME_BUFFER_SIZE; i++, j++) - { - if (filename[j] == 0) - { - break; - } - destination[i] = filename[j]; - } - if (extension == NULL) - { - goto done; - } - for (int j = 0; i < FILENAME_BUFFER_SIZE; i++, j++) - { - if (extension[j] == 0) - { - break; - } - destination[i] = extension[j]; - } -done: - destination[i] = 0; - - return i; -} - -enum status run_test(const char *file_entry_name) -{ - printf("Running %s. ", file_entry_name); - - char filename[FILENAME_BUFFER_SIZE]; - char command_buffer[FILENAME_BUFFER_SIZE]; - char file_buffer[256]; - int pipe_ends[2]; - - if (pipe(pipe_ends) == -1) - { - perror("pipe"); - return status_fatal; - } - make_path(filename, "./tests/", file_entry_name, NULL); - - int child_pid = fork(); - if (child_pid == -1) - { - return status_fatal; - } - else if (child_pid == 0) - { - close(STDIN_FILENO); - close(STDERR_FILENO); - close(pipe_ends[0]); // Close the read end. - - if (dup2(pipe_ends[1], STDOUT_FILENO) == -1) - { - perror("dup2"); - } - else - { - execl(filename, filename); - perror("execl"); - } - close(STDOUT_FILENO); - close(pipe_ends[1]); - _exit(1); - } - else - { - close(pipe_ends[1]); // Close the write end. - read_command(pipe_ends[0], command_buffer); - close(pipe_ends[0]); - - int wait_status = 0; - - make_path(filename, "./expectations/", file_entry_name, ".txt"); - - FILE *expectation_descriptor = fopen(filename, "r"); - - if (expectation_descriptor == NULL) - { - return status_warning; - } - size_t read_from_file = fread(file_buffer, 1, sizeof(file_buffer) - 1, expectation_descriptor); - fclose(expectation_descriptor); - - file_buffer[read_from_file] = 0; - for (unsigned int i = 0; ; ++i) - { - if (command_buffer[i] == 0 && file_buffer[i] == 0) - { - fwrite("\n", 1, 1, stdout); - return status_success; - } - else if (command_buffer[i] != file_buffer[i]) - { - printf("Failed. 
Got:\n%s", command_buffer); - return status_failure; - } - } - } -} - -struct summary -{ - size_t total; - size_t failure; - size_t success; -}; - -void walk() -{ - DIR *directory_stream = opendir("./tests"); - struct dirent *file_entry; - - struct summary test_summary = { .total = 0, .failure = 0, .success = 0 }; - - while ((file_entry = readdir(directory_stream)) != NULL) - { - if (file_entry->d_name[0] == '.') - { - continue; - } - ++test_summary.total; - switch (run_test(file_entry->d_name)) - { - case status_failure: - ++test_summary.failure; - break; - case status_success: - ++test_summary.success; - break; - case status_warning: - break; - case status_fatal: - goto end_walk; - } - } - printf("Successful: %lu, Failed: %lu, Total: %lu.\n", - test_summary.success, test_summary.failure, test_summary.total); -end_walk: - closedir(directory_stream); -} - -int main() -{ - int dev_console = open("/dev/console", O_WRONLY); - if (dev_console != -1) - { - dup2(dev_console, STDOUT_FILENO); - walk(); - close(dev_console); - } - sync(); - reboot(RB_POWER_OFF); - - return 1; -}