Add builtin symbols

This commit is contained in:
Eugen Wissner 2025-05-08 00:13:07 +02:00
parent 40701008f0
commit 3f11d63a0f
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
7 changed files with 247 additions and 422 deletions

View File

@ -2,6 +2,10 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# Tokens.
#
# The constant should match the index in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
@ -52,3 +56,8 @@
.equ TOKEN_ASSIGN, 43
.equ TOKEN_INTEGER, 44
#
# Symbols.
#
.equ TYPE_PRIMITIVE, 1

View File

@ -411,14 +411,6 @@ _build_expression:
call _tokenize_next
sw a0, 20(sp)
/* DEBUG
lw a0, 32(sp)
lw a1, 28(sp)
call _write_error
lw a0, 28(sp)
li a1, 8
call _write_error */
lw a0, 24(sp)
li t0, TOKEN_MINUS
@ -754,74 +746,6 @@ _compile_call:
addi sp, sp, 32
ret
# Moves the source cursor s1 forward over any run of blanks: space, tab,
# line feed and carriage return.
#
# On return s1 points at the first byte that is none of these four.
# t0 and t1 are used as scratch registers.
.type _skip_spaces, @function
_skip_spaces:
	j .Lskip_spaces_test

.Lskip_spaces_advance:
	addi s1, s1, 1

.Lskip_spaces_test:
	lbu t0, (s1) # t0 = byte under the cursor.

	li t1, ' '
	beq t0, t1, .Lskip_spaces_advance
	li t1, '\t'
	beq t0, t1, .Lskip_spaces_advance
	li t1, '\n'
	beq t0, t1, .Lskip_spaces_advance
	li t1, '\r'
	beq t0, t1, .Lskip_spaces_advance

	ret
# Steps over a "(* ... *)" comment when the source cursor s1 points at one.
# If the two bytes at s1 are not "(*", s1 is left where it was.
#
# The value passed in a0 is never read: a0 is overwritten before the first
# call to _memcmp.
#
# NOTE(review): the search for the closing "*)" has no end-of-input check;
# confirm how an unterminated comment is supposed to behave.
.type _skip_comment, @function
_skip_comment:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Compare the two bytes under the cursor with the opening marker.
	li t0, 0x2a28 # (*
	sw t0, 4(sp)

	addi a0, sp, 4
	mv a1, s1
	li a2, 2
	call _memcmp
	bnez a0, .Lskip_comment_return # Not a comment, nothing to skip.

	addi s1, s1, 2 # Step over (*.

	# From here on 4(sp) holds the closing marker.
	li t0, 0x292a # *)
	sw t0, 4(sp)
	j .Lskip_comment_probe

.Lskip_comment_step:
	addi s1, s1, 1

.Lskip_comment_probe:
	addi a0, sp, 4
	mv a1, s1
	li a2, 2
	call _memcmp
	bnez a0, .Lskip_comment_step

	addi s1, s1, 2 # Step over *).

.Lskip_comment_return:
	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret
# Walks through the procedure definitions.
.type _compile_procedure_section, @function
_compile_procedure_section:
@ -832,9 +756,6 @@ _compile_procedure_section:
addi s0, sp, 32
.Lcompile_procedure_section_loop:
call _skip_spaces
call _skip_comment
mv a0, s1
addi a1, sp, 4
call _tokenize_next
@ -1083,28 +1004,9 @@ _compile_procedure:
call _tokenize_next
mv s1, a0
# .type identifier, @function
la a0, asm_type
li a1, ASM_TYPE_SIZE
call _write_out
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
la a0, asm_type_function
li a1, ASM_TYPE_FUNCTION_SIZE
call _write_out
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
li t0, 0x0a3a # :\n
sw t0, 12(sp)
addi a0, sp, 12
li a1, 2
call _write_out
lw a0, 16(sp)
lw a1, 20(sp)
call _write_procedure_head
# Skip all declarations until we find the "begin" keyword, denoting the
# beginning of the procedure body.
@ -1128,21 +1030,24 @@ _compile_procedure:
call _write_out
# Generate the body of the procedure.
.Lcompile_procedure_body:
li t0, 0x0a646e65 # end\n
sw t0, 8(sp)
mv a0, s1
addi a1, sp, 8
li a2, 4
call _memcmp
call _compile_statements
mv s1, a0 # Skip end.
beqz a0, .Lcompile_procedure_end
call _compile_statement
j .Lcompile_procedure_body
.Lcompile_procedure_end:
add s1, s1, 4 # Skip end\n.
/* DEBUG
sw a0, 8(sp)
lw a1, 12(sp)
li a2, TOKEN_END
sub a1, a1, a2
seqz a1, a1
seqz a0, a0
addi a0, a0, '0'
addi a1, a1, '0'
sb a0, 4(sp)
sb a1, 5(sp)
addi a0, sp, 4
li a1, 2
call _write_error
lw a0, 8(sp) */
# Generate the procedure epilogue with a predefined stack size.
la a0, epilogue
@ -1288,21 +1193,8 @@ _compile_if:
li a0, '\n'
call _put_char
.Lcompile_if_loop:
mv a0, s1
addi a1, sp, 0
call _tokenize_next
lw t0, 0(sp)
li t1, TOKEN_END
beq t0, t1, .Lcompile_if_end
call _compile_statement
j .Lcompile_if_loop
.Lcompile_if_end:
mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next.
call _compile_statements
mv s1, a0 # Skip end.
# Write the label prefix.
addi a0, sp, 20
@ -1328,6 +1220,95 @@ _compile_if:
addi sp, sp, 32
ret
# Emits the lines that open a procedure in the generated assembly:
#
#   .type identifier, @function
#   identifier:
#
# Parameters:
# a0 - Identifier length.
# a1 - Identifier pointer.
.type _write_procedure_head, @function
_write_procedure_head:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# a0 and a1 are reused for every _write_out call, so park the identifier
	# on the stack first.
	sw a1, 8(sp) # Identifier pointer.
	sw a0, 4(sp) # Identifier length.

	# Two bytes written after the second copy of the identifier.
	li t0, 0x0a3a # :\n
	sw t0, 0(sp)

	# Text in front of the identifier on the .type line.
	la a0, asm_type
	li a1, ASM_TYPE_SIZE
	call _write_out

	lw a0, 8(sp)
	lw a1, 4(sp)
	call _write_out

	# Text behind the identifier on the .type line.
	la a0, asm_type_function
	li a1, ASM_TYPE_FUNCTION_SIZE
	call _write_out

	# The label itself.
	lw a0, 8(sp)
	lw a1, 4(sp)
	call _write_out

	mv a0, sp
	li a1, 2
	call _write_out

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret
# Compiles a sequence of statements separated by semicolons, starting at the
# source cursor s1.
#
# A list that starts with the "end" token is empty and nothing is compiled.
# Otherwise statements are compiled for as long as each one is followed by a
# semicolon; s1 is moved past every such semicolon.
#
# Sets a0 to the value returned by the last _tokenize_next call, that is the
# end of the token that finished the list (the "end" token in a valid
# program).
.type _compile_statements, @function
_compile_statements:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Look at the first token without consuming it. The token is written to
	# the bottom of the frame.
	mv a0, s1
	mv a1, sp
	call _tokenize_next

	lw t0, 0(sp)
	li t1, TOKEN_END
	beq t0, t1, .Lcompile_statements_return

.Lcompile_statements_item:
	call _compile_statement

	# Decide by the token after the statement whether the list goes on.
	mv a0, s1
	mv a1, sp
	call _tokenize_next

	lw t0, 0(sp)
	li t1, TOKEN_SEMICOLON
	beq t0, t1, .Lcompile_statements_separator

.Lcompile_statements_return:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

.Lcompile_statements_separator:
	mv s1, a0 # Consume the semicolon.
	j .Lcompile_statements_item
# Checks for the type of the current statement and compiles it.
.type _compile_statement, @function
_compile_statement:
@ -1357,7 +1338,7 @@ _compile_statement:
li t1, TOKEN_DOT
beq t0, t1, .Lcompile_statement_label
j .Lcompile_statement_empty # Else.
unimp # Else.
.Lcompile_statement_if:
call _compile_if
@ -1379,10 +1360,6 @@ _compile_statement:
call _compile_identifier
j .Lcompile_statement_end
.Lcompile_statement_empty:
addi s1, s1, 1
j .Lcompile_statement_end
.Lcompile_statement_end:
# Epilogue.
lw ra, 28(sp)
@ -1422,23 +1399,14 @@ _compile_entry_point:
li a1, ASM_START_SIZE
call _write_out
addi s1, s1, 6 # Skip begin\n.
# Generate the body of the procedure.
.Lcompile_entry_point_body:
mv a0, s1
addi a1, sp, 4
call _tokenize_next
mv s1, a0 # Skip begin.
lw t0, 4(sp)
li t1, TOKEN_END
beq t0, t1, .Lcompile_entry_point_end
call _compile_statement
j .Lcompile_entry_point_body
.Lcompile_entry_point_end:
mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call.
# Generate the body of the procedure.
call _compile_statements
mv s1, a0 # Skip end.
la a0, asm_exit
li a1, ASM_EXIT_SIZE
@ -1481,6 +1449,7 @@ _start:
call _read_file
li s2, 1
call symbol_table_build
call _compile
# Call exit.

View File

@ -8,7 +8,6 @@ const
var
source_code: [81920]Byte
(* Ignores the import. *)
proc _compile_import()
var loca0: Word
begin
@ -61,7 +60,7 @@ begin
loca24 := _token_compare(loca12, loca20, @loca16);
if loca24 = 0 then
goto .L_build_binary_expression_minus
end
end;
loca16 := 0x2a;
loca24 := _token_compare(loca12, loca20, @loca16);
@ -181,7 +180,7 @@ begin
_put_char(0x0a);
goto .Lcompile_identifier_expression_end
end
end;
loca8 := 0x6120616c;
_write_out(@loca8, 4);
@ -409,7 +408,7 @@ begin
loca0 := _front(loca8) = 0x2c;
if loca0 = 0 then
goto .Lcompile_call_paren
end
end;
loca12 := loca12 + 1;
@ -567,7 +566,7 @@ begin
loca0 := _front(loca0);
loca4 := loca4 + 1;
if loca0 = 0x3d then
if loca0 = 0x3d then
goto .Ltoken_character_single
end
end;
@ -605,43 +604,6 @@ begin
.Lspace_loop_end
end
(* Moves past a comment when the input is positioned at one.

   The two bytes at _current() are compared with 0x2a28, the opening
   marker. If _memcmp does not report 0 the procedure does nothing.
   Otherwise it advances over the marker and then moves forward one byte
   at a time until the two bytes at _current() compare equal to 0x292a,
   the closing marker, which is skipped as well.

   The parameter loca84 is not referenced in the body. *)
proc _skip_comment(loca84: Word)
var
loca0: ^Byte
loca4: Word
loca8: Int
begin
loca0 := _current();
loca4 := 0x2a28;
loca8 := _memcmp(loca0, @loca4, 2);
if loca8 = 0 then
goto .Lskip_comment_continue
end;
goto .Lskip_comment_end;
.Lskip_comment_continue;
_advance(2);
loca4 := 0x292a;
.Lskip_comment_loop;
loca0 := _current();
loca8 := _memcmp(loca0, @loca4, 2);
if loca8 = 0 then
goto .Lskip_comment_close
end;
_advance(1);
goto .Lskip_comment_loop;
.Lskip_comment_close;
_advance(2);
.Lskip_comment_end
end
proc _compile_assembly(loca84: Word)
var loca0: ^Byte
begin
@ -748,9 +710,11 @@ begin
.Lcompile_variable_section_item;
_skip_spaces();
loca4 := _current();
loca0 := _front(loca4);
if _is_lower(loca0) = 0 then
loca0 := 0x636f7270;
loca0 := _memcmp(@loca0, loca4, 4);
if loca0 = 0 then
goto .Lcompile_variable_section_end
end;
_compile_variable();
@ -1096,7 +1060,7 @@ proc _compile_return()
begin
_advance(6);
_skip_spaces();
_build_binary_expression();
_build_binary_expression()
end
proc _compile_if()
@ -1164,10 +1128,6 @@ begin
loca16 := _current();
loca0 := _front(loca16);
if loca0 = 0x28 then
goto .Lcompile_line_comment
end;
loca16 := _current();
loca12 := 0x676f7270;
loca4 := _memcmp(loca16, @loca12, 4);
@ -1308,10 +1268,6 @@ begin
_compile_program();
goto .Lcompile_line_section;
.Lcompile_line_comment;
_skip_comment(loca84);
goto .Lcompile_line_section;
.Lcompile_line_empty;
_advance(1);
goto .Lcompile_line_section;
@ -1325,7 +1281,6 @@ begin
.Lcompile_line_end;
_skip_spaces();
_skip_comment();
return loca8
end

104
boot/symbol.s Normal file
View File

@ -0,0 +1,104 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table_build
.include "boot/definitions.inc"
# Number of bytes reserved for the symbol table.
# NOTE(review): the name suggests a bucket count rather than a byte size;
# confirm that reserving SYMBOL_PRIME bytes (not records) is intended.
.equ SYMBOL_PRIME, 1543

.section .rodata

# Names of the builtin types. The strings carry no terminator.
.type symbol_builtin_name_int, @object
symbol_builtin_name_int: .ascii "Int"
.type symbol_builtin_name_word, @object
symbol_builtin_name_word: .ascii "Word"
.type symbol_builtin_name_byte, @object
symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char"

# Every type info starts with a word describing what type it is.
# Primitive types have only type size.
#
# The names above take 15 bytes, so the location counter has to be brought
# back to a word boundary before any words are emitted.
.balign 4
.type symbol_builtin_type_int, @object
symbol_builtin_type_int: .word TYPE_PRIMITIVE
	.word 4
.type symbol_builtin_type_word, @object
symbol_builtin_type_word: .word TYPE_PRIMITIVE
	.word 4
.type symbol_builtin_type_byte, @object
symbol_builtin_type_byte: .word TYPE_PRIMITIVE
	.word 1
.type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE
	.word 1

.section .bss

# The first word of the symbol table is its length.
# Then a list of type infos follows:
#
# record
#   name: String
#   info: ^TypeInfo
# end
#
# The table is read and written one word at a time, so it has to start on
# a word boundary.
.balign 4
.type symbol_table, @object
symbol_table: .zero SYMBOL_PRIME
.section .text

# Size of one symbol table record: name length, name pointer, info pointer.
.equ SYMBOL_RECORD_SIZE, 12

# Writes one record and counts it in the table length.
#
# Expects:
# a0 - Pointer to the symbol table (its first word is the length).
# t0 - Pointer to the next free record.
#
# Leaves t0 pointing behind the record just written. Clobbers t1.
.macro symbol_table_enter length, name, info
	li t1, \length
	sw t1, 0(t0)
	la t1, \name
	sw t1, 4(t0)
	la t1, \info
	sw t1, 8(t0)

	lw t1, 0(a0)
	addi t1, t1, 1
	sw t1, 0(a0)

	addi t0, t0, SYMBOL_RECORD_SIZE
.endm

# Build the initial symbols: Int, Word, Byte and Char.
#
# Sets a0 to the pointer to the global symbol table.
# Clobbers t0 and t1.
.type symbol_table_build, @function
symbol_table_build:
	la a0, symbol_table
	addi t0, a0, 4 # Records start right after the length word.

	# The first argument is the length of the type name.
	symbol_table_enter 3, symbol_builtin_name_int, symbol_builtin_type_int
	symbol_table_enter 4, symbol_builtin_name_word, symbol_builtin_type_word
	symbol_table_enter 4, symbol_builtin_name_byte, symbol_builtin_type_byte
	symbol_table_enter 4, symbol_builtin_name_char, symbol_builtin_type_char

	ret

View File

@ -309,17 +309,6 @@ namespace :cross do
sh env, 'make', '-j', Etc.nprocessors.to_s, chdir: cwd.to_path
sh env, 'make', 'install', chdir: cwd.to_path
end
# Compiles tools/init.c with the cross compiler into a static, freestanding
# executable called "init" inside the tools directory of the build target.
task :init, [:target] do |_, args|
  options = find_build_target GCC_VERSION, args

  # The cross compiler is looked up in the bin directory of the target rootfs.
  toolchain_bin = options.rootfs.realpath + 'bin'
  env = { 'PATH' => "#{toolchain_bin}:#{ENV['PATH']}" }

  init_executable = options.tools + 'init'

  sh env, 'riscv32-unknown-linux-gnu-gcc', '-ffreestanding', '-static',
    '-o', init_executable.to_path, 'tools/init.c'
end
end
desc 'Build cross toolchain'
@ -329,7 +318,6 @@ task cross: [
'cross:headers',
'cross:kernel',
'cross:glibc',
'cross:gcc2',
'cross:init'
'cross:gcc2'
] do
end

View File

@ -21,16 +21,20 @@ def assemble_stage(output, compiler, source)
end
end
# Object files every stage is linked against: one per boot/*.s source whose
# name does not start with "stage".
library = []

Dir.glob('boot/*.s').each do |assembly_source|
  source_basename = Pathname.new(assembly_source).basename
  target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o')

  file target_object.to_s => [assembly_source, 'build/boot'] do |t|
    sh CROSS_GCC, '-c', '-o', t.name, assembly_source
  end

  # Collect the object, not the source, so that the link steps depend on the
  # rule defined just above instead of assembling the source a second time.
  library << target_object.to_s unless source_basename.to_s.start_with? 'stage'
end
desc 'Initial stage'
file 'build/boot/stage1' => ['build/boot/tokenizer.o', 'build/boot/stage1.o', 'build/boot/common-boot.o'] do |t|
file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
@ -43,7 +47,7 @@ file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t|
end
['build/boot/stage2a', 'build/boot/stage2b'].each do |exe|
file exe => [exe.ext('.s'), 'build/boot/common-boot.o'] do |t|
file exe => [exe.ext('.s'), *library] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
end

View File

@ -1,204 +0,0 @@
#include <stdio.h>
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/reboot.h>
#define FILENAME_BUFFER_SIZE 256
/*
 * Reads from descriptor into command_buffer until read() stops returning
 * data or FILENAME_BUFFER_SIZE - 1 bytes have been stored, then terminates
 * the buffer with a zero byte.
 *
 * command_buffer has to provide room for FILENAME_BUFFER_SIZE bytes.
 * Returns the number of bytes stored, not counting the terminator.
 */
size_t read_command(int descriptor, char *command_buffer)
{
    const size_t capacity = FILENAME_BUFFER_SIZE - 1;
    size_t length = 0;

    while (length < capacity)
    {
        ssize_t chunk = read(descriptor, command_buffer + length, capacity - length);

        if (chunk <= 0)
        {
            break;
        }
        length += (size_t) chunk;
    }
    command_buffer[length] = 0;

    return length;
}
// Outcome of running a single test, as returned by run_test.
enum status
{
// The output of the test is equal to the expectation file.
status_success,
// The output of the test differs from the expectation file.
status_failure,
// The expectation file could not be opened.
status_warning,
// pipe() or fork() failed; walk() stops running tests on this status.
status_fatal
};
/*
 * Concatenates directory, filename and extension into destination and
 * terminates the result with a zero byte.
 *
 * destination has to provide room for FILENAME_BUFFER_SIZE bytes. At most
 * FILENAME_BUFFER_SIZE - 1 characters are copied, so that the terminator
 * always fits into the buffer; longer paths are truncated.
 *
 * Any of the three parts may be NULL, in which case it is skipped.
 *
 * Returns the length of the resulting string, not counting the terminator.
 */
unsigned int make_path(char *destination, const char *directory, const char *filename, const char *extension)
{
    const char *parts[] = { directory, filename, extension };
    unsigned int length = 0;

    for (size_t part = 0; part < sizeof(parts) / sizeof(parts[0]); ++part)
    {
        const char *source = parts[part];

        if (source == NULL)
        {
            continue;
        }
        // Stop one byte short of the buffer size to leave room for the terminator.
        while (*source != 0 && length < FILENAME_BUFFER_SIZE - 1)
        {
            destination[length++] = *source++;
        }
    }
    destination[length] = 0;

    return length;
}
/*
 * Runs the executable ./tests/<file_entry_name> with its standard output
 * connected to a pipe and compares what it printed with the content of
 * ./expectations/<file_entry_name>.txt.
 *
 * Only the first FILENAME_BUFFER_SIZE - 1 bytes of the output and the first
 * 255 bytes of the expectation file take part in the comparison.
 *
 * Returns:
 * - status_success if output and expectation are equal,
 * - status_failure if they differ,
 * - status_warning if the expectation file cannot be opened,
 * - status_fatal if the pipe or the child process cannot be created.
 */
enum status run_test(const char *file_entry_name)
{
    printf("Running %s. ", file_entry_name);

    char filename[FILENAME_BUFFER_SIZE];
    char command_buffer[FILENAME_BUFFER_SIZE];
    char file_buffer[256];
    int pipe_ends[2];

    if (pipe(pipe_ends) == -1)
    {
        perror("pipe");
        return status_fatal;
    }
    make_path(filename, "./tests/", file_entry_name, NULL);

    pid_t child_pid = fork();
    if (child_pid == -1)
    {
        perror("fork");
        // Do not leak the pipe when no child was created.
        close(pipe_ends[0]);
        close(pipe_ends[1]);
        return status_fatal;
    }
    if (child_pid == 0)
    {
        close(STDIN_FILENO);
        close(STDERR_FILENO);
        close(pipe_ends[0]); // Close the read end.

        if (dup2(pipe_ends[1], STDOUT_FILENO) == -1)
        {
            perror("dup2");
        }
        else
        {
            // The argument list of execl has to end with a null pointer.
            execl(filename, filename, (char *) NULL);
            perror("execl");
        }
        close(STDOUT_FILENO);
        close(pipe_ends[1]);
        _exit(1);
    }
    close(pipe_ends[1]); // Close the write end.
    read_command(pipe_ends[0], command_buffer);
    close(pipe_ends[0]);

    // Reap the child so it does not stay around as a zombie.
    int wait_status = 0;
    if (waitpid(child_pid, &wait_status, 0) == -1)
    {
        perror("waitpid");
    }

    make_path(filename, "./expectations/", file_entry_name, ".txt");
    FILE *expectation_descriptor = fopen(filename, "r");

    if (expectation_descriptor == NULL)
    {
        return status_warning;
    }
    size_t read_from_file = fread(file_buffer, 1, sizeof(file_buffer) - 1, expectation_descriptor);
    fclose(expectation_descriptor);
    file_buffer[read_from_file] = 0;

    // Both buffers are zero-terminated, so the loop ends at the latest when
    // the shorter of the two runs out.
    for (unsigned int i = 0; ; ++i)
    {
        if (command_buffer[i] == 0 && file_buffer[i] == 0)
        {
            fwrite("\n", 1, 1, stdout);
            return status_success;
        }
        else if (command_buffer[i] != file_buffer[i])
        {
            printf("Failed. Got:\n%s", command_buffer);
            return status_failure;
        }
    }
}
// Counters collected by walk() while running the tests.
struct summary
{
// Number of directory entries a test was run for.
size_t total;
// Number of tests that returned status_failure.
size_t failure;
// Number of tests that returned status_success.
size_t success;
};
/*
 * Runs every entry of the ./tests directory whose name does not start with
 * a dot as a test and prints a summary afterwards.
 *
 * The walk is aborted without a summary as soon as a test returns
 * status_fatal. Nothing is run if the directory cannot be opened.
 */
void walk()
{
    DIR *directory_stream = opendir("./tests");

    if (directory_stream == NULL)
    {
        perror("opendir");
        return;
    }
    struct dirent *file_entry;
    struct summary test_summary = { .total = 0, .failure = 0, .success = 0 };

    while ((file_entry = readdir(directory_stream)) != NULL)
    {
        if (file_entry->d_name[0] == '.')
        {
            continue;
        }
        ++test_summary.total;

        switch (run_test(file_entry->d_name))
        {
            case status_failure:
                ++test_summary.failure;
                break;
            case status_success:
                ++test_summary.success;
                break;
            case status_warning:
                break;
            case status_fatal:
                goto end_walk;
        }
    }
    // The counters are of type size_t, which is printed with %zu.
    printf("Successful: %zu, Failed: %zu, Total: %zu.\n",
        test_summary.success, test_summary.failure, test_summary.total);

end_walk:
    closedir(directory_stream);
}
/*
 * Redirects the standard output to /dev/console, runs all tests, then
 * syncs the file systems and asks the kernel to power the machine off.
 *
 * Returns 1; presumably this is reached only when the power off request
 * fails - TODO confirm against reboot(2).
 */
int main()
{
    int dev_console = open("/dev/console", O_WRONLY);

    if (dev_console != -1)
    {
        dup2(dev_console, STDOUT_FILENO);
        walk();
        // The process does not leave through exit(), so anything still
        // sitting in the stdio buffer has to be written out by hand.
        fflush(stdout);
        close(dev_console);
    }
    sync();
    reboot(RB_POWER_OFF);

    return 1;
}