Add builtin symbols

This commit is contained in:
2025-05-08 00:13:07 +02:00
parent 40701008f0
commit 3f11d63a0f
7 changed files with 247 additions and 422 deletions

View File

@ -2,6 +2,10 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# Tokens.
#
# The constant should match the index in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
@ -52,3 +56,8 @@
.equ TOKEN_ASSIGN, 43
.equ TOKEN_INTEGER, 44
#
# Symbols.
#
.equ TYPE_PRIMITIVE, 1

View File

@ -411,14 +411,6 @@ _build_expression:
call _tokenize_next
sw a0, 20(sp)
/* DEBUG
lw a0, 32(sp)
lw a1, 28(sp)
call _write_error
lw a0, 28(sp)
li a1, 8
call _write_error */
lw a0, 24(sp)
li t0, TOKEN_MINUS
@ -754,74 +746,6 @@ _compile_call:
addi sp, sp, 32
ret
# Moves the pointer in s1 forward past any run of whitespace characters
# (space, tab, line feed, carriage return) and leaves it on the first
# character that is none of these.
#
# Uses t0 and t1 as scratch registers.
.type _skip_spaces, @function
_skip_spaces:
	j .Lspace_loop_do # Examine the current character before advancing.

.Lspace_loop_repeat:
	addi s1, s1, 1 # Consume the whitespace character.

.Lspace_loop_do:
	lbu t0, (s1) # t0 = Character under the pointer.

	li t1, ' '
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\t'
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\n'
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\r'
	beq t0, t1, .Lspace_loop_repeat

.Lspace_loop_end:
	ret
# Skips a comment if s1 points at one.
#
# When the two bytes at s1 are "(" and "*", s1 is advanced past them and
# then byte by byte until the two bytes "*" and ")" are found, which are
# skipped as well. Otherwise s1 is left untouched.
#
# The scan itself contains no end-of-input check.
#
# Parameters:
# a0 - Line length. It is overwritten before it is ever read here.
.type _skip_comment, @function
_skip_comment:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Put the opening marker on the stack and compare it with the input.
	li t0, 0x2a28 # (*
	sw t0, 4(sp)
	addi a0, sp, 4
	mv a1, s1
	li a2, 2
	call _memcmp
	bnez a0, .Lskip_comment_end # Not a comment, nothing to skip.

	addi s1, s1, 2 # Skip (*.

	# Replace the marker on the stack with the closing one.
	li t0, 0x292a # *)
	sw t0, 4(sp)
	j .Lskip_comment_loop

.Lskip_comment_next:
	addi s1, s1, 1 # No closing marker here, try the next byte.

.Lskip_comment_loop:
	addi a0, sp, 4
	mv a1, s1
	li a2, 2
	call _memcmp
	bnez a0, .Lskip_comment_next

.Lskip_comment_close:
	addi s1, s1, 2 # Skip *).

.Lskip_comment_end:
	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret
# Walks through the procedure definitions.
.type _compile_procedure_section, @function
_compile_procedure_section:
@ -832,9 +756,6 @@ _compile_procedure_section:
addi s0, sp, 32
.Lcompile_procedure_section_loop:
call _skip_spaces
call _skip_comment
mv a0, s1
addi a1, sp, 4
call _tokenize_next
@ -1083,28 +1004,9 @@ _compile_procedure:
call _tokenize_next
mv s1, a0
# .type identifier, @function
la a0, asm_type
li a1, ASM_TYPE_SIZE
call _write_out
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
la a0, asm_type_function
li a1, ASM_TYPE_FUNCTION_SIZE
call _write_out
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
li t0, 0x0a3a # :\n
sw t0, 12(sp)
addi a0, sp, 12
li a1, 2
call _write_out
lw a0, 16(sp)
lw a1, 20(sp)
call _write_procedure_head
# Skip all declarations until we find the "begin" keyword, denoting the
# beginning of the procedure body.
@ -1128,21 +1030,24 @@ _compile_procedure:
call _write_out
# Generate the body of the procedure.
.Lcompile_procedure_body:
li t0, 0x0a646e65 # end\n
sw t0, 8(sp)
mv a0, s1
addi a1, sp, 8
li a2, 4
call _memcmp
call _compile_statements
mv s1, a0 # Skip end.
beqz a0, .Lcompile_procedure_end
call _compile_statement
j .Lcompile_procedure_body
.Lcompile_procedure_end:
add s1, s1, 4 # Skip end\n.
/* DEBUG
sw a0, 8(sp)
lw a1, 12(sp)
li a2, TOKEN_END
sub a1, a1, a2
seqz a1, a1
seqz a0, a0
addi a0, a0, '0'
addi a1, a1, '0'
sb a0, 4(sp)
sb a1, 5(sp)
addi a0, sp, 4
li a1, 2
call _write_error
lw a0, 8(sp) */
# Generate the procedure epilogue with a predefined stack size.
la a0, epilogue
@ -1288,21 +1193,8 @@ _compile_if:
li a0, '\n'
call _put_char
.Lcompile_if_loop:
mv a0, s1
addi a1, sp, 0
call _tokenize_next
lw t0, 0(sp)
li t1, TOKEN_END
beq t0, t1, .Lcompile_if_end
call _compile_statement
j .Lcompile_if_loop
.Lcompile_if_end:
mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next.
call _compile_statements
mv s1, a0 # Skip end.
# Write the label prefix.
addi a0, sp, 20
@ -1328,6 +1220,95 @@ _compile_if:
addi sp, sp, 32
ret
# Writes:
# .type identifier, @function
# identifier:
#
# Parameters:
# a0 - Identifier length.
# a1 - Identifier pointer.
#
# Stack frame (32 bytes):
# 28(sp) - Saved ra.
# 24(sp) - Saved s0.
# 20(sp) - Identifier pointer.
# 16(sp) - Identifier length.
# 12(sp) - Scratch word holding the ":\n" suffix.
.type _write_procedure_head, @function
_write_procedure_head:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Keep the arguments, a0 and a1 are reused for every _write_out call.
	sw a0, 16(sp)
	sw a1, 20(sp)

	# .type identifier, @function
	la a0, asm_type
	li a1, ASM_TYPE_SIZE
	call _write_out

	# _write_out takes the pointer in a0 and the length in a1.
	lw a0, 20(sp)
	lw a1, 16(sp)
	call _write_out

	la a0, asm_type_function
	li a1, ASM_TYPE_FUNCTION_SIZE
	call _write_out

	# identifier:
	lw a0, 20(sp)
	lw a1, 16(sp)
	call _write_out

	li t0, 0x0a3a # :\n
	sw t0, 12(sp)
	addi a0, sp, 12
	li a1, 2
	call _write_out

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret
# Compiles a list of statements delimited by semicolons.
#
# The list may be empty: if the very first token is "end", no statement is
# compiled. Otherwise statements are compiled one after another as long as
# each of them is followed by a semicolon. A semicolon acts as a separator,
# so the token after it is always handed to _compile_statement.
#
# s1 is the source position. It is moved past each consumed semicolon, but
# not past the token that finishes the list.
#
# Sets a0 to the end of the token finishing the list
# (should be the "end" token in a valid program).
.type _compile_statements, @function
_compile_statements:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Look at the first token without consuming it. The token is written to the
# stack starting at 0(sp), the first word of it is the token kind.
mv a0, s1
addi a1, sp, 0
call _tokenize_next
lw t0, 0(sp)
li t1, TOKEN_END
# Empty statement list. a0 still holds the result of _tokenize_next.
beq t0, t1, .Lcompile_statements_end
.Lcompile_statements_body:
call _compile_statement
# Look at the token following the statement.
mv a0, s1
addi a1, sp, 0
call _tokenize_next
lw t0, 0(sp)
li t1, TOKEN_SEMICOLON
# Anything but a semicolon finishes the list. a0 is returned as is.
bne t0, t1, .Lcompile_statements_end
mv s1, a0 # Skip the semicolon.
j .Lcompile_statements_body
.Lcompile_statements_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Checks for the type of the current statement and compiles it.
.type _compile_statement, @function
_compile_statement:
@ -1357,7 +1338,7 @@ _compile_statement:
li t1, TOKEN_DOT
beq t0, t1, .Lcompile_statement_label
j .Lcompile_statement_empty # Else.
unimp # Else.
.Lcompile_statement_if:
call _compile_if
@ -1379,10 +1360,6 @@ _compile_statement:
call _compile_identifier
j .Lcompile_statement_end
.Lcompile_statement_empty:
addi s1, s1, 1
j .Lcompile_statement_end
.Lcompile_statement_end:
# Epilogue.
lw ra, 28(sp)
@ -1422,23 +1399,14 @@ _compile_entry_point:
li a1, ASM_START_SIZE
call _write_out
addi s1, s1, 6 # Skip begin\n.
# Generate the body of the procedure.
.Lcompile_entry_point_body:
mv a0, s1
addi a1, sp, 4
call _tokenize_next
mv s1, a0 # Skip begin.
lw t0, 4(sp)
li t1, TOKEN_END
beq t0, t1, .Lcompile_entry_point_end
call _compile_statement
j .Lcompile_entry_point_body
.Lcompile_entry_point_end:
mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call.
# Generate the body of the procedure.
call _compile_statements
mv s1, a0 # Skip end.
la a0, asm_exit
li a1, ASM_EXIT_SIZE
@ -1481,6 +1449,7 @@ _start:
call _read_file
li s2, 1
call symbol_table_build
call _compile
# Call exit.

View File

@ -8,7 +8,6 @@ const
var
source_code: [81920]Byte
(* Ignores the import. *)
proc _compile_import()
var loca0: Word
begin
@ -61,7 +60,7 @@ begin
loca24 := _token_compare(loca12, loca20, @loca16);
if loca24 = 0 then
goto .L_build_binary_expression_minus
end
end;
loca16 := 0x2a;
loca24 := _token_compare(loca12, loca20, @loca16);
@ -181,7 +180,7 @@ begin
_put_char(0x0a);
goto .Lcompile_identifier_expression_end
end
end;
loca8 := 0x6120616c;
_write_out(@loca8, 4);
@ -409,7 +408,7 @@ begin
loca0 := _front(loca8) = 0x2c;
if loca0 = 0 then
goto .Lcompile_call_paren
end
end;
loca12 := loca12 + 1;
@ -567,7 +566,7 @@ begin
loca0 := _front(loca0);
loca4 := loca4 + 1;
if loca0 = 0x3d then
if loca0 = 0x3d then
goto .Ltoken_character_single
end
end;
@ -605,43 +604,6 @@ begin
.Lspace_loop_end
end
(* Skips a comment at the current position, if there is one. The parameter loca84 is not referenced in the body. *)
proc _skip_comment(loca84: Word)
var
loca0: ^Byte
loca4: Word
loca8: Int
begin
loca0 := _current();
loca4 := 0x2a28;
loca8 := _memcmp(loca0, @loca4, 2);
if loca8 = 0 then
goto .Lskip_comment_continue
end;
goto .Lskip_comment_end;
.Lskip_comment_continue;
_advance(2);
loca4 := 0x292a;
.Lskip_comment_loop;
loca0 := _current();
loca8 := _memcmp(loca0, @loca4, 2);
if loca8 = 0 then
goto .Lskip_comment_close
end;
_advance(1);
goto .Lskip_comment_loop;
.Lskip_comment_close;
_advance(2);
.Lskip_comment_end
end
proc _compile_assembly(loca84: Word)
var loca0: ^Byte
begin
@ -748,9 +710,11 @@ begin
.Lcompile_variable_section_item;
_skip_spaces();
loca4 := _current();
loca0 := _front(loca4);
if _is_lower(loca0) = 0 then
loca0 := 0x636f7270;
loca0 := _memcmp(@loca0, loca4, 4);
if loca0 = 0 then
goto .Lcompile_variable_section_end
end;
_compile_variable();
@ -1096,7 +1060,7 @@ proc _compile_return()
begin
_advance(6);
_skip_spaces();
_build_binary_expression();
_build_binary_expression()
end
proc _compile_if()
@ -1164,10 +1128,6 @@ begin
loca16 := _current();
loca0 := _front(loca16);
if loca0 = 0x28 then
goto .Lcompile_line_comment
end;
loca16 := _current();
loca12 := 0x676f7270;
loca4 := _memcmp(loca16, @loca12, 4);
@ -1308,10 +1268,6 @@ begin
_compile_program();
goto .Lcompile_line_section;
.Lcompile_line_comment;
_skip_comment(loca84);
goto .Lcompile_line_section;
.Lcompile_line_empty;
_advance(1);
goto .Lcompile_line_section;
@ -1325,7 +1281,6 @@ begin
.Lcompile_line_end;
_skip_spaces();
_skip_comment();
return loca8
end

104
boot/symbol.s Normal file
View File

@ -0,0 +1,104 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table_build

.include "boot/definitions.inc"

.equ SYMBOL_PRIME, 1543

.section .rodata

# Names of the builtin types. The strings are not terminated, their lengths
# are stored in the symbol table entries by symbol_table_build.
.type symbol_builtin_name_int, @object
symbol_builtin_name_int: .ascii "Int"
.type symbol_builtin_name_word, @object
symbol_builtin_name_word: .ascii "Word"
.type symbol_builtin_name_byte, @object
symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char"

# Every type info starts with a word describing what type it is.
# Primitive types have only type size.
#
# The names above take 15 bytes and .word does not align by itself, so the
# location counter is realigned to keep the type infos word-aligned.
.balign 4
.type symbol_builtin_type_int, @object
symbol_builtin_type_int: .word TYPE_PRIMITIVE
	.word 4
.type symbol_builtin_type_word, @object
symbol_builtin_type_word: .word TYPE_PRIMITIVE
	.word 4
.type symbol_builtin_type_byte, @object
symbol_builtin_type_byte: .word TYPE_PRIMITIVE
	.word 1
.type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE
	.word 1

.section .bss

# The first word of the symbol table is its length.
# Then a list of type infos follows:
#
# record
# name: String
# info: ^TypeInfo
# end
#
# The table is read and written with lw and sw, hence the word alignment.
#
# NOTE(review): SYMBOL_PRIME bytes are reserved here, not SYMBOL_PRIME
# entries - confirm that this is the intended size.
.balign 4
.type symbol_table, @object
symbol_table: .zero SYMBOL_PRIME
.section .text
# Build the initial symbols.
#
# Appends the builtin types Int, Word, Byte and Char to the global symbol
# table. An entry takes 12 bytes:
#
# 0 - Name length.
# 4 - Name pointer.
# 8 - Type info pointer.
#
# The entries are written right after the leading length word of the table,
# and the length word is incremented once per entry.
#
# Sets a0 to the pointer to the global symbol table.
# Uses t0 (current entry) and t1 as scratch registers.
.type symbol_table_build, @function
symbol_table_build:
	la a0, symbol_table
	addi t0, a0, 4 # t0 = First entry, following the length word.

	# Int.
	li t1, 3 # Length of the word "Int".
	sw t1, 0(t0)
	la t1, symbol_builtin_name_int
	sw t1, 4(t0)
	la t1, symbol_builtin_type_int
	sw t1, 8(t0)
	lw t1, 0(a0)
	addi t1, t1, 1
	sw t1, 0(a0)
	addi t0, t0, 12

	# Word.
	li t1, 4 # Length of the word "Word".
	sw t1, 0(t0)
	la t1, symbol_builtin_name_word
	sw t1, 4(t0)
	la t1, symbol_builtin_type_word
	sw t1, 8(t0)
	lw t1, 0(a0)
	addi t1, t1, 1
	sw t1, 0(a0)
	addi t0, t0, 12

	# Byte.
	li t1, 4 # Length of the word "Byte".
	sw t1, 0(t0)
	la t1, symbol_builtin_name_byte
	sw t1, 4(t0)
	la t1, symbol_builtin_type_byte
	sw t1, 8(t0)
	lw t1, 0(a0)
	addi t1, t1, 1
	sw t1, 0(a0)
	addi t0, t0, 12

	# Char. This is the last entry, so t0 is not advanced any further.
	li t1, 4 # Length of the word "Char".
	sw t1, 0(t0)
	la t1, symbol_builtin_name_char
	sw t1, 4(t0)
	la t1, symbol_builtin_type_char
	sw t1, 8(t0)
	lw t1, 0(a0)
	addi t1, t1, 1
	sw t1, 0(a0)

	ret