Compare commits

...

2 Commits

Author SHA1 Message Date
707f983fe9
Add symbol info for parameters and local variables 2025-05-10 23:24:03 +02:00
890486532c
Wrap the mmap2 syscall 2025-05-09 18:42:46 +02:00
8 changed files with 702 additions and 320 deletions

View File

@ -20,3 +20,19 @@ task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna']
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
end
# Generate the assembly listing for boot/test.elna using the stage1 compiler.
file 'build/boot/test.s' => %w[build/boot/stage1 boot/test.elna] do |task|
  # Separate the Elna sources from the remaining prerequisites (the compiler).
  elna_sources, compilers = task.prerequisites.partition { |path| path.end_with?('.elna') }

  File.open(task.name, 'w') { |listing| assemble_stage(listing, compilers, elna_sources) }
end
# Build the test executable from the generated listing and the shared boot
# runtime, without the standard library.
file 'build/boot/test' => %w[build/boot/test.s boot/common-boot.s] do |task|
  gcc_command = [CROSS_GCC, '-nostdlib', '-o', task.name, *task.prerequisites]
  sh(*gcc_command)
end
# Run the built test executable under QEMU, passing SYSROOT through -L.
task test: 'build/boot/test' do |task|
  test_binary = task.prerequisites.first
  sh(QEMU, '-L', SYSROOT, test_binary)
end

View File

@ -5,17 +5,22 @@
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit
.global _strings_index
.global _divide_by_zero_error, _exit, _mmap
.global _strings_index, _string_equal
.section .rodata
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2
.equ PROT_READ, 0x1
.equ PROT_WRITE, 0x2
.equ MAP_PRIVATE, 0x02
.equ MAP_ANONYMOUS, 0x20
new_line: .ascii "\n"
@ -500,3 +505,58 @@ _strings_index:
lw s0, 24(sp)
add sp, sp, 32
ret
# Compares two strings for equality.
#
# Parameters:
# a0 - Length of the first string.
# a1 - Pointer to the first string.
# a2 - Length of the second string.
# a3 - Pointer to the second string.
#
# Sets a0 to 1 if the strings are equal, to 0 if not.
.type _string_equal, @function
_string_equal:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Compare string lengths. Strings of different length can never be equal,
# so the content comparison is skipped in that case.
bne a0, a2, .Lstring_equal_not_found
# If lengths match, compare the content.
# Rearrange the arguments for _memcmp: the two pointers go into a0 and a1.
mv a0, a1
mv a1, a3
# a2 is already set to the length.
call _memcmp
# Any nonzero result from _memcmp is treated as a mismatch.
bnez a0, .Lstring_equal_not_found
li a0, 1
j .Lstring_equal_end
.Lstring_equal_not_found:
# Either the lengths or the contents differ.
mv a0, zero
.Lstring_equal_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Creates an anonymous, private, readable and writable mapping of 4096 bytes
# by issuing the SYS_MMAP2 system call.
#
# Takes no parameters: a0-a5 and a7 are overwritten with fixed system call
# arguments, so whatever the caller passes in is ignored.
#
# Sets a0 to the mapping address.
# NOTE(review): the value left in a0 by the ecall is returned unchecked;
# presumably a failed call yields an error value instead of an address -
# confirm that callers handle this.
.type _mmap, @function
_mmap:
li a0, 0 # Address at which to create the mapping.
li a1, 4096 # The length of the mapping.
li a2, PROT_READ | PROT_WRITE # Protection flags.
li a3, MAP_ANONYMOUS | MAP_PRIVATE # The mapping is not backed by a file.
li a4, -1 # File descriptor.
li a5, 0 # Page offset.
li a7, SYS_MMAP2
ecall
ret

View File

@ -60,4 +60,7 @@
#
# Symbols.
#
.equ TYPE_PRIMITIVE, 1
.equ TYPE_PRIMITIVE, 0x01
.equ TYPE_POINTER, 0x02
.equ INFO_PARAMETER, 0x10
.equ INFO_LOCAL, 0x20

File diff suppressed because it is too large Load Diff

View File

@ -19,7 +19,11 @@ end
proc _build_binary_expression()
var
loca0, loca4, loca8, loca16, loca20: Word
loca0: Word
loca4: Word
loca8: Word
loca16: Word
loca20: Word
loca12: ^Byte
loca24: Bool
begin
@ -208,8 +212,12 @@ end
proc _build_expression()
var
loca0, loca20, loca28, loca8: Word
loca24, loca4: ^Byte
loca0: Word
loca20: Word
loca28: Word
loca8: Word
loca24: ^Byte
loca4: ^Byte
begin
loca28 := loca84 + 0x30;
@ -340,8 +348,11 @@ end
proc _compile_identifier()
var
loca0, loca16, loca8: Word
loca20, loca12: ^Byte
loca0: Word
loca16: Word
loca8: Word
loca20: ^Byte
loca12: ^Byte
loca4: Bool
begin
loca20 := _current();
@ -375,7 +386,9 @@ end
proc _compile_call(loca84: ^Byte, loca80: Word)
var
loca0, loca4, loca12: Word
loca0: Word
loca4: Word
loca12: Word
loca8: ^Byte
begin
loca12 := 0;
@ -482,7 +495,8 @@ end
proc _read_token()
var
loca0, loca4: Word
loca0: Word
loca4: Word
loca8: ^Byte
begin
loca8 := _current();
@ -664,7 +678,8 @@ end
proc _compile_constant()
var
loca0, loca4: Word
loca0: Word
loca4: Word
loca8: ^Byte
begin
loca4 := _read_token();
@ -725,8 +740,11 @@ end
proc _compile_variable()
var
loca28, loca16: ^Byte
loca0, loca24, loca20: Word
loca28: ^Byte
loca16: ^Byte
loca0: Word
loca24: Word
loca20: Word
begin
loca24 := _read_token();
loca28 := _current();
@ -791,8 +809,13 @@ end
proc _compile_procedure()
var
loca0, loca4, loca8, loca12, loca16: Word
loca20, loca24: ^Byte
loca0: Word
loca4: Word
loca8: Word
loca12: Word
loca16: Word
loca20: ^Byte
loca24: ^Byte
begin
_advance(5);
loca16 := _read_token();
@ -971,7 +994,8 @@ end
proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte)
var
loca0: Bool
loca4, loca12: Byte
loca4: Byte
loca12: Byte
loca8: Word
begin
.Ltoken_compare_loop;
@ -1065,7 +1089,10 @@ end
proc _compile_if()
var
loca8, loca12, loca16, loca20: Word
loca8: Word
loca12: Word
loca16: Word
loca20: Word
loca4: ^Byte
begin
_advance(2);
@ -1368,7 +1395,8 @@ end
proc _compile()
var
loca0, loca4: Word
loca0: Word
loca4: Word
loca8: Bool
loca12: Char
loca16: ^Byte

View File

@ -2,7 +2,9 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table_build, symbol_table_find
.global symbol_table
.global symbol_table_build, symbol_table_find, symbol_table_insert, symbol_table_dump
.global symbol_table_make_pointer, symbol_table_make_parameter, symbol_table_make_local
.include "boot/definitions.inc"
@ -18,6 +20,8 @@ symbol_builtin_name_word: .ascii "Word"
symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char"
.type symbol_builtin_name_bool, @object
symbol_builtin_name_bool: .ascii "Bool"
# Every type info starts with a word describing what type it is.
@ -34,6 +38,9 @@ symbol_builtin_type_byte: .word TYPE_PRIMITIVE
.type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE
.word 1
.type symbol_builtin_type_bool, @object
symbol_builtin_type_bool: .word TYPE_PRIMITIVE
.word 1
.section .bss
@ -49,14 +56,51 @@ symbol_table: .zero SYMBOL_PRIME
.section .text
# Prints the list of symbols in the table.
#
# Takes no parameters. Walks every entry of symbol_table and passes the
# entry's name to _write_error. Nothing else is emitted here between names.
.type symbol_table_dump, @function
symbol_table_dump:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp) # Current symbol in the table.
sw s2, 16(sp) # Symbol table length.
la s1, symbol_table
# The first word of the table holds the number of entries.
lw s2, 0(s1)
addi s1, s1, 4 # Advance to the first symbol in the table.
.Lsymbol_table_dump_loop:
# s2 counts the entries that are still to be printed.
beqz s2, .Lsymbol_table_dump_end
# Load the second word (offset 4) and the first word (offset 0) of the entry
# as the two arguments of _write_error.
lw a0, 4(s1)
lw a1, 0(s1)
call _write_error
# Each entry occupies 12 bytes.
addi s1, s1, 12
addi s2, s2, -1
j .Lsymbol_table_dump_loop
.Lsymbol_table_dump_end:
# Restore the callee-saved registers used by the loop.
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Searches for a symbol by name.
#
# Parameters:
# a0 - Local symbol table or 0.
# a1 - Length of the symbol to search.
# a2 - Pointer to the symbol name.
# a0 - Length of the symbol to search.
# a1 - Pointer to the symbol name.
#
# Sets a0 to the symbol info.
# Sets a0 to the symbol info pointer or 0 if the symbol has not been found.
.type symbol_table_find, @function
symbol_table_find:
# Prologue.
@ -70,8 +114,8 @@ symbol_table_find:
sw s3, 12(sp) # Length of the symbol to search.
sw s4, 8(sp) # Pointer to the symbol to search.
mv s3, a1
mv s4, a2
mv s3, a0
mv s4, a1
la s1, symbol_table
lw s2, 0(s1)
@ -81,27 +125,26 @@ symbol_table_find:
beqz s2, .Lsymbol_table_find_not_found
# Compare string lengths.
mv a0, s3
mv a1, s4
lw a2, 0(s1)
bne s3, a2, .Lsymbol_table_find_continue
lw a3, 4(s1)
call _string_equal
# If lengths match, compare the content.
mv a0, s4
lw a1, 4(s1)
call _memcmp
bnez a0, .Lsymbol_table_find_continue
beqz a0, .Lsymbol_table_find_continue
lw a0, 8(s1) # Pointer to the symbol.
j .Lsymbol_table_end
j .Lsymbol_table_find_end
.Lsymbol_table_find_continue:
addi s1, s1, 12
addi s2, s2, -1
j .Lsymbol_table_find_loop
.Lsymbol_table_find_not_found:
li a0, 0
.Lsymbol_table_end:
.Lsymbol_table_find_end:
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
@ -113,56 +156,124 @@ symbol_table_find:
addi sp, sp, 32
ret
# Creates a pointer type.
#
# Parameters:
# a0 - Pointer to the base type.
# a1 - Output memory.
#
# Writes two words to the output memory: the TYPE_POINTER tag followed by the
# base type pointer. Only t0 is used as a scratch register.
#
# Sets a0 to the size of newly created type in bytes.
.type symbol_table_make_pointer, @function
symbol_table_make_pointer:
# Word 0: type tag.
li t0, TYPE_POINTER
sw t0, 0(a1)
# Word 1: pointer to the base type.
sw a0, 4(a1)
# Two words were written.
li a0, 8
ret
# Creates a parameter info.
#
# Parameters:
# a0 - Pointer to the parameter type.
# a1 - Parameter offset.
# a2 - Output memory.
#
# Writes three words to the output memory: the INFO_PARAMETER tag, the type
# pointer and the offset. Only t0 is used as a scratch register.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_parameter, @function
symbol_table_make_parameter:
# Word 0: info tag.
li t0, INFO_PARAMETER
sw t0, 0(a2)
# Word 1: pointer to the parameter type.
sw a0, 4(a2)
# Word 2: parameter offset.
sw a1, 8(a2)
# Three words were written.
li a0, 12
ret
# Creates a local variable info.
#
# Parameters:
# a0 - Pointer to the variable type.
# a1 - Variable stack offset.
# a2 - Output memory.
#
# Writes three words to the output memory: the INFO_LOCAL tag, the type
# pointer and the stack offset. Only t0 is used as a scratch register.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_local, @function
symbol_table_make_local:
# Word 0: info tag.
li t0, INFO_LOCAL
sw t0, 0(a2)
# Word 1: pointer to the variable type.
sw a0, 4(a2)
# Word 2: variable stack offset.
sw a1, 8(a2)
# Three words were written.
li a0, 12
ret
# Inserts a symbol into the table.
#
# Parameters:
# a0 - Symbol name length.
# a1 - Symbol name pointer.
# a2 - Symbol pointer.
#
# The first word of symbol_table holds the number of entries; the entries
# follow it, 12 bytes each: name length, name pointer, symbol pointer.
# The new entry is appended after the existing ones and the count is
# incremented. Only t0-t2 are written; a0-a2 are left unchanged.
#
# NOTE(review): no check against the capacity of symbol_table is visible
# here - confirm the table is large enough for all insertions.
.type symbol_table_insert, @function
symbol_table_insert:
la t0, symbol_table
lw t1, 0(t0) # Current table length.
li t2, 12 # Calculate the offset to the next entry.
mul t2, t1, t2
# Skip the leading count word.
addi t2, t2, 4
# t2 now points to the first free entry.
add t2, t0, t2
sw a0, 0(t2)
sw a1, 4(t2)
sw a2, 8(t2)
addi t1, t1, 1 # Save the new length.
sw t1, 0(t0)
ret
# Build the initial symbols.
#
# Sets a0 to the pointer to the global symbol table.
#
# NOTE(review): the .type directive below names "symbol_build", while the
# label that follows is "symbol_table_build" - presumably the directive
# should name symbol_table_build; confirm and align the two.
# NOTE(review): a0 is loaded with symbol_table at the start, but it is also
# loaded with the name lengths passed to symbol_table_insert further down -
# confirm that the "Sets a0" claim above still holds for callers.
# NOTE(review): this span contains both manual store sequences and
# symbol_table_insert calls for the same built-in names.
.type symbol_build, @function
symbol_table_build:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
la a0, symbol_table
addi t0, a0, 4
li t1, 3 # Length of the word "Int".
sw t1, 0(t0)
la t1, symbol_builtin_name_int
sw t1, 4(t0)
la t1, symbol_builtin_type_int
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 3 # Length of the word "Int".
la a1, symbol_builtin_name_int
la a2, symbol_builtin_type_int
call symbol_table_insert
li t1, 4 # Length of the word "Word".
sw t1, 0(t0)
la t1, symbol_builtin_name_word
sw t1, 4(t0)
la t1, symbol_builtin_type_word
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 4 # Length of the word "Word".
la a1, symbol_builtin_name_word
la a2, symbol_builtin_type_word
call symbol_table_insert
li t1, 4 # Length of the word "Byte".
sw t1, 0(t0)
la t1, symbol_builtin_name_byte
sw t1, 4(t0)
la t1, symbol_builtin_type_byte
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 4 # Length of the word "Byte".
la a1, symbol_builtin_name_byte
la a2, symbol_builtin_type_byte
call symbol_table_insert
li t1, 4 # Length of the word "Char".
sw t1, 0(t0)
la t1, symbol_builtin_name_char
sw t1, 4(t0)
la t1, symbol_builtin_type_char
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 4 # Length of the word "Char".
la a1, symbol_builtin_name_char
la a2, symbol_builtin_type_char
call symbol_table_insert
li a0, 4 # Length of the word "Bool".
la a1, symbol_builtin_name_bool
la a2, symbol_builtin_type_bool
call symbol_table_insert
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret

14
boot/test.elna Normal file
View File

@ -0,0 +1,14 @@
program
proc _main(x: Word)
var loca0: Word
loca4: Word
begin
loca0 := 0x0a2c3061;
loca4 := x;
_write_out(@loca4, 4)
end
begin
_main()
end.

View File

@ -2,7 +2,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _tokenize_next, classification, transitions, keywords, byte_keywords
.global lex_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc"
@ -246,7 +246,7 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# It specifies the target state. "ff" means that this is an end state and no
# transition is possible.
# - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the _tokenize_next function, which
# For the meaning of actions see labels in the lex_next function, which
# handles each action.
#
.type transitions, @object
@ -318,8 +318,8 @@ transitions:
# a0 - Character.
#
# Sets a0 to the class number.
.type _classify, @function
_classify:
.type classify, @function
classify:
la t0, classification
add t0, t0, a0 # Character class pointer.
lbu a0, (t0) # Character class.
@ -332,8 +332,8 @@ _classify:
# a1 - Character class.
#
# Sets a0 to the next state.
.type _lookup_state, @function
_lookup_state:
.type lookup_state, @function
lookup_state:
li t0, CLASS_COUNT
mul a0, a0, t0 # Transition row.
add a0, a0, a1 # Transition column.
@ -347,7 +347,7 @@ _lookup_state:
ret
# Chains _classify and _lookup_state.
# Chains classify and lookup_state.
#
# Parameters:
# a0 - Current state.
@ -364,11 +364,11 @@ _next_state:
sw a0, 4(sp)
mv a0, a1
call _classify
call classify
mv a1, a0
lw a0, 4(sp)
call _lookup_state
call lookup_state
# Epilogue.
lw ra, 12(sp)
@ -383,8 +383,8 @@ _next_state:
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type.
.type _classify_identifier, @function
_classify_identifier:
.type classify_identifier, @function
classify_identifier:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
@ -413,8 +413,8 @@ _classify_identifier:
# a0 - Token character.
#
# Sets a0 to the appropriate token type.
.type _classify_single, @function
_classify_single:
.type classify_single, @function
classify_single:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
@ -443,8 +443,8 @@ _classify_single:
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type.
.type _classify_composite, @function
_classify_composite:
.type classify_composite, @function
classify_composite:
lbu t0, 0(a1)
li t1, ':'
beq t0, t1, .Lclassify_composite_assign
@ -465,8 +465,8 @@ _classify_composite:
# a1 - A pointer for output value, the token kind. 4 Bytes.
#
# Sets a0 to the position of the next token.
.type _tokenize_next, @function
_tokenize_next:
.type lex_next, @function
lex_next:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@ -484,7 +484,7 @@ _tokenize_next:
sw a1, 0(sp)
sw zero, (a1) # Initialize.
.Ltokenize_next_loop:
.Llex_next_loop:
mv a0, s2
lbu a1, (s1)
call _next_state
@ -498,50 +498,50 @@ _tokenize_next:
# Perform the provided action.
li t0, 0x01 # Accumulate action.
beq t1, t0, .Ltokenize_next_accumulate
beq t1, t0, .Llex_next_accumulate
li t0, 0x02 # Print action.
beq t1, t0, .Ltokenize_next_print
beq t1, t0, .Llex_next_print
li t0, 0x03 # Skip action.
beq t1, t0, .Ltokenize_next_skip
beq t1, t0, .Llex_next_skip
li t0, 0x04 # Delimited string action.
beq t1, t0, .Ltokenize_next_comment
beq t1, t0, .Llex_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier
beq t1, t0, .Llex_next_identifier
li t0, 0x06 # Single character symbol action.
beq t1, t0, .Ltokenize_next_single
beq t1, t0, .Llex_next_single
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Ltokenize_next_composite
beq t1, t0, .Llex_next_composite
li t0, 0x08 # Integer action.
beq t1, t0, .Ltokenize_next_integer
beq t1, t0, .Llex_next_integer
j .Ltokenize_next_reject
j .Llex_next_reject
.Ltokenize_next_reject:
.Llex_next_reject:
addi s1, s1, 1
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_accumulate:
.Llex_next_accumulate:
addi s1, s1, 1
j .Ltokenize_next_loop
j .Llex_next_loop
.Ltokenize_next_skip:
.Llex_next_skip:
addi s1, s1, 1
lw t0, 12(sp)
addi t0, t0, 1
sw t0, 12(sp)
j .Ltokenize_next_loop
j .Llex_next_loop
.Ltokenize_next_print:
.Llex_next_print:
/* DEBUG
addi a0, a0, 21
sw a0, 0(sp)
@ -549,49 +549,49 @@ _tokenize_next:
li a1, 1
call _write_error */
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_comment:
.Llex_next_comment:
addi s1, s1, 1
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_identifier:
.Llex_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 12(sp)
sub a0, s1, a1
sw a0, 8(sp)
call _classify_identifier
call classify_identifier
sw a0, 4(sp)
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_single:
.Llex_next_single:
lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
call classify_single
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_composite:
.Llex_next_composite:
addi s1, s1, 1
lw a1, 12(sp)
sub a0, s1, a1
call _classify_composite
call classify_composite
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_integer:
.Llex_next_integer:
lw t0, 0(sp)
li t1, TOKEN_INTEGER
sw t1, 0(t0)
@ -600,9 +600,9 @@ _tokenize_next:
sub t1, s1, t1
sw t1, 4(t0)
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_end:
.Llex_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.