Compare commits

5 Commits

8 changed files with 1366 additions and 1087 deletions

View File

@ -20,3 +20,19 @@ task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna']
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
end
# Generates build/boot/test.s by handing the prerequisites to assemble_stage:
# the entries ending in '.elna' are passed as the source, the remaining
# entries (build/boot/stage1) as the executable. The result goes into the
# freshly opened target file.
file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t|
  source, exe = t.prerequisites.partition { |path| path.end_with?('.elna') }
  File.open(t.name, 'w') { |output| assemble_stage output, exe, source }
end
# Builds the build/boot/test executable by passing the generated test
# assembly and boot/common-boot.s to CROSS_GCC with -nostdlib.
file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t|
  link_command = [CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites]
  sh(*link_command)
end
# Runs the built test executable (the first prerequisite) with QEMU,
# passing SYSROOT through the -L option.
task test: 'build/boot/test' do |t|
  executable = t.prerequisites.first
  sh QEMU, '-L', SYSROOT, executable
end

View File

@ -3,19 +3,24 @@
# obtain one at https://mozilla.org/MPL/2.0/.
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit
.global _strings_index
.global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i
.global _memcmp, _memchr, _memmem, _memcpy, _mmap
.global _current, _get, _advance, _label_counter
.global _divide_by_zero_error, _exit, _strings_index, _string_equal
.section .rodata
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2
.equ PROT_READ, 0x1
.equ PROT_WRITE, 0x2
.equ MAP_PRIVATE, 0x02
.equ MAP_ANONYMOUS, 0x20
new_line: .ascii "\n"
@ -170,16 +175,20 @@ _is_alnum:
addi sp, sp, 16
ret
.type _write_out, @function
_write_out:
# Writes a string to the standard output.
#
# Parameters:
# a0 - Length of the string.
# a1 - String pointer.
.type _write_s, @function
_write_s:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
mv a2, a1
mv a1, a0
mv a2, a0
li a0, STDOUT
li a7, SYS_WRITE
ecall
@ -239,53 +248,87 @@ _divide_by_zero_error:
ecall
ret
# a0 - Whole number.
# Writes a number to a string buffer.
#
# t0 - Local buffer.
# t1 - Constant 10.
# a1 - Local buffer.
# t2 - Current character.
# t3 - Whether the number is negative.
.type printi, @function
_printi:
addi sp, sp, -16
sw s0, 0(sp)
sw ra, 4(sp)
addi s0, sp, 16
addi t1, zero, 10
addi a1, s0, -1
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
.type _print_i, @function
_print_i:
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
addi t3, zero, 0
bge a0, zero, .digit10
addi t3, zero, 1
sub a0, zero, a0
li t1, 10
addi t0, s0, -9
.digit10:
li t3, 0
bgez a0, .Lprint_i_digit10
li t3, 1
neg a0, a0
.Lprint_i_digit10:
rem t2, a0, t1
addi t2, t2, '0'
sb t2, 0(a1)
sb t2, 0(t0)
div a0, a0, t1
addi a1, a1, -1
bne zero, a0, .digit10
addi t0, t0, -1
bne zero, a0, .Lprint_i_digit10
beq zero, t3, .write_call
beq zero, t3, .Lprint_i_write_call
addi t2, zero, '-'
sb t2, 0(a1)
addi a1, a1, -1
sb t2, 0(t0)
addi t0, t0, -1
.write_call:
addi a0, zero, 1
addi a1, a1, 1
sub a2, s0, a1
addi a7, zero, 64 # write
ecall
.Lprint_i_write_call:
mv a0, a1
addi a1, t0, 1
sub a2, s0, t0
addi a2, a2, -9
sw a2, 0(sp)
lw s0, 0(sp)
lw ra, 4(sp)
addi sp, sp, 16
call _memcpy
lw a0, 0(sp)
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Writes a number to the standard output.
#
# The number is first rendered by _print_i into a scratch buffer located at
# the bottom of this function's 32-byte stack frame. The length returned by
# _print_i in a0 and the buffer pointer in a1 are then handed to _write_s.
#
# Parameters:
# a0 - Whole number.
.type _write_i, @function
_write_i:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Use the bottom of the frame as the output buffer for _print_i.
addi a1, sp, 0
call _print_i
# a0 now holds the length. The buffer pointer is recomputed because
# _print_i overwrites a1.
addi a1, sp, 0
call _write_s
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Writes a character from a0 into the standard output.
.type _put_char, @function
_put_char:
.type _write_c, @function
_write_c:
# Prologue
addi sp, sp, -16
sw ra, 12(sp)
@ -500,3 +543,88 @@ _strings_index:
lw s0, 24(sp)
add sp, sp, 32
ret
# Tests whether two strings, each given as a length and a pointer, have
# the same content.
#
# Parameters:
# a0 - Length of the first string.
# a1 - Pointer to the first string.
# a2 - Length of the second string.
# a3 - Pointer to the second string.
#
# Sets a0 to 1 if the strings are equal, to 0 if not.
.type _string_equal, @function
_string_equal:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Strings of different lengths can never be equal.
bne a0, a2, .Lstring_equal_differ
# Same length: let _memcmp compare the bytes. a2 already holds the length.
mv a0, a1
mv a1, a3
call _memcmp
# A zero result from _memcmp means equal content; map 0 to 1 and
# everything else to 0.
seqz a0, a0
j .Lstring_equal_done
.Lstring_equal_differ:
li a0, 0
.Lstring_equal_done:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Requests a new private, anonymous, readable and writable memory mapping
# of 4096 bytes through the SYS_MMAP2 system call.
#
# Takes no parameters; all system call arguments are fixed below.
#
# Sets a0 to the mapping address.
# NOTE(review): a0 is passed through exactly as the system call left it;
# no error check is performed here.
.type _mmap, @function
_mmap:
li a0, 0 # Address at which to create the mapping.
li a1, 4096 # The length of the mapping.
li a2, PROT_READ | PROT_WRITE # Protection flags.
li a3, MAP_ANONYMOUS | MAP_PRIVATE # The mapping is not backed by a file.
li a4, -1 # File descriptor.
li a5, 0 # Page offset.
li a7, SYS_MMAP2
ecall
ret
# Sets a0 to the current position in the source text (s1).
#
# s1 itself is only read, not modified.
.type _current, @function
_current:
mv a0, s1
ret
# Advances the position of the source text (s1).
#
# Parameters:
# a0 - The number of bytes to advance.
#
# The value is added to s1 as is; no bounds check is performed here.
.type _advance, @function
_advance:
add s1, s1, a0
ret
# Advances the global label counter (s2) by 1 setting a0 to the previous value.
#
# Parameters:
# a0 - If it is 0, the counter is reset to 0 first, so the call returns 0
#      and leaves the counter at 1. Any other value only advances the
#      counter.
.type _label_counter, @function
_label_counter:
bnez a0, .Llabel_counter_advance
# Reset requested.
li s2, 0
.Llabel_counter_advance:
# Return the value before the increment.
mv a0, s2
addi s2, s2, 1
ret

View File

@ -60,4 +60,9 @@
#
# Symbols.
#
.equ TYPE_PRIMITIVE, 1
.equ TYPE_PRIMITIVE, 0x01
.equ TYPE_POINTER, 0x02
.equ TYPE_PROCEDURE, 0x03
.equ INFO_PARAMETER, 0x10
.equ INFO_LOCAL, 0x20
.equ INFO_PROCEDURE, 0x30

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,9 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table_build, symbol_table_find
.global symbol_table
.global symbol_table_build, symbol_table_lookup, symbol_table_enter, symbol_table_dump
.global symbol_table_make_pointer, symbol_table_make_parameter, symbol_table_make_local, symbol_table_make_procedure
.include "boot/definitions.inc"
@ -18,6 +20,8 @@ symbol_builtin_name_word: .ascii "Word"
symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char"
.type symbol_builtin_name_bool, @object
symbol_builtin_name_bool: .ascii "Bool"
# Every type info starts with a word describing what type it is.
@ -34,6 +38,9 @@ symbol_builtin_type_byte: .word TYPE_PRIMITIVE
.type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE
.word 1
.type symbol_builtin_type_bool, @object
symbol_builtin_type_bool: .word TYPE_PRIMITIVE
.word 1
.section .bss
@ -49,16 +56,53 @@ symbol_table: .zero SYMBOL_PRIME
.section .text
# Prints the list of symbols in the table.
#
# Walks over symbol_table and passes the name of every entry to
# _write_error. The table starts with a word holding the number of entries.
# It is followed by 12-byte entries: name length, name pointer and symbol
# pointer.
#
# Takes no parameters.
.type symbol_table_dump, @function
symbol_table_dump:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp) # Current symbol in the table.
sw s2, 16(sp) # Symbol table length.
la s1, symbol_table
lw s2, 0(s1)
addi s1, s1, 4 # Advance to the first symbol in the table.
.Lsymbol_table_dump_loop:
beqz s2, .Lsymbol_table_dump_end
# Output the symbol name: a0 - name pointer, a1 - name length.
lw a0, 4(s1)
lw a1, 0(s1)
call _write_error
# Move on to the next 12-byte entry.
addi s1, s1, 12
addi s2, s2, -1
j .Lsymbol_table_dump_loop
.Lsymbol_table_dump_end:
# Restore the saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Searches for a symbol by name.
#
# Parameters:
# a0 - Local symbol table or 0.
# a1 - Length of the symbol to search.
# a2 - Pointer to the symbol name.
# a0 - Length of the symbol to search.
# a1 - Pointer to the symbol name.
#
# Sets a0 to the symbol info.
.type symbol_table_find, @function
symbol_table_find:
# Sets a0 to the symbol info pointer or 0 if the symbol has not been found.
.type symbol_table_lookup, @function
symbol_table_lookup:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@ -70,38 +114,37 @@ symbol_table_find:
sw s3, 12(sp) # Length of the symbol to search.
sw s4, 8(sp) # Pointer to the symbol to search.
mv s3, a1
mv s4, a2
mv s3, a0
mv s4, a1
la s1, symbol_table
lw s2, 0(s1)
addi s1, s1, 4 # Advance to the first symbol in the table.
.Lsymbol_table_find_loop:
beqz s2, .Lsymbol_table_find_not_found
.Lsymbol_table_lookup_loop:
beqz s2, .Lsymbol_table_lookup_not_found
# Compare string lengths.
mv a0, s3
mv a1, s4
lw a2, 0(s1)
bne s3, a2, .Lsymbol_table_find_continue
lw a3, 4(s1)
call _string_equal
# If lengths match, compare the content.
mv a0, s4
lw a1, 4(s1)
call _memcmp
bnez a0, .Lsymbol_table_find_continue
beqz a0, .Lsymbol_table_lookup_continue
lw a0, 8(s1) # Pointer to the symbol.
j .Lsymbol_table_end
j .Lsymbol_table_lookup_end
.Lsymbol_table_find_continue:
.Lsymbol_table_lookup_continue:
addi s1, s1, 12
addi s2, s2, -1
j .Lsymbol_table_find_loop
j .Lsymbol_table_lookup_loop
.Lsymbol_table_find_not_found:
.Lsymbol_table_lookup_not_found:
li a0, 0
.Lsymbol_table_end:
.Lsymbol_table_lookup_end:
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
@ -113,56 +156,142 @@ symbol_table_find:
addi sp, sp, 32
ret
# Creates a pointer type.
#
# The type is written to the output memory as two words: the TYPE_POINTER
# tag followed by the pointer to the base type.
#
# Parameters:
# a0 - Pointer to the base type.
# a1 - Output memory.
#
# Sets a0 to the size of newly created type in bytes.
.type symbol_table_make_pointer, @function
symbol_table_make_pointer:
li t0, TYPE_POINTER
sw t0, 0(a1)
sw a0, 4(a1)
# The tag word and the base type pointer occupy 8 bytes.
li a0, 8
ret
# Creates a parameter info.
#
# The info is written to the output memory as three words: the
# INFO_PARAMETER tag, the pointer to the parameter type and the parameter
# offset.
#
# Parameters:
# a0 - Pointer to the parameter type.
# a1 - Parameter offset.
# a2 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_parameter, @function
symbol_table_make_parameter:
li t0, INFO_PARAMETER
sw t0, 0(a2)
sw a0, 4(a2)
sw a1, 8(a2)
# Three words were written.
li a0, 12
ret
# Creates a local variable info.
#
# The info is written to the output memory as three words: the INFO_LOCAL
# tag, the pointer to the variable type and the variable stack offset.
#
# Parameters:
# a0 - Pointer to the variable type.
# a1 - Variable stack offset.
# a2 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_local, @function
symbol_table_make_local:
li t0, INFO_LOCAL
sw t0, 0(a2)
sw a0, 4(a2)
sw a1, 8(a2)
# Three words were written.
li a0, 12
ret
# Creates a procedure type and a procedure info object referring to the type.
#
# Both objects are written into the same 12-byte memory segment:
# 0(a0) - INFO_PROCEDURE tag.
# 4(a0) - Pointer to the procedure type.
# 8(a0) - The procedure type itself, consisting of the TYPE_PROCEDURE tag.
#
# Parameters:
# a0 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_procedure, @function
symbol_table_make_procedure:
li t0, TYPE_PROCEDURE
sw t0, 8(a0)
li t0, INFO_PROCEDURE
sw t0, 0(a0)
# The procedure type is stored in the same memory segment right after the
# info, so the type pointer has to refer to a0 + 8 and not to the info
# object itself.
addi t0, a0, 8
sw t0, 4(a0)
li a0, 12
ret
# Inserts a symbol into the table.
#
# The new entry is appended after the existing ones and the entry count in
# the first word of symbol_table is incremented. Uses t0, t1 and t2 as
# scratch registers; a0, a1 and a2 are left unchanged.
# NOTE(review): no check against the capacity of symbol_table is made here.
#
# Parameters:
# a0 - Symbol name length.
# a1 - Symbol name pointer.
# a2 - Symbol pointer.
.type symbol_table_enter, @function
symbol_table_enter:
la t0, symbol_table
lw t1, 0(t0) # Current table length.
li t2, 12 # Calculate the offset to the next entry.
mul t2, t1, t2
# Skip the length word at the start of the table.
addi t2, t2, 4
add t2, t0, t2
# Fill the entry: name length, name pointer, symbol pointer.
sw a0, 0(t2)
sw a1, 4(t2)
sw a2, 8(t2)
addi t1, t1, 1 # Save the new length.
sw t1, 0(t0)
ret
# Build the initial symbols.
#
# Sets a0 to the pointer to the global symbol table.
.type symbol_build, @function
symbol_table_build:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
la a0, symbol_table
addi t0, a0, 4
li t1, 3 # Length of the word "Int".
sw t1, 0(t0)
la t1, symbol_builtin_name_int
sw t1, 4(t0)
la t1, symbol_builtin_type_int
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 3 # Length of the word "Int".
la a1, symbol_builtin_name_int
la a2, symbol_builtin_type_int
call symbol_table_enter
li t1, 4 # Length of the word "Word".
sw t1, 0(t0)
la t1, symbol_builtin_name_word
sw t1, 4(t0)
la t1, symbol_builtin_type_word
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 4 # Length of the word "Word".
la a1, symbol_builtin_name_word
la a2, symbol_builtin_type_word
call symbol_table_enter
li t1, 4 # Length of the word "Byte".
sw t1, 0(t0)
la t1, symbol_builtin_name_byte
sw t1, 4(t0)
la t1, symbol_builtin_type_byte
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 4 # Length of the word "Byte".
la a1, symbol_builtin_name_byte
la a2, symbol_builtin_type_byte
call symbol_table_enter
li t1, 4 # Length of the word "Char".
sw t1, 0(t0)
la t1, symbol_builtin_name_char
sw t1, 4(t0)
la t1, symbol_builtin_type_char
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
li a0, 4 # Length of the word "Char".
la a1, symbol_builtin_name_char
la a2, symbol_builtin_type_char
call symbol_table_enter
li a0, 4 # Length of the word "Bool".
la a1, symbol_builtin_name_bool
la a2, symbol_builtin_type_bool
call symbol_table_enter
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret

14
boot/test.elna Normal file
View File

@ -0,0 +1,14 @@
program
proc main(x: Word, y: Word)
begin
_write_s(4, @x);
_write_s(4, @y);
y := 0x0a2c3063;
_write_s(4, @y)
end
begin
main(0x0a2c3061, 0x0a2c3062)
end.

View File

@ -2,7 +2,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _tokenize_next, classification, transitions, keywords, byte_keywords
.global lex_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc"
@ -246,7 +246,7 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# It specifies the target state. "ff" means that this is an end state and no
# transition is possible.
# - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the _tokenize_next function, which
# For the meaning of actions see labels in the lex_next function, which
# handles each action.
#
.type transitions, @object
@ -318,8 +318,8 @@ transitions:
# a0 - Character.
#
# Sets a0 to the class number.
.type _classify, @function
_classify:
.type classify, @function
classify:
la t0, classification
add t0, t0, a0 # Character class pointer.
lbu a0, (t0) # Character class.
@ -332,8 +332,8 @@ _classify:
# a1 - Character class.
#
# Sets a0 to the next state.
.type _lookup_state, @function
_lookup_state:
.type lookup_state, @function
lookup_state:
li t0, CLASS_COUNT
mul a0, a0, t0 # Transition row.
add a0, a0, a1 # Transition column.
@ -347,7 +347,7 @@ _lookup_state:
ret
# Chains _classify and _lookup_state.
# Chains classify and lookup_state.
#
# Parameters:
# a0 - Current state.
@ -364,11 +364,11 @@ _next_state:
sw a0, 4(sp)
mv a0, a1
call _classify
call classify
mv a1, a0
lw a0, 4(sp)
call _lookup_state
call lookup_state
# Epilogue.
lw ra, 12(sp)
@ -383,8 +383,8 @@ _next_state:
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type.
.type _classify_identifier, @function
_classify_identifier:
.type classify_identifier, @function
classify_identifier:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
@ -413,8 +413,8 @@ _classify_identifier:
# a0 - Token character.
#
# Sets a0 to the appropriate token type.
.type _classify_single, @function
_classify_single:
.type classify_single, @function
classify_single:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
@ -443,8 +443,8 @@ _classify_single:
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type.
.type _classify_composite, @function
_classify_composite:
.type classify_composite, @function
classify_composite:
lbu t0, 0(a1)
li t1, ':'
beq t0, t1, .Lclassify_composite_assign
@ -465,8 +465,8 @@ _classify_composite:
# a1 - A pointer for output value, the token kind. 4 Bytes.
#
# Sets a0 to the position of the next token.
.type _tokenize_next, @function
_tokenize_next:
.type lex_next, @function
lex_next:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@ -484,7 +484,7 @@ _tokenize_next:
sw a1, 0(sp)
sw zero, (a1) # Initialize.
.Ltokenize_next_loop:
.Llex_next_loop:
mv a0, s2
lbu a1, (s1)
call _next_state
@ -498,50 +498,50 @@ _tokenize_next:
# Perform the provided action.
li t0, 0x01 # Accumulate action.
beq t1, t0, .Ltokenize_next_accumulate
beq t1, t0, .Llex_next_accumulate
li t0, 0x02 # Print action.
beq t1, t0, .Ltokenize_next_print
beq t1, t0, .Llex_next_print
li t0, 0x03 # Skip action.
beq t1, t0, .Ltokenize_next_skip
beq t1, t0, .Llex_next_skip
li t0, 0x04 # Delimited string action.
beq t1, t0, .Ltokenize_next_comment
beq t1, t0, .Llex_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier
beq t1, t0, .Llex_next_identifier
li t0, 0x06 # Single character symbol action.
beq t1, t0, .Ltokenize_next_single
beq t1, t0, .Llex_next_single
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Ltokenize_next_composite
beq t1, t0, .Llex_next_composite
li t0, 0x08 # Integer action.
beq t1, t0, .Ltokenize_next_integer
beq t1, t0, .Llex_next_integer
j .Ltokenize_next_reject
j .Llex_next_reject
.Ltokenize_next_reject:
.Llex_next_reject:
addi s1, s1, 1
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_accumulate:
.Llex_next_accumulate:
addi s1, s1, 1
j .Ltokenize_next_loop
j .Llex_next_loop
.Ltokenize_next_skip:
.Llex_next_skip:
addi s1, s1, 1
lw t0, 12(sp)
addi t0, t0, 1
sw t0, 12(sp)
j .Ltokenize_next_loop
j .Llex_next_loop
.Ltokenize_next_print:
.Llex_next_print:
/* DEBUG
addi a0, a0, 21
sw a0, 0(sp)
@ -549,49 +549,49 @@ _tokenize_next:
li a1, 1
call _write_error */
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_comment:
.Llex_next_comment:
addi s1, s1, 1
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_identifier:
.Llex_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 12(sp)
sub a0, s1, a1
sw a0, 8(sp)
call _classify_identifier
call classify_identifier
sw a0, 4(sp)
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_single:
.Llex_next_single:
lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
call classify_single
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_composite:
.Llex_next_composite:
addi s1, s1, 1
lw a1, 12(sp)
sub a0, s1, a1
call _classify_composite
call classify_composite
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_integer:
.Llex_next_integer:
lw t0, 0(sp)
li t1, TOKEN_INTEGER
sw t1, 0(t0)
@ -600,9 +600,9 @@ _tokenize_next:
sub t1, s1, t1
sw t1, 4(t0)
j .Ltokenize_next_end
j .Llex_next_end
.Ltokenize_next_end:
.Llex_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.