Start over

This commit is contained in:
2025-08-28 22:45:42 +02:00
parent e614d43ea9
commit 627975775c
11 changed files with 4631 additions and 3692 deletions

View File

@@ -5,34 +5,99 @@
require 'open3'
require 'rake/clean'
require 'term/ansicolor'
CLEAN.include 'build/boot'
CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
SYSROOT = '../eugenios/build/sysroot'
QEMU = 'qemu-riscv32'
# Names of the Elna bootstrap stages found in boot/ (file name without directory
# and without the .elna extension). Ordered by stage number, not lexicographically,
# so that e.g. "stage10" comes after "stage9" and STAGES.last is the newest stage.
STAGES = Dir.glob('boot/stage*.elna')
  .map { |stage| File.basename stage, '.elna' }
  .sort_by { |stage| [stage.delete_prefix('stage').to_i, stage] }
CLEAN.include 'build/boot', 'build/valid'
directory 'build/boot'
directory 'build/valid'
task default: :boot
desc 'Final stage'
task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t|
exe, previous_output, source = t.prerequisites
# Compiles the source of the last stage with that stage's validated compiler
# (run under QEMU) and diffs the generated assembly against the expected
# build/valid/<stage>.s. Any differences are printed by diff.
task boot: "build/valid/#{STAGES.last}"
task boot: "build/valid/#{STAGES.last}.s"
task boot: "boot/#{STAGES.last}.elna" do |t|
  # Tell the prerequisites apart by extension: '' is the compiler executable,
  # '.s' the expected assembly and '.elna' the stage source.
  by_extension = t.prerequisites.group_by { |path| File.extname path }.transform_values(&:first)
  exe = by_extension['']
  expected = by_extension['.s']
  source = by_extension['.elna']

  cat_arguments = ['cat', source]
  compiler_arguments = [QEMU, '-L', SYSROOT, exe]
  diff_arguments = ['diff', '-Nur', '--text', expected, '-']

  Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
end
file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t|
source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
desc 'Convert previous stage language into the current stage language'
task :convert do
  # Currently writes every line of boot/stage3.elna unchanged into boot/stage4.elna.
  File.open('boot/stage4.elna', 'w') do |current_stage|
    File.foreach('boot/stage3.elna') do |line|
      current_stage << line
    end
  end
end
file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
# Defines the build rules of every Elna stage. The source of stage N is first
# compiled by the validated compiler of stage N - 1 (results in build/boot) and
# then once more by the compiler produced that way (results in build/valid).
STAGES.each do |stage|
  previous = stage.delete_prefix('stage').to_i.pred

  # Link the assembly produced by the stage's own compiler.
  file "build/valid/#{stage}" => "build/valid/#{stage}.s" do |t|
    sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
  end

  # Feed the stage source to the bootstrapped compiler of the same stage.
  file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t|
    exe, source = t.prerequisites
    cat_arguments = ['cat', source]
    compiler_arguments = [QEMU, '-L', SYSROOT, exe]

    # The block form closes the pipe and waits for both processes on exit.
    Open3.pipeline_r(cat_arguments, compiler_arguments) do |last_stdout, _wait_threads|
      IO.copy_stream last_stdout, t.name
    end
  end

  # Link the assembly produced by the previous stage's compiler.
  file "build/boot/#{stage}" => "build/boot/#{stage}.s" do |t|
    sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
  end

  # Feed the stage source to the validated compiler of the previous stage.
  file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t|
    exe, source = t.prerequisites
    cat_arguments = ['cat', source]
    compiler_arguments = [QEMU, '-L', SYSROOT, exe]

    # The block form closes the pipe and waits for both processes on exit.
    Open3.pipeline_r(cat_arguments, compiler_arguments) do |last_stdout, _wait_threads|
      IO.copy_stream last_stdout, t.name
    end
  end
end
task test: 'build/boot/test' do |t|
sh QEMU, '-L', SYSROOT, t.prerequisites.first
#
# Stage 1.
#
# Links the validated stage 1 compiler. Only the assembly prerequisites are
# handed to the cross compiler; the directory prerequisite is left out.
file 'build/valid/stage1' => ['build/valid', 'build/valid/stage1.s'] do |t|
  assembly = t.prerequisites.grep(/\.s\z/)

  sh CROSS_GCC, '-nostdlib', '-o', t.name, *assembly
end
# Runs the bootstrapped stage 1 compiler under QEMU on its own source
# (boot/stage1.s) and stores what it prints as build/valid/stage1.s.
file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t|
  source, others = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' }
  # The output directory is a prerequisite as well, but it must not end up on
  # the compiler's command line.
  exe = others.reject { |prerequisite| File.directory? prerequisite }

  cat_arguments = ['cat', *source]
  compiler_arguments = [QEMU, '-L', SYSROOT, *exe]

  # The block form closes the pipe and waits for both processes on exit.
  Open3.pipeline_r(cat_arguments, compiler_arguments) do |last_stdout, _wait_threads|
    IO.copy_stream last_stdout, t.name
  end
end
# Links the first compiler from the stage 1 assembly source. Only the assembly
# prerequisites are handed to the cross compiler; the directory is left out.
file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t|
  assembly = t.prerequisites.grep(/\.s\z/)

  sh CROSS_GCC, '-nostdlib', '-o', t.name, *assembly
end

View File

@@ -2,17 +2,15 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i
.global _memcmp, _memchr, _memmem, _memcpy, _mmap
.global _read_file, _write_error
.global _memcmp, _memchr, _memmem, _mmap
.global _current, _get, _advance, _label_counter
.global _divide_by_zero_error, _exit, _strings_index, _string_equal
.global _divide_by_zero_error, _strings_index, _string_equal
.section .rodata
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0
.equ STDOUT, 1
@@ -77,128 +75,6 @@ _memcmp:
.Lmemcmp_end:
ret
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
.type _is_upper, @function
_is_upper:
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
ret
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
.type _is_lower, @function
_is_lower:
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
ret
# Detects if the passed character is a 7-bit alpha character or an underscore.
# The character is passed in a0.
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
.type _is_alpha, @function
_is_alpha:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp)
call _is_upper
sw a0, 0(sp)
lw a0, 4(sp)
call _is_lower
lw t0, 4(sp)
xori t1, t0, '_'
seqz t1, t1
lw t0, 0(sp)
or a0, a0, t0
or a0, a0, t1
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
.type _is_digit, @function
_is_digit:
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
ret
.type _is_alnum, @function
_is_alnum:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp)
call _is_alpha
sw a0, 0(sp)
lw a0, 4(sp)
call _is_digit
lw a1, 0(sp)
or a0, a0, a1
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Writes a string to the standard output.
#
# Parameters:
# a0 - Length of the string.
# a1 - String pointer.
.type _write_s, @function
_write_s:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
mv a2, a0
li a0, STDOUT
li a7, SYS_WRITE
ecall
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
@@ -228,16 +104,6 @@ _read_file:
addi sp, sp, 8
ret
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
.type _exit, @function
_exit:
li a7, SYS_EXIT
ecall
# ret
.type _divide_by_zero_error, @function
_divide_by_zero_error:
addi a7, zero, 172 # getpid
@@ -248,106 +114,6 @@ _divide_by_zero_error:
ecall
ret
# Writes a number to a string buffer.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
.type _print_i, @function
_print_i:
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
li t1, 10
addi t0, s0, -9
li t3, 0
bgez a0, .Lprint_i_digit10
li t3, 1
neg a0, a0
.Lprint_i_digit10:
rem t2, a0, t1
addi t2, t2, '0'
sb t2, 0(t0)
div a0, a0, t1
addi t0, t0, -1
bne zero, a0, .Lprint_i_digit10
beq zero, t3, .Lprint_i_write_call
addi t2, zero, '-'
sb t2, 0(t0)
addi t0, t0, -1
.Lprint_i_write_call:
mv a0, a1
addi a1, t0, 1
sub a2, s0, t0
addi a2, a2, -9
sw a2, 0(sp)
call _memcpy
lw a0, 0(sp)
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Writes a number to the standard output.
#
# Parameters:
# a0 - Whole number.
.type _write_i, @function
_write_i:
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
addi a1, sp, 0
call _print_i
addi a1, sp, 0
call _write_s
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Writes a character from a0 into the standard output.
.type _write_c, @function
_write_c:
# Prologue
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sb a0, 4(sp)
li a0, STDOUT
addi a1, sp, 4
li a2, 1
li a7, SYS_WRITE
ecall
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
add sp, sp, 16
ret
# a0 - Pointer to an array to get the first element.
#
# Dereferences a pointer and returns what is on the address in a0.
@@ -448,34 +214,6 @@ _memmem:
add sp, sp, 24
ret
# Copies memory.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
.type _memcpy, @function
_memcpy:
mv t0, a0
.Lmemcpy_loop:
beqz a2, .Lmemcpy_end
lbu t1, (a1)
sb t1, (a0)
addi a0, a0, 1
addi a1, a1, 1
addi a2, a2, -1
j .Lmemcpy_loop
.Lmemcpy_end:
mv a0, t0
ret
# Searches for a string in a string array.
#
# Parameters:

View File

@@ -1,68 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# Tokens.
#
# The constant should match the index in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
.equ TOKEN_IMPORT, 2
.equ TOKEN_CONST, 3
.equ TOKEN_VAR, 4
.equ TOKEN_IF, 5
.equ TOKEN_THEN, 6
.equ TOKEN_ELSIF, 7
.equ TOKEN_ELSE, 8
.equ TOKEN_WHILE, 9
.equ TOKEN_DO, 10
.equ TOKEN_PROC, 11
.equ TOKEN_BEGIN, 12
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
.equ TOKEN_UNION, 16
.equ TOKEN_TRUE, 17
.equ TOKEN_FALSE, 18
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
.equ TOKEN_GOTO, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
.equ TOKEN_IDENTIFIER, 27
# The constant should match the character index in the byte_keywords string.
.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
.equ TOKEN_ASSIGN, 43
.equ TOKEN_INTEGER, 44
#
# Symbols.
#
.equ TYPE_PRIMITIVE, 0x01
.equ TYPE_POINTER, 0x02
.equ TYPE_PROCEDURE, 0x03
.equ INFO_PARAMETER, 0x10
.equ INFO_LOCAL, 0x20
.equ INFO_PROCEDURE, 0x30

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

975
boot/stage3.elna Normal file
View File

@@ -0,0 +1,975 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage3 compiler.
#
# - Procedures with no arguments or with one argument.
# - Goto statements.
# - Character and integer literals.
# - Passing local variables to procedures.
# - Local variables should have the format: v00,
# where 00 is its offset from the sp register.
.section .rodata
.type keyword_section, @object
keyword_section: .ascii ".section"
.type keyword_type, @object
keyword_type: .ascii ".type"
.type keyword_ret, @object
keyword_ret: .ascii "ret"
.type keyword_global, @object
keyword_global: .ascii ".globl"
.type keyword_proc, @object
keyword_proc: .ascii "proc "
.type keyword_end, @object
keyword_end: .ascii "end"
.type keyword_begin, @object
keyword_begin: .ascii "begin"
.type keyword_var, @object
keyword_var: .ascii "var"
.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
.type asm_type_directive, @object
asm_type_directive: .string ".type "
.type asm_type_function, @object
asm_type_function: .string ", @function\n"
.type asm_colon, @object
asm_colon: .string ":\n"
.type asm_call, @object
asm_call: .string "\tcall "
.type asm_j, @object
asm_j: .string "\tj "
.type asm_li, @object
asm_li: .string "\tli "
.type asm_lw, @object
asm_lw: .string "\tlw "
.type asm_sw, @object
asm_sw: .string "\tsw "
.type asm_mv, @object
asm_mv: .string "mv "
.type asm_t0, @object
asm_t0: .string "t0"
.type asm_a0, @object
asm_a0: .string "a0"
.type asm_comma, @object
asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "(sp)"
.section .bss
# When modifying, also change the read size in the entry point procedure.
.type source_code, @object
source_code: .zero 81920
.section .data
.type source_code_position, @object
source_code_position: .word source_code
.section .text
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the amount of bytes written in a0.
proc _read_file();
begin
mv a2, a1
mv a1, a0
# STDIN.
li a0, 0
li a7, 63 # SYS_READ.
ecall
end;
# Writes to the standard output.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write_s();
begin
mv a2, a1
mv a1, a0
# STDOUT.
li a0, 1
li a7, 64 # SYS_WRITE.
ecall
end;
# Writes a number to a string buffer.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
proc _print_i();
begin
li t1, 10
addi t0, s0, -9
li t3, 0
bgez a0, .print_i_digit10
li t3, 1
neg a0, a0
.print_i_digit10:
rem t2, a0, t1
addi t2, t2, '0'
sb t2, 0(t0)
div a0, a0, t1
addi t0, t0, -1
bne zero, a0, .print_i_digit10
beq zero, t3, .print_i_write_call
addi t2, zero, '-'
sb t2, 0(t0)
addi t0, t0, -1
.print_i_write_call:
mv a0, a1
addi a1, t0, 1
sub a2, s0, t0
addi a2, a2, -9
sw a2, 0(sp)
_memcpy();
lw a0, 0(sp)
end;
# Writes a number to the standard output.
#
# Parameters:
# a0 - Whole number.
proc _write_i();
begin
addi a1, sp, 0
_print_i();
mv a1, a0
addi a0, sp, 0
_write_s();
end;
# Writes a character from a0 into the standard output.
proc _write_c();
begin
sb a0, 0(sp)
addi a0, sp, 0
li a1, 1
_write_s();
end;
# Write null terminated string.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
sw a0, 0(sp)
.write_z_loop:
# Check for 0 character.
lb a0, (a0)
beqz a0, .write_z_end
# Print a character.
lw a0, 0(sp)
lb a0, (a0)
_write_c();
# Advance the input string by one byte.
lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
goto .write_z_loop;
.write_z_end:
end;
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_upper();
begin
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
end;
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower();
begin
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
end;
# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
sw a0, 0(sp)
_is_upper();
sw a0, 4(sp)
_is_lower(v00);
lw t0, 0(sp)
xori t1, t0, '_'
seqz t1, t1
lw t0, 4(sp)
or a0, a0, t0
or a0, a0, t1
end;
# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
end;
# Detects whether the passed character is alphanumeric, that is accepted
# by _is_alpha (letter or underscore) or by _is_digit.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is alphanumeric, to 0 otherwise.
proc _is_alnum();
begin
sw a0, 4(sp)
_is_alpha();
sw a0, 0(sp)
_is_digit(v04);
lw a1, 0(sp)
or a0, a0, a1
end;
# Reads the next token.
#
# Returns token length in a0.
proc _read_token();
begin
la t0, source_code_position # Token pointer.
lw t0, (t0)
sw t0, 0(sp) # Current token position.
sw zero, 4(sp) # Token length.
.read_token_loop:
lb t0, (t0) # Current character.
# First we try to read a directive.
# A directive can contain a dot and characters.
li t1, '.'
beq t0, t1, .read_token_next
lw a0, 0(sp)
lb a0, (a0)
_is_alnum();
bnez a0, .read_token_next
goto .read_token_end;
.read_token_next:
# Advance the source code position and token length.
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
lw t0, 0(sp)
addi t0, t0, 1
sw t0, 0(sp)
goto .read_token_loop;
.read_token_end:
lw a0, 4(sp)
end;
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal.
proc _memcmp();
begin
mv t0, a0
li a0, 0
.memcmp_loop:
beqz a2, .memcmp_end
lbu t1, (t0)
lbu t2, (a1)
sub a0, t1, t2
bnez a0, .memcmp_end
addi t0, t0, 1
addi a1, a1, 1
addi a2, a2, -1
goto .memcmp_loop;
.memcmp_end:
end;
# Copies memory byte by byte.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
proc _memcpy();
begin
	# Remember the destination, a0 is used as the write cursor below.
	mv t0, a0
	.memcpy_loop:
	beqz a2, .memcpy_end
	lbu t1, (a1)
	sb t1, (a0)
	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1
	goto .memcpy_loop;
	.memcpy_end:
	# Restore the destination pointer for the caller.
	mv a0, t0
end;
# Advances the token stream by a0 bytes.
proc _advance_token();
begin
la t0, source_code_position
lw t1, (t0)
add t1, t1, a0
sw t1, (t0)
end;
# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
sw a0, 0(sp)
la a0, source_code_position
lw a0, (a0)
lw a1, 0(sp)
_write_s();
lw a0, 0(sp)
end;
# Copies a ".section" directive to the output: first the directive with the
# space after it, then the section name together with the character that
# follows the name. The source position is advanced past everything printed.
proc _compile_section();
begin
# Print and skip the ".section" (8 characters) directive and a space after it.
_write_token(9);
_advance_token();
# Read the section name.
_read_token();
addi a0, a0, 1
_write_token();
_advance_token();
end;
# Skips the rest of the current line, including the newline character.
# Nothing is written to the output.
proc _skip_comment();
begin
la t0, source_code_position
lw t1, (t0)
.skip_comment_loop:
# Check for newline character.
lb t2, (t1)
li t3, '\n'
beq t2, t3, .skip_comment_end
# Advance the input string by one byte.
addi t1, t1, 1
sw t1, (t0)
goto .skip_comment_loop;
.skip_comment_end:
# Skip the newline.
addi t1, t1, 1
sw t1, (t0)
end;
# Prints the rest of the current line, including the newline character,
# and advances the source position past it.
proc _compile_line();
begin
	.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)
	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end
	# Print a character. Only one byte is loaded, the position can be unaligned.
	lb a0, (a1)
	_write_c();
	# Advance the input string by one byte.
	_advance_token(1);
	goto .compile_line_loop;
	.compile_line_end:
	_write_c('\n');
	_advance_token(1);
end;
# Compiles the integer literal at the current source position.
# Writes "li a0, " followed by the literal token and a newline, and advances
# the source position past the token.
proc _compile_integer_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
_read_token();
_write_token();
_advance_token();
_write_c('\n');
end;
# Compiles the character literal at the current source position, which has to
# point to the opening single quote.
# Writes "li a0, " followed by the quoted character and a newline. If the
# character is a backslash, the backslash and the character after it are both
# copied. Advances the source position past the closing quote.
proc _compile_character_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
li a0, '\''
_write_c();
_advance_token(1);
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, '\\'
bne a0, t1, .compile_character_literal_end
li a0, '\\'
_write_c();
_advance_token(1);
.compile_character_literal_end:
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
_write_c();
li a0, '\''
_write_c();
_write_c('\n');
_advance_token(2);
end;
# Compiles a local variable reference at the current source position.
# Skips one character (the caller dispatches here on the letter 'v'), then
# writes "lw a0, ", the token that follows, "(sp)" and a newline.
# Advances the source position past the token.
proc _compile_variable_expression();
begin
la a0, asm_lw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
_advance_token(1);
_read_token();
_write_token();
_advance_token();
la a0, asm_sp
_write_z();
_write_c('\n');
end;
# Compiles the expression at the current source position. The generated code
# leaves the value of the expression in a0.
#
# The kind of the expression is chosen by its first character: a single quote
# starts a character literal, 'v' a local variable and a digit an integer
# literal. Nothing is generated for any other character.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	li t1, '\''
	beq a0, t1, .compile_expression_character_literal
	li t1, 'v'
	beq a0, t1, .compile_expression_variable
	_is_digit();
	bnez a0, .compile_expression_integer_literal
	goto .compile_expression_end;
	.compile_expression_character_literal:
	_compile_character_literal();
	goto .compile_expression_end;
	.compile_expression_integer_literal:
	_compile_integer_literal();
	goto .compile_expression_end;
	.compile_expression_variable:
	_compile_variable_expression();
	goto .compile_expression_end;
	.compile_expression_end:
end;
# Compiles a procedure call. The source position has to point to the
# procedure name.
#
# Each argument is compiled with _compile_expression, which leaves the value
# in a0, and the generated code saves it to the stack at the offset
# 20 - 4 * argument index from sp. Arguments are separated by a comma followed
# by one more character. After the last argument the saved values are loaded
# back into the registers a<index>, starting with the last argument, and
# "call" with the procedure name is written.
# Advances the source position past the right paren.
proc _compile_call();
begin
# Stack variables:
# v0 - Procedure name length.
# v4 - Procedure name pointer.
# v8 - Argument count.
_read_token();
sw a0, 0(sp)
la t0, source_code_position
lw t0, (t0)
sw t0, 4(sp)
sw zero, 8(sp)
# Skip the identifier and left paren.
addi a0, a0, 1
_advance_token();
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ')'
beq t0, t1, .compile_call_finalize
.compile_call_loop:
_compile_expression();
# Save the argument on the stack.
la a0, asm_sw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
# Add one to the argument counter.
lw t0, 8(sp)
addi t0, t0, 1
sw t0, 8(sp)
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ','
bne t0, t1, .compile_call_finalize
_advance_token(2);
goto .compile_call_loop;
.compile_call_finalize:
# Load the argument from the stack.
lw t0, 8(sp)
beqz t0, .compile_call_end
# Decrement the argument counter.
lw t0, 8(sp)
addi t0, t0, -1
sw t0, 8(sp)
la a0, asm_lw
_write_z();
_write_c('a');
lw a0, 8(sp)
_write_i();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
goto .compile_call_finalize;
.compile_call_end:
la a0, asm_call
_write_z();
lw a0, 4(sp)
lw a1, 0(sp)
_write_s();
# Skip the right paren.
_advance_token(1);
end;
# Compiles a goto statement into a "j" instruction with the same label.
# Skips 5 characters first (the caller dispatches here on the letter 'g';
# "goto" and the space after it are 5 characters), then reads the label token
# and writes it after "j". Advances the source position past the label.
proc _compile_goto();
begin
_advance_token(5);
_read_token();
sw a0, 0(sp)
la a0, asm_j
_write_z();
_write_token(v00);
_advance_token();
end;
# Compiles one line of a procedure body.
#
# The kind of the statement is chosen by the second character of the line
# (the first one is expected to be the alignment tab): '_' starts a procedure
# call and 'g' a goto statement. Every other line is copied to the output
# unchanged with _compile_line.
# After a call or a goto two more characters are skipped (the semicolon and
# the newline) and a newline is written.
proc _compile_statement();
begin
# This is a call if the statement starts with an underscore.
la t0, source_code_position
lw t0, (t0)
# First character after alignment tab.
addi t0, t0, 1
lb t0, (t0)
li t1, '_'
beq t0, t1, .compile_statement_call
li t1, 'g'
beq t0, t1, .compile_statement_goto
_compile_line();
goto .compile_statement_end;
.compile_statement_call:
_advance_token(1);
_compile_call();
goto .compile_statement_semicolon;
.compile_statement_goto:
_advance_token(1);
_compile_goto();
goto .compile_statement_semicolon;
.compile_statement_semicolon:
_advance_token(2);
_write_c('\n');
.compile_statement_end:
end;
# Compiles the statements of a procedure body one after another until the
# text at the source position starts with "end". The "end" keyword itself is
# not skipped.
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
la a0, source_code_position
lw a0, (a0)
la a1, keyword_end
li a2, 3 # "end" length.
_memcmp();
beqz a0, .compile_procedure_body_epilogue
_compile_statement();
goto .compile_procedure_body_loop;
.compile_procedure_body_epilogue:
end;
# Compiles a procedure definition. The source position has to point to the
# "proc" keyword.
#
# Writes the .type directive and the label for the procedure name, then the
# prologue (asm_prologue), the compiled body and the epilogue (asm_epilogue).
# Advances the source position past the "end" keyword, the semicolon and the
# newline after it.
proc _compile_procedure();
begin
# Skip "proc ".
_advance_token(5);
_read_token();
sw a0, 0(sp) # Save the procedure name length.
# Write .type _procedure_name, @function.
la a0, asm_type_directive
_write_z();
_write_token(v00);
la a0, asm_type_function
_write_z();
# Write procedure label, _procedure_name:
_write_token(v00);
la a0, asm_colon
_write_z();
# Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp)
addi a0, a0, 10
_advance_token();
la a0, asm_prologue
_write_z();
_compile_procedure_body();
# Write the epilogue.
la a0, asm_epilogue
_write_z();
# Skip the "end" keyword, semicolon and newline.
_advance_token(5);
end;
# Copies a ".type" directive and the line following it (the object
# definition) to the output and advances the source position past both.
#
# NOTE(review): the position of the symbol type is saved to 12(sp), but it is
# not read again in this procedure, and the .compile_type_end label is not
# referenced here - confirm whether both are still needed.
proc _compile_type();
begin
# Print and skip the ".type" (5 characters) directive and a space after it.
_write_token(6);
_advance_token();
# Read and print the symbol name.
_read_token();
# Print and skip the symbol name, comma, space and @.
addi a0, a0, 3
_write_token();
_advance_token();
# Read the symbol type.
_read_token();
la t0, source_code_position
lw t0, (t0)
sw t0, 12(sp)
# Print the symbol type and newline.
addi a0, a0, 1
_write_token();
_advance_token();
# Write the object definition itself.
_compile_line();
.compile_type_end:
end;
# Advances the source position past consecutive newline characters.
# Stops at the first character that is not a newline, which includes the
# zero byte at the end of the input.
proc _skip_newlines();
begin
	# Skip newlines.
	la t0, source_code_position
	lw t1, (t0)
	.skip_newlines_loop:
	lb t2, (t1)
	li t3, '\n'
	bne t2, t3, .skip_newlines_end
	addi t1, t1, 1
	sw t1, (t0)
	goto .skip_newlines_loop;
	.skip_newlines_end:
end;
# Process the source code and print the generated code.
proc _compile();
begin
.compile_loop:
_skip_newlines();
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
beqz t0, .compile_end
li t1, '#'
beq t0, t1, .compile_comment
la a0, source_code_position
lw a0, (a0)
la a1, keyword_section
li a2, 8 # ".section" length.
_memcmp();
beqz a0, .compile_section
la a0, source_code_position
lw a0, (a0)
la a1, keyword_type
li a2, 5 # ".type" length.
_memcmp();
beqz a0, .compile_type
la a0, source_code_position
lw a0, (a0)
la a1, keyword_proc
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
_memcmp();
beqz a0, .compile_procedure
la a0, source_code_position
lw a0, (a0)
la a1, keyword_global
li a2, 6 # ".globl" length.
_memcmp();
beqz a0, .compile_global
# Not a known token, exit.
goto .compile_end;
.compile_section:
_compile_section();
goto .compile_loop;
.compile_type:
_compile_type();
goto .compile_loop;
.compile_global:
_compile_line();
goto .compile_loop;
.compile_comment:
_skip_comment();
goto .compile_loop;
.compile_procedure:
_compile_procedure();
goto .compile_loop;
.compile_end:
end;
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
proc _exit();
begin
li a7, 93 # SYS_EXIT
ecall
end;
# Entry point.
.globl _start
proc _start();
begin
# Read the source from the standard input.
la a0, source_code
li a1, 81920 # Buffer size.
_read_file();
_compile();
_exit(0);
end;

969
boot/stage4.elna Normal file
View File

@@ -0,0 +1,969 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage3 compiler.
#
# - Procedures with no arguments or with one argument.
# - Goto statements.
# - Character and integer literals.
# - Passing local variables to procedures.
# - Local variables should have the format: v00,
# where 00 is its offset from the sp register.
.section .rodata
.type keyword_section, @object
keyword_section: .ascii ".section"
.type keyword_type, @object
keyword_type: .ascii ".type"
.type keyword_ret, @object
keyword_ret: .ascii "ret"
.type keyword_global, @object
keyword_global: .ascii ".globl"
.type keyword_proc, @object
keyword_proc: .ascii "proc "
.type keyword_end, @object
keyword_end: .ascii "end"
.type keyword_begin, @object
keyword_begin: .ascii "begin"
.type keyword_var, @object
keyword_var: .ascii "var"
.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
.type asm_type_directive, @object
asm_type_directive: .string ".type "
.type asm_type_function, @object
asm_type_function: .string ", @function\n"
.type asm_colon, @object
asm_colon: .string ":\n"
.type asm_call, @object
asm_call: .string "\tcall "
.type asm_j, @object
asm_j: .string "\tj "
.type asm_li, @object
asm_li: .string "\tli "
.type asm_lw, @object
asm_lw: .string "\tlw "
.type asm_sw, @object
asm_sw: .string "\tsw "
.type asm_mv, @object
asm_mv: .string "mv "
.type asm_t0, @object
asm_t0: .string "t0"
.type asm_a0, @object
asm_a0: .string "a0"
.type asm_comma, @object
asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "(sp)"
.section .bss
# When modifying, also change the read size in the entry point procedure.
.type source_code, @object
source_code: .zero 81920
.section .data
.type source_code_position, @object
source_code_position: .word source_code
.section .text
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the amount of bytes written in a0.
proc _read_file();
begin
mv a2, a1
mv a1, a0
# STDIN.
li a0, 0
li a7, 63 # SYS_READ.
ecall
end;
# Writes to the standard output.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write_s();
begin
mv a2, a1
mv a1, a0
# STDOUT.
li a0, 1
li a7, 64 # SYS_WRITE.
ecall
end;
# Writes a number to a string buffer.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
proc _print_i();
begin
li t1, 10
addi t0, s0, -9
li t3, 0
bgez a0, .print_i_digit10
li t3, 1
neg a0, a0
.print_i_digit10:
rem t2, a0, t1
addi t2, t2, '0'
sb t2, 0(t0)
div a0, a0, t1
addi t0, t0, -1
bne zero, a0, .print_i_digit10
beq zero, t3, .print_i_write_call
addi t2, zero, '-'
sb t2, 0(t0)
addi t0, t0, -1
.print_i_write_call:
mv a0, a1
addi a1, t0, 1
sub a2, s0, t0
addi a2, a2, -9
sw a2, 0(sp)
_memcpy();
lw a0, 0(sp)
end;
# Writes a number to the standard output.
#
# Parameters:
# a0 - Whole number.
proc _write_i();
begin
addi a1, sp, 0
_print_i();
mv a1, a0
addi a0, sp, 0
_write_s();
end;
# Writes a character from a0 into the standard output.
proc _write_c();
begin
sb a0, 0(sp)
addi a0, sp, 0
li a1, 1
_write_s();
end;
# Write null terminated string.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
sw a0, 0(sp)
.write_z_loop:
# Check for 0 character.
lb a0, (a0)
beqz a0, .write_z_end
# Print a character.
lw a0, 0(sp)
lb a0, (a0)
_write_c();
# Advance the input string by one byte.
lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
goto .write_z_loop;
.write_z_end:
end;
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_upper();
begin
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
end;
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower();
begin
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
end;
# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
sw a0, 0(sp)
_is_upper();
sw a0, 4(sp)
_is_lower(v00);
lw t0, 0(sp)
xori t1, t0, '_'
seqz t1, t1
lw t0, 4(sp)
or a0, a0, t0
or a0, a0, t1
end;
# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
end;
# Detects whether the passed character is alphanumeric, that is accepted
# by _is_alpha (letter or underscore) or by _is_digit.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is alphanumeric, to 0 otherwise.
proc _is_alnum();
begin
sw a0, 4(sp)
_is_alpha();
sw a0, 0(sp)
_is_digit(v04);
lw a1, 0(sp)
or a0, a0, a1
end;
# Reads the next token.
#
# Returns token length in a0.
proc _read_token();
begin
la t0, source_code_position # Token pointer.
lw t0, (t0)
sw t0, 0(sp) # Current token position.
sw zero, 4(sp) # Token length.
.read_token_loop:
lb t0, (t0) # Current character.
# First we try to read a directive.
# A directive can contain a dot and characters.
li t1, '.'
beq t0, t1, .read_token_next
lw a0, 0(sp)
lb a0, (a0)
_is_alnum();
bnez a0, .read_token_next
goto .read_token_end;
.read_token_next:
# Advance the source code position and token length.
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
lw t0, 0(sp)
addi t0, t0, 1
sw t0, 0(sp)
goto .read_token_loop;
.read_token_end:
lw a0, 4(sp)
end;
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal.
proc _memcmp();
begin
mv t0, a0
li a0, 0
.memcmp_loop:
beqz a2, .memcmp_end
lbu t1, (t0)
lbu t2, (a1)
sub a0, t1, t2
bnez a0, .memcmp_end
addi t0, t0, 1
addi a1, a1, 1
addi a2, a2, -1
goto .memcmp_loop;
.memcmp_end:
end;
# Copies memory byte by byte.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
proc _memcpy();
begin
	# Remember the destination, a0 is used as the write cursor below.
	mv t0, a0
	.memcpy_loop:
	beqz a2, .memcpy_end
	lbu t1, (a1)
	sb t1, (a0)
	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1
	goto .memcpy_loop;
	.memcpy_end:
	# Restore the destination pointer for the caller.
	mv a0, t0
end;
# Advances the token stream by a0 bytes.
proc _advance_token();
begin
la t0, source_code_position
lw t1, (t0)
add t1, t1, a0
sw t1, (t0)
end;
# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
sw a0, 0(sp)
la a0, source_code_position
lw a0, (a0)
lw a1, 0(sp)
_write_s();
lw a0, 0(sp)
end;
# Compiles a ".section" directive by copying it to the output unchanged
# and moving the source code position past it.
proc _compile_section();
begin
# Print and skip the ".section" (8 characters) directive and a space after it.
# _write_token returns its argument, which is then passed on to _advance_token.
_write_token(9);
_advance_token();
# Read the section name.
_read_token();
# Include the character following the section name.
addi a0, a0, 1
_write_token();
_advance_token();
end;
# Skips the rest of the current line including the newline that ends it.
# Nothing is printed. If the input ends before a newline is found, the
# source code position is left at the terminating NUL byte.
proc _skip_comment();
begin
la t0, source_code_position
lw t1, (t0)
.skip_comment_loop:
lb t2, (t1)
# Stop at the end of the input, so a last line without a newline
# cannot make the loop run past the source buffer.
beqz t2, .skip_comment_eof
# Check for newline character.
li t3, '\n'
beq t2, t3, .skip_comment_end
# Advance the input string by one byte.
addi t1, t1, 1
sw t1, (t0)
goto .skip_comment_loop;
.skip_comment_end:
# Skip the newline.
addi t1, t1, 1
sw t1, (t0)
.skip_comment_eof:
end;
# Prints and skips a line.
#
# Copies the characters up to the next newline from the source code position
# to the standard output, writes a newline and moves the position past it.
proc _compile_line();
begin
.compile_line_loop:
la a0, source_code_position
lw a1, (a0)
lb t0, (a1)
li t1, '\n'
beq t0, t1, .compile_line_end
# Print a character. Only a single byte is loaded, since the position
# is not necessarily word aligned and _write_c uses the low byte only.
lbu a0, (a1)
_write_c();
# Advance the input string by one byte.
_advance_token(1);
goto .compile_line_loop;
.compile_line_end:
_write_c('\n');
_advance_token(1);
end;
# Compiles the integer literal at the source code position into an
# instruction loading it into a0 and skips the literal.
proc _compile_integer_literal();
begin
# Write the instruction, the target register and a comma.
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Copy the literal itself. The token length is passed on in a0.
_read_token();
_write_token();
_advance_token();
_write_c('\n');
end;
# Compiles the character literal at the source code position into an
# instruction loading it into a0 and skips the literal.
#
# The literal can contain a single character or a backslash followed
# by a single character.
proc _compile_character_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Write and skip the opening quote.
_write_c('\'');
_advance_token(1);
# Write and skip the backslash if the character is escaped.
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, '\\'
bne a0, t1, .compile_character_literal_end
_write_c('\\');
_advance_token(1);
.compile_character_literal_end:
# Write the character itself and the closing quote.
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
_write_c();
_write_c('\'');
_write_c('\n');
# Skip the character and the closing quote.
_advance_token(2);
end;
# Compiles a local variable reference, like v04, into an instruction loading
# the word at the given offset from the sp register into a0.
proc _compile_variable_expression();
begin
la a0, asm_lw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Skip the first character of the variable name, the rest is the offset.
_advance_token(1);
# Copy the offset. The token length is passed on in a0.
_read_token();
_write_token();
_advance_token();
la a0, asm_sp
_write_z();
_write_c('\n');
end;
# Compiles the expression at the source code position. The generated code
# leaves the value of the expression in a0.
#
# The kind of the expression is selected by its first character:
# ' - Character literal.
# v - Local variable.
# Digit - Integer literal.
# Nothing is written or skipped if the character is none of these.
proc _compile_expression();
begin
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, '\''
beq a0, t1, .compile_expression_character_literal
li t1, 'v'
beq a0, t1, .compile_expression_variable
_is_digit();
bnez a0, .compile_expression_integer_literal
goto .compile_expression_end;
.compile_expression_character_literal:
_compile_character_literal();
goto .compile_expression_end;
.compile_expression_integer_literal:
_compile_integer_literal();
goto .compile_expression_end;
.compile_expression_variable:
_compile_variable_expression();
goto .compile_expression_end;
.compile_expression_end:
end;
# Compiles a procedure call of the form name(argument, ...).
#
# Every argument is compiled with _compile_expression and the generated code
# saves its value from a0 on the stack at the offset 20 - 4 * argument index.
# After the last argument the saved values are loaded into the registers
# a0, a1 and so on, beginning with the last argument, and the call
# instruction is written.
#
# The source code position should point to the procedure name. Afterwards it
# points to the character after the right paren.
proc _compile_call();
begin
# Stack variables:
# v0 - Procedure name length.
# v4 - Procedure name pointer.
# v8 - Argument count.
_read_token();
sw a0, 0(sp)
la t0, source_code_position
lw t0, (t0)
sw t0, 4(sp)
sw zero, 8(sp)
# Skip the identifier and left paren.
addi a0, a0, 1
_advance_token();
# A right paren at this point means there are no arguments.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ')'
beq t0, t1, .compile_call_finalize
.compile_call_loop:
_compile_expression();
# Save the argument on the stack.
la a0, asm_sw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
# Add one to the argument counter.
lw t0, 8(sp)
addi t0, t0, 1
sw t0, 8(sp)
# Continue with the next argument if a comma follows.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ','
bne t0, t1, .compile_call_finalize
# Skip the comma and the character after it.
_advance_token(2);
goto .compile_call_loop;
.compile_call_finalize:
# Load the argument from the stack.
lw t0, 8(sp)
beqz t0, .compile_call_end
# Decrement the argument counter.
lw t0, 8(sp)
addi t0, t0, -1
sw t0, 8(sp)
# The register name is the letter a followed by the argument index.
la a0, asm_lw
_write_z();
_write_c('a');
lw a0, 8(sp)
_write_i();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
goto .compile_call_finalize;
.compile_call_end:
# Write the call instruction followed by the saved procedure name.
la a0, asm_call
_write_z();
_write_s(v04, v00);
# Skip the right paren.
_advance_token(1);
end;
# Compiles a goto statement into a jump to the given label.
#
# The source code position should point to the "goto" keyword. Afterwards it
# points to the character after the label. No newline is written.
proc _compile_goto();
begin
# Skip "goto" (4 characters) and a space after it.
_advance_token(5);
# Save the label length in v00, a0 is overwritten below.
_read_token();
sw a0, 0(sp)
la a0, asm_j
_write_z();
# _write_token returns the label length, which is passed on to _advance_token.
_write_token(v00);
_advance_token();
end;
# Compiles a single statement of a procedure body.
#
# The source code position should point to the beginning of a line. The second
# character of the line selects the statement kind:
# _ - Procedure call.
# g - Goto statement.
# Any other line is copied to the output unchanged.
proc _compile_statement();
begin
# This is a call if the statement starts with an underscore.
la t0, source_code_position
lw t0, (t0)
# First character after alignment tab.
addi t0, t0, 1
lb t0, (t0)
li t1, '_'
beq t0, t1, .compile_statement_call
li t1, 'g'
beq t0, t1, .compile_statement_goto
_compile_line();
goto .compile_statement_end;
.compile_statement_call:
# Skip the alignment tab.
_advance_token(1);
_compile_call();
goto .compile_statement_semicolon;
.compile_statement_goto:
# Skip the alignment tab.
_advance_token(1);
_compile_goto();
goto .compile_statement_semicolon;
.compile_statement_semicolon:
# Skip the semicolon and the newline, then finish the generated instruction.
_advance_token(2);
_write_c('\n');
.compile_statement_end:
end;
# Compiles the statements of a procedure body one after another until the
# text at the source code position begins with the "end" keyword.
# The keyword itself is not skipped.
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
# Compare the text at the source code position with the "end" keyword.
la a0, source_code_position
lw a0, (a0)
la a1, keyword_end
li a2, 3 # "end" length.
_memcmp();
beqz a0, .compile_procedure_body_epilogue
_compile_statement();
goto .compile_procedure_body_loop;
.compile_procedure_body_epilogue:
end;
# Compiles a procedure definition.
#
# Writes the .type directive and the label of the procedure, the prologue,
# the compiled procedure body and the epilogue.
#
# The source code position should point to the "proc" keyword. The name should
# be followed by exactly 10 characters before the first statement and the
# closing "end" keyword by exactly 2 characters, these are skipped unchecked.
proc _compile_procedure();
begin
# Skip "proc ".
_advance_token(5);
_read_token();
sw a0, 0(sp) # Save the procedure name length.
# Write .type _procedure_name, @function.
la a0, asm_type_directive
_write_z();
_write_token(v00);
la a0, asm_type_function
_write_z();
# Write procedure label, _procedure_name:
_write_token(v00);
la a0, asm_colon
_write_z();
# Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp)
addi a0, a0, 10
_advance_token();
la a0, asm_prologue
_write_z();
_compile_procedure_body();
# Write the epilogue.
la a0, asm_epilogue
_write_z();
# Skip the "end" keyword, semicolon and newline.
_advance_token(5);
end;
# Compiles a ".type" directive together with the line following it.
# Both are copied to the output unchanged.
proc _compile_type();
begin
# Print and skip the ".type" (5 characters) directive and a space after it.
# _write_token returns its argument, which is then passed on to _advance_token.
_write_token(6);
_advance_token();
# Read the symbol name.
_read_token();
# Print and skip the symbol name, comma, space and @.
addi a0, a0, 3
_write_token();
_advance_token();
# Read the symbol type.
_read_token();
# Print and skip the symbol type and newline.
addi a0, a0, 1
_write_token();
_advance_token();
# Write the object definition itself.
_compile_line();
end;
# Skips consecutive newline characters at the source code position.
proc _skip_newlines();
begin
la t0, source_code_position
lw t1, (t0)
.skip_newlines_loop:
# Stop at the first character that is not a newline. This includes
# the NUL byte ending the input, so it needs no separate check.
lb t2, (t1)
li t3, '\n'
bne t2, t3, .skip_newlines_end
addi t1, t1, 1
sw t1, (t0)
goto .skip_newlines_loop;
.skip_newlines_end:
end;
# Process the source code and print the generated code.
#
# Top-level constructs are recognized by their first characters. Comments (#)
# are skipped, .section, .type and .globl directives are copied to the output
# and procedures (proc) are compiled. Processing stops at a NUL byte or at
# the first construct that is not recognized.
proc _compile();
begin
.compile_loop:
_skip_newlines();
# A NUL byte marks the end of the input.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
beqz t0, .compile_end
li t1, '#'
beq t0, t1, .compile_comment
la a0, source_code_position
lw a0, (a0)
la a1, keyword_section
li a2, 8 # ".section" length.
_memcmp();
beqz a0, .compile_section
la a0, source_code_position
lw a0, (a0)
la a1, keyword_type
li a2, 5 # ".type" length.
_memcmp();
beqz a0, .compile_type
la a0, source_code_position
lw a0, (a0)
la a1, keyword_proc
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
_memcmp();
beqz a0, .compile_procedure
la a0, source_code_position
lw a0, (a0)
la a1, keyword_global
li a2, 6 # ".globl" length.
_memcmp();
beqz a0, .compile_global
# Not a known token, exit.
goto .compile_end;
.compile_section:
_compile_section();
goto .compile_loop;
.compile_type:
_compile_type();
goto .compile_loop;
.compile_global:
# The .globl directive is copied to the output unchanged.
_compile_line();
goto .compile_loop;
.compile_comment:
_skip_comment();
goto .compile_loop;
.compile_procedure:
_compile_procedure();
goto .compile_loop;
.compile_end:
end;
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
proc _exit();
begin
li a7, 93 # SYS_EXIT
ecall
end;
# Entry point.
.globl _start
proc _start();
begin
# Read the source from the standard input.
la a0, source_code
li a1, 81920 # Buffer size.
_read_file();
_compile();
_exit(0);
end;

969
boot/stage5.elna Normal file
View File

@@ -0,0 +1,969 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage3 compiler.
#
# - Procedures with no arguments or with one argument.
# - Goto statements.
# - Character and integer literals.
# - Passing local variables to procedures.
# - Local variables should have the format: v00,
# where 00 is its offset from the sp register.
.section .rodata
.type keyword_section, @object
keyword_section: .ascii ".section"
.type keyword_type, @object
keyword_type: .ascii ".type"
.type keyword_ret, @object
keyword_ret: .ascii "ret"
.type keyword_global, @object
keyword_global: .ascii ".globl"
.type keyword_proc, @object
keyword_proc: .ascii "proc "
.type keyword_end, @object
keyword_end: .ascii "end"
.type keyword_begin, @object
keyword_begin: .ascii "begin"
.type keyword_var, @object
keyword_var: .ascii "var"
.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
.type asm_type_directive, @object
asm_type_directive: .string ".type "
.type asm_type_function, @object
asm_type_function: .string ", @function\n"
.type asm_colon, @object
asm_colon: .string ":\n"
.type asm_call, @object
asm_call: .string "\tcall "
.type asm_j, @object
asm_j: .string "\tj "
.type asm_li, @object
asm_li: .string "\tli "
.type asm_lw, @object
asm_lw: .string "\tlw "
.type asm_sw, @object
asm_sw: .string "\tsw "
.type asm_mv, @object
asm_mv: .string "mv "
.type asm_t0, @object
asm_t0: .string "t0"
.type asm_a0, @object
asm_a0: .string "a0"
.type asm_comma, @object
asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "(sp)"
.section .bss
# When modifying also change the read size in the entry point procedure.
.type source_code, @object
source_code: .zero 81920
.section .data
.type source_code_position, @object
source_code_position: .word source_code
.section .text
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the amount of bytes written in a0.
proc _read_file();
begin
mv a2, a1
mv a1, a0
# STDIN.
li a0, 0
li a7, 63 # SYS_READ.
ecall
end;
# Writes to the standard output.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write_s();
begin
mv a2, a1
mv a1, a0
# STDOUT.
li a0, 1
li a7, 64 # SYS_WRITE.
ecall
end;
# Writes a number to a string buffer.
#
# The digits are produced beginning with the least significant one, so they
# are first written backwards into a local buffer and copied into the target
# buffer afterwards.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
proc _print_i();
begin
li t1, 10
# The last character of the number is stored at s0 - 9.
addi t0, s0, -9
li t3, 0
bgez a0, .print_i_digit10
# Remember the sign and continue with the absolute value.
# NOTE(review): the most negative 32-bit value presumably stays negative
# after neg and is then printed incorrectly - confirm.
li t3, 1
neg a0, a0
.print_i_digit10:
rem t2, a0, t1
addi t2, t2, '0'
sb t2, 0(t0)
div a0, a0, t1
addi t0, t0, -1
bne zero, a0, .print_i_digit10
beq zero, t3, .print_i_write_call
addi t2, zero, '-'
sb t2, 0(t0)
addi t0, t0, -1
.print_i_write_call:
# t0 points to the byte before the first character.
# Destination is the target buffer, source is the first character,
# length is (s0 - 9) - t0.
mv a0, a1
addi a1, t0, 1
sub a2, s0, t0
addi a2, a2, -9
# Save the length in v00, it is the return value.
sw a2, 0(sp)
_memcpy();
lw a0, 0(sp)
end;
# Writes a number to the standard output.
#
# Parameters:
# a0 - Whole number.
proc _write_i();
begin
addi a1, sp, 0
_print_i();
mv a1, a0
addi a0, sp, 0
_write_s();
end;
# Writes a character from a0 into the standard output.
proc _write_c();
begin
sb a0, 0(sp)
addi a0, sp, 0
li a1, 1
_write_s();
end;
# Write null terminated string.
#
# The string is written character by character with _write_c.
# The terminating null character is not written.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
# v00 holds the pointer to the current character.
sw a0, 0(sp)
.write_z_loop:
# Check for 0 character.
# a0 holds the pointer to the current character whenever the loop is entered.
lb a0, (a0)
beqz a0, .write_z_end
# Print a character.
lw a0, 0(sp)
lb a0, (a0)
_write_c();
# Advance the input string by one byte.
lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
goto .write_z_loop;
.write_z_end:
end;
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
# Overwrites t0, t1 and t2.
proc _is_upper();
begin
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
end;
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
# Overwrites t0, t2 and t3.
proc _is_lower();
begin
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
end;
# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
# Keep the tested character in v00, the calls below overwrite a0.
sw a0, 0(sp)
_is_upper();
# Save the uppercase test result in v04.
sw a0, 4(sp)
_is_lower(v00);
# t1 = character == '_'
lw t0, 0(sp)
xori t1, t0, '_'
seqz t1, t1
# a0 = is lowercase | is uppercase | is underscore
lw t0, 4(sp)
or a0, a0, t0
or a0, a0, t1
end;
# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
# Both comparisons are unsigned.
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
end;
# Detects whether the passed character is alphanumeric, that is,
# accepted by either _is_alpha or _is_digit.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is alphanumeric, to 0 otherwise.
proc _is_alnum();
begin
# Keep the tested character in v04, _is_alpha overwrites a0.
sw a0, 4(sp)
_is_alpha();
# Save the _is_alpha result in v00.
sw a0, 0(sp)
_is_digit(v04);
lw a1, 0(sp)
or a0, a0, a1
end;
# Reads the next token.
#
# A token is the longest run of dots and characters accepted by _is_alnum
# starting at the current source code position. The source code position
# itself is not changed, only a local copy of it is advanced.
#
# Returns token length in a0.
proc _read_token();
begin
la t0, source_code_position # Token pointer.
lw t0, (t0)
sw t0, 0(sp) # Current token position.
sw zero, 4(sp) # Token length.
.read_token_loop:
# t0 holds the current token position whenever the loop is entered.
lb t0, (t0) # Current character.
# First we try to read a directive.
# A directive can contain a dot and characters.
li t1, '.'
beq t0, t1, .read_token_next
lw a0, 0(sp)
lb a0, (a0)
_is_alnum();
bnez a0, .read_token_next
goto .read_token_end;
.read_token_next:
# Advance the local token position and the token length.
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
lw t0, 0(sp)
addi t0, t0, 1
sw t0, 0(sp)
goto .read_token_loop;
.read_token_end:
lw a0, 4(sp)
end;
# Compares two memory regions byte by byte.
#
# Parameters:
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal. Otherwise a0 is the difference
# between the first pair of differing bytes (first region minus second region).
proc _memcmp();
begin
# a0 becomes the result, so the first pointer is kept in t0.
mv t0, a0
li a0, 0
.memcmp_loop:
beqz a2, .memcmp_end
lbu t1, (t0)
lbu t2, (a1)
sub a0, t1, t2
bnez a0, .memcmp_end
addi t0, t0, 1
addi a1, a1, 1
addi a2, a2, -1
goto .memcmp_loop;
.memcmp_end:
end;
# Copies memory.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
proc _memcpy();
begin
# Remember the destination, a0 is used as the write cursor.
mv t0, a0
.memcpy_loop:
beqz a2, .memcpy_end
lbu t1, (a1)
sb t1, (a0)
addi a0, a0, 1
addi a1, a1, 1
addi a2, a2, -1
goto .memcpy_loop;
.memcpy_end:
# Restore the destination pointer.
mv a0, t0
end;
# Advances the token stream by a0 bytes.
proc _advance_token();
begin
la t0, source_code_position
lw t1, (t0)
add t1, t1, a0
sw t1, (t0)
end;
# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
sw a0, 0(sp)
la a0, source_code_position
lw a0, (a0)
lw a1, 0(sp)
_write_s();
lw a0, 0(sp)
end;
# Compiles a ".section" directive by copying it to the output unchanged
# and moving the source code position past it.
proc _compile_section();
begin
# Print and skip the ".section" (8 characters) directive and a space after it.
# _write_token returns its argument, which is then passed on to _advance_token.
_write_token(9);
_advance_token();
# Read the section name.
_read_token();
# Include the character following the section name.
addi a0, a0, 1
_write_token();
_advance_token();
end;
# Skips the rest of the current line including the newline that ends it.
# Nothing is printed. If the input ends before a newline is found, the
# source code position is left at the terminating NUL byte.
proc _skip_comment();
begin
la t0, source_code_position
lw t1, (t0)
.skip_comment_loop:
lb t2, (t1)
# Stop at the end of the input, so a last line without a newline
# cannot make the loop run past the source buffer.
beqz t2, .skip_comment_eof
# Check for newline character.
li t3, '\n'
beq t2, t3, .skip_comment_end
# Advance the input string by one byte.
addi t1, t1, 1
sw t1, (t0)
goto .skip_comment_loop;
.skip_comment_end:
# Skip the newline.
addi t1, t1, 1
sw t1, (t0)
.skip_comment_eof:
end;
# Prints and skips a line.
#
# Copies the characters up to the next newline from the source code position
# to the standard output, writes a newline and moves the position past it.
proc _compile_line();
begin
.compile_line_loop:
la a0, source_code_position
lw a1, (a0)
lb t0, (a1)
li t1, '\n'
beq t0, t1, .compile_line_end
# Print a character. Only a single byte is loaded, since the position
# is not necessarily word aligned and _write_c uses the low byte only.
lbu a0, (a1)
_write_c();
# Advance the input string by one byte.
_advance_token(1);
goto .compile_line_loop;
.compile_line_end:
_write_c('\n');
_advance_token(1);
end;
# Compiles the integer literal at the source code position into an
# instruction loading it into a0 and skips the literal.
proc _compile_integer_literal();
begin
# Write the instruction, the target register and a comma.
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Copy the literal itself. The token length is passed on in a0.
_read_token();
_write_token();
_advance_token();
_write_c('\n');
end;
# Compiles the character literal at the source code position into an
# instruction loading it into a0 and skips the literal.
#
# The literal can contain a single character or a backslash followed
# by a single character.
proc _compile_character_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Write and skip the opening quote.
_write_c('\'');
_advance_token(1);
# Write and skip the backslash if the character is escaped.
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, '\\'
bne a0, t1, .compile_character_literal_end
_write_c('\\');
_advance_token(1);
.compile_character_literal_end:
# Write the character itself and the closing quote.
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
_write_c();
_write_c('\'');
_write_c('\n');
# Skip the character and the closing quote.
_advance_token(2);
end;
# Compiles a local variable reference, like v04, into an instruction loading
# the word at the given offset from the sp register into a0.
proc _compile_variable_expression();
begin
la a0, asm_lw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Skip the first character of the variable name, the rest is the offset.
_advance_token(1);
# Copy the offset. The token length is passed on in a0.
_read_token();
_write_token();
_advance_token();
la a0, asm_sp
_write_z();
_write_c('\n');
end;
# Compiles the expression at the source code position. The generated code
# leaves the value of the expression in a0.
#
# The kind of the expression is selected by its first character:
# ' - Character literal.
# v - Local variable.
# Digit - Integer literal.
# Nothing is written or skipped if the character is none of these.
proc _compile_expression();
begin
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, '\''
beq a0, t1, .compile_expression_character_literal
li t1, 'v'
beq a0, t1, .compile_expression_variable
_is_digit();
bnez a0, .compile_expression_integer_literal
goto .compile_expression_end;
.compile_expression_character_literal:
_compile_character_literal();
goto .compile_expression_end;
.compile_expression_integer_literal:
_compile_integer_literal();
goto .compile_expression_end;
.compile_expression_variable:
_compile_variable_expression();
goto .compile_expression_end;
.compile_expression_end:
end;
# Compiles a procedure call of the form name(argument, ...).
#
# Every argument is compiled with _compile_expression and the generated code
# saves its value from a0 on the stack at the offset 20 - 4 * argument index.
# After the last argument the saved values are loaded into the registers
# a0, a1 and so on, beginning with the last argument, and the call
# instruction is written.
#
# The source code position should point to the procedure name. Afterwards it
# points to the character after the right paren.
proc _compile_call();
begin
# Stack variables:
# v0 - Procedure name length.
# v4 - Procedure name pointer.
# v8 - Argument count.
_read_token();
sw a0, 0(sp)
la t0, source_code_position
lw t0, (t0)
sw t0, 4(sp)
sw zero, 8(sp)
# Skip the identifier and left paren.
addi a0, a0, 1
_advance_token();
# A right paren at this point means there are no arguments.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ')'
beq t0, t1, .compile_call_finalize
.compile_call_loop:
_compile_expression();
# Save the argument on the stack.
la a0, asm_sw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
# Add one to the argument counter.
lw t0, 8(sp)
addi t0, t0, 1
sw t0, 8(sp)
# Continue with the next argument if a comma follows.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ','
bne t0, t1, .compile_call_finalize
# Skip the comma and the character after it.
_advance_token(2);
goto .compile_call_loop;
.compile_call_finalize:
# Load the argument from the stack.
lw t0, 8(sp)
beqz t0, .compile_call_end
# Decrement the argument counter.
lw t0, 8(sp)
addi t0, t0, -1
sw t0, 8(sp)
# The register name is the letter a followed by the argument index.
la a0, asm_lw
_write_z();
_write_c('a');
lw a0, 8(sp)
_write_i();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
goto .compile_call_finalize;
.compile_call_end:
# Write the call instruction followed by the saved procedure name.
la a0, asm_call
_write_z();
_write_s(v04, v00);
# Skip the right paren.
_advance_token(1);
end;
# Compiles a goto statement into a jump to the given label.
#
# The source code position should point to the "goto" keyword. Afterwards it
# points to the character after the label. No newline is written.
proc _compile_goto();
begin
# Skip "goto" (4 characters) and a space after it.
_advance_token(5);
# Save the label length in v00, a0 is overwritten below.
_read_token();
sw a0, 0(sp)
la a0, asm_j
_write_z();
# _write_token returns the label length, which is passed on to _advance_token.
_write_token(v00);
_advance_token();
end;
# Compiles a single statement of a procedure body.
#
# The source code position should point to the beginning of a line. The second
# character of the line selects the statement kind:
# _ - Procedure call.
# g - Goto statement.
# Any other line is copied to the output unchanged.
proc _compile_statement();
begin
# This is a call if the statement starts with an underscore.
la t0, source_code_position
lw t0, (t0)
# First character after alignment tab.
addi t0, t0, 1
lb t0, (t0)
li t1, '_'
beq t0, t1, .compile_statement_call
li t1, 'g'
beq t0, t1, .compile_statement_goto
_compile_line();
goto .compile_statement_end;
.compile_statement_call:
# Skip the alignment tab.
_advance_token(1);
_compile_call();
goto .compile_statement_semicolon;
.compile_statement_goto:
# Skip the alignment tab.
_advance_token(1);
_compile_goto();
goto .compile_statement_semicolon;
.compile_statement_semicolon:
# Skip the semicolon and the newline, then finish the generated instruction.
_advance_token(2);
_write_c('\n');
.compile_statement_end:
end;
# Compiles the statements of a procedure body one after another until the
# text at the source code position begins with the "end" keyword.
# The keyword itself is not skipped.
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
# Compare the text at the source code position with the "end" keyword.
la a0, source_code_position
lw a0, (a0)
la a1, keyword_end
li a2, 3 # "end" length.
_memcmp();
beqz a0, .compile_procedure_body_epilogue
_compile_statement();
goto .compile_procedure_body_loop;
.compile_procedure_body_epilogue:
end;
# Compiles a procedure definition.
#
# Writes the .type directive and the label of the procedure, the prologue,
# the compiled procedure body and the epilogue.
#
# The source code position should point to the "proc" keyword. The name should
# be followed by exactly 10 characters before the first statement and the
# closing "end" keyword by exactly 2 characters, these are skipped unchecked.
proc _compile_procedure();
begin
# Skip "proc ".
_advance_token(5);
_read_token();
sw a0, 0(sp) # Save the procedure name length.
# Write .type _procedure_name, @function.
la a0, asm_type_directive
_write_z();
_write_token(v00);
la a0, asm_type_function
_write_z();
# Write procedure label, _procedure_name:
_write_token(v00);
la a0, asm_colon
_write_z();
# Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp)
addi a0, a0, 10
_advance_token();
la a0, asm_prologue
_write_z();
_compile_procedure_body();
# Write the epilogue.
la a0, asm_epilogue
_write_z();
# Skip the "end" keyword, semicolon and newline.
_advance_token(5);
end;
# Compiles a ".type" directive together with the line following it.
# Both are copied to the output unchanged.
proc _compile_type();
begin
# Print and skip the ".type" (5 characters) directive and a space after it.
# _write_token returns its argument, which is then passed on to _advance_token.
_write_token(6);
_advance_token();
# Read the symbol name.
_read_token();
# Print and skip the symbol name, comma, space and @.
addi a0, a0, 3
_write_token();
_advance_token();
# Read the symbol type.
_read_token();
# Print and skip the symbol type and newline.
addi a0, a0, 1
_write_token();
_advance_token();
# Write the object definition itself.
_compile_line();
end;
# Skips consecutive newline characters at the source code position.
proc _skip_newlines();
begin
la t0, source_code_position
lw t1, (t0)
.skip_newlines_loop:
# Stop at the first character that is not a newline. This includes
# the NUL byte ending the input, so it needs no separate check.
lb t2, (t1)
li t3, '\n'
bne t2, t3, .skip_newlines_end
addi t1, t1, 1
sw t1, (t0)
goto .skip_newlines_loop;
.skip_newlines_end:
end;
# Process the source code and print the generated code.
#
# Top-level constructs are recognized by their first characters. Comments (#)
# are skipped, .section, .type and .globl directives are copied to the output
# and procedures (proc) are compiled. Processing stops at a NUL byte or at
# the first construct that is not recognized.
proc _compile();
begin
.compile_loop:
_skip_newlines();
# A NUL byte marks the end of the input.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
beqz t0, .compile_end
li t1, '#'
beq t0, t1, .compile_comment
la a0, source_code_position
lw a0, (a0)
la a1, keyword_section
li a2, 8 # ".section" length.
_memcmp();
beqz a0, .compile_section
la a0, source_code_position
lw a0, (a0)
la a1, keyword_type
li a2, 5 # ".type" length.
_memcmp();
beqz a0, .compile_type
la a0, source_code_position
lw a0, (a0)
la a1, keyword_proc
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
_memcmp();
beqz a0, .compile_procedure
la a0, source_code_position
lw a0, (a0)
la a1, keyword_global
li a2, 6 # ".globl" length.
_memcmp();
beqz a0, .compile_global
# Not a known token, exit.
goto .compile_end;
.compile_section:
_compile_section();
goto .compile_loop;
.compile_type:
_compile_type();
goto .compile_loop;
.compile_global:
# The .globl directive is copied to the output unchanged.
_compile_line();
goto .compile_loop;
.compile_comment:
_skip_comment();
goto .compile_loop;
.compile_procedure:
_compile_procedure();
goto .compile_loop;
.compile_end:
end;
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
proc _exit();
begin
li a7, 93 # SYS_EXIT
ecall
end;
# Entry point.
.globl _start
proc _start();
begin
# Read the source from the standard input.
la a0, source_code
li a1, 81920 # Buffer size.
_read_file();
_compile();
_exit(0);
end;

View File

@@ -1,14 +0,0 @@
program
proc main(x: Word, y: Word)
begin
_write_s(4, @x);
_write_s(4, @y);
y := 0x0a2c3063;
_write_s(4, @y)
end
begin
main(0x0a2c3061, 0x0a2c3062)
end.

View File

@@ -1,616 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global lex_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc"
.section .rodata
#
# Classification table assigns each possible character to a group (class). All
# characters of the same group are handled equivalently.
#
# Classification:
#
.equ CLASS_INVALID, 0x00
.equ CLASS_DIGIT, 0x01
.equ CLASS_CHARACTER, 0x02
.equ CLASS_SPACE, 0x03
.equ CLASS_COLON, 0x04
.equ CLASS_EQUALS, 0x05
.equ CLASS_LEFT_PAREN, 0x06
.equ CLASS_RIGHT_PAREN, 0x07
.equ CLASS_ASTERISK, 0x08
.equ CLASS_UNDERSCORE, 0x09
.equ CLASS_SINGLE, 0x0a
.equ CLASS_HEX, 0x0b
.equ CLASS_ZERO, 0x0c
.equ CLASS_X, 0x0d
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
.equ CLASS_QUOTE, 0x11
.equ CLASS_GREATER, 0x12
.equ CLASS_LESS, 0x13
.equ CLASS_COUNT, 20
.type classification, @object
classification:
.byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH
.byte CLASS_INVALID # 02 STX
.byte CLASS_INVALID # 03 ETX
.byte CLASS_INVALID # 04 EOT
.byte CLASS_INVALID # 05 ENQ
.byte CLASS_INVALID # 06 ACK
.byte CLASS_INVALID # 07 BEL
.byte CLASS_INVALID # 08 BS
.byte CLASS_SPACE # 09 HT
.byte CLASS_SPACE # 0A LF
.byte CLASS_INVALID # 0B VT
.byte CLASS_INVALID # 0C FF
.byte CLASS_SPACE # 0D CR
.byte CLASS_INVALID # 0E SO
.byte CLASS_INVALID # 0F SI
.byte CLASS_INVALID # 10 DLE
.byte CLASS_INVALID # 11 DC1
.byte CLASS_INVALID # 12 DC2
.byte CLASS_INVALID # 13 DC3
.byte CLASS_INVALID # 14 DC4
.byte CLASS_INVALID # 15 NAK
.byte CLASS_INVALID # 16 SYN
.byte CLASS_INVALID # 17 ETB
.byte CLASS_INVALID # 18 CAN
.byte CLASS_INVALID # 19 EM
.byte CLASS_INVALID # 1A SUB
.byte CLASS_INVALID # 1B ESC
.byte CLASS_INVALID # 1C FS
.byte CLASS_INVALID # 1D GS
.byte CLASS_INVALID # 1E RS
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
.byte CLASS_QUOTE # 22 "
.byte 0x00 # 23 #
.byte 0x00 # 24 $
.byte CLASS_SINGLE # 25 %
.byte CLASS_SINGLE # 26 &
.byte CLASS_QUOTE # 27 '
.byte CLASS_LEFT_PAREN # 28 (
.byte CLASS_RIGHT_PAREN # 29 )
.byte CLASS_ASTERISK # 2A *
.byte CLASS_SINGLE # 2B +
.byte CLASS_SINGLE # 2C ,
.byte CLASS_MINUS # 2D -
.byte CLASS_DOT # 2E .
.byte CLASS_SINGLE # 2F /
.byte CLASS_ZERO # 30 0
.byte CLASS_DIGIT # 31 1
.byte CLASS_DIGIT # 32 2
.byte CLASS_DIGIT # 33 3
.byte CLASS_DIGIT # 34 4
.byte CLASS_DIGIT # 35 5
.byte CLASS_DIGIT # 36 6
.byte CLASS_DIGIT # 37 7
.byte CLASS_DIGIT # 38 8
.byte CLASS_DIGIT # 39 9
.byte CLASS_COLON # 3A :
.byte CLASS_SINGLE # 3B ;
.byte CLASS_LESS # 3C <
.byte CLASS_EQUALS # 3D =
.byte CLASS_GREATER # 3E >
.byte 0x00 # 3F ?
.byte CLASS_SINGLE # 40 @
.byte CLASS_CHARACTER # 41 A
.byte CLASS_CHARACTER # 42 B
.byte CLASS_CHARACTER # 43 C
.byte CLASS_CHARACTER # 44 D
.byte CLASS_CHARACTER # 45 E
.byte CLASS_CHARACTER # 46 F
.byte CLASS_CHARACTER # 47 G
.byte CLASS_CHARACTER # 48 H
.byte CLASS_CHARACTER # 49 I
.byte CLASS_CHARACTER # 4A J
.byte CLASS_CHARACTER # 4B K
.byte CLASS_CHARACTER # 4C L
.byte CLASS_CHARACTER # 4D M
.byte CLASS_CHARACTER # 4E N
.byte CLASS_CHARACTER # 4F O
.byte CLASS_CHARACTER # 50 P
.byte CLASS_CHARACTER # 51 Q
.byte CLASS_CHARACTER # 52 R
.byte CLASS_CHARACTER # 53 S
.byte CLASS_CHARACTER # 54 T
.byte CLASS_CHARACTER # 55 U
.byte CLASS_CHARACTER # 56 V
.byte CLASS_CHARACTER # 57 W
.byte CLASS_CHARACTER # 58 X
.byte CLASS_CHARACTER # 59 Y
.byte CLASS_CHARACTER # 5A Z
.byte CLASS_SINGLE # 5B [
.byte 0x00 # 5C \
.byte CLASS_SINGLE # 5D ]
.byte CLASS_SINGLE # 5E ^
.byte CLASS_UNDERSCORE # 5F _
.byte 0x00 # 60 `
.byte CLASS_HEX # 61 a
.byte CLASS_HEX # 62 b
.byte CLASS_HEX # 63 c
.byte CLASS_HEX # 64 d
.byte CLASS_HEX # 65 e
.byte CLASS_HEX # 66 f
.byte CLASS_CHARACTER # 67 g
.byte CLASS_CHARACTER # 68 h
.byte CLASS_CHARACTER # 69 i
.byte CLASS_CHARACTER # 6A j
.byte CLASS_CHARACTER # 6B k
.byte CLASS_CHARACTER # 6C l
.byte CLASS_CHARACTER # 6D m
.byte CLASS_CHARACTER # 6E n
.byte CLASS_CHARACTER # 6F o
.byte CLASS_CHARACTER # 70 p
.byte CLASS_CHARACTER # 71 q
.byte CLASS_CHARACTER # 72 r
.byte CLASS_CHARACTER # 73 s
.byte CLASS_CHARACTER # 74 t
.byte CLASS_CHARACTER # 75 u
.byte CLASS_CHARACTER # 76 v
.byte CLASS_CHARACTER # 77 w
.byte CLASS_X # 78 x
.byte CLASS_CHARACTER # 79 y
.byte CLASS_CHARACTER # 7A z
.byte 0x00 # 7B {
.byte CLASS_SINGLE # 7C |
.byte 0x00 # 7D }
.byte CLASS_SINGLE # 7E ~
.byte CLASS_INVALID # 7F DEL
#
# Textual keywords in the language.
#
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object
keywords:
.word 7
.ascii "program"
.word 6
.ascii "import"
.word 5
.ascii "const"
.word 3
.ascii "var"
.word 2
.ascii "if"
.word 4
.ascii "then"
.word 5
.ascii "elsif"
.word 4
.ascii "else"
.word 5
.ascii "while"
.word 2
.ascii "do"
.word 4
.ascii "proc"
.word 5
.ascii "begin"
.word 3
.ascii "end"
.word 4
.ascii "type"
.word 6
.ascii "record"
.word 5
.ascii "union"
.word 4
.ascii "true"
.word 5
.ascii "false"
.word 3
.ascii "nil"
.word 3
.ascii "xor"
.word 2
.ascii "or"
.word 6
.ascii "return"
.word 4
.ascii "cast"
.word 4
.ascii "goto"
.word 4
.ascii "case"
.word 2
.ascii "of"
.type byte_keywords, @object
byte_keywords: .ascii "&.,:;()[]^=+-*@"
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
.section .data
# The transition table describes transitions from one state to another, given
# a symbol (character class).
#
# The table has m rows and n columns, where m is the amount of states and n is
# the amount of classes. So given the current state and a classified character
# the table can be used to look up the next state.
#
# Each cell is a word long.
# - The least significant byte of the word is a row number (beginning with 0).
# It specifies the target state. "ff" means that this is an end state and no
# transition is possible.
# - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the lex_next function, which
# handles each action.
#
.type transitions, @object
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
.section .text
# Looks up the class of a character in the classification table.
# The table holds one byte per character; the character itself is the offset.
#
# Parameters:
# a0 - Character.
#
# Sets a0 to the class number.
.type classify, @function
classify:
    la t0, classification # Base of the table.
    add t0, a0, t0 # Address of the entry belonging to the character.
    lbu a0, 0(t0) # The entry is a single unsigned byte.
    ret
# Fetches the transition for a state and a character class.
#
# The transition table is stored row by row: one row per state, CLASS_COUNT
# entries per row, every entry being a 4 byte word.
#
# Parameters:
# a0 - Current state.
# a1 - Character class.
#
# Sets a0 to the table entry, the next state.
.type lookup_state, @function
lookup_state:
    li t0, CLASS_COUNT
    mul a0, a0, t0 # Index of the first entry in the row of the state.
    add a0, a0, a1 # Index of the entry for the given class.
    slli a0, a0, 2 # Scale the index by the word size (4 bytes).

    la t0, transitions
    add t0, t0, a0 # Address of the entry.
    lw a0, 0(t0) # Next state.
    ret
# Classifies a character and looks up the transition for the resulting class,
# i.e. lookup_state(state, classify(character)).
#
# Parameters:
# a0 - Current state.
# a1 - Character.
#
# Sets a0 to the next state based on the given character.
.type _next_state, @function
_next_state:
    # Prologue.
    addi sp, sp, -16
    sw s0, 8(sp)
    sw ra, 12(sp)
    addi s0, sp, 16

    # The state is kept on the stack while classify uses a0.
    sw a0, 4(sp)
    mv a0, a1
    call classify

    mv a1, a0 # Character class.
    lw a0, 4(sp) # Current state.
    call lookup_state

    # Epilogue.
    lw s0, 8(sp)
    lw ra, 12(sp)
    addi sp, sp, 16
    ret
# Decides whether an identifier is a keyword by searching the keywords table.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type: a non-zero result of _strings_index
# is returned unchanged, a zero result is replaced with TOKEN_IDENTIFIER.
.type classify_identifier, @function
classify_identifier:
    # Prologue.
    addi sp, sp, -16
    sw s0, 8(sp)
    sw ra, 12(sp)
    addi s0, sp, 16

    # The token moves to the 3rd and 4th argument, the keywords table
    # description takes the first two.
    mv a3, a1
    mv a2, a0
    la a1, keywords
    li a0, KEYWORDS_COUNT
    call _strings_index

    bnez a0, .Lclassify_identifier_end
    li a0, TOKEN_IDENTIFIER # Not found in the table, an ordinary identifier.

.Lclassify_identifier_end:
    # Epilogue.
    lw s0, 8(sp)
    lw ra, 12(sp)
    addi sp, sp, 16
    ret
# Determines the token type of a single character symbol from its position
# in the byte_keywords table.
#
# Parameters:
# a0 - Token character.
#
# Sets a0 to the appropriate token type, that is the offset of the character
# in byte_keywords plus TOKEN_IDENTIFIER + 1.
#
# NOTE(review): the result of _memchr is used without any check; presumably
# the caller only passes characters that are present in byte_keywords - confirm.
.type classify_single, @function
classify_single:
    # Prologue.
    addi sp, sp, -16
    sw s0, 8(sp)
    sw ra, 12(sp)
    addi s0, sp, 16

    mv a1, a0 # The character to look for.
    la a0, byte_keywords
    li a2, BYTE_KEYWORDS_SIZE
    call _memchr

    # Turn the returned pointer into an offset inside of the table.
    la a1, byte_keywords
    sub a0, a0, a1
    addi a0, a0, TOKEN_IDENTIFIER + 1

    # Epilogue.
    lw s0, 8(sp)
    lw ra, 12(sp)
    addi sp, sp, 16
    ret
# Classifies a symbol containing multiple characters (probably 2).
# Only the first character of the token is inspected.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to TOKEN_ASSIGN if the token starts with a colon.
#
# NOTE(review): for any other token a0 is returned untouched, so the caller
# gets the token length back instead of a token type - confirm this is intended.
.type classify_composite, @function
classify_composite:
    lbu t0, 0(a1)
    li t1, ':'
    bne t0, t1, .Lclassify_composite_end

    li a0, TOKEN_ASSIGN

.Lclassify_composite_end:
    ret
# Reads the next token from the source text.
#
# Starting in state 0x00 the function feeds one character at a time into
# _next_state. The low byte of the result is the next state, the second byte
# is the action to perform. Accumulate and skip actions continue the loop, all
# other actions finish the token and return.
#
# Parameters:
# a0 - Source text pointer.
# a1 - A pointer for the output value. The first word receives the token kind
#      and is set to zero on entry. For identifiers and integers the token
#      length and the token start pointer are written into the following two
#      words, so 12 bytes should be available.
#
# Sets a0 to the position of the next token.
.type lex_next, @function
lex_next:
# Prologue.
# Stack frame layout:
#  0(sp) - Output pointer (a1).
#  4(sp) - Token kind.
#  8(sp) - Token length.
# 12(sp) - Pointer to the beginning of the token.
# 16(sp) - Saved s2.
# 20(sp) - Saved s1.
# 24(sp) - Saved s0.
# 28(sp) - Saved ra.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
sw a1, 0(sp) # Keep the output pointer, a1 is overwritten in the loop.
sw zero, (a1) # Initialize the token kind with zero.
.Llex_next_loop:
mv a0, s2
lbu a1, (s1) # Current character.
call _next_state
li t0, 0xff
and s2, a0, t0 # Next state.
li t0, 0xff00
and t1, a0, t0 # Transition action.
srli t1, t1, 8
# Perform the provided action.
li t0, 0x01 # Accumulate action.
beq t1, t0, .Llex_next_accumulate
li t0, 0x02 # Print action.
beq t1, t0, .Llex_next_print
li t0, 0x03 # Skip action.
beq t1, t0, .Llex_next_skip
li t0, 0x04 # Delimited string action.
beq t1, t0, .Llex_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Llex_next_identifier
li t0, 0x06 # Single character symbol action.
beq t1, t0, .Llex_next_single
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Llex_next_composite
li t0, 0x08 # Integer action.
beq t1, t0, .Llex_next_integer
j .Llex_next_reject # Any other action value rejects the input.
.Llex_next_reject:
# Consume the offending character and stop. The token kind stays zero.
addi s1, s1, 1
j .Llex_next_end
.Llex_next_accumulate:
# The character belongs to the current token, continue with the next one.
addi s1, s1, 1
j .Llex_next_loop
.Llex_next_skip:
# The character is not part of any token: advance both the current position
# and the saved beginning of the token.
addi s1, s1, 1
lw t0, 12(sp)
addi t0, t0, 1
sw t0, 12(sp)
j .Llex_next_loop
.Llex_next_print:
# Finishes without consuming the current character and without writing
# a token kind. The debugging output below is disabled.
/* DEBUG
addi a0, a0, 21
sw a0, 0(sp)
addi a0, sp, 0
li a1, 1
call _write_error */
j .Llex_next_end
.Llex_next_comment:
# Consume the current character and stop. No token kind is written.
addi s1, s1, 1
j .Llex_next_end
.Llex_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
# The current character is not part of the identifier and is not consumed.
lw a1, 12(sp) # Token start.
sub a0, s1, a1 # Token length.
sw a0, 8(sp)
call classify_identifier
sw a0, 4(sp) # Token kind.
# Copy the kind, length and start pointer, which lie next to each other at
# 4(sp), 8(sp) and 12(sp), into the output value.
# Presumably _memcpy takes the destination, the source and the size.
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Llex_next_end
.Llex_next_single:
# The token is exactly one character long: continue right after the
# beginning of the token.
lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0) # The symbol itself.
call classify_single
lw a1, 0(sp)
sw a0, (a1) # Only the token kind is written.
j .Llex_next_end
.Llex_next_composite:
# The current character is the last character of the symbol, consume it.
addi s1, s1, 1
lw a1, 12(sp) # Token start.
sub a0, s1, a1 # Token length.
call classify_composite
lw a1, 0(sp)
sw a0, (a1) # Only the token kind is written.
j .Llex_next_end
.Llex_next_integer:
# Write the kind, length and start pointer directly into the output value.
# The current character is not part of the number and is not consumed.
lw t0, 0(sp)
li t1, TOKEN_INTEGER
sw t1, 0(t0)
lw t1, 12(sp)
sw t1, 8(t0) # Token start.
sub t1, s1, t1
sw t1, 4(t0) # Token length.
j .Llex_next_end
.Llex_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret

View File

@@ -1,61 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
# Cross compiler driver. The tasks below run it to compile the assembly
# sources into objects and to link the stage executables.
CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
# Directory passed to the emulator with the -L option.
SYSROOT = 'build/sysroot'
# Emulator executable used to run the stage compilers.
QEMU = 'qemu-riscv32'
# Runs a stage compiler under the emulator, feeding it the source on the
# standard input and copying everything it prints into the given output.
#
# output   - Writable object receiving the generated assembly (an open file).
# compiler - Path of the compiler executable, or an array with the path
#            followed by additional command line arguments.
# source   - Path of the source file (or a one element array containing it).
def assemble_stage(output, compiler, source)
  arguments = [QEMU, '-L', SYSROOT, *compiler]

  puts Term::ANSIColor.green(arguments * ' ')
  puts

  Open3.popen2(*arguments) do |qemu_in, qemu_out|
    # The input is written from a separate thread. Writing the whole source
    # before reading any output blocks forever as soon as the compiler fills
    # the output pipe while the source is still being written.
    feeder = Thread.new do
      begin
        qemu_in.write File.read(*source)
      ensure
        qemu_in.close # Signals the end of the input to the compiler.
      end
    end

    IO.copy_stream qemu_out, output
    feeder.join # Re-raises errors that occurred while writing the input.
  end
end
# Every assembly source in boot/ gets a task compiling it into an object file
# with the same base name in build/boot.
assembly_sources = Dir.glob('boot/*.s')

assembly_sources.each do |assembly_source|
  object_path = File.join('build/boot', "#{File.basename(assembly_source, '.s')}.o")

  file object_path => [assembly_source, 'build/boot'] do |t|
    sh CROSS_GCC, '-c', '-o', t.name, assembly_source
  end
end

# Sources linked into every stage: everything but the stage sources themselves.
library = assembly_sources.reject { |path| File.basename(path).start_with? 'stage' }
desc 'Initial stage'
file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |stage|
  # Link the stage object together with the shared sources, without libc.
  link_command = [CROSS_GCC, '-nostdlib', '-o', stage.name, *stage.prerequisites]

  sh(*link_command)
end
# Compile the second stage source with the first stage compiler.
file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t|
  # Prerequisites ending with .elna are sources, the rest is the compiler.
  elna_sources, compilers = t.prerequisites.partition { |path| path.end_with? '.elna' }

  File.open(t.name, 'w') { |assembly| assemble_stage assembly, compilers, elna_sources }
end
# Both second stage executables are linked from the generated assembly with
# the same name and the shared sources.
%w[build/boot/stage2a build/boot/stage2b].each do |executable|
  file executable => ["#{executable}.s", *library] do |stage|
    link_command = [CROSS_GCC, '-nostdlib', '-o', stage.name, *stage.prerequisites]

    sh(*link_command)
  end
end
# Compile the second stage source again, this time with the second stage
# compiler produced by the first stage.
file 'build/boot/stage2b.s' => ['build/boot/stage2a', 'boot/stage2.elna'] do |t|
  # Prerequisites ending with .elna are sources, the rest is the compiler.
  elna_sources, compilers = t.prerequisites.partition { |path| path.end_with? '.elna' }

  File.open(t.name, 'w') { |assembly| assemble_stage assembly, compilers, elna_sources }
end