Start over
This commit is contained in:
91
Rakefile
91
Rakefile
@@ -5,34 +5,99 @@
|
||||
|
||||
require 'open3'
|
||||
require 'rake/clean'
|
||||
require 'term/ansicolor'
|
||||
|
||||
CLEAN.include 'build/boot'
|
||||
CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
|
||||
SYSROOT = '../eugenios/build/sysroot'
|
||||
QEMU = 'qemu-riscv32'
|
||||
STAGES = Dir.glob('boot/stage*.elna').collect { |stage| File.basename stage, '.elna' }.sort
|
||||
|
||||
CLEAN.include 'build/boot', 'build/valid'
|
||||
|
||||
directory 'build/boot'
|
||||
directory 'build/valid'
|
||||
|
||||
task default: :boot
|
||||
|
||||
desc 'Final stage'
|
||||
task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t|
|
||||
exe, previous_output, source = t.prerequisites
|
||||
task boot: "build/valid/#{STAGES.last}"
|
||||
task boot: "build/valid/#{STAGES.last}.s"
|
||||
task boot: "boot/#{STAGES.last}.elna" do |t|
|
||||
groupped = t.prerequisites.group_by { |stage| File.extname stage }.transform_values(&:first)
|
||||
exe = groupped['']
|
||||
expected = groupped['.s']
|
||||
source = groupped['.elna']
|
||||
|
||||
cat_arguments = ['cat', source]
|
||||
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
|
||||
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
|
||||
diff_arguments = ['diff', '-Nur', '--text', expected, '-']
|
||||
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
|
||||
end
|
||||
|
||||
file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t|
|
||||
source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
|
||||
desc 'Convert previous stage language into the current stage language'
|
||||
task :convert do
|
||||
File.open('boot/stage4.elna', 'w') do |current_stage|
|
||||
li_value = nil
|
||||
|
||||
File.open t.name, 'w' do |output|
|
||||
assemble_stage output, exe, source
|
||||
File.readlines('boot/stage3.elna').each do |line|
|
||||
current_stage << line
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
STAGES.each do |stage|
|
||||
previous = stage.delete_prefix('stage').to_i.pred
|
||||
|
||||
file "build/valid/#{stage}" => "build/valid/#{stage}.s" do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
|
||||
file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t|
|
||||
exe, source = t.prerequisites
|
||||
|
||||
cat_arguments = ['cat', source]
|
||||
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
|
||||
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
|
||||
|
||||
IO.copy_stream last_stdout, t.name
|
||||
end
|
||||
|
||||
file "build/boot/#{stage}" => "build/boot/#{stage}.s" do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
|
||||
file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t|
|
||||
exe, source = t.prerequisites
|
||||
|
||||
cat_arguments = ['cat', source]
|
||||
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
|
||||
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
|
||||
|
||||
IO.copy_stream last_stdout, t.name
|
||||
end
|
||||
end
|
||||
|
||||
task test: 'build/boot/test' do |t|
|
||||
sh QEMU, '-L', SYSROOT, t.prerequisites.first
|
||||
#
|
||||
# Stage 1.
|
||||
#
|
||||
|
||||
file 'build/valid/stage1' => ['build/valid', 'build/valid/stage1.s'] do |t|
|
||||
source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' }
|
||||
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
|
||||
end
|
||||
|
||||
file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t|
|
||||
source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' }
|
||||
|
||||
cat_arguments = ['cat', *source]
|
||||
compiler_arguments = [QEMU, '-L', SYSROOT, *exe]
|
||||
last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
|
||||
|
||||
IO.copy_stream last_stdout, t.name
|
||||
end
|
||||
|
||||
file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t|
|
||||
source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' }
|
||||
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
|
||||
end
|
||||
|
@@ -2,17 +2,15 @@
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
|
||||
.global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i
|
||||
.global _memcmp, _memchr, _memmem, _memcpy, _mmap
|
||||
.global _read_file, _write_error
|
||||
.global _memcmp, _memchr, _memmem, _mmap
|
||||
.global _current, _get, _advance, _label_counter
|
||||
.global _divide_by_zero_error, _exit, _strings_index, _string_equal
|
||||
.global _divide_by_zero_error, _strings_index, _string_equal
|
||||
|
||||
.section .rodata
|
||||
|
||||
.equ SYS_READ, 63
|
||||
.equ SYS_WRITE, 64
|
||||
.equ SYS_EXIT, 93
|
||||
.equ SYS_MMAP2, 222
|
||||
.equ STDIN, 0
|
||||
.equ STDOUT, 1
|
||||
@@ -77,128 +75,6 @@ _memcmp:
|
||||
.Lmemcmp_end:
|
||||
ret
|
||||
|
||||
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
.type _is_upper, @function
|
||||
_is_upper:
|
||||
li t0, 'A' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= 'A'
|
||||
|
||||
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
|
||||
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
|
||||
|
||||
ret
|
||||
|
||||
# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
.type _is_lower, @function
|
||||
_is_lower:
|
||||
li t0, 'a' - 1
|
||||
sltu t2, t0, a0 # t2 = a0 >= 'a'
|
||||
|
||||
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
|
||||
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
|
||||
|
||||
ret
|
||||
|
||||
# Detects if the passed character is a 7-bit alpha character or an underscore.
|
||||
# The character is passed in a0.
|
||||
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
|
||||
.type _is_alpha, @function
|
||||
_is_alpha:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sw a0, 4(sp)
|
||||
|
||||
call _is_upper
|
||||
sw a0, 0(sp)
|
||||
|
||||
lw a0, 4(sp)
|
||||
call _is_lower
|
||||
|
||||
lw t0, 4(sp)
|
||||
xori t1, t0, '_'
|
||||
seqz t1, t1
|
||||
|
||||
lw t0, 0(sp)
|
||||
or a0, a0, t0
|
||||
or a0, a0, t1
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Detects whether the passed character is a digit
|
||||
# (a value between 0 and 9).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Exemined value.
|
||||
#
|
||||
# Sets a0 to 1 if it is a digit, to 0 otherwise.
|
||||
.type _is_digit, @function
|
||||
_is_digit:
|
||||
li t0, '0' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= '0'
|
||||
|
||||
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
|
||||
|
||||
and a0, t1, t2
|
||||
|
||||
ret
|
||||
|
||||
.type _is_alnum, @function
|
||||
_is_alnum:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sw a0, 4(sp)
|
||||
|
||||
call _is_alpha
|
||||
sw a0, 0(sp)
|
||||
|
||||
lw a0, 4(sp)
|
||||
call _is_digit
|
||||
|
||||
lw a1, 0(sp)
|
||||
or a0, a0, a1
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Writes a string to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Length of the string.
|
||||
# a1 - String pointer.
|
||||
.type _write_s, @function
|
||||
_write_s:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
|
||||
mv a2, a0
|
||||
li a0, STDOUT
|
||||
li a7, SYS_WRITE
|
||||
ecall
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
ret
|
||||
|
||||
# Reads standard input into a buffer.
|
||||
# a0 - Buffer pointer.
|
||||
# a1 - Buffer size.
|
||||
@@ -228,16 +104,6 @@ _read_file:
|
||||
addi sp, sp, 8
|
||||
ret
|
||||
|
||||
# Terminates the program. a0 contains the return code.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Status code.
|
||||
.type _exit, @function
|
||||
_exit:
|
||||
li a7, SYS_EXIT
|
||||
ecall
|
||||
# ret
|
||||
|
||||
.type _divide_by_zero_error, @function
|
||||
_divide_by_zero_error:
|
||||
addi a7, zero, 172 # getpid
|
||||
@@ -248,106 +114,6 @@ _divide_by_zero_error:
|
||||
ecall
|
||||
ret
|
||||
|
||||
# Writes a number to a string buffer.
|
||||
#
|
||||
# t0 - Local buffer.
|
||||
# t1 - Constant 10.
|
||||
# t2 - Current character.
|
||||
# t3 - Whether the number is negative.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
# a1 - Buffer pointer.
|
||||
#
|
||||
# Sets a0 to the length of the written number.
|
||||
.type _print_i, @function
|
||||
_print_i:
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
sw s0, 24(sp)
|
||||
addi s0, sp, 32
|
||||
|
||||
li t1, 10
|
||||
addi t0, s0, -9
|
||||
|
||||
li t3, 0
|
||||
bgez a0, .Lprint_i_digit10
|
||||
li t3, 1
|
||||
neg a0, a0
|
||||
|
||||
.Lprint_i_digit10:
|
||||
rem t2, a0, t1
|
||||
addi t2, t2, '0'
|
||||
sb t2, 0(t0)
|
||||
div a0, a0, t1
|
||||
addi t0, t0, -1
|
||||
bne zero, a0, .Lprint_i_digit10
|
||||
|
||||
beq zero, t3, .Lprint_i_write_call
|
||||
addi t2, zero, '-'
|
||||
sb t2, 0(t0)
|
||||
addi t0, t0, -1
|
||||
|
||||
.Lprint_i_write_call:
|
||||
mv a0, a1
|
||||
addi a1, t0, 1
|
||||
sub a2, s0, t0
|
||||
addi a2, a2, -9
|
||||
sw a2, 0(sp)
|
||||
|
||||
call _memcpy
|
||||
|
||||
lw a0, 0(sp)
|
||||
|
||||
lw ra, 28(sp)
|
||||
lw s0, 24(sp)
|
||||
addi sp, sp, 32
|
||||
ret
|
||||
|
||||
# Writes a number to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
.type _write_i, @function
|
||||
_write_i:
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
sw s0, 24(sp)
|
||||
addi s0, sp, 32
|
||||
|
||||
addi a1, sp, 0
|
||||
call _print_i
|
||||
|
||||
addi a1, sp, 0
|
||||
call _write_s
|
||||
|
||||
lw ra, 28(sp)
|
||||
lw s0, 24(sp)
|
||||
addi sp, sp, 32
|
||||
ret
|
||||
|
||||
# Writes a character from a0 into the standard output.
|
||||
.type _write_c, @function
|
||||
_write_c:
|
||||
# Prologue
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sb a0, 4(sp)
|
||||
li a0, STDOUT
|
||||
addi a1, sp, 4
|
||||
li a2, 1
|
||||
li a7, SYS_WRITE
|
||||
ecall
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
add sp, sp, 16
|
||||
ret
|
||||
|
||||
# a0 - Pointer to an array to get the first element.
|
||||
#
|
||||
# Dereferences a pointer and returns what is on the address in a0.
|
||||
@@ -448,34 +214,6 @@ _memmem:
|
||||
add sp, sp, 24
|
||||
ret
|
||||
|
||||
# Copies memory.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Destination.
|
||||
# a1 - Source.
|
||||
# a2 - Size.
|
||||
#
|
||||
# Preserves a0.
|
||||
.type _memcpy, @function
|
||||
_memcpy:
|
||||
mv t0, a0
|
||||
|
||||
.Lmemcpy_loop:
|
||||
beqz a2, .Lmemcpy_end
|
||||
|
||||
lbu t1, (a1)
|
||||
sb t1, (a0)
|
||||
|
||||
addi a0, a0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
j .Lmemcpy_loop
|
||||
|
||||
.Lmemcpy_end:
|
||||
mv a0, t0
|
||||
ret
|
||||
|
||||
# Searches for a string in a string array.
|
||||
#
|
||||
# Parameters:
|
||||
|
@@ -1,68 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
#
|
||||
# Tokens.
|
||||
#
|
||||
|
||||
# The constant should match the index in the keywords array in tokenizer.s.
|
||||
|
||||
.equ TOKEN_PROGRAM, 1
|
||||
.equ TOKEN_IMPORT, 2
|
||||
.equ TOKEN_CONST, 3
|
||||
.equ TOKEN_VAR, 4
|
||||
.equ TOKEN_IF, 5
|
||||
.equ TOKEN_THEN, 6
|
||||
.equ TOKEN_ELSIF, 7
|
||||
.equ TOKEN_ELSE, 8
|
||||
.equ TOKEN_WHILE, 9
|
||||
.equ TOKEN_DO, 10
|
||||
.equ TOKEN_PROC, 11
|
||||
.equ TOKEN_BEGIN, 12
|
||||
.equ TOKEN_END, 13
|
||||
.equ TOKEN_TYPE, 14
|
||||
.equ TOKEN_RECORD, 15
|
||||
.equ TOKEN_UNION, 16
|
||||
.equ TOKEN_TRUE, 17
|
||||
.equ TOKEN_FALSE, 18
|
||||
.equ TOKEN_NIL, 19
|
||||
.equ TOKEN_XOR, 20
|
||||
.equ TOKEN_OR, 21
|
||||
.equ TOKEN_RETURN, 22
|
||||
.equ TOKEN_CAST, 23
|
||||
.equ TOKEN_GOTO, 24
|
||||
.equ TOKEN_CASE, 25
|
||||
.equ TOKEN_OF, 26
|
||||
|
||||
.equ TOKEN_IDENTIFIER, 27
|
||||
# The constant should match the character index in the byte_keywords string.
|
||||
|
||||
.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
|
||||
.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
|
||||
.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
|
||||
.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
|
||||
.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
|
||||
.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
|
||||
.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
|
||||
.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
|
||||
.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
|
||||
.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
|
||||
.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
|
||||
.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
|
||||
.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
|
||||
.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
|
||||
.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
|
||||
|
||||
.equ TOKEN_ASSIGN, 43
|
||||
.equ TOKEN_INTEGER, 44
|
||||
|
||||
#
|
||||
# Symbols.
|
||||
#
|
||||
.equ TYPE_PRIMITIVE, 0x01
|
||||
.equ TYPE_POINTER, 0x02
|
||||
.equ TYPE_PROCEDURE, 0x03
|
||||
.equ INFO_PARAMETER, 0x10
|
||||
.equ INFO_LOCAL, 0x20
|
||||
.equ INFO_PROCEDURE, 0x30
|
2284
boot/stage1.s
2284
boot/stage1.s
File diff suppressed because it is too large
Load Diff
2008
boot/stage2.elna
2008
boot/stage2.elna
File diff suppressed because it is too large
Load Diff
975
boot/stage3.elna
Normal file
975
boot/stage3.elna
Normal file
@@ -0,0 +1,975 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Stage3 compiler.
|
||||
#
|
||||
# - Procedures with no arguments or one argument.
|
||||
# - Goto statements.
|
||||
# - Character and integer literals.
|
||||
# - Passing local variables to procedures.
|
||||
# - Local variables should have the format: v00,
|
||||
# where 00 is its offset from the sp register.
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type keyword_section, @object
|
||||
keyword_section: .ascii ".section"
|
||||
|
||||
.type keyword_type, @object
|
||||
keyword_type: .ascii ".type"
|
||||
|
||||
.type keyword_ret, @object
|
||||
keyword_ret: .ascii "ret"
|
||||
|
||||
.type keyword_global, @object
|
||||
keyword_global: .ascii ".globl"
|
||||
|
||||
.type keyword_proc, @object
|
||||
keyword_proc: .ascii "proc "
|
||||
|
||||
.type keyword_end, @object
|
||||
keyword_end: .ascii "end"
|
||||
|
||||
.type keyword_begin, @object
|
||||
keyword_begin: .ascii "begin"
|
||||
|
||||
.type keyword_var, @object
|
||||
keyword_var: .ascii "var"
|
||||
|
||||
.type asm_prologue, @object
|
||||
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
|
||||
|
||||
.type asm_epilogue, @object
|
||||
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
|
||||
|
||||
.type asm_type_directive, @object
|
||||
asm_type_directive: .string ".type "
|
||||
|
||||
.type asm_type_function, @object
|
||||
asm_type_function: .string ", @function\n"
|
||||
|
||||
.type asm_colon, @object
|
||||
asm_colon: .string ":\n"
|
||||
|
||||
.type asm_call, @object
|
||||
asm_call: .string "\tcall "
|
||||
|
||||
.type asm_j, @object
|
||||
asm_j: .string "\tj "
|
||||
|
||||
.type asm_li, @object
|
||||
asm_li: .string "\tli "
|
||||
|
||||
.type asm_lw, @object
|
||||
asm_lw: .string "\tlw "
|
||||
|
||||
.type asm_sw, @object
|
||||
asm_sw: .string "\tsw "
|
||||
|
||||
.type asm_mv, @object
|
||||
asm_mv: .string "mv "
|
||||
|
||||
.type asm_t0, @object
|
||||
asm_t0: .string "t0"
|
||||
|
||||
.type asm_a0, @object
|
||||
asm_a0: .string "a0"
|
||||
|
||||
.type asm_comma, @object
|
||||
asm_comma: .string ", "
|
||||
|
||||
.type asm_sp, @object
|
||||
asm_sp: .string "(sp)"
|
||||
|
||||
.section .bss
|
||||
|
||||
# When modifying also change the read size in the entry point procedure.
|
||||
.type source_code, @object
|
||||
source_code: .zero 81920
|
||||
|
||||
.section .data
|
||||
|
||||
.type source_code_position, @object
|
||||
source_code_position: .word source_code
|
||||
|
||||
.section .text
|
||||
|
||||
# Reads standard input into a buffer.
|
||||
# a0 - Buffer pointer.
|
||||
# a1 - Buffer size.
|
||||
#
|
||||
# Returns the amount of bytes written in a0.
|
||||
proc _read_file();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDIN.
|
||||
li a0, 0
|
||||
li a7, 63 # SYS_READ.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Buffer.
|
||||
# a1 - Buffer length.
|
||||
proc _write_s();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDOUT.
|
||||
li a0, 1
|
||||
li a7, 64 # SYS_WRITE.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes a number to a string buffer.
|
||||
#
|
||||
# t0 - Local buffer.
|
||||
# t1 - Constant 10.
|
||||
# t2 - Current character.
|
||||
# t3 - Whether the number is negative.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
# a1 - Buffer pointer.
|
||||
#
|
||||
# Sets a0 to the length of the written number.
|
||||
proc _print_i();
|
||||
begin
|
||||
li t1, 10
|
||||
addi t0, s0, -9
|
||||
|
||||
li t3, 0
|
||||
bgez a0, .print_i_digit10
|
||||
li t3, 1
|
||||
neg a0, a0
|
||||
|
||||
.print_i_digit10:
|
||||
rem t2, a0, t1
|
||||
addi t2, t2, '0'
|
||||
sb t2, 0(t0)
|
||||
div a0, a0, t1
|
||||
addi t0, t0, -1
|
||||
bne zero, a0, .print_i_digit10
|
||||
|
||||
beq zero, t3, .print_i_write_call
|
||||
addi t2, zero, '-'
|
||||
sb t2, 0(t0)
|
||||
addi t0, t0, -1
|
||||
|
||||
.print_i_write_call:
|
||||
mv a0, a1
|
||||
addi a1, t0, 1
|
||||
sub a2, s0, t0
|
||||
addi a2, a2, -9
|
||||
sw a2, 0(sp)
|
||||
|
||||
_memcpy();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
# Writes a number to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
proc _write_i();
|
||||
begin
|
||||
addi a1, sp, 0
|
||||
_print_i();
|
||||
|
||||
mv a1, a0
|
||||
addi a0, sp, 0
|
||||
_write_s();
|
||||
|
||||
end;
|
||||
|
||||
# Writes a character from a0 into the standard output.
|
||||
proc _write_c();
|
||||
begin
|
||||
sb a0, 0(sp)
|
||||
addi a0, sp, 0
|
||||
li a1, 1
|
||||
_write_s();
|
||||
end;
|
||||
|
||||
# Write null terminated string.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - String.
|
||||
proc _write_z();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
.write_z_loop:
|
||||
# Check for 0 character.
|
||||
lb a0, (a0)
|
||||
beqz a0, .write_z_end
|
||||
|
||||
# Print a character.
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_write_c();
|
||||
|
||||
# Advance the input string by one byte.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 1
|
||||
sw a0, 0(sp)
|
||||
|
||||
goto .write_z_loop;
|
||||
|
||||
.write_z_end:
|
||||
end;
|
||||
|
||||
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
proc _is_upper();
|
||||
begin
|
||||
li t0, 'A' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= 'A'
|
||||
|
||||
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
|
||||
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
|
||||
end;
|
||||
|
||||
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
proc _is_lower();
|
||||
begin
|
||||
li t0, 'a' - 1
|
||||
sltu t2, t0, a0 # t2 = a0 >= 'a'
|
||||
|
||||
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
|
||||
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
|
||||
end;
|
||||
|
||||
# Detects if the passed character is a 7-bit alpha character or an underscore.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Tested character.
|
||||
#
|
||||
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
|
||||
proc _is_alpha();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_upper();
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_lower(v00);
|
||||
|
||||
lw t0, 0(sp)
|
||||
xori t1, t0, '_'
|
||||
seqz t1, t1
|
||||
|
||||
lw t0, 4(sp)
|
||||
or a0, a0, t0
|
||||
or a0, a0, t1
|
||||
end;
|
||||
|
||||
# Detects whether the passed character is a digit
|
||||
# (a value between 0 and 9).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Examined value.
|
||||
#
|
||||
# Sets a0 to 1 if it is a digit, to 0 otherwise.
|
||||
proc _is_digit();
|
||||
begin
|
||||
li t0, '0' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= '0'
|
||||
|
||||
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
|
||||
|
||||
and a0, t1, t2
|
||||
end;
|
||||
|
||||
proc _is_alnum();
|
||||
begin
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_alpha();
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_digit(v04);
|
||||
|
||||
lw a1, 0(sp)
|
||||
or a0, a0, a1
|
||||
end;
|
||||
|
||||
# Reads the next token.
|
||||
#
|
||||
# Returns token length in a0.
|
||||
proc _read_token();
|
||||
begin
|
||||
la t0, source_code_position # Token pointer.
|
||||
lw t0, (t0)
|
||||
sw t0, 0(sp) # Current token position.
|
||||
sw zero, 4(sp) # Token length.
|
||||
|
||||
.read_token_loop:
|
||||
lb t0, (t0) # Current character.
|
||||
|
||||
# First we try to read a directive.
|
||||
# A directive can contain a dot and characters.
|
||||
li t1, '.'
|
||||
beq t0, t1, .read_token_next
|
||||
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_is_alnum();
|
||||
bnez a0, .read_token_next
|
||||
|
||||
goto .read_token_end;
|
||||
|
||||
.read_token_next:
|
||||
# Advance the source code position and token length.
|
||||
lw t0, 4(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 4(sp)
|
||||
|
||||
lw t0, 0(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 0(sp)
|
||||
|
||||
goto .read_token_loop;
|
||||
|
||||
.read_token_end:
|
||||
lw a0, 4(sp)
|
||||
end;
|
||||
|
||||
# a0 - First pointer.
|
||||
# a1 - Second pointer.
|
||||
# a2 - The length to compare.
|
||||
#
|
||||
# Returns 0 in a0 if memory regions are equal.
|
||||
proc _memcmp();
|
||||
begin
|
||||
mv t0, a0
|
||||
li a0, 0
|
||||
|
||||
.memcmp_loop:
|
||||
beqz a2, .memcmp_end
|
||||
|
||||
lbu t1, (t0)
|
||||
lbu t2, (a1)
|
||||
sub a0, t1, t2
|
||||
|
||||
bnez a0, .memcmp_end
|
||||
|
||||
addi t0, t0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
goto .memcmp_loop;
|
||||
|
||||
.memcmp_end:
|
||||
end;
|
||||
|
||||
# Copies memory.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Destination.
|
||||
# a1 - Source.
|
||||
# a2 - Size.
|
||||
#
|
||||
# Preserves a0.
|
||||
proc _memcpy();
|
||||
begin
|
||||
mv t0, a0
|
||||
|
||||
.memcpy_loop:
|
||||
beqz a2, .memcpy_end
|
||||
|
||||
lbu t1, (a1)
|
||||
sb t1, (a0)
|
||||
|
||||
addi a0, a0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
goto .memcpy_loop
|
||||
|
||||
.memcpy_end:
|
||||
mv a0, t0
|
||||
end;
|
||||
|
||||
# Advances the token stream by a0 bytes.
|
||||
proc _advance_token();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
add t1, t1, a0
|
||||
sw t1, (t0)
|
||||
end;
|
||||
|
||||
# Prints the current token.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
#
|
||||
# Returns a0 unchanged.
|
||||
proc _write_token();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
lw a1, 0(sp)
|
||||
_write_s();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
proc _compile_section();
|
||||
begin
|
||||
# Print and skip the ".section" (8 characters) directive and a space after it.
|
||||
_write_token(9);
|
||||
_advance_token();
|
||||
|
||||
# Read the section name.
|
||||
_read_token();
|
||||
addi a0, a0, 1
|
||||
|
||||
_write_token();
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
# Skips a line without printing it.
|
||||
proc _skip_comment();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
|
||||
.skip_comment_loop:
|
||||
# Check for newline character.
|
||||
lb t2, (t1)
|
||||
li t3, '\n'
|
||||
beq t2, t3, .skip_comment_end
|
||||
|
||||
# Advance the input string by one byte.
|
||||
addi t1, t1, 1
|
||||
sw t1, (t0)
|
||||
|
||||
goto .skip_comment_loop;
|
||||
|
||||
.skip_comment_end:
|
||||
# Skip the newline.
|
||||
addi t1, t1, 1
|
||||
sw t1, (t0)
|
||||
end;
|
||||
|
||||
# Prints and skips a line.
|
||||
proc _compile_line();
|
||||
begin
|
||||
.compile_line_loop:
|
||||
la a0, source_code_position
|
||||
lw a1, (a0)
|
||||
|
||||
lb t0, (a1)
|
||||
li t1, '\n'
|
||||
beq t0, t1, .compile_line_end
|
||||
|
||||
# Print a character.
|
||||
lw a0, (a1)
|
||||
_write_c();
|
||||
|
||||
# Advance the input string by one byte.
|
||||
_advance_token(1);
|
||||
|
||||
goto .compile_line_loop;
|
||||
|
||||
.compile_line_end:
|
||||
_write_c('\n');
|
||||
|
||||
_advance_token(1);
|
||||
end;
|
||||
|
||||
proc _compile_integer_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
_write_c('\n');
|
||||
end;
|
||||
|
||||
proc _compile_character_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
li a0, '\''
|
||||
_write_c();
|
||||
_advance_token(1);
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
li t1, '\\'
|
||||
bne a0, t1, .compile_character_literal_end
|
||||
|
||||
li a0, '\\'
|
||||
_write_c();
|
||||
_advance_token(1);
|
||||
|
||||
.compile_character_literal_end:
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
_write_c();
|
||||
|
||||
li a0, '\''
|
||||
_write_c();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
_advance_token(2);
|
||||
|
||||
end;
|
||||
|
||||
proc _compile_variable_expression();
|
||||
begin
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_advance_token(1);
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
end;
|
||||
|
||||
proc _compile_expression();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
|
||||
li t1, '\''
|
||||
beq a0, t1, .compile_expression_character_literal
|
||||
|
||||
li t1, 'v'
|
||||
beq a0, t1, .compile_expression_variable
|
||||
|
||||
_is_digit();
|
||||
bnez a0, .compile_expression_integer_literal
|
||||
|
||||
goto .compile_expression_end;
|
||||
|
||||
.compile_expression_character_literal:
|
||||
_compile_character_literal();
|
||||
goto .compile_expression_end;
|
||||
|
||||
.compile_expression_integer_literal:
|
||||
_compile_integer_literal();
|
||||
goto .compile_expression_end;
|
||||
|
||||
.compile_expression_variable:
|
||||
_compile_variable_expression();
|
||||
goto .compile_expression_end;;
|
||||
|
||||
.compile_expression_end:
|
||||
end;
|
||||
|
||||
proc _compile_call();
|
||||
begin
|
||||
# Stack variables:
|
||||
# v0 - Procedure name length.
|
||||
# v4 - Procedure name pointer.
|
||||
# v8 - Argument count.
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 4(sp)
|
||||
|
||||
sw zero, 8(sp)
|
||||
|
||||
# Skip the identifier and left paren.
|
||||
addi a0, a0, 1
|
||||
_advance_token();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ')'
|
||||
beq t0, t1, .compile_call_finalize
|
||||
|
||||
.compile_call_loop:
|
||||
_compile_expression();
|
||||
|
||||
# Save the argument on the stack.
|
||||
la a0, asm_sw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
# Add one to the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ','
|
||||
bne t0, t1, .compile_call_finalize
|
||||
|
||||
_advance_token(2);
|
||||
goto .compile_call_loop;
|
||||
|
||||
.compile_call_finalize:
|
||||
# Load the argument from the stack.
|
||||
|
||||
lw t0, 8(sp)
|
||||
beqz t0, .compile_call_end
|
||||
|
||||
# Decrement the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, -1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
_write_c('a');
|
||||
lw a0, 8(sp)
|
||||
_write_i();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
goto .compile_call_finalize;
|
||||
|
||||
.compile_call_end:
|
||||
la a0, asm_call
|
||||
_write_z();
|
||||
|
||||
lw a0, 4(sp)
|
||||
lw a1, 0(sp)
|
||||
_write_s();
|
||||
|
||||
# Skip the right paren.
|
||||
_advance_token(1);
|
||||
end;
|
||||
|
||||
proc _compile_goto();
|
||||
begin
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, asm_j
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
proc _compile_statement();
|
||||
begin
|
||||
# This is a call if the statement starts with an underscore.
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
# First character after alignment tab.
|
||||
addi t0, t0, 1
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, '_'
|
||||
beq t0, t1, .compile_statement_call
|
||||
|
||||
li t1, 'g'
|
||||
beq t0, t1, .compile_statement_goto
|
||||
|
||||
_compile_line();
|
||||
goto .compile_statement_end;
|
||||
|
||||
.compile_statement_call:
|
||||
_advance_token(1);
|
||||
_compile_call();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_goto:
|
||||
_advance_token(1);
|
||||
_compile_goto();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_semicolon:
|
||||
_advance_token(2);
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
.compile_statement_end:
|
||||
end;
|
||||
|
||||
proc _compile_procedure_body();
|
||||
begin
|
||||
.compile_procedure_body_loop:
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_end
|
||||
li a2, 3 # "end" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure_body_epilogue
|
||||
|
||||
_compile_statement();
|
||||
goto .compile_procedure_body_loop;
|
||||
|
||||
.compile_procedure_body_epilogue:
|
||||
end;
|
||||
|
||||
proc _compile_procedure();
|
||||
begin
|
||||
# Skip "proc ".
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp) # Save the procedure name length.
|
||||
|
||||
# Write .type _procedure_name, @function.
|
||||
la a0, asm_type_directive
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_type_function
|
||||
_write_z();
|
||||
|
||||
# Write procedure label, _procedure_name:
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_colon
|
||||
_write_z();
|
||||
|
||||
# Skip the function name and trailing parens, semicolon, "begin" and newline.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 10
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_prologue
|
||||
_write_z();
|
||||
|
||||
_compile_procedure_body();
|
||||
|
||||
# Write the epilogue.
|
||||
la a0, asm_epilogue
|
||||
_write_z();
|
||||
|
||||
# Skip the "end" keyword, semicolon and newline.
|
||||
_advance_token(5);
|
||||
end;
|
||||
|
||||
proc _compile_type();
|
||||
begin
|
||||
# Print and skip the ".type" (5 characters) directive and a space after it.
|
||||
_write_token(6);
|
||||
_advance_token();
|
||||
|
||||
# Read and print the symbol name.
|
||||
_read_token();
|
||||
|
||||
# Print and skip the symbol name, comma, space and @.
|
||||
addi a0, a0, 3
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
# Read the symbol type.
|
||||
_read_token();
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 12(sp)
|
||||
|
||||
# Print the symbol type and newline.
|
||||
addi a0, a0, 1
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
# Write the object definition itself.
|
||||
_compile_line();
|
||||
|
||||
.compile_type_end:
|
||||
end;
|
||||
|
||||
proc _skip_newlines();
|
||||
begin
|
||||
# Skip newlines.
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
|
||||
.skip_newlines_loop:
|
||||
lb t2, (t1)
|
||||
li t3, '\n'
|
||||
bne t2, t3, .skip_newlines_end
|
||||
beqz t2, .skip_newlines_end
|
||||
|
||||
addi t1, t1, 1
|
||||
sw t1, (t0)
|
||||
|
||||
goto .skip_newlines_loop;
|
||||
|
||||
.skip_newlines_end:
|
||||
end;
|
||||
|
||||
# Process the source code and print the generated code.
|
||||
proc _compile();
|
||||
begin
|
||||
.compile_loop:
|
||||
_skip_newlines();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
beqz t0, .compile_end
|
||||
li t1, '#'
|
||||
beq t0, t1, .compile_comment
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_section
|
||||
li a2, 8 # ".section" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_section
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_type
|
||||
li a2, 5 # ".type" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_type
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_proc
|
||||
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_global
|
||||
li a2, 6 # ".globl" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_global
|
||||
# Not a known token, exit.
|
||||
goto .compile_end;
|
||||
|
||||
.compile_section:
|
||||
_compile_section();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_type:
|
||||
_compile_type();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_global:
|
||||
_compile_line();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_comment:
|
||||
_skip_comment();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_procedure:
|
||||
_compile_procedure();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_end:
|
||||
end;
|
||||
|
||||
# Terminates the program. a0 contains the return code.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Status code.
|
||||
proc _exit();
|
||||
begin
|
||||
li a7, 93 # SYS_EXIT
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Entry point.
|
||||
.globl _start
|
||||
proc _start();
|
||||
begin
|
||||
# Read the source from the standard input.
|
||||
la a0, source_code
|
||||
li a1, 81920 # Buffer size.
|
||||
_read_file();
|
||||
_compile();
|
||||
|
||||
_exit(0);
|
||||
|
||||
end;
|
969
boot/stage4.elna
Normal file
969
boot/stage4.elna
Normal file
@@ -0,0 +1,969 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Stage3 compiler.
|
||||
#
|
||||
# - Procedures with none or one argument.
|
||||
# - Goto statements.
|
||||
# - Character and integer literals.
|
||||
# - Passing local variables to procedures.
|
||||
# - Local variables should have the format: v00,
|
||||
# where 00 is its offset from the sp register.
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type keyword_section, @object
|
||||
keyword_section: .ascii ".section"
|
||||
|
||||
.type keyword_type, @object
|
||||
keyword_type: .ascii ".type"
|
||||
|
||||
.type keyword_ret, @object
|
||||
keyword_ret: .ascii "ret"
|
||||
|
||||
.type keyword_global, @object
|
||||
keyword_global: .ascii ".globl"
|
||||
|
||||
.type keyword_proc, @object
|
||||
keyword_proc: .ascii "proc "
|
||||
|
||||
.type keyword_end, @object
|
||||
keyword_end: .ascii "end"
|
||||
|
||||
.type keyword_begin, @object
|
||||
keyword_begin: .ascii "begin"
|
||||
|
||||
.type keyword_var, @object
|
||||
keyword_var: .ascii "var"
|
||||
|
||||
.type asm_prologue, @object
|
||||
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
|
||||
|
||||
.type asm_epilogue, @object
|
||||
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
|
||||
|
||||
.type asm_type_directive, @object
|
||||
asm_type_directive: .string ".type "
|
||||
|
||||
.type asm_type_function, @object
|
||||
asm_type_function: .string ", @function\n"
|
||||
|
||||
.type asm_colon, @object
|
||||
asm_colon: .string ":\n"
|
||||
|
||||
.type asm_call, @object
|
||||
asm_call: .string "\tcall "
|
||||
|
||||
.type asm_j, @object
|
||||
asm_j: .string "\tj "
|
||||
|
||||
.type asm_li, @object
|
||||
asm_li: .string "\tli "
|
||||
|
||||
.type asm_lw, @object
|
||||
asm_lw: .string "\tlw "
|
||||
|
||||
.type asm_sw, @object
|
||||
asm_sw: .string "\tsw "
|
||||
|
||||
.type asm_mv, @object
|
||||
asm_mv: .string "mv "
|
||||
|
||||
.type asm_t0, @object
|
||||
asm_t0: .string "t0"
|
||||
|
||||
.type asm_a0, @object
|
||||
asm_a0: .string "a0"
|
||||
|
||||
.type asm_comma, @object
|
||||
asm_comma: .string ", "
|
||||
|
||||
.type asm_sp, @object
|
||||
asm_sp: .string "(sp)"
|
||||
|
||||
.section .bss
|
||||
|
||||
# When modifying also change the read size in the entry point procedure.
|
||||
.type source_code, @object
|
||||
source_code: .zero 81920
|
||||
|
||||
.section .data
|
||||
|
||||
.type source_code_position, @object
|
||||
source_code_position: .word source_code
|
||||
|
||||
.section .text
|
||||
|
||||
# Reads standard input into a buffer.
|
||||
# a0 - Buffer pointer.
|
||||
# a1 - Buffer size.
|
||||
#
|
||||
# Returns the amount of bytes written in a0.
|
||||
proc _read_file();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDIN.
|
||||
li a0, 0
|
||||
li a7, 63 # SYS_READ.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Buffer.
|
||||
# a1 - Buffer length.
|
||||
proc _write_s();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDOUT.
|
||||
li a0, 1
|
||||
li a7, 64 # SYS_WRITE.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes a number to a string buffer.
|
||||
#
|
||||
# t0 - Local buffer.
|
||||
# t1 - Constant 10.
|
||||
# t2 - Current character.
|
||||
# t3 - Whether the number is negative.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
# a1 - Buffer pointer.
|
||||
#
|
||||
# Sets a0 to the length of the written number.
|
||||
proc _print_i();
|
||||
begin
|
||||
li t1, 10
|
||||
addi t0, s0, -9
|
||||
|
||||
li t3, 0
|
||||
bgez a0, .print_i_digit10
|
||||
li t3, 1
|
||||
neg a0, a0
|
||||
|
||||
.print_i_digit10:
|
||||
rem t2, a0, t1
|
||||
addi t2, t2, '0'
|
||||
sb t2, 0(t0)
|
||||
div a0, a0, t1
|
||||
addi t0, t0, -1
|
||||
bne zero, a0, .print_i_digit10
|
||||
|
||||
beq zero, t3, .print_i_write_call
|
||||
addi t2, zero, '-'
|
||||
sb t2, 0(t0)
|
||||
addi t0, t0, -1
|
||||
|
||||
.print_i_write_call:
|
||||
mv a0, a1
|
||||
addi a1, t0, 1
|
||||
sub a2, s0, t0
|
||||
addi a2, a2, -9
|
||||
sw a2, 0(sp)
|
||||
|
||||
_memcpy();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
# Writes a number to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
proc _write_i();
|
||||
begin
|
||||
addi a1, sp, 0
|
||||
_print_i();
|
||||
|
||||
mv a1, a0
|
||||
addi a0, sp, 0
|
||||
_write_s();
|
||||
|
||||
end;
|
||||
|
||||
# Writes a character from a0 into the standard output.
|
||||
proc _write_c();
|
||||
begin
|
||||
sb a0, 0(sp)
|
||||
addi a0, sp, 0
|
||||
li a1, 1
|
||||
_write_s();
|
||||
end;
|
||||
|
||||
# Write null terminated string.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - String.
|
||||
proc _write_z();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
.write_z_loop:
|
||||
# Check for 0 character.
|
||||
lb a0, (a0)
|
||||
beqz a0, .write_z_end
|
||||
|
||||
# Print a character.
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_write_c();
|
||||
|
||||
# Advance the input string by one byte.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 1
|
||||
sw a0, 0(sp)
|
||||
|
||||
goto .write_z_loop;
|
||||
|
||||
.write_z_end:
|
||||
end;
|
||||
|
||||
# Tests whether a0 holds an uppercase ASCII letter ('A' through 'Z').
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is uppercase, to 0 otherwise.
proc _is_upper();
begin
	# Upper bound first: the two comparisons are independent of each other.
	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'

	li t1, 'A' - 1
	sltu t1, t1, a0 # t1 = a0 >= 'A'

	and a0, t2, t1 # a0 = a0 >= 'A' & a0 <= 'Z'
end;
|
||||
|
||||
# Tests whether a0 holds a lowercase ASCII letter ('a' through 'z').
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is lowercase, to 0 otherwise.
proc _is_lower();
begin
	# Upper bound first: the two comparisons are independent of each other.
	sltiu t2, a0, 'z' + 1 # t2 = a0 <= 'z'

	li t1, 'a' - 1
	sltu t1, t1, a0 # t1 = a0 >= 'a'

	and a0, t2, t1 # a0 = a0 >= 'a' & a0 <= 'z'
end;
|
||||
|
||||
# Detects if the passed character is a 7-bit alpha character or an underscore.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Tested character.
|
||||
#
|
||||
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
|
||||
proc _is_alpha();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_upper();
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_lower(v00);
|
||||
|
||||
lw t0, 0(sp)
|
||||
xori t1, t0, '_'
|
||||
seqz t1, t1
|
||||
|
||||
lw t0, 4(sp)
|
||||
or a0, a0, t0
|
||||
or a0, a0, t1
|
||||
end;
|
||||
|
||||
# Tests whether the passed character is a decimal digit
# ('0' through '9').
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
	# Upper bound first: the two comparisons are independent of each other.
	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'

	li t1, '0' - 1
	sltu t1, t1, a0 # t1 = a0 >= '0'

	and a0, t2, t1
end;
|
||||
|
||||
proc _is_alnum();
|
||||
begin
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_alpha();
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_digit(v04);
|
||||
|
||||
lw a1, 0(sp)
|
||||
or a0, a0, a1
|
||||
end;
|
||||
|
||||
# Reads the next token.
|
||||
#
|
||||
# Returns token length in a0.
|
||||
proc _read_token();
|
||||
begin
|
||||
la t0, source_code_position # Token pointer.
|
||||
lw t0, (t0)
|
||||
sw t0, 0(sp) # Current token position.
|
||||
sw zero, 4(sp) # Token length.
|
||||
|
||||
.read_token_loop:
|
||||
lb t0, (t0) # Current character.
|
||||
|
||||
# First we try to read a directive.
|
||||
# A directive can contain a dot and characters.
|
||||
li t1, '.'
|
||||
beq t0, t1, .read_token_next
|
||||
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_is_alnum();
|
||||
bnez a0, .read_token_next
|
||||
|
||||
goto .read_token_end;
|
||||
|
||||
.read_token_next:
|
||||
# Advance the source code position and token length.
|
||||
lw t0, 4(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 4(sp)
|
||||
|
||||
lw t0, 0(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 0(sp)
|
||||
|
||||
goto .read_token_loop;
|
||||
|
||||
.read_token_end:
|
||||
lw a0, 4(sp)
|
||||
end;
|
||||
|
||||
# a0 - First pointer.
|
||||
# a1 - Second pointer.
|
||||
# a2 - The length to compare.
|
||||
#
|
||||
# Returns 0 in a0 if memory regions are equal.
|
||||
proc _memcmp();
|
||||
begin
|
||||
mv t0, a0
|
||||
li a0, 0
|
||||
|
||||
.memcmp_loop:
|
||||
beqz a2, .memcmp_end
|
||||
|
||||
lbu t1, (t0)
|
||||
lbu t2, (a1)
|
||||
sub a0, t1, t2
|
||||
|
||||
bnez a0, .memcmp_end
|
||||
|
||||
addi t0, t0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
goto .memcmp_loop;
|
||||
|
||||
.memcmp_end:
|
||||
end;
|
||||
|
||||
# Copies memory byte by byte.
#
# t0 - Saved destination pointer.
# t1 - Byte being copied.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0. a1 and a2 are modified.
proc _memcpy();
begin
	# Remember the destination, a0 is used as the write cursor below.
	mv t0, a0

	.memcpy_loop:
	beqz a2, .memcpy_end

	lbu t1, (a1)
	sb t1, (a0)

	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	# The statement terminator is required: after a goto the compiler skips
	# two bytes (the semicolon and the newline).
	goto .memcpy_loop;

	.memcpy_end:
	mv a0, t0
end;
|
||||
|
||||
# Advances the token stream by a0 bytes.
|
||||
proc _advance_token();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
add t1, t1, a0
|
||||
sw t1, (t0)
|
||||
end;
|
||||
|
||||
# Prints the current token.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
#
|
||||
# Returns a0 unchanged.
|
||||
proc _write_token();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
lw a1, 0(sp)
|
||||
_write_s();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
proc _compile_section();
|
||||
begin
|
||||
# Print and skip the ".section" (8 characters) directive and a space after it.
|
||||
_write_token(9);
|
||||
_advance_token();
|
||||
|
||||
# Read the section name.
|
||||
_read_token();
|
||||
addi a0, a0, 1
|
||||
|
||||
_write_token();
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
# Skips a comment line without printing it.
#
# Advances source_code_position past the next newline character. If the
# input ends (zero byte) before a newline is found, the position is left
# on the zero byte so that the caller sees the end of the input.
#
# t0 - Address of source_code_position.
# t1 - Current position.
# t2 - Current character.
# t3 - Newline character.
proc _skip_comment();
begin
	la t0, source_code_position
	lw t1, (t0)
	li t3, '\n'

	.skip_comment_loop:
	lb t2, (t1)

	# Do not run past the end of the input if the last line is a comment
	# without a trailing newline.
	beqz t2, .skip_comment_end

	# Consume the character, the newline itself included.
	addi t1, t1, 1
	bne t2, t3, .skip_comment_loop

	.skip_comment_end:
	sw t1, (t0)
end;
|
||||
|
||||
# Prints the current source line to the standard output and skips it,
# including the terminating newline.
#
# t0 - Current character.
# t1 - Newline character.
proc _compile_line();
begin
	.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)

	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end

	# Print the character that was just read. Reusing t0 avoids loading a
	# whole word from a position that is not necessarily word aligned.
	mv a0, t0
	_write_c();

	# Advance the input string by one byte.
	_advance_token(1);

	goto .compile_line_loop;

	.compile_line_end:
	_write_c('\n');

	_advance_token(1);
end;
|
||||
|
||||
proc _compile_integer_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
_write_c('\n');
|
||||
end;
|
||||
|
||||
proc _compile_character_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_write_c('\'');
|
||||
_advance_token(1);
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
li t1, '\\'
|
||||
bne a0, t1, .compile_character_literal_end
|
||||
|
||||
_write_c('\\');
|
||||
_advance_token(1);
|
||||
|
||||
.compile_character_literal_end:
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
_write_c();
|
||||
|
||||
_write_c('\'');
|
||||
_write_c('\n');
|
||||
|
||||
_advance_token(2);
|
||||
|
||||
end;
|
||||
|
||||
proc _compile_variable_expression();
|
||||
begin
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_advance_token(1);
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
end;
|
||||
|
||||
# Compiles the expression at the current source position.
#
# The kind of the expression is chosen by its first character:
# a single quote starts a character literal, 'v' starts a local variable
# and a digit starts an integer literal. Nothing is done for any other
# character.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)

	li t1, '\''
	beq a0, t1, .compile_expression_character_literal

	li t1, 'v'
	beq a0, t1, .compile_expression_variable

	_is_digit();
	bnez a0, .compile_expression_integer_literal

	goto .compile_expression_end;

	.compile_expression_character_literal:
	_compile_character_literal();
	goto .compile_expression_end;

	.compile_expression_integer_literal:
	_compile_integer_literal();
	goto .compile_expression_end;

	.compile_expression_variable:
	_compile_variable_expression();
	goto .compile_expression_end;

	.compile_expression_end:
end;
|
||||
|
||||
proc _compile_call();
|
||||
begin
|
||||
# Stack variables:
|
||||
# v0 - Procedure name length.
|
||||
# v4 - Procedure name pointer.
|
||||
# v8 - Argument count.
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 4(sp)
|
||||
|
||||
sw zero, 8(sp)
|
||||
|
||||
# Skip the identifier and left paren.
|
||||
addi a0, a0, 1
|
||||
_advance_token();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ')'
|
||||
beq t0, t1, .compile_call_finalize
|
||||
|
||||
.compile_call_loop:
|
||||
_compile_expression();
|
||||
|
||||
# Save the argument on the stack.
|
||||
la a0, asm_sw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
# Add one to the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ','
|
||||
bne t0, t1, .compile_call_finalize
|
||||
|
||||
_advance_token(2);
|
||||
goto .compile_call_loop;
|
||||
|
||||
.compile_call_finalize:
|
||||
# Load the argument from the stack.
|
||||
|
||||
lw t0, 8(sp)
|
||||
beqz t0, .compile_call_end
|
||||
|
||||
# Decrement the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, -1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
_write_c('a');
|
||||
lw a0, 8(sp)
|
||||
_write_i();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
goto .compile_call_finalize;
|
||||
|
||||
.compile_call_end:
|
||||
la a0, asm_call
|
||||
_write_z();
|
||||
|
||||
_write_s(v04, v00);
|
||||
|
||||
# Skip the right paren.
|
||||
_advance_token(1);
|
||||
end;
|
||||
|
||||
proc _compile_goto();
|
||||
begin
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, asm_j
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
proc _compile_statement();
|
||||
begin
|
||||
# This is a call if the statement starts with an underscore.
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
# First character after alignment tab.
|
||||
addi t0, t0, 1
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, '_'
|
||||
beq t0, t1, .compile_statement_call
|
||||
|
||||
li t1, 'g'
|
||||
beq t0, t1, .compile_statement_goto
|
||||
|
||||
_compile_line();
|
||||
goto .compile_statement_end;
|
||||
|
||||
.compile_statement_call:
|
||||
_advance_token(1);
|
||||
_compile_call();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_goto:
|
||||
_advance_token(1);
|
||||
_compile_goto();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_semicolon:
|
||||
_advance_token(2);
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
.compile_statement_end:
|
||||
end;
|
||||
|
||||
proc _compile_procedure_body();
|
||||
begin
|
||||
.compile_procedure_body_loop:
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_end
|
||||
li a2, 3 # "end" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure_body_epilogue
|
||||
|
||||
_compile_statement();
|
||||
goto .compile_procedure_body_loop;
|
||||
|
||||
.compile_procedure_body_epilogue:
|
||||
end;
|
||||
|
||||
proc _compile_procedure();
|
||||
begin
|
||||
# Skip "proc ".
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp) # Save the procedure name length.
|
||||
|
||||
# Write .type _procedure_name, @function.
|
||||
la a0, asm_type_directive
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_type_function
|
||||
_write_z();
|
||||
|
||||
# Write procedure label, _procedure_name:
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_colon
|
||||
_write_z();
|
||||
|
||||
# Skip the function name and trailing parens, semicolon, "begin" and newline.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 10
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_prologue
|
||||
_write_z();
|
||||
|
||||
_compile_procedure_body();
|
||||
|
||||
# Write the epilogue.
|
||||
la a0, asm_epilogue
|
||||
_write_z();
|
||||
|
||||
# Skip the "end" keyword, semicolon and newline.
|
||||
_advance_token(5);
|
||||
end;
|
||||
|
||||
# Copies a ".type" directive and the definition line that follows it
# to the output.
proc _compile_type();
begin
	# Print and skip the ".type" (5 characters) directive and a space after it.
	_write_token(6);
	_advance_token();

	# Read and print the symbol name.
	_read_token();

	# Print and skip the symbol name, comma, space and @.
	addi a0, a0, 3
	_write_token();
	_advance_token();

	# Read the symbol type.
	_read_token();

	# Print the symbol type and newline.
	addi a0, a0, 1
	_write_token();
	_advance_token();

	# Write the object definition itself.
	_compile_line();
end;
|
||||
|
||||
# Advances source_code_position past consecutive newline characters.
#
# t0 - Address of source_code_position.
# t1 - Current position.
# t2 - Current character.
# t3 - Newline character.
proc _skip_newlines();
begin
	la t0, source_code_position
	lw t1, (t0)
	li t3, '\n'

	.skip_newlines_loop:
	lb t2, (t1)

	# Any other character, the terminating zero byte included, ends the run.
	bne t2, t3, .skip_newlines_end

	addi t1, t1, 1

	goto .skip_newlines_loop;

	.skip_newlines_end:
	# Store the new position once, after the whole run was skipped.
	sw t1, (t0)
end;
|
||||
|
||||
# Process the source code and print the generated code.
|
||||
proc _compile();
|
||||
begin
|
||||
.compile_loop:
|
||||
_skip_newlines();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
beqz t0, .compile_end
|
||||
li t1, '#'
|
||||
beq t0, t1, .compile_comment
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_section
|
||||
li a2, 8 # ".section" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_section
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_type
|
||||
li a2, 5 # ".type" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_type
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_proc
|
||||
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_global
|
||||
li a2, 6 # ".globl" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_global
|
||||
# Not a known token, exit.
|
||||
goto .compile_end;
|
||||
|
||||
.compile_section:
|
||||
_compile_section();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_type:
|
||||
_compile_type();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_global:
|
||||
_compile_line();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_comment:
|
||||
_skip_comment();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_procedure:
|
||||
_compile_procedure();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_end:
|
||||
end;
|
||||
|
||||
# Terminates the program. a0 contains the return code.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Status code.
|
||||
proc _exit();
|
||||
begin
|
||||
li a7, 93 # SYS_EXIT
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Entry point.
|
||||
.globl _start
|
||||
proc _start();
|
||||
begin
|
||||
# Read the source from the standard input.
|
||||
la a0, source_code
|
||||
li a1, 81920 # Buffer size.
|
||||
_read_file();
|
||||
_compile();
|
||||
|
||||
_exit(0);
|
||||
|
||||
end;
|
969
boot/stage5.elna
Normal file
969
boot/stage5.elna
Normal file
@@ -0,0 +1,969 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Stage3 compiler.
|
||||
#
|
||||
# - Procedures with none or one argument.
|
||||
# - Goto statements.
|
||||
# - Character and integer literals.
|
||||
# - Passing local variables to procedures.
|
||||
# - Local variables should have the format: v00,
|
||||
# where 00 is its offset from the sp register.
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type keyword_section, @object
|
||||
keyword_section: .ascii ".section"
|
||||
|
||||
.type keyword_type, @object
|
||||
keyword_type: .ascii ".type"
|
||||
|
||||
.type keyword_ret, @object
|
||||
keyword_ret: .ascii "ret"
|
||||
|
||||
.type keyword_global, @object
|
||||
keyword_global: .ascii ".globl"
|
||||
|
||||
.type keyword_proc, @object
|
||||
keyword_proc: .ascii "proc "
|
||||
|
||||
.type keyword_end, @object
|
||||
keyword_end: .ascii "end"
|
||||
|
||||
.type keyword_begin, @object
|
||||
keyword_begin: .ascii "begin"
|
||||
|
||||
.type keyword_var, @object
|
||||
keyword_var: .ascii "var"
|
||||
|
||||
.type asm_prologue, @object
|
||||
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
|
||||
|
||||
.type asm_epilogue, @object
|
||||
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
|
||||
|
||||
.type asm_type_directive, @object
|
||||
asm_type_directive: .string ".type "
|
||||
|
||||
.type asm_type_function, @object
|
||||
asm_type_function: .string ", @function\n"
|
||||
|
||||
.type asm_colon, @object
|
||||
asm_colon: .string ":\n"
|
||||
|
||||
.type asm_call, @object
|
||||
asm_call: .string "\tcall "
|
||||
|
||||
.type asm_j, @object
|
||||
asm_j: .string "\tj "
|
||||
|
||||
.type asm_li, @object
|
||||
asm_li: .string "\tli "
|
||||
|
||||
.type asm_lw, @object
|
||||
asm_lw: .string "\tlw "
|
||||
|
||||
.type asm_sw, @object
|
||||
asm_sw: .string "\tsw "
|
||||
|
||||
.type asm_mv, @object
|
||||
asm_mv: .string "mv "
|
||||
|
||||
.type asm_t0, @object
|
||||
asm_t0: .string "t0"
|
||||
|
||||
.type asm_a0, @object
|
||||
asm_a0: .string "a0"
|
||||
|
||||
.type asm_comma, @object
|
||||
asm_comma: .string ", "
|
||||
|
||||
.type asm_sp, @object
|
||||
asm_sp: .string "(sp)"
|
||||
|
||||
.section .bss
|
||||
|
||||
# When modifying also change the read size in the entry point procedure.
|
||||
.type source_code, @object
|
||||
source_code: .zero 81920
|
||||
|
||||
.section .data
|
||||
|
||||
.type source_code_position, @object
|
||||
source_code_position: .word source_code
|
||||
|
||||
.section .text
|
||||
|
||||
# Reads standard input into a buffer.
|
||||
# a0 - Buffer pointer.
|
||||
# a1 - Buffer size.
|
||||
#
|
||||
# Returns the amount of bytes written in a0.
|
||||
proc _read_file();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDIN.
|
||||
li a0, 0
|
||||
li a7, 63 # SYS_READ.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Buffer.
|
||||
# a1 - Buffer length.
|
||||
proc _write_s();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDOUT.
|
||||
li a0, 1
|
||||
li a7, 64 # SYS_WRITE.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes a number to a string buffer.
|
||||
#
|
||||
# t0 - Local buffer.
|
||||
# t1 - Constant 10.
|
||||
# t2 - Current character.
|
||||
# t3 - Whether the number is negative.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
# a1 - Buffer pointer.
|
||||
#
|
||||
# Sets a0 to the length of the written number.
|
||||
proc _print_i();
|
||||
begin
|
||||
li t1, 10
|
||||
addi t0, s0, -9
|
||||
|
||||
li t3, 0
|
||||
bgez a0, .print_i_digit10
|
||||
li t3, 1
|
||||
neg a0, a0
|
||||
|
||||
.print_i_digit10:
|
||||
rem t2, a0, t1
|
||||
addi t2, t2, '0'
|
||||
sb t2, 0(t0)
|
||||
div a0, a0, t1
|
||||
addi t0, t0, -1
|
||||
bne zero, a0, .print_i_digit10
|
||||
|
||||
beq zero, t3, .print_i_write_call
|
||||
addi t2, zero, '-'
|
||||
sb t2, 0(t0)
|
||||
addi t0, t0, -1
|
||||
|
||||
.print_i_write_call:
|
||||
mv a0, a1
|
||||
addi a1, t0, 1
|
||||
sub a2, s0, t0
|
||||
addi a2, a2, -9
|
||||
sw a2, 0(sp)
|
||||
|
||||
_memcpy();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
# Writes a number to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
proc _write_i();
|
||||
begin
|
||||
addi a1, sp, 0
|
||||
_print_i();
|
||||
|
||||
mv a1, a0
|
||||
addi a0, sp, 0
|
||||
_write_s();
|
||||
|
||||
end;
|
||||
|
||||
# Writes a character from a0 into the standard output.
|
||||
proc _write_c();
|
||||
begin
|
||||
sb a0, 0(sp)
|
||||
addi a0, sp, 0
|
||||
li a1, 1
|
||||
_write_s();
|
||||
end;
|
||||
|
||||
# Write null terminated string.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - String.
|
||||
proc _write_z();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
.write_z_loop:
|
||||
# Check for 0 character.
|
||||
lb a0, (a0)
|
||||
beqz a0, .write_z_end
|
||||
|
||||
# Print a character.
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_write_c();
|
||||
|
||||
# Advance the input string by one byte.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 1
|
||||
sw a0, 0(sp)
|
||||
|
||||
goto .write_z_loop;
|
||||
|
||||
.write_z_end:
|
||||
end;
|
||||
|
||||
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
proc _is_upper();
|
||||
begin
|
||||
li t0, 'A' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= 'A'
|
||||
|
||||
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
|
||||
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
|
||||
end;
|
||||
|
||||
# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
proc _is_lower();
|
||||
begin
|
||||
li t0, 'a' - 1
|
||||
sltu t2, t0, a0 # t2 = a0 >= 'a'
|
||||
|
||||
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
|
||||
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
|
||||
end;
|
||||
|
||||
# Detects if the passed character is a 7-bit alpha character or an underscore.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Tested character.
|
||||
#
|
||||
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
|
||||
proc _is_alpha();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_upper();
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_lower(v00);
|
||||
|
||||
lw t0, 0(sp)
|
||||
xori t1, t0, '_'
|
||||
seqz t1, t1
|
||||
|
||||
lw t0, 4(sp)
|
||||
or a0, a0, t0
|
||||
or a0, a0, t1
|
||||
end;
|
||||
|
||||
# Detects whether the passed character is a digit
|
||||
# (a value between 0 and 9).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Examined value.
|
||||
#
|
||||
# Sets a0 to 1 if it is a digit, to 0 otherwise.
|
||||
proc _is_digit();
|
||||
begin
|
||||
li t0, '0' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= '0'
|
||||
|
||||
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
|
||||
|
||||
and a0, t1, t2
|
||||
end;
|
||||
|
||||
proc _is_alnum();
|
||||
begin
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_alpha();
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_digit(v04);
|
||||
|
||||
lw a1, 0(sp)
|
||||
or a0, a0, a1
|
||||
end;
|
||||
|
||||
# Reads the next token.
|
||||
#
|
||||
# Returns token length in a0.
|
||||
proc _read_token();
|
||||
begin
|
||||
la t0, source_code_position # Token pointer.
|
||||
lw t0, (t0)
|
||||
sw t0, 0(sp) # Current token position.
|
||||
sw zero, 4(sp) # Token length.
|
||||
|
||||
.read_token_loop:
|
||||
lb t0, (t0) # Current character.
|
||||
|
||||
# First we try to read a directive.
|
||||
# A directive can contain a dot and characters.
|
||||
li t1, '.'
|
||||
beq t0, t1, .read_token_next
|
||||
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_is_alnum();
|
||||
bnez a0, .read_token_next
|
||||
|
||||
goto .read_token_end;
|
||||
|
||||
.read_token_next:
|
||||
# Advance the source code position and token length.
|
||||
lw t0, 4(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 4(sp)
|
||||
|
||||
lw t0, 0(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 0(sp)
|
||||
|
||||
goto .read_token_loop;
|
||||
|
||||
.read_token_end:
|
||||
lw a0, 4(sp)
|
||||
end;
|
||||
|
||||
# a0 - First pointer.
|
||||
# a1 - Second pointer.
|
||||
# a2 - The length to compare.
|
||||
#
|
||||
# Returns 0 in a0 if memory regions are equal.
|
||||
proc _memcmp();
|
||||
begin
|
||||
mv t0, a0
|
||||
li a0, 0
|
||||
|
||||
.memcmp_loop:
|
||||
beqz a2, .memcmp_end
|
||||
|
||||
lbu t1, (t0)
|
||||
lbu t2, (a1)
|
||||
sub a0, t1, t2
|
||||
|
||||
bnez a0, .memcmp_end
|
||||
|
||||
addi t0, t0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
goto .memcmp_loop;
|
||||
|
||||
.memcmp_end:
|
||||
end;
|
||||
|
||||
# Copies memory byte by byte.
#
# t0 - Saved destination pointer.
# t1 - Byte being copied.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0. a1 and a2 are modified.
proc _memcpy();
begin
	# Remember the destination, a0 is used as the write cursor below.
	mv t0, a0

	.memcpy_loop:
	beqz a2, .memcpy_end

	lbu t1, (a1)
	sb t1, (a0)

	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	# The statement terminator is required: after a goto the compiler skips
	# two bytes (the semicolon and the newline).
	goto .memcpy_loop;

	.memcpy_end:
	mv a0, t0
end;
|
||||
|
||||
# Advances the token stream by a0 bytes.
|
||||
proc _advance_token();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
add t1, t1, a0
|
||||
sw t1, (t0)
|
||||
end;
|
||||
|
||||
# Prints the current token.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
#
|
||||
# Returns a0 unchanged.
|
||||
proc _write_token();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
lw a1, 0(sp)
|
||||
_write_s();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
proc _compile_section();
|
||||
begin
|
||||
# Print and skip the ".section" (8 characters) directive and a space after it.
|
||||
_write_token(9);
|
||||
_advance_token();
|
||||
|
||||
# Read the section name.
|
||||
_read_token();
|
||||
addi a0, a0, 1
|
||||
|
||||
_write_token();
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
# Skips a comment line without printing it.
#
# Advances source_code_position past the next newline character. If the
# input ends (zero byte) before a newline is found, the position is left
# on the zero byte so that the caller sees the end of the input.
#
# t0 - Address of source_code_position.
# t1 - Current position.
# t2 - Current character.
# t3 - Newline character.
proc _skip_comment();
begin
	la t0, source_code_position
	lw t1, (t0)
	li t3, '\n'

	.skip_comment_loop:
	lb t2, (t1)

	# Do not run past the end of the input if the last line is a comment
	# without a trailing newline.
	beqz t2, .skip_comment_end

	# Consume the character, the newline itself included.
	addi t1, t1, 1
	bne t2, t3, .skip_comment_loop

	.skip_comment_end:
	sw t1, (t0)
end;
|
||||
|
||||
# Prints and skips a line.
|
||||
proc _compile_line();
begin
.compile_line_loop:
	# Load the current source position and the character it points to.
	la a0, source_code_position
	lw a1, (a0)

	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end

	# Print a character. Only a single byte is loaded: the position advances
	# byte by byte, so a word load would be misaligned most of the time and
	# would read bytes beyond the current character.
	lb a0, (a1)
	_write_c();

	# Advance the input string by one byte.
	_advance_token(1);

	goto .compile_line_loop;

.compile_line_end:
	# Terminate the printed line and skip the newline in the input.
	_write_c('\n');

	_advance_token(1);
end;
|
||||
|
||||
proc _compile_integer_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
_write_c('\n');
|
||||
end;
|
||||
|
||||
proc _compile_character_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_write_c('\'');
|
||||
_advance_token(1);
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
li t1, '\\'
|
||||
bne a0, t1, .compile_character_literal_end
|
||||
|
||||
_write_c('\\');
|
||||
_advance_token(1);
|
||||
|
||||
.compile_character_literal_end:
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
_write_c();
|
||||
|
||||
_write_c('\'');
|
||||
_write_c('\n');
|
||||
|
||||
_advance_token(2);
|
||||
|
||||
end;
|
||||
|
||||
proc _compile_variable_expression();
|
||||
begin
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_advance_token(1);
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
end;
|
||||
|
||||
proc _compile_expression();
begin
	# The first character of the expression determines its kind.
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)

	# A single quote starts a character literal.
	li t1, '\''
	beq a0, t1, .compile_expression_character_literal

	# "v" starts a stack variable reference.
	li t1, 'v'
	beq a0, t1, .compile_expression_variable

	# A digit starts an integer literal.
	_is_digit();
	bnez a0, .compile_expression_integer_literal

	# Anything else is not an expression, nothing is generated.
	goto .compile_expression_end;

.compile_expression_character_literal:
	_compile_character_literal();
	goto .compile_expression_end;

.compile_expression_integer_literal:
	_compile_integer_literal();
	goto .compile_expression_end;

.compile_expression_variable:
	_compile_variable_expression();
	goto .compile_expression_end;

.compile_expression_end:
end;
|
||||
|
||||
proc _compile_call();
|
||||
begin
|
||||
# Stack variables:
|
||||
# v0 - Procedure name length.
|
||||
# v4 - Procedure name pointer.
|
||||
# v8 - Argument count.
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 4(sp)
|
||||
|
||||
sw zero, 8(sp)
|
||||
|
||||
# Skip the identifier and left paren.
|
||||
addi a0, a0, 1
|
||||
_advance_token();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ')'
|
||||
beq t0, t1, .compile_call_finalize
|
||||
|
||||
.compile_call_loop:
|
||||
_compile_expression();
|
||||
|
||||
# Save the argument on the stack.
|
||||
la a0, asm_sw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
# Add one to the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ','
|
||||
bne t0, t1, .compile_call_finalize
|
||||
|
||||
_advance_token(2);
|
||||
goto .compile_call_loop;
|
||||
|
||||
.compile_call_finalize:
|
||||
# Load the argument from the stack.
|
||||
|
||||
lw t0, 8(sp)
|
||||
beqz t0, .compile_call_end
|
||||
|
||||
# Decrement the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, -1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
_write_c('a');
|
||||
lw a0, 8(sp)
|
||||
_write_i();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
goto .compile_call_finalize;
|
||||
|
||||
.compile_call_end:
|
||||
la a0, asm_call
|
||||
_write_z();
|
||||
|
||||
_write_s(v04, v00);
|
||||
|
||||
# Skip the right paren.
|
||||
_advance_token(1);
|
||||
end;
|
||||
|
||||
proc _compile_goto();
|
||||
begin
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, asm_j
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
proc _compile_statement();
|
||||
begin
|
||||
# This is a call if the statement starts with an underscore.
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
# First character after alignment tab.
|
||||
addi t0, t0, 1
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, '_'
|
||||
beq t0, t1, .compile_statement_call
|
||||
|
||||
li t1, 'g'
|
||||
beq t0, t1, .compile_statement_goto
|
||||
|
||||
_compile_line();
|
||||
goto .compile_statement_end;
|
||||
|
||||
.compile_statement_call:
|
||||
_advance_token(1);
|
||||
_compile_call();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_goto:
|
||||
_advance_token(1);
|
||||
_compile_goto();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_semicolon:
|
||||
_advance_token(2);
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
.compile_statement_end:
|
||||
end;
|
||||
|
||||
proc _compile_procedure_body();
|
||||
begin
|
||||
.compile_procedure_body_loop:
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_end
|
||||
li a2, 3 # "end" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure_body_epilogue
|
||||
|
||||
_compile_statement();
|
||||
goto .compile_procedure_body_loop;
|
||||
|
||||
.compile_procedure_body_epilogue:
|
||||
end;
|
||||
|
||||
proc _compile_procedure();
|
||||
begin
|
||||
# Skip "proc ".
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp) # Save the procedure name length.
|
||||
|
||||
# Write .type _procedure_name, @function.
|
||||
la a0, asm_type_directive
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_type_function
|
||||
_write_z();
|
||||
|
||||
# Write procedure label, _procedure_name:
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_colon
|
||||
_write_z();
|
||||
|
||||
# Skip the function name and trailing parens, semicolon, "begin" and newline.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 10
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_prologue
|
||||
_write_z();
|
||||
|
||||
_compile_procedure_body();
|
||||
|
||||
# Write the epilogue.
|
||||
la a0, asm_epilogue
|
||||
_write_z();
|
||||
|
||||
# Skip the "end" keyword, semicolon and newline.
|
||||
_advance_token(5);
|
||||
end;
|
||||
|
||||
proc _compile_type();
|
||||
begin
|
||||
# Print and skip the ".type" (5 characters) directive and a space after it.
|
||||
_write_token(6);
|
||||
_advance_token();
|
||||
|
||||
# Read and print the symbol name.
|
||||
_read_token();
|
||||
|
||||
# Print and skip the symbol name, comma, space and @.
|
||||
addi a0, a0, 3
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
# Read the symbol type.
|
||||
_read_token();
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 12(sp)
|
||||
|
||||
# Print the symbol type and newline.
|
||||
addi a0, a0, 1
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
# Write the object definition itself.
|
||||
_compile_line();
|
||||
|
||||
.compile_type_end:
|
||||
end;
|
||||
|
||||
proc _skip_newlines();
begin
	# Skip newlines.
	la t0, source_code_position
	lw t1, (t0)

.skip_newlines_loop:
	lb t2, (t1)
	li t3, '\n'
	# Any character other than a newline, including the terminating NUL,
	# ends the loop, so no separate check for zero is required.
	bne t2, t3, .skip_newlines_end

	# Consume the newline and store the updated position.
	addi t1, t1, 1
	sw t1, (t0)

	goto .skip_newlines_loop;

.skip_newlines_end:
end;
|
||||
|
||||
# Process the source code and print the generated code.
|
||||
proc _compile();
|
||||
begin
|
||||
.compile_loop:
|
||||
_skip_newlines();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
beqz t0, .compile_end
|
||||
li t1, '#'
|
||||
beq t0, t1, .compile_comment
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_section
|
||||
li a2, 8 # ".section" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_section
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_type
|
||||
li a2, 5 # ".type" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_type
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_proc
|
||||
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_global
|
||||
li a2, 6 # ".globl" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_global
|
||||
# Not a known token, exit.
|
||||
goto .compile_end;
|
||||
|
||||
.compile_section:
|
||||
_compile_section();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_type:
|
||||
_compile_type();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_global:
|
||||
_compile_line();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_comment:
|
||||
_skip_comment();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_procedure:
|
||||
_compile_procedure();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_end:
|
||||
end;
|
||||
|
||||
# Terminates the program. a0 contains the return code.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Status code.
|
||||
proc _exit();
|
||||
begin
|
||||
li a7, 93 # SYS_EXIT
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Entry point.
|
||||
.globl _start
|
||||
proc _start();
|
||||
begin
|
||||
# Read the source from the standard input.
|
||||
la a0, source_code
|
||||
li a1, 81920 # Buffer size.
|
||||
_read_file();
|
||||
_compile();
|
||||
|
||||
_exit(0);
|
||||
|
||||
end;
|
@@ -1,14 +0,0 @@
|
||||
program
|
||||
|
||||
proc main(x: Word, y: Word)
|
||||
begin
|
||||
_write_s(4, @x);
|
||||
_write_s(4, @y);
|
||||
|
||||
y := 0x0a2c3063;
|
||||
_write_s(4, @y)
|
||||
end
|
||||
|
||||
begin
|
||||
main(0x0a2c3061, 0x0a2c3062)
|
||||
end.
|
616
boot/tokenizer.s
616
boot/tokenizer.s
@@ -1,616 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
.global lex_next, classification, transitions, keywords, byte_keywords
|
||||
|
||||
.include "boot/definitions.inc"
|
||||
|
||||
.section .rodata
|
||||
|
||||
#
|
||||
# Classification table assigns each possible character to a group (class). All
|
||||
# characters of the same group are handled equivalently.
|
||||
#
|
||||
# Classification:
|
||||
#
|
||||
.equ CLASS_INVALID, 0x00
|
||||
.equ CLASS_DIGIT, 0x01
|
||||
.equ CLASS_CHARACTER, 0x02
|
||||
.equ CLASS_SPACE, 0x03
|
||||
.equ CLASS_COLON, 0x04
|
||||
.equ CLASS_EQUALS, 0x05
|
||||
.equ CLASS_LEFT_PAREN, 0x06
|
||||
.equ CLASS_RIGHT_PAREN, 0x07
|
||||
.equ CLASS_ASTERISK, 0x08
|
||||
.equ CLASS_UNDERSCORE, 0x09
|
||||
.equ CLASS_SINGLE, 0x0a
|
||||
.equ CLASS_HEX, 0x0b
|
||||
.equ CLASS_ZERO, 0x0c
|
||||
.equ CLASS_X, 0x0d
|
||||
.equ CLASS_EOF, 0x0e
|
||||
.equ CLASS_DOT, 0x0f
|
||||
.equ CLASS_MINUS, 0x10
|
||||
.equ CLASS_QUOTE, 0x11
|
||||
.equ CLASS_GREATER, 0x12
|
||||
.equ CLASS_LESS, 0x13
|
||||
|
||||
.equ CLASS_COUNT, 20
|
||||
|
||||
.type classification, @object
|
||||
classification:
|
||||
.byte CLASS_EOF # 00 NUL
|
||||
.byte CLASS_INVALID # 01 SOH
|
||||
.byte CLASS_INVALID # 02 STX
|
||||
.byte CLASS_INVALID # 03 ETX
|
||||
.byte CLASS_INVALID # 04 EOT
|
||||
.byte CLASS_INVALID # 05 ENQ
|
||||
.byte CLASS_INVALID # 06 ACK
|
||||
.byte CLASS_INVALID # 07 BEL
|
||||
.byte CLASS_INVALID # 08 BS
|
||||
.byte CLASS_SPACE # 09 HT
|
||||
.byte CLASS_SPACE # 0A LF
|
||||
.byte CLASS_INVALID # 0B VT
|
||||
.byte CLASS_INVALID # 0C FF
|
||||
.byte CLASS_SPACE # 0D CR
|
||||
.byte CLASS_INVALID # 0E SO
|
||||
.byte CLASS_INVALID # 0F SI
|
||||
.byte CLASS_INVALID # 10 DLE
|
||||
.byte CLASS_INVALID # 11 DC1
|
||||
.byte CLASS_INVALID # 12 DC2
|
||||
.byte CLASS_INVALID # 13 DC3
|
||||
.byte CLASS_INVALID # 14 DC4
|
||||
.byte CLASS_INVALID # 15 NAK
|
||||
.byte CLASS_INVALID # 16 SYN
|
||||
.byte CLASS_INVALID # 17 ETB
|
||||
.byte CLASS_INVALID # 18 CAN
|
||||
.byte CLASS_INVALID # 19 EM
|
||||
.byte CLASS_INVALID # 1A SUB
|
||||
.byte CLASS_INVALID # 1B ESC
|
||||
.byte CLASS_INVALID # 1C FS
|
||||
.byte CLASS_INVALID # 1D GS
|
||||
.byte CLASS_INVALID # 1E RS
|
||||
.byte CLASS_INVALID # 1F US
|
||||
.byte CLASS_SPACE # 20 Space
|
||||
.byte CLASS_SINGLE # 21 !
|
||||
.byte CLASS_QUOTE # 22 "
|
||||
.byte 0x00 # 23 #
|
||||
.byte 0x00 # 24 $
|
||||
.byte CLASS_SINGLE # 25 %
|
||||
.byte CLASS_SINGLE # 26 &
|
||||
.byte CLASS_QUOTE # 27 '
|
||||
.byte CLASS_LEFT_PAREN # 28 (
|
||||
.byte CLASS_RIGHT_PAREN # 29 )
|
||||
.byte CLASS_ASTERISK # 2A *
|
||||
.byte CLASS_SINGLE # 2B +
|
||||
.byte CLASS_SINGLE # 2C ,
|
||||
.byte CLASS_MINUS # 2D -
|
||||
.byte CLASS_DOT # 2E .
|
||||
.byte CLASS_SINGLE # 2F /
|
||||
.byte CLASS_ZERO # 30 0
|
||||
.byte CLASS_DIGIT # 31 1
|
||||
.byte CLASS_DIGIT # 32 2
|
||||
.byte CLASS_DIGIT # 33 3
|
||||
.byte CLASS_DIGIT # 34 4
|
||||
.byte CLASS_DIGIT # 35 5
|
||||
.byte CLASS_DIGIT # 36 6
|
||||
.byte CLASS_DIGIT # 37 7
|
||||
.byte CLASS_DIGIT # 38 8
|
||||
.byte CLASS_DIGIT # 39 9
|
||||
.byte CLASS_COLON # 3A :
|
||||
.byte CLASS_SINGLE # 3B ;
|
||||
.byte CLASS_LESS # 3C <
|
||||
.byte CLASS_EQUALS # 3D =
|
||||
.byte CLASS_GREATER # 3E >
|
||||
.byte 0x00 # 3F ?
|
||||
.byte CLASS_SINGLE # 40 @
|
||||
.byte CLASS_CHARACTER # 41 A
|
||||
.byte CLASS_CHARACTER # 42 B
|
||||
.byte CLASS_CHARACTER # 43 C
|
||||
.byte CLASS_CHARACTER # 44 D
|
||||
.byte CLASS_CHARACTER # 45 E
|
||||
.byte CLASS_CHARACTER # 46 F
|
||||
.byte CLASS_CHARACTER # 47 G
|
||||
.byte CLASS_CHARACTER # 48 H
|
||||
.byte CLASS_CHARACTER # 49 I
|
||||
.byte CLASS_CHARACTER # 4A J
|
||||
.byte CLASS_CHARACTER # 4B K
|
||||
.byte CLASS_CHARACTER # 4C L
|
||||
.byte CLASS_CHARACTER # 4D M
|
||||
.byte CLASS_CHARACTER # 4E N
|
||||
.byte CLASS_CHARACTER # 4F O
|
||||
.byte CLASS_CHARACTER # 50 P
|
||||
.byte CLASS_CHARACTER # 51 Q
|
||||
.byte CLASS_CHARACTER # 52 R
|
||||
.byte CLASS_CHARACTER # 53 S
|
||||
.byte CLASS_CHARACTER # 54 T
|
||||
.byte CLASS_CHARACTER # 55 U
|
||||
.byte CLASS_CHARACTER # 56 V
|
||||
.byte CLASS_CHARACTER # 57 W
|
||||
.byte CLASS_CHARACTER # 58 X
|
||||
.byte CLASS_CHARACTER # 59 Y
|
||||
.byte CLASS_CHARACTER # 5A Z
|
||||
.byte CLASS_SINGLE # 5B [
|
||||
.byte 0x00 # 5C \
|
||||
.byte CLASS_SINGLE # 5D ]
|
||||
.byte CLASS_SINGLE # 5E ^
|
||||
.byte CLASS_UNDERSCORE # 5F _
|
||||
.byte 0x00 # 60 `
|
||||
.byte CLASS_HEX # 61 a
|
||||
.byte CLASS_HEX # 62 b
|
||||
.byte CLASS_HEX # 63 c
|
||||
.byte CLASS_HEX # 64 d
|
||||
.byte CLASS_HEX # 65 e
|
||||
.byte CLASS_HEX # 66 f
|
||||
.byte CLASS_CHARACTER # 67 g
|
||||
.byte CLASS_CHARACTER # 68 h
|
||||
.byte CLASS_CHARACTER # 69 i
|
||||
.byte CLASS_CHARACTER # 6A j
|
||||
.byte CLASS_CHARACTER # 6B k
|
||||
.byte CLASS_CHARACTER # 6C l
|
||||
.byte CLASS_CHARACTER # 6D m
|
||||
.byte CLASS_CHARACTER # 6E n
|
||||
.byte CLASS_CHARACTER # 6F o
|
||||
.byte CLASS_CHARACTER # 70 p
|
||||
.byte CLASS_CHARACTER # 71 q
|
||||
.byte CLASS_CHARACTER # 72 r
|
||||
.byte CLASS_CHARACTER # 73 s
|
||||
.byte CLASS_CHARACTER # 74 t
|
||||
.byte CLASS_CHARACTER # 75 u
|
||||
.byte CLASS_CHARACTER # 76 v
|
||||
.byte CLASS_CHARACTER # 77 w
|
||||
.byte CLASS_X # 78 x
|
||||
.byte CLASS_CHARACTER # 79 y
|
||||
.byte CLASS_CHARACTER # 7A z
|
||||
.byte 0x00 # 7B {
|
||||
.byte CLASS_SINGLE # 7C |
|
||||
.byte 0x00 # 7D }
|
||||
.byte CLASS_SINGLE # 7E ~
|
||||
.byte CLASS_INVALID # 7F DEL
|
||||
|
||||
#
|
||||
# Textual keywords in the language.
|
||||
#
|
||||
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
|
||||
|
||||
.type keywords, @object
|
||||
keywords:
|
||||
.word 7
|
||||
.ascii "program"
|
||||
.word 6
|
||||
.ascii "import"
|
||||
.word 5
|
||||
.ascii "const"
|
||||
.word 3
|
||||
.ascii "var"
|
||||
.word 2
|
||||
.ascii "if"
|
||||
.word 4
|
||||
.ascii "then"
|
||||
.word 5
|
||||
.ascii "elsif"
|
||||
.word 4
|
||||
.ascii "else"
|
||||
.word 5
|
||||
.ascii "while"
|
||||
.word 2
|
||||
.ascii "do"
|
||||
.word 4
|
||||
.ascii "proc"
|
||||
.word 5
|
||||
.ascii "begin"
|
||||
.word 3
|
||||
.ascii "end"
|
||||
.word 4
|
||||
.ascii "type"
|
||||
.word 6
|
||||
.ascii "record"
|
||||
.word 5
|
||||
.ascii "union"
|
||||
.word 4
|
||||
.ascii "true"
|
||||
.word 5
|
||||
.ascii "false"
|
||||
.word 3
|
||||
.ascii "nil"
|
||||
.word 3
|
||||
.ascii "xor"
|
||||
.word 2
|
||||
.ascii "or"
|
||||
.word 6
|
||||
.ascii "return"
|
||||
.word 4
|
||||
.ascii "cast"
|
||||
.word 4
|
||||
.ascii "goto"
|
||||
.word 4
|
||||
.ascii "case"
|
||||
.word 2
|
||||
.ascii "of"
|
||||
|
||||
.type byte_keywords, @object
|
||||
byte_keywords: .ascii "&.,:;()[]^=+-*@"
|
||||
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
|
||||
|
||||
.section .data
|
||||
|
||||
# The transition table describes transitions from one state to another, given
|
||||
# a symbol (character class).
|
||||
#
|
||||
# The table has m rows and n columns, where m is the amount of states and n is
|
||||
# the amount of classes. So given the current state and a classified character
|
||||
# the table can be used to look up the next state.
|
||||
#
|
||||
# Each cell is a word long.
|
||||
# - The least significant byte of the word is a row number (beginning with 0).
|
||||
# It specifies the target state. "ff" means that this is an end state and no
|
||||
# transition is possible.
|
||||
# - The next byte is the action that should be performed when transitioning.
|
||||
# For the meaning of actions see labels in the lex_next function, which
|
||||
# handles each action.
|
||||
#
|
||||
.type transitions, @object
|
||||
transitions:
|
||||
# Invalid Digit Alpha Space : = ( )
|
||||
# * _ Single Hex 0 x NUL .
|
||||
# - " or ' > <
|
||||
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
|
||||
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
|
||||
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
|
||||
|
||||
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
|
||||
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
|
||||
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
|
||||
|
||||
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
|
||||
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
|
||||
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
|
||||
|
||||
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
|
||||
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
|
||||
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
|
||||
|
||||
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
|
||||
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
|
||||
|
||||
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
|
||||
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
|
||||
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
|
||||
|
||||
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
|
||||
|
||||
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
|
||||
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
|
||||
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
|
||||
|
||||
.section .text
|
||||
|
||||
# Returns the class from the classification table for the given character.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Character.
|
||||
#
|
||||
# Sets a0 to the class number.
|
||||
.type classify, @function
classify:
	# The classification table has only 128 entries (0x00 - 0x7F). Bytes with
	# the high bit set would index past its end, so they are reported as
	# invalid instead of reading unrelated data.
	li t0, 0x80
	bgeu a0, t0, .Lclassify_invalid

	la t0, classification
	add t0, t0, a0 # Character class pointer.
	lbu a0, (t0) # Character class.
	ret

.Lclassify_invalid:
	li a0, CLASS_INVALID
	ret
|
||||
|
||||
# Given the current state and a character class, calculates the next state.
|
||||
|
||||
# Parameters:
|
||||
# a0 - Current state.
|
||||
# a1 - Character class.
|
||||
#
|
||||
# Sets a0 to the next state.
|
||||
.type lookup_state, @function
|
||||
lookup_state:
|
||||
li t0, CLASS_COUNT
|
||||
mul a0, a0, t0 # Transition row.
|
||||
add a0, a0, a1 # Transition column.
|
||||
|
||||
li t0, 4
|
||||
mul a0, a0, t0 # Multiply by the word size.
|
||||
|
||||
la t0, transitions
|
||||
add t0, t0, a0
|
||||
lw a0, (t0) # Next state.
|
||||
|
||||
ret
|
||||
|
||||
# Chains classify and lookup_state.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Current state.
|
||||
# a1 - Character.
|
||||
#
|
||||
# Sets a0 to the next state based on the given character.
|
||||
.type _next_state, @function
|
||||
_next_state:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sw a0, 4(sp)
|
||||
mv a0, a1
|
||||
call classify
|
||||
|
||||
mv a1, a0
|
||||
lw a0, 4(sp)
|
||||
call lookup_state
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Takes an identifier and checks whether it's a keyword.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
# a1 - Token pointer.
|
||||
#
|
||||
# Sets a0 to the appropriate token type.
|
||||
.type classify_identifier, @function
|
||||
classify_identifier:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
mv a2, a0
|
||||
mv a3, a1
|
||||
li a0, KEYWORDS_COUNT
|
||||
la a1, keywords
|
||||
call _strings_index
|
||||
|
||||
bnez a0, .Lclassify_identifier_end
|
||||
li a0, TOKEN_IDENTIFIER
|
||||
|
||||
.Lclassify_identifier_end:
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Takes a symbol and determines its type.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token character.
|
||||
#
|
||||
# Sets a0 to the appropriate token type.
|
||||
.type classify_single, @function
|
||||
classify_single:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
mv a1, a0
|
||||
li a2, BYTE_KEYWORDS_SIZE
|
||||
la a0, byte_keywords
|
||||
call _memchr
|
||||
|
||||
la a1, byte_keywords
|
||||
sub a0, a0, a1
|
||||
addi a0, a0, TOKEN_IDENTIFIER + 1
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Classifies a symbol containing multiple characters (probably 2).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
# a1 - Token pointer.
|
||||
#
|
||||
# Sets a0 to the appropriate token type.
|
||||
.type classify_composite, @function
|
||||
classify_composite:
|
||||
lbu t0, 0(a1)
|
||||
li t1, ':'
|
||||
beq t0, t1, .Lclassify_composite_assign
|
||||
|
||||
j .Lclassify_composite_end
|
||||
|
||||
.Lclassify_composite_assign:
|
||||
li a0, TOKEN_ASSIGN
|
||||
j .Lclassify_composite_end
|
||||
|
||||
.Lclassify_composite_end:
|
||||
ret
|
||||
|
||||
# Reads the next token from the source text and reports its kind.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Source text pointer.
|
||||
# a1 - A pointer for output value, the token kind. 4 Bytes.
|
||||
#
|
||||
# Sets a0 to the position of the next token.
|
||||
.type lex_next, @function
|
||||
lex_next:
|
||||
# Prologue.
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
sw s0, 24(sp)
|
||||
addi s0, sp, 32
|
||||
|
||||
sw s1, 20(sp) # Preserve s1 used for current source text position.
|
||||
mv s1, a0
|
||||
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
|
||||
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
|
||||
|
||||
sw s2, 16(sp) # Preserve s2 containing the current state.
|
||||
li s2, 0x00 # Initial, start state.
|
||||
|
||||
sw a1, 0(sp)
|
||||
sw zero, (a1) # Initialize.
|
||||
|
||||
.Llex_next_loop:
|
||||
mv a0, s2
|
||||
lbu a1, (s1)
|
||||
call _next_state
|
||||
|
||||
li t0, 0xff
|
||||
and s2, a0, t0 # Next state.
|
||||
|
||||
li t0, 0xff00
|
||||
and t1, a0, t0 # Transition action.
|
||||
srli t1, t1, 8
|
||||
|
||||
# Perform the provided action.
|
||||
li t0, 0x01 # Accumulate action.
|
||||
beq t1, t0, .Llex_next_accumulate
|
||||
|
||||
li t0, 0x02 # Print action.
|
||||
beq t1, t0, .Llex_next_print
|
||||
|
||||
li t0, 0x03 # Skip action.
|
||||
beq t1, t0, .Llex_next_skip
|
||||
|
||||
li t0, 0x04 # Delimited string action.
|
||||
beq t1, t0, .Llex_next_comment
|
||||
|
||||
li t0, 0x05 # Finalize identifier.
|
||||
beq t1, t0, .Llex_next_identifier
|
||||
|
||||
li t0, 0x06 # Single character symbol action.
|
||||
beq t1, t0, .Llex_next_single
|
||||
|
||||
li t0, 0x07 # An action for symbols containing multiple characters.
|
||||
beq t1, t0, .Llex_next_composite
|
||||
|
||||
li t0, 0x08 # Integer action.
|
||||
beq t1, t0, .Llex_next_integer
|
||||
|
||||
j .Llex_next_reject
|
||||
|
||||
.Llex_next_reject:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_accumulate:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Llex_next_loop
|
||||
|
||||
.Llex_next_skip:
|
||||
addi s1, s1, 1
|
||||
lw t0, 12(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 12(sp)
|
||||
|
||||
j .Llex_next_loop
|
||||
|
||||
.Llex_next_print:
|
||||
/* DEBUG
|
||||
addi a0, a0, 21
|
||||
sw a0, 0(sp)
|
||||
addi a0, sp, 0
|
||||
li a1, 1
|
||||
call _write_error */
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_comment:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_identifier:
|
||||
# An identifier can be a textual keyword.
|
||||
# Check the kind of the token and write it into the output parameter.
|
||||
lw a1, 12(sp)
|
||||
sub a0, s1, a1
|
||||
sw a0, 8(sp)
|
||||
call classify_identifier
|
||||
sw a0, 4(sp)
|
||||
lw a0, 0(sp)
|
||||
addi a1, sp, 4
|
||||
li a2, 12
|
||||
call _memcpy
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_single:
|
||||
lw a0, 12(sp)
|
||||
addi s1, a0, 1
|
||||
lbu a0, (a0)
|
||||
call classify_single
|
||||
lw a1, 0(sp)
|
||||
sw a0, (a1)
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_composite:
|
||||
addi s1, s1, 1
|
||||
lw a1, 12(sp)
|
||||
sub a0, s1, a1
|
||||
call classify_composite
|
||||
lw a1, 0(sp)
|
||||
sw a0, (a1)
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_integer:
|
||||
lw t0, 0(sp)
|
||||
li t1, TOKEN_INTEGER
|
||||
sw t1, 0(t0)
|
||||
lw t1, 12(sp)
|
||||
sw t1, 8(t0)
|
||||
sub t1, s1, t1
|
||||
sw t1, 4(t0)
|
||||
|
||||
j .Llex_next_end
|
||||
|
||||
.Llex_next_end:
|
||||
mv a0, s1 # Return the advanced text pointer.
|
||||
|
||||
# Restore saved registers.
|
||||
lw s1, 20(sp)
|
||||
lw s2, 16(sp)
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 28(sp)
|
||||
lw s0, 24(sp)
|
||||
addi sp, sp, 32
|
||||
ret
|
@@ -1,61 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
# frozen_string_literal: true
|
||||
|
||||
CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
|
||||
SYSROOT = 'build/sysroot'
|
||||
QEMU = 'qemu-riscv32'
|
||||
|
||||
# Runs a stage compiler under QEMU, feeding it the source text on standard
# input and copying everything it prints to standard output into +output+.
# The executed command line is echoed in green before it runs.
#
# @param output [IO] stream receiving the compiler output.
# @param compiler [Array<String>, String] compiler executable; it is splatted
#   into the QEMU command line (the visible callers pass a one-element array).
# @param source [Array<String>, String] path of the source file; it is
#   splatted into File.read (the visible callers pass a one-element array).
def assemble_stage(output, compiler, source)
  command = [QEMU, '-L', SYSROOT, *compiler]

  puts Term::ANSIColor.green(command.join(' '))
  puts

  Open3.popen2(*command) do |compiler_input, compiler_output|
    # The whole source is written first and the pipe is closed so that the
    # compiler sees the end of its input before the output is collected.
    compiler_input.write File.read(*source)
    compiler_input.close

    IO.copy_stream compiler_output, output
    compiler_output.close
  end
end
|
||||
|
||||
library = []
|
||||
|
||||
Dir.glob('boot/*.s').each do |assembly_source|
|
||||
source_basename = Pathname.new(assembly_source).basename
|
||||
target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o')
|
||||
|
||||
file target_object.to_s => [assembly_source, 'build/boot'] do |t|
|
||||
sh CROSS_GCC, '-c', '-o', t.name, assembly_source
|
||||
end
|
||||
library << assembly_source unless source_basename.to_s.start_with? 'stage'
|
||||
end
|
||||
|
||||
desc 'Initial stage'
|
||||
file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
|
||||
file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t|
|
||||
source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
|
||||
|
||||
File.open t.name, 'w' do |output|
|
||||
assemble_stage output, exe, source
|
||||
end
|
||||
end
|
||||
|
||||
['build/boot/stage2a', 'build/boot/stage2b'].each do |exe|
|
||||
file exe => [exe.ext('.s'), *library] do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
end
|
||||
|
||||
file 'build/boot/stage2b.s' => ['build/boot/stage2a', 'boot/stage2.elna'] do |t|
|
||||
source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
|
||||
|
||||
File.open t.name, 'w' do |output|
|
||||
assemble_stage output, exe, source
|
||||
end
|
||||
end
|
Reference in New Issue
Block a user