summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Rakefile91
-rw-r--r--boot/common-boot.s268
-rw-r--r--boot/definitions.inc68
-rw-r--r--boot/stage1.s1850
-rw-r--r--boot/stage2.elna2056
-rw-r--r--boot/stage3.elna975
-rw-r--r--boot/stage4.elna969
-rw-r--r--boot/stage5.elna969
-rw-r--r--boot/test.elna14
-rw-r--r--boot/tokenizer.s616
-rw-r--r--rakelib/stage.rake61
11 files changed, 4438 insertions, 3499 deletions
diff --git a/Rakefile b/Rakefile
index 6b38038..3c9a245 100644
--- a/Rakefile
+++ b/Rakefile
@@ -5,34 +5,99 @@
require 'open3'
require 'rake/clean'
-require 'term/ansicolor'
-CLEAN.include 'build/boot'
+CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
+SYSROOT = '../eugenios/build/sysroot'
+QEMU = 'qemu-riscv32'
+STAGES = Dir.glob('boot/stage*.elna').collect { |stage| File.basename stage, '.elna' }.sort
+
+CLEAN.include 'build/boot', 'build/valid'
directory 'build/boot'
+directory 'build/valid'
+
+task default: :boot
desc 'Final stage'
-task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t|
- exe, previous_output, source = t.prerequisites
+task boot: "build/valid/#{STAGES.last}"
+task boot: "build/valid/#{STAGES.last}.s"
+task boot: "boot/#{STAGES.last}.elna" do |t|
+ groupped = t.prerequisites.group_by { |stage| File.extname stage }.transform_values(&:first)
+ exe = groupped['']
+ expected = groupped['.s']
+ source = groupped['.elna']
cat_arguments = ['cat', source]
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
- diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
+ diff_arguments = ['diff', '-Nur', '--text', expected, '-']
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
end
-file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t|
- source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
+desc 'Convert previous stage language into the current stage language'
+task :convert do
+ File.open('boot/stage4.elna', 'w') do |current_stage|
+ li_value = nil
- File.open t.name, 'w' do |output|
- assemble_stage output, exe, source
+ File.readlines('boot/stage3.elna').each do |line|
+ current_stage << line
+ end
end
end
-file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t|
- sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
+STAGES.each do |stage|
+ previous = stage.delete_prefix('stage').to_i.pred
+
+ file "build/valid/#{stage}" => "build/valid/#{stage}.s" do |t|
+ sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
+ end
+
+ file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t|
+ exe, source = t.prerequisites
+
+ cat_arguments = ['cat', source]
+ compiler_arguments = [QEMU, '-L', SYSROOT, exe]
+ last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
+
+ IO.copy_stream last_stdout, t.name
+ end
+
+ file "build/boot/#{stage}" => "build/boot/#{stage}.s" do |t|
+ sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
+ end
+
+ file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t|
+ exe, source = t.prerequisites
+
+ cat_arguments = ['cat', source]
+ compiler_arguments = [QEMU, '-L', SYSROOT, exe]
+ last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
+
+ IO.copy_stream last_stdout, t.name
+ end
+end
+
+#
+# Stage 1.
+#
+
+file 'build/valid/stage1' => ['build/valid', 'build/valid/stage1.s'] do |t|
+ source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' }
+
+ sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
end
-task test: 'build/boot/test' do |t|
- sh QEMU, '-L', SYSROOT, t.prerequisites.first
+file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t|
+ source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' }
+
+ cat_arguments = ['cat', *source]
+ compiler_arguments = [QEMU, '-L', SYSROOT, *exe]
+ last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments)
+
+ IO.copy_stream last_stdout, t.name
+end
+
+file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t|
+ source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' }
+
+ sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
end
diff --git a/boot/common-boot.s b/boot/common-boot.s
index f61321e..9305d40 100644
--- a/boot/common-boot.s
+++ b/boot/common-boot.s
@@ -2,17 +2,15 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
-.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
-.global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i
-.global _memcmp, _memchr, _memmem, _memcpy, _mmap
+.global _read_file, _write_error
+.global _memcmp, _memchr, _memmem, _mmap
.global _current, _get, _advance, _label_counter
-.global _divide_by_zero_error, _exit, _strings_index, _string_equal
+.global _divide_by_zero_error, _strings_index, _string_equal
.section .rodata
.equ SYS_READ, 63
.equ SYS_WRITE, 64
-.equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0
.equ STDOUT, 1
@@ -77,128 +75,6 @@ _memcmp:
.Lmemcmp_end:
ret
-# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
-.type _is_upper, @function
-_is_upper:
- li t0, 'A' - 1
- sltu t1, t0, a0 # t1 = a0 >= 'A'
-
- sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
- and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
-
- ret
-
-# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0.
-.type _is_lower, @function
-_is_lower:
- li t0, 'a' - 1
- sltu t2, t0, a0 # t2 = a0 >= 'a'
-
- sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
- and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
-
- ret
-
-# Detects if the passed character is a 7-bit alpha character or an underscore.
-# The character is passed in a0.
-# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
-.type _is_alpha, @function
-_is_alpha:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- sw a0, 4(sp)
-
- call _is_upper
- sw a0, 0(sp)
-
- lw a0, 4(sp)
- call _is_lower
-
- lw t0, 4(sp)
- xori t1, t0, '_'
- seqz t1, t1
-
- lw t0, 0(sp)
- or a0, a0, t0
- or a0, a0, t1
-
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
-
-# Detects whether the passed character is a digit
-# (a value between 0 and 9).
-#
-# Parameters:
-# a0 - Exemined value.
-#
-# Sets a0 to 1 if it is a digit, to 0 otherwise.
-.type _is_digit, @function
-_is_digit:
- li t0, '0' - 1
- sltu t1, t0, a0 # t1 = a0 >= '0'
-
- sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
-
- and a0, t1, t2
-
- ret
-
-.type _is_alnum, @function
-_is_alnum:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- sw a0, 4(sp)
-
- call _is_alpha
- sw a0, 0(sp)
-
- lw a0, 4(sp)
- call _is_digit
-
- lw a1, 0(sp)
- or a0, a0, a1
-
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
-
-# Writes a string to the standard output.
-#
-# Parameters:
-# a0 - Length of the string.
-# a1 - String pointer.
-.type _write_s, @function
-_write_s:
- # Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
-
- mv a2, a0
- li a0, STDOUT
- li a7, SYS_WRITE
- ecall
-
- # Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
- ret
-
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
@@ -228,16 +104,6 @@ _read_file:
addi sp, sp, 8
ret
-# Terminates the program. a0 contains the return code.
-#
-# Parameters:
-# a0 - Status code.
-.type _exit, @function
-_exit:
- li a7, SYS_EXIT
- ecall
- # ret
-
.type _divide_by_zero_error, @function
_divide_by_zero_error:
addi a7, zero, 172 # getpid
@@ -248,106 +114,6 @@ _divide_by_zero_error:
ecall
ret
-# Writes a number to a string buffer.
-#
-# t0 - Local buffer.
-# t1 - Constant 10.
-# t2 - Current character.
-# t3 - Whether the number is negative.
-#
-# Parameters:
-# a0 - Whole number.
-# a1 - Buffer pointer.
-#
-# Sets a0 to the length of the written number.
-.type _print_i, @function
-_print_i:
- addi sp, sp, -32
- sw ra, 28(sp)
- sw s0, 24(sp)
- addi s0, sp, 32
-
- li t1, 10
- addi t0, s0, -9
-
- li t3, 0
- bgez a0, .Lprint_i_digit10
- li t3, 1
- neg a0, a0
-
-.Lprint_i_digit10:
- rem t2, a0, t1
- addi t2, t2, '0'
- sb t2, 0(t0)
- div a0, a0, t1
- addi t0, t0, -1
- bne zero, a0, .Lprint_i_digit10
-
- beq zero, t3, .Lprint_i_write_call
- addi t2, zero, '-'
- sb t2, 0(t0)
- addi t0, t0, -1
-
-.Lprint_i_write_call:
- mv a0, a1
- addi a1, t0, 1
- sub a2, s0, t0
- addi a2, a2, -9
- sw a2, 0(sp)
-
- call _memcpy
-
- lw a0, 0(sp)
-
- lw ra, 28(sp)
- lw s0, 24(sp)
- addi sp, sp, 32
- ret
-
-# Writes a number to the standard output.
-#
-# Parameters:
-# a0 - Whole number.
-.type _write_i, @function
-_write_i:
- addi sp, sp, -32
- sw ra, 28(sp)
- sw s0, 24(sp)
- addi s0, sp, 32
-
- addi a1, sp, 0
- call _print_i
-
- addi a1, sp, 0
- call _write_s
-
- lw ra, 28(sp)
- lw s0, 24(sp)
- addi sp, sp, 32
- ret
-
-# Writes a character from a0 into the standard output.
-.type _write_c, @function
-_write_c:
- # Prologue
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- sb a0, 4(sp)
- li a0, STDOUT
- addi a1, sp, 4
- li a2, 1
- li a7, SYS_WRITE
- ecall
-
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- add sp, sp, 16
- ret
-
# a0 - Pointer to an array to get the first element.
#
# Dereferences a pointer and returns what is on the address in a0.
@@ -448,34 +214,6 @@ _memmem:
add sp, sp, 24
ret
-# Copies memory.
-#
-# Parameters:
-# a0 - Destination.
-# a1 - Source.
-# a2 - Size.
-#
-# Preserves a0.
-.type _memcpy, @function
-_memcpy:
- mv t0, a0
-
-.Lmemcpy_loop:
- beqz a2, .Lmemcpy_end
-
- lbu t1, (a1)
- sb t1, (a0)
-
- addi a0, a0, 1
- addi a1, a1, 1
- addi a2, a2, -1
-
- j .Lmemcpy_loop
-
-.Lmemcpy_end:
- mv a0, t0
- ret
-
# Searches for a string in a string array.
#
# Parameters:
diff --git a/boot/definitions.inc b/boot/definitions.inc
deleted file mode 100644
index 88f6e8b..0000000
--- a/boot/definitions.inc
+++ /dev/null
@@ -1,68 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-#
-# Tokens.
-#
-
-# The constant should match the index in the keywords array in tokenizer.s.
-
-.equ TOKEN_PROGRAM, 1
-.equ TOKEN_IMPORT, 2
-.equ TOKEN_CONST, 3
-.equ TOKEN_VAR, 4
-.equ TOKEN_IF, 5
-.equ TOKEN_THEN, 6
-.equ TOKEN_ELSIF, 7
-.equ TOKEN_ELSE, 8
-.equ TOKEN_WHILE, 9
-.equ TOKEN_DO, 10
-.equ TOKEN_PROC, 11
-.equ TOKEN_BEGIN, 12
-.equ TOKEN_END, 13
-.equ TOKEN_TYPE, 14
-.equ TOKEN_RECORD, 15
-.equ TOKEN_UNION, 16
-.equ TOKEN_TRUE, 17
-.equ TOKEN_FALSE, 18
-.equ TOKEN_NIL, 19
-.equ TOKEN_XOR, 20
-.equ TOKEN_OR, 21
-.equ TOKEN_RETURN, 22
-.equ TOKEN_CAST, 23
-.equ TOKEN_GOTO, 24
-.equ TOKEN_CASE, 25
-.equ TOKEN_OF, 26
-
-.equ TOKEN_IDENTIFIER, 27
-# The constant should match the character index in the byte_keywords string.
-
-.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
-.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
-.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
-.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
-.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
-.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
-.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
-.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
-.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
-.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
-.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
-.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
-.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
-.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
-.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
-
-.equ TOKEN_ASSIGN, 43
-.equ TOKEN_INTEGER, 44
-
-#
-# Symbols.
-#
-.equ TYPE_PRIMITIVE, 0x01
-.equ TYPE_POINTER, 0x02
-.equ TYPE_PROCEDURE, 0x03
-.equ INFO_PARAMETER, 0x10
-.equ INFO_LOCAL, 0x20
-.equ INFO_PROCEDURE, 0x30
diff --git a/boot/stage1.s b/boot/stage1.s
index a45d8ab..c81a7f8 100644
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -2,586 +2,276 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
-.global _start # Program entry point.
-
-#
-# Registers used as global variables:
-# s1 - Contains the current position in the source text.
-# s2 - Label counter.
-# s3 - Dynamic memory region.
-#
-# - The compiler expects valid input, otherwise it will generate invalid
-# assembly or hang. There is no error checking, no semantic analysis, no
-# type checking.
-#
-# - Imports with only a module name without package, e.g.
-# "import dummy", can be parsed, but are ignored.
-#
-# - No loops. Only labels and goto.
-#
-# - Only unsigned number literals are supported (in decimal or
-# hexadecimal format).
-#
-# - Comments are accepted only at the end of a line.
-#
-# - Return can be used only as the last statement of a procedure. It
-# doesn't actually return, but sets a0 to the appropriate value.
-#
-# - The lvalue of an assignment can only be an identifier.
-
-.include "boot/definitions.inc"
-
.equ SOURCE_BUFFER_SIZE, 81920
-.section .rodata
-section_rodata: .ascii ".section .rodata\n"
-.equ SECTION_RODATA_SIZE, . - section_rodata
-section_text: .ascii ".section .text\n"
-.equ SECTION_TEXT_SIZE, . - section_text
-section_bss: .ascii ".section .bss\n"
-.equ SECTION_BSS_SIZE, . - section_bss
-global_start: .ascii ".global _start\n"
-.equ GLOBAL_START_SIZE, . - global_start
-prologue: .ascii "addi sp, sp, -96\nsw ra, 92(sp)\nsw s0, 88(sp)\naddi s0, sp, 96\n"
-.equ PROLOGUE_SIZE, . - prologue
-epilogue: .ascii "lw ra, 92(sp)\nlw s0, 88(sp)\naddi sp, sp, 96\nret\n"
-.equ EPILOGUE_SIZE, . - epilogue
-
-asm_exit: .ascii "li a0, 0\nli a7, 93\necall\n"
-.equ ASM_EXIT_SIZE, . - asm_exit
-asm_start: .ascii ".type _start, @function\n_start:\n"
-.equ ASM_START_SIZE, . - asm_start
-asm_and_a0_a1: .ascii "and a0, a0, a1\n"
-.equ ASM_AND_A0_A1_SIZE, . - asm_and_a0_a1
-asm_or_a0_a1: .ascii "or a0, a0, a1\n"
-.equ ASM_OR_A0_A1_SIZE, . - asm_or_a0_a1
-asm_add_a0_a1: .ascii "add a0, a0, a1\n"
-.equ ASM_ADD_A0_A1_SIZE, . - asm_add_a0_a1
-asm_sub_a0_a1: .ascii "sub a0, a0, a1\n"
-.equ ASM_SUB_A0_A1_SIZE, . - asm_sub_a0_a1
-asm_mul_a0_a1: .ascii "mul a0, a0, a1\n"
-.equ ASM_MUL_A0_A1_SIZE, . - asm_mul_a0_a1
-asm_seqz_a0: .ascii "seqz a0, a0\n"
-.equ ASM_SEQZ_A0_SIZE, . - asm_seqz_a0
-asm_neg_a0: .ascii "neg a0, a0\n"
-.equ ASM_NEG_A0_SIZE, . - asm_neg_a0
-asm_type: .ascii ".type "
-.equ ASM_TYPE_SIZE, . - asm_type
-asm_type_function: .ascii ", @function\n"
-.equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function
-asm_type_object: .ascii ", @object\n"
-.equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object
-asm_restore_parameters:
- .ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n"
-.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters
-asm_preserve_parameters:
- .ascii "sw a0, 84(sp)\nsw a1, 80(sp)\nsw a2, 76(sp)\nsw a2, 76(sp)\nsw a3, 72(sp)\nsw a4, 68(sp)\nsw a5, 64(sp)\n"
-.equ ASM_PRESERVE_PARAMETERS_SIZE, . - asm_preserve_parameters
-
-.section .bss
-.type source_code, @object
-source_code: .zero SOURCE_BUFFER_SIZE
+.equ SYS_READ, 63
+.equ SYS_WRITE, 64
+.equ SYS_EXIT, 93
+.equ SYS_MMAP2, 222
+.equ STDIN, 0
+.equ STDOUT, 1
+.equ STDERR, 2
-.section .text
-
-# Ignores the import.
-.type compile_import, @function
-compile_import:
- # Prologue.
- addi sp, sp, -24
- sw ra, 20(sp)
- sw s0, 16(sp)
- addi s0, sp, 24
-
-.Lcompile_import_loop:
- mv a0, s1
- addi a1, sp, 0
- call lex_next
- li t0, TOKEN_IMPORT
- lw t1, 0(sp)
- bne t0, t1, .Lcompile_import_end
- # a0 is set from the previous lex_next call. Skip the module name.
- addi a1, sp, 0
- call lex_next
- mv s1, a0
-
- j .Lcompile_import_loop
-
-.Lcompile_import_end:
- # Epilogue.
- lw ra, 20(sp)
- lw s0, 16(sp)
- addi sp, sp, 24
- ret
+.section .rodata
-.type compile_binary_expression, @function
-compile_binary_expression:
- # Prologue.
- addi sp, sp, -32
- sw ra, 28(sp)
- sw s0, 24(sp)
- addi s0, sp, 32
+.type keyword_equ, @object
+keyword_equ: .ascii ".equ"
+.equ KEYWORD_EQU_SIZE, 4
- li a0, 0
- call compile_expression
+.type keyword_section, @object
+keyword_section: .ascii ".section"
+.equ KEYWORD_SECTION_SIZE, 8
- mv a0, s1
- addi a1, sp, 12
- call lex_next
- lw t0, 12(sp)
+.type keyword_type, @object
+keyword_type: .ascii ".type"
+.equ KEYWORD_TYPE_SIZE, 5
- li t1, TOKEN_AND
- beq t0, t1, .Lcompile_binary_expression_and
+.type keyword_type_object, @object
+keyword_type_object: .ascii "object"
+.equ KEYWORD_TYPE_OBJECT_SIZE, 6
- li t1, TOKEN_OR
- beq t0, t1, .Lcompile_binary_expression_or
+.type keyword_type_function, @object
+keyword_type_function: .ascii "function"
+.equ KEYWORD_TYPE_FUNCTION_SIZE, 8
- li t1, TOKEN_PLUS
- beq t0, t1, .Lcompile_binary_expression_plus
+.type keyword_ret, @object
+keyword_ret: .ascii "ret"
+.equ KEYWORD_RET_SIZE, 3
- li t1, TOKEN_EQUALS
- beq t0, t1, .Lcompile_binary_expression_equal
+.type keyword_global, @object
+keyword_global: .ascii ".globl"
+.equ KEYWORD_GLOBAL_SIZE, 6
- li t1, TOKEN_ASTERISK
- beq t0, t1, .Lcompile_binary_expression_product
+.type keyword_proc, @object
+keyword_proc: .ascii "proc "
+.equ KEYWORD_PROC_SIZE, 5
- li t1, TOKEN_MINUS
- beq t0, t1, .Lcompile_binary_expression_minus
+.type keyword_end, @object
+keyword_end: .ascii "end"
+.equ KEYWORD_END_SIZE, 3
- j .Lcompile_binary_expression_end
+.type keyword_begin, @object
+keyword_begin: .ascii "begin"
+.equ KEYWORD_BEGIN_SIZE, 5
-.Lcompile_binary_expression_equal:
- mv s1, a0 # Skip =.
- li a0, 1
- call compile_expression
- li a0, ASM_SUB_A0_A1_SIZE
- la a1, asm_sub_a0_a1
- call _write_s
+.type keyword_var, @object
+keyword_var: .ascii "var"
+.equ KEYWORD_VAR_SIZE, 3
- li a0, ASM_SEQZ_A0_SIZE
- la a1, asm_seqz_a0
- call _write_s
+.type asm_prologue, @object
+asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
- j .Lcompile_binary_expression_end
+.type asm_epilogue, @object
+asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
-.Lcompile_binary_expression_and:
- mv s1, a0 # Skip &.
- li a0, 1
- call compile_expression
- li a0, ASM_AND_A0_A1_SIZE
- la a1, asm_and_a0_a1
- call _write_s
+.type asm_type_directive, @object
+asm_type_directive: .string ".type "
- j .Lcompile_binary_expression_end
+.type asm_type_function, @object
+asm_type_function: .string ", @function\n"
-.Lcompile_binary_expression_or:
- mv s1, a0 # Skip or.
- li a0, 1
- call compile_expression
- li a0, ASM_OR_A0_A1_SIZE
- la a1, asm_or_a0_a1
- call _write_s
+.type asm_colon, @object
+asm_colon: .string ":\n"
- j .Lcompile_binary_expression_end
+.type asm_call, @object
+asm_call: .string "\tcall "
-.Lcompile_binary_expression_plus:
- mv s1, a0 # Skip +.
- li a0, 1
- call compile_expression
- li a0, ASM_ADD_A0_A1_SIZE
- la a1, asm_add_a0_a1
- call _write_s
+.section .bss
- j .Lcompile_binary_expression_end
+.type source_code, @object
+source_code: .zero SOURCE_BUFFER_SIZE
-.Lcompile_binary_expression_minus:
- mv s1, a0 # Skip -.
- li a0, 1
- call compile_expression
- li a0, ASM_SUB_A0_A1_SIZE
- la a1, asm_sub_a0_a1
- call _write_s
+.type source_code_position, @object
+source_code_position: .word 0
- j .Lcompile_binary_expression_end
+.section .text
-.Lcompile_binary_expression_product:
- mv s1, a0 # Skip *.
- li a0, 1
- call compile_expression
- li a0, ASM_MUL_A0_A1_SIZE
- la a1, asm_mul_a0_a1
- call _write_s
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the number of bytes written to the buffer in a0.
+.type _read_file, @function
+_read_file:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- j .Lcompile_binary_expression_end
+ mv a2, a1
+ mv a1, a0
+ li a0, STDIN
+ li a7, SYS_READ
+ ecall
-.Lcompile_binary_expression_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Looks for a register that can be used to calculate a symbol address. Writes it
-# as string, like sp or s0 into the provided buffer.
-#
-# Parameters:
-# a0 - Symbol info pointer.
-# a1 - Output buffer.
-#
-# Sets a0 to the length of register name written or 0.
-.type take_address, @function
-take_address:
- beqz a0, .Ltake_address_undefined
- lw t0, 0(a0)
-
- li t1, INFO_PARAMETER
- beq t0, t1, .Ltake_address_parameter
-
- li t1, INFO_LOCAL
- beq t0, t1, .Ltake_address_local
-
- j .Ltake_address_undefined
-
-.Ltake_address_parameter:
- li t0, 0x3073 # s0
- sh t0, (a1)
-
- li a0, 2
-
- j .Ltake_address_end
-
-.Ltake_address_local:
- li t0, 0x7073 # (sp)
- sh t0, (a1)
-
- li a0, 2
-
- j .Ltake_address_end
-
-.Ltake_address_undefined:
- li a0, 0
-
-.Ltake_address_end:
- ret
-
-# Parameters:
-# a0 - Identifier length.
-# a1 - Register number as character.
-.type compile_identifier_expression, @function
-compile_identifier_expression:
+# Writes a character from a0 into the standard output.
+.type _write_c, @function
+_write_c:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- sw a0, 20(sp) # Identifier length.
- sw a1, 16(sp) # Register number as character.
-
- lw a0, 20(sp)
- mv a1, s1
- call symbol_table_lookup
- sw a0, 12(sp)
-
- beqz a0, .Lcompile_identifier_expression_by_name
- lw t0, 0(a0)
-
- j .Lcompile_identifier_expression_by_address
-
-.Lcompile_identifier_expression_by_name:
- # Global identifier.
- lw t1, 16(sp)
- li t0, 0x00202c00 # \0,_
- or t0, t0, t1
- sw t0, 8(sp)
- li t0, 0x6120616c # la a
- sw t0, 4(sp)
- li a0, 7
- addi a1, sp, 4
- call _write_s
-
- lw a0, 20(sp)
- mv a1, s1
- call _write_s
-
- li a0, '\n'
- call _write_c
-
- lbu a0, (s1)
- call _is_upper
- beqz a0, .Lcompile_identifier_expression_end
-
- lw t1, 16(sp)
- li t0, 0x0a290061 # a\0)\n
- sll t2, t1, 8
- or t0, t0, t2
- sw t0, 8(sp)
- li t0, 0x28202c00 # \0, (
- or t0, t0, t1
- sw t0, 4(sp)
- li t0, 0x6120776c # lw a
- sw t0, 0(sp)
- li a0, 12
- addi a1, sp, 0
- call _write_s
-
- j .Lcompile_identifier_expression_end
-
-.Lcompile_identifier_expression_by_address:
- lw t1, 16(sp)
- li t0, 0x00202c00 # \0,_
- or t0, t0, t1
- sw t0, 8(sp)
- li t0, 0x6120776c # lw a
- sw t0, 4(sp)
- li a0, 7
- addi a1, sp, 4
- call _write_s
-
- lw a0, 12(sp)
- lw a0, 8(a0)
- call _write_i
-
- li a0, '('
- call _write_c
-
- lw a0, 12(sp)
- addi a1, sp, 4
- call take_address
- addi a1, sp, 4
- call _write_s
-
- li a0, ')'
- call _write_c
- li a0, '\n'
- call _write_c
-
- j .Lcompile_identifier_expression_end
+ sb a0, 20(sp)
+ li a0, STDOUT
+ addi a1, sp, 20
+ li a2, 1
+ li a7, SYS_WRITE
+ ecall
-.Lcompile_identifier_expression_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
- addi sp, sp, 32
+ add sp, sp, 32
ret
-# Evalutes an expression and saves the result in a0.
+# Writes a null-terminated string to the standard output.
#
-# a0 - X in aX, the register number to save the result.
-.type compile_expression, @function
-compile_expression:
+# Parameters:
+# a0 - String.
+.type _write_z, @function
+_write_z:
# Prologue.
- addi sp, sp, -48
- sw ra, 44(sp)
- sw s0, 40(sp)
- addi s0, sp, 48
-
- addi a0, a0, '0' # Make the register number to a character.
- sw a0, 36(sp) # And save it.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 24
- call lex_next
sw a0, 20(sp)
- lw t0, 24(sp)
-
- li t1, TOKEN_MINUS
- beq t0, t1, .Lcompile_expression_negate
-
- li t1, TOKEN_AT
- beq t0, t1, .Lcompile_expression_address
-
- li t1, TOKEN_INTEGER
- beq t0, t1, .Lcompile_expression_literal
-
- addi a1, sp, 8
- call lex_next
- lw t0, 8(sp)
- li t1, TOKEN_LEFT_PAREN
- beq t0, t1, .Lcompile_expression_call
-
- lw s1, 32(sp)
- lw a0, 28(sp)
- lw a1, 36(sp)
- call compile_identifier_expression
+.write_z_loop:
+ # Check for 0 character.
+ lb a0, (a0)
+ beqz a0, .write_z_end
- j .Lcompile_expression_advance
-
-.Lcompile_expression_negate:
- lw s1, 20(sp) # Skip the -.
- mv a0, zero
- call compile_expression
-
- li a0, ASM_NEG_A0_SIZE
- la a1, asm_neg_a0
- call _write_s
-
- j .Lcompile_expression_end
+ # Print a character.
+ li a0, STDOUT
+ lw a1, 20(sp)
+ li a2, 1
+ li a7, SYS_WRITE
+ ecall
-.Lcompile_expression_address:
+ # Advance the input string by one byte.
lw a0, 20(sp)
- mv s1, a0 # Skip @.
-
- lw a0, 36(sp)
- call compile_at_expression
+ addi a0, a0, 1
+ sw a0, 20(sp)
- j .Lcompile_expression_end
+ j .write_z_loop
-.Lcompile_expression_call:
- mv s1, a0
+.write_z_end:
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
- lw a0, 32(sp)
- lw a1, 28(sp)
- call compile_call
+# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
+.type _is_upper, @function
+_is_upper:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- j .Lcompile_expression_end
+ li t0, 'A' - 1
+ sltu t1, t0, a0 # t1 = a0 >= 'A'
-.Lcompile_expression_literal:
- lw t1, 36(sp)
- li t0, 0x00202c00 # \0,_
- or t0, t0, t1
- sw t0, 16(sp)
- li t0, 0x6120696c # li a
- sw t0, 12(sp)
- li a0, 7
- addi a1, sp, 12
- call _write_s
+ sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+ and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
- lw a0, 28(sp)
- lw a1, 32(sp)
- call _write_s
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
- li a0, '\n'
- call _write_c
+# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
+.type _is_lower, @function
+_is_lower:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- j .Lcompile_expression_advance
+ li t0, 'a' - 1
+ sltu t2, t0, a0 # t2 = a0 >= 'a'
-.Lcompile_expression_advance:
- lw s1, 20(sp)
+ sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+ and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
-.Lcompile_expression_end:
# Epilogue.
- lw ra, 44(sp)
- lw s0, 40(sp)
- addi sp, sp, 48
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret
-# Expression taking an identifier address.
+# Detects if the passed character is a 7-bit alpha character or an underscore.
#
-# Parameters:
-# a0 - Register number as character
-.type compile_at_expression, @function
-compile_at_expression:
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+.type _is_alpha, @function
+_is_alpha:
# Prologue.
- addi sp, sp, -48
- sw ra, 44(sp)
- sw s0, 40(sp)
- addi s0, sp, 48
-
- sw a0, 36(sp)
-
- mv a0, s1
- addi a1, sp, 24
- call lex_next
- mv s1, a0
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- lw a0, 28(sp)
- lw a1, 32(sp)
- call symbol_table_lookup
sw a0, 20(sp)
- li t0, 0x20 # _
- sb t0, 12(sp)
-
- # lw a0, 28(sp)
- lw a1, 32(sp)
- addi a1, sp, 13
- call take_address
-
- lw t1, 36(sp)
- li t0, 0x2c006120 # _a\0,
- sw t0, 8(sp)
- sb t1, 10(sp)
- li t0, 0x69646461 # addi
- sw t0, 4(sp)
- addi a0, a0, 9 # The length returned by take_address + the instruction.
- addi a1, sp, 4
- call _write_s
-
- li a0, ','
- call _write_c
- li a0, ' '
- call _write_c
+ call _is_upper
+ sw a0, 16(sp)
lw a0, 20(sp)
- lw a0, 8(a0)
- call _write_i
+ call _is_lower
- j .Lcompile_at_expression_end
+ lw t0, 20(sp)
+ xori t1, t0, '_'
+ seqz t1, t1
-.Lcompile_at_expression_end:
- li a0, '\n'
- call _write_c
+ lw t0, 16(sp)
+ or a0, a0, t0
+ or a0, a0, t1
# Epilogue.
- lw ra, 44(sp)
- lw s0, 40(sp)
- addi sp, sp, 48
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret
-# Compiles an lvalue.
+# Detects whether the passed character is a digit
+# (a value between 0 and 9).
#
# Parameters:
-# a0 - Pointer to the identifier.
-# a1 - Identifier length.
-.type compile_designator_expression, @function
-compile_designator_expression:
+# a0 - Examined value.
+#
+# Sets a0 to 1 if it is a digit, to 0 otherwise.
+.type _is_digit, @function
+_is_digit:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- sw a0, 20(sp) # Identifier pointer.
- sw a1, 16(sp) # Identifier length.
+ li t0, '0' - 1
+ sltu t1, t0, a0 # t1 = a0 >= '0'
- /* DEBUG
- lw a0, 20(sp)
- lw a1, 16(sp)
- call _write_error */
+ sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
-.Lcompile_designator_expression_by_address:
- lw a0, 16(sp)
- lw a1, 20(sp)
- call symbol_table_lookup
- sw a0, 12(sp)
-
- li t0, 0x202c30 # 0,_
- sw t0, 8(sp)
- li t0, 0x61207773 # sw a
- sw t0, 4(sp)
- li a0, 7
- addi a1, sp, 4
- call _write_s
-
- lw a0, 12(sp)
- lw a0, 8(a0)
- call _write_i
-
- li a0, '('
- call _write_c
-
- lw a0, 12(sp)
- addi a1, sp, 4
- call take_address
- addi a1, sp, 4
- call _write_s
-
- li a0, ')'
- call _write_c
- li a0, '\n'
- call _write_c
+ and a0, t1, t2
# Epilogue.
lw ra, 28(sp)
@@ -589,624 +279,452 @@ compile_designator_expression:
addi sp, sp, 32
ret
-# Compiles a statement beginning with an identifier.
+# Reads the next token.
#
-# Left values should be variables named "loca n", where n is the offset
-# of the variable on the stack, like loca8 or loca4.
-.type compile_identifier, @function
-compile_identifier:
+# Returns token length in a0.
+.type _read_token, @function
+_read_token:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- # Save the pointer to the identifier and its length on the stack.
- mv a0, s1
- addi a1, sp, 12
- call lex_next
- addi a1, sp, 0
- call lex_next
- mv s1, a0
-
- lw t0, 0(sp)
+ la t0, source_code_position # Token pointer.
+ lw t0, (t0)
+ sw t0, 20(sp) # Current token position.
+ sw zero, 16(sp) # Token length.
- li t1, TOKEN_LEFT_PAREN
- beq t0, t1, .Lcompile_identifier_call
+.read_token_loop:
+ lb t0, (t0) # Current character.
- li t1, TOKEN_ASSIGN
- beq t0, t1, .Lcompile_identifier_assign
+ # First we try to read a directive.
+ # A directive can contain a dot and characters.
+ li t1, '.'
+ beq t0, t1, .read_token_next
- j .Lcompile_identifier_end
+ lw a0, 20(sp)
+ lb a0, (a0)
+ call _is_alpha
+ bnez a0, .read_token_next
-.Lcompile_identifier_call:
lw a0, 20(sp)
- lw a1, 16(sp)
- call compile_call
+ lb a0, (a0)
+ call _is_digit
+ bnez a0, .read_token_next
- j .Lcompile_identifier_end
+ j .read_token_end
-.Lcompile_identifier_assign:
- call compile_binary_expression
- lw a0, 20(sp)
- lw a1, 16(sp)
- call compile_designator_expression
+.read_token_next:
+ # Advance the source code position and token length.
+ lw t0, 16(sp)
+ addi t0, t0, 1
+ sw t0, 16(sp)
+
+ lw t0, 20(sp)
+ addi t0, t0, 1
+ sw t0, 20(sp)
- j .Lcompile_identifier_end
+ j .read_token_loop
+
+.read_token_end:
+ lw a0, 16(sp)
-.Lcompile_identifier_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Compiles a procedure call. Expects s1 to point to the first argument.
-# a0 - Pointer to the procedure name.
-# a1 - Length of the procedure name.
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
#
-# Returns the procedure result in a0.
-.type compile_call, @function
-compile_call:
+# Returns 0 in a0 if memory regions are equal.
+.type _memcmp, @function
+_memcmp:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- sw a0, 20(sp)
- sw a1, 16(sp)
- sw zero, 12(sp) # Argument count for a procedure call.
-
-.Lcompile_call_paren:
- lbu t0, (s1)
- li t1, 0x29 # )
- beq t0, t1, .Lcompile_call_complete
-
-.Lcompile_call_argument:
+ mv t0, a0
li a0, 0
- call compile_expression
-
- li t0, 0x202c30 # 0,_
- sw t0, 8(sp)
- li t0, 0x61207773 # sw a
- sw t0, 4(sp)
- li a0, 7
- addi a1, sp, 4
- call _write_s
-
- lw t0, 12(sp) # Argument count for a procedure call.
-
- # Only 6 arguments are supported with a0-a5.
- # Save all arguments on the stack so they aren't overriden afterwards.
- li a0, -4
- mul a0, t0, a0
- addi a0, a0, 60
- call _write_i
-
- li t0, '\n'
- sw t0, 8(sp)
- li t0, 0x29707328 # (sp)
- sw t0, 4(sp)
- li a0, 5
- addi a1, sp, 4
- call _write_s
-
- lbu t0, (s1)
- li t1, ','
- bne t0, t1, .Lcompile_call_paren
-
- lw t0, 12(sp) # Argument count for a procedure call.
- addi t0, t0, 1
- sw t0, 12(sp)
-
- addi s1, s1, 1 # Skip the comma between the arguments.
- j .Lcompile_call_argument
-.Lcompile_call_complete:
- sw zero, 12(sp)
+.Lmemcmp_loop:
+ beqz a2, .Lmemcmp_end
-.Lcompile_call_restore:
- # Just go through all a0-a5 registers and read them from stack.
- # If this stack value contains garbage, the procedure just shouldn't use it.
- li a0, ASM_RESTORE_PARAMETERS_SIZE
- la a1, asm_restore_parameters
- call _write_s
+ lbu t1, (t0)
+ lbu t2, (a1)
+ sub a0, t1, t2
-.Lcompile_call_perform:
- li t0, 0x20
- sw t0, 8(sp)
- li t0, 0x6c6c6163 # call
- sw t0, 4(sp)
- li a0, 5
- addi a1, sp, 4
- call _write_s
-
- lw a0, 16(sp)
- lw a1, 20(sp)
- call _write_s
+ bnez a0, .Lmemcmp_end
- li a0, '\n'
- call _write_c
+ addi t0, t0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1
- addi s1, s1, 1 # Skip the close paren.
+ j .Lmemcmp_loop
+.Lmemcmp_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Walks through the procedure definitions.
-.type compile_procedure_section, @function
-compile_procedure_section:
+# Advances the token stream by a0 bytes.
+.type _advance_token, @function
+_advance_token:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
-.Lcompile_procedure_section_loop:
- mv a0, s1
- addi a1, sp, 4
- call lex_next
- li t0, TOKEN_PROC
- lw t1, 4(sp)
- bne t0, t1, .Lcompile_procedure_section_end
-
- call compile_procedure
-
- j .Lcompile_procedure_section_loop
+ # Move the global source code position forward by a0 bytes.
+ la t0, source_code_position
+ lw t1, (t0)
+ add t1, t1, a0
+ sw t1, (t0)
-.Lcompile_procedure_section_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-.type compile_module_declaration, @function
-compile_module_declaration:
+.type _compile_section, @function
+_compile_section:
# Prologue.
- addi sp, sp, -24
- sw ra, 20(sp)
- sw s0, 16(sp)
- addi s0, sp, 24
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- li a0, GLOBAL_START_SIZE
- la a1, global_start
- call _write_s
+ # Print the .section directive and a space after it.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ li a2, KEYWORD_SECTION_SIZE + 1
+ li a7, SYS_WRITE
+ ecall
- # Skip "program".
- mv a0, s1
- addi a1, sp, 4
- call lex_next
- mv s1, a0
+ # Skip the .section directive and the space after it.
+ li a0, KEYWORD_SECTION_SIZE + 1
+ call _advance_token
+
+ # Read the section name.
+ call _read_token
+ sw a0, 16(sp)
+
+ # Print the section name and newline.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 16(sp)
+ addi a2, a2, 1
+ li a7, SYS_WRITE
+ ecall
+
+ # Skip the section name.
+ lw a0, 16(sp)
+ addi a0, a0, 1
+ call _advance_token
# Epilogue.
- lw ra, 20(sp)
- lw s0, 16(sp)
- addi sp, sp, 24
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret
-# Compiles global variable section.
-.type compile_global_section, @function
-compile_global_section:
+# Skips a comment line (up to and including the newline) without printing it.
+.type _skip_comment, @function
+_skip_comment:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 4
- call lex_next
- li t0, TOKEN_VAR
- lw t1, 4(sp)
- bne t0, t1, .Lcompile_global_section_end
- mv s1, a0
-
- li a0, SECTION_BSS_SIZE
- la a1, section_bss
- call _write_s
+ la t0, source_code_position
+ lw t1, (t0)
-.Lcompile_global_section_item:
- mv a0, s1
- addi a1, sp, 12
- call lex_next
+.skip_comment_loop:
+ # Check for newline character.
+ lb t2, (t1)
+ li t3, '\n'
+ beq t2, t3, .skip_comment_end
- lw t0, 12(sp)
- li t1, TOKEN_IDENTIFIER
+ # Advance the input string by one byte.
+ addi t1, t1, 1
+ sw t1, (t0)
- bne t0, t1, .Lcompile_global_section_end
- lw s1, 20(sp) # Advance to the beginning of the variable name.
+ j .skip_comment_loop
- call compile_global
- j .Lcompile_global_section_item
+.skip_comment_end:
+ # Skip the newline.
+ addi t1, t1, 1
+ sw t1, (t0)
-.Lcompile_global_section_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Compiles a global variable.
-.type compile_global, @function
-compile_global:
+# Prints and skips a line.
+.type _compile_line, @function
+_compile_line:
# Prologue.
- addi sp, sp, -48
- sw ra, 44(sp)
- sw s0, 40(sp)
- addi s0, sp, 48
-
- # Save the identifier on the stack since it should emitted multiple times.
- mv a0, s1
- addi a1, sp, 28
- call lex_next
- addi a1, sp, 4
- call lex_next # Skip the colon in front of the type.
- addi a1, sp, 4
- call lex_next # Skip the opening bracket.
- addi a1, sp, 16
- call lex_next # Save the array size on the stack since it has to be emitted multiple times.
- addi a1, sp, 4
- call lex_next # Skip the closing bracket.
- addi a1, sp, 4
- call lex_next # Skip the type.
- mv s1, a0
-
- # .type identifier, @object
- li a0, ASM_TYPE_SIZE
- la a1, asm_type
- call _write_s
-
- lw a0, 32(sp)
- lw a1, 36(sp)
- call _write_s
-
- li a0, ASM_TYPE_OBJECT_SIZE
- la a1, asm_type_object
- call _write_s
-
- # identifier: .zero size
- lw a0, 32(sp)
- lw a1, 36(sp)
- call _write_s
-
- li t0, 0x206f7265 # ero_
- sw t0, 12(sp)
- li t0, 0x7a2e203a # : .z
- sw t0, 8(sp)
- li a0, 8
- addi a1, sp, 8
- call _write_s
-
- lw a0, 20(sp)
- lw a1, 24(sp)
- call _write_s
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- li a0, '\n'
- call _write_c
+ la a0, source_code_position
+ lw a1, (a0)
+
+.compile_line_loop:
+ # Check for newline character.
+ lb t0, (a1)
+ li t1, '\n'
+ beq t0, t1, .compile_line_end
+
+ # Print a character.
+ li a0, STDOUT
+ li a2, 1
+ li a7, SYS_WRITE
+ ecall
+
+ # Advance the input string by one byte.
+ la a0, source_code_position
+ lw a1, (a0)
+ addi a1, a1, 1
+ sw a1, (a0)
+
+ j .compile_line_loop
+
+.compile_line_end:
+ # Print and skip the newline.
+ li a0, STDOUT
+ li a2, 1
+ li a7, SYS_WRITE
+ ecall
+
+ la a0, source_code_position
+ lw a1, (a0)
+ addi a1, a1, 1
+ sw a1, (a0)
# Epilogue.
- lw ra, 44(sp)
- lw s0, 40(sp)
- addi sp, sp, 48
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret
-# Sets a0 to the type pointer.
-.type compile_type_expression, @function
-compile_type_expression:
+.type _compile_object, @function
+_compile_object:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
-.Lcompile_type_expression_type:
- mv a0, s1
- addi a1, sp, 12
- call lex_next
- mv s1, a0
- lw t0, 12(sp)
-
- li t1, TOKEN_HAT # Pointer type.
- beq t0, t1, .Lcompile_type_expression_pointer
-
- # Named type.
- lw a0, 16(sp)
- lw a1, 20(sp)
- call symbol_table_lookup
-
- j .Lcompile_type_expression_end
+ call _compile_line
-.Lcompile_type_expression_pointer:
- call compile_type_expression
- mv a1, s3
- call symbol_table_make_pointer
- add s3, s3, a0
- sub a0, s3, a0
-
- j .Lcompile_type_expression_end
-
-.Lcompile_type_expression_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Inserts local procedure variables into the symbol table.
-.type compile_local_section, @function
-compile_local_section:
+.type _compile_function_statements, @function
+_compile_function_statements:
# Prologue.
- addi sp, sp, -48
- sw ra, 44(sp)
- sw s0, 40(sp)
- addi s0, sp, 48
-
- mv a0, s1
- addi a1, sp, 28
- call lex_next
-
- lw t0, 28(sp)
- li t1, TOKEN_VAR
-
- bne t0, t1, .Lcompile_local_section_end
- mv s1, a0
-
- sw zero, 12(sp) # Variable offset counter.
-
-.Lcompile_local_section_variable:
- mv a0, s1
- addi a1, sp, 28
- call lex_next
-
- lw t0, 28(sp)
- li t1, TOKEN_IDENTIFIER
-
- bne t0, t1, .Lcompile_local_section_end
- addi a1, sp, 16
- call lex_next
- mv s1, a0 # Skip the ":" in front of the type.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- call compile_type_expression
- # a0 - Variable type.
- lw a1, 12(sp)
- mv a2, s3
- call symbol_table_make_local
+.compile_function_statements_loop:
+ la t0, source_code_position
+ lw t1, (t0)
+ addi t1, t1, 1 # Skip the tab.
- mv a2, s3
- add s3, s3, a0
+ mv a0, t1
+ la a1, keyword_ret
+ li a2, KEYWORD_RET_SIZE
+ call _memcmp
- lw a0, 32(sp)
- lw a1, 36(sp)
- call symbol_table_enter
+ beqz a0, .compile_function_statements_end
- lw t0, 12(sp)
- addi t0, t0, 4
- sw t0, 12(sp)
+ call _compile_line
+ j .compile_function_statements_loop
- j .Lcompile_local_section_variable
+.compile_function_statements_end:
+ call _compile_line
-.Lcompile_local_section_end:
# Epilogue.
- lw ra, 44(sp)
- lw s0, 40(sp)
- addi sp, sp, 48
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret
-# Inserts procedure parameters into the symbol table.
-.type compile_parameters, @function
-compile_parameters:
+.type _compile_call, @function
+_compile_call:
# Prologue.
- addi sp, sp, -48
- sw ra, 44(sp)
- sw s0, 40(sp)
- addi s0, sp, 48
-
- li t0, -12
- sw t0, 12(sp) # Parameter offset counter.
-
- mv a0, s1
- addi a1, sp, 28
- call lex_next
- mv s1, a0 # Skip the opening paren.
-
- mv a0, s1
- addi a1, sp, 28
- call lex_next
-
- lw t0, 28(sp)
- li t1, TOKEN_RIGHT_PAREN
- beq t0, t1, .Lcompile_parameters_end
- # When this is not the right paren, it is an identifier.
- mv s1, a0
-
-.Lcompile_parameters_parameter:
- mv a0, s1
- addi a1, sp, 16
- call lex_next
- mv s1, a0 # Skip the ":" in front of the type.
-
- call compile_type_expression
- # a0 - Parameter type.
- lw a1, 12(sp)
- mv a2, s3
- call symbol_table_make_parameter
-
- mv a2, s3
- add s3, s3, a0
-
- lw a0, 32(sp)
- lw a1, 36(sp)
- call symbol_table_enter
-
- lw t0, 12(sp)
- addi t0, t0, -4
- sw t0, 12(sp)
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- # Read the comma between the parameters or a closing paren.
- mv a0, s1
- addi a1, sp, 16
- call lex_next
+ call _read_token
+ sw a0, 20(sp)
- lw t0, 16(sp)
- li t1, TOKEN_COMMA
- bne t0, t1, .Lcompile_parameters_end
- # If it is a comma, read the name of the next parameter.
- addi a1, sp, 28
- call lex_next
- mv s1, a0
+ la a0, asm_call
+ call _write_z
+
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 20(sp)
+ li a7, SYS_WRITE
+ ecall
- j .Lcompile_parameters_parameter
+ # Skip parens, semicolon and newline.
+ lw a0, 20(sp)
+ addi a0, a0, 4
+ call _advance_token
-.Lcompile_parameters_end:
- mv s1, a0 # Skip the closing paren.
+ li a0, '\n'
+ call _write_c
# Epilogue.
- lw ra, 44(sp)
- lw s0, 40(sp)
- addi sp, sp, 48
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret
-.type compile_procedure, @function
-compile_procedure:
+.type _compile_statement, @function
+_compile_statement:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 12
- call lex_next # Skip proc.
- addi a1, sp, 12
- call lex_next
- mv s1, a0
-
- lw a0, 16(sp)
- lw a1, 20(sp)
- call write_procedure_head
-
- # Register the procedure in the symbol table.
- mv a0, s3
- call symbol_table_make_procedure
+ # This is a call if the statement starts with an underscore.
+ la t0, source_code_position
+ lw t0, (t0)
+ # First character after alignment tab.
+ addi t0, t0, 1
+ lb t0, (t0)
+
+ li t1, '_'
+ beq t0, t1, .compile_statement_call
- mv a2, s3
- add s3, s3, a0
+ call _compile_line
+ j .compile_statement_end
- lw a0, 16(sp)
- lw a1, 20(sp)
- call symbol_table_enter
+.compile_statement_call:
+ li a0, 1
+ call _advance_token
+ call _compile_call
- # Save the state of the symbol table before we enter the procedure scope.
- la t0, symbol_table
- lw t0, (t0)
- sw t0, 8(sp)
-
- call compile_parameters
- call compile_local_section
-
- # Skip the "begin" keyword, denoting the beginning of the procedure body.
- mv a0, s1
- addi a1, sp, 12
- call lex_next
- mv s1, a0
-
- # Generate the procedure prologue with a predefined stack size.
- li a0, PROLOGUE_SIZE
- la a1, prologue
- call _write_s
-
- # Save passed arguments on the stack.
- li a0, ASM_PRESERVE_PARAMETERS_SIZE
- la a1, asm_preserve_parameters
- call _write_s
-
- # Generate the body of the procedure.
- call compile_statements
- mv s1, a0 # Skip end.
-
- # Generate the procedure epilogue with a predefined stack size.
- li a0, EPILOGUE_SIZE
- la a1, epilogue
- call _write_s
-
- # Restore the symbol table, removing symbols local to this procedure.
- la t0, symbol_table
- lw t1, 8(sp)
- sw t1, (t0)
+ j .compile_statement_end
+.compile_statement_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Compiles a goto statement to an uncoditional jump.
-.type compile_goto, @function
-compile_goto:
+.type _compile_procedure_body, @function
+_compile_procedure_body:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 0
- call lex_next # Skip the goto keyword.
- addi a1, sp, 0
- call lex_next # We should be on dot the label is beginning with.
- addi a1, sp, 0
- call lex_next# Save the label name.
- mv s1, a0
+.compile_procedure_body_loop:
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_end
+ li a2, KEYWORD_END_SIZE
+ call _memcmp
- li t0, 0x2e206a # j .
- sw t0, 12(sp)
- li a0, 3
- addi a1, sp, 12
- call _write_s
+ beqz a0, .compile_procedure_body_epilogue
- lw a0, 4(sp)
- lw a1, 8(sp) # Saved dot position.
- call _write_s
-
- li a0, '\n'
- call _write_c
+ call _compile_statement
+ j .compile_procedure_body_loop
+.compile_procedure_body_epilogue:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Rewrites a label to assembly.
-.type compile_label, @function
-compile_label:
+.type _compile_procedure, @function
+_compile_procedure:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 8
- call lex_next # Dot starting the label.
- addi a1, sp, 8
- call lex_next
- mv s1, a0
+ # Skip "proc ".
+ li a0, KEYWORD_PROC_SIZE
+ call _advance_token
- li a0, '.'
- call _write_c
- lw a0, 12(sp)
- lw a1, 16(sp)
- call _write_s
- li a0, ':'
- call _write_c
- li a0, '\n'
- call _write_c
+ call _read_token
+ sw a0, 20(sp) # Save the procedure name length.
+
+ # Write .type _procedure_name, @function.
+ la a0, asm_type_directive
+ call _write_z
+
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 20(sp)
+ li a7, SYS_WRITE
+ ecall
+
+ la a0, asm_type_function
+ call _write_z
+
+ # Write procedure label, _procedure_name:
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 20(sp)
+ li a7, SYS_WRITE
+ ecall
+
+ la a0, asm_colon
+ call _write_z
+
+ # Skip the function name and trailing parens, semicolon, "begin" and newline.
+ lw a0, 20(sp)
+ addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4
+ call _advance_token
+
+ la a0, asm_prologue
+ call _write_z
+
+ call _compile_procedure_body
+
+ # Write the epilogue.
+ la a0, asm_epilogue
+ call _write_z
+
+ li a0, KEYWORD_END_SIZE + 2
+ call _advance_token
# Epilogue.
lw ra, 28(sp)
@@ -1214,21 +732,17 @@ compile_label:
addi sp, sp, 32
ret
-# Just skips the return keyword and evaluates the return expression.
-.type compile_return, @function
-compile_return:
+.type _compile_function, @function
+_compile_function:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 12
- call lex_next
- mv s1, a0 # Skip return.
-
- call compile_binary_expression
+ # Write the function header.
+ call _compile_line
+ call _compile_function_statements
# Epilogue.
lw ra, 28(sp)
@@ -1236,115 +750,151 @@ compile_return:
addi sp, sp, 32
ret
-.type compile_if, @function
-compile_if:
+.type _compile_type, @function
+_compile_type:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 0
- call lex_next
- mv s1, a0 # Skip the if.
+ # Print the .type directive and a space after it.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ li a2, KEYWORD_TYPE_SIZE + 1
+ li a7, SYS_WRITE
+ ecall
- call compile_binary_expression
+ # Skip the .type directive.
+ li a0, KEYWORD_TYPE_SIZE + 1
+ call _advance_token
- mv a0, s1
- addi a1, sp, 0
- call lex_next
- mv s1, a0 # Skip the then.
+ # Read and print the symbol name.
+ call _read_token
+ sw a0, 20(sp)
- # Label prefix.
- li t0, 0x66694c2e # .Lif
- sw t0, 20(sp)
+ # Print the symbol name, comma, space and @.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 20(sp)
+ addi a2, a2, 3
+ li a7, SYS_WRITE
+ ecall
- li t0, 0x202c3061 # a0,_
- sw t0, 16(sp)
- li t0, 0x207a7165 # eqz_
+ # Skip the symbol name, comma, space and @.
+ lw a0, 20(sp)
+ addi a0, a0, 3
+ call _advance_token
+
+ # Read the symbol type.
+ call _read_token
+ sw a0, 16(sp)
+ la t0, source_code_position
+ lw t0, (t0)
sw t0, 12(sp)
- li t0, 0x62626262 # bbbb
- sb t0, 11(sp)
- li a0, 13
- addi a1, sp, 11
- call _write_s
+ # Print the symbol type and newline.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 16(sp)
+ addi a2, a2, 1
+ li a7, SYS_WRITE
+ ecall
- # Write the label counter.
- mv a0, s2
- call _write_i
+ lw a0, 16(sp)
+ addi a0, a0, 1
+ call _advance_token
- li a0, '\n'
- call _write_c
+ lw a0, 12(sp)
+ la a1, keyword_type_object
+ li a2, KEYWORD_TYPE_OBJECT_SIZE
+ call _memcmp
- call compile_statements
- mv s1, a0 # Skip end.
+ beqz a0, .compile_type_object
- # Write the label prefix.
- li a0, 4
- addi a1, sp, 20
- call _write_s
+ lw a0, 12(sp)
+ la a1, keyword_type_function
+ li a2, KEYWORD_TYPE_FUNCTION_SIZE
+ call _memcmp
+
+ beqz a0, .compile_type_function
- # Write the label counter.
- mv a0, s2
- call _write_i
+ j .compile_type_end
- # Finalize the label.
- li t0, 0x0a3a # :\n
- sh t0, 16(sp)
- li a0, 2
- addi a1, sp, 16
- call _write_s
+.compile_type_object:
+ call _compile_object
- addi s2, s2, 1 # Increment the label counter.
+ j .compile_type_end
+.compile_type_function:
+ call _compile_function
+
+ j .compile_type_end
+
+.compile_type_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Writes:
-# .type identifier, @function
-# identifier:
-#
-# Parameters:
-# a0 - Identifier length.
-# a0 - Identifier pointer.
-.type write_procedure_head, @function
-write_procedure_head:
+.type _compile_equ, @function
+_compile_equ:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- sw a0, 16(sp)
- sw a1, 20(sp)
+ # Print the .equ directive and a space after it.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ li a2, KEYWORD_EQU_SIZE + 1
+ li a7, SYS_WRITE
+ ecall
- # .type identifier, @function
- li a0, ASM_TYPE_SIZE
- la a1, asm_type
- call _write_s
+ # Skip the .equ directive.
+ li a0, KEYWORD_EQU_SIZE + 1
+ call _advance_token
- lw a0, 16(sp)
- lw a1, 20(sp)
- call _write_s
+ # Read and print the constant name.
+ call _read_token
+ sw a0, 20(sp)
- li a0, ASM_TYPE_FUNCTION_SIZE
- la a1, asm_type_function
- call _write_s
+ # Print the constant name, comma and space.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 20(sp)
+ addi a2, a2, 2
+ li a7, SYS_WRITE
+ ecall
- lw a0, 16(sp)
- lw a1, 20(sp)
- call _write_s
+ # Skip the constant name, comma and the space after it.
+ lw a0, 20(sp)
+ addi a0, a0, 2
+ call _advance_token
- li t0, 0x0a3a # :\n
- sw t0, 12(sp)
- li a0, 2
- addi a1, sp, 12
- call _write_s
+ # Read the constant value.
+ call _read_token
+ sw a0, 16(sp)
+
+ # Print the constant value and newline.
+ li a0, STDOUT
+ la a1, source_code_position
+ lw a1, (a1)
+ lw a2, 16(sp)
+ addi a2, a2, 1
+ li a7, SYS_WRITE
+ ecall
+
+ lw a2, 16(sp)
+ addi a2, a2, 1
+ call _advance_token
# Epilogue.
lw ra, 28(sp)
@@ -1352,193 +902,163 @@ write_procedure_head:
addi sp, sp, 32
ret
-# Compiles a list of statements delimited by semicolons.
-#
-# Sets a0 to the end of the token finishing the list
-# (should be the "end" token in a valid program).
-.type compile_statements, @function
-compile_statements:
+.type _skip_newlines, @function
+_skip_newlines:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- # Generate the body of the procedure.
- mv a0, s1
- addi a1, sp, 0
- call lex_next
- lw t0, 0(sp)
- li t1, TOKEN_END
-
- beq t0, t1, .Lcompile_statements_end
+ # Skip newlines.
+ la t0, source_code_position
+ lw t1, (t0)
-.Lcompile_statements_body:
- call compile_statement
+.skip_newlines_loop:
+ lb t2, (t1)
+ li t3, '\n'
+ bne t2, t3, .skip_newlines_end
+ beqz t2, .skip_newlines_end
- mv a0, s1
- addi a1, sp, 0
- call lex_next
- lw t0, 0(sp)
- li t1, TOKEN_SEMICOLON
-
- bne t0, t1, .Lcompile_statements_end
- mv s1, a0
+ addi t1, t1, 1
+ sw t1, (t0)
- j .Lcompile_statements_body
+ j .skip_newlines_loop
-.Lcompile_statements_end:
+.skip_newlines_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-# Checks for the type of the current statement and compiles it.
-.type compile_statement, @function
-compile_statement:
+# Process the source code and print the generated code.
+.type _compile, @function
+_compile:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
- mv a0, s1
- addi a1, sp, 0
- call lex_next
- lw t0, 0(sp)
+.compile_loop:
+ call _skip_newlines
- li t1, TOKEN_IDENTIFIER
- beq t0, t1, .Lcompile_statement_identifier
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+ beqz t0, .compile_end
+ li t1, '#'
+ beq t0, t1, .compile_comment
- li t1, TOKEN_GOTO
- beq t0, t1, .Lcompile_statement_goto
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_equ
+ li a2, KEYWORD_EQU_SIZE
+ call _memcmp
- li t1, TOKEN_RETURN
- beq t0, t1, .Lcompile_statement_return
+ beqz a0, .compile_equ
- li t1, TOKEN_IF
- beq t0, t1, .Lcompile_statement_if
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_section
+ li a2, KEYWORD_SECTION_SIZE
+ call _memcmp
- li t1, TOKEN_DOT
- beq t0, t1, .Lcompile_statement_label
+ beqz a0, .compile_section
- unimp # Else.
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_type
+ li a2, KEYWORD_TYPE_SIZE
+ call _memcmp
-.Lcompile_statement_if:
- call compile_if
- j .Lcompile_statement_end
+ beqz a0, .compile_type
-.Lcompile_statement_label:
- call compile_label
- j .Lcompile_statement_end
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_proc
+ li a2, KEYWORD_PROC_SIZE
+ call _memcmp
-.Lcompile_statement_return:
- call compile_return
- j .Lcompile_statement_end
+ beqz a0, .compile_procedure
-.Lcompile_statement_goto:
- call compile_goto
- j .Lcompile_statement_end
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_global
+ li a2, KEYWORD_GLOBAL_SIZE
+ call _memcmp
-.Lcompile_statement_identifier:
- call compile_identifier
- j .Lcompile_statement_end
+ beqz a0, .compile_global
-.Lcompile_statement_end:
- # Epilogue.
- lw ra, 28(sp)
- lw s0, 24(sp)
- addi sp, sp, 32
- ret
+ j .compile_end # Not a known token, exit.
-# Prints ".section .text" and exits.
-.type compile_text_section, @function
-compile_text_section:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+.compile_equ:
+ call _compile_equ
- li a0, SECTION_TEXT_SIZE
- la a1, section_text
- call _write_s
+ j .compile_loop
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
+.compile_section:
+ call _compile_section
-.type compile_entry_point, @function
-compile_entry_point:
- # Prologue.
- addi sp, sp, -32
- sw ra, 28(sp)
- sw s0, 24(sp)
- addi s0, sp, 32
+ j .compile_loop
- # .type _start, @function
- li a0, ASM_START_SIZE
- la a1, asm_start
- call _write_s
+.compile_type:
+ call _compile_type
- mv a0, s1
- addi a1, sp, 4
- call lex_next
- mv s1, a0 # Skip begin.
+ j .compile_loop
- # Generate the body of the procedure.
- call compile_statements
- mv s1, a0 # Skip end.
+.compile_global:
+ call _compile_line
- li a0, ASM_EXIT_SIZE
- la a1, asm_exit
- call _write_s
+ j .compile_loop
+.compile_comment:
+ call _skip_comment
+
+ j .compile_loop
+
+.compile_procedure:
+ call _compile_procedure
+
+ j .compile_loop
+
+.compile_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
-.type compile, @function
-compile:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- call compile_module_declaration
- call compile_import
- call compile_global_section
- call compile_text_section
- call compile_procedure_section
- call compile_entry_point
-
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
-
# Entry point.
+.globl _start
.type _start, @function
_start:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
# Read the source from the standard input.
la a0, source_code
li a1, SOURCE_BUFFER_SIZE # Buffer size.
call _read_file
- li s2, 1
- call _mmap
- mv s3, a0
+ # Save the pointer to the beginning of the source code in a global variable.
+ la t0, source_code
+ la t1, source_code_position
+ sw t0, (t1)
- call symbol_table_build
- call compile
+ call _compile
# Call exit.
- li a0, 0 # Use 0 return code.
- call _exit
+ li a0, 0 # Use 0 return code.
+ li a7, SYS_EXIT
+ ecall
+
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
diff --git a/boot/stage2.elna b/boot/stage2.elna
index b4f359e..a9de48d 100644
--- a/boot/stage2.elna
+++ b/boot/stage2.elna
@@ -1,1393 +1,855 @@
-program
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
-import dummy
+# Stage2 compiler.
+#
+# It supports declaring and calling procedures without arguments.
+# A procedure name should start with an underscore.
-var
- source_code: [81920]Byte
+.section .rodata
-proc _compile_import()
-var loca0: Word
-begin
- _advance(6);
- _skip_spaces();
- loca0 := _read_token();
- _advance(loca0)
-end
-
-proc _build_binary_expression()
-var
- loca0: Word
- loca4: Word
- loca8: Word
- loca12: ^Byte
- loca16: Word
- loca20: Word
- loca24: Bool
-begin
- _build_expression(0);
-
- loca4 := 0x2c306120;
- loca8 := 0x0a316120;
-
- _skip_spaces();
- loca20 := _read_token();
- loca12 := _current();
-
- loca16 := 0x26;
- loca24 := _token_compare(loca12, loca20, @loca16);
- if loca24 = 0 then
- goto .L_build_binary_expression_and
- end;
-
- loca16 := 0x726f;
- loca24 := _token_compare(loca12, loca20, @loca16);
- if loca24 = 0 then
- goto .L_build_binary_expression_or
- end;
-
- loca16 := 0x3d;
- loca24 := _token_compare(loca12, loca20, @loca16);
- if loca24 = 0 then
- goto .L_build_binary_expression_equal
- end;
-
- loca16 := 0x2b;
- loca24 := _token_compare(loca12, loca20, @loca16);
- if loca24 = 0 then
- goto .L_build_binary_expression_plus
- end;
-
- loca16 := 0x2d;
- loca24 := _token_compare(loca12, loca20, @loca16);
- if loca24 = 0 then
- goto .L_build_binary_expression_minus
- end;
-
- loca16 := 0x2a;
- loca24 := _token_compare(loca12, loca20, @loca16);
- if loca24 = 0 then
- goto .L_build_binary_expression_product
- end;
-
- goto .Lbuild_binary_expression_end;
-
- .L_build_binary_expression_equal;
- _advance(1);
- _build_expression(1);
-
- loca0 := 0x627573;
- _write_s(3, @loca0);
- _write_s(4, @loca4);
- _write_s(4, @loca4);
- _write_s(4, @loca8);
-
- loca0 := 0x7a716573;
- _write_s(4, @loca0);
- _write_s(4, @loca4);
- _write_s(3, @loca4);
- _write_c(0x0a);
-
- goto .Lbuild_binary_expression_end;
-
- .L_build_binary_expression_and;
- _advance(1);
- _build_expression(1);
- loca0 := 0x646e61;
- _write_s(3, @loca0);
- _write_s(4, @loca4);
- _write_s(4, @loca4);
- _write_s(4, @loca8);
-
- goto .Lbuild_binary_expression_end;
-
- .L_build_binary_expression_or;
- _advance(2);
- _build_expression(1);
- loca0 := 0x726f;
- _write_s(2, @loca0);
- _write_s(4, @loca4);
- _write_s(4, @loca4);
- _write_s(4, @loca8);
-
- goto .Lbuild_binary_expression_end;
-
- .L_build_binary_expression_plus;
- _advance(1);
- _build_expression(1);
- loca0 := 0x646461;
- _write_s(3, @loca0);
- _write_s(4, @loca4);
- _write_s(4, @loca4);
- _write_s(4, @loca8);
-
- goto .Lbuild_binary_expression_end;
-
- .L_build_binary_expression_minus;
- _advance(1);
- _build_expression(1);
- loca0 := 0x627573;
- _write_s(3, @loca0);
- _write_s(4, @loca4);
- _write_s(4, @loca4);
- _write_s(4, @loca8);
-
- goto .Lbuild_binary_expression_end;
-
- .L_build_binary_expression_product;
- _advance(1);
- _build_expression(1);
- loca0 := 0x6c756d;
- _write_s(3, @loca0);
- _write_s(4, @loca4);
- _write_s(4, @loca4);
- _write_s(4, @loca8);
-
- goto .Lbuild_binary_expression_end;
-
- .Lbuild_binary_expression_end
-end
-
-proc _compile_identifier_expression(loca84: Word, loca80: Byte)
-var
- loca0: Word
- loca4: ^Byte
- loca8: Word
- loca12: Bool
- loca16: Word
- loca20: Word
- loca24: ^Byte
- loca28: Byte
+.type keyword_equ, @object
+keyword_equ: .ascii ".equ"
+.equ KEYWORD_EQU_SIZE, 4
+
+.type keyword_section, @object
+keyword_section: .ascii ".section"
+.equ KEYWORD_SECTION_SIZE, 8
+
+.type keyword_type, @object
+keyword_type: .ascii ".type"
+.equ KEYWORD_TYPE_SIZE, 5
+
+.type keyword_ret, @object
+keyword_ret: .ascii "ret"
+.equ KEYWORD_RET_SIZE, 3
+
+.type keyword_global, @object
+keyword_global: .ascii ".globl"
+.equ KEYWORD_GLOBAL_SIZE, 6
+
+.type keyword_proc, @object
+keyword_proc: .ascii "proc "
+.equ KEYWORD_PROC_SIZE, 5
+
+.type keyword_end, @object
+keyword_end: .ascii "end"
+.equ KEYWORD_END_SIZE, 3
+
+.type keyword_begin, @object
+keyword_begin: .ascii "begin"
+.equ KEYWORD_BEGIN_SIZE, 5
+
+.type keyword_var, @object
+keyword_var: .ascii "var"
+.equ KEYWORD_VAR_SIZE, 3
+
+.type asm_prologue, @object
+asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
+
+.type asm_epilogue, @object
+asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
+
+.type asm_type_directive, @object
+asm_type_directive: .string ".type "
+
+.type asm_type_function, @object
+asm_type_function: .string ", @function\n"
+
+.type asm_colon, @object
+asm_colon: .string ":\n"
+
+.type asm_call, @object
+asm_call: .string "\tcall "
+
+.type asm_j, @object
+asm_j: .string "\tj "
+
+.type asm_li, @object
+asm_li: .string "\tli "
+
+.type asm_lw, @object
+asm_lw: .string "\tlw "
+
+.type asm_t0, @object
+asm_t0: .string "t0"
+
+.type asm_a0, @object
+asm_a0: .string "a0"
+
+.type asm_comma, @object
+asm_comma: .string ", "
+
+.type asm_sp, @object
+asm_sp: .string "(sp)"
+
+.section .bss
+
+.equ SOURCE_BUFFER_SIZE, 81920
+.type source_code, @object
+source_code: .zero SOURCE_BUFFER_SIZE
+
+.section .data
+
+.type source_code_position, @object
+source_code_position: .word source_code
+
+.section .text
+
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the number of bytes read into the buffer in a0.
+proc _read_file();
begin
- loca24 := _current();
- loca0 := 0x61636f6c;
- loca0 := _memcmp(@loca0, loca24, 4);
-
- if loca0 = 0 then
- loca8 := 0x6120776c;
- _write_s(4, @loca8);
- loca8 := 0x00202c00 or loca80;
- _write_s(3, @loca8);
-
- loca4 := loca24 + 4;
- loca0 := loca84 - 4;
- _write_s(loca0, loca4);
-
- loca8 := 0x29707328;
- _write_s(4, @loca8);
- _write_c(0x0a);
-
- goto .Lcompile_identifier_expression_end
- end;
- loca0 := _front(loca24);
- loca8 := loca84 = 2;
- loca12 := loca0 = 0x73;
- if loca8 & loca12 then
- loca8 := 0x6120766d;
- _write_s(4, @loca8);
- loca8 := 0x00202c00 or loca80;
- _write_s(3, @loca8);
- _write_s(loca84, loca24);
- _write_c(0x0a);
-
- goto .Lcompile_identifier_expression_end
- end;
-
- loca8 := 0x6120616c;
- _write_s(4, @loca8);
- loca8 := 0x00202c00 or loca80;
- _write_s(3, @loca8);
-
- _write_s(loca84, loca24);
- _write_c(0x0a);
-
- if _is_upper(loca0) then
- loca8 := 0x6120776c;
- _write_s(4, @loca8);
- loca8 := 0x28202c00 or loca28;
- _write_s(4, @loca8);
- _write_c(0x61);
- _write_c(loca28);
- _write_c(0x29);
- _write_c(0x0a);
-
- goto .Lcompile_identifier_expression_end
- end;
-
- .Lcompile_identifier_expression_end
-end
-
-proc _build_expression(loca84: Word)
-var
- loca0: Word
- loca4: ^Byte
- loca8: Word
- loca12: Word
- loca16: Word
- loca20: Word
- loca24: ^Byte
- loca28: Word
+ mv a2, a1
+ mv a1, a0
+ # STDIN.
+ li a0, 0
+ li a7, 63 # SYS_READ.
+ ecall
+end;
+
+# Writes to the standard output.
+#
+# Parameters:
+# a0 - Buffer.
+# a1 - Buffer length.
+proc _write();
begin
- loca28 := loca84 + 0x30;
-
- _skip_spaces();
- loca20 := _read_token();
- loca24 := _current();
- loca0 := _front(loca24);
-
- if loca0 = 0x2d then
- goto .Lbuild_expression_negate
- end;
-
- if loca0 = 0x40 then
- goto .Lbuild_expression_address
- end;
-
- if _is_digit(loca0) then
- goto .Lbuild_expression_literal
- end;
-
- if loca0 = 0x5f then
- goto .Lbuild_expression_call
- end;
-
- _compile_identifier_expression(loca20, loca28);
- goto .Lbuild_expression_advance;
-
- .Lbuild_expression_negate;
- _advance(1);
- _build_expression(0);
-
- loca8 := 0x2067656e;
- _write_s(4, @loca8);
- loca8 := 0x202c3061;
- _write_s(4, @loca8);
- loca8 := 0x0a3061;
- _write_s(3, @loca8);
-
- goto .Lbuild_expression_advance;
-
- .Lbuild_expression_address;
- loca8 := 0x69646461;
- _write_s(4, @loca8);
- loca8 := 0x6120;
- _write_s(2, @loca8);
- _write_c(loca28);
- loca8 := 0x7073202c;
- _write_s(4, @loca8);
- loca8 := 0x202c;
- _write_s(2, @loca8);
-
- _advance(1);
- _skip_spaces();
- loca24 := _current();
- loca20 := _read_token();
-
- loca4 := loca24 + 4;
- loca0 := loca20 - 4;
- _write_s(loca0, loca4);
-
- _write_c(0xa);
-
- goto .Lbuild_expression_advance;
-
- .Lbuild_expression_call;
- _advance(loca20);
- _advance(1);
- _compile_call(loca24, loca20);
-
- goto .Lbuild_expression_end;
-
- .Lbuild_expression_literal;
- loca8 := 0x6120696c;
- _write_s(4, @loca8);
- loca8 := 0x00202c00 or loca28;
- _write_s(3, @loca8);
-
- _write_s(loca20, loca24);
- _write_c(0x0a);
-
- goto .Lbuild_expression_advance;
-
- .Lbuild_expression_advance;
- _advance(loca20);
-
- .Lbuild_expression_end
-end
-
-proc _compile_designator_expression(loca84: ^Byte, loca80: Word)
-var
- loca0: Word
- loca4: Int
- loca8: Char
- loca12: Bool
- loca16: Bool
+ mv a2, a1
+ mv a1, a0
+ # STDOUT.
+ li a0, 1
+ li a7, 64 # SYS_WRITE.
+ ecall
+end;
+
+# Writes a character from a0 into the standard output.
+proc _write_c();
begin
- loca0 := 0x61636f6c;
- loca4 := _memcmp(@loca0, loca84, 4);
-
- if loca4 = 0 then
- loca0 := 0x61207773;
- _write_s(4, @loca0);
- loca0 := 0x202c30;
- _write_s(3, @loca0);
-
- loca84 := loca84 + 4;
- loca80 := loca80 - 4;
- _write_s(loca80, loca84);
-
- loca0 := 0x29707328;
- _write_s(4, @loca0);
- _write_c(0x0a);
-
- goto .Lcompile_designator_expression_end
- end;
- loca8 := _front(loca84);
- loca12 := loca8 = 0x73;
- loca16 := loca80 = 2;
- if loca12 & loca16 then
- loca0 := 0x20766d;
- _write_s(3, @loca0);
- _write_s(loca80, loca84);
- loca0 := 0x3061202c;
- _write_s(4, @loca0);
- _write_c(0x0a);
-
- goto .Lcompile_designator_expression_end
- end;
-
- .Lcompile_designator_expression_end
-end
-
-proc _compile_identifier()
-var
- loca0: Word
- loca4: Bool
- loca8: Word
- loca12: ^Byte
- loca16: Word
- loca20: ^Byte
+ sb a0, 20(sp)
+ addi a0, sp, 20
+ li a1, 1
+ _write();
+end;
+
+# Writes a null-terminated string to the standard output.
+#
+# Parameters:
+# a0 - String.
+proc _write_z();
begin
- loca20 := _current();
- loca16 := _read_token();
+ sw a0, 20(sp)
- _advance(loca16);
- _skip_spaces();
+.write_z_loop:
+ # Check for 0 character.
+ lb a0, (a0)
+ beqz a0, .write_z_end
- loca12 := _current();
- loca8 := _read_token();
+ # Print a character.
+ lw a0, 20(sp)
+ lb a0, (a0)
+ _write_c();
- _advance(loca8);
- _skip_spaces();
+ # Advance the input string by one byte.
+ lw a0, 20(sp)
+ addi a0, a0, 1
+ sw a0, 20(sp)
- loca0 := 0x3d3a;
- loca4 := _token_compare(loca12, loca8, @loca0);
- if loca4 = 0 then
- _build_binary_expression();
- _compile_designator_expression(loca20, loca16);
+ j .write_z_loop
- goto .Lcompile_identifier_end
- end;
- if _front(loca12) = 0x28 then
- _compile_call(loca20, loca16);
+.write_z_end:
+end;
- goto .Lcompile_identifier_end
- end;
+# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_upper();
+begin
+ li t0, 'A' - 1
+ sltu t1, t0, a0 # t1 = a0 >= 'A'
- .Lcompile_identifier_end
-end
+ sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+ and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
+end;
-proc _compile_call(loca84: ^Byte, loca80: Word)
-var
- loca0: Word
- loca4: Word
- loca8: ^Byte
- loca12: Word
+# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_lower();
begin
- loca12 := 0;
-
- .Lcompile_call_paren;
- _skip_spaces();
- loca8 := _current();
- if _front(loca8) = 0x29 then
- goto .Lcompile_call_complete
- end;
-
- .Lcompile_call_argument;
- _build_expression(0);
-
- loca0 := 0x61207773;
- _write_s(4, @loca0);
- loca0 := 0x202c30;
- _write_s(3, @loca0);
-
- loca0 := -4 * loca12;
- loca0 := loca0 + 60;
- _write_i(loca0);
-
- loca0 := 0x29707328;
- _write_s(4, @loca0);
- _write_c(0x0a);
-
- _skip_spaces();
- loca8 := _current();
- loca0 := _front(loca8) = 0x2c;
- if loca0 = 0 then
- goto .Lcompile_call_paren
- end;
-
- loca12 := loca12 + 1;
-
- _advance(1);
- goto .Lcompile_call_argument;
-
- .Lcompile_call_complete;
- loca12 := 0;
-
- .Lcompile_call_restore;
-
- loca0 := 0x6120776c;
- _write_s(4, @loca0);
- loca4 := 0x36202c30;
- _write_s(4, @loca4);
- loca4 := 0x70732830;
- _write_s(4, @loca4);
- loca4 := 0x0a29;
- _write_s(2, @loca4);
-
- _write_s(4, @loca0);
- loca4 := 0x35202c31;
- _write_s(4, @loca4);
- loca4 := 0x70732836;
- _write_s(4, @loca4);
- loca4 := 0x0a29;
- _write_s(2, @loca4);
-
- _write_s(4, @loca0);
- loca4 := 0x35202c32;
- _write_s(4, @loca4);
- loca4 := 0x70732832;
- _write_s(4, @loca4);
- loca4 := 0x0a29;
- _write_s(2, @loca4);
-
- _write_s(4, @loca0);
- loca4 := 0x34202c33;
- _write_s(4, @loca4);
- loca4 := 0x70732838;
- _write_s(4, @loca4);
- loca4 := 0x0a29;
- _write_s(2, @loca4);
-
- _write_s(4, @loca0);
- loca4 := 0x34202c34;
- _write_s(4, @loca4);
- loca4 := 0x70732834;
- _write_s(4, @loca4);
- loca4 := 0x0a29;
- _write_s(2, @loca4);
-
- _write_s(4, @loca0);
- loca4 := 0x34202c35;
- _write_s(4, @loca4);
- loca4 := 0x70732830;
- _write_s(4, @loca4);
- loca4 := 0x0a29;
- _write_s(2, @loca4);
-
- loca0 := 0x6c6c6163;
- _write_s(4, @loca0);
- _write_c(0x20);
-
- _write_s(loca80, loca84);
- _write_c(0x0a);
-
- _skip_spaces();
- _advance(1)
-end
-
-proc _read_token()
-var
- loca0: Word
- loca4: Word
- loca8: ^Byte
+ li t0, 'a' - 1
+ sltu t2, t0, a0 # t2 = a0 >= 'a'
+
+ sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+ and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
+end;
+
+# Detects if the passed character is a 7-bit alpha character or an underscore.
+#
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+proc _is_alpha();
begin
- loca8 := _current();
- loca0 := _front(loca8);
- loca4 := 0;
-
- if loca0 = 0x2e then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x2c then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x3a then
- goto .Ltoken_character_colon
- end;
-
- if loca0 = 0x3b then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x28 then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x29 then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x5b then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x5d then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x5e then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x26 then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x3d then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x2b then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x2d then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x2a then
- goto .Ltoken_character_single
- end;
-
- if loca0 = 0x40 then
- goto .Ltoken_character_single
- end;
-
- .Ltoken_character_loop_do;
- loca0 := loca8 + loca4;
- loca0 := _front(loca0);
-
- if _is_alnum(loca0) then
- loca4 := loca4 + 1;
- goto .Ltoken_character_loop_do;
-
- .Ltoken_character_single;
- loca4 := loca4 + 1;
- goto .Ltoken_character_end;
-
- .Ltoken_character_colon;
- loca0 := loca8 + 1;
- loca0 := _front(loca0);
- loca4 := loca4 + 1;
-
- if loca0 = 0x3d then
- goto .Ltoken_character_single
- end
- end;
- .Ltoken_character_end;
- return loca4
-end
-
-proc _skip_spaces()
-var
- loca0: Byte
- loca4: ^Byte
+ sw a0, 20(sp)
+
+ _is_upper();
+ sw a0, 16(sp)
+
+ lw a0, 20(sp)
+ _is_lower();
+
+ lw t0, 20(sp)
+ xori t1, t0, '_'
+ seqz t1, t1
+
+ lw t0, 16(sp)
+ or a0, a0, t0
+ or a0, a0, t1
+end;
+
+# Detects whether the passed character is a digit
+# (a value between 0 and 9).
+#
+# Parameters:
+# a0 - Examined value.
+#
+# Sets a0 to 1 if it is a digit, to 0 otherwise.
+proc _is_digit();
begin
- .Lspace_loop_do;
- loca4 := _current();
- loca0 := _front(loca4);
-
- if loca0 = 0x20 then
- goto .Lspace_loop_repeat
- end;
- if loca0 = 0x09 then
- goto .Lspace_loop_repeat
- end;
- if loca0 = 0x0a then
- goto .Lspace_loop_repeat
- end;
- if loca0 = 0x0d then
- goto .Lspace_loop_repeat
- end;
-
- goto .Lspace_loop_end;
- .Lspace_loop_repeat;
- _advance(1);
- goto .Lspace_loop_do;
-
- .Lspace_loop_end
-end
-
-proc _compile_assembly(loca84: Word)
-var loca0: ^Byte
+ li t0, '0' - 1
+ sltu t1, t0, a0 # t1 = a0 >= '0'
+
+ sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
+
+ and a0, t1, t2
+end;
+
+# Reads the next token.
+#
+# Returns token length in a0.
+proc _read_token();
begin
- loca0 := _current();
+ la t0, source_code_position # Token pointer.
+ lw t0, (t0)
+ sw t0, 20(sp) # Current token position.
+ sw zero, 16(sp) # Token length.
+
+.read_token_loop:
+ lb t0, (t0) # Current character.
+
+ # First we try to read a directive.
+ # A directive can contain a dot and characters.
+ li t1, '.'
+ beq t0, t1, .read_token_next
+
+ lw a0, 20(sp)
+ lb a0, (a0)
+ _is_alpha();
+ bnez a0, .read_token_next
+
+ lw a0, 20(sp)
+ lb a0, (a0)
+ _is_digit();
+ bnez a0, .read_token_next
+
+ j .read_token_end
+
+.read_token_next:
+ # Advance the source code position and token length.
+ lw t0, 16(sp)
+ addi t0, t0, 1
+ sw t0, 16(sp)
+
+ lw t0, 20(sp)
+ addi t0, t0, 1
+ sw t0, 20(sp)
+
+ j .read_token_loop
+
+.read_token_end:
+ lw a0, 16(sp)
+end;
+
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
+#
+# Returns 0 in a0 if memory regions are equal.
+proc _memcmp();
+begin
+ mv t0, a0
+ li a0, 0
- _write_s(loca84, loca0);
- _advance(loca84);
+.Lmemcmp_loop:
+ beqz a2, .Lmemcmp_end
- _write_c(0xa);
+ lbu t1, (t0)
+ lbu t2, (a1)
+ sub a0, t1, t2
- _advance(1)
-end
+ bnez a0, .Lmemcmp_end
-proc _compile_program()
-var loca0: Word
-begin
- loca0 := 0x6f6c672e;
- _write_s(4, @loca0);
- loca0 := 0x206c6162;
- _write_s(4, @loca0);
- loca0 := 0x6174735f;
- _write_s(4, @loca0);
- loca0 := 0x0a7472;
- _write_s(3, @loca0);
-
- _advance(8)
-end
-
-proc _compile_variable_section()
-var
- loca0: Word
- loca4: ^Byte
+ addi t0, t0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1
+
+ j .Lmemcmp_loop
+
+.Lmemcmp_end:
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
begin
- loca0 := 0x6365732e;
- _write_s(4, @loca0);
- loca0 := 0x6e6f6974;
- _write_s(4, @loca0);
- loca0 := 0x73622e20;
- _write_s(4, @loca0);
- loca0 := 0x0a73;
- _write_s(2, @loca0);
-
- _advance(4);
-
- .Lcompile_variable_section_item;
- _skip_spaces();
- loca4 := _current();
-
- loca0 := 0x636f7270;
- loca0 := _memcmp(@loca0, loca4, 4);
-
- if loca0 = 0 then
- goto .Lcompile_variable_section_end
- end;
- _compile_variable();
- goto .Lcompile_variable_section_item;
-
- .Lcompile_variable_section_end
-end
-
-proc _compile_variable()
-var
- loca0: Word
- loca4: Word
- loca8: Word
- loca12: Word
- loca16: ^Byte
- loca20: Word
- loca24: Word
- loca28: ^Byte
+ # Move the global source code position forward by a0 bytes.
+ la t0, source_code_position
+ lw t1, (t0)
+ add t1, t1, a0
+ sw t1, (t0)
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
begin
- loca24 := _read_token();
- loca28 := _current();
+ sw a0, 20(sp)
- _advance(loca24);
+ la a0, source_code_position
+ lw a0, (a0)
+ lw a1, 20(sp)
+ _write();
- _skip_spaces();
- _advance(1);
+ lw a0, 20(sp)
+end;
- _skip_spaces();
- _advance(1);
+proc _compile_section();
+begin
+ # Print and skip the .section directive and a space after it.
+ li a0, KEYWORD_SECTION_SIZE + 1
+ _write_token();
+ _advance_token();
- loca16 := _read_token();
- loca20 := _current();
- _advance(loca16);
+ # Read the section name.
+ _read_token();
+ addi a0, a0, 1
- _skip_spaces();
- _advance(1);
+ _write_token();
+ _advance_token();
+end;
- _skip_spaces();
- loca0 := _read_token();
- _advance(loca0);
+# Skips the rest of the current line, including the newline, without printing it.
+proc _skip_comment();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
- loca0 := 0x7079742e;
- _write_s(4, @loca0);
- loca0 := 0x2065;
- _write_s(2, @loca0);
+.skip_comment_loop:
+ # Check for newline character.
+ lb t2, (t1)
+ li t3, '\n'
+ beq t2, t3, .skip_comment_end
- _write_s(loca24, loca28);
+ # Advance the input string by one byte.
+ addi t1, t1, 1
+ sw t1, (t0)
- loca0 := 0x6f40202c;
- _write_s(4, @loca0);
- loca0 := 0x63656a62;
- _write_s(4, @loca0);
- loca0 := 0x0a74;
- _write_s(2, @loca0);
+ j .skip_comment_loop
- loca0 := 0x7a69732e;
- _write_s(4, @loca0);
- loca0 := 0x2065;
- _write_s(2, @loca0);
+.skip_comment_end:
+ # Skip the newline.
+ addi t1, t1, 1
+ sw t1, (t0)
+end;
- _write_s(loca24, loca28);
+# Prints and skips a line.
+proc _compile_line();
+begin
+.compile_line_loop:
+ la a0, source_code_position
+ lw a1, (a0)
- loca0 := 0x202c;
- _write_s(2, @loca0);
+ lb t0, (a1)
+ li t1, '\n'
+ beq t0, t1, .compile_line_end
- _write_s(loca16, loca20);
- _write_c(0x0a);
+ # Print a character.
+ lw a0, (a1)
+ _write_c();
- _write_s(loca24, loca28);
+ # Advance the input string by one byte.
+ li a0, 1
+ _advance_token();
- loca0 := 0x7a2e203a;
- _write_s(4, @loca0);
- loca0 := 0x206f7265;
- _write_s(4, @loca0);
+ j .compile_line_loop
- _write_s(loca16, loca20);
+.compile_line_end:
+ li a0, '\n'
+ _write_c();
- _write_c(0x0a)
-end
+ li a0, 1
+ _advance_token();
+end;
-proc _compile_procedure()
-var
- loca0: Word
- loca4: Word
- loca8: Word
- loca12: Word
- loca16: Word
- loca20: ^Byte
- loca24: ^Byte
+proc _compile_integer_literal();
begin
- _advance(5);
- loca16 := _read_token();
- loca20 := _current();
- _advance(loca16);
-
- loca0 := 0x7079742e;
- _write_s(4, @loca0);
- loca0 := 0x2065;
- _write_s(2, @loca0);
-
- _write_s(loca16, loca20);
-
- loca0 := 0x6640202c;
- _write_s(4, @loca0);
- loca0 := 0x74636e75;
- _write_s(4, @loca0);
- loca0 := 0x0a6e6f69;
- _write_s(4, @loca0);
-
- _write_s(loca16, loca20);
-
- loca0 := 0x0a3a;
- _write_s(2, @loca0);
-
- _skip_spaces();
- _advance(1);
- _skip_spaces();
- _advance(1);
-
- loca12 := 0x6e;
- loca8 := 0x69676562;
-
- .Lcompile_procedure_begin;
- _skip_spaces();
- loca0 := _read_token();
-
- loca24 := _current();
- _advance(loca0);
- loca0 := _token_compare(loca24, loca0, @loca8);
-
- if loca0 = 1 then
- goto .Lcompile_procedure_begin
- end;
-
- loca0 := 0x69646461;
- _write_s(4, @loca0);
-
- loca0 := 0x2c707320;
- _write_s(4, @loca0);
- _write_s(4, @loca0);
-
- loca0 := 0x0a36392d;
- _write_s(4, @loca0);
-
- loca0 := 0x72207773;
- _write_s(4, @loca0);
- loca0 := 0x39202c61;
- _write_s(4, @loca0);
- loca0 := 0x70732832;
- _write_s(4, @loca0);
- loca0 := 0x0a29;
- _write_s(2, @loca0);
-
- loca0 := 0x73207773;
- _write_s(4, @loca0);
- loca0 := 0x38202c30;
- _write_s(4, @loca0);
- loca0 := 0x70732838;
- _write_s(4, @loca0);
- loca0 := 0x0a29;
- _write_s(2, @loca0);
-
- loca0 := 0x69646461;
- _write_s(4, @loca0);
- loca0 := 0x2c307320;
- _write_s(4, @loca0);
- loca0 := 0x2c707320;
- _write_s(4, @loca0);
- loca0 := 0x0a363920;
- _write_s(4, @loca0);
-
- loca0 := 0x61207773;
- _write_s(4, @loca0);
- loca4 := 0x38202c30;
- _write_s(4, @loca4);
- loca8 := 0x70732834;
- _write_s(4, @loca8);
- loca12 := 0x0a29;
- _write_s(2, @loca12);
-
- _write_s(4, @loca0);
- loca4 := 0x38202c31;
- _write_s(4, @loca4);
- loca8 := 0x70732830;
- _write_s(4, @loca8);
- _write_s(2, @loca12);
-
- _write_s(4, @loca0);
- loca4 := 0x37202c32;
- _write_s(4, @loca4);
- loca8 := 0x70732836;
- _write_s(4, @loca8);
- _write_s(2, @loca12);
-
- _write_s(4, @loca0);
- loca4 := 0x37202c33;
- _write_s(4, @loca4);
- loca8 := 0x70732832;
- _write_s(4, @loca8);
- _write_s(2, @loca12);
-
- _write_s(4, @loca0);
- loca4 := 0x36202c34;
- _write_s(4, @loca4);
- loca8 := 0x70732838;
- _write_s(4, @loca8);
- _write_s(2, @loca12);
-
- _write_s(4, @loca0);
- loca4 := 0x36202c35;
- _write_s(4, @loca4);
- loca8 := 0x70732838;
- _write_s(4, @loca8);
- _write_s(2, @loca12);
-
- .Lcompile_procedure_body;
- _skip_spaces();
- loca12 := _read_line();
- loca8 := 0x0a646e65;
- loca24 := _current();
- loca8 := _memcmp(loca24, @loca8, 4);
-
- if loca8 = 0 then
- goto .Lcompile_procedure_end
- end;
-
- _compile_line(loca12);
- goto .Lcompile_procedure_body;
-
- .Lcompile_procedure_end;
- _advance(4);
-
- loca0 := 0x7220776c;
- _write_s(4, @loca0);
- loca0 := 0x39202c61;
- _write_s(4, @loca0);
- loca0 := 0x70732832;
- _write_s(4, @loca0);
- loca0 := 0x0a29;
- _write_s(2, @loca0);
-
- loca0 := 0x7320776c;
- _write_s(4, @loca0);
- loca0 := 0x38202c30;
- _write_s(4, @loca0);
- loca0 := 0x70732838;
- _write_s(4, @loca0);
- loca0 := 0x0a29;
- _write_s(2, @loca0);
-
- loca0 := 0x69646461;
- _write_s(4, @loca0);
-
- loca0 := 0x2c707320;
- _write_s(4, @loca0);
- _write_s(4, @loca0);
-
- loca0 := 0x0a3639;
- _write_s(4, @loca0);
-
- loca0 := 0x0a746572;
- _write_s(4, @loca0)
-end
-
-proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte)
-var
- loca0: Bool
- loca4: Byte
- loca8: Word
- loca12: Byte
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ li a0, '\n'
+ _write_c();
+end;
+
+proc _compile_character_literal();
begin
- .Ltoken_compare_loop;
- loca4 := _front(loca76);
-
- loca8 := loca4 or loca80;
- if loca8 = 0 then
- goto .Ltoken_compare_equal
- end;
- if loca80 = 0 then
- goto .Ltoken_compare_not_equal
- end;
- if loca4 = 0 then
- goto .Ltoken_compare_not_equal
- end;
- loca12 := _front(loca84);
- if loca4 = loca12 then
- goto .Ltoken_compare_continue
- end;
- goto .Ltoken_compare_not_equal;
-
- .Ltoken_compare_continue;
-
- loca84 := loca84 + 1;
- loca80 := loca80 - 1;
- loca76 := loca76 + 1;
- goto .Ltoken_compare_loop;
-
- .Ltoken_compare_not_equal;
- loca0 := 1;
- goto .Ltoken_compare_end;
-
- .Ltoken_compare_equal;
- loca0 := 0;
-
- .Ltoken_compare_end;
- return loca0
-end
-
-proc _compile_goto()
-var
- loca0: Word
- loca4: Word
- loca8: ^Byte
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+.compile_character_literal_loop:
+ la a0, source_code_position
+ lw a0, (a0)
+ li a1, 1
+ _write();
+ li a0, 1
+ _advance_token();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ li t1, '\''
+ beq a0, t1, .compile_character_literal_end
+
+ j .compile_character_literal_loop
+
+.compile_character_literal_end:
+ li a0, '\''
+ _write_c();
+
+ li a0, '\n'
+ _write_c();
+
+ li a0, 1
+ _advance_token();
+end;
+
+proc _compile_variable_expression();
begin
- _advance(4);
+ la a0, asm_lw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
- loca0 := 0x206a;
- _write_s(2, @loca0);
+ la a0, asm_comma
+ _write_z();
- _skip_spaces();
- loca8 := _current();
- _advance(1);
+ la a0, source_code_position
+ lw a0, (a0)
+ addi a0, a0, 1
+ li a1, 2
+ _write();
- loca0 := _read_token();
- _advance(loca0);
- loca0 := loca0 + 1;
- _write_s(loca0, loca8);
+ la a0, asm_sp
+ _write_z();
- _advance(1);
- _write_c(0x0a)
-end
+ li a0, '\n'
+ _write_c();
-proc _compile_label(loca84: Word)
-var
- loca0: Word
- loca4: Word
- loca8: ^Byte
+ li a0, 3
+ _advance_token();
+
+end;
+
+proc _compile_expression();
begin
- loca0 := _current();
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
- loca0 := loca0 + loca84;
- loca0 := loca0 - 1;
- loca4 := loca84;
+ li t1, '\''
+ beq a0, t1, .compile_expression_character_literal
- loca0 := _front(loca0);
- if loca0 = 0x3b then
- loca4 := loca4 - 1
- end;
- loca8 := _current();
- _write_s(loca4, loca8);
+ li t1, 'v'
+ beq a0, t1, .compile_expression_variable
- _write_c(0x3a);
- _write_c(0x0a);
+ _is_digit();
+ bnez a0, .compile_expression_integer_literal
- _advance(loca84)
-end
+ j .compile_expression_end
-proc _compile_return()
-begin
- _advance(6);
- _skip_spaces();
- _build_binary_expression()
-end
-
-proc _compile_if()
-var
- loca0: Word
- loca4: ^Byte
- loca8: Word
- loca12: Word
- loca16: Word
- loca20: Word
- loca24: Word
-begin
- _advance(2);
- _skip_spaces();
+.compile_expression_character_literal:
+ _compile_character_literal();
+ j .compile_expression_end
+
+.compile_expression_integer_literal:
+ _compile_integer_literal();
+ j .compile_expression_end
+
+.compile_expression_variable:
+ _compile_variable_expression();
+ j .compile_expression_end;
- _build_binary_expression();
+.compile_expression_end:
+end;
- _skip_spaces();
- _advance(4);
+proc _compile_call();
+begin
+ _read_token();
+ sw a0, 20(sp)
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 16(sp)
- loca20 := 0x00646e65;
- loca16 := 0x66694c2e;
+ # Skip the identifier and left paren.
+ addi a0, a0, 1
+ _advance_token();
- loca12 := 0x7a716562;
- _write_s(4, @loca12);
- loca12 := 0x2c306120;
- _write_s(4, @loca12);
- _write_c(0x20);
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
- loca24 := _label_counter(1);
- _write_s(4, @loca16);
- _write_i(loca24);
+ li t1, ')'
+ beq t0, t1, .compile_call_finalize
- _write_c(0x0a);
+ _compile_expression();
- .Lcompile_if_loop;
- _skip_spaces();
- loca12 := _read_token();
+.compile_call_finalize:
+ la a0, asm_call
+ _write_z();
- loca4 := _current();
- loca8 := _token_compare(loca4, loca12, @loca20);
+ lw a0, 16(sp)
+ lw a1, 20(sp)
+ _write();
- if loca8 then
- loca12 := _read_line();
- _compile_line(loca12, 1);
+ # Skip the right paren.
+ li a0, 1
+ _advance_token();
+end;
- goto .Lcompile_if_loop
- end;
+proc _compile_goto();
+begin
+ li a0, 5
+ _advance_token();
- _write_s(4, @loca16);
- _write_i(loca24);
+ _read_token();
+ sw a0, 20(sp)
- loca12 := 0x0a3a0a3a;
- _write_s(2, @loca12);
+ la a0, asm_j
+ _write_z();
- _advance(4)
-end
+ lw a0, 20(sp)
+ _write_token();
+ _advance_token();
+end;
-proc _compile_line(loca84: Word, loca80: Bool)
-var
- loca0: Char
- loca4: Int
- loca8: Bool
- loca12: Word
- loca16: ^Byte
+proc _compile_statement();
begin
- if loca84 = 0 then
- goto .Lcompile_line_empty
- end;
-
- loca16 := _current();
- loca0 := _front(loca16);
-
- loca12 := 0x676f7270;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_program
- end;
-
- loca12 := 0x0a726176;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_var
- end;
-
- loca12 := 0x636f7270;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_procedure
- end;
-
- loca12 := 0x69676562;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_begin
- end;
-
- loca12 := 0x2e646e65;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_exit
- end;
-
- loca12 := 0x61636f6c;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_identifier
- end;
- loca4 := _front(loca16);
- if loca4 = 0x73 then
- goto .Lcompile_line_identifier
- end;
-
- loca12 := 0x6f706d69;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_import
- end;
-
- loca12 := 0x6f746f67;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_goto
- end;
-
- loca12 := 0x75746572;
- loca4 := _memcmp(loca16, @loca12, 4);
- if loca4 = 0 then
- goto .Lcompile_line_return
- end;
-
- loca12 := 0x6669;
- loca4 := _memcmp(loca16, @loca12, 2);
- if loca4 = 0 then
- goto .Lcompile_line_if
- end;
-
- if loca0 = 0x2e then
- goto .Lcompile_line_label
- end;
- if loca0 = 0x5f then
- goto .Lcompile_line_identifier
- end;
- goto .Lcompile_line_unchanged;
-
- .Lcompile_line_if;
- _compile_if();
- goto .Lcompile_line_section;
-
- .Lcompile_line_label;
- _compile_label(loca84);
- goto .Lcompile_line_section;
-
- .Lcompile_line_return;
- _compile_return();
- goto .Lcompile_line_section;
-
- .Lcompile_line_goto;
+ # This is a call if the statement starts with an underscore.
+ la t0, source_code_position
+ lw t0, (t0)
+ # First character after alignment tab.
+ addi t0, t0, 1
+ lb t0, (t0)
+
+ li t1, '_'
+ beq t0, t1, .compile_statement_call
+
+ li t1, 'g'
+ beq t0, t1, .compile_statement_goto
+
+ _compile_line();
+ j .compile_statement_end
+
+.compile_statement_call:
+ li a0, 1
+ _advance_token();
+ _compile_call();
+
+ j .compile_statement_semicolon
+
+.compile_statement_goto:
+ li a0, 1
+ _advance_token();
_compile_goto();
- goto .Lcompile_line_section;
-
- .Lcompile_line_import;
- _compile_import();
- goto .Lcompile_line_section;
-
- .Lcompile_line_identifier;
- _compile_identifier();
- goto .Lcompile_line_section;
-
- .Lcompile_line_exit;
- _compile_exit();
- goto .Lcompile_line_section;
-
- .Lcompile_line_begin;
-
- if loca80 = 1 then
- goto .Lcompile_line_compile_entry
- end;
- _compile_text_section();
- .Lcompile_line_compile_entry;
- _compile_entry_point();
- loca8 := 1;
- goto .Lcompile_line_end;
-
- .Lcompile_line_procedure;
- if loca80 = 1 then
- goto .Lcompile_line_compile_procedure
- end;
- _compile_text_section();
- .Lcompile_line_compile_procedure;
- _compile_procedure();
- loca8 := 1;
- goto .Lcompile_line_end;
- .Lcompile_line_var;
- _compile_variable_section();
- goto .Lcompile_line_section;
+ j .compile_statement_semicolon
- .Lcompile_line_program;
- _compile_program();
- goto .Lcompile_line_section;
+.compile_statement_semicolon:
+ li a0, 2
+ _advance_token();
- .Lcompile_line_empty;
- _advance(1);
- goto .Lcompile_line_section;
+ li a0, '\n'
+ _write_c();
- .Lcompile_line_unchanged;
- _compile_assembly(loca84);
- goto .Lcompile_line_section;
+.compile_statement_end:
+end;
- .Lcompile_line_section;
- loca8 := 0;
+proc _compile_procedure_body();
+begin
+.compile_procedure_body_loop:
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_end
+ li a2, KEYWORD_END_SIZE
+ _memcmp();
+
+ beqz a0, .compile_procedure_body_epilogue
- .Lcompile_line_end;
- _skip_spaces();
+ _compile_statement();
+ j .compile_procedure_body_loop
- return loca8
-end
+.compile_procedure_body_epilogue:
+end;
-proc _compile_text_section()
-var loca0: Word
+proc _compile_procedure();
begin
- loca0 := 0x6365732e;
- _write_s(4, @loca0);
- loca0 := 0x6e6f6974;
- _write_s(4, @loca0);
- loca0 := 0x65742e20;
- _write_s(4, @loca0);
- loca0 := 0x0a7478;
- _write_s(3, @loca0)
-end
-
-proc _compile_entry_point()
-var loca0: Word
+ # Skip "proc ".
+ li a0, KEYWORD_PROC_SIZE
+ _advance_token();
+
+ _read_token();
+ sw a0, 20(sp) # Save the procedure name length.
+
+ # Write .type _procedure_name, @function.
+ la a0, asm_type_directive
+ _write_z();
+
+ lw a0, 20(sp)
+ _write_token();
+
+ la a0, asm_type_function
+ _write_z();
+
+ # Write procedure label, _procedure_name:
+ lw a0, 20(sp)
+ _write_token();
+
+ la a0, asm_colon
+ _write_z();
+
+ # Skip the function name and trailing parens, semicolon, "begin" and newline.
+ lw a0, 20(sp)
+ addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4
+ _advance_token();
+
+ la a0, asm_prologue
+ _write_z();
+
+ _compile_procedure_body();
+
+ # Write the epilogue.
+ la a0, asm_epilogue
+ _write_z();
+
+ li a0, KEYWORD_END_SIZE + 2
+ _advance_token();
+end;
+
+proc _compile_type();
begin
- loca0 := 0x7079742e;
- _write_s(4, @loca0);
- loca0 := 0x735f2065;
- _write_s(4, @loca0);
- loca0 := 0x74726174;
- _write_s(4, @loca0);
- loca0 := 0x6640202c;
- _write_s(4, @loca0);
- loca0 := 0x74636e75;
- _write_s(4, @loca0);
- loca0 := 0x0a6e6f69;
- _write_s(4, @loca0);
- loca0 := 0x6174735f;
- _write_s(4, @loca0);
- loca0 := 0x0a3a7472;
- _write_s(4, @loca0);
-
- _advance(6)
-end
-
-proc _compile_exit()
-var loca0: Word
+ # Print and skip the .type directive and a space after it.
+ li a0, KEYWORD_TYPE_SIZE + 1
+ _write_token();
+ _advance_token();
+
+ # Read and print the symbol name.
+ _read_token();
+ sw a0, 20(sp)
+
+ # Print and skip the symbol name, comma, space and @.
+ lw a0, 20(sp)
+ addi a0, a0, 3
+ _write_token();
+ _advance_token();
+
+ # Read the symbol type.
+ _read_token();
+ sw a0, 16(sp)
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 12(sp)
+
+ # Print the symbol type and newline.
+ lw a0, 16(sp)
+ addi a0, a0, 1
+ _write_token();
+ _advance_token();
+
+ # Write the object definition itself.
+ _compile_line();
+
+.compile_type_end:
+end;
+
+proc _compile_equ();
begin
- loca0 := 0x6120696c;
- _write_s(4, @loca0);
- loca0 := 0x30202c30;
- _write_s(4, @loca0);
- loca0 := 0x20696c0a;
- _write_s(4, @loca0);
- loca0 := 0x202c3761;
- _write_s(4, @loca0);
- loca0 := 0x650a3339;
- _write_s(4, @loca0);
- loca0 := 0x6c6c6163;
- _write_s(4, @loca0);
- loca0 := 0x0a;
- _write_s(1, @loca0);
-
- _advance(4);
- _skip_spaces()
-end
-
-proc _read_line()
-var
- loca0: ^Byte
- loca4: Byte
+ # Print and skip the .equ directive and a space after it.
+ li a0, KEYWORD_EQU_SIZE + 1
+ _write_token();
+ _advance_token();
+
+ # Read and print the constant name.
+ _read_token();
+ sw a0, 20(sp)
+
+ # Print and skip the constant name, comma and space.
+ lw a0, 20(sp)
+ addi a0, a0, 2
+ _write_token();
+ _advance_token();
+
+ # Read the constant value.
+ _read_token();
+ sw a0, 16(sp)
+
+ # Print and skip the constant value and newline.
+ lw a0, 16(sp)
+ addi a0, a0, 1
+ _write_token();
+ _advance_token();
+end;
+
+proc _skip_newlines();
begin
- loca0 := _current();
-
- .Lread_line_do;
- loca4 := _front(loca0);
- if loca4 = 0 then
- goto .Lread_line_end
- end;
- if loca4 = 0x0a then
- goto .Lread_line_end
- end;
- loca0 := loca0 + 1;
- goto .Lread_line_do;
-
- .Lread_line_end;
- loca4 := _current();
- return loca0 - loca4
-end
-
-proc _compile()
-var
- loca0: Word
- loca4: Word
- loca8: Bool
- loca12: Char
- loca16: ^Byte
+ # Skip newlines.
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_newlines_loop:
+ lb t2, (t1)
+ li t3, '\n'
+ bne t2, t3, .skip_newlines_end
+ beqz t2, .skip_newlines_end
+
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ j .skip_newlines_loop
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
begin
- loca4 := 0;
+.compile_loop:
+ _skip_newlines();
- .Lcompile_do;
- loca16 := _current();
- loca12 := _front(loca16);
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+ beqz t0, .compile_end
+ li t1, '#'
+ beq t0, t1, .compile_comment
- if loca12 = 0 then
- goto .Lcompile_end
- end;
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_equ
+ li a2, KEYWORD_EQU_SIZE
+ _memcmp();
- _skip_spaces();
- loca0 := _read_line();
- loca8 := _compile_line(loca0, loca4);
+ beqz a0, .compile_equ
- if loca8 = 0 then
- goto .Lcompile_do
- end;
- loca4 := loca4 or loca8;
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_section
+ li a2, KEYWORD_SECTION_SIZE
+ _memcmp();
- goto .Lcompile_do;
- .Lcompile_end
-end
+ beqz a0, .compile_section
-proc _front(loca84: ^Word)
-begin
- return _get(loca84) & 0xff
-end
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_type
+ li a2, KEYWORD_TYPE_SIZE
+ _memcmp();
-proc _main()
-begin
- _read_file(source_code, 81920);
+ beqz a0, .compile_type
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_proc
+ li a2, KEYWORD_PROC_SIZE
+ _memcmp();
+
+ beqz a0, .compile_procedure
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_global
+ li a2, KEYWORD_GLOBAL_SIZE
+ _memcmp();
+
+ beqz a0, .compile_global
+ # Not a known token, exit.
+ j .compile_end
+
+.compile_equ:
+ _compile_equ();
+
+ j .compile_loop
+
+.compile_section:
+ _compile_section();
+
+ j .compile_loop
+
+.compile_type:
+ _compile_type();
+
+ j .compile_loop
+
+.compile_global:
+ _compile_line();
+
+ j .compile_loop
+
+.compile_comment:
+ _skip_comment();
+
+ j .compile_loop
+
+.compile_procedure:
+ _compile_procedure();
+
+ j .compile_loop
- _label_counter(0)
-end
+.compile_end:
+end;
+# Entry point.
+.globl _start
+proc _start();
begin
- _main();
- _compile()
-end.
+ # Read the source from the standard input.
+ la a0, source_code
+ li a1, SOURCE_BUFFER_SIZE # Buffer size.
+ _read_file();
+ _compile();
+
+ # Call exit.
+ li a0, 0 # Use 0 return code.
+ li a7, 93 # SYS_EXIT.
+ ecall
+end;
diff --git a/boot/stage3.elna b/boot/stage3.elna
new file mode 100644
index 0000000..2b31775
--- /dev/null
+++ b/boot/stage3.elna
@@ -0,0 +1,975 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+# Stage3 compiler.
+#
+# - Procedures with no arguments or with one argument.
+# - Goto statements.
+# - Character and integer literals.
+# - Passing local variables to procedures.
+# - Local variables should have the format: v00,
+# where 00 is its offset from the sp register.
+
+.section .rodata
+
+.type keyword_section, @object
+keyword_section: .ascii ".section"
+
+.type keyword_type, @object
+keyword_type: .ascii ".type"
+
+.type keyword_ret, @object
+keyword_ret: .ascii "ret"
+
+.type keyword_global, @object
+keyword_global: .ascii ".globl"
+
+.type keyword_proc, @object
+keyword_proc: .ascii "proc "
+
+.type keyword_end, @object
+keyword_end: .ascii "end"
+
+.type keyword_begin, @object
+keyword_begin: .ascii "begin"
+
+.type keyword_var, @object
+keyword_var: .ascii "var"
+
+.type asm_prologue, @object
+asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
+
+.type asm_epilogue, @object
+asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
+
+.type asm_type_directive, @object
+asm_type_directive: .string ".type "
+
+.type asm_type_function, @object
+asm_type_function: .string ", @function\n"
+
+.type asm_colon, @object
+asm_colon: .string ":\n"
+
+.type asm_call, @object
+asm_call: .string "\tcall "
+
+.type asm_j, @object
+asm_j: .string "\tj "
+
+.type asm_li, @object
+asm_li: .string "\tli "
+
+.type asm_lw, @object
+asm_lw: .string "\tlw "
+
+.type asm_sw, @object
+asm_sw: .string "\tsw "
+
+.type asm_mv, @object
+asm_mv: .string "mv "
+
+.type asm_t0, @object
+asm_t0: .string "t0"
+
+.type asm_a0, @object
+asm_a0: .string "a0"
+
+.type asm_comma, @object
+asm_comma: .string ", "
+
+.type asm_sp, @object
+asm_sp: .string "(sp)"
+
+.section .bss
+
+# When modifying, also change the read size in the entry point procedure.
+.type source_code, @object
+source_code: .zero 81920
+
+.section .data
+
+.type source_code_position, @object
+source_code_position: .word source_code
+
+.section .text
+
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the amount of bytes written in a0.
+proc _read_file();
+begin
+ mv a2, a1
+ mv a1, a0
+ # STDIN.
+ li a0, 0
+ li a7, 63 # SYS_READ.
+ ecall
+end;
+
+# Writes to the standard output.
+#
+# Parameters:
+# a0 - Buffer.
+# a1 - Buffer length.
+proc _write_s();
+begin
+ mv a2, a1
+ mv a1, a0
+ # STDOUT.
+ li a0, 1
+ li a7, 64 # SYS_WRITE.
+ ecall
+end;
+
+# Writes a number to a string buffer.
+#
+# t0 - Local buffer.
+# t1 - Constant 10.
+# t2 - Current character.
+# t3 - Whether the number is negative.
+#
+# Parameters:
+# a0 - Whole number.
+# a1 - Buffer pointer.
+#
+# Sets a0 to the length of the written number.
+proc _print_i();
+begin
+ li t1, 10
+ addi t0, s0, -9
+
+ li t3, 0
+ bgez a0, .print_i_digit10
+ li t3, 1
+ neg a0, a0
+
+.print_i_digit10:
+ rem t2, a0, t1
+ addi t2, t2, '0'
+ sb t2, 0(t0)
+ div a0, a0, t1
+ addi t0, t0, -1
+ bne zero, a0, .print_i_digit10
+
+ beq zero, t3, .print_i_write_call
+ addi t2, zero, '-'
+ sb t2, 0(t0)
+ addi t0, t0, -1
+
+.print_i_write_call:
+ mv a0, a1
+ addi a1, t0, 1
+ sub a2, s0, t0
+ addi a2, a2, -9
+ sw a2, 0(sp)
+
+ _memcpy();
+
+ lw a0, 0(sp)
+end;
+
+# Writes a number to the standard output.
+#
+# Parameters:
+# a0 - Whole number.
+proc _write_i();
+begin
+ addi a1, sp, 0
+ _print_i();
+
+ mv a1, a0
+ addi a0, sp, 0
+ _write_s();
+
+end;
+
+# Writes a character from a0 into the standard output.
+proc _write_c();
+begin
+ sb a0, 0(sp)
+ addi a0, sp, 0
+ li a1, 1
+ _write_s();
+end;
+
+# Write null terminated string.
+#
+# Parameters:
+# a0 - String.
+proc _write_z();
+begin
+ sw a0, 0(sp)
+
+.write_z_loop:
+ # Check for 0 character.
+ lb a0, (a0)
+ beqz a0, .write_z_end
+
+ # Print a character.
+ lw a0, 0(sp)
+ lb a0, (a0)
+ _write_c();
+
+ # Advance the input string by one byte.
+ lw a0, 0(sp)
+ addi a0, a0, 1
+ sw a0, 0(sp)
+
+ goto .write_z_loop;
+
+.write_z_end:
+end;
+
+# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_upper();
+begin
+ li t0, 'A' - 1
+ sltu t1, t0, a0 # t1 = a0 >= 'A'
+
+ sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+ and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
+end;
+
+# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_lower();
+begin
+ li t0, 'a' - 1
+ sltu t2, t0, a0 # t2 = a0 >= 'a'
+
+ sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+ and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
+end;
+
+# Detects if the passed character is a 7-bit alpha character or an underscore.
+#
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+proc _is_alpha();
+begin
+ sw a0, 0(sp)
+
+ _is_upper();
+ sw a0, 4(sp)
+
+ _is_lower(v00);
+
+ lw t0, 0(sp)
+ xori t1, t0, '_'
+ seqz t1, t1
+
+ lw t0, 4(sp)
+ or a0, a0, t0
+ or a0, a0, t1
+end;
+
+# Detects whether the passed character is a digit
+# (a value between 0 and 9).
+#
+# Parameters:
+# a0 - Examined value.
+#
+# Sets a0 to 1 if it is a digit, to 0 otherwise.
+proc _is_digit();
+begin
+ li t0, '0' - 1
+ sltu t1, t0, a0 # t1 = a0 >= '0'
+
+ sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
+
+ and a0, t1, t2
+end;
+
+proc _is_alnum();
+begin
+ sw a0, 4(sp)
+
+ _is_alpha();
+ sw a0, 0(sp)
+
+ _is_digit(v04);
+
+ lw a1, 0(sp)
+ or a0, a0, a1
+end;
+
+# Reads the next token.
+#
+# Returns token length in a0.
+proc _read_token();
+begin
+ la t0, source_code_position # Token pointer.
+ lw t0, (t0)
+ sw t0, 0(sp) # Local cursor, starts at the current source position.
+ sw zero, 4(sp) # Token length.
+
+.read_token_loop:
+ lb t0, (t0) # Current character.
+
+ # First we try to read a directive.
+ # A directive can contain a dot and characters.
+ li t1, '.'
+ beq t0, t1, .read_token_next
+
+ lw a0, 0(sp)
+ lb a0, (a0)
+ _is_alnum();
+ bnez a0, .read_token_next
+
+ goto .read_token_end;
+
+.read_token_next:
+ # Count the character and move the local cursor; source_code_position is not modified here.
+ lw t0, 4(sp)
+ addi t0, t0, 1
+ sw t0, 4(sp)
+
+ lw t0, 0(sp)
+ addi t0, t0, 1
+ sw t0, 0(sp)
+
+ goto .read_token_loop;
+
+.read_token_end:
+ lw a0, 4(sp)
+end;
+
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
+#
+# Returns 0 in a0 if memory regions are equal.
+proc _memcmp();
+begin
+ mv t0, a0
+ li a0, 0
+
+.memcmp_loop:
+ beqz a2, .memcmp_end
+
+ lbu t1, (t0)
+ lbu t2, (a1)
+ sub a0, t1, t2
+
+ bnez a0, .memcmp_end
+
+ addi t0, t0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1
+
+ goto .memcmp_loop;
+
+.memcmp_end:
+end;
+
+# Copies memory.
+#
+# Parameters:
+# a0 - Destination.
+# a1 - Source.
+# a2 - Size.
+#
+# Preserves a0.
+proc _memcpy();
+begin
+ mv t0, a0 # Remember the destination, a0 is used as the write cursor.
+
+.memcpy_loop:
+ beqz a2, .memcpy_end
+
+ lbu t1, (a1) # Copy one byte from the source to the destination.
+ sb t1, (a0)
+
+ addi a0, a0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1 # One byte less to copy.
+
+ goto .memcpy_loop;
+
+.memcpy_end:
+ mv a0, t0 # Hand the original destination pointer back to the caller.
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
+ add t1, t1, a0
+ sw t1, (t0)
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
+begin
+ sw a0, 0(sp)
+
+ la a0, source_code_position
+ lw a0, (a0)
+ lw a1, 0(sp)
+ _write_s();
+
+ lw a0, 0(sp)
+end;
+
+proc _compile_section();
+begin
+ # Print and skip the ".section" (8 characters) directive and a space after it.
+ _write_token(9);
+ _advance_token();
+
+ # Read the section name.
+ _read_token();
+ addi a0, a0, 1
+
+ _write_token();
+ _advance_token();
+end;
+
+# Skips the rest of the current line, including the newline, without printing it.
+proc _skip_comment();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_comment_loop:
+ # Check for newline character.
+ lb t2, (t1)
+ li t3, '\n'
+ beq t2, t3, .skip_comment_end
+
+ # Advance the input string by one byte.
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ goto .skip_comment_loop;
+
+.skip_comment_end:
+ # Skip the newline.
+ addi t1, t1, 1
+ sw t1, (t0)
+end;
+
+# Prints and skips a line.
+proc _compile_line();
+begin
+.compile_line_loop:
+ la a0, source_code_position
+ lw a1, (a0) # a1 = pointer to the current source character.
+
+ lb t0, (a1)
+ li t1, '\n'
+ beq t0, t1, .compile_line_end
+
+ # Print a character. Load one byte only: the position is an arbitrary, usually unaligned address.
+ lb a0, (a1)
+ _write_c();
+
+ # Advance the input string by one byte.
+ _advance_token(1);
+
+ goto .compile_line_loop;
+
+.compile_line_end:
+ _write_c('\n');
+
+ _advance_token(1);
+end;
+
+proc _compile_integer_literal();
+begin
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ _write_c('\n');
+end;
+
+proc _compile_character_literal();
+begin
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ li a0, '\''
+ _write_c();
+ _advance_token(1);
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ li t1, '\\'
+ bne a0, t1, .compile_character_literal_end
+
+ li a0, '\\'
+ _write_c();
+ _advance_token(1);
+
+.compile_character_literal_end:
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ _write_c();
+
+ li a0, '\''
+ _write_c();
+
+ _write_c('\n');
+
+ _advance_token(2);
+
+end;
+
+proc _compile_variable_expression();
+begin
+ la a0, asm_lw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _advance_token(1);
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+end;
+
+proc _compile_expression();
+begin
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0) # The first character selects the kind of expression.
+
+ li t1, '\'' # A single quote starts a character literal.
+ beq a0, t1, .compile_expression_character_literal
+
+ li t1, 'v' # A leading v is treated as a local variable reference.
+ beq a0, t1, .compile_expression_variable
+
+ _is_digit();
+ bnez a0, .compile_expression_integer_literal
+
+ goto .compile_expression_end;
+
+.compile_expression_character_literal:
+ _compile_character_literal();
+ goto .compile_expression_end;
+
+.compile_expression_integer_literal:
+ _compile_integer_literal();
+ goto .compile_expression_end;
+
+.compile_expression_variable:
+ _compile_variable_expression();
+ goto .compile_expression_end;
+
+.compile_expression_end:
+end;
+
+proc _compile_call();
+begin
+ # Stack variables:
+ # v0 - Procedure name length.
+ # v4 - Procedure name pointer.
+ # v8 - Argument count.
+
+ _read_token();
+ sw a0, 0(sp)
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 4(sp)
+
+ sw zero, 8(sp)
+
+ # Skip the identifier and left paren.
+ addi a0, a0, 1
+ _advance_token();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+
+ li t1, ')'
+ beq t0, t1, .compile_call_finalize
+
+.compile_call_loop:
+ _compile_expression();
+
+ # Save the argument on the stack.
+ la a0, asm_sw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ # Calculate the stack offset: 20 - (4 * argument_counter)
+ lw t0, 8(sp)
+ li t1, 4
+ mul t0, t0, t1
+ li t1, 20
+ sub a0, t1, t0
+ _write_i();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+ # Add one to the argument counter.
+ lw t0, 8(sp)
+ addi t0, t0, 1
+ sw t0, 8(sp)
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+
+ li t1, ','
+ bne t0, t1, .compile_call_finalize
+
+ _advance_token(2);
+ goto .compile_call_loop;
+
+.compile_call_finalize:
+ # Load the argument from the stack.
+
+ lw t0, 8(sp)
+ beqz t0, .compile_call_end
+
+ # Decrement the argument counter.
+ lw t0, 8(sp)
+ addi t0, t0, -1
+ sw t0, 8(sp)
+
+ la a0, asm_lw
+ _write_z();
+
+ _write_c('a');
+ lw a0, 8(sp)
+ _write_i();
+
+ la a0, asm_comma
+ _write_z();
+
+ # Calculate the stack offset: 20 - (4 * argument_counter)
+ lw t0, 8(sp)
+ li t1, 4
+ mul t0, t0, t1
+ li t1, 20
+ sub a0, t1, t0
+ _write_i();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+ goto .compile_call_finalize;
+
+.compile_call_end:
+ la a0, asm_call
+ _write_z();
+
+ lw a0, 4(sp)
+ lw a1, 0(sp)
+ _write_s();
+
+ # Skip the right paren.
+ _advance_token(1);
+end;
+
+proc _compile_goto();
+begin
+ _advance_token(5);
+
+ _read_token();
+ sw a0, 0(sp)
+
+ la a0, asm_j
+ _write_z();
+
+ _write_token(v00);
+ _advance_token();
+end;
+
+proc _compile_statement();
+begin
+ # This is a call if the statement starts with an underscore.
+ la t0, source_code_position
+ lw t0, (t0)
+ # First character after alignment tab.
+ addi t0, t0, 1
+ lb t0, (t0)
+
+ li t1, '_'
+ beq t0, t1, .compile_statement_call
+
+ li t1, 'g'
+ beq t0, t1, .compile_statement_goto
+
+ _compile_line();
+ goto .compile_statement_end;
+
+.compile_statement_call:
+ _advance_token(1);
+ _compile_call();
+
+ goto .compile_statement_semicolon;
+
+.compile_statement_goto:
+ _advance_token(1);
+ _compile_goto();
+
+ goto .compile_statement_semicolon;
+
+.compile_statement_semicolon:
+ _advance_token(2);
+
+ _write_c('\n');
+
+.compile_statement_end:
+end;
+
+proc _compile_procedure_body();
+begin
+.compile_procedure_body_loop:
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_end
+ li a2, 3 # "end" length.
+ _memcmp();
+
+ beqz a0, .compile_procedure_body_epilogue
+
+ _compile_statement();
+ goto .compile_procedure_body_loop;
+
+.compile_procedure_body_epilogue:
+end;
+
+proc _compile_procedure();
+begin
+ # Skip "proc ".
+ _advance_token(5);
+
+ _read_token();
+ sw a0, 0(sp) # Save the procedure name length.
+
+ # Write .type _procedure_name, @function.
+ la a0, asm_type_directive
+ _write_z();
+
+ _write_token(v00);
+
+ la a0, asm_type_function
+ _write_z();
+
+ # Write procedure label, _procedure_name:
+ _write_token(v00);
+
+ la a0, asm_colon
+ _write_z();
+
+ # Skip the function name and trailing parens, semicolon, "begin" and newline.
+ lw a0, 0(sp)
+ addi a0, a0, 10
+ _advance_token();
+
+ la a0, asm_prologue
+ _write_z();
+
+ _compile_procedure_body();
+
+ # Write the epilogue.
+ la a0, asm_epilogue
+ _write_z();
+
+ # Skip the "end" keyword, semicolon and newline.
+ _advance_token(5);
+end;
+
+proc _compile_type();
+begin
+ # Print and skip the ".type" (5 characters) directive and a space after it.
+ _write_token(6);
+ _advance_token();
+
+ # Read and print the symbol name.
+ _read_token();
+
+ # Print and skip the symbol name, comma, space and @.
+ addi a0, a0, 3
+ _write_token();
+ _advance_token();
+
+ # Read the symbol type.
+ _read_token();
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 12(sp)
+
+ # Print the symbol type and newline.
+ addi a0, a0, 1
+ _write_token();
+ _advance_token();
+
+ # Write the object definition itself.
+ _compile_line();
+
+.compile_type_end:
+end;
+
+proc _skip_newlines();
+begin
+ # Advance source_code_position past any run of consecutive newline characters.
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_newlines_loop:
+ lb t2, (t1)
+ li t3, '\n'
+ bne t2, t3, .skip_newlines_end
+ # A terminating NUL is not a newline either, so the branch above also stops at the end of input.
+
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
+begin
+.compile_loop:
+ _skip_newlines();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+ beqz t0, .compile_end
+ li t1, '#'
+ beq t0, t1, .compile_comment
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_section
+ li a2, 8 # ".section" length.
+ _memcmp();
+
+ beqz a0, .compile_section
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_type
+ li a2, 5 # ".type" length.
+ _memcmp();
+
+ beqz a0, .compile_type
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_proc
+ li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
+ _memcmp();
+
+ beqz a0, .compile_procedure
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_global
+ li a2, 6 # ".globl" length.
+ _memcmp();
+
+ beqz a0, .compile_global
+ # Not a known token, exit.
+ goto .compile_end;
+
+.compile_section:
+ _compile_section();
+
+ goto .compile_loop;
+
+.compile_type:
+ _compile_type();
+
+ goto .compile_loop;
+
+.compile_global:
+ _compile_line();
+
+ goto .compile_loop;
+
+.compile_comment:
+ _skip_comment();
+
+ goto .compile_loop;
+
+.compile_procedure:
+ _compile_procedure();
+
+ goto .compile_loop;
+
+.compile_end:
+end;
+
+# Terminates the program. a0 contains the return code.
+#
+# Parameters:
+# a0 - Status code.
+proc _exit();
+begin
+ li a7, 93 # SYS_EXIT
+ ecall
+end;
+
+# Entry point.
+.globl _start
+proc _start();
+begin
+ # Read the source from the standard input.
+ la a0, source_code
+ li a1, 81920 # Buffer size.
+ _read_file();
+ _compile();
+
+ _exit(0);
+
+end;
diff --git a/boot/stage4.elna b/boot/stage4.elna
new file mode 100644
index 0000000..d6bbb9d
--- /dev/null
+++ b/boot/stage4.elna
@@ -0,0 +1,969 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+# Stage3 compiler.
+#
+# - Procedures with no arguments or with one argument.
+# - Goto statements.
+# - Character and integer literals.
+# - Passing local variables to procedures.
+# - Local variables should have the format: v00,
+# where 00 is its offset from the sp register.
+
+.section .rodata
+
+.type keyword_section, @object
+keyword_section: .ascii ".section"
+
+.type keyword_type, @object
+keyword_type: .ascii ".type"
+
+.type keyword_ret, @object
+keyword_ret: .ascii "ret"
+
+.type keyword_global, @object
+keyword_global: .ascii ".globl"
+
+.type keyword_proc, @object
+keyword_proc: .ascii "proc "
+
+.type keyword_end, @object
+keyword_end: .ascii "end"
+
+.type keyword_begin, @object
+keyword_begin: .ascii "begin"
+
+.type keyword_var, @object
+keyword_var: .ascii "var"
+
+.type asm_prologue, @object
+asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
+
+.type asm_epilogue, @object
+asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
+
+.type asm_type_directive, @object
+asm_type_directive: .string ".type "
+
+.type asm_type_function, @object
+asm_type_function: .string ", @function\n"
+
+.type asm_colon, @object
+asm_colon: .string ":\n"
+
+.type asm_call, @object
+asm_call: .string "\tcall "
+
+.type asm_j, @object
+asm_j: .string "\tj "
+
+.type asm_li, @object
+asm_li: .string "\tli "
+
+.type asm_lw, @object
+asm_lw: .string "\tlw "
+
+.type asm_sw, @object
+asm_sw: .string "\tsw "
+
+.type asm_mv, @object
+asm_mv: .string "mv "
+
+.type asm_t0, @object
+asm_t0: .string "t0"
+
+.type asm_a0, @object
+asm_a0: .string "a0"
+
+.type asm_comma, @object
+asm_comma: .string ", "
+
+.type asm_sp, @object
+asm_sp: .string "(sp)"
+
+.section .bss
+
+# When modifying, also change the read size in the entry point procedure.
+.type source_code, @object
+source_code: .zero 81920
+
+.section .data
+
+.type source_code_position, @object
+source_code_position: .word source_code
+
+.section .text
+
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the amount of bytes written in a0.
+proc _read_file();
+begin
+ mv a2, a1
+ mv a1, a0
+ # STDIN.
+ li a0, 0
+ li a7, 63 # SYS_READ.
+ ecall
+end;
+
+# Writes to the standard output.
+#
+# Parameters:
+# a0 - Buffer.
+# a1 - Buffer length.
+proc _write_s();
+begin
+ mv a2, a1
+ mv a1, a0
+ # STDOUT.
+ li a0, 1
+ li a7, 64 # SYS_WRITE.
+ ecall
+end;
+
+# Writes a number to a string buffer.
+#
+# t0 - Local buffer.
+# t1 - Constant 10.
+# t2 - Current character.
+# t3 - Whether the number is negative.
+#
+# Parameters:
+# a0 - Whole number.
+# a1 - Buffer pointer.
+#
+# Sets a0 to the length of the written number.
+proc _print_i();
+begin
+ li t1, 10
+ addi t0, s0, -9
+
+ li t3, 0
+ bgez a0, .print_i_digit10
+ li t3, 1
+ neg a0, a0
+
+.print_i_digit10:
+ rem t2, a0, t1
+ addi t2, t2, '0'
+ sb t2, 0(t0)
+ div a0, a0, t1
+ addi t0, t0, -1
+ bne zero, a0, .print_i_digit10
+
+ beq zero, t3, .print_i_write_call
+ addi t2, zero, '-'
+ sb t2, 0(t0)
+ addi t0, t0, -1
+
+.print_i_write_call:
+ mv a0, a1
+ addi a1, t0, 1
+ sub a2, s0, t0
+ addi a2, a2, -9
+ sw a2, 0(sp)
+
+ _memcpy();
+
+ lw a0, 0(sp)
+end;
+
+# Writes a number to the standard output.
+#
+# Parameters:
+# a0 - Whole number.
+proc _write_i();
+begin
+ addi a1, sp, 0
+ _print_i();
+
+ mv a1, a0
+ addi a0, sp, 0
+ _write_s();
+
+end;
+
+# Writes a character from a0 into the standard output.
+proc _write_c();
+begin
+ sb a0, 0(sp)
+ addi a0, sp, 0
+ li a1, 1
+ _write_s();
+end;
+
+# Write null terminated string.
+#
+# Parameters:
+# a0 - String.
+proc _write_z();
+begin
+ sw a0, 0(sp)
+
+.write_z_loop:
+ # Check for 0 character.
+ lb a0, (a0)
+ beqz a0, .write_z_end
+
+ # Print a character.
+ lw a0, 0(sp)
+ lb a0, (a0)
+ _write_c();
+
+ # Advance the input string by one byte.
+ lw a0, 0(sp)
+ addi a0, a0, 1
+ sw a0, 0(sp)
+
+ goto .write_z_loop;
+
+.write_z_end:
+end;
+
+# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_upper();
+begin
+ li t0, 'A' - 1
+ sltu t1, t0, a0 # t1 = a0 >= 'A'
+
+ sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+ and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
+end;
+
+# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_lower();
+begin
+ li t0, 'a' - 1
+ sltu t2, t0, a0 # t2 = a0 >= 'a'
+
+ sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+ and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
+end;
+
+# Detects if the passed character is a 7-bit alpha character or an underscore.
+#
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+proc _is_alpha();
+begin
+ sw a0, 0(sp)
+
+ _is_upper();
+ sw a0, 4(sp)
+
+ _is_lower(v00);
+
+ lw t0, 0(sp)
+ xori t1, t0, '_'
+ seqz t1, t1
+
+ lw t0, 4(sp)
+ or a0, a0, t0
+ or a0, a0, t1
+end;
+
+# Detects whether the passed character is a digit
+# (a value between 0 and 9).
+#
+# Parameters:
+# a0 - Examined value.
+#
+# Sets a0 to 1 if it is a digit, to 0 otherwise.
+proc _is_digit();
+begin
+ li t0, '0' - 1
+ sltu t1, t0, a0 # t1 = a0 >= '0'
+
+ sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
+
+ and a0, t1, t2
+end;
+
+proc _is_alnum();
+begin
+ sw a0, 4(sp)
+
+ _is_alpha();
+ sw a0, 0(sp)
+
+ _is_digit(v04);
+
+ lw a1, 0(sp)
+ or a0, a0, a1
+end;
+
+# Reads the next token.
+#
+# Returns token length in a0.
+proc _read_token();
+begin
+ la t0, source_code_position # Token pointer.
+ lw t0, (t0)
+ sw t0, 0(sp) # Local cursor, starts at the current source position.
+ sw zero, 4(sp) # Token length.
+
+.read_token_loop:
+ lb t0, (t0) # Current character.
+
+ # First we try to read a directive.
+ # A directive can contain a dot and characters.
+ li t1, '.'
+ beq t0, t1, .read_token_next
+
+ lw a0, 0(sp)
+ lb a0, (a0)
+ _is_alnum();
+ bnez a0, .read_token_next
+
+ goto .read_token_end;
+
+.read_token_next:
+ # Count the character and move the local cursor; source_code_position is not modified here.
+ lw t0, 4(sp)
+ addi t0, t0, 1
+ sw t0, 4(sp)
+
+ lw t0, 0(sp)
+ addi t0, t0, 1
+ sw t0, 0(sp)
+
+ goto .read_token_loop;
+
+.read_token_end:
+ lw a0, 4(sp)
+end;
+
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
+#
+# Returns 0 in a0 if memory regions are equal.
+proc _memcmp();
+begin
+ mv t0, a0
+ li a0, 0
+
+.memcmp_loop:
+ beqz a2, .memcmp_end
+
+ lbu t1, (t0)
+ lbu t2, (a1)
+ sub a0, t1, t2
+
+ bnez a0, .memcmp_end
+
+ addi t0, t0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1
+
+ goto .memcmp_loop;
+
+.memcmp_end:
+end;
+
+# Copies memory.
+#
+# Parameters:
+# a0 - Destination.
+# a1 - Source.
+# a2 - Size.
+#
+# Preserves a0.
+proc _memcpy();
+begin
+ mv t0, a0 # Remember the destination, a0 is used as the write cursor.
+
+.memcpy_loop:
+ beqz a2, .memcpy_end
+
+ lbu t1, (a1) # Copy one byte from the source to the destination.
+ sb t1, (a0)
+
+ addi a0, a0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1 # One byte less to copy.
+
+ goto .memcpy_loop;
+
+.memcpy_end:
+ mv a0, t0 # Hand the original destination pointer back to the caller.
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
+ add t1, t1, a0
+ sw t1, (t0)
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
+begin
+ sw a0, 0(sp)
+
+ la a0, source_code_position
+ lw a0, (a0)
+ lw a1, 0(sp)
+ _write_s();
+
+ lw a0, 0(sp)
+end;
+
+proc _compile_section();
+begin
+ # Print and skip the ".section" (8 characters) directive and a space after it.
+ _write_token(9);
+ _advance_token();
+
+ # Read the section name.
+ _read_token();
+ addi a0, a0, 1
+
+ _write_token();
+ _advance_token();
+end;
+
+# Skips the rest of the current line, including the newline, without printing it.
+proc _skip_comment();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_comment_loop:
+ # Check for newline character.
+ lb t2, (t1)
+ li t3, '\n'
+ beq t2, t3, .skip_comment_end
+
+ # Advance the input string by one byte.
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ goto .skip_comment_loop;
+
+.skip_comment_end:
+ # Skip the newline.
+ addi t1, t1, 1
+ sw t1, (t0)
+end;
+
+# Prints and skips a line.
+proc _compile_line();
+begin
+.compile_line_loop:
+ la a0, source_code_position
+ lw a1, (a0) # a1 = pointer to the current source character.
+
+ lb t0, (a1)
+ li t1, '\n'
+ beq t0, t1, .compile_line_end
+
+ # Print a character. Load one byte only: the position is an arbitrary, usually unaligned address.
+ lb a0, (a1)
+ _write_c();
+
+ # Advance the input string by one byte.
+ _advance_token(1);
+
+ goto .compile_line_loop;
+
+.compile_line_end:
+ _write_c('\n');
+
+ _advance_token(1);
+end;
+
+proc _compile_integer_literal();
+begin
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ _write_c('\n');
+end;
+
+proc _compile_character_literal();
+begin
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _write_c('\'');
+ _advance_token(1);
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ li t1, '\\'
+ bne a0, t1, .compile_character_literal_end
+
+ _write_c('\\');
+ _advance_token(1);
+
+.compile_character_literal_end:
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ _write_c();
+
+ _write_c('\'');
+ _write_c('\n');
+
+ _advance_token(2);
+
+end;
+
+proc _compile_variable_expression();
+begin
+ la a0, asm_lw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _advance_token(1);
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+end;
+
+proc _compile_expression();
+begin
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0) # The first character decides the kind of the expression.
+
+ li t1, '\'' # A single quote starts a character literal.
+ beq a0, t1, .compile_expression_character_literal
+
+ li t1, 'v' # The letter v starts a local variable reference.
+ beq a0, t1, .compile_expression_variable
+
+ _is_digit();
+ bnez a0, .compile_expression_integer_literal # A digit starts an integer literal.
+
+ goto .compile_expression_end;
+
+.compile_expression_character_literal:
+ _compile_character_literal();
+ goto .compile_expression_end;
+
+.compile_expression_integer_literal:
+ _compile_integer_literal();
+ goto .compile_expression_end;
+
+.compile_expression_variable:
+ _compile_variable_expression();
+ goto .compile_expression_end;
+
+.compile_expression_end:
+end;
+
+proc _compile_call();
+begin
+ # Stack variables:
+ # v00 - Procedure name length.
+ # v04 - Procedure name pointer.
+ # v08 - Argument count.
+
+ _read_token();
+ sw a0, 0(sp)
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 4(sp)
+
+ sw zero, 8(sp)
+
+ # Skip the identifier and left paren.
+ addi a0, a0, 1
+ _advance_token();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+
+ li t1, ')'
+ beq t0, t1, .compile_call_finalize
+
+.compile_call_loop:
+ _compile_expression();
+
+ # Save the argument on the stack.
+ la a0, asm_sw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ # Calculate the stack offset: 20 - (4 * argument_counter)
+ lw t0, 8(sp)
+ li t1, 4
+ mul t0, t0, t1
+ li t1, 20
+ sub a0, t1, t0
+ _write_i();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+ # Add one to the argument counter.
+ lw t0, 8(sp)
+ addi t0, t0, 1
+ sw t0, 8(sp)
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+
+ li t1, ','
+ bne t0, t1, .compile_call_finalize
+
+ _advance_token(2);
+ goto .compile_call_loop;
+
+.compile_call_finalize:
+ # Load the argument from the stack.
+
+ lw t0, 8(sp)
+ beqz t0, .compile_call_end
+
+ # Decrement the argument counter.
+ lw t0, 8(sp)
+ addi t0, t0, -1
+ sw t0, 8(sp)
+
+ la a0, asm_lw
+ _write_z();
+
+ _write_c('a');
+ lw a0, 8(sp)
+ _write_i();
+
+ la a0, asm_comma
+ _write_z();
+
+ # Calculate the stack offset: 20 - (4 * argument_counter)
+ lw t0, 8(sp)
+ li t1, 4
+ mul t0, t0, t1
+ li t1, 20
+ sub a0, t1, t0
+ _write_i();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+ goto .compile_call_finalize;
+
+.compile_call_end:
+ la a0, asm_call
+ _write_z();
+
+ _write_s(v04, v00);
+
+ # Skip the right paren.
+ _advance_token(1);
+end;
+
+proc _compile_goto();
+begin
+ _advance_token(5);
+
+ _read_token();
+ sw a0, 0(sp)
+
+ la a0, asm_j
+ _write_z();
+
+ _write_token(v00);
+ _advance_token();
+end;
+
+proc _compile_statement();
+begin
+ # This is a call if the statement starts with an underscore.
+ la t0, source_code_position
+ lw t0, (t0)
+ # First character after alignment tab.
+ addi t0, t0, 1
+ lb t0, (t0)
+
+ li t1, '_'
+ beq t0, t1, .compile_statement_call
+
+ li t1, 'g'
+ beq t0, t1, .compile_statement_goto
+
+ _compile_line();
+ goto .compile_statement_end;
+
+.compile_statement_call:
+ _advance_token(1);
+ _compile_call();
+
+ goto .compile_statement_semicolon;
+
+.compile_statement_goto:
+ _advance_token(1);
+ _compile_goto();
+
+ goto .compile_statement_semicolon;
+
+.compile_statement_semicolon:
+ _advance_token(2);
+
+ _write_c('\n');
+
+.compile_statement_end:
+end;
+
+proc _compile_procedure_body();
+begin
+.compile_procedure_body_loop:
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_end
+ li a2, 3 # "end" length.
+ _memcmp();
+
+ beqz a0, .compile_procedure_body_epilogue
+
+ _compile_statement();
+ goto .compile_procedure_body_loop;
+
+.compile_procedure_body_epilogue:
+end;
+
+proc _compile_procedure();
+begin
+ # Skip "proc ".
+ _advance_token(5);
+
+ _read_token();
+ sw a0, 0(sp) # Save the procedure name length.
+
+ # Write .type _procedure_name, @function.
+ la a0, asm_type_directive
+ _write_z();
+
+ _write_token(v00);
+
+ la a0, asm_type_function
+ _write_z();
+
+ # Write procedure label, _procedure_name:
+ _write_token(v00);
+
+ la a0, asm_colon
+ _write_z();
+
+ # Skip the function name and trailing parens, semicolon, "begin" and newline.
+ lw a0, 0(sp)
+ addi a0, a0, 10
+ _advance_token();
+
+ la a0, asm_prologue
+ _write_z();
+
+ _compile_procedure_body();
+
+ # Write the epilogue.
+ la a0, asm_epilogue
+ _write_z();
+
+ # Skip the "end" keyword, semicolon and newline.
+ _advance_token(5);
+end;
+
+proc _compile_type();
+begin
+ # Print and skip the ".type" (5 characters) directive and a space after it.
+ _write_token(6);
+ _advance_token();
+
+ # Read the symbol name; it is printed below together with the separator.
+ _read_token();
+
+ # Print and skip the symbol name, comma, space and @.
+ addi a0, a0, 3
+ _write_token();
+ _advance_token();
+
+ # Read the symbol type.
+ _read_token();
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 12(sp) # NOTE(review): saves the type position, but nothing in this procedure reads it back.
+
+ # Print the symbol type and newline.
+ addi a0, a0, 1
+ _write_token();
+ _advance_token();
+
+ # Write the object definition itself.
+ _compile_line();
+
+.compile_type_end:
+end;
+
+proc _skip_newlines();
+begin
+ # Advance the source position past consecutive newline characters.
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_newlines_loop:
+ lb t2, (t1)
+ li t3, '\n'
+ bne t2, t3, .skip_newlines_end
+ # A zero byte at the end of the input is not a newline, so the branch above leaves the loop for it as well.
+
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
+begin
+.compile_loop:
+ _skip_newlines();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+ beqz t0, .compile_end
+ li t1, '#'
+ beq t0, t1, .compile_comment
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_section
+ li a2, 8 # ".section" length.
+ _memcmp();
+
+ beqz a0, .compile_section
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_type
+ li a2, 5 # ".type" length.
+ _memcmp();
+
+ beqz a0, .compile_type
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_proc
+ li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
+ _memcmp();
+
+ beqz a0, .compile_procedure
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_global
+ li a2, 6 # ".globl" length.
+ _memcmp();
+
+ beqz a0, .compile_global
+ # Not a known token, exit.
+ goto .compile_end;
+
+.compile_section:
+ _compile_section();
+
+ goto .compile_loop;
+
+.compile_type:
+ _compile_type();
+
+ goto .compile_loop;
+
+.compile_global:
+ _compile_line();
+
+ goto .compile_loop;
+
+.compile_comment:
+ _skip_comment();
+
+ goto .compile_loop;
+
+.compile_procedure:
+ _compile_procedure();
+
+ goto .compile_loop;
+
+.compile_end:
+end;
+
+# Terminates the program. a0 contains the return code.
+#
+# Parameters:
+# a0 - Status code.
+proc _exit();
+begin
+ li a7, 93 # SYS_EXIT
+ ecall
+end;
+
+# Entry point.
+.globl _start
+proc _start();
+begin
+ # Read the source from the standard input.
+ la a0, source_code
+ li a1, 81920 # Buffer size.
+ _read_file();
+ _compile();
+
+ _exit(0);
+
+end;
diff --git a/boot/stage5.elna b/boot/stage5.elna
new file mode 100644
index 0000000..d6bbb9d
--- /dev/null
+++ b/boot/stage5.elna
@@ -0,0 +1,969 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+# Stage3 compiler.
+#
+# - Procedure calls with zero or more arguments.
+# - Goto statements.
+# - Character and integer literals.
+# - Passing local variables to procedures.
+# - Local variables should have the format: v00,
+# where 00 is its offset from the sp register.
+
+.section .rodata
+
+.type keyword_section, @object
+keyword_section: .ascii ".section"
+
+.type keyword_type, @object
+keyword_type: .ascii ".type"
+
+.type keyword_ret, @object
+keyword_ret: .ascii "ret"
+
+.type keyword_global, @object
+keyword_global: .ascii ".globl"
+
+.type keyword_proc, @object
+keyword_proc: .ascii "proc "
+
+.type keyword_end, @object
+keyword_end: .ascii "end"
+
+.type keyword_begin, @object
+keyword_begin: .ascii "begin"
+
+.type keyword_var, @object
+keyword_var: .ascii "var"
+
+.type asm_prologue, @object
+asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
+
+.type asm_epilogue, @object
+asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
+
+.type asm_type_directive, @object
+asm_type_directive: .string ".type "
+
+.type asm_type_function, @object
+asm_type_function: .string ", @function\n"
+
+.type asm_colon, @object
+asm_colon: .string ":\n"
+
+.type asm_call, @object
+asm_call: .string "\tcall "
+
+.type asm_j, @object
+asm_j: .string "\tj "
+
+.type asm_li, @object
+asm_li: .string "\tli "
+
+.type asm_lw, @object
+asm_lw: .string "\tlw "
+
+.type asm_sw, @object
+asm_sw: .string "\tsw "
+
+.type asm_mv, @object
+asm_mv: .string "mv "
+
+.type asm_t0, @object
+asm_t0: .string "t0"
+
+.type asm_a0, @object
+asm_a0: .string "a0"
+
+.type asm_comma, @object
+asm_comma: .string ", "
+
+.type asm_sp, @object
+asm_sp: .string "(sp)"
+
+.section .bss
+
+# When modifying also change the read size in the entry point procedure.
+.type source_code, @object
+source_code: .zero 81920
+
+.section .data
+
+.type source_code_position, @object
+source_code_position: .word source_code
+
+.section .text
+
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the result of the read system call in a0 (the number of bytes read).
+proc _read_file();
+begin
+ mv a2, a1 # Third system call argument: the buffer size.
+ mv a1, a0 # Second system call argument: the buffer pointer.
+ # STDIN.
+ li a0, 0
+ li a7, 63 # SYS_READ.
+ ecall
+end;
+
+# Writes to the standard output.
+#
+# Parameters:
+# a0 - Buffer.
+# a1 - Buffer length.
+proc _write_s();
+begin
+ mv a2, a1
+ mv a1, a0
+ # STDOUT.
+ li a0, 1
+ li a7, 64 # SYS_WRITE.
+ ecall
+end;
+
+# Writes the decimal representation of a number to a string buffer.
+#
+# t0 - Local buffer.
+# t1 - Constant 10.
+# t2 - Current character.
+# t3 - Whether the number is negative.
+#
+# Parameters:
+# a0 - Whole number.
+# a1 - Buffer pointer.
+#
+# Sets a0 to the length of the written number.
+proc _print_i();
+begin
+ li t1, 10
+ addi t0, s0, -9 # The digits are produced backwards, starting at s0 - 9.
+
+ li t3, 0
+ bgez a0, .print_i_digit10
+ li t3, 1
+ neg a0, a0 # Continue with the absolute value, the sign is added at the end.
+
+.print_i_digit10:
+ rem t2, a0, t1 # Lowest decimal digit.
+ addi t2, t2, '0' # Convert the digit to its character.
+ sb t2, 0(t0)
+ div a0, a0, t1 # Drop the digit that was just written.
+ addi t0, t0, -1 # Move to the slot of the next, more significant digit.
+ bne zero, a0, .print_i_digit10
+
+ beq zero, t3, .print_i_write_call
+ addi t2, zero, '-'
+ sb t2, 0(t0)
+ addi t0, t0, -1
+
+.print_i_write_call:
+ mv a0, a1 # Copy destination: the buffer passed by the caller.
+ addi a1, t0, 1 # Copy source: the first written character.
+ sub a2, s0, t0
+ addi a2, a2, -9 # Length = (s0 - 9) - t0.
+ sw a2, 0(sp) # Keep the length, a2 is sent to _memcpy as the size.
+
+ _memcpy();
+
+ lw a0, 0(sp)
+end;
+
+# Writes a number to the standard output.
+#
+# Parameters:
+# a0 - Whole number.
+proc _write_i();
+begin
+ addi a1, sp, 0
+ _print_i();
+
+ mv a1, a0
+ addi a0, sp, 0
+ _write_s();
+
+end;
+
+# Writes a character from a0 into the standard output.
+proc _write_c();
+begin
+ sb a0, 0(sp)
+ addi a0, sp, 0
+ li a1, 1
+ _write_s();
+end;
+
+# Write null terminated string.
+#
+# Parameters:
+# a0 - String.
+proc _write_z();
+begin
+ sw a0, 0(sp)
+
+.write_z_loop:
+ # Check for 0 character.
+ lb a0, (a0)
+ beqz a0, .write_z_end
+
+ # Print a character.
+ lw a0, 0(sp)
+ lb a0, (a0)
+ _write_c();
+
+ # Advance the input string by one byte.
+ lw a0, 0(sp)
+ addi a0, a0, 1
+ sw a0, 0(sp)
+
+ goto .write_z_loop;
+
+.write_z_end:
+end;
+
+# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_upper();
+begin
+ li t0, 'A' - 1
+ sltu t1, t0, a0 # t1 = a0 >= 'A'
+
+ sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+ and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
+end;
+
+# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_lower();
+begin
+ li t0, 'a' - 1
+ sltu t2, t0, a0 # t2 = a0 >= 'a'
+
+ sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+ and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
+end;
+
+# Detects if the passed character is a 7-bit alpha character or an underscore.
+#
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+proc _is_alpha();
+begin
+ sw a0, 0(sp) # Keep the character for the two remaining checks.
+
+ _is_upper();
+ sw a0, 4(sp) # Result of the uppercase check.
+
+ _is_lower(v00);
+
+ lw t0, 0(sp)
+ xori t1, t0, '_'
+ seqz t1, t1 # t1 = 1 if the character is an underscore.
+
+ lw t0, 4(sp)
+ or a0, a0, t0 # Lowercase or uppercase.
+ or a0, a0, t1 # Or an underscore.
+end;
+
+# Detects whether the passed character is a digit
+# (a character between '0' and '9').
+#
+# Parameters:
+# a0 - Examined value.
+#
+# Sets a0 to 1 if it is a digit, to 0 otherwise.
+proc _is_digit();
+begin
+ li t0, '0' - 1
+ sltu t1, t0, a0 # t1 = a0 >= '0'
+
+ sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
+
+ and a0, t1, t2 # a0 = a0 >= '0' & a0 <= '9'
+end;
+
+proc _is_alnum();
+begin
+ sw a0, 4(sp)
+
+ _is_alpha();
+ sw a0, 0(sp)
+
+ _is_digit(v04);
+
+ lw a1, 0(sp)
+ or a0, a0, a1
+end;
+
+# Measures the token at the current source position without consuming it.
+#
+# Returns token length in a0.
+proc _read_token();
+begin
+ la t0, source_code_position # Token pointer.
+ lw t0, (t0)
+ sw t0, 0(sp) # Current token position.
+ sw zero, 4(sp) # Token length.
+
+.read_token_loop:
+ lb t0, (t0) # Current character.
+
+ # First we try to read a directive.
+ # A directive can contain a dot and characters.
+ li t1, '.'
+ beq t0, t1, .read_token_next
+
+ lw a0, 0(sp)
+ lb a0, (a0)
+ _is_alnum();
+ bnez a0, .read_token_next
+
+ goto .read_token_end;
+
+.read_token_next:
+ # Advance the local copy of the position and the token length.
+ lw t0, 4(sp)
+ addi t0, t0, 1
+ sw t0, 4(sp)
+
+ lw t0, 0(sp)
+ addi t0, t0, 1
+ sw t0, 0(sp) # t0 holds the position again when the loop loads the next character.
+
+ goto .read_token_loop;
+
+.read_token_end:
+ lw a0, 4(sp)
+end;
+
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare in bytes.
+#
+# Returns 0 in a0 if the regions are equal, otherwise the difference of the first two differing bytes.
+proc _memcmp();
+begin
+ mv t0, a0 # a0 becomes the result, so the first region is walked through t0.
+ li a0, 0 # Regions of length 0 compare as equal.
+
+.memcmp_loop:
+ beqz a2, .memcmp_end # Nothing left to compare.
+
+ lbu t1, (t0)
+ lbu t2, (a1)
+ sub a0, t1, t2 # Non-zero as soon as the two bytes differ.
+
+ bnez a0, .memcmp_end
+
+ addi t0, t0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1
+
+ goto .memcmp_loop;
+
+.memcmp_end:
+end;
+
+# Copies memory byte by byte.
+#
+# Parameters:
+# a0 - Destination.
+# a1 - Source.
+# a2 - Size in bytes.
+#
+# Preserves a0; a1, a2, t0 and t1 are overwritten.
+proc _memcpy();
+begin
+ mv t0, a0 # Remember the destination, a0 is used as the write cursor.
+
+.memcpy_loop:
+ beqz a2, .memcpy_end # All bytes are copied.
+
+ lbu t1, (a1)
+ sb t1, (a0)
+
+ addi a0, a0, 1
+ addi a1, a1, 1
+ addi a2, a2, -1
+
+ goto .memcpy_loop;
+
+.memcpy_end:
+ mv a0, t0 # Restore the original destination pointer.
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
+ add t1, t1, a0
+ sw t1, (t0)
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
+begin
+ sw a0, 0(sp)
+
+ la a0, source_code_position
+ lw a0, (a0)
+ lw a1, 0(sp)
+ _write_s();
+
+ lw a0, 0(sp)
+end;
+
+proc _compile_section();
+begin
+ # Print and skip the ".section" (8 characters) directive and a space after it.
+ _write_token(9);
+ _advance_token();
+
+ # Read the section name.
+ _read_token();
+ addi a0, a0, 1
+
+ _write_token();
+ _advance_token();
+end;
+
+# Skips a comment line without printing it.
+proc _skip_comment();
+begin
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_comment_loop:
+ # Stop at a zero byte so that a comment on the last line, which may
+ # lack a trailing newline, does not run past the end of the input.
+ lb t2, (t1)
+ beqz t2, .skip_comment_end
+
+ # Advance the input string by one byte. This consumes the newline as well.
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ # Continue until the byte that was just consumed is the newline.
+ li t3, '\n'
+ bne t2, t3, .skip_comment_loop
+
+.skip_comment_end:
+ # The position is now after the newline or at the terminating zero byte.
+end;
+
+# Prints the rest of the current line, including the newline, and skips it.
+proc _compile_line();
+begin
+.compile_line_loop:
+ la a0, source_code_position
+ lw a1, (a0)
+
+ lb t0, (a1)
+ li t1, '\n'
+ beq t0, t1, .compile_line_end
+
+ # Print a character. Only one byte is loaded since the position can be at any byte offset.
+ lb a0, (a1)
+ _write_c();
+
+ # Advance the input string by one byte.
+ _advance_token(1);
+
+ goto .compile_line_loop;
+
+.compile_line_end:
+ _write_c('\n');
+
+ _advance_token(1);
+end;
+
+proc _compile_integer_literal();
+begin
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ _write_c('\n');
+end;
+
+proc _compile_character_literal();
+begin
+ la a0, asm_li
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _write_c('\'');
+ _advance_token(1);
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ li t1, '\\'
+ bne a0, t1, .compile_character_literal_end
+
+ _write_c('\\');
+ _advance_token(1);
+
+.compile_character_literal_end:
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0)
+ _write_c();
+
+ _write_c('\'');
+ _write_c('\n');
+
+ _advance_token(2);
+
+end;
+
+proc _compile_variable_expression();
+begin
+ la a0, asm_lw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ _advance_token(1);
+ _read_token();
+ _write_token();
+ _advance_token();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+end;
+
+proc _compile_expression();
+begin
+ la t0, source_code_position
+ lw t0, (t0)
+ lb a0, (t0) # The first character decides the kind of the expression.
+
+ li t1, '\'' # A single quote starts a character literal.
+ beq a0, t1, .compile_expression_character_literal
+
+ li t1, 'v' # The letter v starts a local variable reference.
+ beq a0, t1, .compile_expression_variable
+
+ _is_digit();
+ bnez a0, .compile_expression_integer_literal # A digit starts an integer literal.
+
+ goto .compile_expression_end;
+
+.compile_expression_character_literal:
+ _compile_character_literal();
+ goto .compile_expression_end;
+
+.compile_expression_integer_literal:
+ _compile_integer_literal();
+ goto .compile_expression_end;
+
+.compile_expression_variable:
+ _compile_variable_expression();
+ goto .compile_expression_end;
+
+.compile_expression_end:
+end;
+
+proc _compile_call();
+begin
+ # Stack variables:
+ # v00 - Procedure name length.
+ # v04 - Procedure name pointer.
+ # v08 - Argument count.
+
+ _read_token();
+ sw a0, 0(sp)
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 4(sp)
+
+ sw zero, 8(sp)
+
+ # Skip the identifier and left paren.
+ addi a0, a0, 1
+ _advance_token();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+
+ li t1, ')'
+ beq t0, t1, .compile_call_finalize
+
+.compile_call_loop:
+ _compile_expression();
+
+ # Save the argument on the stack.
+ la a0, asm_sw
+ _write_z();
+
+ la a0, asm_a0
+ _write_z();
+
+ la a0, asm_comma
+ _write_z();
+
+ # Calculate the stack offset: 20 - (4 * argument_counter)
+ lw t0, 8(sp)
+ li t1, 4
+ mul t0, t0, t1
+ li t1, 20
+ sub a0, t1, t0
+ _write_i();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+ # Add one to the argument counter.
+ lw t0, 8(sp)
+ addi t0, t0, 1
+ sw t0, 8(sp)
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+
+ li t1, ','
+ bne t0, t1, .compile_call_finalize
+
+ _advance_token(2);
+ goto .compile_call_loop;
+
+.compile_call_finalize:
+ # Load the argument from the stack.
+
+ lw t0, 8(sp)
+ beqz t0, .compile_call_end
+
+ # Decrement the argument counter.
+ lw t0, 8(sp)
+ addi t0, t0, -1
+ sw t0, 8(sp)
+
+ la a0, asm_lw
+ _write_z();
+
+ _write_c('a');
+ lw a0, 8(sp)
+ _write_i();
+
+ la a0, asm_comma
+ _write_z();
+
+ # Calculate the stack offset: 20 - (4 * argument_counter)
+ lw t0, 8(sp)
+ li t1, 4
+ mul t0, t0, t1
+ li t1, 20
+ sub a0, t1, t0
+ _write_i();
+
+ la a0, asm_sp
+ _write_z();
+
+ _write_c('\n');
+
+ goto .compile_call_finalize;
+
+.compile_call_end:
+ la a0, asm_call
+ _write_z();
+
+ _write_s(v04, v00);
+
+ # Skip the right paren.
+ _advance_token(1);
+end;
+
+proc _compile_goto();
+begin
+ _advance_token(5);
+
+ _read_token();
+ sw a0, 0(sp)
+
+ la a0, asm_j
+ _write_z();
+
+ _write_token(v00);
+ _advance_token();
+end;
+
+proc _compile_statement();
+begin
+ # This is a call if the statement starts with an underscore.
+ la t0, source_code_position
+ lw t0, (t0)
+ # First character after alignment tab.
+ addi t0, t0, 1
+ lb t0, (t0)
+
+ li t1, '_'
+ beq t0, t1, .compile_statement_call
+
+ li t1, 'g'
+ beq t0, t1, .compile_statement_goto
+
+ _compile_line();
+ goto .compile_statement_end;
+
+.compile_statement_call:
+ _advance_token(1);
+ _compile_call();
+
+ goto .compile_statement_semicolon;
+
+.compile_statement_goto:
+ _advance_token(1);
+ _compile_goto();
+
+ goto .compile_statement_semicolon;
+
+.compile_statement_semicolon:
+ _advance_token(2);
+
+ _write_c('\n');
+
+.compile_statement_end:
+end;
+
+proc _compile_procedure_body();
+begin
+.compile_procedure_body_loop:
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_end
+ li a2, 3 # "end" length.
+ _memcmp();
+
+ beqz a0, .compile_procedure_body_epilogue
+
+ _compile_statement();
+ goto .compile_procedure_body_loop;
+
+.compile_procedure_body_epilogue:
+end;
+
+proc _compile_procedure();
+begin
+ # Skip "proc ".
+ _advance_token(5);
+
+ _read_token();
+ sw a0, 0(sp) # Save the procedure name length.
+
+ # Write .type _procedure_name, @function.
+ la a0, asm_type_directive
+ _write_z();
+
+ _write_token(v00);
+
+ la a0, asm_type_function
+ _write_z();
+
+ # Write procedure label, _procedure_name:
+ _write_token(v00);
+
+ la a0, asm_colon
+ _write_z();
+
+ # Skip the function name and trailing parens, semicolon, "begin" and newline.
+ lw a0, 0(sp)
+ addi a0, a0, 10
+ _advance_token();
+
+ la a0, asm_prologue
+ _write_z();
+
+ _compile_procedure_body();
+
+ # Write the epilogue.
+ la a0, asm_epilogue
+ _write_z();
+
+ # Skip the "end" keyword, semicolon and newline.
+ _advance_token(5);
+end;
+
+proc _compile_type();
+begin
+ # Print and skip the ".type" (5 characters) directive and a space after it.
+ _write_token(6);
+ _advance_token();
+
+ # Read the symbol name; it is printed below together with the separator.
+ _read_token();
+
+ # Print and skip the symbol name, comma, space and @.
+ addi a0, a0, 3
+ _write_token();
+ _advance_token();
+
+ # Read the symbol type.
+ _read_token();
+ la t0, source_code_position
+ lw t0, (t0)
+ sw t0, 12(sp) # NOTE(review): saves the type position, but nothing in this procedure reads it back.
+
+ # Print the symbol type and newline.
+ addi a0, a0, 1
+ _write_token();
+ _advance_token();
+
+ # Write the object definition itself.
+ _compile_line();
+
+.compile_type_end:
+end;
+
+proc _skip_newlines();
+begin
+ # Advance the source position past consecutive newline characters.
+ la t0, source_code_position
+ lw t1, (t0)
+
+.skip_newlines_loop:
+ lb t2, (t1)
+ li t3, '\n'
+ bne t2, t3, .skip_newlines_end
+ # A zero byte at the end of the input is not a newline, so the branch above leaves the loop for it as well.
+
+ addi t1, t1, 1
+ sw t1, (t0)
+
+ goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
+begin
+.compile_loop:
+ _skip_newlines();
+
+ la t0, source_code_position
+ lw t0, (t0)
+ lb t0, (t0)
+ beqz t0, .compile_end
+ li t1, '#'
+ beq t0, t1, .compile_comment
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_section
+ li a2, 8 # ".section" length.
+ _memcmp();
+
+ beqz a0, .compile_section
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_type
+ li a2, 5 # ".type" length.
+ _memcmp();
+
+ beqz a0, .compile_type
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_proc
+ li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
+ _memcmp();
+
+ beqz a0, .compile_procedure
+
+ la a0, source_code_position
+ lw a0, (a0)
+ la a1, keyword_global
+ li a2, 6 # ".globl" length.
+ _memcmp();
+
+ beqz a0, .compile_global
+ # Not a known token, exit.
+ goto .compile_end;
+
+.compile_section:
+ _compile_section();
+
+ goto .compile_loop;
+
+.compile_type:
+ _compile_type();
+
+ goto .compile_loop;
+
+.compile_global:
+ _compile_line();
+
+ goto .compile_loop;
+
+.compile_comment:
+ _skip_comment();
+
+ goto .compile_loop;
+
+.compile_procedure:
+ _compile_procedure();
+
+ goto .compile_loop;
+
+.compile_end:
+end;
+
+# Terminates the program. a0 contains the return code.
+#
+# Parameters:
+# a0 - Status code.
+proc _exit();
+begin
+ li a7, 93 # SYS_EXIT
+ ecall
+end;
+
+# Entry point.
+.globl _start
+proc _start();
+begin
+ # Read the source from the standard input.
+ la a0, source_code
+ li a1, 81920 # Buffer size.
+ _read_file();
+ _compile();
+
+ _exit(0);
+
+end;
diff --git a/boot/test.elna b/boot/test.elna
deleted file mode 100644
index e56547d..0000000
--- a/boot/test.elna
+++ /dev/null
@@ -1,14 +0,0 @@
-program
-
-proc main(x: Word, y: Word)
-begin
- _write_s(4, @x);
- _write_s(4, @y);
-
- y := 0x0a2c3063;
- _write_s(4, @y)
-end
-
-begin
- main(0x0a2c3061, 0x0a2c3062)
-end.
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
deleted file mode 100644
index 2c7f2a3..0000000
--- a/boot/tokenizer.s
+++ /dev/null
@@ -1,616 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-.global lex_next, classification, transitions, keywords, byte_keywords
-
-.include "boot/definitions.inc"
-
-.section .rodata
-
-#
-# Classification table assigns each possible character to a group (class). All
-# characters of the same group are handled equivalently.
-#
-# Classification:
-#
-.equ CLASS_INVALID, 0x00
-.equ CLASS_DIGIT, 0x01
-.equ CLASS_CHARACTER, 0x02
-.equ CLASS_SPACE, 0x03
-.equ CLASS_COLON, 0x04
-.equ CLASS_EQUALS, 0x05
-.equ CLASS_LEFT_PAREN, 0x06
-.equ CLASS_RIGHT_PAREN, 0x07
-.equ CLASS_ASTERISK, 0x08
-.equ CLASS_UNDERSCORE, 0x09
-.equ CLASS_SINGLE, 0x0a
-.equ CLASS_HEX, 0x0b
-.equ CLASS_ZERO, 0x0c
-.equ CLASS_X, 0x0d
-.equ CLASS_EOF, 0x0e
-.equ CLASS_DOT, 0x0f
-.equ CLASS_MINUS, 0x10
-.equ CLASS_QUOTE, 0x11
-.equ CLASS_GREATER, 0x12
-.equ CLASS_LESS, 0x13
-
-.equ CLASS_COUNT, 20
-
-.type classification, @object
-classification:
- .byte CLASS_EOF # 00 NUL
- .byte CLASS_INVALID # 01 SOH
- .byte CLASS_INVALID # 02 STX
- .byte CLASS_INVALID # 03 ETX
- .byte CLASS_INVALID # 04 EOT
- .byte CLASS_INVALID # 05 ENQ
- .byte CLASS_INVALID # 06 ACK
- .byte CLASS_INVALID # 07 BEL
- .byte CLASS_INVALID # 08 BS
- .byte CLASS_SPACE # 09 HT
- .byte CLASS_SPACE # 0A LF
- .byte CLASS_INVALID # 0B VT
- .byte CLASS_INVALID # 0C FF
- .byte CLASS_SPACE # 0D CR
- .byte CLASS_INVALID # 0E SO
- .byte CLASS_INVALID # 0F SI
- .byte CLASS_INVALID # 10 DLE
- .byte CLASS_INVALID # 11 DC1
- .byte CLASS_INVALID # 12 DC2
- .byte CLASS_INVALID # 13 DC3
- .byte CLASS_INVALID # 14 DC4
- .byte CLASS_INVALID # 15 NAK
- .byte CLASS_INVALID # 16 SYN
- .byte CLASS_INVALID # 17 ETB
- .byte CLASS_INVALID # 18 CAN
- .byte CLASS_INVALID # 19 EM
- .byte CLASS_INVALID # 1A SUB
- .byte CLASS_INVALID # 1B ESC
- .byte CLASS_INVALID # 1C FS
- .byte CLASS_INVALID # 1D GS
- .byte CLASS_INVALID # 1E RS
- .byte CLASS_INVALID # 1F US
- .byte CLASS_SPACE # 20 Space
- .byte CLASS_SINGLE # 21 !
- .byte CLASS_QUOTE # 22 "
- .byte 0x00 # 23 #
- .byte 0x00 # 24 $
- .byte CLASS_SINGLE # 25 %
- .byte CLASS_SINGLE # 26 &
- .byte CLASS_QUOTE # 27 '
- .byte CLASS_LEFT_PAREN # 28 (
- .byte CLASS_RIGHT_PAREN # 29 )
- .byte CLASS_ASTERISK # 2A *
- .byte CLASS_SINGLE # 2B +
- .byte CLASS_SINGLE # 2C ,
- .byte CLASS_MINUS # 2D -
- .byte CLASS_DOT # 2E .
- .byte CLASS_SINGLE # 2F /
- .byte CLASS_ZERO # 30 0
- .byte CLASS_DIGIT # 31 1
- .byte CLASS_DIGIT # 32 2
- .byte CLASS_DIGIT # 33 3
- .byte CLASS_DIGIT # 34 4
- .byte CLASS_DIGIT # 35 5
- .byte CLASS_DIGIT # 36 6
- .byte CLASS_DIGIT # 37 7
- .byte CLASS_DIGIT # 38 8
- .byte CLASS_DIGIT # 39 9
- .byte CLASS_COLON # 3A :
- .byte CLASS_SINGLE # 3B ;
- .byte CLASS_LESS # 3C <
- .byte CLASS_EQUALS # 3D =
- .byte CLASS_GREATER # 3E >
- .byte 0x00 # 3F ?
- .byte CLASS_SINGLE # 40 @
- .byte CLASS_CHARACTER # 41 A
- .byte CLASS_CHARACTER # 42 B
- .byte CLASS_CHARACTER # 43 C
- .byte CLASS_CHARACTER # 44 D
- .byte CLASS_CHARACTER # 45 E
- .byte CLASS_CHARACTER # 46 F
- .byte CLASS_CHARACTER # 47 G
- .byte CLASS_CHARACTER # 48 H
- .byte CLASS_CHARACTER # 49 I
- .byte CLASS_CHARACTER # 4A J
- .byte CLASS_CHARACTER # 4B K
- .byte CLASS_CHARACTER # 4C L
- .byte CLASS_CHARACTER # 4D M
- .byte CLASS_CHARACTER # 4E N
- .byte CLASS_CHARACTER # 4F O
- .byte CLASS_CHARACTER # 50 P
- .byte CLASS_CHARACTER # 51 Q
- .byte CLASS_CHARACTER # 52 R
- .byte CLASS_CHARACTER # 53 S
- .byte CLASS_CHARACTER # 54 T
- .byte CLASS_CHARACTER # 55 U
- .byte CLASS_CHARACTER # 56 V
- .byte CLASS_CHARACTER # 57 W
- .byte CLASS_CHARACTER # 58 X
- .byte CLASS_CHARACTER # 59 Y
- .byte CLASS_CHARACTER # 5A Z
- .byte CLASS_SINGLE # 5B [
- .byte 0x00 # 5C \
- .byte CLASS_SINGLE # 5D ]
- .byte CLASS_SINGLE # 5E ^
- .byte CLASS_UNDERSCORE # 5F _
- .byte 0x00 # 60 `
- .byte CLASS_HEX # 61 a
- .byte CLASS_HEX # 62 b
- .byte CLASS_HEX # 63 c
- .byte CLASS_HEX # 64 d
- .byte CLASS_HEX # 65 e
- .byte CLASS_HEX # 66 f
- .byte CLASS_CHARACTER # 67 g
- .byte CLASS_CHARACTER # 68 h
- .byte CLASS_CHARACTER # 69 i
- .byte CLASS_CHARACTER # 6A j
- .byte CLASS_CHARACTER # 6B k
- .byte CLASS_CHARACTER # 6C l
- .byte CLASS_CHARACTER # 6D m
- .byte CLASS_CHARACTER # 6E n
- .byte CLASS_CHARACTER # 6F o
- .byte CLASS_CHARACTER # 70 p
- .byte CLASS_CHARACTER # 71 q
- .byte CLASS_CHARACTER # 72 r
- .byte CLASS_CHARACTER # 73 s
- .byte CLASS_CHARACTER # 74 t
- .byte CLASS_CHARACTER # 75 u
- .byte CLASS_CHARACTER # 76 v
- .byte CLASS_CHARACTER # 77 w
- .byte CLASS_X # 78 x
- .byte CLASS_CHARACTER # 79 y
- .byte CLASS_CHARACTER # 7A z
- .byte 0x00 # 7B {
- .byte CLASS_SINGLE # 7C |
- .byte 0x00 # 7D }
- .byte CLASS_SINGLE # 7E ~
- .byte CLASS_INVALID # 7F DEL
-
-#
-# Textual keywords in the language.
-#
-.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
-
-.type keywords, @object
-keywords:
- .word 7
- .ascii "program"
- .word 6
- .ascii "import"
- .word 5
- .ascii "const"
- .word 3
- .ascii "var"
- .word 2
- .ascii "if"
- .word 4
- .ascii "then"
- .word 5
- .ascii "elsif"
- .word 4
- .ascii "else"
- .word 5
- .ascii "while"
- .word 2
- .ascii "do"
- .word 4
- .ascii "proc"
- .word 5
- .ascii "begin"
- .word 3
- .ascii "end"
- .word 4
- .ascii "type"
- .word 6
- .ascii "record"
- .word 5
- .ascii "union"
- .word 4
- .ascii "true"
- .word 5
- .ascii "false"
- .word 3
- .ascii "nil"
- .word 3
- .ascii "xor"
- .word 2
- .ascii "or"
- .word 6
- .ascii "return"
- .word 4
- .ascii "cast"
- .word 4
- .ascii "goto"
- .word 4
- .ascii "case"
- .word 2
- .ascii "of"
-
-.type byte_keywords, @object
-byte_keywords: .ascii "&.,:;()[]^=+-*@"
-.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
-
-.section .data
-
-# The transition table describes transitions from one state to another, given
-# a symbol (character class).
-#
-# The table has m rows and n columns, where m is the number of states and n is
-# the number of classes. So given the current state and a classified character
-# the table can be used to look up the next state.
-#
-# Each cell is a word long.
-# - The least significant byte of the word is a row number (beginning with 0).
-# It specifies the target state. "ff" means that this is an end state and no
-# transition is possible.
-# - The next byte is the action that should be performed when transitioning.
-# For the meaning of actions see labels in the lex_next function, which
-# handles each action.
-#
-.type transitions, @object
-transitions:
- # Invalid Digit Alpha Space : = ( )
- # * _ Single Hex 0 x NUL .
- # - " or ' > <
- .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
- .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
- .word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
-
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
-
- .word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
- .word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
- .word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
-
- .word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
- .word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
- .word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
-
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
-
- .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
- .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
- .word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
-
- .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
- .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
- .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
-
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
-
- .word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
- .word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
- .word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
-
- .word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
- .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
- .word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
-
- .word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
- .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
- .word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
-
- .word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
- .word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
- .word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
-
- .word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
- .word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
- .word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
-
- .word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
- .word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
- .word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
-
-.section .text
-
-# Returns the class from the classification table for the given character.
-#
-# Parameters:
-# a0 - Character.
-#
-# Sets a0 to the class number.
-.type classify, @function
-classify:
- la t0, classification
- add t0, t0, a0 # Character class pointer.
- lbu a0, (t0) # Character class.
- ret
-
-# Given the current state and a character class, calculates the next state.
-#
-# Parameters:
-# a0 - Current state.
-# a1 - Character class.
-#
-# Sets a0 to the next state.
-.type lookup_state, @function
-lookup_state:
- li t0, CLASS_COUNT
- mul a0, a0, t0 # Transition row.
- add a0, a0, a1 # Transition column.
-
- li t0, 4
- mul a0, a0, t0 # Multiply by the word size.
-
- la t0, transitions
- add t0, t0, a0
- lw a0, (t0) # Next state.
-
- ret
-
-# Chains classify and lookup_state.
-#
-# Parameters:
-# a0 - Current state.
-# a1 - Character.
-#
-# Sets a0 to the next state based on the given character.
-.type _next_state, @function
-_next_state:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- sw a0, 4(sp)
- mv a0, a1
- call classify
-
- mv a1, a0
- lw a0, 4(sp)
- call lookup_state
-
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
-
-# Takes an identifier and checks whether it's a keyword.
-#
-# Parameters:
-# a0 - Token length.
-# a1 - Token pointer.
-#
-# Sets a0 to the appropriate token type.
-.type classify_identifier, @function
-classify_identifier:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- mv a2, a0
- mv a3, a1
- li a0, KEYWORDS_COUNT
- la a1, keywords
- call _strings_index
-
- bnez a0, .Lclassify_identifier_end
- li a0, TOKEN_IDENTIFIER
-
-.Lclassify_identifier_end:
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
-
-# Takes a symbol and determines its type.
-#
-# Parameters:
-# a0 - Token character.
-#
-# Sets a0 to the appropriate token type.
-.type classify_single, @function
-classify_single:
- # Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
-
- mv a1, a0
- li a2, BYTE_KEYWORDS_SIZE
- la a0, byte_keywords
- call _memchr
-
- la a1, byte_keywords
- sub a0, a0, a1
- addi a0, a0, TOKEN_IDENTIFIER + 1
-
- # Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
- ret
-
-# Classifies a symbol containing multiple characters (probably 2).
-#
-# Parameters:
-# a0 - Token length.
-# a1 - Token pointer.
-#
-# Sets a0 to the appropriate token type.
-.type classify_composite, @function
-classify_composite:
- lbu t0, 0(a1)
- li t1, ':'
- beq t0, t1, .Lclassify_composite_assign
-
- j .Lclassify_composite_end
-
-.Lclassify_composite_assign:
- li a0, TOKEN_ASSIGN
- j .Lclassify_composite_end
-
-.Lclassify_composite_end:
- ret
-
-# Reads the next token from the source text.
-#
-# Parameters:
-# a0 - Source text pointer.
-# a1 - A pointer for output value, the token kind. 4 Bytes.
-#
-# Sets a0 to the position of the next token.
-.type lex_next, @function
-lex_next:
- # Prologue.
- addi sp, sp, -32
- sw ra, 28(sp)
- sw s0, 24(sp)
- addi s0, sp, 32
-
- sw s1, 20(sp) # Preserve s1 used for current source text position.
- mv s1, a0
- sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
- # 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
-
- sw s2, 16(sp) # Preserve s2 containing the current state.
- li s2, 0x00 # Initial, start state.
-
- sw a1, 0(sp)
- sw zero, (a1) # Initialize.
-
-.Llex_next_loop:
- mv a0, s2
- lbu a1, (s1)
- call _next_state
-
- li t0, 0xff
- and s2, a0, t0 # Next state.
-
- li t0, 0xff00
- and t1, a0, t0 # Transition action.
- srli t1, t1, 8
-
- # Perform the provided action.
- li t0, 0x01 # Accumulate action.
- beq t1, t0, .Llex_next_accumulate
-
- li t0, 0x02 # Print action.
- beq t1, t0, .Llex_next_print
-
- li t0, 0x03 # Skip action.
- beq t1, t0, .Llex_next_skip
-
- li t0, 0x04 # Delimited string action.
- beq t1, t0, .Llex_next_comment
-
- li t0, 0x05 # Finalize identifier.
- beq t1, t0, .Llex_next_identifier
-
- li t0, 0x06 # Single character symbol action.
- beq t1, t0, .Llex_next_single
-
- li t0, 0x07 # An action for symbols containing multiple characters.
- beq t1, t0, .Llex_next_composite
-
- li t0, 0x08 # Integer action.
- beq t1, t0, .Llex_next_integer
-
- j .Llex_next_reject
-
-.Llex_next_reject:
- addi s1, s1, 1
-
- j .Llex_next_end
-
-.Llex_next_accumulate:
- addi s1, s1, 1
-
- j .Llex_next_loop
-
-.Llex_next_skip:
- addi s1, s1, 1
- lw t0, 12(sp)
- addi t0, t0, 1
- sw t0, 12(sp)
-
- j .Llex_next_loop
-
-.Llex_next_print:
- /* DEBUG
- addi a0, a0, 21
- sw a0, 0(sp)
- addi a0, sp, 0
- li a1, 1
- call _write_error */
-
- j .Llex_next_end
-
-.Llex_next_comment:
- addi s1, s1, 1
-
- j .Llex_next_end
-
-.Llex_next_identifier:
- # An identifier can be a textual keyword.
- # Check the kind of the token and write it into the output parameter.
- lw a1, 12(sp)
- sub a0, s1, a1
- sw a0, 8(sp)
- call classify_identifier
- sw a0, 4(sp)
- lw a0, 0(sp)
- addi a1, sp, 4
- li a2, 12
- call _memcpy
-
- j .Llex_next_end
-
-.Llex_next_single:
- lw a0, 12(sp)
- addi s1, a0, 1
- lbu a0, (a0)
- call classify_single
- lw a1, 0(sp)
- sw a0, (a1)
-
- j .Llex_next_end
-
-.Llex_next_composite:
- addi s1, s1, 1
- lw a1, 12(sp)
- sub a0, s1, a1
- call classify_composite
- lw a1, 0(sp)
- sw a0, (a1)
-
- j .Llex_next_end
-
-.Llex_next_integer:
- lw t0, 0(sp)
- li t1, TOKEN_INTEGER
- sw t1, 0(t0)
- lw t1, 12(sp)
- sw t1, 8(t0)
- sub t1, s1, t1
- sw t1, 4(t0)
-
- j .Llex_next_end
-
-.Llex_next_end:
- mv a0, s1 # Return the advanced text pointer.
-
- # Restore saved registers.
- lw s1, 20(sp)
- lw s2, 16(sp)
-
- # Epilogue.
- lw ra, 28(sp)
- lw s0, 24(sp)
- addi sp, sp, 32
- ret
diff --git a/rakelib/stage.rake b/rakelib/stage.rake
deleted file mode 100644
index 6f61cae..0000000
--- a/rakelib/stage.rake
+++ /dev/null
@@ -1,61 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-# frozen_string_literal: true
-
-CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
-SYSROOT = 'build/sysroot'
-QEMU = 'qemu-riscv32'
-
-def assemble_stage(output, compiler, source)
- arguments = [QEMU, '-L', SYSROOT, *compiler]
-
- puts Term::ANSIColor.green(arguments * ' ')
- puts
- Open3.popen2(*arguments) do |qemu_in, qemu_out|
- qemu_in.write File.read(*source)
- qemu_in.close
-
- IO.copy_stream qemu_out, output
- qemu_out.close
- end
-end
-
-library = []
-
-Dir.glob('boot/*.s').each do |assembly_source|
- source_basename = Pathname.new(assembly_source).basename
- target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o')
-
- file target_object.to_s => [assembly_source, 'build/boot'] do |t|
- sh CROSS_GCC, '-c', '-o', t.name, assembly_source
- end
- library << assembly_source unless source_basename.to_s.start_with? 'stage'
-end
-
-desc 'Initial stage'
-file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t|
- sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
-end
-
-file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t|
- source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
-
- File.open t.name, 'w' do |output|
- assemble_stage output, exe, source
- end
-end
-
-['build/boot/stage2a', 'build/boot/stage2b'].each do |exe|
- file exe => [exe.ext('.s'), *library] do |t|
- sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
- end
-end
-
-file 'build/boot/stage2b.s' => ['build/boot/stage2a', 'boot/stage2.elna'] do |t|
- source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
-
- File.open t.name, 'w' do |output|
- assemble_stage output, exe, source
- end
-end