diff --git a/Rakefile b/Rakefile index 6b38038..3c9a245 100644 --- a/Rakefile +++ b/Rakefile @@ -5,34 +5,99 @@ require 'open3' require 'rake/clean' -require 'term/ansicolor' -CLEAN.include 'build/boot' +CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' +SYSROOT = '../eugenios/build/sysroot' +QEMU = 'qemu-riscv32' +STAGES = Dir.glob('boot/stage*.elna').collect { |stage| File.basename stage, '.elna' }.sort + +CLEAN.include 'build/boot', 'build/valid' directory 'build/boot' +directory 'build/valid' + +task default: :boot desc 'Final stage' -task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t| - exe, previous_output, source = t.prerequisites +task boot: "build/valid/#{STAGES.last}" +task boot: "build/valid/#{STAGES.last}.s" +task boot: "boot/#{STAGES.last}.elna" do |t| + groupped = t.prerequisites.group_by { |stage| File.extname stage }.transform_values(&:first) + exe = groupped[''] + expected = groupped['.s'] + source = groupped['.elna'] cat_arguments = ['cat', source] compiler_arguments = [QEMU, '-L', SYSROOT, exe] - diff_arguments = ['diff', '-Nur', '--text', previous_output, '-'] + diff_arguments = ['diff', '-Nur', '--text', expected, '-'] Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments) end -file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t| - source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? 
'.elna' } +desc 'Convert previous stage language into the current stage language' +task :convert do + File.open('boot/stage4.elna', 'w') do |current_stage| + li_value = nil - File.open t.name, 'w' do |output| - assemble_stage output, exe, source + File.readlines('boot/stage3.elna').each do |line| + current_stage << line + end end end -file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t| - sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites +STAGES.each do |stage| + previous = stage.delete_prefix('stage').to_i.pred + + file "build/valid/#{stage}" => "build/valid/#{stage}.s" do |t| + sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites + end + + file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t| + exe, source = t.prerequisites + + cat_arguments = ['cat', source] + compiler_arguments = [QEMU, '-L', SYSROOT, exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name + end + + file "build/boot/#{stage}" => "build/boot/#{stage}.s" do |t| + sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites + end + + file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t| + exe, source = t.prerequisites + + cat_arguments = ['cat', source] + compiler_arguments = [QEMU, '-L', SYSROOT, exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name + end end -task test: 'build/boot/test' do |t| - sh QEMU, '-L', SYSROOT, t.prerequisites.first +# +# Stage 1. +# + +file 'build/valid/stage1' => ['build/valid', 'build/valid/stage1.s'] do |t| + source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } + + sh CROSS_GCC, '-nostdlib', '-o', t.name, *source +end + +file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t| + source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? 
'.s' } + + cat_arguments = ['cat', *source] + compiler_arguments = [QEMU, '-L', SYSROOT, *exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name +end + +file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t| + source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } + + sh CROSS_GCC, '-nostdlib', '-o', t.name, *source end diff --git a/boot/common-boot.s b/boot/common-boot.s index f61321e..9305d40 100644 --- a/boot/common-boot.s +++ b/boot/common-boot.s @@ -2,17 +2,15 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower -.global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i -.global _memcmp, _memchr, _memmem, _memcpy, _mmap +.global _read_file, _write_error +.global _memcmp, _memchr, _memmem, _mmap .global _current, _get, _advance, _label_counter -.global _divide_by_zero_error, _exit, _strings_index, _string_equal +.global _divide_by_zero_error, _strings_index, _string_equal .section .rodata .equ SYS_READ, 63 .equ SYS_WRITE, 64 -.equ SYS_EXIT, 93 .equ SYS_MMAP2, 222 .equ STDIN, 0 .equ STDOUT, 1 @@ -77,128 +75,6 @@ _memcmp: .Lmemcmp_end: ret -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -.type _is_upper, @function -_is_upper: - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' - - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' - - ret - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -.type _is_lower, @function -_is_lower: - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' - - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' - - ret - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# The character is passed in a0. 
-# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -.type _is_alpha, @function -_is_alpha: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - sw a0, 4(sp) - - call _is_upper - sw a0, 0(sp) - - lw a0, 4(sp) - call _is_lower - - lw t0, 4(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 0(sp) - or a0, a0, t0 - or a0, a0, t1 - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -.type _is_digit, @function -_is_digit: - li t0, '0' - 1 - sltu t1, t0, a0 # t1 = a0 >= '0' - - sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - - and a0, t1, t2 - - ret - -.type _is_alnum, @function -_is_alnum: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - sw a0, 4(sp) - - call _is_alpha - sw a0, 0(sp) - - lw a0, 4(sp) - call _is_digit - - lw a1, 0(sp) - or a0, a0, a1 - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Writes a string to the standard output. -# -# Parameters: -# a0 - Length of the string. -# a1 - String pointer. -.type _write_s, @function -_write_s: - # Prologue. - addi sp, sp, -8 - sw ra, 4(sp) - sw s0, 0(sp) - addi s0, sp, 8 - - mv a2, a0 - li a0, STDOUT - li a7, SYS_WRITE - ecall - - # Epilogue. - lw ra, 4(sp) - lw s0, 0(sp) - addi sp, sp, 8 - ret - # Reads standard input into a buffer. # a0 - Buffer pointer. # a1 - Buffer size. @@ -228,16 +104,6 @@ _read_file: addi sp, sp, 8 ret -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -.type _exit, @function -_exit: - li a7, SYS_EXIT - ecall - # ret - .type _divide_by_zero_error, @function _divide_by_zero_error: addi a7, zero, 172 # getpid @@ -248,106 +114,6 @@ _divide_by_zero_error: ecall ret -# Writes a number to a string buffer. -# -# t0 - Local buffer. 
-# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. -.type _print_i, @function -_print_i: - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .Lprint_i_digit10 - li t3, 1 - neg a0, a0 - -.Lprint_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .Lprint_i_digit10 - - beq zero, t3, .Lprint_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.Lprint_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - call _memcpy - - lw a0, 0(sp) - - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. -.type _write_i, @function -_write_i: - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - addi a1, sp, 0 - call _print_i - - addi a1, sp, 0 - call _write_s - - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Writes a character from a0 into the standard output. -.type _write_c, @function -_write_c: - # Prologue - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - sb a0, 4(sp) - li a0, STDOUT - addi a1, sp, 4 - li a2, 1 - li a7, SYS_WRITE - ecall - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - add sp, sp, 16 - ret - # a0 - Pointer to an array to get the first element. # # Dereferences a pointer and returns what is on the address in a0. @@ -448,34 +214,6 @@ _memmem: add sp, sp, 24 ret -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. 
-.type _memcpy, @function -_memcpy: - mv t0, a0 - -.Lmemcpy_loop: - beqz a2, .Lmemcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - j .Lmemcpy_loop - -.Lmemcpy_end: - mv a0, t0 - ret - # Searches for a string in a string array. # # Parameters: diff --git a/boot/definitions.inc b/boot/definitions.inc deleted file mode 100644 index 88f6e8b..0000000 --- a/boot/definitions.inc +++ /dev/null @@ -1,68 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# -# Tokens. -# - -# The constant should match the index in the keywords array in tokenizer.s. - -.equ TOKEN_PROGRAM, 1 -.equ TOKEN_IMPORT, 2 -.equ TOKEN_CONST, 3 -.equ TOKEN_VAR, 4 -.equ TOKEN_IF, 5 -.equ TOKEN_THEN, 6 -.equ TOKEN_ELSIF, 7 -.equ TOKEN_ELSE, 8 -.equ TOKEN_WHILE, 9 -.equ TOKEN_DO, 10 -.equ TOKEN_PROC, 11 -.equ TOKEN_BEGIN, 12 -.equ TOKEN_END, 13 -.equ TOKEN_TYPE, 14 -.equ TOKEN_RECORD, 15 -.equ TOKEN_UNION, 16 -.equ TOKEN_TRUE, 17 -.equ TOKEN_FALSE, 18 -.equ TOKEN_NIL, 19 -.equ TOKEN_XOR, 20 -.equ TOKEN_OR, 21 -.equ TOKEN_RETURN, 22 -.equ TOKEN_CAST, 23 -.equ TOKEN_GOTO, 24 -.equ TOKEN_CASE, 25 -.equ TOKEN_OF, 26 - -.equ TOKEN_IDENTIFIER, 27 -# The constant should match the character index in the byte_keywords string. 
- -.equ TOKEN_AND, TOKEN_IDENTIFIER + 1 -.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2 -.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3 -.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4 -.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5 -.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6 -.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7 -.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8 -.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9 -.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10 -.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11 -.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12 -.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13 -.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14 -.equ TOKEN_AT, TOKEN_IDENTIFIER + 15 - -.equ TOKEN_ASSIGN, 43 -.equ TOKEN_INTEGER, 44 - -# -# Symbols. -# -.equ TYPE_PRIMITIVE, 0x01 -.equ TYPE_POINTER, 0x02 -.equ TYPE_PROCEDURE, 0x03 -.equ INFO_PARAMETER, 0x10 -.equ INFO_LOCAL, 0x20 -.equ INFO_PROCEDURE, 0x30 diff --git a/boot/stage1.s b/boot/stage1.s index a45d8ab..c81a7f8 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -2,1141 +2,391 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -.global _start # Program entry point. - -# -# Registers used as global variables: -# s1 - Contains the current position in the source text. -# s2 - Label counter. -# s3 - Dynamic memory region. -# -# - The compiler expects valid input, otherwise it will generate invalid -# assembly or hang. There is no error checking, no semantic analysis, no -# type checking. -# -# - Imports with only a module name without package, e.g. -# "import dummy", can be parsed, but are ignored. -# -# - No loops. Only labels and goto. -# -# - Only unsigned number literals are supported (in decimal or -# hexadecimal format). -# -# - Comments are accepted only at the end of a line. -# -# - Return can be used only as the last statement of a procedure. It -# doesn't actually return, but sets a0 to the appropriate value. -# -# - The lvalue of an assignment can only be an identifier. 
- -.include "boot/definitions.inc" - .equ SOURCE_BUFFER_SIZE, 81920 -.section .rodata -section_rodata: .ascii ".section .rodata\n" -.equ SECTION_RODATA_SIZE, . - section_rodata -section_text: .ascii ".section .text\n" -.equ SECTION_TEXT_SIZE, . - section_text -section_bss: .ascii ".section .bss\n" -.equ SECTION_BSS_SIZE, . - section_bss -global_start: .ascii ".global _start\n" -.equ GLOBAL_START_SIZE, . - global_start -prologue: .ascii "addi sp, sp, -96\nsw ra, 92(sp)\nsw s0, 88(sp)\naddi s0, sp, 96\n" -.equ PROLOGUE_SIZE, . - prologue -epilogue: .ascii "lw ra, 92(sp)\nlw s0, 88(sp)\naddi sp, sp, 96\nret\n" -.equ EPILOGUE_SIZE, . - epilogue +.equ SYS_READ, 63 +.equ SYS_WRITE, 64 +.equ SYS_EXIT, 93 +.equ SYS_MMAP2, 222 +.equ STDIN, 0 +.equ STDOUT, 1 +.equ STDERR, 2 -asm_exit: .ascii "li a0, 0\nli a7, 93\necall\n" -.equ ASM_EXIT_SIZE, . - asm_exit -asm_start: .ascii ".type _start, @function\n_start:\n" -.equ ASM_START_SIZE, . - asm_start -asm_and_a0_a1: .ascii "and a0, a0, a1\n" -.equ ASM_AND_A0_A1_SIZE, . - asm_and_a0_a1 -asm_or_a0_a1: .ascii "or a0, a0, a1\n" -.equ ASM_OR_A0_A1_SIZE, . - asm_or_a0_a1 -asm_add_a0_a1: .ascii "add a0, a0, a1\n" -.equ ASM_ADD_A0_A1_SIZE, . - asm_add_a0_a1 -asm_sub_a0_a1: .ascii "sub a0, a0, a1\n" -.equ ASM_SUB_A0_A1_SIZE, . - asm_sub_a0_a1 -asm_mul_a0_a1: .ascii "mul a0, a0, a1\n" -.equ ASM_MUL_A0_A1_SIZE, . - asm_mul_a0_a1 -asm_seqz_a0: .ascii "seqz a0, a0\n" -.equ ASM_SEQZ_A0_SIZE, . - asm_seqz_a0 -asm_neg_a0: .ascii "neg a0, a0\n" -.equ ASM_NEG_A0_SIZE, . - asm_neg_a0 -asm_type: .ascii ".type " -.equ ASM_TYPE_SIZE, . - asm_type -asm_type_function: .ascii ", @function\n" -.equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function -asm_type_object: .ascii ", @object\n" -.equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object -asm_restore_parameters: - .ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n" -.equ ASM_RESTORE_PARAMETERS_SIZE, . 
- asm_restore_parameters -asm_preserve_parameters: - .ascii "sw a0, 84(sp)\nsw a1, 80(sp)\nsw a2, 76(sp)\nsw a2, 76(sp)\nsw a3, 72(sp)\nsw a4, 68(sp)\nsw a5, 64(sp)\n" -.equ ASM_PRESERVE_PARAMETERS_SIZE, . - asm_preserve_parameters +.section .rodata + +.type keyword_equ, @object +keyword_equ: .ascii ".equ" +.equ KEYWORD_EQU_SIZE, 4 + +.type keyword_section, @object +keyword_section: .ascii ".section" +.equ KEYWORD_SECTION_SIZE, 8 + +.type keyword_type, @object +keyword_type: .ascii ".type" +.equ KEYWORD_TYPE_SIZE, 5 + +.type keyword_type_object, @object +keyword_type_object: .ascii "object" +.equ KEYWORD_TYPE_OBJECT_SIZE, 6 + +.type keyword_type_function, @object +keyword_type_function: .ascii "function" +.equ KEYWORD_TYPE_FUNCTION_SIZE, 8 + +.type keyword_ret, @object +keyword_ret: .ascii "ret" +.equ KEYWORD_RET_SIZE, 3 + +.type keyword_global, @object +keyword_global: .ascii ".globl" +.equ KEYWORD_GLOBAL_SIZE, 6 + +.type keyword_proc, @object +keyword_proc: .ascii "proc " +.equ KEYWORD_PROC_SIZE, 5 + +.type keyword_end, @object +keyword_end: .ascii "end" +.equ KEYWORD_END_SIZE, 3 + +.type keyword_begin, @object +keyword_begin: .ascii "begin" +.equ KEYWORD_BEGIN_SIZE, 5 + +.type keyword_var, @object +keyword_var: .ascii "var" +.equ KEYWORD_VAR_SIZE, 3 + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " .section .bss + .type source_code, @object source_code: .zero SOURCE_BUFFER_SIZE +.type source_code_position, @object +source_code_position: .word 0 + .section .text -# Ignores the import. 
-.type compile_import, @function -compile_import: - # Prologue. - addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 - -.Lcompile_import_loop: - mv a0, s1 - addi a1, sp, 0 - call lex_next - li t0, TOKEN_IMPORT - lw t1, 0(sp) - bne t0, t1, .Lcompile_import_end - # a0 is set from the previous lex_next call. Skip the module name. - addi a1, sp, 0 - call lex_next - mv s1, a0 - - j .Lcompile_import_loop - -.Lcompile_import_end: - # Epilogue. - lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 - ret - -.type compile_binary_expression, @function -compile_binary_expression: +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +.type _read_file, @function +_read_file: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - li a0, 0 - call compile_expression + mv a2, a1 + mv a1, a0 + li a0, STDIN + li a7, SYS_READ + ecall - mv a0, s1 - addi a1, sp, 12 - call lex_next - lw t0, 12(sp) - - li t1, TOKEN_AND - beq t0, t1, .Lcompile_binary_expression_and - - li t1, TOKEN_OR - beq t0, t1, .Lcompile_binary_expression_or - - li t1, TOKEN_PLUS - beq t0, t1, .Lcompile_binary_expression_plus - - li t1, TOKEN_EQUALS - beq t0, t1, .Lcompile_binary_expression_equal - - li t1, TOKEN_ASTERISK - beq t0, t1, .Lcompile_binary_expression_product - - li t1, TOKEN_MINUS - beq t0, t1, .Lcompile_binary_expression_minus - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_equal: - mv s1, a0 # Skip =. - li a0, 1 - call compile_expression - li a0, ASM_SUB_A0_A1_SIZE - la a1, asm_sub_a0_a1 - call _write_s - - li a0, ASM_SEQZ_A0_SIZE - la a1, asm_seqz_a0 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_and: - mv s1, a0 # Skip &. - li a0, 1 - call compile_expression - li a0, ASM_AND_A0_A1_SIZE - la a1, asm_and_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_or: - mv s1, a0 # Skip or. 
- li a0, 1 - call compile_expression - li a0, ASM_OR_A0_A1_SIZE - la a1, asm_or_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_plus: - mv s1, a0 # Skip +. - li a0, 1 - call compile_expression - li a0, ASM_ADD_A0_A1_SIZE - la a1, asm_add_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_minus: - mv s1, a0 # Skip -. - li a0, 1 - call compile_expression - li a0, ASM_SUB_A0_A1_SIZE - la a1, asm_sub_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_product: - mv s1, a0 # Skip *. - li a0, 1 - call compile_expression - li a0, ASM_MUL_A0_A1_SIZE - la a1, asm_mul_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Looks for a register that can be used to calculate a symbol address. Writes it -# as string, like sp or s0 into the provided buffer. -# -# Parameters: -# a0 - Symbol info pointer. -# a1 - Output buffer. -# -# Sets a0 to the length of register name written or 0. -.type take_address, @function -take_address: - beqz a0, .Ltake_address_undefined - lw t0, 0(a0) - - li t1, INFO_PARAMETER - beq t0, t1, .Ltake_address_parameter - - li t1, INFO_LOCAL - beq t0, t1, .Ltake_address_local - - j .Ltake_address_undefined - -.Ltake_address_parameter: - li t0, 0x3073 # s0 - sh t0, (a1) - - li a0, 2 - - j .Ltake_address_end - -.Ltake_address_local: - li t0, 0x7073 # (sp) - sh t0, (a1) - - li a0, 2 - - j .Ltake_address_end - -.Ltake_address_undefined: - li a0, 0 - -.Ltake_address_end: - ret - -# Parameters: -# a0 - Identifier length. -# a1 - Register number as character. -.type compile_identifier_expression, @function -compile_identifier_expression: +# Writes a character from a0 into the standard output. +.type _write_c, @function +_write_c: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - sw a0, 20(sp) # Identifier length. 
- sw a1, 16(sp) # Register number as character. + sb a0, 20(sp) + li a0, STDOUT + addi a1, sp, 20 + li a2, 1 + li a7, SYS_WRITE + ecall + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + add sp, sp, 32 + ret + +# Write null terminated string. +# +# Parameters: +# a0 - String. +.type _write_z, @function +_write_z: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + li a0, STDOUT + lw a1, 20(sp) + li a2, 1 + li a7, SYS_WRITE + ecall + + # Advance the input string by one byte. lw a0, 20(sp) - mv a1, s1 - call symbol_table_lookup - sw a0, 12(sp) + addi a0, a0, 1 + sw a0, 20(sp) - beqz a0, .Lcompile_identifier_expression_by_name - lw t0, 0(a0) + j .write_z_loop - j .Lcompile_identifier_expression_by_address +.write_z_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret -.Lcompile_identifier_expression_by_name: - # Global identifier. - lw t1, 16(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 8(sp) - li t0, 0x6120616c # la a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +.type _is_upper, @function +_is_upper: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - lw a0, 20(sp) - mv a1, s1 - call _write_s + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' - li a0, '\n' - call _write_c + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. +.type _is_lower, @function +_is_lower: + # Prologue.
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Parameters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +.type _is_alpha, @function +_is_alpha: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) - lbu a0, (s1) call _is_upper - beqz a0, .Lcompile_identifier_expression_end - - lw t1, 16(sp) - li t0, 0x0a290061 # a\0)\n - sll t2, t1, 8 - or t0, t0, t2 - sw t0, 8(sp) - li t0, 0x28202c00 # \0, ( - or t0, t0, t1 - sw t0, 4(sp) - li t0, 0x6120776c # lw a - sw t0, 0(sp) - li a0, 12 - addi a1, sp, 0 - call _write_s - - j .Lcompile_identifier_expression_end - -.Lcompile_identifier_expression_by_address: - lw t1, 16(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 8(sp) - li t0, 0x6120776c # lw a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s - - lw a0, 12(sp) - lw a0, 8(a0) - call _write_i - - li a0, '(' - call _write_c - - lw a0, 12(sp) - addi a1, sp, 4 - call take_address - addi a1, sp, 4 - call _write_s - - li a0, ')' - call _write_c - li a0, '\n' - call _write_c - - j .Lcompile_identifier_expression_end - -.Lcompile_identifier_expression_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Evalutes an expression and saves the result in a0. -# -# a0 - X in aX, the register number to save the result. -.type compile_expression, @function -compile_expression: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - addi a0, a0, '0' # Make the register number to a character. - sw a0, 36(sp) # And save it.
- - mv a0, s1 - addi a1, sp, 24 - call lex_next - sw a0, 20(sp) - - lw t0, 24(sp) - - li t1, TOKEN_MINUS - beq t0, t1, .Lcompile_expression_negate - - li t1, TOKEN_AT - beq t0, t1, .Lcompile_expression_address - - li t1, TOKEN_INTEGER - beq t0, t1, .Lcompile_expression_literal - - addi a1, sp, 8 - call lex_next - lw t0, 8(sp) - li t1, TOKEN_LEFT_PAREN - beq t0, t1, .Lcompile_expression_call - - lw s1, 32(sp) - lw a0, 28(sp) - lw a1, 36(sp) - call compile_identifier_expression - - j .Lcompile_expression_advance - -.Lcompile_expression_negate: - lw s1, 20(sp) # Skip the -. - mv a0, zero - call compile_expression - - li a0, ASM_NEG_A0_SIZE - la a1, asm_neg_a0 - call _write_s - - j .Lcompile_expression_end - -.Lcompile_expression_address: - lw a0, 20(sp) - mv s1, a0 # Skip @. - - lw a0, 36(sp) - call compile_at_expression - - j .Lcompile_expression_end - -.Lcompile_expression_call: - mv s1, a0 - - lw a0, 32(sp) - lw a1, 28(sp) - call compile_call - - j .Lcompile_expression_end - -.Lcompile_expression_literal: - lw t1, 36(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 16(sp) - li t0, 0x6120696c # li a - sw t0, 12(sp) - li a0, 7 - addi a1, sp, 12 - call _write_s - - lw a0, 28(sp) - lw a1, 32(sp) - call _write_s - - li a0, '\n' - call _write_c - - j .Lcompile_expression_advance - -.Lcompile_expression_advance: - lw s1, 20(sp) - -.Lcompile_expression_end: - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Expression taking an identifier address. -# -# Parameters: -# a0 - Register number as character -.type compile_at_expression, @function -compile_at_expression: - # Prologue. 
- addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - sw a0, 36(sp) - - mv a0, s1 - addi a1, sp, 24 - call lex_next - mv s1, a0 - - lw a0, 28(sp) - lw a1, 32(sp) - call symbol_table_lookup - sw a0, 20(sp) - - li t0, 0x20 # _ - sb t0, 12(sp) - - # lw a0, 28(sp) - lw a1, 32(sp) - addi a1, sp, 13 - call take_address - - lw t1, 36(sp) - li t0, 0x2c006120 # _a\0, - sw t0, 8(sp) - sb t1, 10(sp) - li t0, 0x69646461 # addi - sw t0, 4(sp) - addi a0, a0, 9 # The length returned by take_address + the instruction. - addi a1, sp, 4 - call _write_s - - li a0, ',' - call _write_c - li a0, ' ' - call _write_c + sw a0, 16(sp) lw a0, 20(sp) - lw a0, 8(a0) - call _write_i + call _is_lower - j .Lcompile_at_expression_end - -.Lcompile_at_expression_end: - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Compiles an lvalue. -# -# Parameters: -# a0 - Pointer to the identifier. -# a1 - Identifier length. -.type compile_designator_expression, @function -compile_designator_expression: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw a0, 20(sp) # Identifier pointer. - sw a1, 16(sp) # Identifier length. - - /* DEBUG - lw a0, 20(sp) - lw a1, 16(sp) - call _write_error */ - -.Lcompile_designator_expression_by_address: - lw a0, 16(sp) - lw a1, 20(sp) - call symbol_table_lookup - sw a0, 12(sp) - - li t0, 0x202c30 # 0,_ - sw t0, 8(sp) - li t0, 0x61207773 # sw a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s - - lw a0, 12(sp) - lw a0, 8(a0) - call _write_i - - li a0, '(' - call _write_c - - lw a0, 12(sp) - addi a1, sp, 4 - call take_address - addi a1, sp, 4 - call _write_s - - li a0, ')' - call _write_c - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Compiles a statement beginning with an identifier. 
-# -# Left values should be variables named "loca n", where n is the offset -# of the variable on the stack, like loca8 or loca4. -.type compile_identifier, @function -compile_identifier: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - # Save the pointer to the identifier and its length on the stack. - mv a0, s1 - addi a1, sp, 12 - call lex_next - addi a1, sp, 0 - call lex_next - mv s1, a0 - - lw t0, 0(sp) - - li t1, TOKEN_LEFT_PAREN - beq t0, t1, .Lcompile_identifier_call - - li t1, TOKEN_ASSIGN - beq t0, t1, .Lcompile_identifier_assign - - j .Lcompile_identifier_end - -.Lcompile_identifier_call: - lw a0, 20(sp) - lw a1, 16(sp) - call compile_call - - j .Lcompile_identifier_end - -.Lcompile_identifier_assign: - call compile_binary_expression - lw a0, 20(sp) - lw a1, 16(sp) - call compile_designator_expression - - j .Lcompile_identifier_end - -.Lcompile_identifier_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Compiles a procedure call. Expects s1 to point to the first argument. -# a0 - Pointer to the procedure name. -# a1 - Length of the procedure name. -# -# Returns the procedure result in a0. -.type compile_call, @function -compile_call: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw a0, 20(sp) - sw a1, 16(sp) - sw zero, 12(sp) # Argument count for a procedure call. - -.Lcompile_call_paren: - lbu t0, (s1) - li t1, 0x29 # ) - beq t0, t1, .Lcompile_call_complete - -.Lcompile_call_argument: - li a0, 0 - call compile_expression - - li t0, 0x202c30 # 0,_ - sw t0, 8(sp) - li t0, 0x61207773 # sw a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s - - lw t0, 12(sp) # Argument count for a procedure call. - - # Only 6 arguments are supported with a0-a5. - # Save all arguments on the stack so they aren't overriden afterwards. 
- li a0, -4 - mul a0, t0, a0 - addi a0, a0, 60 - call _write_i - - li t0, '\n' - sw t0, 8(sp) - li t0, 0x29707328 # (sp) - sw t0, 4(sp) - li a0, 5 - addi a1, sp, 4 - call _write_s - - lbu t0, (s1) - li t1, ',' - bne t0, t1, .Lcompile_call_paren - - lw t0, 12(sp) # Argument count for a procedure call. - addi t0, t0, 1 - sw t0, 12(sp) - - addi s1, s1, 1 # Skip the comma between the arguments. - j .Lcompile_call_argument - -.Lcompile_call_complete: - sw zero, 12(sp) - -.Lcompile_call_restore: - # Just go through all a0-a5 registers and read them from stack. - # If this stack value contains garbage, the procedure just shouldn't use it. - li a0, ASM_RESTORE_PARAMETERS_SIZE - la a1, asm_restore_parameters - call _write_s - -.Lcompile_call_perform: - li t0, 0x20 - sw t0, 8(sp) - li t0, 0x6c6c6163 # call - sw t0, 4(sp) - li a0, 5 - addi a1, sp, 4 - call _write_s - - lw a0, 16(sp) - lw a1, 20(sp) - call _write_s - - li a0, '\n' - call _write_c - - addi s1, s1, 1 # Skip the close paren. - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Walks through the procedure definitions. -.type compile_procedure_section, @function -compile_procedure_section: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - -.Lcompile_procedure_section_loop: - mv a0, s1 - addi a1, sp, 4 - call lex_next - li t0, TOKEN_PROC - lw t1, 4(sp) - bne t0, t1, .Lcompile_procedure_section_end - - call compile_procedure - - j .Lcompile_procedure_section_loop - -.Lcompile_procedure_section_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -.type compile_module_declaration, @function -compile_module_declaration: - # Prologue. - addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 - - li a0, GLOBAL_START_SIZE - la a1, global_start - call _write_s - - # Skip "program". - mv a0, s1 - addi a1, sp, 4 - call lex_next - mv s1, a0 - - # Epilogue. 
- lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 - ret - -# Compiles global variable section. -.type compile_global_section, @function -compile_global_section: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 4 - call lex_next - li t0, TOKEN_VAR - lw t1, 4(sp) - bne t0, t1, .Lcompile_global_section_end - mv s1, a0 - - li a0, SECTION_BSS_SIZE - la a1, section_bss - call _write_s - -.Lcompile_global_section_item: - mv a0, s1 - addi a1, sp, 12 - call lex_next - - lw t0, 12(sp) - li t1, TOKEN_IDENTIFIER - - bne t0, t1, .Lcompile_global_section_end - lw s1, 20(sp) # Advance to the beginning of the variable name. - - call compile_global - j .Lcompile_global_section_item - -.Lcompile_global_section_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Compiles a global variable. -.type compile_global, @function -compile_global: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - # Save the identifier on the stack since it should emitted multiple times. - mv a0, s1 - addi a1, sp, 28 - call lex_next - addi a1, sp, 4 - call lex_next # Skip the colon in front of the type. - addi a1, sp, 4 - call lex_next # Skip the opening bracket. - addi a1, sp, 16 - call lex_next # Save the array size on the stack since it has to be emitted multiple times. - addi a1, sp, 4 - call lex_next # Skip the closing bracket. - addi a1, sp, 4 - call lex_next # Skip the type. 
- mv s1, a0 - - # .type identifier, @object - li a0, ASM_TYPE_SIZE - la a1, asm_type - call _write_s - - lw a0, 32(sp) - lw a1, 36(sp) - call _write_s - - li a0, ASM_TYPE_OBJECT_SIZE - la a1, asm_type_object - call _write_s - - # identifier: .zero size - lw a0, 32(sp) - lw a1, 36(sp) - call _write_s - - li t0, 0x206f7265 # ero_ - sw t0, 12(sp) - li t0, 0x7a2e203a # : .z - sw t0, 8(sp) - li a0, 8 - addi a1, sp, 8 - call _write_s - - lw a0, 20(sp) - lw a1, 24(sp) - call _write_s - - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Sets a0 to the type pointer. -.type compile_type_expression, @function -compile_type_expression: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - -.Lcompile_type_expression_type: - mv a0, s1 - addi a1, sp, 12 - call lex_next - mv s1, a0 - lw t0, 12(sp) - - li t1, TOKEN_HAT # Pointer type. - beq t0, t1, .Lcompile_type_expression_pointer - - # Named type. - lw a0, 16(sp) - lw a1, 20(sp) - call symbol_table_lookup - - j .Lcompile_type_expression_end - -.Lcompile_type_expression_pointer: - call compile_type_expression - mv a1, s3 - call symbol_table_make_pointer - add s3, s3, a0 - sub a0, s3, a0 - - j .Lcompile_type_expression_end - -.Lcompile_type_expression_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Inserts local procedure variables into the symbol table. -.type compile_local_section, @function -compile_local_section: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - mv a0, s1 - addi a1, sp, 28 - call lex_next - - lw t0, 28(sp) - li t1, TOKEN_VAR - - bne t0, t1, .Lcompile_local_section_end - mv s1, a0 - - sw zero, 12(sp) # Variable offset counter. 
- -.Lcompile_local_section_variable: - mv a0, s1 - addi a1, sp, 28 - call lex_next - - lw t0, 28(sp) - li t1, TOKEN_IDENTIFIER - - bne t0, t1, .Lcompile_local_section_end - addi a1, sp, 16 - call lex_next - mv s1, a0 # Skip the ":" in front of the type. - - call compile_type_expression - # a0 - Variable type. - lw a1, 12(sp) - mv a2, s3 - call symbol_table_make_local - - mv a2, s3 - add s3, s3, a0 - - lw a0, 32(sp) - lw a1, 36(sp) - call symbol_table_enter - - lw t0, 12(sp) - addi t0, t0, 4 - sw t0, 12(sp) - - j .Lcompile_local_section_variable - -.Lcompile_local_section_end: - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Inserts procedure parameters into the symbol table. -.type compile_parameters, @function -compile_parameters: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - li t0, -12 - sw t0, 12(sp) # Parameter offset counter. - - mv a0, s1 - addi a1, sp, 28 - call lex_next - mv s1, a0 # Skip the opening paren. - - mv a0, s1 - addi a1, sp, 28 - call lex_next - - lw t0, 28(sp) - li t1, TOKEN_RIGHT_PAREN - beq t0, t1, .Lcompile_parameters_end - # When this is not the right paren, it is an identifier. - mv s1, a0 - -.Lcompile_parameters_parameter: - mv a0, s1 - addi a1, sp, 16 - call lex_next - mv s1, a0 # Skip the ":" in front of the type. - - call compile_type_expression - # a0 - Parameter type. - lw a1, 12(sp) - mv a2, s3 - call symbol_table_make_parameter - - mv a2, s3 - add s3, s3, a0 - - lw a0, 32(sp) - lw a1, 36(sp) - call symbol_table_enter - - lw t0, 12(sp) - addi t0, t0, -4 - sw t0, 12(sp) - - # Read the comma between the parameters or a closing paren. - mv a0, s1 - addi a1, sp, 16 - call lex_next + lw t0, 20(sp) + xori t1, t0, '_' + seqz t1, t1 lw t0, 16(sp) - li t1, TOKEN_COMMA - bne t0, t1, .Lcompile_parameters_end - # If it is a comma, read the name of the next parameter. 
- addi a1, sp, 28 - call lex_next - mv s1, a0 - - j .Lcompile_parameters_parameter - -.Lcompile_parameters_end: - mv s1, a0 # Skip the closing paren. + or a0, a0, t0 + or a0, a0, t1 # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret -.type compile_procedure, @function -compile_procedure: +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +.type _is_digit, @function +_is_digit: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - mv a0, s1 - addi a1, sp, 12 - call lex_next # Skip proc. - addi a1, sp, 12 - call lex_next - mv s1, a0 + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' - lw a0, 16(sp) - lw a1, 20(sp) - call write_procedure_head + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - # Register the procedure in the symbol table. - mv a0, s3 - call symbol_table_make_procedure + and a0, t1, t2 - mv a2, s3 - add s3, s3, a0 + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret - lw a0, 16(sp) - lw a1, 20(sp) - call symbol_table_enter +# Reads the next token. +# +# Returns token length in a0. +.type _read_token, @function +_read_token: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - # Save the state of the symbol table before we enter the procedure scope. - la t0, symbol_table + la t0, source_code_position # Token pointer. lw t0, (t0) - sw t0, 8(sp) + sw t0, 20(sp) # Current token position. + sw zero, 16(sp) # Token length. - call compile_parameters - call compile_local_section +.read_token_loop: + lb t0, (t0) # Current character. - # Skip the "begin" keyword, denoting the beginning of the procedure body. - mv a0, s1 - addi a1, sp, 12 - call lex_next - mv s1, a0 + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' 
+ beq t0, t1, .read_token_next - # Generate the procedure prologue with a predefined stack size. - li a0, PROLOGUE_SIZE - la a1, prologue - call _write_s + lw a0, 20(sp) + lb a0, (a0) + call _is_alpha + bnez a0, .read_token_next - # Save passed arguments on the stack. - li a0, ASM_PRESERVE_PARAMETERS_SIZE - la a1, asm_preserve_parameters - call _write_s + lw a0, 20(sp) + lb a0, (a0) + call _is_digit + bnez a0, .read_token_next - # Generate the body of the procedure. - call compile_statements - mv s1, a0 # Skip end. + j .read_token_end - # Generate the procedure epilogue with a predefined stack size. - li a0, EPILOGUE_SIZE - la a1, epilogue - call _write_s +.read_token_next: + # Advance the source code position and token length. + lw t0, 16(sp) + addi t0, t0, 1 + sw t0, 16(sp) - # Restore the symbol table, removing symbols local to this procedure. - la t0, symbol_table - lw t1, 8(sp) + lw t0, 20(sp) + addi t0, t0, 1 + sw t0, 20(sp) + + j .read_token_loop + +.read_token_end: + lw a0, 16(sp) + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +.type _memcmp, @function +_memcmp: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + mv t0, a0 + li a0, 0 + +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .Lmemcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcmp_loop + +.Lmemcmp_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Advances the token stream by a0 bytes. +.type _advance_token, @function +_advance_token: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Skip the .equ directive. + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 sw t1, (t0) # Epilogue. 
@@ -1145,400 +395,670 @@ compile_procedure: addi sp, sp, 32 ret -# Compiles a goto statement to an uncoditional jump. -.type compile_goto, @function -compile_goto: +.type _compile_section, @function +_compile_section: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - mv a0, s1 - addi a1, sp, 0 - call lex_next # Skip the goto keyword. - addi a1, sp, 0 - call lex_next # We should be on dot the label is beginning with. - addi a1, sp, 0 - call lex_next# Save the label name. - mv s1, a0 + # Print the .section directive and a space after it. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + li a2, KEYWORD_SECTION_SIZE + 1 + li a7, SYS_WRITE + ecall - li t0, 0x2e206a # j . - sw t0, 12(sp) - li a0, 3 - addi a1, sp, 12 - call _write_s - - lw a0, 4(sp) - lw a1, 8(sp) # Saved dot position. - call _write_s - - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Rewrites a label to assembly. -.type compile_label, @function -compile_label: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 8 - call lex_next # Dot starting the label. - addi a1, sp, 8 - call lex_next - mv s1, a0 - - li a0, '.' - call _write_c - lw a0, 12(sp) - lw a1, 16(sp) - call _write_s - li a0, ':' - call _write_c - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Just skips the return keyword and evaluates the return expression. -.type compile_return, @function -compile_return: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 12 - call lex_next - mv s1, a0 # Skip return. - - call compile_binary_expression - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -.type compile_if, @function -compile_if: - # Prologue. 
- addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 0 - call lex_next - mv s1, a0 # Skip the if. - - call compile_binary_expression - - mv a0, s1 - addi a1, sp, 0 - call lex_next - mv s1, a0 # Skip the then. - - # Label prefix. - li t0, 0x66694c2e # .Lif - sw t0, 20(sp) - - li t0, 0x202c3061 # a0,_ - sw t0, 16(sp) - li t0, 0x207a7165 # eqz_ - sw t0, 12(sp) - li t0, 0x62626262 # bbbb - sb t0, 11(sp) - - li a0, 13 - addi a1, sp, 11 - call _write_s - - # Write the label counter. - mv a0, s2 - call _write_i - - li a0, '\n' - call _write_c - - call compile_statements - mv s1, a0 # Skip end. - - # Write the label prefix. - li a0, 4 - addi a1, sp, 20 - call _write_s - - # Write the label counter. - mv a0, s2 - call _write_i - - # Finalize the label. - li t0, 0x0a3a # :\n - sh t0, 16(sp) - li a0, 2 - addi a1, sp, 16 - call _write_s - - addi s2, s2, 1 # Increment the label counter. - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Writes: -# .type identifier, @function -# identifier: -# -# Parameters: -# a0 - Identifier length. -# a0 - Identifier pointer. -.type write_procedure_head, @function -write_procedure_head: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 + # Skip the .equ directive. + li a0, KEYWORD_SECTION_SIZE + 1 + call _advance_token + # Read the section name. + call _read_token sw a0, 16(sp) - sw a1, 20(sp) - # .type identifier, @function - li a0, ASM_TYPE_SIZE - la a1, asm_type - call _write_s + # Print the section name and newline. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 16(sp) + addi a2, a2, 1 + li a7, SYS_WRITE + ecall + # Skip the section name. lw a0, 16(sp) - lw a1, 20(sp) - call _write_s + addi a0, a0, 1 + call _advance_token - li a0, ASM_TYPE_FUNCTION_SIZE - la a1, asm_type_function - call _write_s + # Epilogue. 
+ lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret - lw a0, 16(sp) - lw a1, 20(sp) - call _write_s +# Prints and skips a line. +.type _skip_comment, @function +_skip_comment: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - li t0, 0x0a3a # :\n + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + j .skip_comment_loop + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Prints and skips a line. +.type _compile_line, @function +_compile_line: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + la a0, source_code_position + lw a1, (a0) + +.compile_line_loop: + # Check for newline character. + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + li a0, STDOUT + li a2, 1 + li a7, SYS_WRITE + ecall + + # Advance the input string by one byte. + la a0, source_code_position + lw a1, (a0) + addi a1, a1, 1 + sw a1, (a0) + + j .compile_line_loop + +.compile_line_end: + # Print and skip the newline. + li a0, STDOUT + li a2, 1 + li a7, SYS_WRITE + ecall + + la a0, source_code_position + lw a1, (a0) + addi a1, a1, 1 + sw a1, (a0) + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_object, @function +_compile_object: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + call _compile_line + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_function_statements, @function +_compile_function_statements: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + +.compile_function_statements_loop: + la t0, source_code_position + lw t1, (t0) + addi t1, t1, 1 # Skip the tab. + + mv a0, t1 + la a1, keyword_ret + li a2, KEYWORD_RET_SIZE + call _memcmp + + beqz a0, .compile_function_statements_end + + call _compile_line + j .compile_function_statements_loop + +.compile_function_statements_end: + call _compile_line + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_call, @function +_compile_call: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + call _read_token + sw a0, 20(sp) + + la a0, asm_call + call _write_z + + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + li a7, SYS_WRITE + ecall + + # Skip parens, semicolon and newline. + lw a0, 20(sp) + addi a0, a0, 4 + call _advance_token + + li a0, '\n' + call _write_c + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_statement, @function +_compile_statement: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + call _compile_line + j .compile_statement_end + +.compile_statement_call: + li a0, 1 + call _advance_token + call _compile_call + + j .compile_statement_end + +.compile_statement_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_procedure_body, @function +_compile_procedure_body: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, KEYWORD_END_SIZE + call _memcmp + + beqz a0, .compile_procedure_body_epilogue + + call _compile_statement + j .compile_procedure_body_loop + +.compile_procedure_body_epilogue: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_procedure, @function +_compile_procedure: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Skip "proc ". + li a0, KEYWORD_PROC_SIZE + call _advance_token + + call _read_token + sw a0, 20(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + call _write_z + + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + li a7, SYS_WRITE + ecall + + la a0, asm_type_function + call _write_z + + # Write procedure label, _procedure_name: + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + li a7, SYS_WRITE + ecall + + la a0, asm_colon + call _write_z + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 20(sp) + addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 + call _advance_token + + la a0, asm_prologue + call _write_z + + call _compile_procedure_body + + # Write the epilogue. + la a0, asm_epilogue + call _write_z + + li a0, KEYWORD_END_SIZE + 2 + call _advance_token + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_function, @function +_compile_function: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Write the function header. + call _compile_line + call _compile_function_statements + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_type, @function +_compile_type: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Print the .type directive and a space after it. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + li a2, KEYWORD_TYPE_SIZE + 1 + li a7, SYS_WRITE + ecall + + # Skip the .type directive. + li a0, KEYWORD_TYPE_SIZE + 1 + call _advance_token + + # Read and print the symbol name. + call _read_token + sw a0, 20(sp) + + # Print the symbol name, comma, space and @. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + addi a2, a2, 3 + li a7, SYS_WRITE + ecall + + # Skip the constant name, comma, space and @. + lw a0, 20(sp) + addi a0, a0, 3 + call _advance_token + + # Read the symbol type. + call _read_token + sw a0, 16(sp) + la t0, source_code_position + lw t0, (t0) sw t0, 12(sp) - li a0, 2 - addi a1, sp, 12 - call _write_s + # Print the symbol type and newline. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 16(sp) + addi a2, a2, 1 + li a7, SYS_WRITE + ecall + + lw a0, 16(sp) + addi a0, a0, 1 + call _advance_token + + lw a0, 12(sp) + la a1, keyword_type_object + li a2, KEYWORD_TYPE_OBJECT_SIZE + call _memcmp + + beqz a0, .compile_type_object + + lw a0, 12(sp) + la a1, keyword_type_function + li a2, KEYWORD_TYPE_FUNCTION_SIZE + call _memcmp + + beqz a0, .compile_type_function + + j .compile_type_end + +.compile_type_object: + call _compile_object + + j .compile_type_end + +.compile_type_function: + call _compile_function + + j .compile_type_end + +.compile_type_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Compiles a list of statements delimited by semicolons. -# -# Sets a0 to the end of the token finishing the list -# (should be the "end" token in a valid program). -.type compile_statements, @function -compile_statements: +.type _compile_equ, @function +_compile_equ: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - # Generate the body of the procedure. 
- mv a0, s1 - addi a1, sp, 0 - call lex_next - lw t0, 0(sp) - li t1, TOKEN_END + # Print the .equ directive and a space after it. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + li a2, KEYWORD_EQU_SIZE + 1 + li a7, SYS_WRITE + ecall - beq t0, t1, .Lcompile_statements_end + # Skip the .equ directive. + li a0, KEYWORD_EQU_SIZE + 1 + call _advance_token -.Lcompile_statements_body: - call compile_statement + # Read and print the constant name. + call _read_token + sw a0, 20(sp) - mv a0, s1 - addi a1, sp, 0 - call lex_next - lw t0, 0(sp) - li t1, TOKEN_SEMICOLON + # Print the constant name, comma and space. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + addi a2, a2, 2 + li a7, SYS_WRITE + ecall - bne t0, t1, .Lcompile_statements_end - mv s1, a0 + # Skip the constant name, comma and the space after it. + lw a0, 20(sp) + addi a0, a0, 2 + call _advance_token - j .Lcompile_statements_body + # Read the constant value. + call _read_token + sw a0, 16(sp) + + # Print the constant value and newline. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 16(sp) + addi a2, a2, 1 + li a7, SYS_WRITE + ecall + + lw a2, 16(sp) + addi a2, a2, 1 + call _advance_token -.Lcompile_statements_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Checks for the type of the current statement and compiles it. -.type compile_statement, @function -compile_statement: +.type _skip_newlines, @function +_skip_newlines: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - mv a0, s1 - addi a1, sp, 0 - call lex_next - lw t0, 0(sp) + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) - li t1, TOKEN_IDENTIFIER - beq t0, t1, .Lcompile_statement_identifier +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end - li t1, TOKEN_GOTO - beq t0, t1, .Lcompile_statement_goto + addi t1, t1, 1 + sw t1, (t0) - li t1, TOKEN_RETURN - beq t0, t1, .Lcompile_statement_return + j .skip_newlines_loop - li t1, TOKEN_IF - beq t0, t1, .Lcompile_statement_if - - li t1, TOKEN_DOT - beq t0, t1, .Lcompile_statement_label - - unimp # Else. - -.Lcompile_statement_if: - call compile_if - j .Lcompile_statement_end - -.Lcompile_statement_label: - call compile_label - j .Lcompile_statement_end - -.Lcompile_statement_return: - call compile_return - j .Lcompile_statement_end - -.Lcompile_statement_goto: - call compile_goto - j .Lcompile_statement_end - -.Lcompile_statement_identifier: - call compile_identifier - j .Lcompile_statement_end - -.Lcompile_statement_end: +.skip_newlines_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Prints ".section .text" and exits. -.type compile_text_section, @function -compile_text_section: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - li a0, SECTION_TEXT_SIZE - la a1, section_text - call _write_s - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -.type compile_entry_point, @function -compile_entry_point: +# Process the source code and print the generated code. +.type _compile, @function +_compile: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - # .type _start, @function - li a0, ASM_START_SIZE - la a1, asm_start - call _write_s +.compile_loop: + call _skip_newlines - mv a0, s1 - addi a1, sp, 4 - call lex_next - mv s1, a0 # Skip begin. + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment - # Generate the body of the procedure. 
- call compile_statements - mv s1, a0 # Skip end. + la a0, source_code_position + lw a0, (a0) + la a1, keyword_equ + li a2, KEYWORD_EQU_SIZE + call _memcmp - li a0, ASM_EXIT_SIZE - la a1, asm_exit - call _write_s + beqz a0, .compile_equ + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, KEYWORD_SECTION_SIZE + call _memcmp + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, KEYWORD_TYPE_SIZE + call _memcmp + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, KEYWORD_PROC_SIZE + call _memcmp + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, KEYWORD_GLOBAL_SIZE + call _memcmp + + beqz a0, .compile_global + + j .compile_end # Not a known token, exit. + +.compile_equ: + call _compile_equ + + j .compile_loop + +.compile_section: + call _compile_section + + j .compile_loop + +.compile_type: + call _compile_type + + j .compile_loop + +.compile_global: + call _compile_line + + j .compile_loop + +.compile_comment: + call _skip_comment + + j .compile_loop + +.compile_procedure: + call _compile_procedure + + j .compile_loop + +.compile_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -.type compile, @function -compile: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - call compile_module_declaration - call compile_import - call compile_global_section - call compile_text_section - call compile_procedure_section - call compile_entry_point - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - # Entry point. +.globl _start .type _start, @function _start: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + # Read the source from the standard input. la a0, source_code li a1, SOURCE_BUFFER_SIZE # Buffer size. 
call _read_file - li s2, 1 - call _mmap - mv s3, a0 + # Save the pointer to the beginning of the source code in a global variable. + la t0, source_code + la t1, source_code_position + sw t0, (t1) - call symbol_table_build - call compile + call _compile # Call exit. - li a0, 0 # Use 0 return code. - call _exit + li a0, 0 # Use 0 return code. + li a7, SYS_EXIT + ecall + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret diff --git a/boot/stage2.elna b/boot/stage2.elna index b4f359e..a9de48d 100644 --- a/boot/stage2.elna +++ b/boot/stage2.elna @@ -1,1393 +1,855 @@ -program +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. -import dummy +# Stage2 compiler. +# +# It supports declaring and calling procedures without arguments. +# A procedure name should start with an underscore. -var - source_code: [81920]Byte +.section .rodata -proc _compile_import() -var loca0: Word +.type keyword_equ, @object +keyword_equ: .ascii ".equ" +.equ KEYWORD_EQU_SIZE, 4 + +.type keyword_section, @object +keyword_section: .ascii ".section" +.equ KEYWORD_SECTION_SIZE, 8 + +.type keyword_type, @object +keyword_type: .ascii ".type" +.equ KEYWORD_TYPE_SIZE, 5 + +.type keyword_ret, @object +keyword_ret: .ascii "ret" +.equ KEYWORD_RET_SIZE, 3 + +.type keyword_global, @object +keyword_global: .ascii ".globl" +.equ KEYWORD_GLOBAL_SIZE, 6 + +.type keyword_proc, @object +keyword_proc: .ascii "proc " +.equ KEYWORD_PROC_SIZE, 5 + +.type keyword_end, @object +keyword_end: .ascii "end" +.equ KEYWORD_END_SIZE, 3 + +.type keyword_begin, @object +keyword_begin: .ascii "begin" +.equ KEYWORD_BEGIN_SIZE, 5 + +.type keyword_var, @object +keyword_var: .ascii "var" +.equ KEYWORD_VAR_SIZE, 3 + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, 
@object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +.equ SOURCE_BUFFER_SIZE, 81920 +.type source_code, @object +source_code: .zero SOURCE_BUFFER_SIZE + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); begin - _advance(6); - _skip_spaces(); - loca0 := _read_token(); - _advance(loca0) -end + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; -proc _build_binary_expression() -var - loca0: Word - loca4: Word - loca8: Word - loca12: ^Byte - loca16: Word - loca20: Word - loca24: Bool +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write(); begin - _build_expression(0); + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. 
+ ecall +end; - loca4 := 0x2c306120; - loca8 := 0x0a316120; - - _skip_spaces(); - loca20 := _read_token(); - loca12 := _current(); - - loca16 := 0x26; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_and - end; - - loca16 := 0x726f; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_or - end; - - loca16 := 0x3d; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_equal - end; - - loca16 := 0x2b; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_plus - end; - - loca16 := 0x2d; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_minus - end; - - loca16 := 0x2a; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_product - end; - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_equal; - _advance(1); - _build_expression(1); - - loca0 := 0x627573; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - loca0 := 0x7a716573; - _write_s(4, @loca0); - _write_s(4, @loca4); - _write_s(3, @loca4); - _write_c(0x0a); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_and; - _advance(1); - _build_expression(1); - loca0 := 0x646e61; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_or; - _advance(2); - _build_expression(1); - loca0 := 0x726f; - _write_s(2, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_plus; - _advance(1); - _build_expression(1); - loca0 := 0x646461; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, 
@loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_minus; - _advance(1); - _build_expression(1); - loca0 := 0x627573; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_product; - _advance(1); - _build_expression(1); - loca0 := 0x6c756d; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .Lbuild_binary_expression_end -end - -proc _compile_identifier_expression(loca84: Word, loca80: Byte) -var - loca0: Word - loca4: ^Byte - loca8: Word - loca12: Bool - loca16: Word - loca20: Word - loca24: ^Byte - loca28: Byte +# Writes a character from a0 into the standard output. +proc _write_c(); begin - loca24 := _current(); - loca0 := 0x61636f6c; - loca0 := _memcmp(@loca0, loca24, 4); + sb a0, 20(sp) + addi a0, sp, 20 + li a1, 1 + _write(); +end; - if loca0 = 0 then - loca8 := 0x6120776c; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca80; - _write_s(3, @loca8); - - loca4 := loca24 + 4; - loca0 := loca84 - 4; - _write_s(loca0, loca4); - - loca8 := 0x29707328; - _write_s(4, @loca8); - _write_c(0x0a); - - goto .Lcompile_identifier_expression_end - end; - loca0 := _front(loca24); - loca8 := loca84 = 2; - loca12 := loca0 = 0x73; - if loca8 & loca12 then - loca8 := 0x6120766d; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca80; - _write_s(3, @loca8); - _write_s(loca84, loca24); - _write_c(0x0a); - - goto .Lcompile_identifier_expression_end - end; - - loca8 := 0x6120616c; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca80; - _write_s(3, @loca8); - - _write_s(loca84, loca24); - _write_c(0x0a); - - if _is_upper(loca0) then - loca8 := 0x6120776c; - _write_s(4, @loca8); - loca8 := 0x28202c00 or loca28; - _write_s(4, @loca8); - _write_c(0x61); - _write_c(loca28); - _write_c(0x29); - _write_c(0x0a); - - goto .Lcompile_identifier_expression_end - end; - - 
.Lcompile_identifier_expression_end -end - -proc _build_expression(loca84: Word) -var - loca0: Word - loca4: ^Byte - loca8: Word - loca12: Word - loca16: Word - loca20: Word - loca24: ^Byte - loca28: Word +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); begin - loca28 := loca84 + 0x30; + sw a0, 20(sp) - _skip_spaces(); - loca20 := _read_token(); - loca24 := _current(); - loca0 := _front(loca24); +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end - if loca0 = 0x2d then - goto .Lbuild_expression_negate - end; + # Print a character. + lw a0, 20(sp) + lb a0, (a0) + _write_c(); - if loca0 = 0x40 then - goto .Lbuild_expression_address - end; + # Advance the input string by one byte. + lw a0, 20(sp) + addi a0, a0, 1 + sw a0, 20(sp) - if _is_digit(loca0) then - goto .Lbuild_expression_literal - end; + j .write_z_loop - if loca0 = 0x5f then - goto .Lbuild_expression_call - end; +.write_z_end: +end; - _compile_identifier_expression(loca20, loca28); - goto .Lbuild_expression_advance; - - .Lbuild_expression_negate; - _advance(1); - _build_expression(0); - - loca8 := 0x2067656e; - _write_s(4, @loca8); - loca8 := 0x202c3061; - _write_s(4, @loca8); - loca8 := 0x0a3061; - _write_s(3, @loca8); - - goto .Lbuild_expression_advance; - - .Lbuild_expression_address; - loca8 := 0x69646461; - _write_s(4, @loca8); - loca8 := 0x6120; - _write_s(2, @loca8); - _write_c(loca28); - loca8 := 0x7073202c; - _write_s(4, @loca8); - loca8 := 0x202c; - _write_s(2, @loca8); - - _advance(1); - _skip_spaces(); - loca24 := _current(); - loca20 := _read_token(); - - loca4 := loca24 + 4; - loca0 := loca20 - 4; - _write_s(loca0, loca4); - - _write_c(0xa); - - goto .Lbuild_expression_advance; - - .Lbuild_expression_call; - _advance(loca20); - _advance(1); - _compile_call(loca24, loca20); - - goto .Lbuild_expression_end; - - .Lbuild_expression_literal; - loca8 := 0x6120696c; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca28; - _write_s(3, 
@loca8); - - _write_s(loca20, loca24); - _write_c(0x0a); - - goto .Lbuild_expression_advance; - - .Lbuild_expression_advance; - _advance(loca20); - - .Lbuild_expression_end -end - -proc _compile_designator_expression(loca84: ^Byte, loca80: Word) -var - loca0: Word - loca4: Int - loca8: Char - loca12: Bool - loca16: Bool +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); begin - loca0 := 0x61636f6c; - loca4 := _memcmp(@loca0, loca84, 4); + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' - if loca4 = 0 then - loca0 := 0x61207773; - _write_s(4, @loca0); - loca0 := 0x202c30; - _write_s(3, @loca0); + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; - loca84 := loca84 + 4; - loca80 := loca80 - 4; - _write_s(loca80, loca84); - - loca0 := 0x29707328; - _write_s(4, @loca0); - _write_c(0x0a); - - goto .Lcompile_designator_expression_end - end; - loca8 := _front(loca84); - loca12 := loca8 = 0x73; - loca16 := loca80 = 2; - if loca12 & loca16 then - loca0 := 0x20766d; - _write_s(3, @loca0); - _write_s(loca80, loca84); - loca0 := 0x3061202c; - _write_s(4, @loca0); - _write_c(0x0a); - - goto .Lcompile_designator_expression_end - end; - - .Lcompile_designator_expression_end -end - -proc _compile_identifier() -var - loca0: Word - loca4: Bool - loca8: Word - loca12: ^Byte - loca16: Word - loca20: ^Byte +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_lower(); begin - loca20 := _current(); - loca16 := _read_token(); + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' - _advance(loca16); - _skip_spaces(); + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z' +end; - loca12 := _current(); - loca8 := _read_token(); - - _advance(loca8); - _skip_spaces(); - - loca0 := 0x3d3a; - loca4 := _token_compare(loca12, loca8, @loca0); - if loca4 = 0 then - _build_binary_expression(); - _compile_designator_expression(loca20, loca16); - - goto .Lcompile_identifier_end - end; - if _front(loca12) = 0x28 then - _compile_call(loca20, loca16); - - goto .Lcompile_identifier_end - end; - - .Lcompile_identifier_end -end - -proc _compile_call(loca84: ^Byte, loca80: Word) -var - loca0: Word - loca4: Word - loca8: ^Byte - loca12: Word +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Parameters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+proc _is_alpha(); begin - loca12 := 0; + sw a0, 20(sp) - .Lcompile_call_paren; - _skip_spaces(); - loca8 := _current(); - if _front(loca8) = 0x29 then - goto .Lcompile_call_complete - end; + _is_upper(); + sw a0, 16(sp) - .Lcompile_call_argument; - _build_expression(0); + lw a0, 20(sp) + _is_lower(); - loca0 := 0x61207773; - _write_s(4, @loca0); - loca0 := 0x202c30; - _write_s(3, @loca0); + lw t0, 20(sp) + xori t1, t0, '_' + seqz t1, t1 - loca0 := -4 * loca12; - loca0 := loca0 + 60; - _write_i(loca0); + lw t0, 16(sp) + or a0, a0, t0 + or a0, a0, t1 +end; - loca0 := 0x29707328; - _write_s(4, @loca0); - _write_c(0x0a); - - _skip_spaces(); - loca8 := _current(); - loca0 := _front(loca8) = 0x2c; - if loca0 = 0 then - goto .Lcompile_call_paren - end; - - loca12 := loca12 + 1; - - _advance(1); - goto .Lcompile_call_argument; - - .Lcompile_call_complete; - loca12 := 0; - - .Lcompile_call_restore; - - loca0 := 0x6120776c; - _write_s(4, @loca0); - loca4 := 0x36202c30; - _write_s(4, @loca4); - loca4 := 0x70732830; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x35202c31; - _write_s(4, @loca4); - loca4 := 0x70732836; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x35202c32; - _write_s(4, @loca4); - loca4 := 0x70732832; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x34202c33; - _write_s(4, @loca4); - loca4 := 0x70732838; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x34202c34; - _write_s(4, @loca4); - loca4 := 0x70732834; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x34202c35; - _write_s(4, @loca4); - loca4 := 0x70732830; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - loca0 := 0x6c6c6163; - _write_s(4, @loca0); - _write_c(0x20); - - _write_s(loca80, loca84); - _write_c(0x0a); - - 
_skip_spaces(); - _advance(1) -end - -proc _read_token() -var - loca0: Word - loca4: Word - loca8: ^Byte +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Examined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); begin - loca8 := _current(); - loca0 := _front(loca8); - loca4 := 0; + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' - if loca0 = 0x2e then - goto .Ltoken_character_single - end; + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - if loca0 = 0x2c then - goto .Ltoken_character_single - end; + and a0, t1, t2 +end; - if loca0 = 0x3a then - goto .Ltoken_character_colon - end; - - if loca0 = 0x3b then - goto .Ltoken_character_single - end; - - if loca0 = 0x28 then - goto .Ltoken_character_single - end; - - if loca0 = 0x29 then - goto .Ltoken_character_single - end; - - if loca0 = 0x5b then - goto .Ltoken_character_single - end; - - if loca0 = 0x5d then - goto .Ltoken_character_single - end; - - if loca0 = 0x5e then - goto .Ltoken_character_single - end; - - if loca0 = 0x26 then - goto .Ltoken_character_single - end; - - if loca0 = 0x3d then - goto .Ltoken_character_single - end; - - if loca0 = 0x2b then - goto .Ltoken_character_single - end; - - if loca0 = 0x2d then - goto .Ltoken_character_single - end; - - if loca0 = 0x2a then - goto .Ltoken_character_single - end; - - if loca0 = 0x40 then - goto .Ltoken_character_single - end; - - .Ltoken_character_loop_do; - loca0 := loca8 + loca4; - loca0 := _front(loca0); - - if _is_alnum(loca0) then - loca4 := loca4 + 1; - goto .Ltoken_character_loop_do; - - .Ltoken_character_single; - loca4 := loca4 + 1; - goto .Ltoken_character_end; - - .Ltoken_character_colon; - loca0 := loca8 + 1; - loca0 := _front(loca0); - loca4 := loca4 + 1; - - if loca0 = 0x3d then - goto .Ltoken_character_single - end - end; - .Ltoken_character_end; - return loca4 -end - -proc _skip_spaces() -var - loca0: Byte - loca4: ^Byte +# Reads the next token.
+# +# Returns token length in a0. +proc _read_token(); begin - .Lspace_loop_do; - loca4 := _current(); - loca0 := _front(loca4); + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 20(sp) # Current token position. + sw zero, 16(sp) # Token length. - if loca0 = 0x20 then - goto .Lspace_loop_repeat - end; - if loca0 = 0x09 then - goto .Lspace_loop_repeat - end; - if loca0 = 0x0a then - goto .Lspace_loop_repeat - end; - if loca0 = 0x0d then - goto .Lspace_loop_repeat - end; +.read_token_loop: + lb t0, (t0) # Current character. - goto .Lspace_loop_end; - .Lspace_loop_repeat; - _advance(1); - goto .Lspace_loop_do; + # First we try to read a directive. + # A directive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next - .Lspace_loop_end -end + lw a0, 20(sp) + lb a0, (a0) + _is_alpha(); + bnez a0, .read_token_next -proc _compile_assembly(loca84: Word) -var loca0: ^Byte + lw a0, 20(sp) + lb a0, (a0) + _is_digit(); + bnez a0, .read_token_next + + j .read_token_end + +.read_token_next: + # Advance the source code position and token length. + lw t0, 16(sp) + addi t0, t0, 1 + sw t0, 16(sp) + + lw t0, 20(sp) + addi t0, t0, 1 + sw t0, 20(sp) + + j .read_token_loop + +.read_token_end: + lw a0, 16(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); begin - loca0 := _current(); + mv t0, a0 + li a0, 0 - _write_s(loca84, loca0); - _advance(loca84); +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end - _write_c(0xa); + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 - _advance(1) -end + bnez a0, .Lmemcmp_end -proc _compile_program() -var loca0: Word + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcmp_loop + +.Lmemcmp_end: +end; + +# Advances the token stream by a0 bytes.
+proc _advance_token(); begin - loca0 := 0x6f6c672e; - _write_s(4, @loca0); - loca0 := 0x206c6162; - _write_s(4, @loca0); - loca0 := 0x6174735f; - _write_s(4, @loca0); - loca0 := 0x0a7472; - _write_s(3, @loca0); + # Skip the .equ directive. + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; - _advance(8) -end - -proc _compile_variable_section() -var - loca0: Word - loca4: ^Byte +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); begin - loca0 := 0x6365732e; - _write_s(4, @loca0); - loca0 := 0x6e6f6974; - _write_s(4, @loca0); - loca0 := 0x73622e20; - _write_s(4, @loca0); - loca0 := 0x0a73; - _write_s(2, @loca0); + sw a0, 20(sp) - _advance(4); + la a0, source_code_position + lw a0, (a0) + lw a1, 20(sp) + _write(); - .Lcompile_variable_section_item; - _skip_spaces(); - loca4 := _current(); + lw a0, 20(sp) +end; - loca0 := 0x636f7270; - loca0 := _memcmp(@loca0, loca4, 4); - - if loca0 = 0 then - goto .Lcompile_variable_section_end - end; - _compile_variable(); - goto .Lcompile_variable_section_item; - - .Lcompile_variable_section_end -end - -proc _compile_variable() -var - loca0: Word - loca4: Word - loca8: Word - loca12: Word - loca16: ^Byte - loca20: Word - loca24: Word - loca28: ^Byte +proc _compile_section(); begin - loca24 := _read_token(); - loca28 := _current(); + # Print and skip the .section directive and a space after it. + li a0, KEYWORD_SECTION_SIZE + 1 + _write_token(); + _advance_token(); - _advance(loca24); + # Read the section name. 
+ _read_token(); + addi a0, a0, 1 - _skip_spaces(); - _advance(1); + _write_token(); + _advance_token(); +end; - _skip_spaces(); - _advance(1); - - loca16 := _read_token(); - loca20 := _current(); - _advance(loca16); - - _skip_spaces(); - _advance(1); - - _skip_spaces(); - loca0 := _read_token(); - _advance(loca0); - - loca0 := 0x7079742e; - _write_s(4, @loca0); - loca0 := 0x2065; - _write_s(2, @loca0); - - _write_s(loca24, loca28); - - loca0 := 0x6f40202c; - _write_s(4, @loca0); - loca0 := 0x63656a62; - _write_s(4, @loca0); - loca0 := 0x0a74; - _write_s(2, @loca0); - - loca0 := 0x7a69732e; - _write_s(4, @loca0); - loca0 := 0x2065; - _write_s(2, @loca0); - - _write_s(loca24, loca28); - - loca0 := 0x202c; - _write_s(2, @loca0); - - _write_s(loca16, loca20); - _write_c(0x0a); - - _write_s(loca24, loca28); - - loca0 := 0x7a2e203a; - _write_s(4, @loca0); - loca0 := 0x206f7265; - _write_s(4, @loca0); - - _write_s(loca16, loca20); - - _write_c(0x0a) -end - -proc _compile_procedure() -var - loca0: Word - loca4: Word - loca8: Word - loca12: Word - loca16: Word - loca20: ^Byte - loca24: ^Byte +# Skips a comment line without printing it. +proc _skip_comment(); begin - _advance(5); - loca16 := _read_token(); - loca20 := _current(); - _advance(loca16); + la t0, source_code_position + lw t1, (t0) - loca0 := 0x7079742e; - _write_s(4, @loca0); - loca0 := 0x2065; - _write_s(2, @loca0); +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end - _write_s(loca16, loca20); + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) - loca0 := 0x6640202c; - _write_s(4, @loca0); - loca0 := 0x74636e75; - _write_s(4, @loca0); - loca0 := 0x0a6e6f69; - _write_s(4, @loca0); + j .skip_comment_loop - _write_s(loca16, loca20); +.skip_comment_end: + # Skip the newline.
+ addi t1, t1, 1 + sw t1, (t0) +end; - loca0 := 0x0a3a; - _write_s(2, @loca0); - - _skip_spaces(); - _advance(1); - _skip_spaces(); - _advance(1); - - loca12 := 0x6e; - loca8 := 0x69676562; - - .Lcompile_procedure_begin; - _skip_spaces(); - loca0 := _read_token(); - - loca24 := _current(); - _advance(loca0); - loca0 := _token_compare(loca24, loca0, @loca8); - - if loca0 = 1 then - goto .Lcompile_procedure_begin - end; - - loca0 := 0x69646461; - _write_s(4, @loca0); - - loca0 := 0x2c707320; - _write_s(4, @loca0); - _write_s(4, @loca0); - - loca0 := 0x0a36392d; - _write_s(4, @loca0); - - loca0 := 0x72207773; - _write_s(4, @loca0); - loca0 := 0x39202c61; - _write_s(4, @loca0); - loca0 := 0x70732832; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x73207773; - _write_s(4, @loca0); - loca0 := 0x38202c30; - _write_s(4, @loca0); - loca0 := 0x70732838; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x69646461; - _write_s(4, @loca0); - loca0 := 0x2c307320; - _write_s(4, @loca0); - loca0 := 0x2c707320; - _write_s(4, @loca0); - loca0 := 0x0a363920; - _write_s(4, @loca0); - - loca0 := 0x61207773; - _write_s(4, @loca0); - loca4 := 0x38202c30; - _write_s(4, @loca4); - loca8 := 0x70732834; - _write_s(4, @loca8); - loca12 := 0x0a29; - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x38202c31; - _write_s(4, @loca4); - loca8 := 0x70732830; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x37202c32; - _write_s(4, @loca4); - loca8 := 0x70732836; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x37202c33; - _write_s(4, @loca4); - loca8 := 0x70732832; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x36202c34; - _write_s(4, @loca4); - loca8 := 0x70732838; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x36202c35; - _write_s(4, @loca4); - loca8 := 0x70732838; - _write_s(4, 
@loca8); - _write_s(2, @loca12); - - .Lcompile_procedure_body; - _skip_spaces(); - loca12 := _read_line(); - loca8 := 0x0a646e65; - loca24 := _current(); - loca8 := _memcmp(loca24, @loca8, 4); - - if loca8 = 0 then - goto .Lcompile_procedure_end - end; - - _compile_line(loca12); - goto .Lcompile_procedure_body; - - .Lcompile_procedure_end; - _advance(4); - - loca0 := 0x7220776c; - _write_s(4, @loca0); - loca0 := 0x39202c61; - _write_s(4, @loca0); - loca0 := 0x70732832; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x7320776c; - _write_s(4, @loca0); - loca0 := 0x38202c30; - _write_s(4, @loca0); - loca0 := 0x70732838; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x69646461; - _write_s(4, @loca0); - - loca0 := 0x2c707320; - _write_s(4, @loca0); - _write_s(4, @loca0); - - loca0 := 0x0a3639; - _write_s(4, @loca0); - - loca0 := 0x0a746572; - _write_s(4, @loca0) -end - -proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte) -var - loca0: Bool - loca4: Byte - loca8: Word - loca12: Byte +# Prints and skips a line. +proc _compile_line(); begin - .Ltoken_compare_loop; - loca4 := _front(loca76); +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) - loca8 := loca4 or loca80; - if loca8 = 0 then - goto .Ltoken_compare_equal - end; - if loca80 = 0 then - goto .Ltoken_compare_not_equal - end; - if loca4 = 0 then - goto .Ltoken_compare_not_equal - end; - loca12 := _front(loca84); - if loca4 = loca12 then - goto .Ltoken_compare_continue - end; - goto .Ltoken_compare_not_equal; + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end - .Ltoken_compare_continue; + # Print a character. + lw a0, (a1) + _write_c(); - loca84 := loca84 + 1; - loca80 := loca80 - 1; - loca76 := loca76 + 1; - goto .Ltoken_compare_loop; + # Advance the input string by one byte. 
+ li a0, 1 + _advance_token(); - .Ltoken_compare_not_equal; - loca0 := 1; - goto .Ltoken_compare_end; + j .compile_line_loop - .Ltoken_compare_equal; - loca0 := 0; +.compile_line_end: + li a0, '\n' + _write_c(); - .Ltoken_compare_end; - return loca0 -end + li a0, 1 + _advance_token(); +end; -proc _compile_goto() -var - loca0: Word - loca4: Word - loca8: ^Byte +proc _compile_integer_literal(); begin - _advance(4); + la a0, asm_li + _write_z(); - loca0 := 0x206a; - _write_s(2, @loca0); + la a0, asm_a0 + _write_z(); - _skip_spaces(); - loca8 := _current(); - _advance(1); + la a0, asm_comma + _write_z(); - loca0 := _read_token(); - _advance(loca0); - loca0 := loca0 + 1; - _write_s(loca0, loca8); + _read_token(); + _write_token(); + _advance_token(); - _advance(1); - _write_c(0x0a) -end + li a0, '\n' + _write_c(); +end; -proc _compile_label(loca84: Word) -var - loca0: Word - loca4: Word - loca8: ^Byte +proc _compile_character_literal(); begin - loca0 := _current(); + la a0, asm_li + _write_z(); - loca0 := loca0 + loca84; - loca0 := loca0 - 1; - loca4 := loca84; + la a0, asm_a0 + _write_z(); - loca0 := _front(loca0); - if loca0 = 0x3b then - loca4 := loca4 - 1 - end; - loca8 := _current(); - _write_s(loca4, loca8); + la a0, asm_comma + _write_z(); - _write_c(0x3a); - _write_c(0x0a); +.compile_character_literal_loop: + la a0, source_code_position + lw a0, (a0) + li a1, 1 + _write(); + li a0, 1 + _advance_token(); - _advance(loca84) -end + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\'' + beq a0, t1, .compile_character_literal_end -proc _compile_return() + j .compile_character_literal_loop + +.compile_character_literal_end: + li a0, '\'' + _write_c(); + + li a0, '\n' + _write_c(); + + li a0, 1 + _advance_token(); +end; + +proc _compile_variable_expression(); begin - _advance(6); - _skip_spaces(); - _build_binary_expression() -end + la a0, asm_lw + _write_z(); -proc _compile_if() -var - loca0: Word - loca4: ^Byte - loca8: Word - loca12: Word - loca16: 
Word - loca20: Word - loca24: Word + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + la a0, source_code_position + lw a0, (a0) + addi a0, a0, 1 + li a1, 2 + _write(); + + la a0, asm_sp + _write_z(); + + li a0, '\n' + _write_c(); + + li a0, 3 + _advance_token(); + +end; + +proc _compile_expression(); begin - _advance(2); - _skip_spaces(); + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) - _build_binary_expression(); + li t1, '\'' + beq a0, t1, .compile_expression_character_literal - _skip_spaces(); - _advance(4); + li t1, 'v' + beq a0, t1, .compile_expression_variable - loca20 := 0x00646e65; - loca16 := 0x66694c2e; + _is_digit(); + bnez a0, .compile_expression_integer_literal - loca12 := 0x7a716562; - _write_s(4, @loca12); - loca12 := 0x2c306120; - _write_s(4, @loca12); - _write_c(0x20); + j .compile_expression_end - loca24 := _label_counter(1); - _write_s(4, @loca16); - _write_i(loca24); +.compile_expression_character_literal: + _compile_character_literal(); + j .compile_expression_end - _write_c(0x0a); +.compile_expression_integer_literal: + _compile_integer_literal(); + j .compile_expression_end - .Lcompile_if_loop; - _skip_spaces(); - loca12 := _read_token(); +.compile_expression_variable: + _compile_variable_expression(); + j .compile_expression_end; - loca4 := _current(); - loca8 := _token_compare(loca4, loca12, @loca20); +.compile_expression_end: +end; - if loca8 then - loca12 := _read_line(); - _compile_line(loca12, 1); - - goto .Lcompile_if_loop - end; - - _write_s(4, @loca16); - _write_i(loca24); - - loca12 := 0x0a3a0a3a; - _write_s(2, @loca12); - - _advance(4) -end - -proc _compile_line(loca84: Word, loca80: Bool) -var - loca0: Char - loca4: Int - loca8: Bool - loca12: Word - loca16: ^Byte +proc _compile_call(); begin - if loca84 = 0 then - goto .Lcompile_line_empty - end; + _read_token(); + sw a0, 20(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 16(sp) - loca16 := _current(); - loca0 := _front(loca16); + # Skip the 
identifier and left paren. + addi a0, a0, 1 + _advance_token(); - loca12 := 0x676f7270; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_program - end; + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) - loca12 := 0x0a726176; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_var - end; + li t1, ')' + beq t0, t1, .compile_call_finalize - loca12 := 0x636f7270; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_procedure - end; + _compile_expression(); - loca12 := 0x69676562; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_begin - end; +.compile_call_finalize: + la a0, asm_call + _write_z(); - loca12 := 0x2e646e65; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_exit - end; + lw a0, 16(sp) + lw a1, 20(sp) + _write(); - loca12 := 0x61636f6c; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_identifier - end; - loca4 := _front(loca16); - if loca4 = 0x73 then - goto .Lcompile_line_identifier - end; + # Skip the right paren. 
+ li a0, 1 + _advance_token(); +end; - loca12 := 0x6f706d69; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_import - end; +proc _compile_goto(); +begin + li a0, 5 + _advance_token(); - loca12 := 0x6f746f67; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_goto - end; + _read_token(); + sw a0, 20(sp) - loca12 := 0x75746572; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_return - end; + la a0, asm_j + _write_z(); - loca12 := 0x6669; - loca4 := _memcmp(loca16, @loca12, 2); - if loca4 = 0 then - goto .Lcompile_line_if - end; + lw a0, 20(sp) + _write_token(); + _advance_token(); +end; - if loca0 = 0x2e then - goto .Lcompile_line_label - end; - if loca0 = 0x5f then - goto .Lcompile_line_identifier - end; - goto .Lcompile_line_unchanged; +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. 
+ addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call - .Lcompile_line_if; - _compile_if(); - goto .Lcompile_line_section; + li t1, 'g' + beq t0, t1, .compile_statement_goto - .Lcompile_line_label; - _compile_label(loca84); - goto .Lcompile_line_section; + _compile_line(); + j .compile_statement_end - .Lcompile_line_return; - _compile_return(); - goto .Lcompile_line_section; +.compile_statement_call: + li a0, 1 + _advance_token(); + _compile_call(); - .Lcompile_line_goto; + j .compile_statement_semicolon + +.compile_statement_goto: + li a0, 1 + _advance_token(); _compile_goto(); - goto .Lcompile_line_section; - .Lcompile_line_import; - _compile_import(); - goto .Lcompile_line_section; + j .compile_statement_semicolon - .Lcompile_line_identifier; - _compile_identifier(); - goto .Lcompile_line_section; +.compile_statement_semicolon: + li a0, 2 + _advance_token(); - .Lcompile_line_exit; - _compile_exit(); - goto .Lcompile_line_section; + li a0, '\n' + _write_c(); - .Lcompile_line_begin; +.compile_statement_end: +end; - if loca80 = 1 then - goto .Lcompile_line_compile_entry - end; - _compile_text_section(); - .Lcompile_line_compile_entry; - _compile_entry_point(); - loca8 := 1; - goto .Lcompile_line_end; +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, KEYWORD_END_SIZE + _memcmp(); - .Lcompile_line_procedure; - if loca80 = 1 then - goto .Lcompile_line_compile_procedure - end; - _compile_text_section(); - .Lcompile_line_compile_procedure; + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + j .compile_procedure_body_loop + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + li a0, KEYWORD_PROC_SIZE + _advance_token(); + + _read_token(); + sw a0, 20(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. 
+ la a0, asm_type_directive + _write_z(); + + lw a0, 20(sp) + _write_token(); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + lw a0, 20(sp) + _write_token(); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 20(sp) + addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + li a0, KEYWORD_END_SIZE + 2 + _advance_token(); +end; + +proc _compile_type(); +begin + # Print and skip the .type directive and a space after it. + li a0, KEYWORD_TYPE_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the symbol name, comma, space and @. + lw a0, 20(sp) + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + sw a0, 16(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _compile_equ(); +begin + # Print and skip the .equ directive and a space after it. + li a0, KEYWORD_EQU_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the constant name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the constant name, comma and space. + lw a0, 20(sp) + addi a0, a0, 2 + _write_token(); + _advance_token(); + + # Read the constant value. + _read_token(); + sw a0, 16(sp) + + # Print and skip the constant value and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); +end; + +proc _skip_newlines(); +begin + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + j .skip_newlines_loop + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_equ + li a2, KEYWORD_EQU_SIZE + _memcmp(); + + beqz a0, .compile_equ + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, KEYWORD_SECTION_SIZE + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, KEYWORD_TYPE_SIZE + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, KEYWORD_PROC_SIZE + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, KEYWORD_GLOBAL_SIZE + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. 
+ j .compile_end + +.compile_equ: + _compile_equ(); + + j .compile_loop + +.compile_section: + _compile_section(); + + j .compile_loop + +.compile_type: + _compile_type(); + + j .compile_loop + +.compile_global: + _compile_line(); + + j .compile_loop + +.compile_comment: + _skip_comment(); + + j .compile_loop + +.compile_procedure: _compile_procedure(); - loca8 := 1; - goto .Lcompile_line_end; - .Lcompile_line_var; - _compile_variable_section(); - goto .Lcompile_line_section; + j .compile_loop - .Lcompile_line_program; - _compile_program(); - goto .Lcompile_line_section; +.compile_end: +end; - .Lcompile_line_empty; - _advance(1); - goto .Lcompile_line_section; - - .Lcompile_line_unchanged; - _compile_assembly(loca84); - goto .Lcompile_line_section; - - .Lcompile_line_section; - loca8 := 0; - - .Lcompile_line_end; - _skip_spaces(); - - return loca8 -end - -proc _compile_text_section() -var loca0: Word +# Entry point. +.globl _start +proc _start(); begin - loca0 := 0x6365732e; - _write_s(4, @loca0); - loca0 := 0x6e6f6974; - _write_s(4, @loca0); - loca0 := 0x65742e20; - _write_s(4, @loca0); - loca0 := 0x0a7478; - _write_s(3, @loca0) -end + # Read the source from the standard input. + la a0, source_code + li a1, SOURCE_BUFFER_SIZE # Buffer size. 
+ _read_file(); + _compile(); -proc _compile_entry_point() -var loca0: Word -begin - loca0 := 0x7079742e; - _write_s(4, @loca0); - loca0 := 0x735f2065; - _write_s(4, @loca0); - loca0 := 0x74726174; - _write_s(4, @loca0); - loca0 := 0x6640202c; - _write_s(4, @loca0); - loca0 := 0x74636e75; - _write_s(4, @loca0); - loca0 := 0x0a6e6f69; - _write_s(4, @loca0); - loca0 := 0x6174735f; - _write_s(4, @loca0); - loca0 := 0x0a3a7472; - _write_s(4, @loca0); - - _advance(6) -end - -proc _compile_exit() -var loca0: Word -begin - loca0 := 0x6120696c; - _write_s(4, @loca0); - loca0 := 0x30202c30; - _write_s(4, @loca0); - loca0 := 0x20696c0a; - _write_s(4, @loca0); - loca0 := 0x202c3761; - _write_s(4, @loca0); - loca0 := 0x650a3339; - _write_s(4, @loca0); - loca0 := 0x6c6c6163; - _write_s(4, @loca0); - loca0 := 0x0a; - _write_s(1, @loca0); - - _advance(4); - _skip_spaces() -end - -proc _read_line() -var - loca0: ^Byte - loca4: Byte -begin - loca0 := _current(); - - .Lread_line_do; - loca4 := _front(loca0); - if loca4 = 0 then - goto .Lread_line_end - end; - if loca4 = 0x0a then - goto .Lread_line_end - end; - loca0 := loca0 + 1; - goto .Lread_line_do; - - .Lread_line_end; - loca4 := _current(); - return loca0 - loca4 -end - -proc _compile() -var - loca0: Word - loca4: Word - loca8: Bool - loca12: Char - loca16: ^Byte -begin - loca4 := 0; - - .Lcompile_do; - loca16 := _current(); - loca12 := _front(loca16); - - if loca12 = 0 then - goto .Lcompile_end - end; - - _skip_spaces(); - loca0 := _read_line(); - loca8 := _compile_line(loca0, loca4); - - if loca8 = 0 then - goto .Lcompile_do - end; - loca4 := loca4 or loca8; - - goto .Lcompile_do; - .Lcompile_end -end - -proc _front(loca84: ^Word) -begin - return _get(loca84) & 0xff -end - -proc _main() -begin - _read_file(source_code, 81920); - - _label_counter(0) -end - -begin - _main(); - _compile() -end. + # Call exit. + li a0, 0 # Use 0 return code. + li a7, 93 # SYS_EXIT. 
+ ecall +end; diff --git a/boot/stage3.elna b/boot/stage3.elna new file mode 100644 index 0000000..2b31775 --- /dev/null +++ b/boot/stage3.elna @@ -0,0 +1,975 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage3 compiler. +# +# - Procedures without none or one argument. +# - Goto statements. +# - Character and integer literals. +# - Passing local variables to procedures. +# - Local variables should have the format: v00, +# where 00 is its offset from the sp register. + +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "ret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_mv, @object +asm_mv: .string "mv " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" 
+ +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +# When modifying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + lw a0, 0(sp) +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + addi a1, sp, 0 + _print_i(); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 0(sp) + addi a0, sp, 0 + li a1, 1 + _write_s(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String.
+proc _write_z(); +begin + sw a0, 0(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 0(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v00); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v04); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 0(sp) # Current token position. + sw zero, 4(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. 
+
+	# First we try to read a directive.
+	# A directive can contain a dot and characters.
+	li t1, '.'
+	beq t0, t1, .read_token_next
+
+	lw a0, 0(sp)
+	lb a0, (a0)
+	_is_alnum();
+	bnez a0, .read_token_next
+
+	goto .read_token_end;
+
+.read_token_next:
+	# Advance the source code position and token length.
+	lw t0, 4(sp)
+	addi t0, t0, 1
+	sw t0, 4(sp)
+
+	lw t0, 0(sp)
+	addi t0, t0, 1
+	sw t0, 0(sp)
+
+	goto .read_token_loop;
+
+.read_token_end:
+	lw a0, 4(sp)
+end;
+
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
+#
+# Returns 0 in a0 if memory regions are equal.
+proc _memcmp();
+begin
+	mv t0, a0 # Walk the first region through t0 so that a0 can hold the result.
+	li a0, 0 # Result when the length is zero or all bytes match.
+
+.memcmp_loop:
+	beqz a2, .memcmp_end
+
+	lbu t1, (t0)
+	lbu t2, (a1)
+	sub a0, t1, t2 # Difference of the current byte pair.
+
+	bnez a0, .memcmp_end
+
+	addi t0, t0, 1
+	addi a1, a1, 1
+	addi a2, a2, -1
+
+	goto .memcmp_loop;
+
+.memcmp_end:
+end;
+
+# Copies memory byte by byte.
+#
+# Parameters:
+# a0 - Destination.
+# a1 - Source.
+# a2 - Size.
+#
+# Preserves a0.
+proc _memcpy();
+begin
+	mv t0, a0 # Remember the destination, a0 is advanced in the loop.
+
+.memcpy_loop:
+	beqz a2, .memcpy_end
+
+	lbu t1, (a1)
+	sb t1, (a0)
+
+	addi a0, a0, 1
+	addi a1, a1, 1
+	addi a2, a2, -1
+
+	goto .memcpy_loop;
+
+.memcpy_end:
+	mv a0, t0 # Restore the original destination.
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
+begin
+	la t0, source_code_position
+	lw t1, (t0)
+	add t1, t1, a0
+	sw t1, (t0) # Store the advanced position back.
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
+begin
+	sw a0, 0(sp) # Keep the length, a0 is reused for the buffer pointer.
+
+	la a0, source_code_position
+	lw a0, (a0)
+	lw a1, 0(sp)
+	_write_s();
+
+	lw a0, 0(sp) # Reload the length as the return value.
+end;
+
+proc _compile_section();
+begin
+	# Print and skip the ".section" (8 characters) directive and a space after it.
+	_write_token(9);
+	_advance_token();
+
+	# Read the section name.
+	_read_token();
+	addi a0, a0, 1 # Also take the character that follows the name.
+
+	_write_token();
+	_advance_token();
+end;
+
+# Skips the rest of the current line without printing it.
+proc _skip_comment();
+begin
+	la t0, source_code_position
+	lw t1, (t0) # t1 is the current source position.
+
+.skip_comment_loop:
+	# Check for newline character.
+	lb t2, (t1)
+	li t3, '\n'
+	beq t2, t3, .skip_comment_end
+
+	# Advance the input string by one byte.
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_comment_loop;
+
+.skip_comment_end:
+	# Skip the newline.
+	addi t1, t1, 1
+	sw t1, (t0)
+end;
+
+# Prints the rest of the current line, newline included, and skips it.
+proc _compile_line();
+begin
+.compile_line_loop:
+	la a0, source_code_position
+	lw a1, (a0)
+
+	lb t0, (a1)
+	li t1, '\n'
+	beq t0, t1, .compile_line_end
+
+	# Print a character.
+	lb a0, (a1) # A byte load is enough, _write_c stores only the low byte.
+	_write_c();
+
+	# Advance the input string by one byte.
+	_advance_token(1);
+
+	goto .compile_line_loop;
+
+.compile_line_end:
+	_write_c('\n');
+
+	_advance_token(1);
+end;
+
+proc _compile_integer_literal();
+begin
+	la a0, asm_li # The li mnemonic, then the a0 register and a comma.
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	_write_c('\n');
+end;
+
+proc _compile_character_literal();
+begin
+	la a0, asm_li # The li mnemonic, then the a0 register and a comma.
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	li a0, '\''
+	_write_c();
+	_advance_token(1);
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	li t1, '\\' # An escaped character occupies two source bytes.
+	bne a0, t1, .compile_character_literal_end
+
+	li a0, '\\'
+	_write_c();
+	_advance_token(1);
+
+.compile_character_literal_end:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	_write_c();
+
+	li a0, '\''
+	_write_c();
+
+	_write_c('\n');
+
+	_advance_token(2);
+
+end;
+
+proc _compile_variable_expression();
+begin
+	la a0, asm_lw # The lw mnemonic, then the a0 register and a comma.
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_advance_token(1);
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	la a0, asm_sp # The token written above becomes the offset in front of (sp).
+	_write_z();
+
+	_write_c('\n');
+
+end;
+
+proc _compile_expression();
+begin
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0) # The first character selects the kind of expression.
+
+	li t1, '\''
+	beq a0, t1, .compile_expression_character_literal
+
+	li t1, 'v'
+	beq a0, t1, .compile_expression_variable
+
+	_is_digit();
+	bnez a0, .compile_expression_integer_literal
+
+	goto .compile_expression_end;
+
+.compile_expression_character_literal:
+	_compile_character_literal();
+	goto .compile_expression_end;
+
+.compile_expression_integer_literal:
+	_compile_integer_literal();
+	goto .compile_expression_end;
+
+.compile_expression_variable:
+	_compile_variable_expression();
+	goto .compile_expression_end;
+
+.compile_expression_end:
+end;
+
+proc _compile_call();
+begin
+	# Stack variables:
+	# v0 - Procedure name length.
+	# v4 - Procedure name pointer.
+	# v8 - Argument count.
+
+	_read_token();
+	sw a0, 0(sp) # v0, the name length.
+	la t0, source_code_position
+	lw t0, (t0)
+	sw t0, 4(sp) # v4, the name pointer.
+
+	sw zero, 8(sp) # v8, no arguments seen yet.
+
+	# Skip the identifier and left paren.
+	addi a0, a0, 1
+	_advance_token();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, ')' # A right paren right away means there are no arguments.
+	beq t0, t1, .compile_call_finalize
+
+.compile_call_loop:
+	_compile_expression();
+
+	# Save the argument on the stack.
+	la a0, asm_sw
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Calculate the stack offset: 20 - (4 * argument_counter)
+	lw t0, 8(sp)
+	li t1, 4
+	mul t0, t0, t1
+	li t1, 20
+	sub a0, t1, t0
+	_write_i();
+
+	la a0, asm_sp
+	_write_z();
+
+	_write_c('\n');
+
+	# Add one to the argument counter.
+	lw t0, 8(sp)
+	addi t0, t0, 1
+	sw t0, 8(sp)
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, ',' # Anything but a comma ends the argument list.
+	bne t0, t1, .compile_call_finalize
+
+	_advance_token(2);
+	goto .compile_call_loop;
+
+.compile_call_finalize:
+	# Load the argument from the stack.
+
+	lw t0, 8(sp)
+	beqz t0, .compile_call_end
+
+	# Decrement the argument counter.
+ lw t0, 8(sp) + addi t0, t0, -1 + sw t0, 8(sp) + + la a0, asm_lw + _write_z(); + + _write_c('a'); + lw a0, 8(sp) + _write_i(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 20 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 20 + sub a0, t1, t0 + _write_i(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + + goto .compile_call_finalize; + +.compile_call_end: + la a0, asm_call + _write_z(); + + lw a0, 4(sp) + lw a1, 0(sp) + _write_s(); + + # Skip the right paren. + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + la a0, asm_j + _write_z(); + + _write_token(v00); + _advance_token(); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_semicolon: + _advance_token(2); + + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, 3 # "end" length. + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. 
+	la a0, asm_type_directive
+	_write_z();
+
+	_write_token(v00);
+
+	la a0, asm_type_function
+	_write_z();
+
+	# Write procedure label, _procedure_name:
+	_write_token(v00);
+
+	la a0, asm_colon
+	_write_z();
+
+	# Skip the function name and trailing parens, semicolon, "begin" and newline.
+	lw a0, 0(sp)
+	addi a0, a0, 10
+	_advance_token();
+
+	la a0, asm_prologue
+	_write_z();
+
+	_compile_procedure_body();
+
+	# Write the epilogue.
+	la a0, asm_epilogue
+	_write_z();
+
+	# Skip the "end" keyword, semicolon and newline.
+	_advance_token(5);
+end;
+
+proc _compile_type();
+begin
+	# Print and skip the ".type" (5 characters) directive and a space after it.
+	_write_token(6);
+	_advance_token();
+
+	# Read the symbol name.
+	_read_token();
+
+	# Print and skip the symbol name, comma, space and @.
+	addi a0, a0, 3
+	_write_token();
+	_advance_token();
+
+	# Read the symbol type.
+	_read_token();
+	la t0, source_code_position
+	lw t0, (t0)
+	sw t0, 12(sp) # NOTE(review): stored but not read again in this procedure.
+
+	# Print the symbol type and newline.
+	addi a0, a0, 1
+	_write_token();
+	_advance_token();
+
+	# Write the object definition itself.
+	_compile_line();
+
+.compile_type_end:
+end;
+
+proc _skip_newlines();
+begin
+	# Skip newlines.
+	la t0, source_code_position
+	lw t1, (t0)
+
+.skip_newlines_loop:
+	lb t2, (t1)
+	li t3, '\n'
+	bne t2, t3, .skip_newlines_end
+	# A NUL byte also differs from the newline, so the branch above ends the loop at the end of input too.
+
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
+begin
+.compile_loop:
+	_skip_newlines();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	beqz t0, .compile_end
+	li t1, '#'
+	beq t0, t1, .compile_comment
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_section
+	li a2, 8 # ".section" length.
+	_memcmp();
+
+	beqz a0, .compile_section
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_type
+	li a2, 5 # ".type" length.
+ _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, 6 # ".globl" length. + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. + goto .compile_end; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, 81920 # Buffer size. + _read_file(); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage4.elna b/boot/stage4.elna new file mode 100644 index 0000000..d6bbb9d --- /dev/null +++ b/boot/stage4.elna @@ -0,0 +1,969 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage3 compiler. +# +# - Procedures without none or one argument. +# - Goto statements. +# - Character and integer literals. +# - Passing local variables to procedures. +# - Local variables should have the format: v00, +# where 00 is its offset from the sp register. 
+ +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "ret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_mv, @object +asm_mv: .string "mv " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +# When modifiying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. 
+ ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + lw a0, 0(sp) +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + addi a1, sp, 0 + _print_i(); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 0(sp) + addi a0, sp, 0 + li a1, 1 + _write_s(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 0(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 0(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v00); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v04); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 0(sp) # Current token position. + sw zero, 4(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. 
+	lw t0, 4(sp)
+	addi t0, t0, 1
+	sw t0, 4(sp)
+
+	lw t0, 0(sp)
+	addi t0, t0, 1
+	sw t0, 0(sp)
+
+	goto .read_token_loop;
+
+.read_token_end:
+	lw a0, 4(sp)
+end;
+
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
+#
+# Returns 0 in a0 if memory regions are equal.
+proc _memcmp();
+begin
+	mv t0, a0 # Walk the first region through t0 so that a0 can hold the result.
+	li a0, 0 # Result when the length is zero or all bytes match.
+
+.memcmp_loop:
+	beqz a2, .memcmp_end
+
+	lbu t1, (t0)
+	lbu t2, (a1)
+	sub a0, t1, t2 # Difference of the current byte pair.
+
+	bnez a0, .memcmp_end
+
+	addi t0, t0, 1
+	addi a1, a1, 1
+	addi a2, a2, -1
+
+	goto .memcmp_loop;
+
+.memcmp_end:
+end;
+
+# Copies memory byte by byte.
+#
+# Parameters:
+# a0 - Destination.
+# a1 - Source.
+# a2 - Size.
+#
+# Preserves a0.
+proc _memcpy();
+begin
+	mv t0, a0 # Remember the destination, a0 is advanced in the loop.
+
+.memcpy_loop:
+	beqz a2, .memcpy_end
+
+	lbu t1, (a1)
+	sb t1, (a0)
+
+	addi a0, a0, 1
+	addi a1, a1, 1
+	addi a2, a2, -1
+
+	goto .memcpy_loop;
+
+.memcpy_end:
+	mv a0, t0 # Restore the original destination.
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
+begin
+	la t0, source_code_position
+	lw t1, (t0)
+	add t1, t1, a0
+	sw t1, (t0) # Store the advanced position back.
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
+begin
+	sw a0, 0(sp) # Keep the length, a0 is reused for the buffer pointer.
+
+	la a0, source_code_position
+	lw a0, (a0)
+	lw a1, 0(sp)
+	_write_s();
+
+	lw a0, 0(sp) # Reload the length as the return value.
+end;
+
+proc _compile_section();
+begin
+	# Print and skip the ".section" (8 characters) directive and a space after it.
+	_write_token(9);
+	_advance_token();
+
+	# Read the section name.
+	_read_token();
+	addi a0, a0, 1 # Also take the character that follows the name.
+
+	_write_token();
+	_advance_token();
+end;
+
+# Skips the rest of the current line without printing it.
+proc _skip_comment();
+begin
+	la t0, source_code_position
+	lw t1, (t0) # t1 is the current source position.
+
+.skip_comment_loop:
+	# Check for newline character.
+	lb t2, (t1)
+	li t3, '\n'
+	beq t2, t3, .skip_comment_end
+
+	# Advance the input string by one byte.
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_comment_loop;
+
+.skip_comment_end:
+	# Skip the newline.
+	addi t1, t1, 1
+	sw t1, (t0)
+end;
+
+# Prints and skips a line.
+proc _compile_line();
+begin
+.compile_line_loop:
+	la a0, source_code_position
+	lw a1, (a0)
+
+	lb t0, (a1)
+	li t1, '\n'
+	beq t0, t1, .compile_line_end
+
+	# Print a character.
+	lb a0, (a1) # A byte load is enough, _write_c stores only the low byte.
+	_write_c();
+
+	# Advance the input string by one byte.
+	_advance_token(1);
+
+	goto .compile_line_loop;
+
+.compile_line_end:
+	_write_c('\n');
+
+	_advance_token(1);
+end;
+
+proc _compile_integer_literal();
+begin
+	la a0, asm_li # The li mnemonic, then the a0 register and a comma.
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	_write_c('\n');
+end;
+
+proc _compile_character_literal();
+begin
+	la a0, asm_li # The li mnemonic, then the a0 register and a comma.
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_write_c('\'');
+	_advance_token(1);
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	li t1, '\\' # An escaped character occupies two source bytes.
+	bne a0, t1, .compile_character_literal_end
+
+	_write_c('\\');
+	_advance_token(1);
+
+.compile_character_literal_end:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	_write_c();
+
+	_write_c('\'');
+	_write_c('\n');
+
+	_advance_token(2);
+
+end;
+
+proc _compile_variable_expression();
+begin
+	la a0, asm_lw # The lw mnemonic, then the a0 register and a comma.
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_advance_token(1);
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	la a0, asm_sp # The token written above becomes the offset in front of (sp).
+	_write_z();
+
+	_write_c('\n');
+
+end;
+
+proc _compile_expression();
+begin
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0) # The first character selects the kind of expression.
+
+	li t1, '\''
+	beq a0, t1, .compile_expression_character_literal
+
+	li t1, 'v'
+	beq a0, t1, .compile_expression_variable
+
+	_is_digit();
+	bnez a0, .compile_expression_integer_literal
+
+	goto .compile_expression_end;
+
+.compile_expression_character_literal:
+	_compile_character_literal();
+	goto .compile_expression_end;
+
+.compile_expression_integer_literal:
+	_compile_integer_literal();
+	goto .compile_expression_end;
+
+.compile_expression_variable:
+	_compile_variable_expression();
+	goto
.compile_expression_end;
+
+.compile_expression_end:
+end;
+
+proc _compile_call();
+begin
+	# Stack variables:
+	# v0 - Procedure name length.
+	# v4 - Procedure name pointer.
+	# v8 - Argument count.
+
+	_read_token();
+	sw a0, 0(sp) # v0, the name length.
+	la t0, source_code_position
+	lw t0, (t0)
+	sw t0, 4(sp) # v4, the name pointer.
+
+	sw zero, 8(sp) # v8, no arguments seen yet.
+
+	# Skip the identifier and left paren.
+	addi a0, a0, 1
+	_advance_token();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, ')' # A right paren right away means there are no arguments.
+	beq t0, t1, .compile_call_finalize
+
+.compile_call_loop:
+	_compile_expression();
+
+	# Save the argument on the stack.
+	la a0, asm_sw
+	_write_z();
+
+	la a0, asm_a0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Calculate the stack offset: 20 - (4 * argument_counter)
+	lw t0, 8(sp)
+	li t1, 4
+	mul t0, t0, t1
+	li t1, 20
+	sub a0, t1, t0
+	_write_i();
+
+	la a0, asm_sp
+	_write_z();
+
+	_write_c('\n');
+
+	# Add one to the argument counter.
+	lw t0, 8(sp)
+	addi t0, t0, 1
+	sw t0, 8(sp)
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, ',' # Anything but a comma ends the argument list.
+	bne t0, t1, .compile_call_finalize
+
+	_advance_token(2);
+	goto .compile_call_loop;
+
+.compile_call_finalize:
+	# Load the argument from the stack.
+
+	lw t0, 8(sp)
+	beqz t0, .compile_call_end
+
+	# Decrement the argument counter.
+	lw t0, 8(sp)
+	addi t0, t0, -1
+	sw t0, 8(sp)
+
+	la a0, asm_lw
+	_write_z();
+
+	_write_c('a');
+	lw a0, 8(sp) # The decremented counter is the number of the target a register.
+	_write_i();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Calculate the stack offset: 20 - (4 * argument_counter)
+	lw t0, 8(sp)
+	li t1, 4
+	mul t0, t0, t1
+	li t1, 20
+	sub a0, t1, t0
+	_write_i();
+
+	la a0, asm_sp
+	_write_z();
+
+	_write_c('\n');
+
+	goto .compile_call_finalize;
+
+.compile_call_end:
+	la a0, asm_call
+	_write_z();
+
+	_write_s(v04, v00);
+
+	# Skip the right paren.
+ _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + la a0, asm_j + _write_z(); + + _write_token(v00); + _advance_token(); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_semicolon: + _advance_token(2); + + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, 3 # "end" length. + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + _write_z(); + + _write_token(v00); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + _write_token(v00); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 0(sp) + addi a0, a0, 10 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + # Skip the "end" keyword, semicolon and newline. 
+	_advance_token(5);
+end;
+
+proc _compile_type();
+begin
+	# Print and skip the ".type" (5 characters) directive and a space after it.
+	_write_token(6);
+	_advance_token();
+
+	# Read the symbol name.
+	_read_token();
+
+	# Print and skip the symbol name, comma, space and @.
+	addi a0, a0, 3
+	_write_token();
+	_advance_token();
+
+	# Read the symbol type.
+	_read_token();
+	la t0, source_code_position
+	lw t0, (t0)
+	sw t0, 12(sp) # NOTE(review): stored but not read again in this procedure.
+
+	# Print the symbol type and newline.
+	addi a0, a0, 1
+	_write_token();
+	_advance_token();
+
+	# Write the object definition itself.
+	_compile_line();
+
+.compile_type_end:
+end;
+
+proc _skip_newlines();
+begin
+	# Skip newlines.
+	la t0, source_code_position
+	lw t1, (t0)
+
+.skip_newlines_loop:
+	lb t2, (t1)
+	li t3, '\n'
+	bne t2, t3, .skip_newlines_end
+	# A NUL byte also differs from the newline, so the branch above ends the loop at the end of input too.
+
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
+begin
+.compile_loop:
+	_skip_newlines();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	beqz t0, .compile_end
+	li t1, '#'
+	beq t0, t1, .compile_comment
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_section
+	li a2, 8 # ".section" length.
+	_memcmp();
+
+	beqz a0, .compile_section
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_type
+	li a2, 5 # ".type" length.
+	_memcmp();
+
+	beqz a0, .compile_type
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_proc
+	li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
+	_memcmp();
+
+	beqz a0, .compile_procedure
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_global
+	li a2, 6 # ".globl" length.
+	_memcmp();
+
+	beqz a0, .compile_global
+	# Not a known token, exit.
+ goto .compile_end; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, 81920 # Buffer size. + _read_file(); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage5.elna b/boot/stage5.elna new file mode 100644 index 0000000..d6bbb9d --- /dev/null +++ b/boot/stage5.elna @@ -0,0 +1,969 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage3 compiler. +# +# - Procedures without none or one argument. +# - Goto statements. +# - Character and integer literals. +# - Passing local variables to procedures. +# - Local variables should have the format: v00, +# where 00 is its offset from the sp register. 
+ +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "ret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_mv, @object +asm_mv: .string "mv " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +# When modifiying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. 
+ ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + lw a0, 0(sp) +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + addi a1, sp, 0 + _print_i(); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 0(sp) + addi a0, sp, 0 + li a1, 1 + _write_s(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 0(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 0(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Parameters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v00); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 # t1 = character == '_' + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Examined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) # Keep the character for the digit check below. + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v04); + + lw a1, 0(sp) + or a0, a0, a1 # a0 = is digit | is alpha +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 0(sp) # Current token position. + sw zero, 4(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a directive. + # A directive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. 
+ lw t0, 4(sp) + addi t0, t0, 1 + sw t0, 4(sp) + + lw t0, 0(sp) + addi t0, t0, 1 + sw t0, 0(sp) + + goto .read_token_loop; + +.read_token_end: + lw a0, 4(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if the memory regions are equal, otherwise the difference between the first two differing bytes. +proc _memcmp(); +begin + mv t0, a0 # a0 is reused for the result, t0 walks the first region. + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory. +# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + mv t0, a0 # Remember the destination to restore a0 at the end. + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop; + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + sw a0, 0(sp) # Saved to be returned after the call. + + la a0, source_code_position + lw a0, (a0) + lw a1, 0(sp) + _write_s(); + + lw a0, 0(sp) +end; + +proc _compile_section(); +begin + # Print and skip the ".section" (8 characters) directive and a space after it. + _write_token(9); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 # Include the character following the section name. + + _write_token(); + _advance_token(); +end; + +# Skips a comment line without printing it. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. 
+proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lb a0, (a1) # Load a single byte, the source position has no word alignment. + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' # A backslash is copied together with the character after it. + bne a0, t1, .compile_character_literal_end + + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); + +end; + +proc _compile_variable_expression(); +begin + la a0, asm_lw + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _advance_token(1); + _read_token(); + _write_token(); + _advance_token(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + +end; + +proc _compile_expression(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) # The first character selects the kind of the expression. + + li t1, '\'' + beq a0, t1, .compile_expression_character_literal + + li t1, 'v' + beq a0, t1, .compile_expression_variable + + _is_digit(); + bnez a0, .compile_expression_integer_literal + + goto .compile_expression_end; + +.compile_expression_character_literal: + _compile_character_literal(); + goto .compile_expression_end; + +.compile_expression_integer_literal: + _compile_integer_literal(); + goto .compile_expression_end; + +.compile_expression_variable: + _compile_variable_expression(); + goto 
.compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) # v0 + la t0, source_code_position + lw t0, (t0) + sw t0, 4(sp) # v4 + + sw zero, 8(sp) # v8 + + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' # A right paren directly after the left one means there are no arguments. + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. + la a0, asm_sw + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 20 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 20 + sub a0, t1, t0 + _write_i(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + + # Add one to the argument counter. + lw t0, 8(sp) + addi t0, t0, 1 + sw t0, 8(sp) + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' # Anything but a comma ends the argument list. + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack. + + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + lw t0, 8(sp) + addi t0, t0, -1 + sw t0, 8(sp) + + la a0, asm_lw + _write_z(); + + _write_c('a'); + lw a0, 8(sp) # The argument index becomes the number of the argument register. + _write_i(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 20 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 20 + sub a0, t1, t0 + _write_i(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + + goto .compile_call_finalize; + +.compile_call_end: + la a0, asm_call + _write_z(); + + _write_s(v04, v00); + + # Skip the right paren. 
+ _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the label length. + + la a0, asm_j + _write_z(); + + _write_token(v00); + _advance_token(); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' # A statement starting with g is handled as a goto. + beq t0, t1, .compile_statement_goto + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_semicolon: + _advance_token(2); + + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, 3 # "end" length. + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + _write_z(); + + _write_token(v00); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + _write_token(v00); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 0(sp) + addi a0, a0, 10 # 10 = "();" and newline (4) plus "begin" and newline (6). + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + # Skip the "end" keyword, semicolon and newline. 
+ _advance_token(5); +end; + +proc _compile_type(); +begin + # Print and skip the ".type" (5 characters) directive and a space after it. + _write_token(6); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + + # Print and skip the symbol name, comma, space and @. + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) # NOTE(review): this slot is not read again in this procedure. + + # Print the symbol type and newline. + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _skip_newlines(); +begin + # Skip newlines. + la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end # NOTE(review): never taken, t2 equals '\n' at this point. + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end # A zero byte marks the end of the source code. + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, 8 # ".section" length. + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, 5 # ".type" length. + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, 6 # ".globl" length. + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. 
+ goto .compile_end; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, 81920 # Buffer size, keep it equal to the size reserved for source_code. + _read_file(); + _compile(); + + _exit(0); + +end; diff --git a/boot/test.elna b/boot/test.elna deleted file mode 100644 index e56547d..0000000 --- a/boot/test.elna +++ /dev/null @@ -1,14 +0,0 @@ -program - -proc main(x: Word, y: Word) -begin - _write_s(4, @x); - _write_s(4, @y); - - y := 0x0a2c3063; - _write_s(4, @y) -end - -begin - main(0x0a2c3061, 0x0a2c3062) -end. diff --git a/boot/tokenizer.s b/boot/tokenizer.s deleted file mode 100644 index 2c7f2a3..0000000 --- a/boot/tokenizer.s +++ /dev/null @@ -1,616 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -.global lex_next, classification, transitions, keywords, byte_keywords - -.include "boot/definitions.inc" - -.section .rodata - -# -# Classification table assigns each possible character to a group (class). All -# characters of the same group a handled equivalently. 
-# -# Classification: -# -.equ CLASS_INVALID, 0x00 -.equ CLASS_DIGIT, 0x01 -.equ CLASS_CHARACTER, 0x02 -.equ CLASS_SPACE, 0x03 -.equ CLASS_COLON, 0x04 -.equ CLASS_EQUALS, 0x05 -.equ CLASS_LEFT_PAREN, 0x06 -.equ CLASS_RIGHT_PAREN, 0x07 -.equ CLASS_ASTERISK, 0x08 -.equ CLASS_UNDERSCORE, 0x09 -.equ CLASS_SINGLE, 0x0a -.equ CLASS_HEX, 0x0b -.equ CLASS_ZERO, 0x0c -.equ CLASS_X, 0x0d -.equ CLASS_EOF, 0x0e -.equ CLASS_DOT, 0x0f -.equ CLASS_MINUS, 0x10 -.equ CLASS_QUOTE, 0x11 -.equ CLASS_GREATER, 0x12 -.equ CLASS_LESS, 0x13 - -.equ CLASS_COUNT, 20 - -.type classification, @object -classification: - .byte CLASS_EOF # 00 NUL - .byte CLASS_INVALID # 01 SOH - .byte CLASS_INVALID # 02 STX - .byte CLASS_INVALID # 03 ETX - .byte CLASS_INVALID # 04 EOT - .byte CLASS_INVALID # 05 ENQ - .byte CLASS_INVALID # 06 ACK - .byte CLASS_INVALID # 07 BEL - .byte CLASS_INVALID # 08 BS - .byte CLASS_SPACE # 09 HT - .byte CLASS_SPACE # 0A LF - .byte CLASS_INVALID # 0B VT - .byte CLASS_INVALID # 0C FF - .byte CLASS_SPACE # 0D CR - .byte CLASS_INVALID # 0E SO - .byte CLASS_INVALID # 0F SI - .byte CLASS_INVALID # 10 DLE - .byte CLASS_INVALID # 11 DC1 - .byte CLASS_INVALID # 12 DC2 - .byte CLASS_INVALID # 13 DC3 - .byte CLASS_INVALID # 14 DC4 - .byte CLASS_INVALID # 15 NAK - .byte CLASS_INVALID # 16 SYN - .byte CLASS_INVALID # 17 ETB - .byte CLASS_INVALID # 18 CAN - .byte CLASS_INVALID # 19 EM - .byte CLASS_INVALID # 1A SUB - .byte CLASS_INVALID # 1B ESC - .byte CLASS_INVALID # 1C FS - .byte CLASS_INVALID # 1D GS - .byte CLASS_INVALID # 1E RS - .byte CLASS_INVALID # 1F US - .byte CLASS_SPACE # 20 Space - .byte CLASS_SINGLE # 21 ! - .byte CLASS_QUOTE # 22 " - .byte 0x00 # 23 # - .byte 0x00 # 24 $ - .byte CLASS_SINGLE # 25 % - .byte CLASS_SINGLE # 26 & - .byte CLASS_QUOTE # 27 ' - .byte CLASS_LEFT_PAREN # 28 ( - .byte CLASS_RIGHT_PAREN # 29 ) - .byte CLASS_ASTERISK # 2A * - .byte CLASS_SINGLE # 2B + - .byte CLASS_SINGLE # 2C , - .byte CLASS_MINUS # 2D - - .byte CLASS_DOT # 2E . 
- .byte CLASS_SINGLE # 2F / - .byte CLASS_ZERO # 30 0 - .byte CLASS_DIGIT # 31 1 - .byte CLASS_DIGIT # 32 2 - .byte CLASS_DIGIT # 33 3 - .byte CLASS_DIGIT # 34 4 - .byte CLASS_DIGIT # 35 5 - .byte CLASS_DIGIT # 36 6 - .byte CLASS_DIGIT # 37 7 - .byte CLASS_DIGIT # 38 8 - .byte CLASS_DIGIT # 39 9 - .byte CLASS_COLON # 3A : - .byte CLASS_SINGLE # 3B ; - .byte CLASS_LESS # 3C < - .byte CLASS_EQUALS # 3D = - .byte CLASS_GREATER # 3E > - .byte 0x00 # 3F ? - .byte CLASS_SINGLE # 40 @ - .byte CLASS_CHARACTER # 41 A - .byte CLASS_CHARACTER # 42 B - .byte CLASS_CHARACTER # 43 C - .byte CLASS_CHARACTER # 44 D - .byte CLASS_CHARACTER # 45 E - .byte CLASS_CHARACTER # 46 F - .byte CLASS_CHARACTER # 47 G - .byte CLASS_CHARACTER # 48 H - .byte CLASS_CHARACTER # 49 I - .byte CLASS_CHARACTER # 4A J - .byte CLASS_CHARACTER # 4B K - .byte CLASS_CHARACTER # 4C L - .byte CLASS_CHARACTER # 4D M - .byte CLASS_CHARACTER # 4E N - .byte CLASS_CHARACTER # 4F O - .byte CLASS_CHARACTER # 50 P - .byte CLASS_CHARACTER # 51 Q - .byte CLASS_CHARACTER # 52 R - .byte CLASS_CHARACTER # 53 S - .byte CLASS_CHARACTER # 54 T - .byte CLASS_CHARACTER # 55 U - .byte CLASS_CHARACTER # 56 V - .byte CLASS_CHARACTER # 57 W - .byte CLASS_CHARACTER # 58 X - .byte CLASS_CHARACTER # 59 Y - .byte CLASS_CHARACTER # 5A Z - .byte CLASS_SINGLE # 5B [ - .byte 0x00 # 5C \ - .byte CLASS_SINGLE # 5D ] - .byte CLASS_SINGLE # 5E ^ - .byte CLASS_UNDERSCORE # 5F _ - .byte 0x00 # 60 ` - .byte CLASS_HEX # 61 a - .byte CLASS_HEX # 62 b - .byte CLASS_HEX # 63 c - .byte CLASS_HEX # 64 d - .byte CLASS_HEX # 65 e - .byte CLASS_HEX # 66 f - .byte CLASS_CHARACTER # 67 g - .byte CLASS_CHARACTER # 68 h - .byte CLASS_CHARACTER # 69 i - .byte CLASS_CHARACTER # 6A j - .byte CLASS_CHARACTER # 6B k - .byte CLASS_CHARACTER # 6C l - .byte CLASS_CHARACTER # 6D m - .byte CLASS_CHARACTER # 6E n - .byte CLASS_CHARACTER # 6F o - .byte CLASS_CHARACTER # 70 p - .byte CLASS_CHARACTER # 71 q - .byte CLASS_CHARACTER # 72 r - .byte CLASS_CHARACTER # 73 s - 
.byte CLASS_CHARACTER # 74 t - .byte CLASS_CHARACTER # 75 u - .byte CLASS_CHARACTER # 76 v - .byte CLASS_CHARACTER # 77 w - .byte CLASS_X # 78 x - .byte CLASS_CHARACTER # 79 y - .byte CLASS_CHARACTER # 7A z - .byte 0x00 # 7B { - .byte CLASS_SINGLE # 7C | - .byte 0x00 # 7D } - .byte CLASS_SINGLE # 7E ~ - .byte CLASS_INVALID # 7F DEL - -# -# Textual keywords in the language. -# -.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1 - -.type keywords, @object -keywords: - .word 7 - .ascii "program" - .word 6 - .ascii "import" - .word 5 - .ascii "const" - .word 3 - .ascii "var" - .word 2 - .ascii "if" - .word 4 - .ascii "then" - .word 5 - .ascii "elsif" - .word 4 - .ascii "else" - .word 5 - .ascii "while" - .word 2 - .ascii "do" - .word 4 - .ascii "proc" - .word 5 - .ascii "begin" - .word 3 - .ascii "end" - .word 4 - .ascii "type" - .word 6 - .ascii "record" - .word 5 - .ascii "union" - .word 4 - .ascii "true" - .word 5 - .ascii "false" - .word 3 - .ascii "nil" - .word 3 - .ascii "xor" - .word 2 - .ascii "or" - .word 6 - .ascii "return" - .word 4 - .ascii "cast" - .word 4 - .ascii "goto" - .word 4 - .ascii "case" - .word 2 - .ascii "of" - -.type byte_keywords, @object -byte_keywords: .ascii "&.,:;()[]^=+-*@" -.equ BYTE_KEYWORDS_SIZE, . - byte_keywords - -.section .data - -# The transition table describes transitions from one state to another, given -# a symbol (character class). -# -# The table has m rows and n columns, where m is the amount of states and n is -# the amount of classes. So given the current state and a classified character -# the table can be used to look up the next state. -# -# Each cell is a word long. -# - The least significant byte of the word is a row number (beginning with 0). -# It specifies the target state. "ff" means that this is an end state and no -# transition is possible. -# - The next byte is the action that should be performed when transitioning. -# For the meaning of actions see labels in the lex_next function, which -# handles each action. 
-# -.type transitions, @object -transitions: - # Invalid Digit Alpha Space : = ( ) - # * _ Single Hex 0 x NUL . - # - " or ' > < - .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff - .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff - .word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start - - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon - - .word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff - .word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff - .word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier - - .word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff - .word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff - .word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal - - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater - - .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff - .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff - .word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus - - .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff - .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff - .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren - - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less - - .word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff - .word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff - .word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x. 
- - .word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109 - .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 - .word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment - - .word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff - .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 - .word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment - - .word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110 - .word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110 - .word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String - - .word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff - .word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff - .word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero - - .word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff - .word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff - .word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal - -.section .text - -# Returns the class from the classification table for the given character. -# -# Parameters: -# a0 - Character. -# -# Sets a0 to the class number. -.type classify, @function -classify: - la t0, classification - add t0, t0, a0 # Character class pointer. - lbu a0, (t0) # Character class. - ret - -# Given the current state and a character class, calculates the next state. - -# Parameters: -# a0 - Current state. -# a1 - Character class. -# -# Sets a0 to the next state. -.type lookup_state, @function -lookup_state: - li t0, CLASS_COUNT - mul a0, a0, t0 # Transition row. - add a0, a0, a1 # Transition column. - - li t0, 4 - mul a0, a0, t0 # Multiply by the word size. - - la t0, transitions - add t0, t0, a0 - lw a0, (t0) # Next state. - - ret - -# Chains classify and lookup_state. -# -# Parameters: -# a0 - Current state. -# a1 - Character. -# -# Sets a0 to the next state based on the given character. -.type _next_state, @function -_next_state: - # Prologue. 
- addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - sw a0, 4(sp) - mv a0, a1 - call classify - - mv a1, a0 - lw a0, 4(sp) - call lookup_state - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Takes an identifier and checks whether it's a keyword. -# -# Parameters: -# a0 - Token length. -# a1 - Token pointer. -# -# Sets a0 to the appropriate token type. -.type classify_identifier, @function -classify_identifier: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - mv a2, a0 - mv a3, a1 - li a0, KEYWORDS_COUNT - la a1, keywords - call _strings_index - - bnez a0, .Lclassify_identifier_end - li a0, TOKEN_IDENTIFIER - -.Lclassify_identifier_end: - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Takes a symbol and determines its type. -# -# Parameters: -# a0 - Token character. -# -# Sets a0 to the appropriate token type. -.type classify_single, @function -classify_single: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - mv a1, a0 - li a2, BYTE_KEYWORDS_SIZE - la a0, byte_keywords - call _memchr - - la a1, byte_keywords - sub a0, a0, a1 - addi a0, a0, TOKEN_IDENTIFIER + 1 - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Classified a symbol containing multiple characters (probably 2). -# -# Parameters: -# a0 - Token length. -# a1 - Token pointer. -# -# Sets a0 to the appropriate token type. -.type classify_composite, @function -classify_composite: - lbu t0, 0(a1) - li t1, ':' - beq t0, t1, .Lclassify_composite_assign - - j .Lclassify_composite_end - -.Lclassify_composite_assign: - li a0, TOKEN_ASSIGN - j .Lclassify_composite_end - -.Lclassify_composite_end: - ret - -# Initializes the classification table. -# -# Paramaters: -# a0 - Source text pointer. -# a1 - A pointer for output value, the token kind. 4 Bytes. -# -# Sets a0 to the position of the next token. 
-.type lex_next, @function -lex_next: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw s1, 20(sp) # Preserve s1 used for current source text position. - mv s1, a0 - sw a0, 12(sp) # Keeps a pointer to the beginning of a token. - # 4(sp) and 8(sp) are reserved for the kind and length of the token if needed. - - sw s2, 16(sp) # Preserve s2 containing the current state. - li s2, 0x00 # Initial, start state. - - sw a1, 0(sp) - sw zero, (a1) # Initialize. - -.Llex_next_loop: - mv a0, s2 - lbu a1, (s1) - call _next_state - - li t0, 0xff - and s2, a0, t0 # Next state. - - li t0, 0xff00 - and t1, a0, t0 # Transition action. - srli t1, t1, 8 - - # Perform the provided action. - li t0, 0x01 # Accumulate action. - beq t1, t0, .Llex_next_accumulate - - li t0, 0x02 # Print action. - beq t1, t0, .Llex_next_print - - li t0, 0x03 # Skip action. - beq t1, t0, .Llex_next_skip - - li t0, 0x04 # Delimited string action. - beq t1, t0, .Llex_next_comment - - li t0, 0x05 # Finalize identifier. - beq t1, t0, .Llex_next_identifier - - li t0, 0x06 # Single character symbol action. - beq t1, t0, .Llex_next_single - - li t0, 0x07 # An action for symbols containing multiple characters. - beq t1, t0, .Llex_next_composite - - li t0, 0x08 # Integer action. - beq t1, t0, .Llex_next_integer - - j .Llex_next_reject - -.Llex_next_reject: - addi s1, s1, 1 - - j .Llex_next_end - -.Llex_next_accumulate: - addi s1, s1, 1 - - j .Llex_next_loop - -.Llex_next_skip: - addi s1, s1, 1 - lw t0, 12(sp) - addi t0, t0, 1 - sw t0, 12(sp) - - j .Llex_next_loop - -.Llex_next_print: - /* DEBUG - addi a0, a0, 21 - sw a0, 0(sp) - addi a0, sp, 0 - li a1, 1 - call _write_error */ - - j .Llex_next_end - -.Llex_next_comment: - addi s1, s1, 1 - - j .Llex_next_end - -.Llex_next_identifier: - # An identifier can be a textual keyword. - # Check the kind of the token and write it into the output parameter. 
- lw a1, 12(sp) - sub a0, s1, a1 - sw a0, 8(sp) - call classify_identifier - sw a0, 4(sp) - lw a0, 0(sp) - addi a1, sp, 4 - li a2, 12 - call _memcpy - - j .Llex_next_end - -.Llex_next_single: - lw a0, 12(sp) - addi s1, a0, 1 - lbu a0, (a0) - call classify_single - lw a1, 0(sp) - sw a0, (a1) - - j .Llex_next_end - -.Llex_next_composite: - addi s1, s1, 1 - lw a1, 12(sp) - sub a0, s1, a1 - call classify_composite - lw a1, 0(sp) - sw a0, (a1) - - j .Llex_next_end - -.Llex_next_integer: - lw t0, 0(sp) - li t1, TOKEN_INTEGER - sw t1, 0(t0) - lw t1, 12(sp) - sw t1, 8(t0) - sub t1, s1, t1 - sw t1, 4(t0) - - j .Llex_next_end - -.Llex_next_end: - mv a0, s1 # Return the advanced text pointer. - - # Restore saved registers. - lw s1, 20(sp) - lw s2, 16(sp) - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret diff --git a/rakelib/stage.rake b/rakelib/stage.rake deleted file mode 100644 index 6f61cae..0000000 --- a/rakelib/stage.rake +++ /dev/null @@ -1,61 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. 
-} -# frozen_string_literal: true - -CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' -SYSROOT = 'build/sysroot' -QEMU = 'qemu-riscv32' - -def assemble_stage(output, compiler, source) - arguments = [QEMU, '-L', SYSROOT, *compiler] - - puts Term::ANSIColor.green(arguments * ' ') - puts - Open3.popen2(*arguments) do |qemu_in, qemu_out| - qemu_in.write File.read(*source) - qemu_in.close - - IO.copy_stream qemu_out, output - qemu_out.close - end -end - -library = [] - -Dir.glob('boot/*.s').each do |assembly_source| - source_basename = Pathname.new(assembly_source).basename - target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o') - - file target_object.to_s => [assembly_source, 'build/boot'] do |t| - sh CROSS_GCC, '-c', '-o', t.name, assembly_source - end - library << assembly_source unless source_basename.to_s.start_with? 'stage' -end - -desc 'Initial stage' -file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t| - sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites -end - -file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t| - source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' } - - File.open t.name, 'w' do |output| - assemble_stage output, exe, source - end -end - -['build/boot/stage2a', 'build/boot/stage2b'].each do |exe| - file exe => [exe.ext('.s'), *library] do |t| - sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites - end -end - -file 'build/boot/stage2b.s' => ['build/boot/stage2a', 'boot/stage2.elna'] do |t| - source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' } - - File.open t.name, 'w' do |output| - assemble_stage output, exe, source - end -end