From 3be051aa08d549d18ed8a954e87ce2bfca337e73 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 28 Aug 2025 22:45:42 +0200 Subject: [PATCH] Start over --- Rakefile | 127 ++- boot/stage1.s | 2284 +++++++++++++++++--------------------------- boot/stage2.elna | 2016 ++++++++++++++------------------------ boot/stage3.elna | 842 ++++++++++++++++ boot/test.elna | 14 - rakelib/stage.rake | 61 -- 6 files changed, 2602 insertions(+), 2742 deletions(-) create mode 100644 boot/stage3.elna delete mode 100644 boot/test.elna delete mode 100644 rakelib/stage.rake diff --git a/Rakefile b/Rakefile index 6b38038..ce7429b 100644 --- a/Rakefile +++ b/Rakefile @@ -5,34 +5,137 @@ require 'open3' require 'rake/clean' -require 'term/ansicolor' -CLEAN.include 'build/boot' +CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' +SYSROOT = '../eugenios/build/sysroot' +QEMU = 'qemu-riscv32' + +CLEAN.include 'build/boot', 'build/valid' directory 'build/boot' +directory 'build/valid' desc 'Final stage' -task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t| - exe, previous_output, source = t.prerequisites +task default: ['build/valid/stage3', 'build/valid/stage3.s', 'boot/stage3.elna'] do |t| + exe, expected, source = t.prerequisites cat_arguments = ['cat', source] compiler_arguments = [QEMU, '-L', SYSROOT, exe] - diff_arguments = ['diff', '-Nur', '--text', previous_output, '-'] + diff_arguments = ['diff', '-Nur', '--text', expected, '-'] Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments) end -file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t| - source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? 
'.elna' } +desc 'Convert stage2 language into the stage3 language' +task :convert do + File.open('boot/stage3.elna', 'w') do |stage2| + li_value = nil - File.open t.name, 'w' do |output| - assemble_stage output, exe, source + File.readlines('boot/stage2.elna').each do |line| + if line.start_with?("\tj ") + stage2 << "\tgoto " + line.chomp.delete_prefix("\tj ") + ";\n" + li_value = nil + elsif line.match?(/^\tli a0, [[:digit:]]/) + li_value = line.delete_prefix("\tli a0, ").chomp + elsif line == "\tli a0, '\\n'\n" + li_value = "'\\n'" + elsif !li_value.nil? && line.start_with?("\t_") + stage2 << "\t" + line[1..-4] + li_value + ");\n" + li_value = nil + else + stage2 << "\tli a0, #{li_value}\n" unless li_value.nil? + stage2 << line + li_value = nil + end + end end end -file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t| +# +# Stage 3. +# + +file 'build/valid/stage3' => 'build/valid/stage3.s' do |t| sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites end -task test: 'build/boot/test' do |t| - sh QEMU, '-L', SYSROOT, t.prerequisites.first +file 'build/valid/stage3.s' => ['build/boot/stage3', 'boot/stage3.elna'] do |t| + exe, source = t.prerequisites + + cat_arguments = ['cat', source] + compiler_arguments = [QEMU, '-L', SYSROOT, exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name +end + +file 'build/boot/stage3' => 'build/boot/stage3.s' do |t| + sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites +end + +file 'build/boot/stage3.s' => ['build/valid/stage2', 'boot/stage3.elna'] do |t| + exe, source = t.prerequisites + + cat_arguments = ['cat', source] + compiler_arguments = [QEMU, '-L', SYSROOT, exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name +end + +# +# Stage 2. 
+# + +file 'build/valid/stage2' => 'build/valid/stage2.s' do |t| + sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites +end + +file 'build/valid/stage2.s' => ['build/boot/stage2', 'boot/stage2.elna'] do |t| + exe, source = t.prerequisites + + cat_arguments = ['cat', source] + compiler_arguments = [QEMU, '-L', SYSROOT, exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name +end + +file 'build/boot/stage2' => 'build/boot/stage2.s' do |t| + sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites +end + +file 'build/boot/stage2.s' => ['build/valid/stage1', 'boot/stage2.elna'] do |t| + exe, source = t.prerequisites + + cat_arguments = ['cat', source] + compiler_arguments = [QEMU, '-L', SYSROOT, exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name +end + +# +# Stage 1. +# + +file 'build/valid/stage1' => ['build/valid', 'build/valid/stage1.s'] do |t| + source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } + + sh CROSS_GCC, '-nostdlib', '-o', t.name, *source +end + +file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t| + source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' } + + cat_arguments = ['cat', *source] + compiler_arguments = [QEMU, '-L', SYSROOT, *exe] + last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) + + IO.copy_stream last_stdout, t.name +end + +file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t| + source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } + + sh CROSS_GCC, '-nostdlib', '-o', t.name, *source end diff --git a/boot/stage1.s b/boot/stage1.s index a45d8ab..c81a7f8 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -2,1141 +2,391 @@ # v. 2.0. 
If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -.global _start # Program entry point. - -# -# Registers used as global variables: -# s1 - Contains the current position in the source text. -# s2 - Label counter. -# s3 - Dynamic memory region. -# -# - The compiler expects valid input, otherwise it will generate invalid -# assembly or hang. There is no error checking, no semantic analysis, no -# type checking. -# -# - Imports with only a module name without package, e.g. -# "import dummy", can be parsed, but are ignored. -# -# - No loops. Only labels and goto. -# -# - Only unsigned number literals are supported (in decimal or -# hexadecimal format). -# -# - Comments are accepted only at the end of a line. -# -# - Return can be used only as the last statement of a procedure. It -# doesn't actually return, but sets a0 to the appropriate value. -# -# - The lvalue of an assignment can only be an identifier. - -.include "boot/definitions.inc" - .equ SOURCE_BUFFER_SIZE, 81920 -.section .rodata -section_rodata: .ascii ".section .rodata\n" -.equ SECTION_RODATA_SIZE, . - section_rodata -section_text: .ascii ".section .text\n" -.equ SECTION_TEXT_SIZE, . - section_text -section_bss: .ascii ".section .bss\n" -.equ SECTION_BSS_SIZE, . - section_bss -global_start: .ascii ".global _start\n" -.equ GLOBAL_START_SIZE, . - global_start -prologue: .ascii "addi sp, sp, -96\nsw ra, 92(sp)\nsw s0, 88(sp)\naddi s0, sp, 96\n" -.equ PROLOGUE_SIZE, . - prologue -epilogue: .ascii "lw ra, 92(sp)\nlw s0, 88(sp)\naddi sp, sp, 96\nret\n" -.equ EPILOGUE_SIZE, . - epilogue +.equ SYS_READ, 63 +.equ SYS_WRITE, 64 +.equ SYS_EXIT, 93 +.equ SYS_MMAP2, 222 +.equ STDIN, 0 +.equ STDOUT, 1 +.equ STDERR, 2 -asm_exit: .ascii "li a0, 0\nli a7, 93\necall\n" -.equ ASM_EXIT_SIZE, . - asm_exit -asm_start: .ascii ".type _start, @function\n_start:\n" -.equ ASM_START_SIZE, . - asm_start -asm_and_a0_a1: .ascii "and a0, a0, a1\n" -.equ ASM_AND_A0_A1_SIZE, . 
- asm_and_a0_a1 -asm_or_a0_a1: .ascii "or a0, a0, a1\n" -.equ ASM_OR_A0_A1_SIZE, . - asm_or_a0_a1 -asm_add_a0_a1: .ascii "add a0, a0, a1\n" -.equ ASM_ADD_A0_A1_SIZE, . - asm_add_a0_a1 -asm_sub_a0_a1: .ascii "sub a0, a0, a1\n" -.equ ASM_SUB_A0_A1_SIZE, . - asm_sub_a0_a1 -asm_mul_a0_a1: .ascii "mul a0, a0, a1\n" -.equ ASM_MUL_A0_A1_SIZE, . - asm_mul_a0_a1 -asm_seqz_a0: .ascii "seqz a0, a0\n" -.equ ASM_SEQZ_A0_SIZE, . - asm_seqz_a0 -asm_neg_a0: .ascii "neg a0, a0\n" -.equ ASM_NEG_A0_SIZE, . - asm_neg_a0 -asm_type: .ascii ".type " -.equ ASM_TYPE_SIZE, . - asm_type -asm_type_function: .ascii ", @function\n" -.equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function -asm_type_object: .ascii ", @object\n" -.equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object -asm_restore_parameters: - .ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n" -.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters -asm_preserve_parameters: - .ascii "sw a0, 84(sp)\nsw a1, 80(sp)\nsw a2, 76(sp)\nsw a2, 76(sp)\nsw a3, 72(sp)\nsw a4, 68(sp)\nsw a5, 64(sp)\n" -.equ ASM_PRESERVE_PARAMETERS_SIZE, . 
- asm_preserve_parameters +.section .rodata + +.type keyword_equ, @object +keyword_equ: .ascii ".equ" +.equ KEYWORD_EQU_SIZE, 4 + +.type keyword_section, @object +keyword_section: .ascii ".section" +.equ KEYWORD_SECTION_SIZE, 8 + +.type keyword_type, @object +keyword_type: .ascii ".type" +.equ KEYWORD_TYPE_SIZE, 5 + +.type keyword_type_object, @object +keyword_type_object: .ascii "object" +.equ KEYWORD_TYPE_OBJECT_SIZE, 6 + +.type keyword_type_function, @object +keyword_type_function: .ascii "function" +.equ KEYWORD_TYPE_FUNCTION_SIZE, 8 + +.type keyword_ret, @object +keyword_ret: .ascii "ret" +.equ KEYWORD_RET_SIZE, 3 + +.type keyword_global, @object +keyword_global: .ascii ".globl" +.equ KEYWORD_GLOBAL_SIZE, 6 + +.type keyword_proc, @object +keyword_proc: .ascii "proc " +.equ KEYWORD_PROC_SIZE, 5 + +.type keyword_end, @object +keyword_end: .ascii "end" +.equ KEYWORD_END_SIZE, 3 + +.type keyword_begin, @object +keyword_begin: .ascii "begin" +.equ KEYWORD_BEGIN_SIZE, 5 + +.type keyword_var, @object +keyword_var: .ascii "var" +.equ KEYWORD_VAR_SIZE, 3 + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " .section .bss + .type source_code, @object source_code: .zero SOURCE_BUFFER_SIZE +.type source_code_position, @object +source_code_position: .word 0 + .section .text -# Ignores the import. -.type compile_import, @function -compile_import: - # Prologue. 
- addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 - -.Lcompile_import_loop: - mv a0, s1 - addi a1, sp, 0 - call lex_next - li t0, TOKEN_IMPORT - lw t1, 0(sp) - bne t0, t1, .Lcompile_import_end - # a0 is set from the previous lex_next call. Skip the module name. - addi a1, sp, 0 - call lex_next - mv s1, a0 - - j .Lcompile_import_loop - -.Lcompile_import_end: - # Epilogue. - lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 - ret - -.type compile_binary_expression, @function -compile_binary_expression: +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +.type _read_file, @function +_read_file: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - li a0, 0 - call compile_expression + mv a2, a1 + mv a1, a0 + li a0, STDIN + li a7, SYS_READ + ecall - mv a0, s1 - addi a1, sp, 12 - call lex_next - lw t0, 12(sp) - - li t1, TOKEN_AND - beq t0, t1, .Lcompile_binary_expression_and - - li t1, TOKEN_OR - beq t0, t1, .Lcompile_binary_expression_or - - li t1, TOKEN_PLUS - beq t0, t1, .Lcompile_binary_expression_plus - - li t1, TOKEN_EQUALS - beq t0, t1, .Lcompile_binary_expression_equal - - li t1, TOKEN_ASTERISK - beq t0, t1, .Lcompile_binary_expression_product - - li t1, TOKEN_MINUS - beq t0, t1, .Lcompile_binary_expression_minus - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_equal: - mv s1, a0 # Skip =. - li a0, 1 - call compile_expression - li a0, ASM_SUB_A0_A1_SIZE - la a1, asm_sub_a0_a1 - call _write_s - - li a0, ASM_SEQZ_A0_SIZE - la a1, asm_seqz_a0 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_and: - mv s1, a0 # Skip &. - li a0, 1 - call compile_expression - li a0, ASM_AND_A0_A1_SIZE - la a1, asm_and_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_or: - mv s1, a0 # Skip or. 
- li a0, 1 - call compile_expression - li a0, ASM_OR_A0_A1_SIZE - la a1, asm_or_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_plus: - mv s1, a0 # Skip +. - li a0, 1 - call compile_expression - li a0, ASM_ADD_A0_A1_SIZE - la a1, asm_add_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_minus: - mv s1, a0 # Skip -. - li a0, 1 - call compile_expression - li a0, ASM_SUB_A0_A1_SIZE - la a1, asm_sub_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_product: - mv s1, a0 # Skip *. - li a0, 1 - call compile_expression - li a0, ASM_MUL_A0_A1_SIZE - la a1, asm_mul_a0_a1 - call _write_s - - j .Lcompile_binary_expression_end - -.Lcompile_binary_expression_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Looks for a register that can be used to calculate a symbol address. Writes it -# as string, like sp or s0 into the provided buffer. -# -# Parameters: -# a0 - Symbol info pointer. -# a1 - Output buffer. -# -# Sets a0 to the length of register name written or 0. -.type take_address, @function -take_address: - beqz a0, .Ltake_address_undefined - lw t0, 0(a0) - - li t1, INFO_PARAMETER - beq t0, t1, .Ltake_address_parameter - - li t1, INFO_LOCAL - beq t0, t1, .Ltake_address_local - - j .Ltake_address_undefined - -.Ltake_address_parameter: - li t0, 0x3073 # s0 - sh t0, (a1) - - li a0, 2 - - j .Ltake_address_end - -.Ltake_address_local: - li t0, 0x7073 # (sp) - sh t0, (a1) - - li a0, 2 - - j .Ltake_address_end - -.Ltake_address_undefined: - li a0, 0 - -.Ltake_address_end: - ret - -# Parameters: -# a0 - Identifier length. -# a1 - Register number as character. -.type compile_identifier_expression, @function -compile_identifier_expression: +# Writes a character from a0 into the standard output. +.type _write_c, @function +_write_c: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - sw a0, 20(sp) # Identifier length. 
- sw a1, 16(sp) # Register number as character. + sb a0, 20(sp) + li a0, STDOUT + addi a1, sp, 20 + li a2, 1 + li a7, SYS_WRITE + ecall + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + add sp, sp, 32 + ret + +# Write null terminated string. +# +# Parameters: +# a0 - String. +.type _write_z, @function +_write_z: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + li a0, STDOUT + lw a1, 20(sp) + li a2, 1 + li a7, SYS_WRITE + ecall + + # Advance the input string by one byte. lw a0, 20(sp) - mv a1, s1 - call symbol_table_lookup - sw a0, 12(sp) + addi a0, a0, 1 + sw a0, 20(sp) - beqz a0, .Lcompile_identifier_expression_by_name - lw t0, 0(a0) + j .write_z_loop - j .Lcompile_identifier_expression_by_address +.write_z_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret -.Lcompile_identifier_expression_by_name: - # Global identifier. - lw t1, 16(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 8(sp) - li t0, 0x6120616c # la a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +.type _is_upper, @function +_is_upper: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - lw a0, 20(sp) - mv a1, s1 - call _write_s + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' - li a0, '\n' - call _write_c + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +.type _is_lower, @function +_is_lower: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +.type _is_alpha, @function +_is_alpha: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) - lbu a0, (s1) call _is_upper - beqz a0, .Lcompile_identifier_expression_end - - lw t1, 16(sp) - li t0, 0x0a290061 # a\0)\n - sll t2, t1, 8 - or t0, t0, t2 - sw t0, 8(sp) - li t0, 0x28202c00 # \0, ( - or t0, t0, t1 - sw t0, 4(sp) - li t0, 0x6120776c # lw a - sw t0, 0(sp) - li a0, 12 - addi a1, sp, 0 - call _write_s - - j .Lcompile_identifier_expression_end - -.Lcompile_identifier_expression_by_address: - lw t1, 16(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 8(sp) - li t0, 0x6120776c # lw a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s - - lw a0, 12(sp) - lw a0, 8(a0) - call _write_i - - li a0, '(' - call _write_c - - lw a0, 12(sp) - addi a1, sp, 4 - call take_address - addi a1, sp, 4 - call _write_s - - li a0, ')' - call _write_c - li a0, '\n' - call _write_c - - j .Lcompile_identifier_expression_end - -.Lcompile_identifier_expression_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Evalutes an expression and saves the result in a0. -# -# a0 - X in aX, the register number to save the result. -.type compile_expression, @function -compile_expression: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - addi a0, a0, '0' # Make the register number to a character. - sw a0, 36(sp) # And save it. 
- - mv a0, s1 - addi a1, sp, 24 - call lex_next - sw a0, 20(sp) - - lw t0, 24(sp) - - li t1, TOKEN_MINUS - beq t0, t1, .Lcompile_expression_negate - - li t1, TOKEN_AT - beq t0, t1, .Lcompile_expression_address - - li t1, TOKEN_INTEGER - beq t0, t1, .Lcompile_expression_literal - - addi a1, sp, 8 - call lex_next - lw t0, 8(sp) - li t1, TOKEN_LEFT_PAREN - beq t0, t1, .Lcompile_expression_call - - lw s1, 32(sp) - lw a0, 28(sp) - lw a1, 36(sp) - call compile_identifier_expression - - j .Lcompile_expression_advance - -.Lcompile_expression_negate: - lw s1, 20(sp) # Skip the -. - mv a0, zero - call compile_expression - - li a0, ASM_NEG_A0_SIZE - la a1, asm_neg_a0 - call _write_s - - j .Lcompile_expression_end - -.Lcompile_expression_address: - lw a0, 20(sp) - mv s1, a0 # Skip @. - - lw a0, 36(sp) - call compile_at_expression - - j .Lcompile_expression_end - -.Lcompile_expression_call: - mv s1, a0 - - lw a0, 32(sp) - lw a1, 28(sp) - call compile_call - - j .Lcompile_expression_end - -.Lcompile_expression_literal: - lw t1, 36(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 16(sp) - li t0, 0x6120696c # li a - sw t0, 12(sp) - li a0, 7 - addi a1, sp, 12 - call _write_s - - lw a0, 28(sp) - lw a1, 32(sp) - call _write_s - - li a0, '\n' - call _write_c - - j .Lcompile_expression_advance - -.Lcompile_expression_advance: - lw s1, 20(sp) - -.Lcompile_expression_end: - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Expression taking an identifier address. -# -# Parameters: -# a0 - Register number as character -.type compile_at_expression, @function -compile_at_expression: - # Prologue. 
- addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - sw a0, 36(sp) - - mv a0, s1 - addi a1, sp, 24 - call lex_next - mv s1, a0 - - lw a0, 28(sp) - lw a1, 32(sp) - call symbol_table_lookup - sw a0, 20(sp) - - li t0, 0x20 # _ - sb t0, 12(sp) - - # lw a0, 28(sp) - lw a1, 32(sp) - addi a1, sp, 13 - call take_address - - lw t1, 36(sp) - li t0, 0x2c006120 # _a\0, - sw t0, 8(sp) - sb t1, 10(sp) - li t0, 0x69646461 # addi - sw t0, 4(sp) - addi a0, a0, 9 # The length returned by take_address + the instruction. - addi a1, sp, 4 - call _write_s - - li a0, ',' - call _write_c - li a0, ' ' - call _write_c + sw a0, 16(sp) lw a0, 20(sp) - lw a0, 8(a0) - call _write_i + call _is_lower - j .Lcompile_at_expression_end - -.Lcompile_at_expression_end: - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Compiles an lvalue. -# -# Parameters: -# a0 - Pointer to the identifier. -# a1 - Identifier length. -.type compile_designator_expression, @function -compile_designator_expression: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw a0, 20(sp) # Identifier pointer. - sw a1, 16(sp) # Identifier length. - - /* DEBUG - lw a0, 20(sp) - lw a1, 16(sp) - call _write_error */ - -.Lcompile_designator_expression_by_address: - lw a0, 16(sp) - lw a1, 20(sp) - call symbol_table_lookup - sw a0, 12(sp) - - li t0, 0x202c30 # 0,_ - sw t0, 8(sp) - li t0, 0x61207773 # sw a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s - - lw a0, 12(sp) - lw a0, 8(a0) - call _write_i - - li a0, '(' - call _write_c - - lw a0, 12(sp) - addi a1, sp, 4 - call take_address - addi a1, sp, 4 - call _write_s - - li a0, ')' - call _write_c - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Compiles a statement beginning with an identifier. 
-# -# Left values should be variables named "loca n", where n is the offset -# of the variable on the stack, like loca8 or loca4. -.type compile_identifier, @function -compile_identifier: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - # Save the pointer to the identifier and its length on the stack. - mv a0, s1 - addi a1, sp, 12 - call lex_next - addi a1, sp, 0 - call lex_next - mv s1, a0 - - lw t0, 0(sp) - - li t1, TOKEN_LEFT_PAREN - beq t0, t1, .Lcompile_identifier_call - - li t1, TOKEN_ASSIGN - beq t0, t1, .Lcompile_identifier_assign - - j .Lcompile_identifier_end - -.Lcompile_identifier_call: - lw a0, 20(sp) - lw a1, 16(sp) - call compile_call - - j .Lcompile_identifier_end - -.Lcompile_identifier_assign: - call compile_binary_expression - lw a0, 20(sp) - lw a1, 16(sp) - call compile_designator_expression - - j .Lcompile_identifier_end - -.Lcompile_identifier_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Compiles a procedure call. Expects s1 to point to the first argument. -# a0 - Pointer to the procedure name. -# a1 - Length of the procedure name. -# -# Returns the procedure result in a0. -.type compile_call, @function -compile_call: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw a0, 20(sp) - sw a1, 16(sp) - sw zero, 12(sp) # Argument count for a procedure call. - -.Lcompile_call_paren: - lbu t0, (s1) - li t1, 0x29 # ) - beq t0, t1, .Lcompile_call_complete - -.Lcompile_call_argument: - li a0, 0 - call compile_expression - - li t0, 0x202c30 # 0,_ - sw t0, 8(sp) - li t0, 0x61207773 # sw a - sw t0, 4(sp) - li a0, 7 - addi a1, sp, 4 - call _write_s - - lw t0, 12(sp) # Argument count for a procedure call. - - # Only 6 arguments are supported with a0-a5. - # Save all arguments on the stack so they aren't overriden afterwards. 
- li a0, -4 - mul a0, t0, a0 - addi a0, a0, 60 - call _write_i - - li t0, '\n' - sw t0, 8(sp) - li t0, 0x29707328 # (sp) - sw t0, 4(sp) - li a0, 5 - addi a1, sp, 4 - call _write_s - - lbu t0, (s1) - li t1, ',' - bne t0, t1, .Lcompile_call_paren - - lw t0, 12(sp) # Argument count for a procedure call. - addi t0, t0, 1 - sw t0, 12(sp) - - addi s1, s1, 1 # Skip the comma between the arguments. - j .Lcompile_call_argument - -.Lcompile_call_complete: - sw zero, 12(sp) - -.Lcompile_call_restore: - # Just go through all a0-a5 registers and read them from stack. - # If this stack value contains garbage, the procedure just shouldn't use it. - li a0, ASM_RESTORE_PARAMETERS_SIZE - la a1, asm_restore_parameters - call _write_s - -.Lcompile_call_perform: - li t0, 0x20 - sw t0, 8(sp) - li t0, 0x6c6c6163 # call - sw t0, 4(sp) - li a0, 5 - addi a1, sp, 4 - call _write_s - - lw a0, 16(sp) - lw a1, 20(sp) - call _write_s - - li a0, '\n' - call _write_c - - addi s1, s1, 1 # Skip the close paren. - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Walks through the procedure definitions. -.type compile_procedure_section, @function -compile_procedure_section: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - -.Lcompile_procedure_section_loop: - mv a0, s1 - addi a1, sp, 4 - call lex_next - li t0, TOKEN_PROC - lw t1, 4(sp) - bne t0, t1, .Lcompile_procedure_section_end - - call compile_procedure - - j .Lcompile_procedure_section_loop - -.Lcompile_procedure_section_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -.type compile_module_declaration, @function -compile_module_declaration: - # Prologue. - addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 - - li a0, GLOBAL_START_SIZE - la a1, global_start - call _write_s - - # Skip "program". - mv a0, s1 - addi a1, sp, 4 - call lex_next - mv s1, a0 - - # Epilogue. 
- lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 - ret - -# Compiles global variable section. -.type compile_global_section, @function -compile_global_section: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 4 - call lex_next - li t0, TOKEN_VAR - lw t1, 4(sp) - bne t0, t1, .Lcompile_global_section_end - mv s1, a0 - - li a0, SECTION_BSS_SIZE - la a1, section_bss - call _write_s - -.Lcompile_global_section_item: - mv a0, s1 - addi a1, sp, 12 - call lex_next - - lw t0, 12(sp) - li t1, TOKEN_IDENTIFIER - - bne t0, t1, .Lcompile_global_section_end - lw s1, 20(sp) # Advance to the beginning of the variable name. - - call compile_global - j .Lcompile_global_section_item - -.Lcompile_global_section_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Compiles a global variable. -.type compile_global, @function -compile_global: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - # Save the identifier on the stack since it should emitted multiple times. - mv a0, s1 - addi a1, sp, 28 - call lex_next - addi a1, sp, 4 - call lex_next # Skip the colon in front of the type. - addi a1, sp, 4 - call lex_next # Skip the opening bracket. - addi a1, sp, 16 - call lex_next # Save the array size on the stack since it has to be emitted multiple times. - addi a1, sp, 4 - call lex_next # Skip the closing bracket. - addi a1, sp, 4 - call lex_next # Skip the type. 
- mv s1, a0 - - # .type identifier, @object - li a0, ASM_TYPE_SIZE - la a1, asm_type - call _write_s - - lw a0, 32(sp) - lw a1, 36(sp) - call _write_s - - li a0, ASM_TYPE_OBJECT_SIZE - la a1, asm_type_object - call _write_s - - # identifier: .zero size - lw a0, 32(sp) - lw a1, 36(sp) - call _write_s - - li t0, 0x206f7265 # ero_ - sw t0, 12(sp) - li t0, 0x7a2e203a # : .z - sw t0, 8(sp) - li a0, 8 - addi a1, sp, 8 - call _write_s - - lw a0, 20(sp) - lw a1, 24(sp) - call _write_s - - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Sets a0 to the type pointer. -.type compile_type_expression, @function -compile_type_expression: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - -.Lcompile_type_expression_type: - mv a0, s1 - addi a1, sp, 12 - call lex_next - mv s1, a0 - lw t0, 12(sp) - - li t1, TOKEN_HAT # Pointer type. - beq t0, t1, .Lcompile_type_expression_pointer - - # Named type. - lw a0, 16(sp) - lw a1, 20(sp) - call symbol_table_lookup - - j .Lcompile_type_expression_end - -.Lcompile_type_expression_pointer: - call compile_type_expression - mv a1, s3 - call symbol_table_make_pointer - add s3, s3, a0 - sub a0, s3, a0 - - j .Lcompile_type_expression_end - -.Lcompile_type_expression_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Inserts local procedure variables into the symbol table. -.type compile_local_section, @function -compile_local_section: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - mv a0, s1 - addi a1, sp, 28 - call lex_next - - lw t0, 28(sp) - li t1, TOKEN_VAR - - bne t0, t1, .Lcompile_local_section_end - mv s1, a0 - - sw zero, 12(sp) # Variable offset counter. 
- -.Lcompile_local_section_variable: - mv a0, s1 - addi a1, sp, 28 - call lex_next - - lw t0, 28(sp) - li t1, TOKEN_IDENTIFIER - - bne t0, t1, .Lcompile_local_section_end - addi a1, sp, 16 - call lex_next - mv s1, a0 # Skip the ":" in front of the type. - - call compile_type_expression - # a0 - Variable type. - lw a1, 12(sp) - mv a2, s3 - call symbol_table_make_local - - mv a2, s3 - add s3, s3, a0 - - lw a0, 32(sp) - lw a1, 36(sp) - call symbol_table_enter - - lw t0, 12(sp) - addi t0, t0, 4 - sw t0, 12(sp) - - j .Lcompile_local_section_variable - -.Lcompile_local_section_end: - # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 - ret - -# Inserts procedure parameters into the symbol table. -.type compile_parameters, @function -compile_parameters: - # Prologue. - addi sp, sp, -48 - sw ra, 44(sp) - sw s0, 40(sp) - addi s0, sp, 48 - - li t0, -12 - sw t0, 12(sp) # Parameter offset counter. - - mv a0, s1 - addi a1, sp, 28 - call lex_next - mv s1, a0 # Skip the opening paren. - - mv a0, s1 - addi a1, sp, 28 - call lex_next - - lw t0, 28(sp) - li t1, TOKEN_RIGHT_PAREN - beq t0, t1, .Lcompile_parameters_end - # When this is not the right paren, it is an identifier. - mv s1, a0 - -.Lcompile_parameters_parameter: - mv a0, s1 - addi a1, sp, 16 - call lex_next - mv s1, a0 # Skip the ":" in front of the type. - - call compile_type_expression - # a0 - Parameter type. - lw a1, 12(sp) - mv a2, s3 - call symbol_table_make_parameter - - mv a2, s3 - add s3, s3, a0 - - lw a0, 32(sp) - lw a1, 36(sp) - call symbol_table_enter - - lw t0, 12(sp) - addi t0, t0, -4 - sw t0, 12(sp) - - # Read the comma between the parameters or a closing paren. - mv a0, s1 - addi a1, sp, 16 - call lex_next + lw t0, 20(sp) + xori t1, t0, '_' + seqz t1, t1 lw t0, 16(sp) - li t1, TOKEN_COMMA - bne t0, t1, .Lcompile_parameters_end - # If it is a comma, read the name of the next parameter. 
- addi a1, sp, 28 - call lex_next - mv s1, a0 - - j .Lcompile_parameters_parameter - -.Lcompile_parameters_end: - mv s1, a0 # Skip the closing paren. + or a0, a0, t0 + or a0, a0, t1 # Epilogue. - lw ra, 44(sp) - lw s0, 40(sp) - addi sp, sp, 48 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret -.type compile_procedure, @function -compile_procedure: +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +.type _is_digit, @function +_is_digit: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - mv a0, s1 - addi a1, sp, 12 - call lex_next # Skip proc. - addi a1, sp, 12 - call lex_next - mv s1, a0 + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' - lw a0, 16(sp) - lw a1, 20(sp) - call write_procedure_head + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - # Register the procedure in the symbol table. - mv a0, s3 - call symbol_table_make_procedure + and a0, t1, t2 - mv a2, s3 - add s3, s3, a0 + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret - lw a0, 16(sp) - lw a1, 20(sp) - call symbol_table_enter +# Reads the next token. +# +# Returns token length in a0. +.type _read_token, @function +_read_token: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - # Save the state of the symbol table before we enter the procedure scope. - la t0, symbol_table + la t0, source_code_position # Token pointer. lw t0, (t0) - sw t0, 8(sp) + sw t0, 20(sp) # Current token position. + sw zero, 16(sp) # Token length. - call compile_parameters - call compile_local_section +.read_token_loop: + lb t0, (t0) # Current character. - # Skip the "begin" keyword, denoting the beginning of the procedure body. - mv a0, s1 - addi a1, sp, 12 - call lex_next - mv s1, a0 + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' 
+ beq t0, t1, .read_token_next - # Generate the procedure prologue with a predefined stack size. - li a0, PROLOGUE_SIZE - la a1, prologue - call _write_s + lw a0, 20(sp) + lb a0, (a0) + call _is_alpha + bnez a0, .read_token_next - # Save passed arguments on the stack. - li a0, ASM_PRESERVE_PARAMETERS_SIZE - la a1, asm_preserve_parameters - call _write_s + lw a0, 20(sp) + lb a0, (a0) + call _is_digit + bnez a0, .read_token_next - # Generate the body of the procedure. - call compile_statements - mv s1, a0 # Skip end. + j .read_token_end - # Generate the procedure epilogue with a predefined stack size. - li a0, EPILOGUE_SIZE - la a1, epilogue - call _write_s +.read_token_next: + # Advance the source code position and token length. + lw t0, 16(sp) + addi t0, t0, 1 + sw t0, 16(sp) - # Restore the symbol table, removing symbols local to this procedure. - la t0, symbol_table - lw t1, 8(sp) + lw t0, 20(sp) + addi t0, t0, 1 + sw t0, 20(sp) + + j .read_token_loop + +.read_token_end: + lw a0, 16(sp) + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +.type _memcmp, @function +_memcmp: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + mv t0, a0 + li a0, 0 + +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .Lmemcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcmp_loop + +.Lmemcmp_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Advances the token stream by a0 bytes. +.type _advance_token, @function +_advance_token: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Skip the .equ directive. + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 sw t1, (t0) # Epilogue. 
@@ -1145,400 +395,670 @@ compile_procedure: addi sp, sp, 32 ret -# Compiles a goto statement to an uncoditional jump. -.type compile_goto, @function -compile_goto: +.type _compile_section, @function +_compile_section: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - mv a0, s1 - addi a1, sp, 0 - call lex_next # Skip the goto keyword. - addi a1, sp, 0 - call lex_next # We should be on dot the label is beginning with. - addi a1, sp, 0 - call lex_next# Save the label name. - mv s1, a0 + # Print the .section directive and a space after it. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + li a2, KEYWORD_SECTION_SIZE + 1 + li a7, SYS_WRITE + ecall - li t0, 0x2e206a # j . - sw t0, 12(sp) - li a0, 3 - addi a1, sp, 12 - call _write_s - - lw a0, 4(sp) - lw a1, 8(sp) # Saved dot position. - call _write_s - - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Rewrites a label to assembly. -.type compile_label, @function -compile_label: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 8 - call lex_next # Dot starting the label. - addi a1, sp, 8 - call lex_next - mv s1, a0 - - li a0, '.' - call _write_c - lw a0, 12(sp) - lw a1, 16(sp) - call _write_s - li a0, ':' - call _write_c - li a0, '\n' - call _write_c - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Just skips the return keyword and evaluates the return expression. -.type compile_return, @function -compile_return: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 12 - call lex_next - mv s1, a0 # Skip return. - - call compile_binary_expression - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -.type compile_if, @function -compile_if: - # Prologue. 
- addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - mv a0, s1 - addi a1, sp, 0 - call lex_next - mv s1, a0 # Skip the if. - - call compile_binary_expression - - mv a0, s1 - addi a1, sp, 0 - call lex_next - mv s1, a0 # Skip the then. - - # Label prefix. - li t0, 0x66694c2e # .Lif - sw t0, 20(sp) - - li t0, 0x202c3061 # a0,_ - sw t0, 16(sp) - li t0, 0x207a7165 # eqz_ - sw t0, 12(sp) - li t0, 0x62626262 # bbbb - sb t0, 11(sp) - - li a0, 13 - addi a1, sp, 11 - call _write_s - - # Write the label counter. - mv a0, s2 - call _write_i - - li a0, '\n' - call _write_c - - call compile_statements - mv s1, a0 # Skip end. - - # Write the label prefix. - li a0, 4 - addi a1, sp, 20 - call _write_s - - # Write the label counter. - mv a0, s2 - call _write_i - - # Finalize the label. - li t0, 0x0a3a # :\n - sh t0, 16(sp) - li a0, 2 - addi a1, sp, 16 - call _write_s - - addi s2, s2, 1 # Increment the label counter. - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Writes: -# .type identifier, @function -# identifier: -# -# Parameters: -# a0 - Identifier length. -# a0 - Identifier pointer. -.type write_procedure_head, @function -write_procedure_head: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 + # Skip the .equ directive. + li a0, KEYWORD_SECTION_SIZE + 1 + call _advance_token + # Read the section name. + call _read_token sw a0, 16(sp) - sw a1, 20(sp) - # .type identifier, @function - li a0, ASM_TYPE_SIZE - la a1, asm_type - call _write_s + # Print the section name and newline. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 16(sp) + addi a2, a2, 1 + li a7, SYS_WRITE + ecall + # Skip the section name. lw a0, 16(sp) - lw a1, 20(sp) - call _write_s + addi a0, a0, 1 + call _advance_token - li a0, ASM_TYPE_FUNCTION_SIZE - la a1, asm_type_function - call _write_s + # Epilogue. 
+ lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret - lw a0, 16(sp) - lw a1, 20(sp) - call _write_s +# Prints and skips a line. +.type _skip_comment, @function +_skip_comment: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - li t0, 0x0a3a # :\n + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + j .skip_comment_loop + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Prints and skips a line. +.type _compile_line, @function +_compile_line: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + la a0, source_code_position + lw a1, (a0) + +.compile_line_loop: + # Check for newline character. + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + li a0, STDOUT + li a2, 1 + li a7, SYS_WRITE + ecall + + # Advance the input string by one byte. + la a0, source_code_position + lw a1, (a0) + addi a1, a1, 1 + sw a1, (a0) + + j .compile_line_loop + +.compile_line_end: + # Print and skip the newline. + li a0, STDOUT + li a2, 1 + li a7, SYS_WRITE + ecall + + la a0, source_code_position + lw a1, (a0) + addi a1, a1, 1 + sw a1, (a0) + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_object, @function +_compile_object: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + call _compile_line + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_function_statements, @function +_compile_function_statements: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + +.compile_function_statements_loop: + la t0, source_code_position + lw t1, (t0) + addi t1, t1, 1 # Skip the tab. + + mv a0, t1 + la a1, keyword_ret + li a2, KEYWORD_RET_SIZE + call _memcmp + + beqz a0, .compile_function_statements_end + + call _compile_line + j .compile_function_statements_loop + +.compile_function_statements_end: + call _compile_line + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_call, @function +_compile_call: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + call _read_token + sw a0, 20(sp) + + la a0, asm_call + call _write_z + + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + li a7, SYS_WRITE + ecall + + # Skip parens, semicolon and newline. + lw a0, 20(sp) + addi a0, a0, 4 + call _advance_token + + li a0, '\n' + call _write_c + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_statement, @function +_compile_statement: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + call _compile_line + j .compile_statement_end + +.compile_statement_call: + li a0, 1 + call _advance_token + call _compile_call + + j .compile_statement_end + +.compile_statement_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_procedure_body, @function +_compile_procedure_body: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, KEYWORD_END_SIZE + call _memcmp + + beqz a0, .compile_procedure_body_epilogue + + call _compile_statement + j .compile_procedure_body_loop + +.compile_procedure_body_epilogue: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_procedure, @function +_compile_procedure: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Skip "proc ". + li a0, KEYWORD_PROC_SIZE + call _advance_token + + call _read_token + sw a0, 20(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + call _write_z + + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + li a7, SYS_WRITE + ecall + + la a0, asm_type_function + call _write_z + + # Write procedure label, _procedure_name: + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + li a7, SYS_WRITE + ecall + + la a0, asm_colon + call _write_z + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 20(sp) + addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 + call _advance_token + + la a0, asm_prologue + call _write_z + + call _compile_procedure_body + + # Write the epilogue. + la a0, asm_epilogue + call _write_z + + li a0, KEYWORD_END_SIZE + 2 + call _advance_token + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_function, @function +_compile_function: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Write the function header. + call _compile_line + call _compile_function_statements + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_type, @function +_compile_type: + # Prologue. 
+ addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Print the .type directive and a space after it. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + li a2, KEYWORD_TYPE_SIZE + 1 + li a7, SYS_WRITE + ecall + + # Skip the .type directive. + li a0, KEYWORD_TYPE_SIZE + 1 + call _advance_token + + # Read and print the symbol name. + call _read_token + sw a0, 20(sp) + + # Print the symbol name, comma, space and @. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + addi a2, a2, 3 + li a7, SYS_WRITE + ecall + + # Skip the constant name, comma, space and @. + lw a0, 20(sp) + addi a0, a0, 3 + call _advance_token + + # Read the symbol type. + call _read_token + sw a0, 16(sp) + la t0, source_code_position + lw t0, (t0) sw t0, 12(sp) - li a0, 2 - addi a1, sp, 12 - call _write_s + # Print the symbol type and newline. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 16(sp) + addi a2, a2, 1 + li a7, SYS_WRITE + ecall + + lw a0, 16(sp) + addi a0, a0, 1 + call _advance_token + + lw a0, 12(sp) + la a1, keyword_type_object + li a2, KEYWORD_TYPE_OBJECT_SIZE + call _memcmp + + beqz a0, .compile_type_object + + lw a0, 12(sp) + la a1, keyword_type_function + li a2, KEYWORD_TYPE_FUNCTION_SIZE + call _memcmp + + beqz a0, .compile_type_function + + j .compile_type_end + +.compile_type_object: + call _compile_object + + j .compile_type_end + +.compile_type_function: + call _compile_function + + j .compile_type_end + +.compile_type_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Compiles a list of statements delimited by semicolons. -# -# Sets a0 to the end of the token finishing the list -# (should be the "end" token in a valid program). -.type compile_statements, @function -compile_statements: +.type _compile_equ, @function +_compile_equ: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - # Generate the body of the procedure. 
- mv a0, s1 - addi a1, sp, 0 - call lex_next - lw t0, 0(sp) - li t1, TOKEN_END + # Print the .equ directive and a space after it. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + li a2, KEYWORD_EQU_SIZE + 1 + li a7, SYS_WRITE + ecall - beq t0, t1, .Lcompile_statements_end + # Skip the .equ directive. + li a0, KEYWORD_EQU_SIZE + 1 + call _advance_token -.Lcompile_statements_body: - call compile_statement + # Read and print the constant name. + call _read_token + sw a0, 20(sp) - mv a0, s1 - addi a1, sp, 0 - call lex_next - lw t0, 0(sp) - li t1, TOKEN_SEMICOLON + # Print the constant name, comma and space. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 20(sp) + addi a2, a2, 2 + li a7, SYS_WRITE + ecall - bne t0, t1, .Lcompile_statements_end - mv s1, a0 + # Skip the constant name, comma and the space after it. + lw a0, 20(sp) + addi a0, a0, 2 + call _advance_token - j .Lcompile_statements_body + # Read the constant value. + call _read_token + sw a0, 16(sp) + + # Print the constant value and newline. + li a0, STDOUT + la a1, source_code_position + lw a1, (a1) + lw a2, 16(sp) + addi a2, a2, 1 + li a7, SYS_WRITE + ecall + + lw a2, 16(sp) + addi a2, a2, 1 + call _advance_token -.Lcompile_statements_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Checks for the type of the current statement and compiles it. -.type compile_statement, @function -compile_statement: +.type _skip_newlines, @function +_skip_newlines: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - mv a0, s1 - addi a1, sp, 0 - call lex_next - lw t0, 0(sp) + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) - li t1, TOKEN_IDENTIFIER - beq t0, t1, .Lcompile_statement_identifier +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end - li t1, TOKEN_GOTO - beq t0, t1, .Lcompile_statement_goto + addi t1, t1, 1 + sw t1, (t0) - li t1, TOKEN_RETURN - beq t0, t1, .Lcompile_statement_return + j .skip_newlines_loop - li t1, TOKEN_IF - beq t0, t1, .Lcompile_statement_if - - li t1, TOKEN_DOT - beq t0, t1, .Lcompile_statement_label - - unimp # Else. - -.Lcompile_statement_if: - call compile_if - j .Lcompile_statement_end - -.Lcompile_statement_label: - call compile_label - j .Lcompile_statement_end - -.Lcompile_statement_return: - call compile_return - j .Lcompile_statement_end - -.Lcompile_statement_goto: - call compile_goto - j .Lcompile_statement_end - -.Lcompile_statement_identifier: - call compile_identifier - j .Lcompile_statement_end - -.Lcompile_statement_end: +.skip_newlines_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -# Prints ".section .text" and exits. -.type compile_text_section, @function -compile_text_section: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - li a0, SECTION_TEXT_SIZE - la a1, section_text - call _write_s - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -.type compile_entry_point, @function -compile_entry_point: +# Process the source code and print the generated code. +.type _compile, @function +_compile: # Prologue. addi sp, sp, -32 sw ra, 28(sp) sw s0, 24(sp) addi s0, sp, 32 - # .type _start, @function - li a0, ASM_START_SIZE - la a1, asm_start - call _write_s +.compile_loop: + call _skip_newlines - mv a0, s1 - addi a1, sp, 4 - call lex_next - mv s1, a0 # Skip begin. + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment - # Generate the body of the procedure. 
- call compile_statements - mv s1, a0 # Skip end. + la a0, source_code_position + lw a0, (a0) + la a1, keyword_equ + li a2, KEYWORD_EQU_SIZE + call _memcmp - li a0, ASM_EXIT_SIZE - la a1, asm_exit - call _write_s + beqz a0, .compile_equ + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, KEYWORD_SECTION_SIZE + call _memcmp + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, KEYWORD_TYPE_SIZE + call _memcmp + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, KEYWORD_PROC_SIZE + call _memcmp + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, KEYWORD_GLOBAL_SIZE + call _memcmp + + beqz a0, .compile_global + + j .compile_end # Not a known token, exit. + +.compile_equ: + call _compile_equ + + j .compile_loop + +.compile_section: + call _compile_section + + j .compile_loop + +.compile_type: + call _compile_type + + j .compile_loop + +.compile_global: + call _compile_line + + j .compile_loop + +.compile_comment: + call _skip_comment + + j .compile_loop + +.compile_procedure: + call _compile_procedure + + j .compile_loop + +.compile_end: # Epilogue. lw ra, 28(sp) lw s0, 24(sp) addi sp, sp, 32 ret -.type compile, @function -compile: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - call compile_module_declaration - call compile_import - call compile_global_section - call compile_text_section - call compile_procedure_section - call compile_entry_point - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - # Entry point. +.globl _start .type _start, @function _start: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + # Read the source from the standard input. la a0, source_code li a1, SOURCE_BUFFER_SIZE # Buffer size. 
call _read_file - li s2, 1 - call _mmap - mv s3, a0 + # Save the pointer to the beginning of the source code in a global variable. + la t0, source_code + la t1, source_code_position + sw t0, (t1) - call symbol_table_build - call compile + call _compile # Call exit. - li a0, 0 # Use 0 return code. - call _exit + li a0, 0 # Use 0 return code. + li a7, SYS_EXIT + ecall + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret diff --git a/boot/stage2.elna b/boot/stage2.elna index b4f359e..ab24e90 100644 --- a/boot/stage2.elna +++ b/boot/stage2.elna @@ -1,1393 +1,863 @@ -program +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. -import dummy +# Stage2 compiler. +# +# It supports declaring and calling procedures without arguments. +# A procedure name should start with an underscore. -var - source_code: [81920]Byte +.section .rodata -proc _compile_import() -var loca0: Word +.type keyword_equ, @object +keyword_equ: .ascii ".equ" +.equ KEYWORD_EQU_SIZE, 4 + +.type keyword_section, @object +keyword_section: .ascii ".section" +.equ KEYWORD_SECTION_SIZE, 8 + +.type keyword_type, @object +keyword_type: .ascii ".type" +.equ KEYWORD_TYPE_SIZE, 5 + +.type keyword_ret, @object +keyword_ret: .ascii "ret" +.equ KEYWORD_RET_SIZE, 3 + +.type keyword_global, @object +keyword_global: .ascii ".globl" +.equ KEYWORD_GLOBAL_SIZE, 6 + +.type keyword_proc, @object +keyword_proc: .ascii "proc " +.equ KEYWORD_PROC_SIZE, 5 + +.type keyword_end, @object +keyword_end: .ascii "end" +.equ KEYWORD_END_SIZE, 3 + +.type keyword_begin, @object +keyword_begin: .ascii "begin" +.equ KEYWORD_BEGIN_SIZE, 5 + +.type keyword_var, @object +keyword_var: .ascii "var" +.equ KEYWORD_VAR_SIZE, 3 + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, 
@object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +.equ SOURCE_BUFFER_SIZE, 81920 +.type source_code, @object +source_code: .zero SOURCE_BUFFER_SIZE + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); begin - _advance(6); - _skip_spaces(); - loca0 := _read_token(); - _advance(loca0) -end + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; -proc _build_binary_expression() -var - loca0: Word - loca4: Word - loca8: Word - loca12: ^Byte - loca16: Word - loca20: Word - loca24: Bool +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write(); begin - _build_expression(0); + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. 
+ ecall +end; - loca4 := 0x2c306120; - loca8 := 0x0a316120; - - _skip_spaces(); - loca20 := _read_token(); - loca12 := _current(); - - loca16 := 0x26; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_and - end; - - loca16 := 0x726f; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_or - end; - - loca16 := 0x3d; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_equal - end; - - loca16 := 0x2b; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_plus - end; - - loca16 := 0x2d; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_minus - end; - - loca16 := 0x2a; - loca24 := _token_compare(loca12, loca20, @loca16); - if loca24 = 0 then - goto .L_build_binary_expression_product - end; - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_equal; - _advance(1); - _build_expression(1); - - loca0 := 0x627573; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - loca0 := 0x7a716573; - _write_s(4, @loca0); - _write_s(4, @loca4); - _write_s(3, @loca4); - _write_c(0x0a); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_and; - _advance(1); - _build_expression(1); - loca0 := 0x646e61; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_or; - _advance(2); - _build_expression(1); - loca0 := 0x726f; - _write_s(2, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_plus; - _advance(1); - _build_expression(1); - loca0 := 0x646461; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, 
@loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_minus; - _advance(1); - _build_expression(1); - loca0 := 0x627573; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .L_build_binary_expression_product; - _advance(1); - _build_expression(1); - loca0 := 0x6c756d; - _write_s(3, @loca0); - _write_s(4, @loca4); - _write_s(4, @loca4); - _write_s(4, @loca8); - - goto .Lbuild_binary_expression_end; - - .Lbuild_binary_expression_end -end - -proc _compile_identifier_expression(loca84: Word, loca80: Byte) -var - loca0: Word - loca4: ^Byte - loca8: Word - loca12: Bool - loca16: Word - loca20: Word - loca24: ^Byte - loca28: Byte +# Writes a character from a0 into the standard output. +proc _write_c(); begin - loca24 := _current(); - loca0 := 0x61636f6c; - loca0 := _memcmp(@loca0, loca24, 4); + sb a0, 20(sp) + addi a0, sp, 20 + li a1, 1 + _write(); +end; - if loca0 = 0 then - loca8 := 0x6120776c; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca80; - _write_s(3, @loca8); - - loca4 := loca24 + 4; - loca0 := loca84 - 4; - _write_s(loca0, loca4); - - loca8 := 0x29707328; - _write_s(4, @loca8); - _write_c(0x0a); - - goto .Lcompile_identifier_expression_end - end; - loca0 := _front(loca24); - loca8 := loca84 = 2; - loca12 := loca0 = 0x73; - if loca8 & loca12 then - loca8 := 0x6120766d; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca80; - _write_s(3, @loca8); - _write_s(loca84, loca24); - _write_c(0x0a); - - goto .Lcompile_identifier_expression_end - end; - - loca8 := 0x6120616c; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca80; - _write_s(3, @loca8); - - _write_s(loca84, loca24); - _write_c(0x0a); - - if _is_upper(loca0) then - loca8 := 0x6120776c; - _write_s(4, @loca8); - loca8 := 0x28202c00 or loca28; - _write_s(4, @loca8); - _write_c(0x61); - _write_c(loca28); - _write_c(0x29); - _write_c(0x0a); - - goto .Lcompile_identifier_expression_end - end; - - 
.Lcompile_identifier_expression_end -end - -proc _build_expression(loca84: Word) -var - loca0: Word - loca4: ^Byte - loca8: Word - loca12: Word - loca16: Word - loca20: Word - loca24: ^Byte - loca28: Word +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); begin - loca28 := loca84 + 0x30; + sw a0, 20(sp) - _skip_spaces(); - loca20 := _read_token(); - loca24 := _current(); - loca0 := _front(loca24); +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end - if loca0 = 0x2d then - goto .Lbuild_expression_negate - end; + # Print a character. + lw a0, 20(sp) + lb a0, (a0) + _write_c(); - if loca0 = 0x40 then - goto .Lbuild_expression_address - end; + # Advance the input string by one byte. + lw a0, 20(sp) + addi a0, a0, 1 + sw a0, 20(sp) - if _is_digit(loca0) then - goto .Lbuild_expression_literal - end; + j .write_z_loop - if loca0 = 0x5f then - goto .Lbuild_expression_call - end; +.write_z_end: +end; - _compile_identifier_expression(loca20, loca28); - goto .Lbuild_expression_advance; - - .Lbuild_expression_negate; - _advance(1); - _build_expression(0); - - loca8 := 0x2067656e; - _write_s(4, @loca8); - loca8 := 0x202c3061; - _write_s(4, @loca8); - loca8 := 0x0a3061; - _write_s(3, @loca8); - - goto .Lbuild_expression_advance; - - .Lbuild_expression_address; - loca8 := 0x69646461; - _write_s(4, @loca8); - loca8 := 0x6120; - _write_s(2, @loca8); - _write_c(loca28); - loca8 := 0x7073202c; - _write_s(4, @loca8); - loca8 := 0x202c; - _write_s(2, @loca8); - - _advance(1); - _skip_spaces(); - loca24 := _current(); - loca20 := _read_token(); - - loca4 := loca24 + 4; - loca0 := loca20 - 4; - _write_s(loca0, loca4); - - _write_c(0xa); - - goto .Lbuild_expression_advance; - - .Lbuild_expression_call; - _advance(loca20); - _advance(1); - _compile_call(loca24, loca20); - - goto .Lbuild_expression_end; - - .Lbuild_expression_literal; - loca8 := 0x6120696c; - _write_s(4, @loca8); - loca8 := 0x00202c00 or loca28; - _write_s(3, 
@loca8); - - _write_s(loca20, loca24); - _write_c(0x0a); - - goto .Lbuild_expression_advance; - - .Lbuild_expression_advance; - _advance(loca20); - - .Lbuild_expression_end -end - -proc _compile_designator_expression(loca84: ^Byte, loca80: Word) -var - loca0: Word - loca4: Int - loca8: Char - loca12: Bool - loca16: Bool +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); begin - loca0 := 0x61636f6c; - loca4 := _memcmp(@loca0, loca84, 4); + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' - if loca4 = 0 then - loca0 := 0x61207773; - _write_s(4, @loca0); - loca0 := 0x202c30; - _write_s(3, @loca0); + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; - loca84 := loca84 + 4; - loca80 := loca80 - 4; - _write_s(loca80, loca84); - - loca0 := 0x29707328; - _write_s(4, @loca0); - _write_c(0x0a); - - goto .Lcompile_designator_expression_end - end; - loca8 := _front(loca84); - loca12 := loca8 = 0x73; - loca16 := loca80 = 2; - if loca12 & loca16 then - loca0 := 0x20766d; - _write_s(3, @loca0); - _write_s(loca80, loca84); - loca0 := 0x3061202c; - _write_s(4, @loca0); - _write_c(0x0a); - - goto .Lcompile_designator_expression_end - end; - - .Lcompile_designator_expression_end -end - -proc _compile_identifier() -var - loca0: Word - loca4: Bool - loca8: Word - loca12: ^Byte - loca16: Word - loca20: ^Byte +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_lower(); begin - loca20 := _current(); - loca16 := _read_token(); + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' - _advance(loca16); - _skip_spaces(); + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; - loca12 := _current(); - loca8 := _read_token(); - - _advance(loca8); - _skip_spaces(); - - loca0 := 0x3d3a; - loca4 := _token_compare(loca12, loca8, @loca0); - if loca4 = 0 then - _build_binary_expression(); - _compile_designator_expression(loca20, loca16); - - goto .Lcompile_identifier_end - end; - if _front(loca12) = 0x28 then - _compile_call(loca20, loca16); - - goto .Lcompile_identifier_end - end; - - .Lcompile_identifier_end -end - -proc _compile_call(loca84: ^Byte, loca80: Word) -var - loca0: Word - loca4: Word - loca8: ^Byte - loca12: Word +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. 
+proc _is_alpha(); begin - loca12 := 0; + sw a0, 20(sp) - .Lcompile_call_paren; - _skip_spaces(); - loca8 := _current(); - if _front(loca8) = 0x29 then - goto .Lcompile_call_complete - end; + _is_upper(); + sw a0, 16(sp) - .Lcompile_call_argument; - _build_expression(0); + lw a0, 20(sp) + _is_lower(); - loca0 := 0x61207773; - _write_s(4, @loca0); - loca0 := 0x202c30; - _write_s(3, @loca0); + lw t0, 20(sp) + xori t1, t0, '_' + seqz t1, t1 - loca0 := -4 * loca12; - loca0 := loca0 + 60; - _write_i(loca0); + lw t0, 16(sp) + or a0, a0, t0 + or a0, a0, t1 +end; - loca0 := 0x29707328; - _write_s(4, @loca0); - _write_c(0x0a); - - _skip_spaces(); - loca8 := _current(); - loca0 := _front(loca8) = 0x2c; - if loca0 = 0 then - goto .Lcompile_call_paren - end; - - loca12 := loca12 + 1; - - _advance(1); - goto .Lcompile_call_argument; - - .Lcompile_call_complete; - loca12 := 0; - - .Lcompile_call_restore; - - loca0 := 0x6120776c; - _write_s(4, @loca0); - loca4 := 0x36202c30; - _write_s(4, @loca4); - loca4 := 0x70732830; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x35202c31; - _write_s(4, @loca4); - loca4 := 0x70732836; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x35202c32; - _write_s(4, @loca4); - loca4 := 0x70732832; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x34202c33; - _write_s(4, @loca4); - loca4 := 0x70732838; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x34202c34; - _write_s(4, @loca4); - loca4 := 0x70732834; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - _write_s(4, @loca0); - loca4 := 0x34202c35; - _write_s(4, @loca4); - loca4 := 0x70732830; - _write_s(4, @loca4); - loca4 := 0x0a29; - _write_s(2, @loca4); - - loca0 := 0x6c6c6163; - _write_s(4, @loca0); - _write_c(0x20); - - _write_s(loca80, loca84); - _write_c(0x0a); - - 
_skip_spaces(); - _advance(1) -end - -proc _read_token() -var - loca0: Word - loca4: Word - loca8: ^Byte +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); begin - loca8 := _current(); - loca0 := _front(loca8); - loca4 := 0; + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' - if loca0 = 0x2e then - goto .Ltoken_character_single - end; + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - if loca0 = 0x2c then - goto .Ltoken_character_single - end; + and a0, t1, t2 +end; - if loca0 = 0x3a then - goto .Ltoken_character_colon - end; - - if loca0 = 0x3b then - goto .Ltoken_character_single - end; - - if loca0 = 0x28 then - goto .Ltoken_character_single - end; - - if loca0 = 0x29 then - goto .Ltoken_character_single - end; - - if loca0 = 0x5b then - goto .Ltoken_character_single - end; - - if loca0 = 0x5d then - goto .Ltoken_character_single - end; - - if loca0 = 0x5e then - goto .Ltoken_character_single - end; - - if loca0 = 0x26 then - goto .Ltoken_character_single - end; - - if loca0 = 0x3d then - goto .Ltoken_character_single - end; - - if loca0 = 0x2b then - goto .Ltoken_character_single - end; - - if loca0 = 0x2d then - goto .Ltoken_character_single - end; - - if loca0 = 0x2a then - goto .Ltoken_character_single - end; - - if loca0 = 0x40 then - goto .Ltoken_character_single - end; - - .Ltoken_character_loop_do; - loca0 := loca8 + loca4; - loca0 := _front(loca0); - - if _is_alnum(loca0) then - loca4 := loca4 + 1; - goto .Ltoken_character_loop_do; - - .Ltoken_character_single; - loca4 := loca4 + 1; - goto .Ltoken_character_end; - - .Ltoken_character_colon; - loca0 := loca8 + 1; - loca0 := _front(loca0); - loca4 := loca4 + 1; - - if loca0 = 0x3d then - goto .Ltoken_character_single - end - end; - .Ltoken_character_end; - return loca4 -end - -proc _skip_spaces() -var - loca0: Byte - loca4: ^Byte +# Reads the next token. 
+# +# Returns token length in a0. +proc _read_token(); begin - .Lspace_loop_do; - loca4 := _current(); - loca0 := _front(loca4); + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 20(sp) # Current token position. + sw zero, 16(sp) # Token length. - if loca0 = 0x20 then - goto .Lspace_loop_repeat - end; - if loca0 = 0x09 then - goto .Lspace_loop_repeat - end; - if loca0 = 0x0a then - goto .Lspace_loop_repeat - end; - if loca0 = 0x0d then - goto .Lspace_loop_repeat - end; +.read_token_loop: + lb t0, (t0) # Current character. - goto .Lspace_loop_end; - .Lspace_loop_repeat; - _advance(1); - goto .Lspace_loop_do; + # First we try to read a directive. + # A directive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next - .Lspace_loop_end -end + lw a0, 20(sp) + lb a0, (a0) + _is_alpha(); + bnez a0, .read_token_next -proc _compile_assembly(loca84: Word) -var loca0: ^Byte + lw a0, 20(sp) + lb a0, (a0) + _is_digit(); + bnez a0, .read_token_next + + j .read_token_end + +.read_token_next: + # Advance the source code position and token length. + lw t0, 16(sp) + addi t0, t0, 1 + sw t0, 16(sp) + + lw t0, 20(sp) + addi t0, t0, 1 + sw t0, 20(sp) + + j .read_token_loop + +.read_token_end: + lw a0, 16(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); begin - loca0 := _current(); + mv t0, a0 + li a0, 0 - _write_s(loca84, loca0); - _advance(loca84); +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end - _write_c(0xa); + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 - _advance(1) -end + bnez a0, .Lmemcmp_end -proc _compile_program() -var loca0: Word + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcmp_loop + +.Lmemcmp_end: +end; + +# Advances the token stream by a0 bytes.
+proc _advance_token(); begin - loca0 := 0x6f6c672e; - _write_s(4, @loca0); - loca0 := 0x206c6162; - _write_s(4, @loca0); - loca0 := 0x6174735f; - _write_s(4, @loca0); - loca0 := 0x0a7472; - _write_s(3, @loca0); + # Move the source code position forward by a0 bytes. + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; - _advance(8) -end - -proc _compile_variable_section() -var - loca0: Word - loca4: ^Byte +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); begin - loca0 := 0x6365732e; - _write_s(4, @loca0); - loca0 := 0x6e6f6974; - _write_s(4, @loca0); - loca0 := 0x73622e20; - _write_s(4, @loca0); - loca0 := 0x0a73; - _write_s(2, @loca0); + sw a0, 20(sp) - _advance(4); + la a0, source_code_position + lw a0, (a0) + lw a1, 20(sp) + _write(); - .Lcompile_variable_section_item; - _skip_spaces(); - loca4 := _current(); + lw a0, 20(sp) +end; - loca0 := 0x636f7270; - loca0 := _memcmp(@loca0, loca4, 4); - - if loca0 = 0 then - goto .Lcompile_variable_section_end - end; - _compile_variable(); - goto .Lcompile_variable_section_item; - - .Lcompile_variable_section_end -end - -proc _compile_variable() -var - loca0: Word - loca4: Word - loca8: Word - loca12: Word - loca16: ^Byte - loca20: Word - loca24: Word - loca28: ^Byte +proc _compile_section(); begin - loca24 := _read_token(); - loca28 := _current(); + # Print and skip the .section directive and a space after it. + li a0, KEYWORD_SECTION_SIZE + 1 + _write_token(); + _advance_token(); - _advance(loca24); + # Read the section name.
+ _read_token(); + addi a0, a0, 1 - _skip_spaces(); - _advance(1); + _write_token(); + _advance_token(); +end; - _skip_spaces(); - _advance(1); - - loca16 := _read_token(); - loca20 := _current(); - _advance(loca16); - - _skip_spaces(); - _advance(1); - - _skip_spaces(); - loca0 := _read_token(); - _advance(loca0); - - loca0 := 0x7079742e; - _write_s(4, @loca0); - loca0 := 0x2065; - _write_s(2, @loca0); - - _write_s(loca24, loca28); - - loca0 := 0x6f40202c; - _write_s(4, @loca0); - loca0 := 0x63656a62; - _write_s(4, @loca0); - loca0 := 0x0a74; - _write_s(2, @loca0); - - loca0 := 0x7a69732e; - _write_s(4, @loca0); - loca0 := 0x2065; - _write_s(2, @loca0); - - _write_s(loca24, loca28); - - loca0 := 0x202c; - _write_s(2, @loca0); - - _write_s(loca16, loca20); - _write_c(0x0a); - - _write_s(loca24, loca28); - - loca0 := 0x7a2e203a; - _write_s(4, @loca0); - loca0 := 0x206f7265; - _write_s(4, @loca0); - - _write_s(loca16, loca20); - - _write_c(0x0a) -end - -proc _compile_procedure() -var - loca0: Word - loca4: Word - loca8: Word - loca12: Word - loca16: Word - loca20: ^Byte - loca24: ^Byte +# Skips a line without printing it. +proc _skip_comment(); begin - _advance(5); - loca16 := _read_token(); - loca20 := _current(); - _advance(loca16); + la t0, source_code_position + lw t1, (t0) - loca0 := 0x7079742e; - _write_s(4, @loca0); - loca0 := 0x2065; - _write_s(2, @loca0); +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end - _write_s(loca16, loca20); + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) - loca0 := 0x6640202c; - _write_s(4, @loca0); - loca0 := 0x74636e75; - _write_s(4, @loca0); - loca0 := 0x0a6e6f69; - _write_s(4, @loca0); + j .skip_comment_loop - _write_s(loca16, loca20); +.skip_comment_end: + # Skip the newline.
+ addi t1, t1, 1 + sw t1, (t0) +end; - loca0 := 0x0a3a; - _write_s(2, @loca0); - - _skip_spaces(); - _advance(1); - _skip_spaces(); - _advance(1); - - loca12 := 0x6e; - loca8 := 0x69676562; - - .Lcompile_procedure_begin; - _skip_spaces(); - loca0 := _read_token(); - - loca24 := _current(); - _advance(loca0); - loca0 := _token_compare(loca24, loca0, @loca8); - - if loca0 = 1 then - goto .Lcompile_procedure_begin - end; - - loca0 := 0x69646461; - _write_s(4, @loca0); - - loca0 := 0x2c707320; - _write_s(4, @loca0); - _write_s(4, @loca0); - - loca0 := 0x0a36392d; - _write_s(4, @loca0); - - loca0 := 0x72207773; - _write_s(4, @loca0); - loca0 := 0x39202c61; - _write_s(4, @loca0); - loca0 := 0x70732832; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x73207773; - _write_s(4, @loca0); - loca0 := 0x38202c30; - _write_s(4, @loca0); - loca0 := 0x70732838; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x69646461; - _write_s(4, @loca0); - loca0 := 0x2c307320; - _write_s(4, @loca0); - loca0 := 0x2c707320; - _write_s(4, @loca0); - loca0 := 0x0a363920; - _write_s(4, @loca0); - - loca0 := 0x61207773; - _write_s(4, @loca0); - loca4 := 0x38202c30; - _write_s(4, @loca4); - loca8 := 0x70732834; - _write_s(4, @loca8); - loca12 := 0x0a29; - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x38202c31; - _write_s(4, @loca4); - loca8 := 0x70732830; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x37202c32; - _write_s(4, @loca4); - loca8 := 0x70732836; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x37202c33; - _write_s(4, @loca4); - loca8 := 0x70732832; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x36202c34; - _write_s(4, @loca4); - loca8 := 0x70732838; - _write_s(4, @loca8); - _write_s(2, @loca12); - - _write_s(4, @loca0); - loca4 := 0x36202c35; - _write_s(4, @loca4); - loca8 := 0x70732838; - _write_s(4, 
@loca8); - _write_s(2, @loca12); - - .Lcompile_procedure_body; - _skip_spaces(); - loca12 := _read_line(); - loca8 := 0x0a646e65; - loca24 := _current(); - loca8 := _memcmp(loca24, @loca8, 4); - - if loca8 = 0 then - goto .Lcompile_procedure_end - end; - - _compile_line(loca12); - goto .Lcompile_procedure_body; - - .Lcompile_procedure_end; - _advance(4); - - loca0 := 0x7220776c; - _write_s(4, @loca0); - loca0 := 0x39202c61; - _write_s(4, @loca0); - loca0 := 0x70732832; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x7320776c; - _write_s(4, @loca0); - loca0 := 0x38202c30; - _write_s(4, @loca0); - loca0 := 0x70732838; - _write_s(4, @loca0); - loca0 := 0x0a29; - _write_s(2, @loca0); - - loca0 := 0x69646461; - _write_s(4, @loca0); - - loca0 := 0x2c707320; - _write_s(4, @loca0); - _write_s(4, @loca0); - - loca0 := 0x0a3639; - _write_s(4, @loca0); - - loca0 := 0x0a746572; - _write_s(4, @loca0) -end - -proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte) -var - loca0: Bool - loca4: Byte - loca8: Word - loca12: Byte +# Prints and skips a line. +proc _compile_line(); begin - .Ltoken_compare_loop; - loca4 := _front(loca76); +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) - loca8 := loca4 or loca80; - if loca8 = 0 then - goto .Ltoken_compare_equal - end; - if loca80 = 0 then - goto .Ltoken_compare_not_equal - end; - if loca4 = 0 then - goto .Ltoken_compare_not_equal - end; - loca12 := _front(loca84); - if loca4 = loca12 then - goto .Ltoken_compare_continue - end; - goto .Ltoken_compare_not_equal; + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end - .Ltoken_compare_continue; + # Print a character. + lw a0, (a1) + _write_c(); - loca84 := loca84 + 1; - loca80 := loca80 - 1; - loca76 := loca76 + 1; - goto .Ltoken_compare_loop; + # Advance the input string by one byte. 
+ li a0, 1 + _advance_token(); - .Ltoken_compare_not_equal; - loca0 := 1; - goto .Ltoken_compare_end; + j .compile_line_loop - .Ltoken_compare_equal; - loca0 := 0; +.compile_line_end: + li a0, '\n' + _write_c(); - .Ltoken_compare_end; - return loca0 -end + li a0, 1 + _advance_token(); +end; -proc _compile_goto() -var - loca0: Word - loca4: Word - loca8: ^Byte +proc _compile_integer_literal(); begin - _advance(4); + la a0, asm_li + _write_z(); - loca0 := 0x206a; - _write_s(2, @loca0); + la a0, asm_a0 + _write_z(); - _skip_spaces(); - loca8 := _current(); - _advance(1); + la a0, asm_comma + _write_z(); - loca0 := _read_token(); - _advance(loca0); - loca0 := loca0 + 1; - _write_s(loca0, loca8); + la a0, source_code_position + lw a0, (a0) + li a1, 1 + _write(); - _advance(1); - _write_c(0x0a) -end + li a0, '\n' + _write_c(); -proc _compile_label(loca84: Word) -var - loca0: Word - loca4: Word - loca8: ^Byte + li a0, 1 + _advance_token(); +end; + +proc _compile_character_literal(); begin - loca0 := _current(); + la a0, asm_li + _write_z(); - loca0 := loca0 + loca84; - loca0 := loca0 - 1; - loca4 := loca84; + la a0, asm_a0 + _write_z(); - loca0 := _front(loca0); - if loca0 = 0x3b then - loca4 := loca4 - 1 - end; - loca8 := _current(); - _write_s(loca4, loca8); + la a0, asm_comma + _write_z(); - _write_c(0x3a); - _write_c(0x0a); +.compile_character_literal_loop: + la a0, source_code_position + lw a0, (a0) + li a1, 1 + _write(); + li a0, 1 + _advance_token(); - _advance(loca84) -end + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\'' + beq a0, t1, .compile_character_literal_end -proc _compile_return() + _write_c(); + li a0, 1 + _advance_token(); + + j .compile_character_literal_loop + +.compile_character_literal_end: + li a0, '\'' + _write_c(); + + li a0, '\n' + _write_c(); + + li a0, 2 + _advance_token(); +end; + +proc _compile_variable_expression(); begin - _advance(6); - _skip_spaces(); - _build_binary_expression() -end + la a0, asm_lw + _write_z(); 
-proc _compile_if() -var - loca0: Word - loca4: ^Byte - loca8: Word - loca12: Word - loca16: Word - loca20: Word - loca24: Word + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + la a0, source_code_position + lw a0, (a0) + addi a0, a0, 1 + li a1, 2 + _write(); + + la a0, asm_sp + _write_z(); + + li a0, '\n' + _write_c(); + + li a0, 3 + _advance_token(); + +end; + +proc _compile_expression(); begin - _advance(2); - _skip_spaces(); + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) - _build_binary_expression(); + li t1, '\'' + beq a0, t1, .compile_expression_character_literal - _skip_spaces(); - _advance(4); + li t1, 'v' + beq a0, t1, .compile_expression_variable - loca20 := 0x00646e65; - loca16 := 0x66694c2e; + _is_digit(); + bnez a0, .compile_expression_integer_literal - loca12 := 0x7a716562; - _write_s(4, @loca12); - loca12 := 0x2c306120; - _write_s(4, @loca12); - _write_c(0x20); + j .compile_expression_end - loca24 := _label_counter(1); - _write_s(4, @loca16); - _write_i(loca24); +.compile_expression_character_literal: + _compile_character_literal(); + j .compile_expression_end - _write_c(0x0a); +.compile_expression_integer_literal: + _compile_integer_literal(); + j .compile_expression_end - .Lcompile_if_loop; - _skip_spaces(); - loca12 := _read_token(); +.compile_expression_variable: + _compile_variable_expression(); + j .compile_expression_end; - loca4 := _current(); - loca8 := _token_compare(loca4, loca12, @loca20); +.compile_expression_end: +end; - if loca8 then - loca12 := _read_line(); - _compile_line(loca12, 1); - - goto .Lcompile_if_loop - end; - - _write_s(4, @loca16); - _write_i(loca24); - - loca12 := 0x0a3a0a3a; - _write_s(2, @loca12); - - _advance(4) -end - -proc _compile_line(loca84: Word, loca80: Bool) -var - loca0: Char - loca4: Int - loca8: Bool - loca12: Word - loca16: ^Byte +proc _compile_call(); begin - if loca84 = 0 then - goto .Lcompile_line_empty - end; + _read_token(); + sw a0, 20(sp) + la t0, source_code_position + 
lw t0, (t0) + sw t0, 16(sp) - loca16 := _current(); - loca0 := _front(loca16); + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); - loca12 := 0x676f7270; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_program - end; + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) - loca12 := 0x0a726176; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_var - end; + li t1, ')' + beq t0, t1, .compile_call_finalize - loca12 := 0x636f7270; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_procedure - end; + _compile_expression(); - loca12 := 0x69676562; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_begin - end; +.compile_call_finalize: + la a0, asm_call + _write_z(); - loca12 := 0x2e646e65; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_exit - end; + lw a0, 16(sp) + lw a1, 20(sp) + _write(); - loca12 := 0x61636f6c; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_identifier - end; - loca4 := _front(loca16); - if loca4 = 0x73 then - goto .Lcompile_line_identifier - end; + # Skip the right paren. 
+ li a0, 1 + _advance_token(); +end; - loca12 := 0x6f706d69; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_import - end; +proc _compile_goto(); +begin + li a0, 5 + _advance_token(); - loca12 := 0x6f746f67; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_goto - end; + _read_token(); + sw a0, 20(sp) - loca12 := 0x75746572; - loca4 := _memcmp(loca16, @loca12, 4); - if loca4 = 0 then - goto .Lcompile_line_return - end; + la a0, asm_j + _write_z(); - loca12 := 0x6669; - loca4 := _memcmp(loca16, @loca12, 2); - if loca4 = 0 then - goto .Lcompile_line_if - end; + lw a0, 20(sp) + _write_token(); + _advance_token(); +end; - if loca0 = 0x2e then - goto .Lcompile_line_label - end; - if loca0 = 0x5f then - goto .Lcompile_line_identifier - end; - goto .Lcompile_line_unchanged; +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. 
+ addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call - .Lcompile_line_if; - _compile_if(); - goto .Lcompile_line_section; + li t1, 'g' + beq t0, t1, .compile_statement_goto - .Lcompile_line_label; - _compile_label(loca84); - goto .Lcompile_line_section; + _compile_line(); + j .compile_statement_end - .Lcompile_line_return; - _compile_return(); - goto .Lcompile_line_section; +.compile_statement_call: + li a0, 1 + _advance_token(); + _compile_call(); - .Lcompile_line_goto; + j .compile_statement_semicolon + +.compile_statement_goto: + li a0, 1 + _advance_token(); _compile_goto(); - goto .Lcompile_line_section; - .Lcompile_line_import; - _compile_import(); - goto .Lcompile_line_section; + j .compile_statement_semicolon - .Lcompile_line_identifier; - _compile_identifier(); - goto .Lcompile_line_section; +.compile_statement_semicolon: + li a0, 2 + _advance_token(); - .Lcompile_line_exit; - _compile_exit(); - goto .Lcompile_line_section; + li a0, '\n' + _write_c(); - .Lcompile_line_begin; +.compile_statement_end: +end; - if loca80 = 1 then - goto .Lcompile_line_compile_entry - end; - _compile_text_section(); - .Lcompile_line_compile_entry; - _compile_entry_point(); - loca8 := 1; - goto .Lcompile_line_end; +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, KEYWORD_END_SIZE + _memcmp(); - .Lcompile_line_procedure; - if loca80 = 1 then - goto .Lcompile_line_compile_procedure - end; - _compile_text_section(); - .Lcompile_line_compile_procedure; + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + j .compile_procedure_body_loop + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + li a0, KEYWORD_PROC_SIZE + _advance_token(); + + _read_token(); + sw a0, 20(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. 
+ la a0, asm_type_directive + _write_z(); + + lw a0, 20(sp) + _write_token(); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + lw a0, 20(sp) + _write_token(); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 20(sp) + addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + li a0, KEYWORD_END_SIZE + 2 + _advance_token(); +end; + +proc _compile_type(); +begin + # Print and skip the .type directive and a space after it. + li a0, KEYWORD_TYPE_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the symbol name, comma, space and @. + lw a0, 20(sp) + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + sw a0, 16(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _compile_equ(); +begin + # Print and skip the .equ directive and a space after it. + li a0, KEYWORD_EQU_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the constant name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the constant name, comma and space. + lw a0, 20(sp) + addi a0, a0, 2 + _write_token(); + _advance_token(); + + # Read the constant value. + _read_token(); + sw a0, 16(sp) + + # Print and skip the constant value and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); +end; + +proc _skip_newlines(); +begin + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + j .skip_newlines_loop + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_equ + li a2, KEYWORD_EQU_SIZE + _memcmp(); + + beqz a0, .compile_equ + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, KEYWORD_SECTION_SIZE + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, KEYWORD_TYPE_SIZE + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, KEYWORD_PROC_SIZE + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, KEYWORD_GLOBAL_SIZE + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. 
+ j .compile_end + +.compile_equ: + _compile_equ(); + + j .compile_loop + +.compile_section: + _compile_section(); + + j .compile_loop + +.compile_type: + _compile_type(); + + j .compile_loop + +.compile_global: + _compile_line(); + + j .compile_loop + +.compile_comment: + _skip_comment(); + + j .compile_loop + +.compile_procedure: _compile_procedure(); - loca8 := 1; - goto .Lcompile_line_end; - .Lcompile_line_var; - _compile_variable_section(); - goto .Lcompile_line_section; + j .compile_loop - .Lcompile_line_program; - _compile_program(); - goto .Lcompile_line_section; +.compile_end: +end; - .Lcompile_line_empty; - _advance(1); - goto .Lcompile_line_section; - - .Lcompile_line_unchanged; - _compile_assembly(loca84); - goto .Lcompile_line_section; - - .Lcompile_line_section; - loca8 := 0; - - .Lcompile_line_end; - _skip_spaces(); - - return loca8 -end - -proc _compile_text_section() -var loca0: Word +# Entry point. +.globl _start +proc _start(); begin - loca0 := 0x6365732e; - _write_s(4, @loca0); - loca0 := 0x6e6f6974; - _write_s(4, @loca0); - loca0 := 0x65742e20; - _write_s(4, @loca0); - loca0 := 0x0a7478; - _write_s(3, @loca0) -end + # Read the source from the standard input. + la a0, source_code + li a1, SOURCE_BUFFER_SIZE # Buffer size. 
+ _read_file(); + _compile(); -proc _compile_entry_point() -var loca0: Word -begin - loca0 := 0x7079742e; - _write_s(4, @loca0); - loca0 := 0x735f2065; - _write_s(4, @loca0); - loca0 := 0x74726174; - _write_s(4, @loca0); - loca0 := 0x6640202c; - _write_s(4, @loca0); - loca0 := 0x74636e75; - _write_s(4, @loca0); - loca0 := 0x0a6e6f69; - _write_s(4, @loca0); - loca0 := 0x6174735f; - _write_s(4, @loca0); - loca0 := 0x0a3a7472; - _write_s(4, @loca0); - - _advance(6) -end - -proc _compile_exit() -var loca0: Word -begin - loca0 := 0x6120696c; - _write_s(4, @loca0); - loca0 := 0x30202c30; - _write_s(4, @loca0); - loca0 := 0x20696c0a; - _write_s(4, @loca0); - loca0 := 0x202c3761; - _write_s(4, @loca0); - loca0 := 0x650a3339; - _write_s(4, @loca0); - loca0 := 0x6c6c6163; - _write_s(4, @loca0); - loca0 := 0x0a; - _write_s(1, @loca0); - - _advance(4); - _skip_spaces() -end - -proc _read_line() -var - loca0: ^Byte - loca4: Byte -begin - loca0 := _current(); - - .Lread_line_do; - loca4 := _front(loca0); - if loca4 = 0 then - goto .Lread_line_end - end; - if loca4 = 0x0a then - goto .Lread_line_end - end; - loca0 := loca0 + 1; - goto .Lread_line_do; - - .Lread_line_end; - loca4 := _current(); - return loca0 - loca4 -end - -proc _compile() -var - loca0: Word - loca4: Word - loca8: Bool - loca12: Char - loca16: ^Byte -begin - loca4 := 0; - - .Lcompile_do; - loca16 := _current(); - loca12 := _front(loca16); - - if loca12 = 0 then - goto .Lcompile_end - end; - - _skip_spaces(); - loca0 := _read_line(); - loca8 := _compile_line(loca0, loca4); - - if loca8 = 0 then - goto .Lcompile_do - end; - loca4 := loca4 or loca8; - - goto .Lcompile_do; - .Lcompile_end -end - -proc _front(loca84: ^Word) -begin - return _get(loca84) & 0xff -end - -proc _main() -begin - _read_file(source_code, 81920); - - _label_counter(0) -end - -begin - _main(); - _compile() -end. + # Call exit. + li a0, 0 # Use 0 return code. + li a7, 93 # SYS_EXIT. 
+ ecall +end; diff --git a/boot/stage3.elna b/boot/stage3.elna new file mode 100644 index 0000000..e85f498 --- /dev/null +++ b/boot/stage3.elna @@ -0,0 +1,842 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage2 compiler. +# +# It supports declaring and calling procedures without arguments. +# A procedure name should start with an underscore. + +.section .rodata + +.type keyword_equ, @object +keyword_equ: .ascii ".equ" +.equ KEYWORD_EQU_SIZE, 4 + +.type keyword_section, @object +keyword_section: .ascii ".section" +.equ KEYWORD_SECTION_SIZE, 8 + +.type keyword_type, @object +keyword_type: .ascii ".type" +.equ KEYWORD_TYPE_SIZE, 5 + +.type keyword_ret, @object +keyword_ret: .ascii "ret" +.equ KEYWORD_RET_SIZE, 3 + +.type keyword_global, @object +keyword_global: .ascii ".globl" +.equ KEYWORD_GLOBAL_SIZE, 6 + +.type keyword_proc, @object +keyword_proc: .ascii "proc " +.equ KEYWORD_PROC_SIZE, 5 + +.type keyword_end, @object +keyword_end: .ascii "end" +.equ KEYWORD_END_SIZE, 3 + +.type keyword_begin, @object +keyword_begin: .ascii "begin" +.equ KEYWORD_BEGIN_SIZE, 5 + +.type keyword_var, @object +keyword_var: .ascii "var" +.equ KEYWORD_VAR_SIZE, 3 + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type 
asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +.equ SOURCE_BUFFER_SIZE, 81920 +.type source_code, @object +source_code: .zero SOURCE_BUFFER_SIZE + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 20(sp) + addi a0, sp, 20 + li a1, 1 + _write(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 20(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 20(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 20(sp) + addi a0, a0, 1 + sw a0, 20(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Parameters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 20(sp) + + _is_upper(); + sw a0, 16(sp) + + _is_lower(v20); + + lw t0, 20(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 16(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Examined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 20(sp) # Current token position. + sw zero, 16(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a directive. + # A directive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 20(sp) + lb a0, (a0) + _is_alpha(); + bnez a0, .read_token_next + + lw a0, 20(sp) + lb a0, (a0) + _is_digit(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. + lw t0, 16(sp) + addi t0, t0, 1 + sw t0, 16(sp) + + lw t0, 20(sp) + addi t0, t0, 1 + sw t0, 20(sp) + + goto .read_token_loop; + +.read_token_end: + lw a0, 16(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal.
+proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .Lmemcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .Lmemcmp_loop; + +.Lmemcmp_end: +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + # Move the source code position forward by a0 bytes. + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + sw a0, 20(sp) + + la a0, source_code_position + lw a0, (a0) + lw a1, 20(sp) + _write(); + + lw a0, 20(sp) +end; + +proc _compile_section(); +begin + # Print and skip the .section directive and a space after it. + li a0, KEYWORD_SECTION_SIZE + 1 + _write_token(); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Skips a line without printing it. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte.
+ _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + la a0, source_code_position + lw a0, (a0) + li a1, 1 + _write(); + + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_character_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + +.compile_character_literal_loop: + la a0, source_code_position + lw a0, (a0) + li a1, 1 + _write(); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\'' + beq a0, t1, .compile_character_literal_end + + _write_c(); + _advance_token(1); + + goto .compile_character_literal_loop; + +.compile_character_literal_end: + li a0, '\'' + _write_c(); + + _write_c('\n'); + + _advance_token(2); +end; + +proc _compile_variable_expression(); +begin + la a0, asm_lw + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + la a0, source_code_position + lw a0, (a0) + addi a0, a0, 1 + li a1, 2 + _write(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + + _advance_token(3); + +end; + +proc _compile_expression(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, '\'' + beq a0, t1, .compile_expression_character_literal + + li t1, 'v' + beq a0, t1, .compile_expression_variable + + _is_digit(); + bnez a0, .compile_expression_integer_literal + + goto .compile_expression_end; + +.compile_expression_character_literal: + _compile_character_literal(); + goto .compile_expression_end; + +.compile_expression_integer_literal: + _compile_integer_literal(); + goto .compile_expression_end; + +.compile_expression_variable: + _compile_variable_expression(); + goto .compile_expression_end;; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + _read_token(); + sw a0, 20(sp) + la 
t0, source_code_position + lw t0, (t0) + sw t0, 16(sp) + + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + + _compile_expression(); + +.compile_call_finalize: + la a0, asm_call + _write_z(); + + lw a0, 16(sp) + lw a1, 20(sp) + _write(); + + # Skip the right paren. + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 20(sp) + + la a0, asm_j + _write_z(); + + _write_token(v20); + _advance_token(); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_semicolon: + _advance_token(2); + + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, KEYWORD_END_SIZE + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + li a0, KEYWORD_PROC_SIZE + _advance_token(); + + _read_token(); + sw a0, 20(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. 
+ la a0, asm_type_directive + _write_z(); + + _write_token(v20); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + _write_token(v20); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 20(sp) + addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + li a0, KEYWORD_END_SIZE + 2 + _advance_token(); +end; + +proc _compile_type(); +begin + # Print and skip the .type directive and a space after it. + li a0, KEYWORD_TYPE_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the symbol name, comma, space and @. + lw a0, 20(sp) + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + sw a0, 16(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _compile_equ(); +begin + # Print and skip the .equ directive and a space after it. + li a0, KEYWORD_EQU_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the constant name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the constant name, comma and space. + lw a0, 20(sp) + addi a0, a0, 2 + _write_token(); + _advance_token(); + + # Read the constant value. + _read_token(); + sw a0, 16(sp) + + # Print and skip the constant value and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); +end; + +proc _skip_newlines(); +begin + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_equ + li a2, KEYWORD_EQU_SIZE + _memcmp(); + + beqz a0, .compile_equ + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, KEYWORD_SECTION_SIZE + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, KEYWORD_TYPE_SIZE + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, KEYWORD_PROC_SIZE + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, KEYWORD_GLOBAL_SIZE + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. + goto .compile_end; + +.compile_equ: + _compile_equ(); + + goto .compile_loop; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, SOURCE_BUFFER_SIZE # Buffer size. + _read_file(); + _compile(); + + # Call exit. + li a0, 0 # Use 0 return code. + li a7, 93 # SYS_EXIT. 
+ ecall +end; diff --git a/boot/test.elna b/boot/test.elna deleted file mode 100644 index e56547d..0000000 --- a/boot/test.elna +++ /dev/null @@ -1,14 +0,0 @@ -program - -proc main(x: Word, y: Word) -begin - _write_s(4, @x); - _write_s(4, @y); - - y := 0x0a2c3063; - _write_s(4, @y) -end - -begin - main(0x0a2c3061, 0x0a2c3062) -end. diff --git a/rakelib/stage.rake b/rakelib/stage.rake deleted file mode 100644 index 6f61cae..0000000 --- a/rakelib/stage.rake +++ /dev/null @@ -1,61 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. -} -# frozen_string_literal: true - -CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' -SYSROOT = 'build/sysroot' -QEMU = 'qemu-riscv32' - -def assemble_stage(output, compiler, source) - arguments = [QEMU, '-L', SYSROOT, *compiler] - - puts Term::ANSIColor.green(arguments * ' ') - puts - Open3.popen2(*arguments) do |qemu_in, qemu_out| - qemu_in.write File.read(*source) - qemu_in.close - - IO.copy_stream qemu_out, output - qemu_out.close - end -end - -library = [] - -Dir.glob('boot/*.s').each do |assembly_source| - source_basename = Pathname.new(assembly_source).basename - target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o') - - file target_object.to_s => [assembly_source, 'build/boot'] do |t| - sh CROSS_GCC, '-c', '-o', t.name, assembly_source - end - library << assembly_source unless source_basename.to_s.start_with? 'stage' -end - -desc 'Initial stage' -file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t| - sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites -end - -file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t| - source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? 
'.elna' } - - File.open t.name, 'w' do |output| - assemble_stage output, exe, source - end -end - -['build/boot/stage2a', 'build/boot/stage2b'].each do |exe| - file exe => [exe.ext('.s'), *library] do |t| - sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites - end -end - -file 'build/boot/stage2b.s' => ['build/boot/stage2a', 'boot/stage2.elna'] do |t| - source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' } - - File.open t.name, 'w' do |output| - assemble_stage output, exe, source - end -end