From 536443b020d01d0d5372496529086a11b2486621 Mon Sep 17 00:00:00 2001
From: Eugen Wissner
Date: Mon, 21 Apr 2025 22:56:50 +0200
Subject: [PATCH] Add stages and a rakefile

---
 Rakefile           |  41 ++
 boot/asm-boot.s    | 894 +++++++++++++++++++++++++++++++++++++++++++-
 boot/common-boot.s | 906 +--------------------------------------------
 boot/echo-boot.s   |  35 ++
 boot/stage2.elna   |  32 ++
 5 files changed, 1011 insertions(+), 897 deletions(-)
 create mode 100644 Rakefile
 create mode 100644 boot/echo-boot.s
 create mode 100644 boot/stage2.elna

diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..368abfa
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,41 @@
+require 'open3'
+require 'rake/clean'
+
+CLOBBER.include 'build'
+
+CROSS_GCC = '../riscv32-ilp32d--glibc/bin/riscv32-linux-gcc'
+SYSROOT = '../riscv32-ilp32d--glibc/riscv32-buildroot-linux-gnu/sysroot'
+QEMU = 'qemu-riscv32'
+
+desc 'Final stage'
+task default: 'build/stage2'
+
+directory 'build'
+
+desc 'Initial stage'
+file 'build/stage1' => ['boot/echo-boot.s', 'boot/common-boot.s', 'build'] do |t|
+  assembler = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' }
+
+  sh CROSS_GCC, '-nostdlib', '-o', t.name, *assembler
+end
+
+# Translates boot/stage2.elna with the stage1 executable running under QEMU:
+# the source goes to the standard input, the generated assembly comes back on
+# the standard output and becomes the task target.
+file 'build/stage2.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
+  sources, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
+  arguments = [QEMU, '-L', SYSROOT, *exe]
+
+  puts(arguments * ' ')
+  puts
+  # capture2 feeds stdin and drains stdout concurrently, so neither pipe can fill up and block.
+  output, status = Open3.capture2(*arguments, stdin_data: File.read(*sources))
+  # Fail before touching the target: a half-written file would look up to date to rake.
+  raise "#{arguments * ' '} failed: #{status}" unless status.success?
+
+  File.write t.name, output
+end
+
+file 'build/stage2' => ['build/stage2.s', 'boot/common-boot.s'] do |t|
+  sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
+end
diff --git a/boot/asm-boot.s b/boot/asm-boot.s
index 594c624..80167a2 100644
--- a/boot/asm-boot.s
+++ b/boot/asm-boot.s
@@ -1,18 +1,897 @@
 # s1 - Contains the current position in the source text.
-.data +.global _start # Program entry point. + +.section .rodata -.equ SYS_EXIT, 93 .equ SOURCE_BUFFER_SIZE, 2048 -.bss +asm_begin: .ascii ".text\n.global _start\n_start:\naddi sp, sp, -64\nsw ra, 60(sp)\nsw s0, 56(sp)\naddi s0, sp, 64\n" +.equ ASM_BEGIN_SIZE, . - asm_begin +asm_end: .ascii "addi a0, zero, 0\naddi a7, zero, 93\necall\nlw ra, 60(sp)\nlw s0, 56(sp)\naddi sp, sp, 64\nret\n" +.equ ASM_END_SIZE, . - asm_end +asm_program: .ascii ".bss\n" +.equ ASM_PROGRAM_SIZE, . - asm_program +asm_type: .ascii ".type " +.equ ASM_TYPE_SIZE, . - asm_type +asm_object: .ascii ", @object\n" +.equ ASM_OBJECT_SIZE, . - asm_object +asm_size: .ascii ".size " +.equ ASM_SIZE_SIZE, . - asm_size +asm_zero: .ascii ".zero " +.equ ASM_ZERO_SIZE, . - asm_zero +asm_global: .ascii ".global " +.equ ASM_GLOBAL_SIZE, . - asm_global + +token_begin: .string "begin" +token_end: .string "end" +token_import: .string "import" +token_open_paren: .string "(" +token_close_paren: .string ")" +token_open_square: .string "[" +token_assign: .string ":=" +token_var: .string "var" +token_comma: .string "," + +space: .ascii " " +comma: .ascii "," +new_line: .ascii "\n" +colon: .ascii ":" +digit_zero: .ascii "0" + +instruction_la: .ascii "la" +instruction_call: .ascii "call" +instruction_addi: .ascii "addi" +instruction_li: .ascii "li" +instruction_sw: .ascii "sw" +instruction_lw: .ascii "lw" + +register_a0: .ascii "a0" +register_sp: .ascii "sp" +register_a: .ascii "a" + +.section .bss .global source_code .type source_code, @object .size source_code, SOURCE_BUFFER_SIZE source_code: .zero SOURCE_BUFFER_SIZE -.text -.global _start # Program entry point. +.section .text + +.type _skip_spaces, @function +_skip_spaces: +.Lspace_loop_do: + lbu t0, (s1) # t0 = Current character. 
+
+    li t1, ' '
+    beq t0, t1, .Lspace_loop_repeat
+    li t1, '\t'
+    beq t0, t1, .Lspace_loop_repeat
+    li t1, '\n'
+    beq t0, t1, .Lspace_loop_repeat
+    li t1, '\r'
+    beq t0, t1, .Lspace_loop_repeat
+
+    j .Lspace_loop_end
+.Lspace_loop_repeat:
+    addi s1, s1, 1
+    j .Lspace_loop_do
+
+.Lspace_loop_end:
+    ret
+
+# Compares two strings: one has an explicit length, the other one is null-terminated.
+#
+# a0 - The address of the token string.
+# a1 - The length of the string in a0.
+# a2 - The address of the null-terminated string.
+#
+# If the strings match sets a0 to 0, otherwise sets it to 1.
+.type _token_compare, @function
+_token_compare:
+    addi t0, a0, 0 # t0 = Cursor in the length-delimited token.
+    addi t1, a1, 0 # t1 = Remaining token length.
+    addi t2, a2, 0 # t2 = Cursor in the null-terminated string.
+
+.Ltoken_compare_loop:
+    lbu t3, (t2) # t3 = Current character of the null-terminated string.
+
+    # Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the
+    # other string is 0.
+    or t4, t3, t1
+    beqz t4, .Ltoken_compare_equal
+
+    beqz t1, .Ltoken_compare_not_equal
+    beqz t3, .Ltoken_compare_not_equal
+
+    lbu t4, (t0) # t4 = Current character of the token.
+    bne t3, t4, .Ltoken_compare_not_equal
+
+    addi t0, t0, 1
+    addi t1, t1, -1
+    addi t2, t2, 1
+    j .Ltoken_compare_loop
+
+.Ltoken_compare_not_equal:
+    li a0, 1
+    j .Ltoken_compare_end
+
+.Ltoken_compare_equal:
+    li a0, 0
+
+.Ltoken_compare_end:
+    ret
+
+# Reads a token and returns its length in a0.
+# _read_token doesn't change s1, it finds the length of the token s1 is pointing to.
+.type _read_token, @function
+_read_token:
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    lbu t0, (s1) # t0 = Current character.
+    sw zero, 4(sp) # 4(sp) = Token length, initially 0.
+
+    li t1, '.'
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ','
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ':'
+    beq t0, t1, .Ltoken_character_colon
+
+    li t1, ';'
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, '('
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ')'
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, '['
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ']'
+    beq t0, t1, .Ltoken_character_single
+
+.Ltoken_character_loop_do: # Expect an identifier or a number.
+    lw t6, 4(sp) # t6 = Token length so far.
+    add t1, s1, t6
+    lbu a0, (t1) # a0 = Current character.
+
+    call is_alnum
+
+    beqz a0, .Ltoken_character_end
+    lw t6, 4(sp)
+    addi t6, t6, 1
+    sw t6, 4(sp)
+    j .Ltoken_character_loop_do
+
+.Ltoken_character_single:
+    lw t6, 4(sp)
+    addi t6, t6, 1 # Count the current character.
+    sw t6, 4(sp)
+    j .Ltoken_character_end
+
+.Ltoken_character_colon:
+    lbu t0, 1(s1) # t0 = The character after the colon.
+    lw t6, 4(sp)
+    addi t6, t6, 1 # Count the colon itself.
+    sw t6, 4(sp)
+
+    li t1, '='
+    beq t0, t1, .Ltoken_character_single # ":=" - count the "=" as well.
+    j .Ltoken_character_end
+
+.Ltoken_character_end:
+    lw a0, 4(sp) # Return the token length.
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
+
+# Generate entry point symbol.
+.type _compile_begin, @function
+_compile_begin:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    # Write initial assembler.
+    la a0, asm_begin
+    addi a1, zero, ASM_BEGIN_SIZE
+    call write_out
+
+    addi s1, s1, 5 # Skip the "begin" keyword.
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Generate program termination code.
+.type _compile_end, @function
+_compile_end:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    # Write closing assembler.
+    la a0, asm_end
+    addi a1, zero, ASM_END_SIZE
+    call write_out
+
+    addi s1, s1, 3 # Skip the "end" keyword.
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Ignores the import.
+.type _compile_import, @function
+_compile_import:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    addi s1, s1, 6 # Skip the "import" keyword.
+    call _skip_spaces
+    call _read_token
+    add s1, s1, a0 # Skip the imported module name.
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Compiles a procedure call. Expects s1 to point to the first argument.
+# a0 - Pointer to the procedure name.
+# a1 - Length of the procedure name.
+#
+# Returns the procedure result in a0.
+.type _compile_call, @function
+_compile_call:
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    sw a0, 20(sp) # 20(sp) = Pointer to the procedure name.
+    sw a1, 16(sp) # 16(sp) = Length of the procedure name.
+    sw zero, 12(sp) # Argument count for a procedure call.
+
+.Lcompile_call_paren:
+    call _skip_spaces
+    call _read_token
+    addi a1, a0, 0
+    addi a0, s1, 0
+    la a2, token_close_paren
+    call _token_compare
+    beqz a0, .Lcompile_call_complete
+
+.Lcompile_call_argument:
+    call _build_expression
+
+    la a0, instruction_sw
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp) # Argument count for a procedure call.
+
+    # Only 8 arguments are supported with a0-a7.
+    # Save all arguments on the stack so they aren't overridden afterwards.
+    # The offset on the stack always has two digits in this case.
+    li t1, -4
+    mul t1, t0, t1
+    addi t1, t1, 52 # t1 = 52 - 4 * argument index.
+    li t2, 10
+    div t3, t1, t2
+    rem t4, t1, t2
+    addi t3, t3, '0'
+    addi t4, t4, '0'
+
+    sw t3, 8(sp) # 8(sp) = Tens digit of the offset as a character.
+    sw t4, 4(sp) # 4(sp) = Ones digit of the offset as a character.
+
+    addi a0, sp, 8
+    li a1, 1
+    call write_out
+
+    addi a0, sp, 4
+    li a1, 1
+    call write_out
+
+    la a0, token_open_paren
+    li a1, 1
+    call write_out
+
+    la a0, register_sp
+    li a1, 2
+    call write_out
+
+    la a0, token_close_paren
+    li a1, 1
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    call _skip_spaces
+    call _read_token
+    addi a1, a0, 0
+    addi a0, s1, 0
+    la a2, token_comma
+    call _token_compare
+
+    /* DEBUG. Write the current token to stderr.
+    addi a0, zero, STDERR
+    addi a1, s1, 0
+    li a2, 4 #(sp)
+    addi a7, zero, SYS_WRITE
+    ecall
+    addi a0, zero, STDERR
+    la a1, token_open_square
+    li a2, 1
+    addi a7, zero, SYS_WRITE
+    ecall
+    DEBUG. End */
+
+    bnez a0, .Lcompile_call_paren
+
+    lw t0, 12(sp) # Argument count for a procedure call.
+    addi t0, t0, 1
+    sw t0, 12(sp)
+
+    addi s1, s1, 1 # Skip the comma between the arguments.
+    j .Lcompile_call_argument
+
+.Lcompile_call_complete:
+    sw zero, 12(sp) # Reuse the slot as the index of the register to restore.
+
+.Lcompile_call_restore:
+    # Just go through all a0-a7 registers and read them from stack.
+    # If this stack value contains garbage, the procedure just shouldn't use it.
+    lw t0, 12(sp)
+    li t1, 7
+    bgt t0, t1, .Lcompile_call_perform
+
+    la a0, instruction_lw
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp)
+    addi t0, t0, '0' # Convert the register index to its digit character.
+    sw t0, 8(sp)
+
+    addi a0, sp, 8
+    li a1, 1
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp) # Index of the register being restored.
+
+    li t1, -4
+    mul t1, t0, t1
+    addi t1, t1, 52 # t1 = 52 - 4 * register index.
+    li t2, 10
+    div t3, t1, t2
+    rem t4, t1, t2
+    addi t3, t3, '0'
+    addi t4, t4, '0'
+
+    sw t3, 8(sp)
+    sw t4, 4(sp)
+
+    addi a0, sp, 8
+    li a1, 1
+    call write_out
+
+    addi a0, sp, 4
+    li a1, 1
+    call write_out
+
+    la a0, token_open_paren
+    li a1, 1
+    call write_out
+
+    la a0, register_sp
+    li a1, 2
+    call write_out
+
+    la a0, token_close_paren
+    li a1, 1
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp) # Increment.
+    addi t0, t0, 1
+    sw t0, 12(sp)
+
+    j .Lcompile_call_restore
+
+.Lcompile_call_perform:
+    la a0, instruction_call
+    li a1, 4
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    lw a0, 20(sp) # Procedure name saved on entry.
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    call _skip_spaces
+    addi s1, s1, 1 # Skip the close paren.
+
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+.type compile, @function
+compile: # Translates the source text s1 points to, token by token.
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    # Write .bss section header for global variables.
+    la a0, asm_program
+    addi a1, zero, ASM_PROGRAM_SIZE
+    call write_out
+
+    call _skip_spaces
+    addi s1, s1, 7 # Skip "program" keyword.
+
+.Lcharacter_loop_do:
+    call _skip_spaces
+
+    lbu t0, (s1) # t0 = Current character.
+    beqz t0, .Lcharacter_loop_end
+
+    call _read_token
+    sw a0, 4(sp) # Save the token length on the stack.
+    beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input.
+
+    lw a0, 4(sp)
+    call _handle_token
+
+    j .Lcharacter_loop_do
+.Lcharacter_loop_end:
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
+
+# Emits code that evaluates an expression and saves the result in a0.
+.type _build_expression, @function
+_build_expression:
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    call _skip_spaces
+    call _read_token
+    sw s1, 4(sp) # 4(sp) = Pointer to the expression token.
+    sw a0, 0(sp) # 0(sp) = Length of the expression token.
+
+    # Integer literal.
+    addi a0, s1, 0
+    lb a0, (a0)
+    call is_digit
+    bnez a0, .Lbuild_expression_number_literal
+
+    # Named identifier.
+    la a0, instruction_la # Emit "la a0,<identifier>".
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw a0, 4(sp)
+    lw a1, 0(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    j .Lbuild_expression_end
+
+.Lbuild_expression_number_literal:
+    la a0, instruction_li # Emit "li a0,<literal>".
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw a0, 4(sp)
+    lw a1, 0(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    j .Lbuild_expression_end
+
+.Lbuild_expression_end:
+    lw a0, 0(sp)
+    add s1, s1, a0 # Skip the expression token.
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
+
+# Compiles a statement beginning with an identifier.
+.type _compile_identifier, @function
+_compile_identifier:
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    # Save the pointer to the identifier and its length on the stack.
+    sw a0, 20(sp)
+    sw a1, 16(sp)
+
+    add s1, s1, a1 # Skip the identifier.
+    call _skip_spaces
+    call _read_token
+
+    # Save the pointer and the length of the token following the identifier.
+    sw s1, 12(sp)
+    sw a0, 8(sp)
+
+    add s1, s1, a0 # Skip that token.
+    call _skip_spaces
+
+    lw a0, 12(sp)
+    lw a1, 8(sp)
+    la a2, token_assign
+    call _token_compare
+    beqz a0, .Lcompile_identifier_assign # The token is ":=".
+
+    lw a0, 12(sp)
+    lw a1, 8(sp)
+    la a2, token_open_paren
+    call _token_compare
+    beqz a0, .Lcompile_identifier_call # The token is "(".
+
+    j .Lcompile_identifier_end
+
+.Lcompile_identifier_call:
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call _compile_call
+
+    j .Lcompile_identifier_end
+
+.Lcompile_identifier_assign:
+    call _build_expression
+
+    la a0, instruction_addi # Emit "addi <identifier>,a0,0".
+    li a1, 4
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    la a0, digit_zero
+    li a1, 1
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    j .Lcompile_identifier_end
+
+.Lcompile_identifier_end:
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+.type _compile_var, @function
+_compile_var:
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    # Variable name.
+    addi s1, s1, 3 # Skip the "var" keyword.
+    call _skip_spaces
+    call _read_token
+    sw s1, 20(sp) # 20(sp) = Pointer to the variable name.
+    sw a0, 16(sp) # 16(sp) = Length of the variable name.
+    add s1, s1, a0
+
+    # Skip the colon.
+    call _skip_spaces
+    call _read_token
+    add s1, s1, a0
+
+    call _skip_spaces
+    call _read_token
+    sw a0, 12(sp) # 12(sp) = Length of the token after the colon.
+
+    addi a0, s1, 0
+    lw a1, 12(sp)
+    la a2, token_open_square
+    call _token_compare
+    beqz a0, .Lcompile_var_array
+
+    j .Lcompile_var_end
+
+.Lcompile_var_array:
+    call _skip_spaces
+    addi s1, s1, 1 # Skip the opening square bracket.
+
+    call _skip_spaces
+    call _read_token
+    sw a0, 8(sp) # 8(sp) = Length of the array size token.
+
+    la a0, asm_type # Emit ".type <name>, @object".
+    li a1, ASM_TYPE_SIZE
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, asm_object
+    li a1, ASM_OBJECT_SIZE
+    call write_out
+
+    la a0, asm_size # Emit ".size <name>,<size>".
+    li a1, ASM_SIZE_SIZE
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    addi a0, s1, 0
+    lw a1, 8(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    lw a0, 20(sp) # Emit "<name>:.zero <size>".
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, colon
+    li a1, 1
+    call write_out
+
+    la a0, asm_zero
+    li a1, ASM_ZERO_SIZE
+    call write_out
+
+    addi a0, s1, 0
+    lw a1, 8(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    la a0, asm_global # Emit ".global <name>".
+    li a1, ASM_GLOBAL_SIZE
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    lw a0, 8(sp)
+    add s1, s1, a0 # Skip the array size token.
+
+    call _skip_spaces
+    addi s1, s1, 1 # Skip the closing square bracket.
+
+    call _skip_spaces
+    call _read_token
+
+    sw a0, 12(sp) # 12(sp) = Length of the token following the bracket.
+    j .Lcompile_var_end
+
+.Lcompile_var_end:
+    lw a0, 12(sp)
+    add s1, s1, a0 # Skip the token whose length is in 12(sp).
+
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+.type _handle_token, @function
+_handle_token:
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    sw a0, 20(sp) # 20(sp) = Length of the token to handle.
+
+    # Detect what token has been read.
+ addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_begin + call _token_compare + beqz a0, .Lhandle_token_begin + + addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_end + call _token_compare + beqz a0, .Lhandle_token_end + + addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_import + call _token_compare + beqz a0, .Lhandle_token_import + + addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_var + call _token_compare + beqz a0, .Lhandle_token_var + + # If the first symbol in the token is a character, assume an identifier. + addi a0, s1, 0 + lb a0, (a0) + call is_alpha + bnez a0, .Lhandle_token_identifier + + # Ignore the unknown token. + lw t0, 20(sp) + add s1, s1, t0 + j .Lhandle_token_return + +.Lhandle_token_begin: + call _compile_begin + j .Lhandle_token_return + +.Lhandle_token_end: + call _compile_end + j .Lhandle_token_return + +.Lhandle_token_import: + call _compile_import + j .Lhandle_token_return + +.Lhandle_token_var: + call _compile_var + j .Lhandle_token_return + +.Lhandle_token_identifier: + addi a0, s1, 0 + lw a1, 20(sp) + call _compile_identifier + j .Lhandle_token_return + +.Lhandle_token_return: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret _start: # Read the source from the standard input. @@ -24,6 +903,5 @@ _start: call compile # Call exit. - addi a0, zero, 0 # Use 0 return code. - addi a7, zero, SYS_EXIT - ecall + li a0, 0 # Use 0 return code. 
+ call exit diff --git a/boot/common-boot.s b/boot/common-boot.s index 1ad60a4..bfd1dbb 100644 --- a/boot/common-boot.s +++ b/boot/common-boot.s @@ -1,57 +1,15 @@ -.data +.global is_alpha, is_digit, is_alnum, write_out, read_file, exit + +.section .rodata .equ SYS_READ, 63 .equ SYS_WRITE, 64 +.equ SYS_EXIT, 93 .equ STDIN, 0 .equ STDOUT, 1 .equ STDERR, 2 -token_begin: .string "begin" -token_end: .string "end" -token_import: .string "import" -token_open_paren: .string "(" -token_close_paren: .string ")" -token_open_square: .string "[" -token_assign: .string ":=" -token_var: .string "var" -token_comma: .string "," - -space: .ascii " " -comma: .ascii "," -new_line: .ascii "\n" -colon: .ascii ":" -digit_zero: .ascii "0" - -instruction_la: .ascii "la" -instruction_call: .ascii "call" -instruction_addi: .ascii "addi" -instruction_li: .ascii "li" -instruction_sw: .ascii "sw" -instruction_lw: .ascii "lw" - -register_a0: .ascii "a0" -register_sp: .ascii "sp" -register_a: .ascii "a" - -asm_program: .ascii ".bss\n" -.equ ASM_PROGRAM_SIZE, . - asm_program -asm_begin: .ascii ".text\n.global _start\n_start:\naddi sp, sp, -64\nsw ra, 60(sp)\nsw s0, 56(sp)\naddi s0, sp, 64\n" -.equ ASM_BEGIN_SIZE, . - asm_begin -asm_end: .ascii "addi a0, zero, 0\naddi a7, zero, 93\necall\nlw ra, 60(sp)\nlw s0, 56(sp)\naddi sp, sp, 64\nret\n" -.equ ASM_END_SIZE, . - asm_end -asm_type: .ascii ".type " -.equ ASM_TYPE_SIZE, . - asm_type -asm_object: .ascii ", @object\n" -.equ ASM_OBJECT_SIZE, . - asm_object -asm_size: .ascii ".size " -.equ ASM_SIZE_SIZE, . - asm_size -asm_zero: .ascii ".zero " -.equ ASM_ZERO_SIZE, . - asm_zero -asm_global: .ascii ".global " -.equ ASM_GLOBAL_SIZE, . - asm_global - -.text -.global is_alpha, is_digit, is_alnum, compile, write_out, read_file +.section .text # Detects if the passed character is a 7-bit alpha character or an underscore. # The character is passed in a0. 
@@ -113,803 +71,6 @@ is_alnum: addi sp, sp, 16 ret -.type _skip_spaces, @function -_skip_spaces: -.Lspace_loop_do: - lbu t0, (s1) # t0 = Current character. - - li t1, ' ' - beq t0, t1, .Lspace_loop_repeat - li t1, '\t' - beq t0, t1, .Lspace_loop_repeat - li t1, '\n' - beq t0, t1, .Lspace_loop_repeat - li t1, '\r' - beq t0, t1, .Lspace_loop_repeat - - j .Lspace_loop_end -.Lspace_loop_repeat: - addi s1, s1, 1 - j .Lspace_loop_do - -.Lspace_loop_end: - ret - -# Compares two string, which of one has a length, the other one is null-terminated. -# -# a0 - The address of the token string. -# a1 - The length of the string in a0. -# a2 - The address of the null-terminated string. -# -# If the strings match sets a0 to 0, otherwise sets it to 1. -.type _token_compare, @function -_token_compare: - addi t0, a0, 0 - addi t1, a1, 0 - addi t2, a2, 0 - -.Ltoken_compare_loop: - lbu t3, (t2) - - # Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the - # another string is 0. - or t4, t3, t1 - beqz t4, .Ltoken_compare_equal - - beqz t1, .Ltoken_compare_not_equal - beqz t3, .Ltoken_compare_not_equal - - lbu t4, (t0) - bne t3, t4, .Ltoken_compare_not_equal - - addi t0, t0, 1 - addi t1, t1, -1 - addi t2, t2, 1 - j .Ltoken_compare_loop - -.Ltoken_compare_not_equal: - li a0, 1 - j .Ltoken_compare_end - -.Ltoken_compare_equal: - li a0, 0 - -.Ltoken_compare_end: - ret - -# Reads a token and returns its length in a0. -# _read_token doesn't change s1, it finds the length of the token s1 is pointing to. -.type _read_token, @function -_read_token: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - lbu t0, (s1) # t0 = Current character. - sw zero, 4(sp) - - li t1, '.' 
- beq t0, t1, .Ltoken_character_single - - li t1, ',' - beq t0, t1, .Ltoken_character_single - - li t1, ':' - beq t0, t1, .Ltoken_character_colon - - li t1, ';' - beq t0, t1, .Ltoken_character_single - - li t1, '(' - beq t0, t1, .Ltoken_character_single - - li t1, ')' - beq t0, t1, .Ltoken_character_single - - li t1, '[' - beq t0, t1, .Ltoken_character_single - - li t1, ']' - beq t0, t1, .Ltoken_character_single - -.Ltoken_character_loop_do: # Expect an identifier or a number. - lw t6, 4(sp) - add t1, s1, t6 - lbu a0, (t1) # a0 = Current character. - - call is_alnum - - beqz a0, .Ltoken_character_end - lw t6, 4(sp) - addi t6, t6, 1 - sw t6, 4(sp) - j .Ltoken_character_loop_do - -.Ltoken_character_single: - lw t6, 4(sp) - addi t6, t6, 1 - sw t6, 4(sp) - j .Ltoken_character_end - -.Ltoken_character_colon: - lbu t0, 1(s1) # t0 = The character after the colon. - lw t6, 4(sp) - addi t6, t6, 1 - sw t6, 4(sp) - - li t1, '=' - beq t0, t1, .Ltoken_character_single - j .Ltoken_character_end - -.Ltoken_character_end: - lw a0, 4(sp) - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Generate entry point symbol. -.type _compile_begin, @function -_compile_begin: - # Prologue. - addi sp, sp, -8 - sw ra, 4(sp) - sw s0, 0(sp) - addi s0, sp, 8 - - # Write initial assembler. - la a0, asm_begin - addi a1, zero, ASM_BEGIN_SIZE - call write_out - - addi s1, s1, 5 - - # Epilogue. - lw ra, 4(sp) - lw s0, 0(sp) - addi sp, sp, 8 - ret - -# Generate program termination code. -.type _compile_end, @function -_compile_end: - # Prologue. - addi sp, sp, -8 - sw ra, 4(sp) - sw s0, 0(sp) - addi s0, sp, 8 - - # Write closing assembler. - la a0, asm_end - addi a1, zero, ASM_END_SIZE - call write_out - - addi s1, s1, 3 - - # Epilogue. - lw ra, 4(sp) - lw s0, 0(sp) - addi sp, sp, 8 - ret - -# Ignores the import. -.type _compile_import, @function -_compile_import: - # Prologue. 
- addi sp, sp, -8 - sw ra, 4(sp) - sw s0, 0(sp) - addi s0, sp, 8 - - addi s1, s1, 6 - call _skip_spaces - call _read_token - add s1, s1, a0 # Skip the imported module name. - - # Epilogue. - lw ra, 4(sp) - lw s0, 0(sp) - addi sp, sp, 8 - ret - -# Compiles a procedure call. Expects s1 to point to the first argument. -# a0 - Pointer to the procedure name. -# a1 - Length of the procedure name. -# -# Returns the procedure result in a0. -.type _compile_call, @function -_compile_call: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw a0, 20(sp) - sw a1, 16(sp) - sw zero, 12(sp) # Argument count for a procedure call. - -.Lcompile_call_paren: - call _skip_spaces - call _read_token - addi a1, a0, 0 - addi a0, s1, 0 - la a2, token_close_paren - call _token_compare - beqz a0, .Lcompile_call_complete - -.Lcompile_call_argument: - call _build_expression - - la a0, instruction_sw - li a1, 2 - call write_out - - la a0, space - li a1, 1 - call write_out - - la a0, register_a0 - li a1, 2 - call write_out - - la a0, comma - li a1, 1 - call write_out - - lw t0, 12(sp) # Argument count for a procedure call. - - # Only 8 arguments are supported with a0-a7. - # Save all arguments on the stack so they aren't overriden afterwards. - # The offset on the stack always has two digits in this case. - li t1, -4 - mul t1, t0, t1 - addi t1, t1, 52 - li t2, 10 - div t3, t1, t2 - rem t4, t1, t2 - addi t3, t3, '0' - addi t4, t4, '0' - - sw t3, 8(sp) - sw t4, 4(sp) - - addi a0, sp, 8 - li a1, 1 - call write_out - - addi a0, sp, 4 - li a1, 1 - call write_out - - la a0, token_open_paren - li a1, 1 - call write_out - - la a0, register_sp - li a1, 2 - call write_out - - la a0, token_close_paren - li a1, 1 - call write_out - - la a0, new_line - li a1, 1 - call write_out - - call _skip_spaces - call _read_token - addi a1, a0, 0 - addi a0, s1, 0 - la a2, token_comma - call _token_compare - - /* DEBUG. Write the current token to stderr. 
- addi a0, zero, STDERR - addi a1, s1, 0 - li a2, 4 #(sp) - addi a7, zero, SYS_WRITE - ecall - addi a0, zero, STDERR - la a1, token_open_square - li a2, 1 - addi a7, zero, SYS_WRITE - ecall - DEBUG. End */ - - bnez a0, .Lcompile_call_paren - - lw t0, 12(sp) # Argument count for a procedure call. - addi t0, t0, 1 - sw t0, 12(sp) - - addi s1, s1, 1 # Skip the comma between the arguments. - j .Lcompile_call_argument - -.Lcompile_call_complete: - sw zero, 12(sp) - -.Lcompile_call_restore: - # Just go through all a0-a7 registers and read them from stack. - # If this stack value contains garbage, the procedure just shouldn't use it. - lw t0, 12(sp) - li t1, 7 - bgt t0, t1, .Lcompile_call_perform - - la a0, instruction_lw - li a1, 2 - call write_out - - la a0, space - li a1, 1 - call write_out - - la a0, register_a - li a1, 1 - call write_out - - lw t0, 12(sp) - addi t0, t0, '0' - sw t0, 8(sp) - - addi a0, sp, 8 - li a1, 1 - call write_out - - la a0, comma - li a1, 1 - call write_out - - lw t0, 12(sp) # Argument count for a procedure call. - - li t1, -4 - mul t1, t0, t1 - addi t1, t1, 52 - li t2, 10 - div t3, t1, t2 - rem t4, t1, t2 - addi t3, t3, '0' - addi t4, t4, '0' - - sw t3, 8(sp) - sw t4, 4(sp) - - addi a0, sp, 8 - li a1, 1 - call write_out - - addi a0, sp, 4 - li a1, 1 - call write_out - - la a0, token_open_paren - li a1, 1 - call write_out - - la a0, register_sp - li a1, 2 - call write_out - - la a0, token_close_paren - li a1, 1 - call write_out - - la a0, new_line - li a1, 1 - call write_out - - lw t0, 12(sp) # Increment. - addi t0, t0, 1 - sw t0, 12(sp) - - j .Lcompile_call_restore - -.Lcompile_call_perform: - la a0, instruction_call - li a1, 4 - call write_out - - la a0, space - li a1, 1 - call write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call write_out - - la a0, new_line - li a1, 1 - call write_out - - call _skip_spaces - addi s1, s1, 1 # Skip the close paren. - - # Epilogue. 
- lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -# Evalutes an expression and saves the result in a0. -.type _build_expression, @function -_build_expression: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - call _skip_spaces - call _read_token - sw s1, 4(sp) - sw a0, 0(sp) - - # Integer literal. - addi a0, s1, 0 - lb a0, (a0) - call is_digit - bnez a0, .Lbuild_expression_number_literal - - # Named identifier. - la a0, instruction_la - li a1, 2 - call write_out - - la a0, space - li a1, 1 - call write_out - - la a0, register_a0 - li a1, 2 - call write_out - - la a0, comma - li a1, 1 - call write_out - - lw a0, 4(sp) - lw a1, 0(sp) - call write_out - - la a0, new_line - li a1, 1 - call write_out - - j .Lbuild_expression_end - -.Lbuild_expression_number_literal: - la a0, instruction_li - li a1, 2 - call write_out - - la a0, space - li a1, 1 - call write_out - - la a0, register_a0 - li a1, 2 - call write_out - - la a0, comma - li a1, 1 - call write_out - - lw a0, 4(sp) - lw a1, 0(sp) - call write_out - - la a0, new_line - li a1, 1 - call write_out - - j .Lbuild_expression_end - -.Lbuild_expression_end: - lw a0, 0(sp) - add s1, s1, a0 - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - -# Compiles a statement beginning with an identifier. -.type _compile_identifier, @function -_compile_identifier: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - # Save the pointer to the identifier and its length on the stack. - sw a0, 20(sp) - sw a1, 16(sp) - - add s1, s1, a1 - call _skip_spaces - call _read_token - - # Save the pointer and the length of the token following the identifier. - sw s1, 12(sp) - sw a0, 8(sp) - - add s1, s1, a0 # Skip that token. 
- call _skip_spaces - - lw a0, 12(sp) - lw a1, 8(sp) - la a2, token_assign - call _token_compare - beqz a0, .Lcompile_identifier_assign - - lw a0, 12(sp) - lw a1, 8(sp) - la a2, token_open_paren - call _token_compare - beqz a0, .Lcompile_identifier_call - - j .Lcompile_identifier_end - -.Lcompile_identifier_call: - lw a0, 20(sp) - lw a1, 16(sp) - call _compile_call - - j .Lcompile_identifier_end - -.Lcompile_identifier_assign: - call _build_expression - - la a0, instruction_addi - li a1, 4 - call write_out - - la a0, space - li a1, 1 - call write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call write_out - - la a0, comma - li a1, 1 - call write_out - - la a0, register_a0 - li a1, 2 - call write_out - - la a0, comma - li a1, 1 - call write_out - - la a0, digit_zero - li a1, 1 - call write_out - - la a0, new_line - li a1, 1 - call write_out - - j .Lcompile_identifier_end - -.Lcompile_identifier_end: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -.type _compile_var, @function -_compile_var: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - # Variable name. - addi s1, s1, 3 - call _skip_spaces - call _read_token - sw s1, 20(sp) - sw a0, 16(sp) - add s1, s1, a0 - - # Skip the colon. - call _skip_spaces - call _read_token - add s1, s1, a0 - - call _skip_spaces - call _read_token - sw a0, 12(sp) - - addi a0, s1, 0 - lw a1, 12(sp) - la a2, token_open_square - call _token_compare - beqz a0, .Lcompile_var_array - - j .Lcompile_var_end - -.Lcompile_var_array: - call _skip_spaces - add s1, s1, 1 # Skip the opening square bracket. 
- - call _skip_spaces - call _read_token - sw a0, 8(sp) - - la a0, asm_type - li a1, ASM_TYPE_SIZE - call write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call write_out - - la a0, asm_object - li a1, ASM_OBJECT_SIZE - call write_out - - la a0, asm_size - li a1, ASM_SIZE_SIZE - call write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call write_out - - la a0, comma - li a1, 1 - call write_out - - addi a0, s1, 0 - lw a1, 8(sp) - call write_out - - la a0, new_line - li a1, 1 - call write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call write_out - - la a0, colon - li a1, 1 - call write_out - - la a0, asm_zero - li a1, ASM_ZERO_SIZE - call write_out - - addi a0, s1, 0 - lw a1, 8(sp) - call write_out - - la a0, new_line - li a1, 1 - call write_out - - la a0, asm_global - li a1, ASM_GLOBAL_SIZE - call write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call write_out - - la a0, new_line - li a1, 1 - call write_out - - lw a0, 8(sp) - add s1, s1, a0 - - call _skip_spaces - add s1, s1, 1 # Skip the closing square bracket. - - call _skip_spaces - call _read_token - - sw a0, 12(sp) - j .Lcompile_var_end - -.Lcompile_var_end: - lw a0, 12(sp) - add s1, s1, a0 - - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - -.type _handle_token, @function -_handle_token: - # Prologue. - addi sp, sp, -32 - sw ra, 28(sp) - sw s0, 24(sp) - addi s0, sp, 32 - - sw a0, 20(sp) - - # Detect what token has been read. - addi a0, s1, 0 - lw a1, 20(sp) - la a2, token_begin - call _token_compare - beqz a0, .Lhandle_token_begin - - addi a0, s1, 0 - lw a1, 20(sp) - la a2, token_end - call _token_compare - beqz a0, .Lhandle_token_end - - addi a0, s1, 0 - lw a1, 20(sp) - la a2, token_import - call _token_compare - beqz a0, .Lhandle_token_import - - addi a0, s1, 0 - lw a1, 20(sp) - la a2, token_var - call _token_compare - beqz a0, .Lhandle_token_var - - # If the first symbol in the token is a character, assume an identifier. 
- addi a0, s1, 0 - lb a0, (a0) - call is_alpha - bnez a0, .Lhandle_token_identifier - - # Ignore the unknown token. - lw t0, 20(sp) - add s1, s1, t0 - j .Lhandle_token_return - -.Lhandle_token_begin: - call _compile_begin - j .Lhandle_token_return - -.Lhandle_token_end: - call _compile_end - j .Lhandle_token_return - -.Lhandle_token_import: - call _compile_import - j .Lhandle_token_return - -.Lhandle_token_var: - call _compile_var - j .Lhandle_token_return - -.Lhandle_token_identifier: - addi a0, s1, 0 - lw a1, 20(sp) - call _compile_identifier - j .Lhandle_token_return - -.Lhandle_token_return: - # Epilogue. - lw ra, 28(sp) - lw s0, 24(sp) - addi sp, sp, 32 - ret - .type write, @function write_out: # Prologue. @@ -918,10 +79,10 @@ write_out: sw s0, 0(sp) addi s0, sp, 8 - addi a2, a1, 0 - addi a1, a0, 0 - addi a0, zero, STDOUT - addi a7, zero, SYS_WRITE + mv a2, a1 + mv a1, a0 + li a0, STDOUT + li a7, SYS_WRITE ecall # Epilogue. @@ -943,10 +104,10 @@ read_file: sw s0, 0(sp) addi s0, sp, 8 - addi a2, a1, 0 - addi a1, a0, 0 - addi a0, zero, STDIN - addi a7, zero, SYS_READ + mv a2, a1 + mv a1, a0 + li a0, STDIN + li a7, SYS_READ ecall # Epilogue. @@ -955,40 +116,7 @@ read_file: addi sp, sp, 8 ret -.type _compile, @function -compile: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - # Write .bss section header for global variables. - la a0, asm_program - addi a1, zero, ASM_PROGRAM_SIZE - call write_out - - call _skip_spaces - addi s1, s1, 7 # Skip "program" keyword. - -.Lcharacter_loop_do: - call _skip_spaces - - lbu t0, (s1) # t0 = Current character. - beqz t0, .Lcharacter_loop_end - - call _read_token - sw a0, 4(sp) # Save the token length on the stack. - beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input. - - lw a0, 4(sp) - call _handle_token - - j .Lcharacter_loop_do -.Lcharacter_loop_end: - - # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret +# Terminates the program. 
a0 contains the return code. +exit: + li a7, SYS_EXIT + ecall diff --git a/boot/echo-boot.s b/boot/echo-boot.s new file mode 100644 index 0000000..b6e65f7 --- /dev/null +++ b/boot/echo-boot.s @@ -0,0 +1,35 @@ +.global _start, source_code + +.section .rodata +.type SOURCE_BUFFER_SIZE, @object +.size SOURCE_BUFFER_SIZE, 4 +SOURCE_BUFFER_SIZE: .long 4096 + +.section .bss +.type source_code, @object +.size source_code, 4096 +source_code: .zero 4096 + +.section .text + +_compile: + ret + +_start: + # Read the source from the standard input. + la a0, source_code + la a1, SOURCE_BUFFER_SIZE # Buffer size. + lw a1, (a1) + call read_file + mv s1, a0 + + call _compile + + # Write the source to the standard output. + la a0, source_code + mv a1, s1 + call write_out + + # Call exit. + li a0, 0 # Use 0 return code. + call exit diff --git a/boot/stage2.elna b/boot/stage2.elna new file mode 100644 index 0000000..a119f4a --- /dev/null +++ b/boot/stage2.elna @@ -0,0 +1,32 @@ +.global _start, source_code + +.equ SYS_READ, 63 +.equ SYS_WRITE, 64 +.equ SYS_EXIT, 93 +.equ STDIN, 0 +.equ STDOUT, 1 +.equ STDERR, 2 + +.equ SOURCE_BUFFER_SIZE, 2048 + +.section .bss +.type source_code, @object +.size source_code, SOURCE_BUFFER_SIZE +source_code: .zero SOURCE_BUFFER_SIZE + +.section .text + +_start: + # Read the source from the standard input. + la a0, source_code + li a1, SOURCE_BUFFER_SIZE # Buffer size. + call read_file + + # Write the source to the standard output. + mv a1, a0 + la a0, source_code + call write_out + + # Call exit. + li a0, 0 # Use 0 return code. + call exit