From 148da8ed91f17c6fb367f52c927629b0f4cacb5e Mon Sep 17 00:00:00 2001
From: Eugen Wissner
Date: Fri, 18 Apr 2025 09:26:57 +0200
Subject: Initial commit

---
 boot/asm-boot.s     |  29 ++
 boot/common-boot.s  | 994 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 boot/goto-boot.elna |  12 +
 3 files changed, 1035 insertions(+)
 create mode 100644 boot/asm-boot.s
 create mode 100644 boot/common-boot.s
 create mode 100644 boot/goto-boot.elna

(limited to 'boot')

diff --git a/boot/asm-boot.s b/boot/asm-boot.s
new file mode 100644
index 0000000..594c624
--- /dev/null
+++ b/boot/asm-boot.s
@@ -0,0 +1,29 @@
+# s1 - Contains the current position in the source text.
+
+.data
+
+.equ SYS_EXIT, 93 # Syscall number placed in a7 for the final ecall.
+.equ SOURCE_BUFFER_SIZE, 2048 # Size in bytes of the source_code buffer.
+
+.bss
+.global source_code
+.type source_code, @object
+.size source_code, SOURCE_BUFFER_SIZE
+source_code: .zero SOURCE_BUFFER_SIZE # Zero-filled input buffer.
+
+.text
+.global _start # Program entry point.
+
+_start:
+    # Read the source from the standard input.
+    la a0, source_code
+    li a1, SOURCE_BUFFER_SIZE - 1 # Keep the last byte zero: compile stops only at a zero byte.
+    call read_file
+
+    la s1, source_code # s1 = Source code position.
+    call compile
+
+    # Call exit.
+    addi a0, zero, 0 # Use 0 return code.
+    addi a7, zero, SYS_EXIT
+    ecall
diff --git a/boot/common-boot.s b/boot/common-boot.s
new file mode 100644
index 0000000..1ad60a4
--- /dev/null
+++ b/boot/common-boot.s
@@ -0,0 +1,994 @@
+.data
+
+.equ SYS_READ, 63 # Syscall numbers; loaded into a7 before ecall.
+.equ SYS_WRITE, 64
+.equ STDIN, 0 # File descriptors; loaded into a0 before ecall.
+.equ STDOUT, 1
+.equ STDERR, 2
+
+token_begin: .string "begin" # Null-terminated tokens, matched with _token_compare.
+token_end: .string "end"
+token_import: .string "import"
+token_open_paren: .string "("
+token_close_paren: .string ")"
+token_open_square: .string "["
+token_assign: .string ":="
+token_var: .string "var"
+token_comma: .string ","
+
+space: .ascii " " # Output fragments without terminator; the length is passed to write_out.
+comma: .ascii ","
+new_line: .ascii "\n"
+colon: .ascii ":"
+digit_zero: .ascii "0"
+
+instruction_la: .ascii "la" # Mnemonics written into the generated assembly.
+instruction_call: .ascii "call"
+instruction_addi: .ascii "addi"
+instruction_li: .ascii "li"
+instruction_sw: .ascii "sw"
+instruction_lw: .ascii "lw"
+
+register_a0: .ascii "a0"
+register_sp: .ascii "sp"
+register_a: .ascii "a" # Prefix; a register digit is appended by _compile_call.
+
+asm_program: .ascii ".bss\n"
+.equ ASM_PROGRAM_SIZE, . - asm_program # Each *_SIZE is the byte length of the fragment above it.
+asm_begin: .ascii ".text\n.global _start\n_start:\naddi sp, sp, -64\nsw ra, 60(sp)\nsw s0, 56(sp)\naddi s0, sp, 64\n"
+.equ ASM_BEGIN_SIZE, . - asm_begin
+asm_end: .ascii "addi a0, zero, 0\naddi a7, zero, 93\necall\nlw ra, 60(sp)\nlw s0, 56(sp)\naddi sp, sp, 64\nret\n"
+.equ ASM_END_SIZE, . - asm_end
+asm_type: .ascii ".type "
+.equ ASM_TYPE_SIZE, . - asm_type
+asm_object: .ascii ", @object\n"
+.equ ASM_OBJECT_SIZE, . - asm_object
+asm_size: .ascii ".size "
+.equ ASM_SIZE_SIZE, . - asm_size
+asm_zero: .ascii ".zero "
+.equ ASM_ZERO_SIZE, . - asm_zero
+asm_global: .ascii ".global "
+.equ ASM_GLOBAL_SIZE, . - asm_global
+
+.text
+.global is_alpha, is_digit, is_alnum, compile, write_out, read_file
+
+# Detects if the passed character is a 7-bit alpha character or an underscore.
+# The character is passed in a0.
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+.type is_alpha, @function
+is_alpha:
+    li t0, 'A' - 1
+    sltu t1, t0, a0 # t1 = a0 >= 'A'
+
+    sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+    and t1, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
+
+    li t0, 'a' - 1
+    sltu t2, t0, a0 # t2 = a0 >= 'a'
+
+    sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+    and t2, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
+
+    xori t3, a0, '_' # t3 = 0 only if a0 == '_'
+    seqz t3, t3 # t3 = a0 == '_'
+
+    or a0, t1, t2
+    or a0, a0, t3 # a0 = upper case | lower case | underscore
+    ret
+
+.type is_digit, @function
+is_digit: # a0 = character; sets a0 to 1 for '0'..'9', to 0 otherwise.
+    li t0, '0' - 1
+    sltu t1, t0, a0 # t1 = a0 >= '0'
+
+    sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
+
+    and a0, t1, t2
+
+    ret
+
+.type is_alnum, @function
+is_alnum: # a0 = character; sets a0 to is_alpha(a0) | is_digit(a0).
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    sw a0, 4(sp) # Keep the character for the second check.
+
+    call is_alpha
+    sw a0, 0(sp) # Remember the is_alpha result.
+
+    lw a0, 4(sp)
+    call is_digit
+
+    lw a1, 0(sp)
+    or a0, a0, a1
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
+
+.type _skip_spaces, @function
+_skip_spaces: # Advances s1 past spaces, tabs, line feeds and carriage returns.
+.Lspace_loop_do:
+    lbu t0, (s1) # t0 = Current character.
+
+    li t1, ' '
+    beq t0, t1, .Lspace_loop_repeat
+    li t1, '\t'
+    beq t0, t1, .Lspace_loop_repeat
+    li t1, '\n'
+    beq t0, t1, .Lspace_loop_repeat
+    li t1, '\r'
+    beq t0, t1, .Lspace_loop_repeat
+
+    j .Lspace_loop_end
+.Lspace_loop_repeat:
+    addi s1, s1, 1
+    j .Lspace_loop_do
+
+.Lspace_loop_end:
+    ret
+
+# Compares two strings: the first is given by address and length, the second is null-terminated.
+#
+# a0 - The address of the token string.
+# a1 - The length of the string in a0.
+# a2 - The address of the null-terminated string.
+#
+# If the strings match sets a0 to 0, otherwise sets it to 1.
+.type _token_compare, @function
+_token_compare:
+    addi t0, a0, 0 # t0 = cursor in the counted string.
+    addi t1, a1, 0 # t1 = remaining length of the counted string.
+    addi t2, a2, 0 # t2 = cursor in the null-terminated string.
+
+.Ltoken_compare_loop:
+    lbu t3, (t2)
+
+    # Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the
+    # other string is 0.
+    or t4, t3, t1
+    beqz t4, .Ltoken_compare_equal
+
+    beqz t1, .Ltoken_compare_not_equal # The counted string ended first.
+    beqz t3, .Ltoken_compare_not_equal # The null-terminated string ended first.
+
+    lbu t4, (t0)
+    bne t3, t4, .Ltoken_compare_not_equal
+
+    addi t0, t0, 1
+    addi t1, t1, -1
+    addi t2, t2, 1
+    j .Ltoken_compare_loop
+
+.Ltoken_compare_not_equal:
+    li a0, 1
+    j .Ltoken_compare_end
+
+.Ltoken_compare_equal:
+    li a0, 0
+
+.Ltoken_compare_end:
+    ret
+
+# Reads a token and returns its length in a0.
+# _read_token doesn't change s1, it finds the length of the token s1 is pointing to.
+.type _read_token, @function
+_read_token:
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    lbu t0, (s1) # t0 = Current character.
+    sw zero, 4(sp) # 4(sp) = token length found so far.
+
+    li t1, '.'
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ','
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ':'
+    beq t0, t1, .Ltoken_character_colon
+
+    li t1, ';'
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, '('
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ')'
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, '['
+    beq t0, t1, .Ltoken_character_single
+
+    li t1, ']'
+    beq t0, t1, .Ltoken_character_single
+
+.Ltoken_character_loop_do: # Expect an identifier or a number.
+    lw t6, 4(sp)
+    add t1, s1, t6
+    lbu a0, (t1) # a0 = Current character.
+
+    call is_alnum
+
+    beqz a0, .Ltoken_character_end # The length stays 0 if the first character is not alphanumeric.
+    lw t6, 4(sp) # Reloaded because t6 is not preserved across the call.
+    addi t6, t6, 1
+    sw t6, 4(sp)
+    j .Ltoken_character_loop_do
+
+.Ltoken_character_single:
+    lw t6, 4(sp)
+    addi t6, t6, 1
+    sw t6, 4(sp)
+    j .Ltoken_character_end
+
+.Ltoken_character_colon:
+    lbu t0, 1(s1) # t0 = The character after the colon.
+    lw t6, 4(sp)
+    addi t6, t6, 1
+    sw t6, 4(sp)
+
+    li t1, '='
+    beq t0, t1, .Ltoken_character_single # ":=" counts one more character.
+    j .Ltoken_character_end
+
+.Ltoken_character_end:
+    lw a0, 4(sp)
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
+
+# Writes the asm_begin fragment (entry point symbol and frame setup) and skips the "begin" keyword.
+.type _compile_begin, @function
+_compile_begin:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    # Write initial assembler.
+    la a0, asm_begin
+    addi a1, zero, ASM_BEGIN_SIZE
+    call write_out
+
+    addi s1, s1, 5 # Skip the 5 characters of "begin".
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Generate program termination code.
+.type _compile_end, @function
+_compile_end:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    # Write closing assembler.
+    la a0, asm_end
+    addi a1, zero, ASM_END_SIZE
+    call write_out
+
+    addi s1, s1, 3 # Skip the 3 characters of "end".
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Ignores the import: skips the keyword and the module name without writing any output.
+.type _compile_import, @function
+_compile_import:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    addi s1, s1, 6 # Skip the 6 characters of "import".
+    call _skip_spaces
+    call _read_token
+    add s1, s1, a0 # Skip the imported module name.
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Compiles a procedure call. Expects s1 to point to the first argument.
+# a0 - Pointer to the procedure name.
+# a1 - Length of the procedure name.
+#
+# Returns the procedure result in a0.
+.type _compile_call, @function
+_compile_call:
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    sw a0, 20(sp) # 20(sp) = procedure name pointer.
+    sw a1, 16(sp) # 16(sp) = procedure name length.
+    sw zero, 12(sp) # Argument count for a procedure call.
+
+.Lcompile_call_paren:
+    call _skip_spaces
+    call _read_token
+    addi a1, a0, 0
+    addi a0, s1, 0
+    la a2, token_close_paren
+    call _token_compare
+    beqz a0, .Lcompile_call_complete # ")" ends the argument list.
+
+.Lcompile_call_argument:
+    call _build_expression
+
+    la a0, instruction_sw # Start writing "sw a0,<offset>(sp)".
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp) # Argument count for a procedure call.
+
+    # Only 8 arguments are supported with a0-a7.
+    # Save all arguments on the stack so they aren't overridden afterwards.
+    # The offset on the stack always has two digits in this case.
+    li t1, -4
+    mul t1, t0, t1 # t1 = -4 * argument index.
+    addi t1, t1, 52 # t1 = 52 - 4 * argument index.
+    li t2, 10
+    div t3, t1, t2 # t3 = tens digit.
+    rem t4, t1, t2 # t4 = ones digit.
+    addi t3, t3, '0'
+    addi t4, t4, '0'
+
+    sw t3, 8(sp) # Stored as words; one byte is written from each slot below.
+    sw t4, 4(sp)
+
+    addi a0, sp, 8
+    li a1, 1
+    call write_out
+
+    addi a0, sp, 4
+    li a1, 1
+    call write_out
+
+    la a0, token_open_paren
+    li a1, 1
+    call write_out
+
+    la a0, register_sp
+    li a1, 2
+    call write_out
+
+    la a0, token_close_paren
+    li a1, 1
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    call _skip_spaces
+    call _read_token
+    addi a1, a0, 0
+    addi a0, s1, 0
+    la a2, token_comma
+    call _token_compare
+
+    /* DEBUG. Write the current token to stderr.
+    addi a0, zero, STDERR
+    addi a1, s1, 0
+    li a2, 4 #(sp)
+    addi a7, zero, SYS_WRITE
+    ecall
+    addi a0, zero, STDERR
+    la a1, token_open_square
+    li a2, 1
+    addi a7, zero, SYS_WRITE
+    ecall
+    DEBUG. End */
+
+    bnez a0, .Lcompile_call_paren # Not a comma: go back and test for the closing paren.
+
+    lw t0, 12(sp) # Argument count for a procedure call.
+    addi t0, t0, 1
+    sw t0, 12(sp)
+
+    addi s1, s1, 1 # Skip the comma between the arguments.
+    j .Lcompile_call_argument
+
+.Lcompile_call_complete:
+    sw zero, 12(sp) # Reuse the slot as the register index of the restore loop.
+
+.Lcompile_call_restore:
+    # Just go through all a0-a7 registers and read them from stack.
+    # If this stack value contains garbage, the procedure just shouldn't use it.
+    lw t0, 12(sp)
+    li t1, 7
+    bgt t0, t1, .Lcompile_call_perform # Indexes 0 to 7 are done.
+
+    la a0, instruction_lw # Start writing "lw a<index>,<offset>(sp)".
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp)
+    addi t0, t0, '0' # Register index as an ASCII digit.
+    sw t0, 8(sp)
+
+    addi a0, sp, 8
+    li a1, 1
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp) # Register index (the slot was reset to 0 before the loop).
+
+    li t1, -4
+    mul t1, t0, t1 # t1 = -4 * register index.
+    addi t1, t1, 52 # t1 = 52 - 4 * register index, the same offset used when saving.
+    li t2, 10
+    div t3, t1, t2 # t3 = tens digit.
+    rem t4, t1, t2 # t4 = ones digit.
+    addi t3, t3, '0'
+    addi t4, t4, '0'
+
+    sw t3, 8(sp)
+    sw t4, 4(sp)
+
+    addi a0, sp, 8
+    li a1, 1
+    call write_out
+
+    addi a0, sp, 4
+    li a1, 1
+    call write_out
+
+    la a0, token_open_paren
+    li a1, 1
+    call write_out
+
+    la a0, register_sp
+    li a1, 2
+    call write_out
+
+    la a0, token_close_paren
+    li a1, 1
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    lw t0, 12(sp) # Increment the register index.
+    addi t0, t0, 1
+    sw t0, 12(sp)
+
+    j .Lcompile_call_restore
+
+.Lcompile_call_perform:
+    la a0, instruction_call # Write "call <procedure name>".
+    li a1, 4
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    lw a0, 20(sp) # Procedure name pointer saved on entry.
+    lw a1, 16(sp) # Procedure name length saved on entry.
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    call _skip_spaces
+    addi s1, s1, 1 # Skip the close paren.
+
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+# Writes code that loads the next token into a0 ("li" for a number, "la" otherwise) and skips the token.
+.type _build_expression, @function
+_build_expression:
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    call _skip_spaces
+    call _read_token
+    sw s1, 4(sp) # 4(sp) = token pointer.
+    sw a0, 0(sp) # 0(sp) = token length.
+
+    # Integer literal.
+    addi a0, s1, 0
+    lb a0, (a0) # First character of the token.
+    call is_digit
+    bnez a0, .Lbuild_expression_number_literal
+
+    # Named identifier.
+    la a0, instruction_la # Write "la a0,<identifier>".
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw a0, 4(sp) # Token pointer.
+    lw a1, 0(sp) # Token length.
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    j .Lbuild_expression_end
+
+.Lbuild_expression_number_literal:
+    la a0, instruction_li # Write "li a0,<number>".
+    li a1, 2
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    lw a0, 4(sp) # Token pointer.
+    lw a1, 0(sp) # Token length.
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    j .Lbuild_expression_end
+
+.Lbuild_expression_end:
+    lw a0, 0(sp)
+    add s1, s1, a0 # Advance past the token.
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
+
+# Compiles a statement beginning with an identifier: an assignment (":=") or a procedure call ("(").
+.type _compile_identifier, @function
+_compile_identifier:
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    # Save the pointer to the identifier and its length on the stack.
+    sw a0, 20(sp)
+    sw a1, 16(sp)
+
+    add s1, s1, a1 # Skip the identifier.
+    call _skip_spaces
+    call _read_token
+
+    # Save the pointer and the length of the token following the identifier.
+    sw s1, 12(sp)
+    sw a0, 8(sp)
+
+    add s1, s1, a0 # Skip that token.
+    call _skip_spaces
+
+    lw a0, 12(sp)
+    lw a1, 8(sp)
+    la a2, token_assign
+    call _token_compare
+    beqz a0, .Lcompile_identifier_assign
+
+    lw a0, 12(sp)
+    lw a1, 8(sp)
+    la a2, token_open_paren
+    call _token_compare
+    beqz a0, .Lcompile_identifier_call
+
+    j .Lcompile_identifier_end # Neither ":=" nor "(": nothing is written.
+
+.Lcompile_identifier_call:
+    lw a0, 20(sp) # Identifier pointer.
+    lw a1, 16(sp) # Identifier length.
+    call _compile_call
+
+    j .Lcompile_identifier_end
+
+.Lcompile_identifier_assign:
+    call _build_expression
+
+    la a0, instruction_addi # Write "addi <identifier>,a0,0".
+    li a1, 4
+    call write_out
+
+    la a0, space
+    li a1, 1
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    la a0, register_a0
+    li a1, 2
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    la a0, digit_zero
+    li a1, 1
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    j .Lcompile_identifier_end
+
+.Lcompile_identifier_end:
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+.type _compile_var, @function
+_compile_var: # Compiles a "var" declaration; output is written only for array types.
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    # Variable name.
+    addi s1, s1, 3 # Skip the 3 characters of "var".
+    call _skip_spaces
+    call _read_token
+    sw s1, 20(sp) # 20(sp) = variable name pointer.
+    sw a0, 16(sp) # 16(sp) = variable name length.
+    add s1, s1, a0
+
+    # Skip the colon.
+    call _skip_spaces
+    call _read_token
+    add s1, s1, a0
+
+    call _skip_spaces
+    call _read_token
+    sw a0, 12(sp) # 12(sp) = length of the token skipped at .Lcompile_var_end.
+
+    addi a0, s1, 0
+    lw a1, 12(sp)
+    la a2, token_open_square
+    call _token_compare
+    beqz a0, .Lcompile_var_array
+
+    j .Lcompile_var_end
+
+.Lcompile_var_array:
+    call _skip_spaces
+    addi s1, s1, 1 # Skip the opening square bracket.
+
+    call _skip_spaces
+    call _read_token
+    sw a0, 8(sp) # 8(sp) = length of the array size token.
+
+    la a0, asm_type # Write ".type <name>, @object".
+    li a1, ASM_TYPE_SIZE
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, asm_object
+    li a1, ASM_OBJECT_SIZE
+    call write_out
+
+    la a0, asm_size # Write ".size <name>,<size>".
+    li a1, ASM_SIZE_SIZE
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, comma
+    li a1, 1
+    call write_out
+
+    addi a0, s1, 0 # s1 still points to the array size token.
+    lw a1, 8(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    lw a0, 20(sp) # Write "<name>:.zero <size>".
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, colon
+    li a1, 1
+    call write_out
+
+    la a0, asm_zero
+    li a1, ASM_ZERO_SIZE
+    call write_out
+
+    addi a0, s1, 0
+    lw a1, 8(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    la a0, asm_global # Write ".global <name>".
+    li a1, ASM_GLOBAL_SIZE
+    call write_out
+
+    lw a0, 20(sp)
+    lw a1, 16(sp)
+    call write_out
+
+    la a0, new_line
+    li a1, 1
+    call write_out
+
+    lw a0, 8(sp)
+    add s1, s1, a0 # Skip the array size token.
+
+    call _skip_spaces
+    addi s1, s1, 1 # Skip the closing square bracket.
+
+    call _skip_spaces
+    call _read_token
+
+    sw a0, 12(sp) # Length of the token after "]"; skipped at .Lcompile_var_end.
+    j .Lcompile_var_end
+
+.Lcompile_var_end:
+    lw a0, 12(sp)
+    add s1, s1, a0
+
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+.type _handle_token, @function
+_handle_token: # a0 = length of the token at s1; dispatches to the matching _compile_* routine.
+    # Prologue.
+    addi sp, sp, -32
+    sw ra, 28(sp)
+    sw s0, 24(sp)
+    addi s0, sp, 32
+
+    sw a0, 20(sp) # 20(sp) = token length.
+
+    # Detect what token has been read.
+    addi a0, s1, 0
+    lw a1, 20(sp)
+    la a2, token_begin
+    call _token_compare
+    beqz a0, .Lhandle_token_begin
+
+    addi a0, s1, 0
+    lw a1, 20(sp)
+    la a2, token_end
+    call _token_compare
+    beqz a0, .Lhandle_token_end
+
+    addi a0, s1, 0
+    lw a1, 20(sp)
+    la a2, token_import
+    call _token_compare
+    beqz a0, .Lhandle_token_import
+
+    addi a0, s1, 0
+    lw a1, 20(sp)
+    la a2, token_var
+    call _token_compare
+    beqz a0, .Lhandle_token_var
+
+    # If the first symbol in the token is a character, assume an identifier.
+    addi a0, s1, 0
+    lb a0, (a0) # First character of the token.
+    call is_alpha
+    bnez a0, .Lhandle_token_identifier
+
+    # Ignore the unknown token.
+    lw t0, 20(sp)
+    add s1, s1, t0
+    j .Lhandle_token_return
+
+.Lhandle_token_begin:
+    call _compile_begin
+    j .Lhandle_token_return
+
+.Lhandle_token_end:
+    call _compile_end
+    j .Lhandle_token_return
+
+.Lhandle_token_import:
+    call _compile_import
+    j .Lhandle_token_return
+
+.Lhandle_token_var:
+    call _compile_var
+    j .Lhandle_token_return
+
+.Lhandle_token_identifier:
+    addi a0, s1, 0 # a0 = identifier pointer.
+    lw a1, 20(sp) # a1 = identifier length.
+    call _compile_identifier
+    j .Lhandle_token_return
+
+.Lhandle_token_return:
+    # Epilogue.
+    lw ra, 28(sp)
+    lw s0, 24(sp)
+    addi sp, sp, 32
+    ret
+
+.type write_out, @function
+write_out: # a0 = buffer pointer, a1 = byte count; issues SYS_WRITE on STDOUT.
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    addi a2, a1, 0 # a2 = byte count.
+    addi a1, a0, 0 # a1 = buffer pointer.
+    addi a0, zero, STDOUT
+    addi a7, zero, SYS_WRITE
+    ecall
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the result of the SYS_READ call in a0.
+.type read_file, @function
+read_file:
+    # Prologue.
+    addi sp, sp, -8
+    sw ra, 4(sp)
+    sw s0, 0(sp)
+    addi s0, sp, 8
+
+    addi a2, a1, 0 # a2 = buffer size.
+    addi a1, a0, 0 # a1 = buffer pointer.
+    addi a0, zero, STDIN
+    addi a7, zero, SYS_READ
+    ecall
+
+    # Epilogue.
+    lw ra, 4(sp)
+    lw s0, 0(sp)
+    addi sp, sp, 8
+    ret
+
+.type compile, @function
+compile: # Compiles the source text at s1 and writes the generated assembly with write_out.
+    # Prologue.
+    addi sp, sp, -16
+    sw ra, 12(sp)
+    sw s0, 8(sp)
+    addi s0, sp, 16
+
+    # Write .bss section header for global variables.
+    la a0, asm_program
+    addi a1, zero, ASM_PROGRAM_SIZE
+    call write_out
+
+    call _skip_spaces
+    addi s1, s1, 7 # Skip "program" keyword.
+
+.Lcharacter_loop_do:
+    call _skip_spaces
+
+    lbu t0, (s1) # t0 = Current character.
+    beqz t0, .Lcharacter_loop_end # A zero byte marks the end of the source.
+
+    call _read_token
+    sw a0, 4(sp) # Save the token length on the stack.
+    beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input.
+
+    lw a0, 4(sp) # a0 = token length, the argument of _handle_token.
+    call _handle_token
+
+    j .Lcharacter_loop_do # Continue with the next token.
+.Lcharacter_loop_end:
+
+    # Epilogue.
+    lw ra, 12(sp)
+    lw s0, 8(sp)
+    addi sp, sp, 16
+    ret
diff --git a/boot/goto-boot.elna b/boot/goto-boot.elna
new file mode 100644
index 0000000..c2fd69b
--- /dev/null
+++ b/boot/goto-boot.elna
@@ -0,0 +1,12 @@
+program
+
+import dummy
+
+var source_code: [2048]Byte
+
+begin
+    read_file(source_code, 2048);
+
+    s1 := source_code;
+
+end.
-- 
cgit v1.2.3