diff options
| author | Eugen Wissner <belka@caraus.de> | 2025-04-21 22:56:50 +0200 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2025-04-21 22:56:50 +0200 |
| commit | 536443b020d01d0d5372496529086a11b2486621 (patch) | |
| tree | 78632939b6e12ec6281f99e394bd56d236c4b965 /boot/asm-boot.s | |
| parent | 148da8ed91f17c6fb367f52c927629b0f4cacb5e (diff) | |
| download | elna-536443b020d01d0d5372496529086a11b2486621.tar.gz | |
Add stages and a rakefile
Diffstat (limited to 'boot/asm-boot.s')
| -rw-r--r-- | boot/asm-boot.s | 894 |
1 files changed, 886 insertions, 8 deletions
diff --git a/boot/asm-boot.s b/boot/asm-boot.s index 594c624..80167a2 100644 --- a/boot/asm-boot.s +++ b/boot/asm-boot.s @@ -1,18 +1,897 @@ # s1 - Contains the current position in the source text. -.data +.global _start # Program entry point. + +.section .rodata -.equ SYS_EXIT, 93 .equ SOURCE_BUFFER_SIZE, 2048 -.bss +asm_begin: .ascii ".text\n.global _start\n_start:\naddi sp, sp, -64\nsw ra, 60(sp)\nsw s0, 56(sp)\naddi s0, sp, 64\n" +.equ ASM_BEGIN_SIZE, . - asm_begin +asm_end: .ascii "addi a0, zero, 0\naddi a7, zero, 93\necall\nlw ra, 60(sp)\nlw s0, 56(sp)\naddi sp, sp, 64\nret\n" +.equ ASM_END_SIZE, . - asm_end +asm_program: .ascii ".bss\n" +.equ ASM_PROGRAM_SIZE, . - asm_program +asm_type: .ascii ".type " +.equ ASM_TYPE_SIZE, . - asm_type +asm_object: .ascii ", @object\n" +.equ ASM_OBJECT_SIZE, . - asm_object +asm_size: .ascii ".size " +.equ ASM_SIZE_SIZE, . - asm_size +asm_zero: .ascii ".zero " +.equ ASM_ZERO_SIZE, . - asm_zero +asm_global: .ascii ".global " +.equ ASM_GLOBAL_SIZE, . - asm_global + +token_begin: .string "begin" +token_end: .string "end" +token_import: .string "import" +token_open_paren: .string "(" +token_close_paren: .string ")" +token_open_square: .string "[" +token_assign: .string ":=" +token_var: .string "var" +token_comma: .string "," + +space: .ascii " " +comma: .ascii "," +new_line: .ascii "\n" +colon: .ascii ":" +digit_zero: .ascii "0" + +instruction_la: .ascii "la" +instruction_call: .ascii "call" +instruction_addi: .ascii "addi" +instruction_li: .ascii "li" +instruction_sw: .ascii "sw" +instruction_lw: .ascii "lw" + +register_a0: .ascii "a0" +register_sp: .ascii "sp" +register_a: .ascii "a" + +.section .bss .global source_code .type source_code, @object .size source_code, SOURCE_BUFFER_SIZE source_code: .zero SOURCE_BUFFER_SIZE -.text -.global _start # Program entry point. +.section .text + +.type _skip_spaces, @function +_skip_spaces: +.Lspace_loop_do: + lbu t0, (s1) # t0 = Current character. 
+ + li t1, ' ' + beq t0, t1, .Lspace_loop_repeat + li t1, '\t' + beq t0, t1, .Lspace_loop_repeat + li t1, '\n' + beq t0, t1, .Lspace_loop_repeat + li t1, '\r' + beq t0, t1, .Lspace_loop_repeat + + j .Lspace_loop_end +.Lspace_loop_repeat: + addi s1, s1, 1 + j .Lspace_loop_do + +.Lspace_loop_end: + ret + +# Compares two strings: one is given with an explicit length, the other one is null-terminated. +# +# a0 - The address of the token string. +# a1 - The length of the string in a0. +# a2 - The address of the null-terminated string. +# +# If the strings match, sets a0 to 0; otherwise sets it to 1. +.type _token_compare, @function +_token_compare: + addi t0, a0, 0 + addi t1, a1, 0 + addi t2, a2, 0 + +.Ltoken_compare_loop: + lbu t3, (t2) + + # Will only be 0 if the current character in the null-terminated string is \0 and the remaining length of the + # other string is 0. + or t4, t3, t1 + beqz t4, .Ltoken_compare_equal + + beqz t1, .Ltoken_compare_not_equal + beqz t3, .Ltoken_compare_not_equal + + lbu t4, (t0) + bne t3, t4, .Ltoken_compare_not_equal + + addi t0, t0, 1 + addi t1, t1, -1 + addi t2, t2, 1 + j .Ltoken_compare_loop + +.Ltoken_compare_not_equal: + li a0, 1 + j .Ltoken_compare_end + +.Ltoken_compare_equal: + li a0, 0 + +.Ltoken_compare_end: + ret + +# Reads a token and returns its length in a0. +# _read_token doesn't change s1; it only finds the length of the token s1 is pointing to. +.type _read_token, @function +_read_token: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + lbu t0, (s1) # t0 = Current character. + sw zero, 4(sp) + + li t1, '.' 
+ beq t0, t1, .Ltoken_character_single + + li t1, ',' + beq t0, t1, .Ltoken_character_single + + li t1, ':' + beq t0, t1, .Ltoken_character_colon + + li t1, ';' + beq t0, t1, .Ltoken_character_single + + li t1, '(' + beq t0, t1, .Ltoken_character_single + + li t1, ')' + beq t0, t1, .Ltoken_character_single + + li t1, '[' + beq t0, t1, .Ltoken_character_single + + li t1, ']' + beq t0, t1, .Ltoken_character_single + +.Ltoken_character_loop_do: # Expect an identifier or a number. + lw t6, 4(sp) + add t1, s1, t6 + lbu a0, (t1) # a0 = Current character. + + call is_alnum + + beqz a0, .Ltoken_character_end + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + j .Ltoken_character_loop_do + +.Ltoken_character_single: + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + j .Ltoken_character_end + +.Ltoken_character_colon: + lbu t0, 1(s1) # t0 = The character after the colon. + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + + li t1, '=' + beq t0, t1, .Ltoken_character_single + j .Ltoken_character_end + +.Ltoken_character_end: + lw a0, 4(sp) + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Generates the entry point: writes asm_begin to the output and advances s1 past the "begin" keyword. +.type _compile_begin, @function +_compile_begin: + # Prologue. The frame is 16 bytes so that sp stays 16-byte aligned across the call below. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + # Write initial assembler. + la a0, asm_begin + addi a1, zero, ASM_BEGIN_SIZE + call write_out + + addi s1, s1, 5 # Skip the "begin" keyword. + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Generates the program termination code: writes asm_end to the output and advances s1 past the "end" keyword. +.type _compile_end, @function +_compile_end: + # Prologue. The frame is 16 bytes so that sp stays 16-byte aligned across the call below. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + # Write closing assembler. + la a0, asm_end + addi a1, zero, ASM_END_SIZE + call write_out + + addi s1, s1, 3 # Skip the "end" keyword. + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Ignores the import. +.type _compile_import, @function +_compile_import: + # Prologue. 
+ addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + addi s1, s1, 6 # Skip the "import" keyword. + call _skip_spaces + call _read_token + add s1, s1, a0 # Skip the imported module name. + + # Epilogue. Undoes the 16-byte frame, which keeps sp 16-byte aligned across the calls above. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Compiles a procedure call. Expects s1 to point to the first argument. +# a0 - Pointer to the procedure name. +# a1 - Length of the procedure name. +# +# Returns the procedure result in a0. +.type _compile_call, @function +_compile_call: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) + sw a1, 16(sp) + sw zero, 12(sp) # Argument count for a procedure call. + +.Lcompile_call_paren: + call _skip_spaces + call _read_token + addi a1, a0, 0 + addi a0, s1, 0 + la a2, token_close_paren + call _token_compare + beqz a0, .Lcompile_call_complete + +.Lcompile_call_argument: + call _build_expression + + la a0, instruction_sw + li a1, 2 + call write_out + + la a0, space + li a1, 1 + call write_out + + la a0, register_a0 + li a1, 2 + call write_out + + la a0, comma + li a1, 1 + call write_out + + lw t0, 12(sp) # Argument count for a procedure call. + + # Only 8 arguments are supported with a0-a7. + # Save all arguments on the stack so they aren't overridden afterwards. + # The offset on the stack always has two digits in this case. + li t1, -4 + mul t1, t0, t1 + addi t1, t1, 52 + li t2, 10 + div t3, t1, t2 + rem t4, t1, t2 + addi t3, t3, '0' + addi t4, t4, '0' + + sw t3, 8(sp) + sw t4, 4(sp) + + addi a0, sp, 8 + li a1, 1 + call write_out + + addi a0, sp, 4 + li a1, 1 + call write_out + + la a0, token_open_paren + li a1, 1 + call write_out + + la a0, register_sp + li a1, 2 + call write_out + + la a0, token_close_paren + li a1, 1 + call write_out + + la a0, new_line + li a1, 1 + call write_out + + call _skip_spaces + call _read_token + addi a1, a0, 0 + addi a0, s1, 0 + la a2, token_comma + call _token_compare + + /* DEBUG. Write the current token to stderr. 
+ addi a0, zero, STDERR + addi a1, s1, 0 + li a2, 4 #(sp) + addi a7, zero, SYS_WRITE + ecall + addi a0, zero, STDERR + la a1, token_open_square + li a2, 1 + addi a7, zero, SYS_WRITE + ecall + DEBUG. End */ + + bnez a0, .Lcompile_call_paren + + lw t0, 12(sp) # Argument count for a procedure call. + addi t0, t0, 1 + sw t0, 12(sp) + + addi s1, s1, 1 # Skip the comma between the arguments. + j .Lcompile_call_argument + +.Lcompile_call_complete: + sw zero, 12(sp) + +.Lcompile_call_restore: + # Just go through all a0-a7 registers and read them from stack. + # If this stack value contains garbage, the procedure just shouldn't use it. + lw t0, 12(sp) + li t1, 7 + bgt t0, t1, .Lcompile_call_perform + + la a0, instruction_lw + li a1, 2 + call write_out + + la a0, space + li a1, 1 + call write_out + + la a0, register_a + li a1, 1 + call write_out + + lw t0, 12(sp) + addi t0, t0, '0' + sw t0, 8(sp) + + addi a0, sp, 8 + li a1, 1 + call write_out + + la a0, comma + li a1, 1 + call write_out + + lw t0, 12(sp) # Argument count for a procedure call. + + li t1, -4 + mul t1, t0, t1 + addi t1, t1, 52 + li t2, 10 + div t3, t1, t2 + rem t4, t1, t2 + addi t3, t3, '0' + addi t4, t4, '0' + + sw t3, 8(sp) + sw t4, 4(sp) + + addi a0, sp, 8 + li a1, 1 + call write_out + + addi a0, sp, 4 + li a1, 1 + call write_out + + la a0, token_open_paren + li a1, 1 + call write_out + + la a0, register_sp + li a1, 2 + call write_out + + la a0, token_close_paren + li a1, 1 + call write_out + + la a0, new_line + li a1, 1 + call write_out + + lw t0, 12(sp) # Increment. + addi t0, t0, 1 + sw t0, 12(sp) + + j .Lcompile_call_restore + +.Lcompile_call_perform: + la a0, instruction_call + li a1, 4 + call write_out + + la a0, space + li a1, 1 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + la a0, new_line + li a1, 1 + call write_out + + call _skip_spaces + addi s1, s1, 1 # Skip the close paren. + + # Epilogue. 
+ lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type compile, @function +compile: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + # Write .bss section header for global variables. + la a0, asm_program + addi a1, zero, ASM_PROGRAM_SIZE + call write_out + + call _skip_spaces + addi s1, s1, 7 # Skip "program" keyword. + +.Lcharacter_loop_do: + call _skip_spaces + + lbu t0, (s1) # t0 = Current character. + beqz t0, .Lcharacter_loop_end + + call _read_token + sw a0, 4(sp) # Save the token length on the stack. + beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input. + + lw a0, 4(sp) + call _handle_token + + j .Lcharacter_loop_do +.Lcharacter_loop_end: + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Emits code that evaluates an expression and leaves the result in a0. +.type _build_expression, @function +_build_expression: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + call _skip_spaces + call _read_token + sw s1, 4(sp) + sw a0, 0(sp) + + # Integer literal. + addi a0, s1, 0 + lb a0, (a0) + call is_digit + bnez a0, .Lbuild_expression_number_literal + + # Named identifier. + la a0, instruction_la + li a1, 2 + call write_out + + la a0, space + li a1, 1 + call write_out + + la a0, register_a0 + li a1, 2 + call write_out + + la a0, comma + li a1, 1 + call write_out + + lw a0, 4(sp) + lw a1, 0(sp) + call write_out + + la a0, new_line + li a1, 1 + call write_out + + j .Lbuild_expression_end + +.Lbuild_expression_number_literal: + la a0, instruction_li + li a1, 2 + call write_out + + la a0, space + li a1, 1 + call write_out + + la a0, register_a0 + li a1, 2 + call write_out + + la a0, comma + li a1, 1 + call write_out + + lw a0, 4(sp) + lw a1, 0(sp) + call write_out + + la a0, new_line + li a1, 1 + call write_out + + j .Lbuild_expression_end + +.Lbuild_expression_end: + lw a0, 0(sp) + add s1, s1, a0 + + # Epilogue. 
+ lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Compiles a statement beginning with an identifier. +.type _compile_identifier, @function +_compile_identifier: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Save the pointer to the identifier and its length on the stack. + sw a0, 20(sp) + sw a1, 16(sp) + + add s1, s1, a1 + call _skip_spaces + call _read_token + + # Save the pointer and the length of the token following the identifier. + sw s1, 12(sp) + sw a0, 8(sp) + + add s1, s1, a0 # Skip that token. + call _skip_spaces + + lw a0, 12(sp) + lw a1, 8(sp) + la a2, token_assign + call _token_compare + beqz a0, .Lcompile_identifier_assign + + lw a0, 12(sp) + lw a1, 8(sp) + la a2, token_open_paren + call _token_compare + beqz a0, .Lcompile_identifier_call + + j .Lcompile_identifier_end + +.Lcompile_identifier_call: + lw a0, 20(sp) + lw a1, 16(sp) + call _compile_call + + j .Lcompile_identifier_end + +.Lcompile_identifier_assign: + call _build_expression + + la a0, instruction_addi + li a1, 4 + call write_out + + la a0, space + li a1, 1 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + la a0, comma + li a1, 1 + call write_out + + la a0, register_a0 + li a1, 2 + call write_out + + la a0, comma + li a1, 1 + call write_out + + la a0, digit_zero + li a1, 1 + call write_out + + la a0, new_line + li a1, 1 + call write_out + + j .Lcompile_identifier_end + +.Lcompile_identifier_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_var, @function +_compile_var: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # Variable name. + addi s1, s1, 3 + call _skip_spaces + call _read_token + sw s1, 20(sp) + sw a0, 16(sp) + add s1, s1, a0 + + # Skip the colon. 
+ call _skip_spaces + call _read_token + add s1, s1, a0 + + call _skip_spaces + call _read_token + sw a0, 12(sp) + + addi a0, s1, 0 + lw a1, 12(sp) + la a2, token_open_square + call _token_compare + beqz a0, .Lcompile_var_array + + j .Lcompile_var_end + +.Lcompile_var_array: + call _skip_spaces + add s1, s1, 1 # Skip the opening square bracket. + + call _skip_spaces + call _read_token + sw a0, 8(sp) + + la a0, asm_type + li a1, ASM_TYPE_SIZE + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + la a0, asm_object + li a1, ASM_OBJECT_SIZE + call write_out + + la a0, asm_size + li a1, ASM_SIZE_SIZE + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + la a0, comma + li a1, 1 + call write_out + + addi a0, s1, 0 + lw a1, 8(sp) + call write_out + + la a0, new_line + li a1, 1 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + la a0, colon + li a1, 1 + call write_out + + la a0, asm_zero + li a1, ASM_ZERO_SIZE + call write_out + + addi a0, s1, 0 + lw a1, 8(sp) + call write_out + + la a0, new_line + li a1, 1 + call write_out + + la a0, asm_global + li a1, ASM_GLOBAL_SIZE + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + la a0, new_line + li a1, 1 + call write_out + + lw a0, 8(sp) + add s1, s1, a0 + + call _skip_spaces + add s1, s1, 1 # Skip the closing square bracket. + + call _skip_spaces + call _read_token + + sw a0, 12(sp) + j .Lcompile_var_end + +.Lcompile_var_end: + lw a0, 12(sp) + add s1, s1, a0 + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _handle_token, @function +_handle_token: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) + + # Detect what token has been read. 
+ addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_begin + call _token_compare + beqz a0, .Lhandle_token_begin + + addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_end + call _token_compare + beqz a0, .Lhandle_token_end + + addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_import + call _token_compare + beqz a0, .Lhandle_token_import + + addi a0, s1, 0 + lw a1, 20(sp) + la a2, token_var + call _token_compare + beqz a0, .Lhandle_token_var + + # If the first symbol in the token is a character, assume an identifier. + addi a0, s1, 0 + lb a0, (a0) + call is_alpha + bnez a0, .Lhandle_token_identifier + + # Ignore the unknown token. + lw t0, 20(sp) + add s1, s1, t0 + j .Lhandle_token_return + +.Lhandle_token_begin: + call _compile_begin + j .Lhandle_token_return + +.Lhandle_token_end: + call _compile_end + j .Lhandle_token_return + +.Lhandle_token_import: + call _compile_import + j .Lhandle_token_return + +.Lhandle_token_var: + call _compile_var + j .Lhandle_token_return + +.Lhandle_token_identifier: + addi a0, s1, 0 + lw a1, 20(sp) + call _compile_identifier + j .Lhandle_token_return + +.Lhandle_token_return: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret _start: # Read the source from the standard input. @@ -24,6 +903,5 @@ _start: call compile # Call exit. - addi a0, zero, 0 # Use 0 return code. - addi a7, zero, SYS_EXIT - ecall + li a0, 0 # Use 0 return code. + call exit |
