summaryrefslogtreecommitdiff
path: root/boot/asm-boot.s
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2025-04-21 22:56:50 +0200
committerEugen Wissner <belka@caraus.de>2025-04-21 22:56:50 +0200
commit536443b020d01d0d5372496529086a11b2486621 (patch)
tree78632939b6e12ec6281f99e394bd56d236c4b965 /boot/asm-boot.s
parent148da8ed91f17c6fb367f52c927629b0f4cacb5e (diff)
downloadelna-536443b020d01d0d5372496529086a11b2486621.tar.gz
Add stages and a rakefile
Diffstat (limited to 'boot/asm-boot.s')
-rw-r--r--boot/asm-boot.s894
1 files changed, 886 insertions, 8 deletions
diff --git a/boot/asm-boot.s b/boot/asm-boot.s
index 594c624..80167a2 100644
--- a/boot/asm-boot.s
+++ b/boot/asm-boot.s
@@ -1,18 +1,897 @@
# s1 - Contains the current position in the source text.
-.data
+.global _start # Program entry point.
+
+.section .rodata
-.equ SYS_EXIT, 93
.equ SOURCE_BUFFER_SIZE, 2048
-.bss
+# Fragments of the generated assembly. They are written verbatim with write_out,
+# each *_SIZE constant is the byte length of the fragment defined right before it.
+
+# Written by _compile_begin: entry point label and prologue of the generated program.
+asm_begin: .ascii ".text\n.global _start\n_start:\naddi sp, sp, -64\nsw ra, 60(sp)\nsw s0, 56(sp)\naddi s0, sp, 64\n"
+.equ ASM_BEGIN_SIZE, . - asm_begin
+# Written by _compile_end: exit system call followed by the epilogue of the generated program.
+asm_end: .ascii "addi a0, zero, 0\naddi a7, zero, 93\necall\nlw ra, 60(sp)\nlw s0, 56(sp)\naddi sp, sp, 64\nret\n"
+.equ ASM_END_SIZE, . - asm_end
+# Written by compile before any token is handled.
+asm_program: .ascii ".bss\n"
+.equ ASM_PROGRAM_SIZE, . - asm_program
+# Directive prefixes and suffixes used by _compile_var to declare an array.
+asm_type: .ascii ".type "
+.equ ASM_TYPE_SIZE, . - asm_type
+asm_object: .ascii ", @object\n"
+.equ ASM_OBJECT_SIZE, . - asm_object
+asm_size: .ascii ".size "
+.equ ASM_SIZE_SIZE, . - asm_size
+asm_zero: .ascii ".zero "
+.equ ASM_ZERO_SIZE, . - asm_zero
+asm_global: .ascii ".global "
+.equ ASM_GLOBAL_SIZE, . - asm_global
+
+# Tokens of the source language. These are null-terminated (.string) because
+# they are passed to _token_compare as its null-terminated operand.
+token_begin: .string "begin"
+token_end: .string "end"
+token_import: .string "import"
+token_open_paren: .string "("
+token_close_paren: .string ")"
+token_open_square: .string "["
+token_assign: .string ":="
+token_var: .string "var"
+token_comma: .string ","
+
+# Single characters written to the output. Not null-terminated, the length (1)
+# is passed explicitly to write_out.
+space: .ascii " "
+comma: .ascii ","
+new_line: .ascii "\n"
+colon: .ascii ":"
+digit_zero: .ascii "0"
+
+# Instruction mnemonics written to the output. Not null-terminated, the callers
+# pass the length explicitly.
+instruction_la: .ascii "la"
+instruction_call: .ascii "call"
+instruction_addi: .ascii "addi"
+instruction_li: .ascii "li"
+instruction_sw: .ascii "sw"
+instruction_lw: .ascii "lw"
+
+# Register names written to the output. register_a is the common prefix of
+# a0-a7, the register digit is appended by the caller.
+register_a0: .ascii "a0"
+register_sp: .ascii "sp"
+register_a: .ascii "a"
+.section .bss
.global source_code
.type source_code, @object
.size source_code, SOURCE_BUFFER_SIZE
source_code: .zero SOURCE_BUFFER_SIZE
-.text
-.global _start # Program entry point.
+.section .text
+
+# Advances s1 past any run of whitespace: space, tab, line feed or carriage return.
+# On return s1 points to the first character that is none of these.
+.type _skip_spaces, @function
+_skip_spaces:
+ j .Lskip_spaces_test
+
+.Lskip_spaces_next:
+ addi s1, s1, 1 # Consume the whitespace character.
+
+.Lskip_spaces_test:
+ lbu t0, (s1) # t0 = Character under the cursor.
+
+ li t1, ' '
+ beq t0, t1, .Lskip_spaces_next
+ li t1, '\t'
+ beq t0, t1, .Lskip_spaces_next
+ li t1, '\n'
+ beq t0, t1, .Lskip_spaces_next
+ li t1, '\r'
+ beq t0, t1, .Lskip_spaces_next
+
+ ret # Anything else ends the run.
+
+# Checks whether a string given by address and length equals a null-terminated string.
+#
+# a0 - The address of the token string.
+# a1 - The length of the string in a0.
+# a2 - The address of the null-terminated string.
+#
+# Sets a0 to 0 if the strings match, otherwise sets it to 1.
+.type _token_compare, @function
+_token_compare:
+ mv t0, a0 # t0 = Cursor into the counted string.
+ mv t1, a1 # t1 = Characters left in the counted string.
+ mv t2, a2 # t2 = Cursor into the null-terminated string.
+
+.Ltoken_compare_next:
+ lbu t3, (t2)
+ beqz t1, .Ltoken_compare_length_exhausted
+ beqz t3, .Ltoken_compare_mismatch # The null-terminated string is shorter.
+
+ lbu t4, (t0)
+ bne t3, t4, .Ltoken_compare_mismatch
+
+ addi t0, t0, 1
+ addi t2, t2, 1
+ addi t1, t1, -1
+ j .Ltoken_compare_next
+
+.Ltoken_compare_length_exhausted:
+ # The strings are equal only if the null-terminated one ends here as well,
+ # that is if its current character is \0.
+ snez a0, t3
+ ret
+
+.Ltoken_compare_mismatch:
+ li a0, 1
+ ret
+
+# Measures the token s1 is pointing to and returns its length in a0.
+# s1 itself is left untouched.
+#
+# A token is one of the punctuation characters . , ; ( ) [ ] (length 1),
+# a colon (length 1) or ":=" (length 2), or otherwise the longest run of
+# characters accepted by is_alnum, which may be empty (length 0).
+.type _read_token, @function
+_read_token:
+ # Prologue.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ sw zero, 4(sp) # Identifier length, kept on the stack across the is_alnum calls.
+ lbu t0, (s1) # t0 = First character of the token.
+
+ li t1, ':'
+ beq t0, t1, .Lread_token_colon
+
+ li t1, '.'
+ beq t0, t1, .Lread_token_punctuation
+ li t1, ','
+ beq t0, t1, .Lread_token_punctuation
+ li t1, ';'
+ beq t0, t1, .Lread_token_punctuation
+ li t1, '('
+ beq t0, t1, .Lread_token_punctuation
+ li t1, ')'
+ beq t0, t1, .Lread_token_punctuation
+ li t1, '['
+ beq t0, t1, .Lread_token_punctuation
+ li t1, ']'
+ beq t0, t1, .Lread_token_punctuation
+
+.Lread_token_word: # Expect an identifier or a number.
+ lw t0, 4(sp)
+ add t0, s1, t0
+ lbu a0, (t0) # a0 = Next character of the candidate token.
+
+ call is_alnum
+ beqz a0, .Lread_token_word_end
+
+ lw t0, 4(sp)
+ addi t0, t0, 1
+ sw t0, 4(sp)
+ j .Lread_token_word
+
+.Lread_token_word_end:
+ lw a0, 4(sp)
+ j .Lread_token_return
+
+.Lread_token_colon:
+ lbu t0, 1(s1) # t0 = The character after the colon.
+ li t1, '='
+ li a0, 2 # Length of ":=".
+ beq t0, t1, .Lread_token_return
+ # A lone colon is an ordinary one-character token, fall through.
+
+.Lread_token_punctuation:
+ li a0, 1
+
+.Lread_token_return:
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Generate entry point symbol.
+#
+# Writes the asm_begin fragment to the output and advances s1 past the
+# "begin" keyword it is pointing to.
+.type _compile_begin, @function
+_compile_begin:
+ # Prologue. The frame is 16 bytes, although only 8 are used, so that sp
+ # stays 16-byte aligned for the call below as the calling convention requires.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ # Write initial assembler.
+ la a0, asm_begin
+ li a1, ASM_BEGIN_SIZE
+ call write_out
+
+ addi s1, s1, 5 # Skip the "begin" keyword (5 characters).
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Generate program termination code.
+#
+# Writes the asm_end fragment to the output and advances s1 past the
+# "end" keyword it is pointing to.
+.type _compile_end, @function
+_compile_end:
+ # Prologue. The frame is 16 bytes, although only 8 are used, so that sp
+ # stays 16-byte aligned for the call below as the calling convention requires.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ # Write closing assembler.
+ la a0, asm_end
+ li a1, ASM_END_SIZE
+ call write_out
+
+ addi s1, s1, 3 # Skip the "end" keyword (3 characters).
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Ignores the import.
+#
+# Advances s1 past the "import" keyword it is pointing to and past the token
+# that follows it. Nothing is written to the output.
+.type _compile_import, @function
+_compile_import:
+ # Prologue. The frame is 16 bytes, although only 8 are used, so that sp
+ # stays 16-byte aligned for the calls below as the calling convention requires.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ addi s1, s1, 6 # Skip the "import" keyword (6 characters).
+ call _skip_spaces
+ call _read_token
+ add s1, s1, a0 # Skip the imported module name.
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Stack offset, in the frame of the generated program, of the slot that holds
+# the first call argument. Every following argument lives 4 bytes lower.
+.equ CALL_ARGUMENT_TOP_OFFSET, 52
+# Number of argument registers (a0-a7) reloaded before a call.
+.equ CALL_ARGUMENT_REGISTERS, 8
+
+# Compiles a procedure call. Expects s1 to point to the first argument.
+# a0 - Pointer to the procedure name.
+# a1 - Length of the procedure name.
+#
+# For each argument the generated code evaluates the expression into a0 and
+# saves it in the argument's stack slot ("sw a0,NN(sp)"). Afterwards all of
+# a0-a7 are reloaded from their slots ("lw aN,NN(sp)") and the procedure is
+# called. Nothing is emitted after the call instruction, so whatever the callee
+# leaves in a0 of the generated program is untouched.
+#
+# On return s1 points past the closing paren.
+#
+# Frame layout:
+# 20(sp) - Pointer to the procedure name.
+# 16(sp) - Length of the procedure name.
+# 12(sp) - Index of the current argument, later of the register being reloaded.
+# 8(sp) - Scratch for the register digit character.
+.type _compile_call, @function
+_compile_call:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
+ sw a0, 20(sp)
+ sw a1, 16(sp)
+ sw zero, 12(sp) # Argument count for a procedure call.
+
+.Lcompile_call_paren:
+ # A closing paren ends the argument list.
+ call _skip_spaces
+ call _read_token
+ addi a1, a0, 0
+ addi a0, s1, 0
+ la a2, token_close_paren
+ call _token_compare
+ beqz a0, .Lcompile_call_complete
+
+.Lcompile_call_argument:
+ call _build_expression
+
+ # Save the argument on the stack so it isn't overwritten by the
+ # evaluation of the following arguments: "sw a0,NN(sp)".
+ la a0, instruction_sw
+ li a1, 2
+ call write_out
+
+ la a0, space
+ li a1, 1
+ call write_out
+
+ la a0, register_a0
+ li a1, 2
+ call write_out
+
+ la a0, comma
+ li a1, 1
+ call write_out
+
+ lw a0, 12(sp) # Argument count for a procedure call.
+ call _write_argument_slot
+
+ # Another argument follows only after a comma.
+ call _skip_spaces
+ call _read_token
+ addi a1, a0, 0
+ addi a0, s1, 0
+ la a2, token_comma
+ call _token_compare
+ bnez a0, .Lcompile_call_paren
+
+ lw t0, 12(sp) # Argument count for a procedure call.
+ addi t0, t0, 1
+ sw t0, 12(sp)
+
+ addi s1, s1, 1 # Skip the comma between the arguments.
+ j .Lcompile_call_argument
+
+.Lcompile_call_complete:
+ sw zero, 12(sp) # Reuse the counter as the register index.
+
+.Lcompile_call_restore:
+ # Just go through all a0-a7 registers and read them from stack.
+ # If this stack value contains garbage, the procedure just shouldn't use it.
+ lw t0, 12(sp)
+ li t1, CALL_ARGUMENT_REGISTERS
+ bge t0, t1, .Lcompile_call_perform
+
+ # "lw aN,NN(sp)".
+ la a0, instruction_lw
+ li a1, 2
+ call write_out
+
+ la a0, space
+ li a1, 1
+ call write_out
+
+ la a0, register_a
+ li a1, 1
+ call write_out
+
+ lw t0, 12(sp)
+ addi t0, t0, '0' # Register index as a digit character.
+ sw t0, 8(sp)
+
+ addi a0, sp, 8 # The character is the lowest byte of the stored word.
+ li a1, 1
+ call write_out
+
+ la a0, comma
+ li a1, 1
+ call write_out
+
+ lw a0, 12(sp)
+ call _write_argument_slot
+
+ lw t0, 12(sp) # Increment.
+ addi t0, t0, 1
+ sw t0, 12(sp)
+
+ j .Lcompile_call_restore
+
+.Lcompile_call_perform:
+ # "call <procedure name>".
+ la a0, instruction_call
+ li a1, 4
+ call write_out
+
+ la a0, space
+ li a1, 1
+ call write_out
+
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ call _skip_spaces
+ addi s1, s1, 1 # Skip the close paren.
+
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
+
+# Writes the stack slot operand of a call argument followed by a line break:
+# "NN(sp)\n", where NN = CALL_ARGUMENT_TOP_OFFSET - 4 * index.
+#
+# a0 - Zero-based index of the argument.
+#
+# Only 8 arguments are supported with a0-a7, the offset on the stack always
+# has two digits in this case (52 down to 24).
+.type _write_argument_slot, @function
+_write_argument_slot:
+ # Prologue.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ slli t0, a0, 2 # t0 = 4 * index.
+ li t1, CALL_ARGUMENT_TOP_OFFSET
+ sub t1, t1, t0 # t1 = Offset of the slot.
+
+ # Split the offset into its two decimal digits.
+ li t2, 10
+ div t3, t1, t2
+ rem t4, t1, t2
+ addi t3, t3, '0'
+ addi t4, t4, '0'
+
+ sb t3, 4(sp)
+ sb t4, 5(sp)
+
+ addi a0, sp, 4
+ li a1, 2
+ call write_out
+
+ la a0, token_open_paren
+ li a1, 1
+ call write_out
+
+ la a0, register_sp
+ li a1, 2
+ call write_out
+
+ la a0, token_close_paren
+ li a1, 1
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Compiles the source text s1 is pointing to.
+#
+# Writes the .bss section header, skips the leading "program" keyword and then
+# handles one token after another until the terminating \0 is reached or a
+# token of length 0 (unrecognized input) is read.
+.type compile, @function
+compile:
+ # Prologue.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ # Write .bss section header for global variables.
+ la a0, asm_program
+ li a1, ASM_PROGRAM_SIZE
+ call write_out
+
+ call _skip_spaces
+ addi s1, s1, 7 # Skip "program" keyword.
+
+.Lcharacter_loop_do:
+ call _skip_spaces
+
+ lbu t0, (s1) # t0 = Current character.
+ beqz t0, .Lcharacter_loop_end
+
+ call _read_token
+ beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input.
+
+ call _handle_token # a0 still holds the token length.
+
+ j .Lcharacter_loop_do
+.Lcharacter_loop_end:
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Generates code that evaluates an expression and saves the result in a0.
+#
+# The expression is the single token at s1 (after optional whitespace). If the
+# token starts with a digit it is loaded as an integer literal ("li a0,<token>"),
+# otherwise it is treated as a symbol whose address is loaded ("la a0,<token>").
+# On return s1 points past the token.
+#
+# Frame layout:
+# 4(sp) - Pointer to the token.
+# 0(sp) - Length of the token.
+.type _build_expression, @function
+_build_expression:
+ # Prologue.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ call _skip_spaces
+ call _read_token
+ sw s1, 4(sp)
+ sw a0, 0(sp)
+
+ # Characters are unsigned, load the first one without sign extension.
+ lbu a0, (s1)
+ call is_digit
+
+ # Both instructions are emitted the same way, only the mnemonic differs.
+ la t0, instruction_la # Named identifier.
+ beqz a0, .Lbuild_expression_emit
+ la t0, instruction_li # Integer literal.
+
+.Lbuild_expression_emit:
+ addi a0, t0, 0
+ li a1, 2 # Both mnemonics are 2 characters long.
+ call write_out
+
+ la a0, space
+ li a1, 1
+ call write_out
+
+ la a0, register_a0
+ li a1, 2
+ call write_out
+
+ la a0, comma
+ li a1, 1
+ call write_out
+
+ lw a0, 4(sp)
+ lw a1, 0(sp)
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ # Skip the token.
+ lw a0, 0(sp)
+ add s1, s1, a0
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Compiles a statement beginning with an identifier.
+#
+# a0 - Pointer to the identifier, s1 is expected to point to it as well.
+# a1 - Length of the identifier.
+#
+# The token after the identifier selects the statement kind:
+# ":=" - Assignment, emits the expression followed by "addi <identifier>,a0,0".
+# "(" - Procedure call, delegated to _compile_call.
+# Any other token is skipped and nothing is emitted.
+#
+# Frame layout:
+# 20(sp) - Pointer to the identifier.
+# 16(sp) - Length of the identifier.
+# 12(sp) - Pointer to the token after the identifier.
+# 8(sp) - Length of the token after the identifier.
+.type _compile_identifier, @function
+_compile_identifier:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
+ sw a1, 16(sp)
+ sw a0, 20(sp)
+
+ # Move on to the token following the identifier and remember it.
+ add s1, s1, a1
+ call _skip_spaces
+ call _read_token
+ sw a0, 8(sp)
+ sw s1, 12(sp)
+
+ # Step over that token as well.
+ add s1, s1, a0
+ call _skip_spaces
+
+ # Is it an assignment?
+ lw a0, 12(sp)
+ lw a1, 8(sp)
+ la a2, token_assign
+ call _token_compare
+ bnez a0, .Lcompile_identifier_not_assignment
+
+ # Right-hand side first, it leaves its value in a0.
+ call _build_expression
+
+ # Then "addi <identifier>,a0,0".
+ la a0, instruction_addi
+ li a1, 4
+ call write_out
+
+ la a0, space
+ li a1, 1
+ call write_out
+
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call write_out
+
+ la a0, comma
+ li a1, 1
+ call write_out
+
+ la a0, register_a0
+ li a1, 2
+ call write_out
+
+ la a0, comma
+ li a1, 1
+ call write_out
+
+ la a0, digit_zero
+ li a1, 1
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ j .Lcompile_identifier_return
+
+.Lcompile_identifier_not_assignment:
+ # Is it a procedure call?
+ lw a0, 12(sp)
+ lw a1, 8(sp)
+ la a2, token_open_paren
+ call _token_compare
+ bnez a0, .Lcompile_identifier_return
+
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call _compile_call
+
+.Lcompile_identifier_return:
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
+
+# Compiles a variable declaration. Expects s1 to point to the "var" keyword.
+#
+# The declaration is read as: var <name> <separator token> <type>, where the
+# type is either a single token or "[" <size> "]" <element type token>.
+# Only array declarations produce output:
+#
+# .type <name>, @object
+# .size <name>,<size>
+# <name>:.zero <size>
+# .global <name>
+#
+# On return s1 points past the type.
+#
+# Frame layout:
+# 20(sp) - Pointer to the variable name.
+# 16(sp) - Length of the variable name.
+# 12(sp) - Length of the last type token, skipped before returning.
+# 8(sp) - Length of the array size token.
+.type _compile_var, @function
+_compile_var:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
+ # Variable name.
+ addi s1, s1, 3 # Skip the "var" keyword (3 characters).
+ call _skip_spaces
+ call _read_token
+ sw s1, 20(sp)
+ sw a0, 16(sp)
+ add s1, s1, a0
+
+ # Skip the colon.
+ call _skip_spaces
+ call _read_token
+ add s1, s1, a0
+
+ # First token of the type.
+ call _skip_spaces
+ call _read_token
+ sw a0, 12(sp)
+
+ addi a0, s1, 0
+ lw a1, 12(sp)
+ la a2, token_open_square
+ call _token_compare
+ bnez a0, .Lcompile_var_end # Not an array, just skip the type token.
+
+ call _skip_spaces
+ addi s1, s1, 1 # Skip the opening square bracket.
+
+ # Array size, s1 stays on it while the directives are written.
+ call _skip_spaces
+ call _read_token
+ sw a0, 8(sp)
+
+ # ".type <name>, @object".
+ la a0, asm_type
+ li a1, ASM_TYPE_SIZE
+ call write_out
+
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call write_out
+
+ la a0, asm_object
+ li a1, ASM_OBJECT_SIZE
+ call write_out
+
+ # ".size <name>,<size>".
+ la a0, asm_size
+ li a1, ASM_SIZE_SIZE
+ call write_out
+
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call write_out
+
+ la a0, comma
+ li a1, 1
+ call write_out
+
+ addi a0, s1, 0
+ lw a1, 8(sp)
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ # "<name>:.zero <size>".
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call write_out
+
+ la a0, colon
+ li a1, 1
+ call write_out
+
+ la a0, asm_zero
+ li a1, ASM_ZERO_SIZE
+ call write_out
+
+ addi a0, s1, 0
+ lw a1, 8(sp)
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ # ".global <name>".
+ la a0, asm_global
+ li a1, ASM_GLOBAL_SIZE
+ call write_out
+
+ lw a0, 20(sp)
+ lw a1, 16(sp)
+ call write_out
+
+ la a0, new_line
+ li a1, 1
+ call write_out
+
+ # Skip the array size.
+ lw a0, 8(sp)
+ add s1, s1, a0
+
+ call _skip_spaces
+ addi s1, s1, 1 # Skip the closing square bracket.
+
+ # Element type, skipped below like a plain type token.
+ call _skip_spaces
+ call _read_token
+ sw a0, 12(sp)
+
+.Lcompile_var_end:
+ lw a0, 12(sp)
+ add s1, s1, a0
+
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
+
+# Dispatches on the token s1 is pointing to.
+#
+# a0 - Length of the token.
+#
+# The keywords "begin", "end", "import" and "var" are passed to their
+# _compile_* procedures. Any other token starting with a character accepted
+# by is_alpha is compiled as a statement beginning with an identifier.
+# Everything else is skipped.
+#
+# Frame layout:
+# 20(sp) - Length of the token.
+.type _handle_token, @function
+_handle_token:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
+ sw a0, 20(sp)
+
+ # Detect what token has been read.
+ addi a0, s1, 0
+ lw a1, 20(sp)
+ la a2, token_begin
+ call _token_compare
+ beqz a0, .Lhandle_token_begin
+
+ addi a0, s1, 0
+ lw a1, 20(sp)
+ la a2, token_end
+ call _token_compare
+ beqz a0, .Lhandle_token_end
+
+ addi a0, s1, 0
+ lw a1, 20(sp)
+ la a2, token_import
+ call _token_compare
+ beqz a0, .Lhandle_token_import
+
+ addi a0, s1, 0
+ lw a1, 20(sp)
+ la a2, token_var
+ call _token_compare
+ beqz a0, .Lhandle_token_var
+
+ # If the first symbol in the token is a character, assume an identifier.
+ # Characters are unsigned, load it without sign extension.
+ lbu a0, (s1)
+ call is_alpha
+ bnez a0, .Lhandle_token_identifier
+
+ # Ignore the unknown token.
+ lw t0, 20(sp)
+ add s1, s1, t0
+ j .Lhandle_token_return
+
+.Lhandle_token_begin:
+ call _compile_begin
+ j .Lhandle_token_return
+
+.Lhandle_token_end:
+ call _compile_end
+ j .Lhandle_token_return
+
+.Lhandle_token_import:
+ call _compile_import
+ j .Lhandle_token_return
+
+.Lhandle_token_var:
+ call _compile_var
+ j .Lhandle_token_return
+
+.Lhandle_token_identifier:
+ addi a0, s1, 0
+ lw a1, 20(sp)
+ call _compile_identifier
+
+.Lhandle_token_return:
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
_start:
# Read the source from the standard input.
@@ -24,6 +903,5 @@ _start:
call compile
# Call exit.
- addi a0, zero, 0 # Use 0 return code.
- addi a7, zero, SYS_EXIT
- ecall
+ li a0, 0 # Use 0 return code.
+ call exit