From 3bd86e6e1cf9634af0f45ed526181351328b350d Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Mon, 5 May 2025 23:11:52 +0200 Subject: Add my homegrown cross compiler scripts --- boot/stage1.s | 372 ++++++++++++++++++++++++---------------------------------- 1 file changed, 152 insertions(+), 220 deletions(-) (limited to 'boot/stage1.s') diff --git a/boot/stage1.s b/boot/stage1.s index c86e872..b32815d 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -4,9 +4,29 @@ .global _start # Program entry point. +# # Registers used as global variables: # s1 - Contains the current position in the source text. # s2 - Label counter. +# +# - The compiler expects valid input, otherwise it will generate invalid +# assembly or hang. There is no error checking, no semantic analysis, no +# type checking. +# +# - Imports with only a module name without package, e.g. +# "import dummy", can be parsed, but are ignored. +# +# - No loops. Only labels and goto. +# +# - Only unsigned number literals are supported (in decimal or +# hexadecimal format). +# +# - Comments are accepted only at the end of a line. +# +# - Return can be used only as the last statement of a procedure. It +# doesn't actually return, but sets a0 to the appropriate value. +# +# - The lvalue of an assignment can only be an identifier. 
.include "boot/definitions.inc" @@ -73,9 +93,6 @@ _compile_import: addi s0, sp, 24 .Lcompile_import_loop: - call _skip_comment - call _skip_spaces - mv a0, s1 addi a1, sp, 0 call _tokenize_next @@ -106,7 +123,6 @@ _build_binary_expression: li a0, 0 call _build_expression - call _skip_spaces mv a0, s1 addi a1, sp, 12 @@ -114,26 +130,26 @@ _build_binary_expression: lw t0, 12(sp) li t1, TOKEN_AND - beq t0, t1, .L_build_binary_expression_and + beq t0, t1, .Lbuild_binary_expression_and li t1, TOKEN_OR - beq t0, t1, .L_build_binary_expression_or + beq t0, t1, .Lbuild_binary_expression_or li t1, TOKEN_PLUS - beq t0, t1, .L_build_binary_expression_plus + beq t0, t1, .Lbuild_binary_expression_plus li t1, TOKEN_EQUALS - beq t0, t1, .L_build_binary_expression_equal + beq t0, t1, .Lbuild_binary_expression_equal li t1, TOKEN_ASTERISK - beq t0, t1, .L_build_binary_expression_product + beq t0, t1, .Lbuild_binary_expression_product li t1, TOKEN_MINUS - beq t0, t1, .L_build_binary_expression_minus + beq t0, t1, .Lbuild_binary_expression_minus j .Lbuild_binary_expression_end -.L_build_binary_expression_equal: +.Lbuild_binary_expression_equal: mv s1, a0 # Skip =. li a0, 1 call _build_expression @@ -147,7 +163,7 @@ _build_binary_expression: j .Lbuild_binary_expression_end -.L_build_binary_expression_and: +.Lbuild_binary_expression_and: mv s1, a0 # Skip &. li a0, 1 call _build_expression @@ -157,7 +173,7 @@ _build_binary_expression: j .Lbuild_binary_expression_end -.L_build_binary_expression_or: +.Lbuild_binary_expression_or: mv s1, a0 # Skip or. li a0, 1 call _build_expression @@ -167,7 +183,7 @@ _build_binary_expression: j .Lbuild_binary_expression_end -.L_build_binary_expression_plus: +.Lbuild_binary_expression_plus: mv s1, a0 # Skip +. li a0, 1 call _build_expression @@ -177,7 +193,7 @@ _build_binary_expression: j .Lbuild_binary_expression_end -.L_build_binary_expression_minus: +.Lbuild_binary_expression_minus: mv s1, a0 # Skip -. 
li a0, 1 call _build_expression @@ -187,7 +203,7 @@ _build_binary_expression: j .Lbuild_binary_expression_end -.L_build_binary_expression_product: +.Lbuild_binary_expression_product: mv s1, a0 # Skip *. li a0, 1 call _build_expression @@ -448,7 +464,6 @@ _build_expression: call _write_out addi s1, s1, 1 # Skip @. - call _skip_spaces call _read_token sw s1, 32(sp) sw a0, 28(sp) @@ -654,7 +669,6 @@ _compile_call: sw zero, 12(sp) # Argument count for a procedure call. .Lcompile_call_paren: - call _skip_spaces lbu t0, (s1) li t1, 0x29 # ) beq t0, t1, .Lcompile_call_complete @@ -688,7 +702,6 @@ _compile_call: li a1, 5 call _write_out - call _skip_spaces lbu t0, (s1) li t1, ',' bne t0, t1, .Lcompile_call_paren @@ -726,7 +739,6 @@ _compile_call: li a0, '\n' call _put_char - call _skip_spaces addi s1, s1, 1 # Skip the close paren. # Epilogue. @@ -912,7 +924,6 @@ _compile_procedure_section: .Lcompile_procedure_section_loop: call _skip_spaces call _skip_comment - call _skip_spaces mv a0, s1 addi a1, sp, 4 @@ -945,7 +956,6 @@ _compile_module_declaration: call _write_out # Skip "program". - call _skip_comment mv a0, s1 addi a1, sp, 4 call _tokenize_next @@ -965,9 +975,6 @@ _compile_constant_section: sw s0, 16(sp) addi s0, sp, 24 - call _skip_comment - call _skip_spaces - mv a0, s1 addi a1, sp, 4 call _tokenize_next @@ -999,25 +1006,22 @@ _compile_constant_section: .type _compile_constant, @function _compile_constant: # Prologue. - addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 mv a0, s1 - addi a1, sp, 4 + addi a1, sp, 12 call _tokenize_next - - sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0. - mv a0, s1 - add s1, s1, a1 # Save the identifier pointer before advancing it. + addi a1, sp, 0 + call _tokenize_next # Skip the assignment sign. + mv s1, a0 + # Write the identifier.
+ lw a0, 20(sp) + lw a1, 16(sp) call _write_out - mv a0, s1 - addi a1, sp, 4 - call _tokenize_next - mv s1, a0 # Skip the assignment sign. - # : .long li t0, 0x20676e6f # ong_ sw t0, 4(sp) @@ -1027,21 +1031,23 @@ _compile_constant: li a1, 8 call _write_out - call _skip_spaces - call _read_token + mv a0, s1 + addi a1, sp, 12 + call _tokenize_next + mv s1, a0 - mv a1, a0 # The literal length from _read_token should be in a1. - mv a0, s1 # Save the literal pointer before advancing it. - add s1, s1, a1 + lw a0, 20(sp) # Save the literal pointer before advancing it. + lw a1, 16(sp) # The literal length. call _write_out li a0, '\n' call _put_char + call _skip_spaces # Epilogue. - lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret .type _compile_variable_section, @function @@ -1080,6 +1086,7 @@ _compile_variable_section: addi sp, sp, 24 ret +# Compile a global variable. .type _compile_variable, @function _compile_variable: # Prologue. @@ -1088,32 +1095,33 @@ _compile_variable: sw s0, 40(sp) addi s0, sp, 48 - call _read_token - # Save the identifier on the stack since it should emitted multiple times. - sw s1, 36(sp) - sw a0, 32(sp) - add s1, s1, a0 - - call _skip_spaces - addi s1, s1, 1 # Skip the colon in front of the type. - - call _skip_spaces - addi s1, s1, 1 # Skip the opening bracket. - - call _read_token + mv a0, s1 + addi a1, sp, 28 + call _tokenize_next + addi a1, sp, 4 + call _tokenize_next # Skip the colon in front of the type. + addi a1, sp, 4 + call _tokenize_next # Skip the opening bracket. + addi a1, sp, 16 + call _tokenize_next # Save the array size on the stack since it has to be emitted multiple times. + addi a1, sp, 4 + call _tokenize_next # Skip the closing bracket. + addi a1, sp, 4 + call _tokenize_next # Skip the type. + mv s1, a0 - # Save the array size on the stack since it has to be emitted multiple times. 
- sw s1, 28(sp) + /* DEBUG + lw a0, 24(sp) + add a0, a0, '0' sw a0, 24(sp) - add s1, s1, a0 - - call _skip_spaces - addi s1, s1, 1 # Skip the closing bracket. - - call _skip_spaces - call _read_token - add s1, s1, a0 # Skip the type. + addi a0, sp, 24 + li a1, 1 + call _write_error + lw a0, 28(sp) + li a1, 8 + call _write_error + */ # .type identifier, @object la a0, asm_type @@ -1134,15 +1142,15 @@ _compile_variable: call _write_out li t0, 0x206f7265 # ero_ - sw t0, 20(sp) + sw t0, 12(sp) li t0, 0x7a2e203a # : .z - sw t0, 16(sp) - addi a0, sp, 16 + sw t0, 8(sp) + addi a0, sp, 8 li a1, 8 call _write_out - lw a0, 28(sp) - lw a1, 24(sp) + lw a0, 24(sp) + lw a1, 20(sp) call _write_out li a0, '\n' @@ -1215,9 +1223,6 @@ _compile_procedure: # Generate the body of the procedure. .Lcompile_procedure_body: - call _skip_spaces - call _read_line - sw a0, 12(sp) li t0, 0x0a646e65 # end\n sw t0, 8(sp) mv a0, s1 @@ -1227,7 +1232,6 @@ _compile_procedure: beqz a0, .Lcompile_procedure_end - lw a0, 12(sp) call _compile_statement j .Lcompile_procedure_body @@ -1245,111 +1249,95 @@ _compile_procedure: addi sp, sp, 32 ret +# Compiles a goto statement to an unconditional jump. .type _compile_goto, @function _compile_goto: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - addi s1, s1, 4 # Skip the goto keyword. + mv a0, s1 + addi a1, sp, 0 + call _tokenize_next # Skip the goto keyword. + addi a1, sp, 0 + call _tokenize_next # We should be on the dot the label begins with. + addi a1, sp, 0 + call _tokenize_next # Save the label name. + mv s1, a0 - li t0, 0x206a # j_ - sw t0, 8(sp) - addi a0, sp, 8 - li a1, 2 + li t0, 0x2e206a # j . + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 3 call _write_out - call _skip_spaces - sw s1, 8(sp) # We should be on dot the label is beginning with. - addi s1, s1, 1 - - call _read_token - add s1, s1, a0 - addi a1, a0, 1 # Label length and the dot.
lw a0, 8(sp) # Saved dot position. + lw a1, 4(sp) call _write_out - addi s1, s1, 1 # Skip the new line. - li a0, '\n' call _put_char # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret -# a0 - Line length. +# Rewrites a label to assembly. .type _compile_label, @function _compile_label: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - sw a0, 0(sp) # Save the line length. - mv a1, a0 # Argument for _write_out later. - - lw t0, 0(sp) # Line length. - mv t1, s1 # Line start. - - add t1, t1, t0 - addi t1, t1, -1 # Last character on the line. - - lbu t1, (t1) - li t2, ';' - bne t1, t2, .Lcompile_label_colon - - addi a1, a1, -1 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 -.Lcompile_label_colon: - # Write the whole line as is. mv a0, s1 - call _write_out - - li t0, 0x3a # : - sw t0, 4(sp) - addi a0, sp, 4 - li a1, 1 - call _write_out + addi a1, sp, 8 + call _tokenize_next # Dot starting the label. + addi a1, sp, 8 + call _tokenize_next + mv s1, a0 - li t0, '\n' - sw t0, 4(sp) - addi a0, sp, 4 - li a1, 1 + li a0, '.' + call _put_char + lw a0, 16(sp) + lw a1, 12(sp) call _write_out - - lw a0, 0(sp) - addi a0, a0, 1 # Skip the new line as well. - add s1, s1, a0 # Skip the line. + li a0, ':' + call _put_char + li a0, '\n' + call _put_char # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret +# Just skips the return keyword and evaluates the return expression. .type _compile_return, @function _compile_return: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - addi s1, s1, 6 # Skip return. - call _skip_spaces + mv a0, s1 + addi a1, sp, 12 + call _tokenize_next + mv s1, a0 # Skip return. call _build_binary_expression # Epilogue. 
- lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret .type _compile_if, @function @@ -1403,8 +1391,6 @@ _compile_if: li t1, TOKEN_END beq t0, t1, .Lcompile_if_end - call _read_line - li a1, 1 call _compile_statement j .Lcompile_if_loop @@ -1422,7 +1408,7 @@ _compile_if: call _printi # Finalize the label. - li t0, 0x0a3a # :\n:\n + li t0, 0x0a3a # :\n sh t0, 16(sp) addi a0, sp, 16 li a1, 2 @@ -1436,11 +1422,7 @@ _compile_if: addi sp, sp, 32 ret -# Parameters: -# a0 - Line length. -# -# Returns 1 in a0 if the parsed line contained a text section element such a -# procedure or the program entry point. Otherwise sets a0 to 0. +# Checks for the type of the current statement and compiles it. .type _compile_statement, @function _compile_statement: # Prologue. @@ -1449,42 +1431,17 @@ _compile_statement: sw s0, 24(sp) addi s0, sp, 32 - # Preserve passed arguments. - sw a0, 20(sp) - - mv a0, s1 - lw a1, 20(sp) - call _is_local_identifier - bnez a0, .Lcompile_statement_identifier - - mv a0, s1 - li a1, 2 - call _is_register_identifier - bnez a0, .Lcompile_statement_identifier - - li t0, 0x6f746f67 # goto - sw t0, 12(sp) - mv a0, s1 - addi a1, sp, 12 - li a2, 4 - call _memcmp - beqz a0, .Lcompile_statement_goto - - call _skip_comment - /* DEBUG - mv a0, s1 - li a1, 4 - call _write_error - mv a0, s1 - li a1, 4 - call _write_error - */ - mv a0, s1 addi a1, sp, 0 call _tokenize_next lw t0, 0(sp) + li t1, TOKEN_IDENTIFIER + beq t0, t1, .Lcompile_statement_identifier + + li t1, TOKEN_GOTO + beq t0, t1, .Lcompile_statement_goto + li t1, TOKEN_RETURN beq t0, t1, .Lcompile_statement_return @@ -1494,10 +1451,6 @@ _compile_statement: li t1, TOKEN_DOT beq t0, t1, .Lcompile_statement_label - lbu t0, (s1) - li t1, '_' - beq t0, t1, .Lcompile_statement_identifier - j .Lcompile_statement_empty # Else. 
.Lcompile_statement_if: @@ -1505,7 +1458,6 @@ _compile_statement: j .Lcompile_statement_end .Lcompile_statement_label: - lw a0, 20(sp) call _compile_label j .Lcompile_statement_end @@ -1527,7 +1479,6 @@ _compile_statement: .Lcompile_statement_end: sw a0, 12(sp) - call _skip_spaces call _skip_comment lw a0, 12(sp) @@ -1559,10 +1510,10 @@ _compile_text_section: .type _compile_entry_point, @function _compile_entry_point: # Prologue. - addi sp, sp, -8 - sw ra, 4(sp) - sw s0, 0(sp) - addi s0, sp, 8 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 # .type _start, @function la a0, asm_start @@ -1581,7 +1532,6 @@ _compile_entry_point: li t1, TOKEN_END beq t0, t1, .Lcompile_entry_point_end - lw a0, 12(sp) call _compile_statement j .Lcompile_entry_point_body @@ -1593,27 +1543,9 @@ _compile_entry_point: call _write_out # Epilogue. - lw ra, 4(sp) - lw s0, 0(sp) - addi sp, sp, 8 - ret - -# Finds the end of the line and returns its length in a0. -.type _read_line, @function -_read_line: - mv t0, s1 # Local position in the source text. - -.Lread_line_do: - lbu t1, (t0) # t1 = Current character. - beqz t1, .Lread_line_end # Exit the loop on the NUL character. - li t2, '\n' - beq t1, t2, .Lread_line_end # Exit the loop on the new line. - - addi t0, t0, 1 - j .Lread_line_do - -.Lread_line_end: - sub a0, t0, s1 # Return the line length. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret .type _compile, @function -- cgit v1.2.3