Add my homegrown cross compiler scripts

This commit is contained in:
2025-05-05 23:11:52 +02:00
parent df1c0486c5
commit 3bd86e6e1c
8 changed files with 1042 additions and 640 deletions

View File

@ -34,20 +34,21 @@
.equ TOKEN_IDENTIFIER, 27
# The constant should match the character index in the byte_keywords string.
.equ TOKEN_AND, 28
.equ TOKEN_DOT, 29
.equ TOKEN_COMMA, 30
.equ TOKEN_COLON, 31
.equ TOKEN_SEMICOLON, 32
.equ TOKEN_LEFT_PAREN, 33
.equ TOKEN_RIGHT_PAREN, 34
.equ TOKEN_LEFT_BRACKET, 35
.equ TOKEN_RIGHT_BRACKET, 36
.equ TOKEN_HAT, 37
.equ TOKEN_EQUALS, 38
.equ TOKEN_PLUS, 39
.equ TOKEN_MINUS, 40
.equ TOKEN_ASTERISK, 41
.equ TOKEN_AT, 42
.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
.equ TOKEN_ASSIGN, 43
.equ TOKEN_INTEGER, 44

View File

@ -4,9 +4,29 @@
.global _start # Program entry point.
#
# Registers used as global variables:
# s1 - Contains the current position in the source text.
# s2 - Label counter.
#
# - The compiler expects valid input, otherwise it will generate invalid
# assembly or hang. There is no error checking, no semantic analysis, no
# type checking.
#
# - Imports with only a module name without package, e.g.
# "import dummy", can be parsed, but are ignored.
#
# - No loops. Only labels and goto.
#
# - Only unsigned number literals are supported (in decimal or
# hexadecimal format).
#
# - Comments are accepted only at the end of a line.
#
# - Return can be used only as the last statement of a procedure. It
# doesn't actually return, but sets a0 to the appropriate value.
#
# - The lvalue of an assignment can only be an identifier.
.include "boot/definitions.inc"
@ -73,9 +93,6 @@ _compile_import:
addi s0, sp, 24
.Lcompile_import_loop:
call _skip_comment
call _skip_spaces
mv a0, s1
addi a1, sp, 0
call _tokenize_next
@ -106,7 +123,6 @@ _build_binary_expression:
li a0, 0
call _build_expression
call _skip_spaces
mv a0, s1
addi a1, sp, 12
@ -114,26 +130,26 @@ _build_binary_expression:
lw t0, 12(sp)
li t1, TOKEN_AND
beq t0, t1, .L_build_binary_expression_and
beq t0, t1, .Lbuild_binary_expression_and
li t1, TOKEN_OR
beq t0, t1, .L_build_binary_expression_or
beq t0, t1, .Lbuild_binary_expression_or
li t1, TOKEN_PLUS
beq t0, t1, .L_build_binary_expression_plus
beq t0, t1, .Lbuild_binary_expression_plus
li t1, TOKEN_EQUALS
beq t0, t1, .L_build_binary_expression_equal
beq t0, t1, .Lbuild_binary_expression_equal
li t1, TOKEN_ASTERISK
beq t0, t1, .L_build_binary_expression_product
beq t0, t1, .Lbuild_binary_expression_product
li t1, TOKEN_MINUS
beq t0, t1, .L_build_binary_expression_minus
beq t0, t1, .Lbuild_binary_expression_minus
j .Lbuild_binary_expression_end
.L_build_binary_expression_equal:
.Lbuild_binary_expression_equal:
mv s1, a0 # Skip =.
li a0, 1
call _build_expression
@ -147,7 +163,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_and:
.Lbuild_binary_expression_and:
mv s1, a0 # Skip &.
li a0, 1
call _build_expression
@ -157,7 +173,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_or:
.Lbuild_binary_expression_or:
mv s1, a0 # Skip or.
li a0, 1
call _build_expression
@ -167,7 +183,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_plus:
.Lbuild_binary_expression_plus:
mv s1, a0 # Skip +.
li a0, 1
call _build_expression
@ -177,7 +193,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_minus:
.Lbuild_binary_expression_minus:
mv s1, a0 # Skip -.
li a0, 1
call _build_expression
@ -187,7 +203,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_product:
.Lbuild_binary_expression_product:
mv s1, a0 # Skip *.
li a0, 1
call _build_expression
@ -448,7 +464,6 @@ _build_expression:
call _write_out
addi s1, s1, 1 # Skip @.
call _skip_spaces
call _read_token
sw s1, 32(sp)
sw a0, 28(sp)
@ -654,7 +669,6 @@ _compile_call:
sw zero, 12(sp) # Argument count for a procedure call.
.Lcompile_call_paren:
call _skip_spaces
lbu t0, (s1)
li t1, 0x29 # )
beq t0, t1, .Lcompile_call_complete
@ -688,7 +702,6 @@ _compile_call:
li a1, 5
call _write_out
call _skip_spaces
lbu t0, (s1)
li t1, ','
bne t0, t1, .Lcompile_call_paren
@ -726,7 +739,6 @@ _compile_call:
li a0, '\n'
call _put_char
call _skip_spaces
addi s1, s1, 1 # Skip the close paren.
# Epilogue.
@ -912,7 +924,6 @@ _compile_procedure_section:
.Lcompile_procedure_section_loop:
call _skip_spaces
call _skip_comment
call _skip_spaces
mv a0, s1
addi a1, sp, 4
@ -945,7 +956,6 @@ _compile_module_declaration:
call _write_out
# Skip "program".
call _skip_comment
mv a0, s1
addi a1, sp, 4
call _tokenize_next
@ -965,9 +975,6 @@ _compile_constant_section:
sw s0, 16(sp)
addi s0, sp, 24
call _skip_comment
call _skip_spaces
mv a0, s1
addi a1, sp, 4
call _tokenize_next
@ -999,25 +1006,22 @@ _compile_constant_section:
.type _compile_constant, @function
_compile_constant:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
sw s0, 16(sp)
addi s0, sp, 24
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
mv a0, s1
addi a1, sp, 4
addi a1, sp, 12
call _tokenize_next
sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0.
mv a0, s1
add s1, s1, a1 # Save the identifier pointer before advancing it.
addi a1, sp, 0
call _tokenize_next # Skip the assignment sign.
mv s1, a0
# Write the identifier.
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
mv a0, s1
addi a1, sp, 4
call _tokenize_next
mv s1, a0 # Skip the assignment sign.
# : .long
li t0, 0x20676e6f # ong_
sw t0, 4(sp)
@ -1027,21 +1031,23 @@ _compile_constant:
li a1, 8
call _write_out
call _skip_spaces
call _read_token
mv a0, s1
addi a1, sp, 12
call _tokenize_next
mv s1, a0
mv a1, a0 # The literal length from _read_token should be in a1.
mv a0, s1 # Save the literal pointer before advancing it.
add s1, s1, a1
lw a0, 20(sp) # Save the literal pointer before advancing it.
lw a1, 16(sp) # The literal length.
call _write_out
li a0, '\n'
call _put_char
call _skip_spaces
# Epilogue.
lw ra, 20(sp)
lw s0, 16(sp)
addi sp, sp, 24
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
.type _compile_variable_section, @function
@ -1080,6 +1086,7 @@ _compile_variable_section:
addi sp, sp, 24
ret
# Compile a global variable.
.type _compile_variable, @function
_compile_variable:
# Prologue.
@ -1088,32 +1095,33 @@ _compile_variable:
sw s0, 40(sp)
addi s0, sp, 48
call _read_token
# Save the identifier on the stack since it should be emitted multiple times.
sw s1, 36(sp)
sw a0, 32(sp)
add s1, s1, a0
mv a0, s1
addi a1, sp, 28
call _tokenize_next
addi a1, sp, 4
call _tokenize_next # Skip the colon in front of the type.
addi a1, sp, 4
call _tokenize_next # Skip the opening bracket.
addi a1, sp, 16
call _tokenize_next # Save the array size on the stack since it has to be emitted multiple times.
addi a1, sp, 4
call _tokenize_next # Skip the closing bracket.
addi a1, sp, 4
call _tokenize_next # Skip the type.
mv s1, a0
call _skip_spaces
addi s1, s1, 1 # Skip the colon in front of the type.
call _skip_spaces
addi s1, s1, 1 # Skip the opening bracket.
call _read_token
# Save the array size on the stack since it has to be emitted multiple times.
sw s1, 28(sp)
/* DEBUG
lw a0, 24(sp)
add a0, a0, '0'
sw a0, 24(sp)
add s1, s1, a0
call _skip_spaces
addi s1, s1, 1 # Skip the closing bracket.
call _skip_spaces
call _read_token
add s1, s1, a0 # Skip the type.
addi a0, sp, 24
li a1, 1
call _write_error
lw a0, 28(sp)
li a1, 8
call _write_error
*/
# .type identifier, @object
la a0, asm_type
@ -1134,15 +1142,15 @@ _compile_variable:
call _write_out
li t0, 0x206f7265 # ero_
sw t0, 20(sp)
sw t0, 12(sp)
li t0, 0x7a2e203a # : .z
sw t0, 16(sp)
addi a0, sp, 16
sw t0, 8(sp)
addi a0, sp, 8
li a1, 8
call _write_out
lw a0, 28(sp)
lw a1, 24(sp)
lw a0, 24(sp)
lw a1, 20(sp)
call _write_out
li a0, '\n'
@ -1215,9 +1223,6 @@ _compile_procedure:
# Generate the body of the procedure.
.Lcompile_procedure_body:
call _skip_spaces
call _read_line
sw a0, 12(sp)
li t0, 0x0a646e65 # end\n
sw t0, 8(sp)
mv a0, s1
@ -1227,7 +1232,6 @@ _compile_procedure:
beqz a0, .Lcompile_procedure_end
lw a0, 12(sp)
call _compile_statement
j .Lcompile_procedure_body
@ -1245,111 +1249,95 @@ _compile_procedure:
addi sp, sp, 32
ret
# Compiles a goto statement to an unconditional jump.
.type _compile_goto, @function
_compile_goto:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
addi s1, s1, 4 # Skip the goto keyword.
mv a0, s1
addi a1, sp, 0
call _tokenize_next # Skip the goto keyword.
addi a1, sp, 0
call _tokenize_next # We should be on dot the label is beginning with.
addi a1, sp, 0
call _tokenize_next# Save the label name.
mv s1, a0
li t0, 0x206a # j_
sw t0, 8(sp)
addi a0, sp, 8
li a1, 2
li t0, 0x2e206a # j .
sw t0, 12(sp)
addi a0, sp, 12
li a1, 3
call _write_out
call _skip_spaces
sw s1, 8(sp) # We should be on dot the label is beginning with.
addi s1, s1, 1
call _read_token
add s1, s1, a0
addi a1, a0, 1 # Label length and the dot.
lw a0, 8(sp) # Saved dot position.
lw a1, 4(sp)
call _write_out
addi s1, s1, 1 # Skip the new line.
li a0, '\n'
call _put_char
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# a0 - Line length.
# Rewrites a label to assembly.
.type _compile_label, @function
_compile_label:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw a0, 0(sp) # Save the line length.
mv a1, a0 # Argument for _write_out later.
lw t0, 0(sp) # Line length.
mv t1, s1 # Line start.
add t1, t1, t0
addi t1, t1, -1 # Last character on the line.
lbu t1, (t1)
li t2, ';'
bne t1, t2, .Lcompile_label_colon
addi a1, a1, -1
.Lcompile_label_colon:
# Write the whole line as is.
mv a0, s1
call _write_out
addi a1, sp, 8
call _tokenize_next # Dot starting the label.
addi a1, sp, 8
call _tokenize_next
mv s1, a0
li t0, 0x3a # :
sw t0, 4(sp)
addi a0, sp, 4
li a1, 1
li a0, '.'
call _put_char
lw a0, 16(sp)
lw a1, 12(sp)
call _write_out
li t0, '\n'
sw t0, 4(sp)
addi a0, sp, 4
li a1, 1
call _write_out
lw a0, 0(sp)
addi a0, a0, 1 # Skip the new line as well.
add s1, s1, a0 # Skip the line.
li a0, ':'
call _put_char
li a0, '\n'
call _put_char
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Just skips the return keyword and evaluates the return expression.
.type _compile_return, @function
_compile_return:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
addi s1, s1, 6 # Skip return.
call _skip_spaces
mv a0, s1
addi a1, sp, 12
call _tokenize_next
mv s1, a0 # Skip return.
call _build_binary_expression
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
.type _compile_if, @function
@ -1403,8 +1391,6 @@ _compile_if:
li t1, TOKEN_END
beq t0, t1, .Lcompile_if_end
call _read_line
li a1, 1
call _compile_statement
j .Lcompile_if_loop
@ -1422,7 +1408,7 @@ _compile_if:
call _printi
# Finalize the label.
li t0, 0x0a3a # :\n:\n
li t0, 0x0a3a # :\n
sh t0, 16(sp)
addi a0, sp, 16
li a1, 2
@ -1436,11 +1422,7 @@ _compile_if:
addi sp, sp, 32
ret
# Parameters:
# a0 - Line length.
#
# Returns 1 in a0 if the parsed line contained a text section element such as a
# procedure or the program entry point. Otherwise sets a0 to 0.
# Checks for the type of the current statement and compiles it.
.type _compile_statement, @function
_compile_statement:
# Prologue.
@ -1449,42 +1431,17 @@ _compile_statement:
sw s0, 24(sp)
addi s0, sp, 32
# Preserve passed arguments.
sw a0, 20(sp)
mv a0, s1
lw a1, 20(sp)
call _is_local_identifier
bnez a0, .Lcompile_statement_identifier
mv a0, s1
li a1, 2
call _is_register_identifier
bnez a0, .Lcompile_statement_identifier
li t0, 0x6f746f67 # goto
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_statement_goto
call _skip_comment
/* DEBUG
mv a0, s1
li a1, 4
call _write_error
mv a0, s1
li a1, 4
call _write_error
*/
mv a0, s1
addi a1, sp, 0
call _tokenize_next
lw t0, 0(sp)
li t1, TOKEN_IDENTIFIER
beq t0, t1, .Lcompile_statement_identifier
li t1, TOKEN_GOTO
beq t0, t1, .Lcompile_statement_goto
li t1, TOKEN_RETURN
beq t0, t1, .Lcompile_statement_return
@ -1494,10 +1451,6 @@ _compile_statement:
li t1, TOKEN_DOT
beq t0, t1, .Lcompile_statement_label
lbu t0, (s1)
li t1, '_'
beq t0, t1, .Lcompile_statement_identifier
j .Lcompile_statement_empty # Else.
.Lcompile_statement_if:
@ -1505,7 +1458,6 @@ _compile_statement:
j .Lcompile_statement_end
.Lcompile_statement_label:
lw a0, 20(sp)
call _compile_label
j .Lcompile_statement_end
@ -1527,7 +1479,6 @@ _compile_statement:
.Lcompile_statement_end:
sw a0, 12(sp)
call _skip_spaces
call _skip_comment
lw a0, 12(sp)
@ -1559,10 +1510,10 @@ _compile_text_section:
.type _compile_entry_point, @function
_compile_entry_point:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# .type _start, @function
la a0, asm_start
@ -1581,7 +1532,6 @@ _compile_entry_point:
li t1, TOKEN_END
beq t0, t1, .Lcompile_entry_point_end
lw a0, 12(sp)
call _compile_statement
j .Lcompile_entry_point_body
@ -1593,27 +1543,9 @@ _compile_entry_point:
call _write_out
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Finds the end of the line and returns its length in a0.
.type _read_line, @function
_read_line:
mv t0, s1 # Local position in the source text.
.Lread_line_do:
lbu t1, (t0) # t1 = Current character.
beqz t1, .Lread_line_end # Exit the loop on the NUL character.
li t2, '\n'
beq t1, t2, .Lread_line_end # Exit the loop on the new line.
addi t0, t0, 1
j .Lread_line_do
.Lread_line_end:
sub a0, t0, s1 # Return the line length.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
.type _compile, @function

File diff suppressed because it is too large Load Diff

View File

@ -266,9 +266,9 @@ transitions:
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x03 Integer
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Integer
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
@ -518,6 +518,9 @@ _tokenize_next:
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Ltokenize_next_composite
li t0, 0x08 # Integer action.
beq t1, t0, .Ltokenize_next_integer
j .Ltokenize_next_reject
.Ltokenize_next_reject:
@ -588,6 +591,17 @@ _tokenize_next:
j .Ltokenize_next_end
.Ltokenize_next_integer:
lw a1, 12(sp)
sub a0, s1, a1
sw a0, 8(sp)
sw a0, 4(sp)
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Ltokenize_next_end
.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer.