Tokenize single character symbols

This commit is contained in:
2025-05-03 23:35:41 +02:00
parent dcfd6b1515
commit 0a0bc4e1f2
6 changed files with 291 additions and 335 deletions

View File

@@ -1,6 +1,10 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _start # Program entry point.
# Global variables or registers.
# Registers used as global variables:
# s1 - Contains the current position in the source text.
# s2 - Label counter.
@@ -42,6 +46,10 @@ asm_neg_a0: .ascii "neg a0, a0\n"
.equ ASM_NEG_A0_SIZE, . - asm_neg_a0
asm_type: .ascii ".type "
.equ ASM_TYPE_SIZE, . - asm_type
asm_type_function: .ascii ", @function\n"
.equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function
asm_type_object: .ascii ", @object\n"
.equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object
asm_restore_parameters:
.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n"
.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters
@@ -77,14 +85,6 @@ _compile_import:
call _tokenize_next
mv s1, a0
/* DEBUG
lw t0, 0(sp)
addi t0, t0, '0'
sw t0, 4(sp)
addi a0, sp, 4
li a1, 1
call _write_error*/
j .Lcompile_import_loop
.Lcompile_import_end:
@@ -104,63 +104,35 @@ _build_binary_expression:
li a0, 0
call _build_expression
call _skip_spaces
call _read_token
sw a0, 20(sp)
li t0, '&'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_and
addi a1, sp, 16
call _tokenize_next
lw t0, 16(sp)
li t0, 0x726f # or
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_or
li t1, TOKEN_AND
beq t0, t1, .L_build_binary_expression_and
li t0, '='
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_equal
li t1, TOKEN_OR
beq t0, t1, .L_build_binary_expression_or
li t0, '+'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_plus
li t1, TOKEN_PLUS
beq t0, t1, .L_build_binary_expression_plus
li t0, '-'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_minus
li t1, TOKEN_EQUALS
beq t0, t1, .L_build_binary_expression_equal
li t0, '*'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_product
li t1, TOKEN_ASTERISK
beq t0, t1, .L_build_binary_expression_product
li t1, TOKEN_MINUS
beq t0, t1, .L_build_binary_expression_minus
j .Lbuild_binary_expression_end
.L_build_binary_expression_equal:
addi s1, s1, 1 # Skip =.
mv s1, a0 # Skip =.
li a0, 1
call _build_expression
la a0, asm_sub_a0_a1
@@ -174,7 +146,12 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_and:
addi s1, s1, 1 # Skip &.
/* DEBUG
addi a0, s1, 0
li a1, 4
call _write_error */
mv s1, a0 # Skip &.
li a0, 1
call _build_expression
la a0, asm_and_a0_a1
@@ -184,7 +161,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_or:
addi s1, s1, 2 # Skip or.
mv s1, a0 # Skip or.
li a0, 1
call _build_expression
la a0, asm_or_a0_a1
@@ -194,7 +171,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_plus:
addi s1, s1, 1 # Skip +.
mv s1, a0 # Skip +.
li a0, 1
call _build_expression
la a0, asm_add_a0_a1
@@ -204,7 +181,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_minus:
addi s1, s1, 1 # Skip -.
mv s1, a0 # Skip -.
li a0, 1
call _build_expression
la a0, asm_sub_a0_a1
@@ -214,7 +191,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_product:
addi s1, s1, 1 # Skip *.
mv s1, a0 # Skip *.
li a0, 1
call _build_expression
la a0, asm_mul_a0_a1
@@ -937,29 +914,31 @@ _skip_comment:
# Parameters:
# a0 - Line length.
.type _compile_assembly, @function
_compile_assembly:
.type _compile_procedure_section, @function
_compile_procedure_section:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp) # a0 - Line length.
.Lcompile_procedure_section_loop:
call _skip_spaces
call _skip_comment
call _skip_spaces
# Write the source to the standard output.
mv a0, s1
lw a1, 4(sp)
call _write_out
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_PROC
lw t1, 0(sp)
bne t0, t1, .Lcompile_procedure_section_end
lw t0, 4(sp)
add s1, s1, t0
call _compile_procedure
li a0, '\n'
call _put_char
addi s1, s1, 1 # Skip the new line.
j .Lcompile_procedure_section_loop
.Lcompile_procedure_section_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
@@ -1038,15 +1017,19 @@ _compile_constant:
sw s0, 8(sp)
addi s0, sp, 16
call _read_token
mv a0, s1
addi a1, sp, 0
call _tokenize_next
mv a1, a0 # The identifier length from _read_token should be in a1.
mv a0, s1 # Save the identifier pointer before advancing it.
add s1, s1, a1
sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0.
mv a0, s1
add s1, s1, a1 # Save the identifier pointer before advancing it.
call _write_out
call _skip_spaces
addi s1, s1, 2 # Skip the assignment sign.
mv a0, s1
addi a1, sp, 0
call _tokenize_next
mv s1, a0 # Skip the assignment sign.
# : .long
li t0, 0x20676e6f # ong_
@@ -1154,42 +1137,10 @@ _compile_variable:
lw a1, 24(sp)
call _write_out
li t0, 0x0a74 # t\n
sw t0, 12(sp)
li t0, 0x63656a62 # bjec
sw t0, 8(sp)
li t0, 0x6f40202c # , @o
sw t0, 4(sp)
addi a0, sp, 4
li a1, 10
la a0, asm_type_object
li a1, ASM_TYPE_OBJECT_SIZE
call _write_out
# .size identifier, size
li t0, 0x2065 # e_
sw t0, 12(sp)
li t0, 0x7a69732e # .siz
sw t0, 8(sp)
addi a0, sp, 8
li a1, 6
call _write_out
lw a0, 28(sp)
lw a1, 24(sp)
call _write_out
li t0, 0x202c # ,_
sw t0, 12(sp)
addi a0, sp, 12
li a1, 2
call _write_out
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
li a0, '\n'
call _put_char
# identifier: .zero size
lw a0, 28(sp)
lw a1, 24(sp)
@@ -1239,14 +1190,8 @@ _compile_procedure:
lw a1, 16(sp)
call _write_out
li t0, 0x0a6e6f69 # ion\n
sw t0, 12(sp)
li t0, 0x74636e75 # unct
sw t0, 8(sp)
li t0, 0x6640202c # , @f
sw t0, 4(sp)
addi a0, sp, 4
li a1, 12
la a0, asm_type_function
li a1, ASM_TYPE_FUNCTION_SIZE
call _write_out
lw a0, 20(sp)
@@ -1356,7 +1301,7 @@ _compile_procedure:
beqz a0, .Lcompile_procedure_end
lw a0, 12(sp)
call _compile_line
call _compile_statement
j .Lcompile_procedure_body
.Lcompile_procedure_end:
@@ -1577,7 +1522,7 @@ _compile_if:
call _read_line
li a1, 1
call _compile_line
call _compile_statement
j .Lcompile_if_loop
@@ -1614,8 +1559,8 @@ _compile_if:
#
# Returns 1 in a0 if the parsed line contained a text section element such as a
# procedure or the program entry point. Otherwise sets a0 to 0.
.type _compile_line, @function
_compile_line:
.type _compile_statement, @function
_compile_statement:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@@ -1626,45 +1571,17 @@ _compile_line:
sw a0, 20(sp)
sw a1, 16(sp)
beqz a0, .Lcompile_line_empty # Skip an empty line.
lbu t0, (s1)
li t1, '('
beq t0, t1, .Lcompile_line_comment
li t0, 0x636f7270 # proc
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_procedure
li t0, 0x69676562 # begi
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_begin
li t0, 0x2e646e65 # end.
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_exit
call _skip_comment
mv a0, s1
lw a1, 20(sp)
call _is_local_identifier
bnez a0, .Lcompile_line_identifier
bnez a0, .Lcompile_statement_identifier
mv a0, s1
li a1, 2
call _is_register_identifier
bnez a0, .Lcompile_line_identifier
bnez a0, .Lcompile_statement_identifier
li t0, 0x6f746f67 # goto
sw t0, 12(sp)
@@ -1672,7 +1589,7 @@ _compile_line:
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_goto
beqz a0, .Lcompile_statement_goto
li t0, 0x75746572 # retu
sw t0, 12(sp)
@@ -1680,7 +1597,7 @@ _compile_line:
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_return
beqz a0, .Lcompile_statement_return
li t0, 0x6669 # if
sw t0, 12(sp)
@@ -1688,77 +1605,42 @@ _compile_line:
addi a1, sp, 12
li a2, 2
call _memcmp
beqz a0, .Lcompile_line_if
beqz a0, .Lcompile_statement_if
lbu t0, (s1)
li t1, '.'
beq t0, t1, .Lcompile_line_label
beq t0, t1, .Lcompile_statement_label
li t1, '_'
beq t0, t1, .Lcompile_line_identifier
beq t0, t1, .Lcompile_statement_identifier
j .Lcompile_line_unchanged # Else.
j .Lcompile_statement_empty # Else.
.Lcompile_line_if:
.Lcompile_statement_if:
call _compile_if
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_label:
.Lcompile_statement_label:
lw a0, 20(sp)
call _compile_label
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_return:
.Lcompile_statement_return:
call _compile_return
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_goto:
.Lcompile_statement_goto:
call _compile_goto
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_identifier:
.Lcompile_statement_identifier:
call _compile_identifier
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_exit:
call _compile_exit
j .Lcompile_line_section
.Lcompile_line_begin:
lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_entry
call _compile_text_section
.Lcompile_line_compile_entry:
call _compile_entry_point
li a0, 1
j .Lcompile_line_end
.Lcompile_line_procedure:
lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_procedure
call _compile_text_section
.Lcompile_line_compile_procedure:
call _compile_procedure
li a0, 1
j .Lcompile_line_end
.Lcompile_line_comment:
lw a0, 20(sp)
call _skip_comment
j .Lcompile_line_section
.Lcompile_line_empty:
.Lcompile_statement_empty:
addi s1, s1, 1
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_unchanged:
lw a0, 20(sp)
call _compile_assembly
j .Lcompile_line_section
.Lcompile_line_section:
mv a0, zero
.Lcompile_line_end:
.Lcompile_statement_end:
sw a0, 12(sp)
call _skip_spaces
call _skip_comment
@@ -1804,20 +1686,25 @@ _compile_entry_point:
addi s1, s1, 6 # Skip begin\n.
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Generate the body of the procedure.
.Lcompile_entry_point_body:
call _skip_spaces
call _read_line
sw a0, 12(sp)
li t0, 0x2e646e65 # end
sw t0, 8(sp)
mv a0, s1
addi a1, sp, 8
li a2, 4
call _memcmp
.type _compile_exit, @function
_compile_exit:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
beqz a0, .Lcompile_entry_point_end
lw a0, 12(sp)
call _compile_statement
j .Lcompile_entry_point_body
.Lcompile_entry_point_end:
la a0, asm_exit
li a1, ASM_EXIT_SIZE
call _write_out
@@ -1857,30 +1744,13 @@ _compile:
sw s0, 8(sp)
addi s0, sp, 16
sw zero, 4(sp) # Whether the text section header was already emitted.
call _compile_module_declaration
call _compile_import
call _compile_constant_section
call _compile_variable_section
.Lcompile_do:
lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
call _skip_spaces
call _read_line
lw a1, 4(sp)
call _compile_line
beqz a0, .Lcompile_do
# Update whether the text section header was already emitted.
lw t0, 4(sp)
or t0, t0, a0
sw t0, 4(sp)
j .Lcompile_do
.Lcompile_end:
call _compile_text_section
call _compile_procedure_section
call _compile_entry_point
# Epilogue.
lw ra, 12(sp)
@@ -1888,22 +1758,6 @@ _compile:
addi sp, sp, 16
ret
# Sets the label counter register s2 to 1 and returns.
#
# Takes no parameters and does not modify a0. ra and s0 are saved on an
# 8-byte stack frame and restored before returning.
.type _main, @function
_main:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8 # s0 = value of sp on entry to this function.
li s2, 1 # s2 is the label counter (see the register list at the top of the file).
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Entry point.
.type _start, @function
_start:
@@ -1912,8 +1766,7 @@ _start:
li a1, SOURCE_BUFFER_SIZE # Buffer size.
call _read_file
mv a0, s1
call _main
li s2, 1
call _compile
# Call exit.