Fix strings search looping

This commit is contained in:
Eugen Wissner 2025-05-04 23:49:39 +02:00
parent 0a0bc4e1f2
commit df1c0486c5
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
4 changed files with 243 additions and 350 deletions

View File

@ -425,6 +425,9 @@ _memcpy:
addi a0, a0, 1 addi a0, a0, 1
addi a1, a1, 1 addi a1, a1, 1
addi a2, a2, -1
j .Lmemcpy_loop
.Lmemcpy_end: .Lmemcpy_end:
mv a0, t0 mv a0, t0
@ -473,6 +476,8 @@ _strings_index:
beqz a0, .Lstrings_index_end beqz a0, .Lstrings_index_end
.Lstrings_index_next: .Lstrings_index_next:
# Advance the pointer, reduce the length.
lw a2, (s2)
addi s2, s2, 4 addi s2, s2, 4
add s2, s2, a2 add s2, s2, a2
addi s1, s1, -1 addi s1, s1, -1

View File

@ -19,34 +19,35 @@
.equ TOKEN_END, 13 .equ TOKEN_END, 13
.equ TOKEN_TYPE, 14 .equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15 .equ TOKEN_RECORD, 15
.equ TOKEN_RECORD, 16 .equ TOKEN_UNION, 16
.equ TOKEN_TRUE, 17 .equ TOKEN_TRUE, 17
.equ TOKEN_FASE, 18 .equ TOKEN_FALSE, 18
.equ TOKEN_NIL, 19 .equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20 .equ TOKEN_XOR, 20
.equ TOKEN_OR, 21 .equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22 .equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23 .equ TOKEN_CAST, 23
.equ TOKEN_DEFER, 24 .equ TOKEN_GOTO, 24
.equ TOKEN_CASE, 25 .equ TOKEN_CASE, 25
.equ TOKEN_OF, 26 .equ TOKEN_OF, 26
.equ TOKEN_IDENTIFIER, 27
# The constant should match the character index in the byte_keywords string. # The constant should match the character index in the byte_keywords string.
.equ TOKEN_AND, 27 .equ TOKEN_AND, 28
.equ TOKEN_DOT, 28 .equ TOKEN_DOT, 29
.equ TOKEN_COMMA, 29 .equ TOKEN_COMMA, 30
.equ TOKEN_COLON, 30 .equ TOKEN_COLON, 31
.equ TOKEN_SEMICOLON, 31 .equ TOKEN_SEMICOLON, 32
.equ TOKEN_LEFT_PAREN, 32 .equ TOKEN_LEFT_PAREN, 33
.equ TOKEN_RIGHT_PAREN, 33 .equ TOKEN_RIGHT_PAREN, 34
.equ TOKEN_LEFT_BRACKET, 34 .equ TOKEN_LEFT_BRACKET, 35
.equ TOKEN_RIGHT_BRACKET, 35 .equ TOKEN_RIGHT_BRACKET, 36
.equ TOKEN_HAT, 36 .equ TOKEN_HAT, 37
.equ TOKEN_EQUALS, 37 .equ TOKEN_EQUALS, 38
.equ TOKEN_PLUS, 38 .equ TOKEN_PLUS, 39
.equ TOKEN_MINUS, 39 .equ TOKEN_MINUS, 40
.equ TOKEN_ASTERISK, 40 .equ TOKEN_ASTERISK, 41
.equ TOKEN_AT, 41 .equ TOKEN_AT, 42
.equ TOKEN_ASSIGN, 42 .equ TOKEN_ASSIGN, 43

View File

@ -53,10 +53,12 @@ asm_type_object: .ascii ", @object\n"
asm_restore_parameters: asm_restore_parameters:
.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n" .ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n"
.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters .equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters
# Assembly text written out for each compiled procedure to save the argument
# registers a0-a5 on the stack, one store per register.
asm_preserve_parameters:
	.ascii "sw a0, 84(sp)\nsw a1, 80(sp)\nsw a2, 76(sp)\nsw a3, 72(sp)\nsw a4, 68(sp)\nsw a5, 64(sp)\n"
	.equ ASM_PRESERVE_PARAMETERS_SIZE, . - asm_preserve_parameters
.section .bss .section .bss
.type source_code, @object .type source_code, @object
.size source_code, SOURCE_BUFFER_SIZE
source_code: .zero SOURCE_BUFFER_SIZE source_code: .zero SOURCE_BUFFER_SIZE
.section .text .section .text
@ -65,10 +67,10 @@ source_code: .zero SOURCE_BUFFER_SIZE
.type _compile_import, @function .type _compile_import, @function
_compile_import: _compile_import:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -24
sw ra, 12(sp) sw ra, 20(sp)
sw s0, 8(sp) sw s0, 16(sp)
addi s0, sp, 16 addi s0, sp, 24
.Lcompile_import_loop: .Lcompile_import_loop:
call _skip_comment call _skip_comment
@ -89,9 +91,9 @@ _compile_import:
.Lcompile_import_end: .Lcompile_import_end:
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 20(sp)
lw s0, 8(sp) lw s0, 16(sp)
addi sp, sp, 16 addi sp, sp, 24
ret ret
.type _build_binary_expression, @function .type _build_binary_expression, @function
@ -107,9 +109,9 @@ _build_binary_expression:
call _skip_spaces call _skip_spaces
mv a0, s1 mv a0, s1
addi a1, sp, 16 addi a1, sp, 12
call _tokenize_next call _tokenize_next
lw t0, 16(sp) lw t0, 12(sp)
li t1, TOKEN_AND li t1, TOKEN_AND
beq t0, t1, .L_build_binary_expression_and beq t0, t1, .L_build_binary_expression_and
@ -146,11 +148,6 @@ _build_binary_expression:
j .Lbuild_binary_expression_end j .Lbuild_binary_expression_end
.L_build_binary_expression_and: .L_build_binary_expression_and:
/* DEBUG
addi a0, s1, 0
li a1, 4
call _write_error */
mv s1, a0 # Skip &. mv s1, a0 # Skip &.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -385,25 +382,29 @@ _compile_identifier_expression:
.type _build_expression, @function .type _build_expression, @function
_build_expression: _build_expression:
# Prologue. # Prologue.
addi sp, sp, -40 addi sp, sp, -48
sw ra, 36(sp) sw ra, 44(sp)
sw s0, 32(sp) sw s0, 40(sp)
addi s0, sp, 40 addi s0, sp, 48
addi a0, a0, '0' # Make the register number to a character. addi a0, a0, '0' # Make the register number to a character.
sw a0, 28(sp) # And save it. sw a0, 36(sp) # And save it.
mv a0, s1
addi a1, sp, 24
call _tokenize_next
call _skip_spaces call _skip_spaces
call _read_token call _read_token
sw s1, 24(sp) sw s1, 32(sp)
sw a0, 20(sp) sw a0, 28(sp)
lbu a0, (s1) lw a0, 24(sp)
li t0, '-'
li t0, TOKEN_MINUS
beq a0, t0, .Lbuild_expression_negate beq a0, t0, .Lbuild_expression_negate
lbu a0, (s1) li t0, TOKEN_AT
li t0, '@'
beq a0, t0, .Lbuild_expression_address beq a0, t0, .Lbuild_expression_address
lbu a0, (s1) lbu a0, (s1)
@ -414,8 +415,8 @@ _build_expression:
li t0, '_' li t0, '_'
beq a0, t0, .Lbuild_expression_call beq a0, t0, .Lbuild_expression_call
lw a0, 20(sp) lw a0, 28(sp)
lw a1, 28(sp) lw a1, 36(sp)
call _compile_identifier_expression call _compile_identifier_expression
j .Lbuild_expression_advance j .Lbuild_expression_advance
@ -432,7 +433,7 @@ _build_expression:
j .Lbuild_expression_advance j .Lbuild_expression_advance
.Lbuild_expression_address: .Lbuild_expression_address:
lw t1, 28(sp) lw t1, 36(sp)
li t0, 0x20 # _ li t0, 0x20 # _
sw t0, 16(sp) sw t0, 16(sp)
li t0, 0x2c707320 # _sp, li t0, 0x2c707320 # _sp,
@ -449,11 +450,11 @@ _build_expression:
addi s1, s1, 1 # Skip @. addi s1, s1, 1 # Skip @.
call _skip_spaces call _skip_spaces
call _read_token call _read_token
sw s1, 24(sp) sw s1, 32(sp)
sw a0, 20(sp) sw a0, 28(sp)
lw a0, 24(sp) lw a0, 32(sp)
lw a1, 20(sp) lw a1, 28(sp)
addi a0, a0, 4 # Skip the "loca" variable prefix. addi a0, a0, 4 # Skip the "loca" variable prefix.
addi a1, a1, -4 # Skip the "loca" variable prefix. addi a1, a1, -4 # Skip the "loca" variable prefix.
call _write_out call _write_out
@ -464,8 +465,8 @@ _build_expression:
j .Lbuild_expression_advance j .Lbuild_expression_advance
.Lbuild_expression_call: .Lbuild_expression_call:
lw a0, 24(sp) lw a0, 32(sp)
lw a1, 20(sp) lw a1, 28(sp)
add s1, s1, a1 add s1, s1, a1
addi s1, s1, 1 addi s1, s1, 1
call _compile_call call _compile_call
@ -473,7 +474,7 @@ _build_expression:
j .Lbuild_expression_end j .Lbuild_expression_end
.Lbuild_expression_literal: .Lbuild_expression_literal:
lw t1, 28(sp) lw t1, 36(sp)
li t0, 0x00202c00 # \0,_ li t0, 0x00202c00 # \0,_
or t0, t0, t1 or t0, t0, t1
sw t0, 16(sp) sw t0, 16(sp)
@ -483,8 +484,8 @@ _build_expression:
li a1, 7 li a1, 7
call _write_out call _write_out
lw a0, 24(sp) lw a0, 32(sp)
lw a1, 20(sp) lw a1, 28(sp)
call _write_out call _write_out
li a0, '\n' li a0, '\n'
@ -493,14 +494,14 @@ _build_expression:
j .Lbuild_expression_advance j .Lbuild_expression_advance
.Lbuild_expression_advance: .Lbuild_expression_advance:
lw a0, 20(sp) lw a0, 28(sp)
add s1, s1, a0 add s1, s1, a0
.Lbuild_expression_end: .Lbuild_expression_end:
# Epilogue. # Epilogue.
lw ra, 36(sp) lw ra, 44(sp)
lw s0, 32(sp) lw s0, 40(sp)
addi sp, sp, 40 addi sp, sp, 48
ret ret
# Compiles an lvalue. # Compiles an lvalue.
@ -595,34 +596,20 @@ _compile_identifier:
sw s0, 24(sp) sw s0, 24(sp)
addi s0, sp, 32 addi s0, sp, 32
call _read_token
# Save the pointer to the identifier and its length on the stack. # Save the pointer to the identifier and its length on the stack.
sw s1, 20(sp) mv a0, s1
sw a0, 16(sp) addi a1, sp, 12
call _tokenize_next
addi a1, sp, 0
call _tokenize_next
mv s1, a0
add s1, s1, a0 lw t0, 0(sp)
call _skip_spaces
call _read_token
# Save the pointer and the length of the token following the identifier. li t1, TOKEN_ASSIGN
sw s1, 12(sp) beq t0, t1, .Lcompile_identifier_assign
sw a0, 8(sp)
add s1, s1, a0 # Skip that token. li t1, TOKEN_LEFT_PAREN
call _skip_spaces
li t0, 0x3d3a # :=
sw t0, 4(sp)
lw a0, 12(sp)
lw a1, 8(sp)
addi a2, sp, 4
call _token_compare
beqz a0, .Lcompile_identifier_assign
lw t0, 12(sp)
lbu t0, (t0)
li t1, 0x28 # (
beq t0, t1, .Lcompile_identifier_call beq t0, t1, .Lcompile_identifier_call
j .Lcompile_identifier_end j .Lcompile_identifier_end
@ -917,10 +904,10 @@ _skip_comment:
.type _compile_procedure_section, @function .type _compile_procedure_section, @function
_compile_procedure_section: _compile_procedure_section:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -24
sw ra, 12(sp) sw ra, 20(sp)
sw s0, 8(sp) sw s0, 16(sp)
addi s0, sp, 16 addi s0, sp, 24
.Lcompile_procedure_section_loop: .Lcompile_procedure_section_loop:
call _skip_spaces call _skip_spaces
@ -928,10 +915,10 @@ _compile_procedure_section:
call _skip_spaces call _skip_spaces
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 4
call _tokenize_next call _tokenize_next
li t0, TOKEN_PROC li t0, TOKEN_PROC
lw t1, 0(sp) lw t1, 4(sp)
bne t0, t1, .Lcompile_procedure_section_end bne t0, t1, .Lcompile_procedure_section_end
call _compile_procedure call _compile_procedure
@ -940,18 +927,18 @@ _compile_procedure_section:
.Lcompile_procedure_section_end: .Lcompile_procedure_section_end:
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 20(sp)
lw s0, 8(sp) lw s0, 16(sp)
addi sp, sp, 16 addi sp, sp, 24
ret ret
.type _compile_module_declaration, @function .type _compile_module_declaration, @function
_compile_module_declaration: _compile_module_declaration:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -24
sw ra, 12(sp) sw ra, 20(sp)
sw s0, 8(sp) sw s0, 16(sp)
addi s0, sp, 16 addi s0, sp, 24
la a0, global_start la a0, global_start
li a1, GLOBAL_START_SIZE li a1, GLOBAL_START_SIZE
@ -960,32 +947,32 @@ _compile_module_declaration:
# Skip "program". # Skip "program".
call _skip_comment call _skip_comment
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 4
call _tokenize_next call _tokenize_next
mv s1, a0 mv s1, a0
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 20(sp)
lw s0, 8(sp) lw s0, 16(sp)
addi sp, sp, 16 addi sp, sp, 24
ret ret
.type _compile_constant_section, @function .type _compile_constant_section, @function
_compile_constant_section: _compile_constant_section:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -24
sw ra, 12(sp) sw ra, 20(sp)
sw s0, 8(sp) sw s0, 16(sp)
addi s0, sp, 16 addi s0, sp, 24
call _skip_comment call _skip_comment
call _skip_spaces call _skip_spaces
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 4
call _tokenize_next call _tokenize_next
li t0, TOKEN_CONST li t0, TOKEN_CONST
lw t1, 0(sp) lw t1, 4(sp)
bne t0, t1, .Lcompile_constant_section_end bne t0, t1, .Lcompile_constant_section_end
mv s1, a0 mv s1, a0
@ -1004,21 +991,21 @@ _compile_constant_section:
.Lcompile_constant_section_end: .Lcompile_constant_section_end:
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 20(sp)
lw s0, 8(sp) lw s0, 16(sp)
addi sp, sp, 16 addi sp, sp, 24
ret ret
.type _compile_constant, @function .type _compile_constant, @function
_compile_constant: _compile_constant:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -24
sw ra, 12(sp) sw ra, 20(sp)
sw s0, 8(sp) sw s0, 16(sp)
addi s0, sp, 16 addi s0, sp, 24
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 4
call _tokenize_next call _tokenize_next
sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0. sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0.
@ -1027,7 +1014,7 @@ _compile_constant:
call _write_out call _write_out
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 4
call _tokenize_next call _tokenize_next
mv s1, a0 # Skip the assignment sign. mv s1, a0 # Skip the assignment sign.
@ -1052,24 +1039,24 @@ _compile_constant:
call _put_char call _put_char
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 20(sp)
lw s0, 8(sp) lw s0, 16(sp)
addi sp, sp, 16 addi sp, sp, 24
ret ret
.type _compile_variable_section, @function .type _compile_variable_section, @function
_compile_variable_section: _compile_variable_section:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -24
sw ra, 12(sp) sw ra, 20(sp)
sw s0, 8(sp) sw s0, 16(sp)
addi s0, sp, 16 addi s0, sp, 24
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 4
call _tokenize_next call _tokenize_next
li t0, TOKEN_VAR li t0, TOKEN_VAR
lw t1, 0(sp) lw t1, 4(sp)
bne t0, t1, .Lcompile_variable_section_end bne t0, t1, .Lcompile_variable_section_end
mv s1, a0 mv s1, a0
@ -1088,24 +1075,24 @@ _compile_variable_section:
.Lcompile_variable_section_end: .Lcompile_variable_section_end:
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 20(sp)
lw s0, 8(sp) lw s0, 16(sp)
addi sp, sp, 16 addi sp, sp, 24
ret ret
.type _compile_variable, @function .type _compile_variable, @function
_compile_variable: _compile_variable:
# Prologue. # Prologue.
addi sp, sp, -40 addi sp, sp, -48
sw ra, 36(sp) sw ra, 44(sp)
sw s0, 32(sp) sw s0, 40(sp)
addi s0, sp, 40 addi s0, sp, 48
call _read_token call _read_token
# Save the identifier on the stack since it should be emitted multiple times. # Save the identifier on the stack since it should be emitted multiple times.
sw s1, 28(sp) sw s1, 36(sp)
sw a0, 24(sp) sw a0, 32(sp)
add s1, s1, a0 add s1, s1, a0
call _skip_spaces call _skip_spaces
@ -1117,8 +1104,8 @@ _compile_variable:
call _read_token call _read_token
# Save the array size on the stack since it has to be emitted multiple times. # Save the array size on the stack since it has to be emitted multiple times.
sw s1, 20(sp) sw s1, 28(sp)
sw a0, 16(sp) sw a0, 24(sp)
add s1, s1, a0 add s1, s1, a0
call _skip_spaces call _skip_spaces
@ -1133,8 +1120,8 @@ _compile_variable:
li a1, ASM_TYPE_SIZE li a1, ASM_TYPE_SIZE
call _write_out call _write_out
lw a0, 28(sp) lw a0, 36(sp)
lw a1, 24(sp) lw a1, 32(sp)
call _write_out call _write_out
la a0, asm_type_object la a0, asm_type_object
@ -1142,29 +1129,29 @@ _compile_variable:
call _write_out call _write_out
# identifier: .zero size # identifier: .zero size
lw a0, 28(sp) lw a0, 36(sp)
lw a1, 24(sp) lw a1, 32(sp)
call _write_out call _write_out
li t0, 0x206f7265 # ero_ li t0, 0x206f7265 # ero_
sw t0, 12(sp) sw t0, 20(sp)
li t0, 0x7a2e203a # : .z li t0, 0x7a2e203a # : .z
sw t0, 8(sp) sw t0, 16(sp)
addi a0, sp, 8 addi a0, sp, 16
li a1, 8 li a1, 8
call _write_out call _write_out
lw a0, 20(sp) lw a0, 28(sp)
lw a1, 16(sp) lw a1, 24(sp)
call _write_out call _write_out
li a0, '\n' li a0, '\n'
call _put_char call _put_char
# Epilogue. # Epilogue.
lw ra, 36(sp) lw ra, 44(sp)
lw s0, 32(sp) lw s0, 40(sp)
addi sp, sp, 40 addi sp, sp, 48
ret ret
.type _compile_procedure, @function .type _compile_procedure, @function
@ -1175,11 +1162,12 @@ _compile_procedure:
sw s0, 24(sp) sw s0, 24(sp)
addi s0, sp, 32 addi s0, sp, 32
addi s1, s1, 5 # Skip proc_ mv a0, s1
call _read_token addi a1, sp, 12
sw s1, 20(sp) call _tokenize_next # Skip proc.
sw a0, 16(sp) addi a1, sp, 12
add s1, s1, a0 call _tokenize_next
mv s1, a0
# .type identifier, @function # .type identifier, @function
la a0, asm_type la a0, asm_type
@ -1204,29 +1192,16 @@ _compile_procedure:
li a1, 2 li a1, 2
call _write_out call _write_out
call _skip_spaces
addi s1, s1, 1 # Skip opening argument paren.
call _skip_spaces
addi s1, s1, 1 # Skip closing argument paren.
li t0, 0x6e # n
sw t0, 12(sp)
li t0, 0x69676562 # begi
sw t0, 8(sp)
# Skip all declarations until we find the "begin" keyword, denoting the # Skip all declarations until we find the "begin" keyword, denoting the
# beginning of the procedure body. # beginning of the procedure body.
.Lcompile_procedure_begin: .Lcompile_procedure_begin:
call _skip_spaces
call _read_token
mv a1, a0
mv a0, s1 mv a0, s1
addi a2, sp, 8 addi a1, sp, 4
add s1, s1, a1 call _tokenize_next
call _token_compare mv s1, a0
lw t0, 4(sp)
bnez a0, .Lcompile_procedure_begin li t1, TOKEN_BEGIN
bne t0, t1, .Lcompile_procedure_begin
# Generate the procedure prologue with a predefined stack size. # Generate the procedure prologue with a predefined stack size.
la a0, prologue la a0, prologue
@ -1234,56 +1209,8 @@ _compile_procedure:
call _write_out call _write_out
# Save passed arguments on the stack. # Save passed arguments on the stack.
li t0, 0x0a29 # )\n la a0, asm_preserve_parameters
sw t0, 12(sp) li a1, ASM_PRESERVE_PARAMETERS_SIZE
li t0, 0x70732834 # 4(sp
sw t0, 8(sp)
li t0, 0x38202c30 # 0, 8
sw t0, 4(sp)
li t0, 0x61207773 # sw a
sw t0, 0(sp)
addi a0, sp, 0
li a1, 14
call _write_out
li t0, '0'
sb t0, 8(sp)
li t0, 0x38202c31 # 1, 8
sw t0, 4(sp)
addi a0, sp, 0
li a1, 14
call _write_out
li t0, '6'
sb t0, 8(sp)
li t0, 0x37202c32 # 2, 7
sw t0, 4(sp)
addi a0, sp, 0
li a1, 14
call _write_out
li t0, '2'
sb t0, 8(sp)
li t0, 0x37202c33 # 3, 7
sw t0, 4(sp)
addi a0, sp, 0
li a1, 14
call _write_out
li t0, '8'
sb t0, 8(sp)
li t0, 0x36202c34 # 4, 6
sw t0, 4(sp)
addi a0, sp, 0
li a1, 14
call _write_out
li t0, '4'
sb t0, 8(sp)
li t0, 0x36202c35 # 5, 6
sw t0, 4(sp)
addi a0, sp, 0
li a1, 14
call _write_out call _write_out
# Generate the body of the procedure. # Generate the body of the procedure.
@ -1318,48 +1245,6 @@ _compile_procedure:
addi sp, sp, 32 addi sp, sp, 32
ret ret
# Compares a length-prefixed token with a null-terminated string.
#
# Parameters:
# a0 - Address of the token string.
# a1 - Length of the token string in a0.
# a2 - Address of the null-terminated string.
#
# Sets a0 to 0 if both strings match, otherwise to 1.
# Uses t0-t4 as scratch registers; a1 and a2 are left unchanged.
.type _token_compare, @function
_token_compare:
	mv t0, a0 # Cursor into the token.
	mv t1, a1 # Characters left in the token.
	mv t2, a2 # Cursor into the null-terminated string.

.Ltoken_compare_loop:
	lbu t3, (t2)
	# Once the token is consumed, the result depends only on whether the
	# null-terminated string has reached its terminator as well.
	beqz t1, .Ltoken_compare_exhausted
	# The token still has characters, so a terminator here is a mismatch.
	beqz t3, .Ltoken_compare_differ

	lbu t4, (t0)
	bne t4, t3, .Ltoken_compare_differ

	addi t2, t2, 1
	addi t0, t0, 1
	addi t1, t1, -1
	j .Ltoken_compare_loop

.Ltoken_compare_exhausted:
	snez a0, t3 # 0 if the terminator was reached, 1 otherwise.
	ret

.Ltoken_compare_differ:
	li a0, 1
	ret
.type _compile_goto, @function .type _compile_goto, @function
_compile_goto: _compile_goto:
# Prologue. # Prologue.
@ -1475,30 +1360,30 @@ _compile_if:
sw s0, 24(sp) sw s0, 24(sp)
addi s0, sp, 32 addi s0, sp, 32
addi s1, s1, 2 # Skip the if. mv a0, s1
call _skip_spaces addi a1, sp, 0
call _tokenize_next
mv s1, a0 # Skip the if.
call _build_binary_expression call _build_binary_expression
call _skip_spaces mv a0, s1
addi s1, s1, 4 # Skip the then. addi a1, sp, 0
call _tokenize_next
# if end marker. mv s1, a0 # Skip the then.
li t0, 0x00646e65 # end\0
sw t0, 20(sp)
# Label prefix. # Label prefix.
li t0, 0x66694c2e # .Lif li t0, 0x66694c2e # .Lif
sw t0, 16(sp) sw t0, 20(sp)
li t0, 0x202c3061 # a0,_ li t0, 0x202c3061 # a0,_
sw t0, 12(sp) sw t0, 16(sp)
li t0, 0x207a7165 # eqz_ li t0, 0x207a7165 # eqz_
sw t0, 8(sp) sw t0, 12(sp)
li t0, 0x62626262 # bbbb li t0, 0x62626262 # bbbb
sb t0, 7(sp) sb t0, 11(sp)
addi a0, sp, 7 addi a0, sp, 11
li a1, 13 li a1, 13
call _write_out call _write_out
@ -1510,15 +1395,13 @@ _compile_if:
call _put_char call _put_char
.Lcompile_if_loop: .Lcompile_if_loop:
call _skip_spaces
call _read_token
mv a1, a0
mv a0, s1 mv a0, s1
addi a2, sp, 20 addi a1, sp, 0
call _token_compare call _tokenize_next
beqz a0, .Lcompile_if_end lw t0, 0(sp)
li t1, TOKEN_END
beq t0, t1, .Lcompile_if_end
call _read_line call _read_line
li a1, 1 li a1, 1
@ -1527,8 +1410,10 @@ _compile_if:
j .Lcompile_if_loop j .Lcompile_if_loop
.Lcompile_if_end: .Lcompile_if_end:
mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next.
# Write the label prefix. # Write the label prefix.
addi a0, sp, 16 addi a0, sp, 20
li a1, 4 li a1, 4
call _write_out call _write_out
@ -1537,14 +1422,13 @@ _compile_if:
call _printi call _printi
# Finalize the label. # Finalize the label.
li t0, 0x0a3a0a3a # :\n:\n li t0, 0x0a3a # :\n
sh t0, 12(sp) sh t0, 16(sp)
addi a0, sp, 12 addi a0, sp, 16
li a1, 2 li a1, 2
call _write_out call _write_out
addi s2, s2, 1 # Increment the label counter. addi s2, s2, 1 # Increment the label counter.
addi s1, s1, 4 # Skip the end with newline.
# Epilogue. # Epilogue.
lw ra, 28(sp) lw ra, 28(sp)
@ -1554,8 +1438,6 @@ _compile_if:
# Parameters: # Parameters:
# a0 - Line length. # a0 - Line length.
# a1 - Whether the section header was already emitted. If not it should be
# emitted before any code is written.
# #
# Returns 1 in a0 if the parsed line contained a text section element such as a # Returns 1 in a0 if the parsed line contained a text section element such as a
# procedure or the program entry point. Otherwise sets a0 to 0. # procedure or the program entry point. Otherwise sets a0 to 0.
@ -1569,9 +1451,6 @@ _compile_statement:
# Preserve passed arguments. # Preserve passed arguments.
sw a0, 20(sp) sw a0, 20(sp)
sw a1, 16(sp)
call _skip_comment
mv a0, s1 mv a0, s1
lw a1, 20(sp) lw a1, 20(sp)
@ -1591,25 +1470,31 @@ _compile_statement:
call _memcmp call _memcmp
beqz a0, .Lcompile_statement_goto beqz a0, .Lcompile_statement_goto
li t0, 0x75746572 # retu call _skip_comment
sw t0, 12(sp) /* DEBUG
mv a0, s1 mv a0, s1
addi a1, sp, 12 li a1, 4
li a2, 4 call _write_error
call _memcmp mv a0, s1
beqz a0, .Lcompile_statement_return li a1, 4
call _write_error
*/
li t0, 0x6669 # if
sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 0
li a2, 2 call _tokenize_next
call _memcmp lw t0, 0(sp)
beqz a0, .Lcompile_statement_if
li t1, TOKEN_RETURN
beq t0, t1, .Lcompile_statement_return
li t1, TOKEN_IF
beq t0, t1, .Lcompile_statement_if
li t1, TOKEN_DOT
beq t0, t1, .Lcompile_statement_label
lbu t0, (s1) lbu t0, (s1)
li t1, '.'
beq t0, t1, .Lcompile_statement_label
li t1, '_' li t1, '_'
beq t0, t1, .Lcompile_statement_identifier beq t0, t1, .Lcompile_statement_identifier
@ -1688,30 +1573,25 @@ _compile_entry_point:
# Generate the body of the procedure. # Generate the body of the procedure.
.Lcompile_entry_point_body: .Lcompile_entry_point_body:
call _skip_spaces
call _read_line
sw a0, 12(sp)
li t0, 0x2e646e65 # end
sw t0, 8(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 4
li a2, 4 call _tokenize_next
call _memcmp
beqz a0, .Lcompile_entry_point_end lw t0, 4(sp)
li t1, TOKEN_END
beq t0, t1, .Lcompile_entry_point_end
lw a0, 12(sp) lw a0, 12(sp)
call _compile_statement call _compile_statement
j .Lcompile_entry_point_body j .Lcompile_entry_point_body
.Lcompile_entry_point_end: .Lcompile_entry_point_end:
mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call.
la a0, asm_exit la a0, asm_exit
li a1, ASM_EXIT_SIZE li a1, ASM_EXIT_SIZE
call _write_out call _write_out
addi s1, s1, 4 # Skip end.
call _skip_spaces # Read the possible new line at the end of the file.
# Epilogue. # Epilogue.
lw ra, 4(sp) lw ra, 4(sp)
lw s0, 0(sp) lw s0, 0(sp)

View File

@ -38,7 +38,6 @@
.equ CLASS_COUNT, 20 .equ CLASS_COUNT, 20
.type classification, @object .type classification, @object
.size classification, 128
classification: classification:
.byte CLASS_EOF # 00 NUL .byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH .byte CLASS_INVALID # 01 SOH
@ -172,7 +171,7 @@ classification:
# #
# Textual keywords in the language. # Textual keywords in the language.
# #
.equ KEYWORDS_COUNT, 21 .equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object .type keywords, @object
keywords: keywords:
@ -222,8 +221,8 @@ keywords:
.ascii "return" .ascii "return"
.word 4 .word 4
.ascii "cast" .ascii "cast"
.word 5 .word 4
.ascii "defer" .ascii "goto"
.word 4 .word 4
.ascii "case" .ascii "case"
.word 2 .word 2
@ -251,13 +250,12 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# handles each action. # handles each action.
# #
.type transitions, @object .type transitions, @object
.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT
transitions: transitions:
# Invalid Digit Alpha Space : = ( ) # Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL . # * _ Single Hex 0 x NUL .
# - " or ' > < # - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108 .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start .word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
@ -280,9 +278,9 @@ transitions:
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus .word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
@ -399,6 +397,10 @@ _classify_identifier:
la a1, keywords la a1, keywords
call _strings_index call _strings_index
bnez a0, .Lclassify_identifier_end
li a0, TOKEN_IDENTIFIER
.Lclassify_identifier_end:
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 12(sp)
lw s0, 8(sp) lw s0, 8(sp)
@ -426,7 +428,7 @@ _classify_single:
la a1, byte_keywords la a1, byte_keywords
sub a0, a0, a1 sub a0, a0, a1
addi a0, a0, 27 addi a0, a0, TOKEN_IDENTIFIER + 1
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 12(sp)
@ -466,16 +468,17 @@ _classify_composite:
.type _tokenize_next, @function .type _tokenize_next, @function
_tokenize_next: _tokenize_next:
# Prologue. # Prologue.
addi sp, sp, -24 addi sp, sp, -32
sw ra, 20(sp) sw ra, 28(sp)
sw s0, 16(sp) sw s0, 24(sp)
addi s0, sp, 24 addi s0, sp, 32
sw s1, 12(sp) # Preserve s1 used for current source text position. sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0 mv s1, a0
sw a0, 4(sp) # Keeps a pointer to the beginning of a token. sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
sw s2, 8(sp) # Preserve s2 containing the current state. sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state. li s2, 0x00 # Initial, start state.
sw a1, 0(sp) sw a1, 0(sp)
@ -529,9 +532,9 @@ _tokenize_next:
.Ltokenize_next_skip: .Ltokenize_next_skip:
addi s1, s1, 1 addi s1, s1, 1
lw t0, 4(sp) lw t0, 12(sp)
addi t0, t0, 1 addi t0, t0, 1
sw t0, 4(sp) sw t0, 12(sp)
j .Ltokenize_next_loop j .Ltokenize_next_loop
@ -553,16 +556,20 @@ _tokenize_next:
.Ltokenize_next_identifier: .Ltokenize_next_identifier:
# An identifier can be a textual keyword. # An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter. # Check the kind of the token and write it into the output parameter.
lw a1, 4(sp) lw a1, 12(sp)
sub a0, s1, a1 sub a0, s1, a1
sw a0, 8(sp)
call _classify_identifier call _classify_identifier
lw a1, 0(sp) sw a0, 4(sp)
sw a0, (a1) lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Ltokenize_next_end j .Ltokenize_next_end
.Ltokenize_next_single: .Ltokenize_next_single:
lw a0, 4(sp) lw a0, 12(sp)
addi s1, a0, 1 addi s1, a0, 1
lbu a0, (a0) lbu a0, (a0)
call _classify_single call _classify_single
@ -573,7 +580,7 @@ _tokenize_next:
.Ltokenize_next_composite: .Ltokenize_next_composite:
addi s1, s1, 1 addi s1, s1, 1
lw a1, 4(sp) lw a1, 12(sp)
sub a0, s1, a1 sub a0, s1, a1
call _classify_composite call _classify_composite
lw a1, 0(sp) lw a1, 0(sp)
@ -585,11 +592,11 @@ _tokenize_next:
mv a0, s1 # Return the advanced text pointer. mv a0, s1 # Return the advanced text pointer.
# Restore saved registers. # Restore saved registers.
lw s1, 12(sp) lw s1, 20(sp)
lw s2, 8(sp) lw s2, 16(sp)
# Epilogue. # Epilogue.
lw ra, 20(sp) lw ra, 28(sp)
lw s0, 16(sp) lw s0, 24(sp)
addi sp, sp, 24 addi sp, sp, 32
ret ret