Fix strings search looping

This commit is contained in:
2025-05-04 23:49:39 +02:00
parent 0a0bc4e1f2
commit df1c0486c5
4 changed files with 243 additions and 350 deletions

View File

@@ -38,7 +38,6 @@
.equ CLASS_COUNT, 20
.type classification, @object
.size classification, 128
classification:
.byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH
@@ -172,7 +171,7 @@ classification:
#
# Textual keywords in the language.
#
.equ KEYWORDS_COUNT, 21
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object
keywords:
@@ -222,8 +221,8 @@ keywords:
.ascii "return"
.word 4
.ascii "cast"
.word 5
.ascii "defer"
.word 4
.ascii "goto"
.word 4
.ascii "case"
.word 2
@@ -251,13 +250,12 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# handles each action.
#
.type transitions, @object
.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
@@ -280,9 +278,9 @@ transitions:
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
@@ -399,6 +397,10 @@ _classify_identifier:
la a1, keywords
call _strings_index
bnez a0, .Lclassify_identifier_end
li a0, TOKEN_IDENTIFIER
.Lclassify_identifier_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
@@ -426,7 +428,7 @@ _classify_single:
la a1, byte_keywords
sub a0, a0, a1
addi a0, a0, 27
addi a0, a0, TOKEN_IDENTIFIER + 1
# Epilogue.
lw ra, 12(sp)
@@ -466,16 +468,17 @@ _classify_composite:
.type _tokenize_next, @function
_tokenize_next:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
sw s0, 16(sp)
addi s0, sp, 24
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 12(sp) # Preserve s1 used for current source text position.
sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0
sw a0, 4(sp) # Keeps a pointer to the beginning of a token.
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
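# 4(sp) and 8(sp) are reserved for the kind and the length of the token.
# Together with the start pointer at 12(sp) they form the 12 bytes that the
# identifier path copies into the output parameter.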
sw s2, 8(sp) # Preserve s2 containing the current state.
sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
sw a1, 0(sp)
@@ -529,9 +532,9 @@ _tokenize_next:
.Ltokenize_next_skip:
addi s1, s1, 1
lw t0, 4(sp)
lw t0, 12(sp)
addi t0, t0, 1
sw t0, 4(sp)
sw t0, 12(sp)
j .Ltokenize_next_loop
@@ -553,16 +556,20 @@ _tokenize_next:
.Ltokenize_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 4(sp)
lw a1, 12(sp)
sub a0, s1, a1
sw a0, 8(sp)
call _classify_identifier
lw a1, 0(sp)
sw a0, (a1)
sw a0, 4(sp)
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Ltokenize_next_end
.Ltokenize_next_single:
lw a0, 4(sp)
lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
@@ -573,7 +580,7 @@ _tokenize_next:
.Ltokenize_next_composite:
addi s1, s1, 1
lw a1, 4(sp)
lw a1, 12(sp)
sub a0, s1, a1
call _classify_composite
lw a1, 0(sp)
@@ -585,11 +592,11 @@ _tokenize_next:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
lw s1, 12(sp)
lw s2, 8(sp)
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 20(sp)
lw s0, 16(sp)
addi sp, sp, 24
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret