summary refs log tree commit diff
path: root/boot/tokenizer.s
diff options
context:
space:
mode:
author Eugen Wissner <belka@caraus.de> 2025-05-04 23:49:39 +0200
committer Eugen Wissner <belka@caraus.de> 2025-05-04 23:49:39 +0200
commit df1c0486c5c7e88e9a5a3bd8a4335fc74c4963fc (patch)
tree 21baf3d2275e28ccbcafd510e3e7a8b4df1027b5 /boot/tokenizer.s
parent 0a0bc4e1f2e263cdda83bd78a2a77ff68ae2334e (diff)
download elna-df1c0486c5c7e88e9a5a3bd8a4335fc74c4963fc.tar.gz
Fix strings search looping
Diffstat (limited to 'boot/tokenizer.s')
-rw-r--r-- boot/tokenizer.s 65
1 files changed, 36 insertions, 29 deletions
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
index 67b2602..e358b89 100644
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -38,7 +38,6 @@
.equ CLASS_COUNT, 20
.type classification, @object
-.size classification, 128
classification:
.byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH
@@ -172,7 +171,7 @@ classification:
#
# Textual keywords in the language.
#
-.equ KEYWORDS_COUNT, 21
+.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object
keywords:
@@ -222,8 +221,8 @@ keywords:
.ascii "return"
.word 4
.ascii "cast"
- .word 5
- .ascii "defer"
+ .word 4
+ .ascii "goto"
.word 4
.ascii "case"
.word 2
@@ -251,13 +250,12 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# handles each action.
#
.type transitions, @object
-.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
- .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108
+ .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
@@ -280,9 +278,9 @@ transitions:
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren
+ .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
+ .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
+ .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
@@ -399,6 +397,10 @@ _classify_identifier:
la a1, keywords
call _strings_index
+ bnez a0, .Lclassify_identifier_end
+ li a0, TOKEN_IDENTIFIER
+
+.Lclassify_identifier_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
@@ -426,7 +428,7 @@ _classify_single:
la a1, byte_keywords
sub a0, a0, a1
- addi a0, a0, 27
+ addi a0, a0, TOKEN_IDENTIFIER + 1
# Epilogue.
lw ra, 12(sp)
@@ -466,16 +468,17 @@ _classify_composite:
.type _tokenize_next, @function
_tokenize_next:
# Prologue.
- addi sp, sp, -24
- sw ra, 20(sp)
- sw s0, 16(sp)
- addi s0, sp, 24
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- sw s1, 12(sp) # Preserve s1 used for current source text position.
+ sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0
- sw a0, 4(sp) # Keeps a pointer to the beginning of a token.
+ sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
+ # 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
- sw s2, 8(sp) # Preserve s2 containing the current state.
+ sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
sw a1, 0(sp)
@@ -529,9 +532,9 @@ _tokenize_next:
.Ltokenize_next_skip:
addi s1, s1, 1
- lw t0, 4(sp)
+ lw t0, 12(sp)
addi t0, t0, 1
- sw t0, 4(sp)
+ sw t0, 12(sp)
j .Ltokenize_next_loop
@@ -553,16 +556,20 @@ _tokenize_next:
.Ltokenize_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
- lw a1, 4(sp)
+ lw a1, 12(sp)
sub a0, s1, a1
+ sw a0, 8(sp)
call _classify_identifier
- lw a1, 0(sp)
- sw a0, (a1)
+ sw a0, 4(sp)
+ lw a0, 0(sp)
+ addi a1, sp, 4
+ li a2, 12
+ call _memcpy
j .Ltokenize_next_end
.Ltokenize_next_single:
- lw a0, 4(sp)
+ lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
@@ -573,7 +580,7 @@ _tokenize_next:
.Ltokenize_next_composite:
addi s1, s1, 1
- lw a1, 4(sp)
+ lw a1, 12(sp)
sub a0, s1, a1
call _classify_composite
lw a1, 0(sp)
@@ -585,11 +592,11 @@ _tokenize_next:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
- lw s1, 12(sp)
- lw s2, 8(sp)
+ lw s1, 20(sp)
+ lw s2, 16(sp)
# Epilogue.
- lw ra, 20(sp)
- lw s0, 16(sp)
- addi sp, sp, 24
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret