diff options
Diffstat (limited to 'boot/tokenizer.s')
| -rw-r--r-- | boot/tokenizer.s | 65 |
1 files changed, 36 insertions, 29 deletions
diff --git a/boot/tokenizer.s b/boot/tokenizer.s index 67b2602..e358b89 100644 --- a/boot/tokenizer.s +++ b/boot/tokenizer.s @@ -38,7 +38,6 @@ .equ CLASS_COUNT, 20 .type classification, @object -.size classification, 128 classification: .byte CLASS_EOF # 00 NUL .byte CLASS_INVALID # 01 SOH @@ -172,7 +171,7 @@ classification: # # Textual keywords in the language. # -.equ KEYWORDS_COUNT, 21 +.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1 .type keywords, @object keywords: @@ -222,8 +221,8 @@ keywords: .ascii "return" .word 4 .ascii "cast" - .word 5 - .ascii "defer" + .word 4 + .ascii "goto" .word 4 .ascii "case" .word 2 @@ -251,13 +250,12 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@" # handles each action. # .type transitions, @object -.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT transitions: # Invalid Digit Alpha Space : = ( ) # * _ Single Hex 0 x NUL . # - " or ' > < .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff - .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108 + .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff .word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff @@ -280,9 +278,9 @@ transitions: .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff .word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren + .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff + .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff + .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff @@ -399,6 +397,10 @@ _classify_identifier: la a1, keywords call _strings_index + bnez a0, .Lclassify_identifier_end + li a0, TOKEN_IDENTIFIER + +.Lclassify_identifier_end: # Epilogue. lw ra, 12(sp) lw s0, 8(sp) @@ -426,7 +428,7 @@ _classify_single: la a1, byte_keywords sub a0, a0, a1 - addi a0, a0, 27 + addi a0, a0, TOKEN_IDENTIFIER + 1 # Epilogue. lw ra, 12(sp) @@ -466,16 +468,17 @@ _classify_composite: .type _tokenize_next, @function _tokenize_next: # Prologue. - addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - sw s1, 12(sp) # Preserve s1 used for current source text position. + sw s1, 20(sp) # Preserve s1 used for current source text position. mv s1, a0 - sw a0, 4(sp) # Keeps a pointer to the beginning of a token. + sw a0, 12(sp) # Keeps a pointer to the beginning of a token. + # 4(sp) and 8(sp) are reserved for the kind and length of the token if needed. - sw s2, 8(sp) # Preserve s2 containing the current state. + sw s2, 16(sp) # Preserve s2 containing the current state. li s2, 0x00 # Initial, start state. sw a1, 0(sp) @@ -529,9 +532,9 @@ _tokenize_next: .Ltokenize_next_skip: addi s1, s1, 1 - lw t0, 4(sp) + lw t0, 12(sp) addi t0, t0, 1 - sw t0, 4(sp) + sw t0, 12(sp) j .Ltokenize_next_loop @@ -553,16 +556,20 @@ _tokenize_next: .Ltokenize_next_identifier: # An identifier can be a textual keyword. # Check the kind of the token and write it into the output parameter. - lw a1, 4(sp) + lw a1, 12(sp) sub a0, s1, a1 + sw a0, 8(sp) call _classify_identifier - lw a1, 0(sp) - sw a0, (a1) + sw a0, 4(sp) + lw a0, 0(sp) + addi a1, sp, 4 + li a2, 12 + call _memcpy j .Ltokenize_next_end .Ltokenize_next_single: - lw a0, 4(sp) + lw a0, 12(sp) addi s1, a0, 1 lbu a0, (a0) call _classify_single @@ -573,7 +580,7 @@ _tokenize_next: .Ltokenize_next_composite: addi s1, s1, 1 - lw a1, 4(sp) + lw a1, 12(sp) sub a0, s1, a1 call _classify_composite lw a1, 0(sp) @@ -585,11 +592,11 @@ _tokenize_next: mv a0, s1 # Return the advanced text pointer. # Restore saved registers. - lw s1, 12(sp) - lw s2, 8(sp) + lw s1, 20(sp) + lw s2, 16(sp) # Epilogue. - lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret |
