summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--boot/stage1.s13
-rw-r--r--boot/states.txt20
-rw-r--r--boot/tokenizer.s526
3 files changed, 381 insertions, 178 deletions
diff --git a/boot/stage1.s b/boot/stage1.s
index e591e21..9b118d5 100644
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -1896,11 +1896,6 @@ _main:
sw s0, 0(sp)
addi s0, sp, 8
- # Read the source from the standard input.
- la a0, source_code
- li a1, SOURCE_BUFFER_SIZE # Buffer size.
- call _read_file
-
li s2, 1
# Epilogue.
@@ -1912,7 +1907,13 @@ _main:
# Entry point.
.type _start, @function
_start:
- call _tokenizer_initialize
+ # Read the source from the standard input.
+ la a0, source_code
+ li a1, SOURCE_BUFFER_SIZE # Buffer size.
+ call _read_file
+
+ mv a0, s1
+ call _tokenize
call _main
call _compile
diff --git a/boot/states.txt b/boot/states.txt
deleted file mode 100644
index 20d5966..0000000
--- a/boot/states.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-- start
-digit: integer
-upper: identifier
-lower: identifier
-space: start
-invalid: error
-
-- identifier
-digit: identifier
-upper: identifier
-lower: identifier
-space: end
-invalid: end
-
-- integer:
-digit: integer
-upper: end
-lower: end
-space: end
-invalid: end
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
index b1ac11f..5570031 100644
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -1,190 +1,405 @@
-.global _tokenizer_initialize
+.global _tokenize, classification, transitions
.section .rodata
+
+#
+# Classification table assigns each possible character to a group (class). All
+# characters of the same group are handled equivalently.
#
-# Classes:
+# Classification:
#
-# 0x00: Invalid
-# 0x01: Digit
-# 0x02: Character
-# 0x03: Space
-.type classes, @object
-.size classes, 128
-classes:
- .byte 0x00 # 00 NUL
- .byte 0x00 # 01 SOH
- .byte 0x00 # 02 STX
- .byte 0x00 # 03 ETX
- .byte 0x00 # 04 EOT
- .byte 0x00 # 05 ENQ
- .byte 0x00 # 06 ACK
- .byte 0x00 # 07 BEL
- .byte 0x00 # 08 BS
- .byte 0x00 # 09 HT
- .byte 0x00 # 0A LF
- .byte 0x00 # 0B VT
- .byte 0x00 # 0C FF
- .byte 0x00 # 0D CR
- .byte 0x00 # 0E SO
- .byte 0x00 # 0F SI
- .byte 0x00 # 10 DLE
- .byte 0x00 # 11 DC1
- .byte 0x00 # 12 DC2
- .byte 0x00 # 13 DC3
- .byte 0x00 # 14 DC4
- .byte 0x00 # 15 NAK
- .byte 0x00 # 16 SYN
- .byte 0x00 # 17 ETB
- .byte 0x00 # 18 CAN
- .byte 0x00 # 19 EM
- .byte 0x00 # 1A SUB
- .byte 0x00 # 1B ESC
- .byte 0x00 # 1C FS
- .byte 0x00 # 1D GS
- .byte 0x00 # 1E RS
- .byte 0x00 # 1F US
- .byte 0x03 # 20 Space
- .byte 0x00 # 21 !
+.equ CLASS_INVALID,     0x00 # All remaining characters.
+.equ CLASS_DIGIT,       0x01 # 1-9
+.equ CLASS_CHARACTER,   0x02 # A-Z, g-w, y, z
+.equ CLASS_SPACE,       0x03 # HT, LF, CR, space
+.equ CLASS_COLON,       0x04 # :
+.equ CLASS_EQUALS,      0x05 # =
+.equ CLASS_LEFT_PAREN,  0x06 # (
+.equ CLASS_RIGHT_PAREN, 0x07 # )
+.equ CLASS_ASTERISK,    0x08 # *
+.equ CLASS_UNDERSCORE,  0x09 # _
+.equ CLASS_SINGLE,      0x0a # ! % & + , / ; @ [ ] ^ | ~
+.equ CLASS_HEX,         0x0b # a-f
+.equ CLASS_ZERO,        0x0c # 0
+.equ CLASS_X,           0x0d # x
+.equ CLASS_EOF,         0x0e # NUL
+.equ CLASS_DOT,         0x0f # .
+
+# Number of classes, which is also the number of columns in a row of the
+# transition table. CLASS_DOT is the last class, so this evaluates to 16.
+.equ CLASS_COUNT, CLASS_DOT + 1
+
+.type classification, @object
+.size classification, 128
+classification:
+ .byte CLASS_EOF # 00 NUL
+ .byte CLASS_INVALID # 01 SOH
+ .byte CLASS_INVALID # 02 STX
+ .byte CLASS_INVALID # 03 ETX
+ .byte CLASS_INVALID # 04 EOT
+ .byte CLASS_INVALID # 05 ENQ
+ .byte CLASS_INVALID # 06 ACK
+ .byte CLASS_INVALID # 07 BEL
+ .byte CLASS_INVALID # 08 BS
+ .byte CLASS_SPACE # 09 HT
+ .byte CLASS_SPACE # 0A LF
+ .byte CLASS_INVALID # 0B VT
+ .byte CLASS_INVALID # 0C FF
+ .byte CLASS_SPACE # 0D CR
+ .byte CLASS_INVALID # 0E SO
+ .byte CLASS_INVALID # 0F SI
+ .byte CLASS_INVALID # 10 DLE
+ .byte CLASS_INVALID # 11 DC1
+ .byte CLASS_INVALID # 12 DC2
+ .byte CLASS_INVALID # 13 DC3
+ .byte CLASS_INVALID # 14 DC4
+ .byte CLASS_INVALID # 15 NAK
+ .byte CLASS_INVALID # 16 SYN
+ .byte CLASS_INVALID # 17 ETB
+ .byte CLASS_INVALID # 18 CAN
+ .byte CLASS_INVALID # 19 EM
+ .byte CLASS_INVALID # 1A SUB
+ .byte CLASS_INVALID # 1B ESC
+ .byte CLASS_INVALID # 1C FS
+ .byte CLASS_INVALID # 1D GS
+ .byte CLASS_INVALID # 1E RS
+ .byte CLASS_INVALID # 1F US
+ .byte CLASS_SPACE # 20 Space
+ .byte CLASS_SINGLE # 21 !
.byte 0x00 # 22 "
.byte 0x00 # 23 #
.byte 0x00 # 24 $
- .byte 0x00 # 25 %
- .byte 0x00 # 26 &
+ .byte CLASS_SINGLE # 25 %
+ .byte CLASS_SINGLE # 26 &
.byte 0x00 # 27 '
- .byte 0x00 # 28 (
- .byte 0x00 # 29 )
- .byte 0x00 # 2A *
- .byte 0x00 # 2B +
- .byte 0x00 # 2C ,
+ .byte CLASS_LEFT_PAREN # 28 (
+ .byte CLASS_RIGHT_PAREN # 29 )
+ .byte CLASS_ASTERISK # 2A *
+ .byte CLASS_SINGLE # 2B +
+ .byte CLASS_SINGLE # 2C ,
.byte 0x00 # 2D -
- .byte 0x00 # 2E .
- .byte 0x00 # 2F /
- .byte 0x01 # 30 0
- .byte 0x01 # 31 1
- .byte 0x01 # 32 2
- .byte 0x01 # 33 3
- .byte 0x01 # 34 4
- .byte 0x01 # 35 5
- .byte 0x01 # 36 6
- .byte 0x01 # 37 7
- .byte 0x01 # 38 8
- .byte 0x01 # 39 9
- .byte 0x00 # 3A :
- .byte 0x00 # 3B ;
+ .byte CLASS_DOT # 2E .
+ .byte CLASS_SINGLE # 2F /
+ .byte CLASS_ZERO # 30 0
+ .byte CLASS_DIGIT # 31 1
+ .byte CLASS_DIGIT # 32 2
+ .byte CLASS_DIGIT # 33 3
+ .byte CLASS_DIGIT # 34 4
+ .byte CLASS_DIGIT # 35 5
+ .byte CLASS_DIGIT # 36 6
+ .byte CLASS_DIGIT # 37 7
+ .byte CLASS_DIGIT # 38 8
+ .byte CLASS_DIGIT # 39 9
+ .byte CLASS_COLON # 3A :
+ .byte CLASS_SINGLE # 3B ;
.byte 0x00 # 3C <
- .byte 0x00 # 3D =
+ .byte CLASS_EQUALS # 3D =
.byte 0x00 # 3E >
.byte 0x00 # 3F ?
- .byte 0x00 # 40 @
- .byte 0x02 # 41 A
- .byte 0x02 # 42 B
- .byte 0x02 # 43 C
- .byte 0x02 # 44 D
- .byte 0x02 # 45 E
- .byte 0x02 # 46 F
- .byte 0x02 # 47 G
- .byte 0x02 # 48 H
- .byte 0x02 # 49 I
- .byte 0x02 # 4A J
- .byte 0x02 # 4B K
- .byte 0x02 # 4C L
- .byte 0x02 # 4D M
- .byte 0x02 # 4E N
- .byte 0x02 # 4F O
- .byte 0x02 # 50 P
- .byte 0x02 # 51 Q
- .byte 0x02 # 52 R
- .byte 0x02 # 53 S
- .byte 0x02 # 54 T
- .byte 0x02 # 55 U
- .byte 0x02 # 56 V
- .byte 0x02 # 57 W
- .byte 0x02 # 58 X
- .byte 0x02 # 59 Y
- .byte 0x02 # 5A Z
- .byte 0x00 # 5B [
+ .byte CLASS_SINGLE # 40 @
+ .byte CLASS_CHARACTER # 41 A
+ .byte CLASS_CHARACTER # 42 B
+ .byte CLASS_CHARACTER # 43 C
+ .byte CLASS_CHARACTER # 44 D
+ .byte CLASS_CHARACTER # 45 E
+ .byte CLASS_CHARACTER # 46 F
+ .byte CLASS_CHARACTER # 47 G
+ .byte CLASS_CHARACTER # 48 H
+ .byte CLASS_CHARACTER # 49 I
+ .byte CLASS_CHARACTER # 4A J
+ .byte CLASS_CHARACTER # 4B K
+ .byte CLASS_CHARACTER # 4C L
+ .byte CLASS_CHARACTER # 4D M
+ .byte CLASS_CHARACTER # 4E N
+ .byte CLASS_CHARACTER # 4F O
+ .byte CLASS_CHARACTER # 50 P
+ .byte CLASS_CHARACTER # 51 Q
+ .byte CLASS_CHARACTER # 52 R
+ .byte CLASS_CHARACTER # 53 S
+ .byte CLASS_CHARACTER # 54 T
+ .byte CLASS_CHARACTER # 55 U
+ .byte CLASS_CHARACTER # 56 V
+ .byte CLASS_CHARACTER # 57 W
+ .byte CLASS_CHARACTER # 58 X
+ .byte CLASS_CHARACTER # 59 Y
+ .byte CLASS_CHARACTER # 5A Z
+ .byte CLASS_SINGLE # 5B [
.byte 0x00 # 5C \
- .byte 0x00 # 5D ]
- .byte 0x00 # 5E ^
- .byte 0x00 # 5F _
+ .byte CLASS_SINGLE # 5D ]
+ .byte CLASS_SINGLE # 5E ^
+ .byte CLASS_UNDERSCORE # 5F _
.byte 0x00 # 60 `
- .byte 0x02 # 61 a
- .byte 0x02 # 62 b
- .byte 0x02 # 63 c
- .byte 0x02 # 64 d
- .byte 0x02 # 65 e
- .byte 0x02 # 66 f
- .byte 0x02 # 67 g
- .byte 0x02 # 68 h
- .byte 0x02 # 69 i
- .byte 0x02 # 6A j
- .byte 0x02 # 6B k
- .byte 0x02 # 6C l
- .byte 0x02 # 6D m
- .byte 0x02 # 6E n
- .byte 0x02 # 6F o
- .byte 0x02 # 70 p
- .byte 0x02 # 71 q
- .byte 0x02 # 72 r
- .byte 0x02 # 73 s
- .byte 0x02 # 74 t
- .byte 0x02 # 75 u
- .byte 0x02 # 76 v
- .byte 0x02 # 77 w
- .byte 0x02 # 78 x
- .byte 0x02 # 79 y
- .byte 0x02 # 7A z
+ .byte CLASS_HEX # 61 a
+ .byte CLASS_HEX # 62 b
+ .byte CLASS_HEX # 63 c
+ .byte CLASS_HEX # 64 d
+ .byte CLASS_HEX # 65 e
+ .byte CLASS_HEX # 66 f
+ .byte CLASS_CHARACTER # 67 g
+ .byte CLASS_CHARACTER # 68 h
+ .byte CLASS_CHARACTER # 69 i
+ .byte CLASS_CHARACTER # 6A j
+ .byte CLASS_CHARACTER # 6B k
+ .byte CLASS_CHARACTER # 6C l
+ .byte CLASS_CHARACTER # 6D m
+ .byte CLASS_CHARACTER # 6E n
+ .byte CLASS_CHARACTER # 6F o
+ .byte CLASS_CHARACTER # 70 p
+ .byte CLASS_CHARACTER # 71 q
+ .byte CLASS_CHARACTER # 72 r
+ .byte CLASS_CHARACTER # 73 s
+ .byte CLASS_CHARACTER # 74 t
+ .byte CLASS_CHARACTER # 75 u
+ .byte CLASS_CHARACTER # 76 v
+ .byte CLASS_CHARACTER # 77 w
+ .byte CLASS_X # 78 x
+ .byte CLASS_CHARACTER # 79 y
+ .byte CLASS_CHARACTER # 7A z
.byte 0x00 # 7B {
- .byte 0x00 # 7C |
+ .byte CLASS_SINGLE # 7C |
.byte 0x00 # 7D }
- .byte 0x00 # 7E ~
- .byte 0x00 # 7F DEL
+ .byte CLASS_SINGLE # 7E ~
+ .byte CLASS_INVALID # 7F DEL
.section .data
-.section .bss
-.type class_names, @object
-.size class_names, 1024
-class_names: .zero 1024
+# The transition table describes transitions from one state to another, given
+# a symbol (character class).
+#
+# The table has m rows and n columns, where m is the number of states and n is
+# the number of classes (CLASS_COUNT). So given the current state and a
+# classified character the table can be used to look up the next state.
+#
+# Each cell is a word (4 bytes) long.
+# - The least significant byte of the word is a row number (beginning with 0).
+#   It specifies the target state. "ff" means that this is an end state and no
+#   transition is possible.
+# - The next byte is the action that should be performed when transitioning.
+#   For the meaning of actions see labels in the _analyze_token function, which
+#   handles each action: 0x01 accumulate, 0x02 print, 0x03 skip, 0x04 comment.
+#   Action 0x00 matches none of them and ends up on the reject path.
+#
+.type transitions, @object
+transitions:
+ # Invalid Digit Alpha Space : = ( )
+ # * _ Single Hex 0 x NUL .
+ .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
+ .word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
+
+ .word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier
+
+ .word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
+
+ .word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
+ .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
+
+ # An asterisk inside a comment that is not followed by ")" returns to the
+ # comment state. This includes characters of the invalid class, which the
+ # comment state itself accepts as well.
+ .word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
+ .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
+
+ .word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
+
+ .word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x02ff, 0x02ff # 0d Hexadecimal
+
+ .word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
+
+# The size is derived from the table itself (15 states * CLASS_COUNT cells *
+# 4 bytes per cell), so it stays correct when states are added or removed.
+.size transitions, . - transitions
.section .text
+# Returns the class from the classification table for the given character.
+#
+# The classification table has 128 entries, one per 7-bit ASCII code. A value
+# outside of that range is reported as CLASS_INVALID instead of reading the
+# memory that follows the table.
+#
+# Parameters:
+# a0 - Character, treated as an unsigned value.
+#
+# Sets a0 to the class number. Overwrites t0.
+.type _classify, @function
+_classify:
+ li t0, 128 # Number of entries in the classification table.
+ bgeu a0, t0, .Lclassify_out_of_range
+
+ la t0, classification
+ add t0, t0, a0 # Character class pointer.
+ lbu a0, (t0) # Character class.
+ ret
+
+.Lclassify_out_of_range:
+ li a0, CLASS_INVALID
+ ret
+
+# Fetches the transition table cell selected by a state and a character class.
+#
+# Parameters:
+# a0 - Current state (row of the transition table).
+# a1 - Character class (column of the transition table).
+#
+# Sets a0 to the word stored in the cell: the target state in the least
+# significant byte and the action in the next byte. Overwrites t0.
+.type _lookup_state, @function
+_lookup_state:
+ li t0, CLASS_COUNT
+ mul a0, a0, t0 # Index of the first cell in the row.
+ add a0, a0, a1 # Index of the requested cell.
+ slli a0, a0, 2 # Cell index to byte offset, a cell is 4 bytes wide.
+
+ la t0, transitions
+ add t0, t0, a0 # Address of the cell.
+ lw a0, 0(t0)
+
+ ret
+
+# Classifies a character with _classify and feeds the resulting class together
+# with the current state into _lookup_state.
+#
+# Parameters:
+# a0 - Current state.
+# a1 - Character.
+#
+# Sets a0 to the value returned by _lookup_state for the given state and the
+# class of the given character.
+.type _next_state, @function
+_next_state:
+ # Prologue: 16 byte frame holding ra, s0 and one local slot.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ # The state has to survive the call, a0 is reused for the character.
+ sw a0, -12(s0)
+ mv a0, a1
+ call _classify
+
+ # a0 now holds the class. Move it into the second argument and reload
+ # the state as the first one.
+ mv a1, a0
+ lw a0, -12(s0)
+ call _lookup_state
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
# Initializes the classification table.
#
# Paramaters:
-# a0 - Raw input for the classification table.
-.type _initialize_classes, @function
-_initialize_classes:
+# a0 - Source text pointer.
+.type _analyze_token, @function
+_analyze_token:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
sw s0, 16(sp)
addi s0, sp, 24
- sw s1, 12(sp) # Preserve the s1 register used for the character counter.
- li s1, 128 # 128 ASCII characters.
+ sw s1, 12(sp) # Preserve s1 used for current source text position.
+ mv s1, a0
+ sw a0, 4(sp) # Keeps a pointer to the beginning of a token.
+
+ sw s2, 8(sp) # Preserve s2 containing the current state.
+ li s2, 0x00 # Initial, start state.
+
+.Lanalyze_token_loop:
+ mv a0, s2
+ lbu a1, (s1)
+ call _next_state
+
+ li t0, 0xff
+ and s2, a0, t0 # Next state.
-.Linitialize_classes_loop:
- addi s1, s1, -1
+ li t0, 0xff00
+ and t1, a0, t0 # Transition action.
+ srli t1, t1, 8
- la t0, classes
- add t0, t0, s1
- lbu t0, (t0)
- li t1, 0x01
- bne t0, t1, .Linitialize_classes_step
+ li t0, 0x01 # Accumulate action.
+ beq t1, t0, .Lanalyze_token_accumulate
- /* DEBUG */
- li a0, 0x69676964
- sw a0, 8(sp) # Preserve the memory address.
- addi a0, sp, 8
- li a1, 4
+ li t0, 0x02 # Print action.
+ beq t1, t0, .Lanalyze_token_print
+
+ li t0, 0x03 # Skip action.
+ beq t1, t0, .Lanalyze_token_skip
+
+ li t0, 0x04 # Comment action.
+ beq t1, t0, .Lanalyze_token_comment
+
+ /* DEBUG
+ mv s4, t1
+ addi t1, t1, '0'
+ sb t1, 0(sp)
+ li t1, ' '
+ sb t1, 1(sp)
+ addi t1, s2, '0'
+ sb t1, 2(sp)
+ addi a0, sp, 0 */
+ sw s1, 0(sp)
+ addi a0, s1, 0
+ li a1, 3
call _write_error
+ /* mv t1, s4
+ DEBUG */
+
+ j .Lanalyze_token_reject
-.Linitialize_classes_step:
- bnez s1, .Linitialize_classes_loop
+.Lanalyze_token_reject:
+ addi s1, s1, 1
- lw s1, 12(sp) # Restore the saved register.
+ j .Lanalyze_token_end
+
+.Lanalyze_token_accumulate:
+ addi s1, s1, 1
+
+ j .Lanalyze_token_loop
+
+.Lanalyze_token_skip:
+ addi s1, s1, 1
+ lw t0, 4(sp)
+ addi t0, t0, 1
+ sw t0, 4(sp)
+
+ j .Lanalyze_token_loop
+
+.Lanalyze_token_print:
+ /* DEBUG
+ lw a0, 4(sp)
+ mv a1, s1
+ sub a1, a1, a0
+ call _write_error
+ DEBUG */
+
+ j .Lanalyze_token_end
+
+.Lanalyze_token_comment:
+ addi s1, s1, 1
+
+ /* DEBUG
+ lw a0, 4(sp)
+ mv a1, s1
+ sub a1, a1, a0
+ call _write_error
+ DEBUG */
+
+ j .Lanalyze_token_end
+
+.Lanalyze_token_end:
+ mv a0, s1 # Return the advanced text pointer.
+
+ # Restore saved registers.
+ lw s1, 12(sp)
+ lw s2, 8(sp)
# Epilogue.
lw ra, 20(sp)
@@ -193,15 +408,22 @@ _initialize_classes:
ret
# Initializes the lookup tables.
-.type _tokenizer_initialize, @function
-_tokenizer_initialize:
+#
+# Parameters:
+# a0 - Source text pointer.
+.type _tokenize, @function
+_tokenize:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
- call _initialize_classes
+.Ltokenize_loop:
+ call _analyze_token
+
+ lw t0, (a0)
+ bnez t0, .Ltokenize_loop
# Epilogue.
lw ra, 4(sp)