Create tokenization tables

2025-05-01 23:37:40 +02:00
parent f3a8b2626a
commit 768821c689
3 changed files with 381 additions and 178 deletions
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -1896,11 +1896,6 @@ _main:
 	sw s0, 0(sp)
 	addi s0, sp, 8
 	# Read the source from the standard input.
 	la a0, source_code
 	li a1, SOURCE_BUFFER_SIZE # Buffer size.
 	call _read_file
 	li s2, 1
 	# Epilogue.
@@ -1912,7 +1907,13 @@ _main:
 # Entry point.
 .type _start, @function
 _start:
-	call _tokenizer_initialize
+	# Read the source from the standard input.
 	la a0, source_code
 	li a1, SOURCE_BUFFER_SIZE # Buffer size.
 	call _read_file
 	mv a0, s1
 	call _tokenize
 	call _main
 	call _compile
--- a/boot/states.txt
+++ b/boot/states.txt
@@ -1,20 +0,0 @@
 - start
 digit: integer
 upper: identifier
 lower: identifier
 space: start
 invalid: error
 - identifier
 digit: identifier
 upper: identifier
 lower: identifier
 space: end
 invalid: end
 - integer:
 digit: integer
 upper: end
 lower: end
 space: end
 invalid: end
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -1,190 +1,405 @@
-.global _tokenizer_initialize
+.global _tokenize, classification, transitions
 .section .rodata
 #
-# Classes:
+# Classification table assigns each possible character to a group (class). All
 # characters of the same group are handled equivalently.
 #
-# 0x00: Invalid
+# Classification:
-# 0x01: Digit
+#
-# 0x02: Character
+.equ CLASS_INVALID, 0x0
-# 0x03: Space
+.equ CLASS_DIGIT, 0x01
-.type classes, @object
+.equ CLASS_CHARACTER, 0x02
-.size classes, 128
+.equ CLASS_SPACE, 0x03
-classes:
+.equ CLASS_COLON, 0x04
-	.byte 0x00 # 00 NUL
+.equ CLASS_EQUALS, 0x05
-	.byte 0x00 # 01 SOH
+.equ CLASS_LEFT_PAREN, 0x06
-	.byte 0x00 # 02 STX
+.equ CLASS_RIGHT_PAREN, 0x07
-	.byte 0x00 # 03 ETX
+.equ CLASS_ASTERISK, 0x08
-	.byte 0x00 # 04 EOT
+.equ CLASS_UNDERSCORE, 0x09
-	.byte 0x00 # 05 ENQ
+.equ CLASS_SINGLE, 0x0a
-	.byte 0x00 # 06 ACK
+.equ CLASS_HEX, 0x0b
-	.byte 0x00 # 07 BEL
+.equ CLASS_ZERO, 0x0c
-	.byte 0x00 # 08 BS
+.equ CLASS_X, 0x0d
-	.byte 0x00 # 09 HT
+.equ CLASS_EOF, 0x0e
-	.byte 0x00 # 0A LF
+.equ CLASS_DOT, 0x0f
-	.byte 0x00 # 0B VT
+
-	.byte 0x00 # 0C FF
+.equ CLASS_COUNT, 16
-	.byte 0x00 # 0D CR
+
-	.byte 0x00 # 0E SO
+.type classification, @object
-	.byte 0x00 # 0F SI
+.size classification, 128
-	.byte 0x00 # 10 DLE
+classification:
-	.byte 0x00 # 11 DC1
+	.byte CLASS_EOF # 00 NUL
-	.byte 0x00 # 12 DC2
+	.byte CLASS_INVALID # 01 SOH
-	.byte 0x00 # 13 DC3
+	.byte CLASS_INVALID # 02 STX
-	.byte 0x00 # 14 DC4
+	.byte CLASS_INVALID # 03 ETX
-	.byte 0x00 # 15 NAK
+	.byte CLASS_INVALID # 04 EOT
-	.byte 0x00 # 16 SYN
+	.byte CLASS_INVALID # 05 ENQ
-	.byte 0x00 # 17 ETB
+	.byte CLASS_INVALID # 06 ACK
-	.byte 0x00 # 18 CAN
+	.byte CLASS_INVALID # 07 BEL
-	.byte 0x00 # 19 EM
+	.byte CLASS_INVALID # 08 BS
-	.byte 0x00 # 1A SUB
+	.byte CLASS_SPACE # 09 HT
-	.byte 0x00 # 1B ESC
+	.byte CLASS_SPACE # 0A LF
-	.byte 0x00 # 1C FS
+	.byte CLASS_INVALID # 0B VT
-	.byte 0x00 # 1D GS
+	.byte CLASS_INVALID # 0C FF
-	.byte 0x00 # 1E RS
+	.byte CLASS_SPACE # 0D CR
-	.byte 0x00 # 1F US
+	.byte CLASS_INVALID # 0E SO
-	.byte 0x03 # 20 Space
+	.byte CLASS_INVALID # 0F SI
-	.byte 0x00 # 21 !
+	.byte CLASS_INVALID # 10 DLE
 	.byte CLASS_INVALID # 11 DC1
 	.byte CLASS_INVALID # 12 DC2
 	.byte CLASS_INVALID # 13 DC3
 	.byte CLASS_INVALID # 14 DC4
 	.byte CLASS_INVALID # 15 NAK
 	.byte CLASS_INVALID # 16 SYN
 	.byte CLASS_INVALID # 17 ETB
 	.byte CLASS_INVALID # 18 CAN
 	.byte CLASS_INVALID # 19 EM
 	.byte CLASS_INVALID # 1A SUB
 	.byte CLASS_INVALID # 1B ESC
 	.byte CLASS_INVALID # 1C FS
 	.byte CLASS_INVALID # 1D GS
 	.byte CLASS_INVALID # 1E RS
 	.byte CLASS_INVALID # 1F US
 	.byte CLASS_SPACE # 20 Space
 	.byte CLASS_SINGLE # 21 !
 	.byte 0x00 # 22 "
 	.byte 0x00 # 23 #
 	.byte 0x00 # 24 $
-	.byte 0x00 # 25 %
+	.byte CLASS_SINGLE # 25 %
-	.byte 0x00 # 26 &
+	.byte CLASS_SINGLE # 26 &
 	.byte 0x00 # 27 '
-	.byte 0x00 # 28 (
+	.byte CLASS_LEFT_PAREN # 28 (
-	.byte 0x00 # 29 )
+	.byte CLASS_RIGHT_PAREN # 29 )
-	.byte 0x00 # 2A *
+	.byte CLASS_ASTERISK # 2A *
-	.byte 0x00 # 2B +
+	.byte CLASS_SINGLE # 2B +
-	.byte 0x00 # 2C ,
+	.byte CLASS_SINGLE # 2C ,
 	.byte 0x00 # 2D -
-	.byte 0x00 # 2E .
+	.byte CLASS_DOT # 2E .
-	.byte 0x00 # 2F /
+	.byte CLASS_SINGLE # 2F /
-	.byte 0x01 # 30 0
+	.byte CLASS_ZERO # 30 0
-	.byte 0x01 # 31 1
+	.byte CLASS_DIGIT # 31 1
-	.byte 0x01 # 32 2
+	.byte CLASS_DIGIT # 32 2
-	.byte 0x01 # 33 3
+	.byte CLASS_DIGIT # 33 3
-	.byte 0x01 # 34 4
+	.byte CLASS_DIGIT # 34 4
-	.byte 0x01 # 35 5
+	.byte CLASS_DIGIT # 35 5
-	.byte 0x01 # 36 6
+	.byte CLASS_DIGIT # 36 6
-	.byte 0x01 # 37 7
+	.byte CLASS_DIGIT # 37 7
-	.byte 0x01 # 38 8
+	.byte CLASS_DIGIT # 38 8
-	.byte 0x01 # 39 9
+	.byte CLASS_DIGIT # 39 9
-	.byte 0x00 # 3A :
+	.byte CLASS_COLON # 3A :
-	.byte 0x00 # 3B ;
+	.byte CLASS_SINGLE # 3B ;
 	.byte 0x00 # 3C <
-	.byte 0x00 # 3D =
+	.byte CLASS_EQUALS # 3D =
 	.byte 0x00 # 3E >
 	.byte 0x00 # 3F ?
-	.byte 0x00 # 40 @
+	.byte CLASS_SINGLE # 40 @
-	.byte 0x02 # 41 A
+	.byte CLASS_CHARACTER # 41 A
-	.byte 0x02 # 42 B
+	.byte CLASS_CHARACTER # 42 B
-	.byte 0x02 # 43 C
+	.byte CLASS_CHARACTER # 43 C
-	.byte 0x02 # 44 D
+	.byte CLASS_CHARACTER # 44 D
-	.byte 0x02 # 45 E
+	.byte CLASS_CHARACTER # 45 E
-	.byte 0x02 # 46 F
+	.byte CLASS_CHARACTER # 46 F
-	.byte 0x02 # 47 G
+	.byte CLASS_CHARACTER # 47 G
-	.byte 0x02 # 48 H
+	.byte CLASS_CHARACTER # 48 H
-	.byte 0x02 # 49 I
+	.byte CLASS_CHARACTER # 49 I
-	.byte 0x02 # 4A J
+	.byte CLASS_CHARACTER # 4A J
-	.byte 0x02 # 4B K
+	.byte CLASS_CHARACTER # 4B K
-	.byte 0x02 # 4C L
+	.byte CLASS_CHARACTER # 4C L
-	.byte 0x02 # 4D M
+	.byte CLASS_CHARACTER # 4D M
-	.byte 0x02 # 4E N
+	.byte CLASS_CHARACTER # 4E N
-	.byte 0x02 # 4F O
+	.byte CLASS_CHARACTER # 4F O
-	.byte 0x02 # 50 P
+	.byte CLASS_CHARACTER # 50 P
-	.byte 0x02 # 51 Q
+	.byte CLASS_CHARACTER # 51 Q
-	.byte 0x02 # 52 R
+	.byte CLASS_CHARACTER # 52 R
-	.byte 0x02 # 53 S
+	.byte CLASS_CHARACTER # 53 S
-	.byte 0x02 # 54 T
+	.byte CLASS_CHARACTER # 54 T
-	.byte 0x02 # 55 U
+	.byte CLASS_CHARACTER # 55 U
-	.byte 0x02 # 56 V
+	.byte CLASS_CHARACTER # 56 V
-	.byte 0x02 # 57 W
+	.byte CLASS_CHARACTER # 57 W
-	.byte 0x02 # 58 X
+	.byte CLASS_CHARACTER # 58 X
-	.byte 0x02 # 59 Y
+	.byte CLASS_CHARACTER # 59 Y
-	.byte 0x02 # 5A Z
+	.byte CLASS_CHARACTER # 5A Z
-	.byte 0x00 # 5B [
+	.byte CLASS_SINGLE # 5B [
 	.byte 0x00 # 5C \
-	.byte 0x00 # 5D ]
+	.byte CLASS_SINGLE # 5D ]
-	.byte 0x00 # 5E ^
+	.byte CLASS_SINGLE # 5E ^
-	.byte 0x00 # 5F _
+	.byte CLASS_UNDERSCORE # 5F _
 	.byte 0x00 # 60 `
-	.byte 0x02 # 61 a
+	.byte CLASS_HEX # 61 a
-	.byte 0x02 # 62 b
+	.byte CLASS_HEX # 62 b
-	.byte 0x02 # 63 c
+	.byte CLASS_HEX # 63 c
-	.byte 0x02 # 64 d
+	.byte CLASS_HEX # 64 d
-	.byte 0x02 # 65 e
+	.byte CLASS_HEX # 65 e
-	.byte 0x02 # 66 f
+	.byte CLASS_HEX # 66 f
-	.byte 0x02 # 67 g
+	.byte CLASS_CHARACTER # 67 g
-	.byte 0x02 # 68 h
+	.byte CLASS_CHARACTER # 68 h
-	.byte 0x02 # 69 i
+	.byte CLASS_CHARACTER # 69 i
-	.byte 0x02 # 6A j
+	.byte CLASS_CHARACTER # 6A j
-	.byte 0x02 # 6B k
+	.byte CLASS_CHARACTER # 6B k
-	.byte 0x02 # 6C l
+	.byte CLASS_CHARACTER # 6C l
-	.byte 0x02 # 6D m
+	.byte CLASS_CHARACTER # 6D m
-	.byte 0x02 # 6E n
+	.byte CLASS_CHARACTER # 6E n
-	.byte 0x02 # 6F o
+	.byte CLASS_CHARACTER # 6F o
-	.byte 0x02 # 70 p
+	.byte CLASS_CHARACTER # 70 p
-	.byte 0x02 # 71 q
+	.byte CLASS_CHARACTER # 71 q
-	.byte 0x02 # 72 r
+	.byte CLASS_CHARACTER # 72 r
-	.byte 0x02 # 73 s
+	.byte CLASS_CHARACTER # 73 s
-	.byte 0x02 # 74 t
+	.byte CLASS_CHARACTER # 74 t
-	.byte 0x02 # 75 u
+	.byte CLASS_CHARACTER # 75 u
-	.byte 0x02 # 76 v
+	.byte CLASS_CHARACTER # 76 v
-	.byte 0x02 # 77 w
+	.byte CLASS_CHARACTER # 77 w
-	.byte 0x02 # 78 x
+	.byte CLASS_X # 78 x
-	.byte 0x02 # 79 y
+	.byte CLASS_CHARACTER # 79 y
-	.byte 0x02 # 7A z
+	.byte CLASS_CHARACTER # 7A z
 	.byte 0x00 # 7B {
-	.byte 0x00 # 7C |
+	.byte CLASS_SINGLE # 7C |
 	.byte 0x00 # 7D }
-	.byte 0x00 # 7E ~
+	.byte CLASS_SINGLE # 7E ~
-	.byte 0x00 # 7F DEL
+	.byte CLASS_INVALID # 7F DEL
 .section .data
-.section .bss
+# The transition table describes transitions from one state to another, given
-.type class_names, @object
+# a symbol (character class).
-.size class_names, 1024
+#
-class_names: .zero 1024
+# The table has m rows and n columns, where m is the amount of states and n is
 # the amount of classes. So given the current state and a classified character
 # the table can be used to look up the next state.
 #
 # Each cell is a word long.
 # - The least significant byte of the word is a row number (beginning with 0).
 #   It specifies the target state. "ff" means that this is an end state and no
 #   transition is possible.
 # - The next byte is the action that should be performed when transitioning.
 #   For the meaning of actions see labels in the _analyze_token function, which
 #   handles each action.
 #
 .type transitions, @object
 transitions:
 	# One row per state, CLASS_COUNT word-sized cells per row. Each row is
 	# split over two .word lines; the columns are, in order:
 	#     Invalid Digit   Alpha   Space   :       =       (       )
 	#     *       _       Single  Hex     0       x       NUL     .
 	.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
 	.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
 	.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier
 	.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
 	.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
 	.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
 	.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
 	.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x02ff, 0x02ff # 0d Hexadecimal
 	.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
 	.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
 # Let the assembler compute the object size from the emitted data
 # (15 states * CLASS_COUNT cells * 4 bytes) so that it cannot drift out of
 # sync when states or classes are added.
 .size transitions, . - transitions
 .section .text
 # Returns the class from the classification table for the given character.
 #
 # The classification table only covers the 128 ASCII characters. Any value
 # outside of that range (e.g. a byte of a multi-byte UTF-8 sequence) is
 # reported as CLASS_INVALID instead of reading past the end of the table.
 #
 # Parameters:
 # a0 - Character.
 #
 # Sets a0 to the class number.
 .type _classify, @function
 _classify:
 	li t0, 128 # Number of entries in the classification table.
 	bgeu a0, t0, .Lclassify_invalid # Unsigned compare also rejects negative values.
 	la t0, classification
 	add t0, t0, a0 # Character class pointer.
 	lbu a0, (t0) # Character class.
 	ret
 .Lclassify_invalid:
 	li a0, CLASS_INVALID
 	ret
 # Fetches the transition table cell selected by a state (row) and a character
 # class (column).
 #
 # Parameters:
 # a0 - Current state.
 # a1 - Character class.
 #
 # Sets a0 to the loaded cell. Per the transition table layout its least
 # significant byte is the next state and the following byte is the action.
 .type _lookup_state, @function
 _lookup_state:
 	li t0, CLASS_COUNT
 	mul a0, a0, t0 # Index of the first cell in the state's row.
 	add a0, a0, a1 # Index of the cell for the given class.
 	slli a0, a0, 2 # Cell index to byte offset, each cell is 4 bytes.
 	la t0, transitions
 	add t0, t0, a0 # Cell address.
 	lw a0, (t0) # Transition cell.
 	ret
 # Classifies a character with _classify and passes the resulting class,
 # together with the current state, to _lookup_state.
 #
 # Parameters:
 # a0 - Current state.
 # a1 - Character.
 #
 # Sets a0 to the value returned by _lookup_state for the given state and
 # the class of the given character.
 .type _next_state, @function
 _next_state:
 	# Prologue.
 	addi sp, sp, -16
 	sw ra, 12(sp)
 	sw s0, 8(sp)
 	addi s0, sp, 16

 	# The current state has to survive the call to _classify, keep it in the
 	# frame (s0 - 12 is the same slot as sp + 4).
 	sw a0, -12(s0)

 	mv a0, a1 # The character is the only argument of _classify.
 	call _classify

 	mv a1, a0 # Second argument: character class.
 	lw a0, -12(s0) # First argument: the saved current state.
 	call _lookup_state

 	# Epilogue.
 	lw ra, 12(sp)
 	lw s0, 8(sp)
 	addi sp, sp, 16
 	ret
 # Initializes the classification table.
 #
 # Parameters:
-# a0 - Raw input for the classification table.
+# a0 - Source text pointer.
-.type _initialize_classes, @function
+.type _analyze_token, @function
-_initialize_classes:
+_analyze_token:
 	# Prologue.
 	addi sp, sp, -24
 	sw ra, 20(sp)
 	sw s0, 16(sp)
 	addi s0, sp, 24
-	sw s1, 12(sp) # Preserve the s1 register used for the character counter.
+	sw s1, 12(sp) # Preserve s1 used for current source text position.
-	li s1, 128 # 128 ASCII characters.
+	mv s1, a0
 	sw a0, 4(sp) # Keeps a pointer to the beginning of a token.
-.Linitialize_classes_loop:
+	sw s2, 8(sp) # Preserve s2 containing the current state.
-	addi s1, s1, -1
+	li s2, 0x00 # Initial, start state.
-	la t0, classes
+.Lanalyze_token_loop:
-	add t0, t0, s1
+	mv a0, s2
-	lbu t0, (t0)
+	lbu a1, (s1)
-	li t1, 0x01
+	call _next_state
-	bne t0, t1, .Linitialize_classes_step
+	li t0, 0xff
 	and s2, a0, t0 # Next state.
-	/* DEBUG */
+	li t0, 0xff00
-	li a0, 0x69676964
+	and t1, a0, t0 # Transition action.
-	sw a0, 8(sp) # Preserve the memory address.
+	srli t1, t1, 8
-	addi a0, sp, 8
+
-	li a1, 4
+
 	li t0, 0x01 # Accumulate action.
 	beq t1, t0, .Lanalyze_token_accumulate
 	li t0, 0x02 # Print action.
 	beq t1, t0, .Lanalyze_token_print
 	li t0, 0x03 # Skip action.
 	beq t1, t0, .Lanalyze_token_skip
 	li t0, 0x04 # Comment action.
 	beq t1, t0, .Lanalyze_token_comment
 	/* DEBUG
 	mv s4, t1
 	addi t1, t1, '0'
 	sb t1, 0(sp)
 	li t1, ' '
 	sb t1, 1(sp)
 	addi t1, s2, '0'
 	sb t1, 2(sp)
 	addi a0, sp, 0 */
 	sw s1, 0(sp)
 	addi a0, s1, 0
 	li a1, 3
 	call _write_error
 	/* mv t1, s4
 	DEBUG */
-.Linitialize_classes_step:
+	j .Lanalyze_token_reject
 	bnez s1, .Linitialize_classes_loop
-	lw s1, 12(sp) # Restore the saved register.
+.Lanalyze_token_reject:
 	addi s1, s1, 1
 	j .Lanalyze_token_end
 .Lanalyze_token_accumulate:
 	addi s1, s1, 1
 	j .Lanalyze_token_loop
 .Lanalyze_token_skip:
 	addi s1, s1, 1
 	lw t0, 4(sp)
 	addi t0, t0, 1
 	sw t0, 4(sp)
 	j .Lanalyze_token_loop
 .Lanalyze_token_print:
 	/* DEBUG
 	lw a0, 4(sp)
 	mv a1, s1
 	sub a1, a1, a0
 	call _write_error
 	DEBUG */
 	j .Lanalyze_token_end
 .Lanalyze_token_comment:
 	addi s1, s1, 1
 	/* DEBUG
 	lw a0, 4(sp)
 	mv a1, s1
 	sub a1, a1, a0
 	call _write_error
 	DEBUG */
 	j .Lanalyze_token_end
 .Lanalyze_token_end:
 	mv a0, s1 # Return the advanced text pointer.
 	# Restore saved registers.
 	lw s1, 12(sp)
 	lw s2, 8(sp)
 	# Epilogue.
 	lw ra, 20(sp)
@@ -193,15 +408,22 @@ _initialize_classes:
 	ret
 # Initializes the lookup tables.
-.type _tokenizer_initialize, @function
+#
-_tokenizer_initialize:
+# Parameters:
 # a0 - Source text pointer.
 .type _tokenize, @function
 _tokenize:
 	# Prologue.
 	addi sp, sp, -8
 	sw ra, 4(sp)
 	sw s0, 0(sp)
 	addi s0, sp, 8
-	call _initialize_classes
+.Ltokenize_loop:
 	call _analyze_token
 	lw t0, (a0)
 	bnez t0, .Ltokenize_loop
 	# Epilogue.
 	lw ra, 4(sp)