Properly tokenize declaration sections

This commit is contained in:
Eugen Wissner 2025-05-02 22:57:04 +02:00
parent 768821c689
commit dcfd6b1515
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
5 changed files with 357 additions and 165 deletions

View File

@ -36,11 +36,17 @@ end
directory 'build' directory 'build'
desc 'Initial stage' Dir.glob('boot/*.s').each do |assembly_source|
file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t| target_object = Pathname.new('build') + Pathname.new(assembly_source).basename.sub_ext('.o')
source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source file target_object.to_s => [assembly_source, 'build'] do |t|
sh CROSS_GCC, '-c', '-o', t.name, assembly_source
end
end
desc 'Initial stage'
file 'build/stage1' => ['build/tokenizer.o', 'build/stage1.o', 'build/common-boot.o'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end end
file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t| file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
@ -51,8 +57,10 @@ file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
end end
end end
file 'build/stage2a' => ['build/stage2a.s', 'boot/common-boot.s'] do |t| ['build/stage2a', 'build/stage2b'].each do |exe|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites file exe => [exe.ext('.s'), 'build/common-boot.o'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
end end
file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t| file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
@ -62,7 +70,3 @@ file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
assemble_stage output, exe, source assemble_stage output, exe, source
end end
end end
file 'build/stage2b' => ['build/stage2b.s', 'boot/common-boot.s'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end

View File

@ -2,6 +2,7 @@
.global _write_out, _read_file, _write_error, _put_char, _printi .global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy .global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit .global _divide_by_zero_error, _exit
.global _strings_index
.section .rodata .section .rodata
@ -424,3 +425,69 @@ _memcpy:
.Lmemcpy_end: .Lmemcpy_end:
mv a0, t0 mv a0, t0
ret ret
# Searches for a string in a string array.
#
# Each array element is a 4-byte length followed immediately by that many
# bytes of string data (no padding), so elements are walked with
# "pointer += 4 + length".
#
# Parameters:
#   a0 - Number of elements in the string array.
#   a1 - String array.
#   a2 - Needle length.
#   a3 - Needle.
#
# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
# element could not be found.
.type _strings_index, @function
_strings_index:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Move the arguments into saved registers so they survive the _memcmp
	# calls below.
	sw s1, 20(sp)
	mv s1, a0 # s1 = Remaining element count.
	sw s2, 16(sp)
	mv s2, a1 # s2 = Current element (points at its length word).
	sw s3, 12(sp)
	mv s3, a2 # s3 = Needle length.
	sw s4, 8(sp)
	mv s4, a3 # s4 = Needle pointer.
	sw s5, 4(sp)
	li s5, 0 # Index counter.

.Lstrings_index_loop:
	addi s5, s5, 1
	beqz s1, .Lstrings_index_missing
	lw a2, (s2) # Read the length of the current element in the haystack.
	bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.
	addi a0, s2, 4 # Element data starts right after its length word.
	mv a1, s4
	call _memcmp
	beqz a0, .Lstrings_index_end

.Lstrings_index_next:
	# Reload the element length: a2 is an argument register, so _memcmp may
	# have clobbered it on the path through the comparison above.
	lw a2, (s2)
	addi s2, s2, 4
	add s2, s2, a2 # Advance past the length word and the string bytes.
	addi s1, s1, -1
	j .Lstrings_index_loop

.Lstrings_index_missing:
	li s5, 0 # Not found: report index 0.

.Lstrings_index_end:
	mv a0, s5
	# Restore saved registers.
	lw s1, 20(sp)
	lw s2, 16(sp)
	lw s3, 12(sp)
	lw s4, 8(sp)
	lw s5, 4(sp)

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32 # addi, not add: the immediate form is the actual instruction.
	ret

28
boot/definitions.inc Normal file
View File

@ -0,0 +1,28 @@
# Token kind constants.
#
# Each constant must match the 1-based index of the corresponding entry in
# the keywords array in tokenizer.s (_strings_index returns that index).
.equ TOKEN_PROGRAM, 1
.equ TOKEN_IMPORT, 2
.equ TOKEN_CONST, 3
.equ TOKEN_VAR, 4
.equ TOKEN_IF, 5
.equ TOKEN_THEN, 6
.equ TOKEN_ELSIF, 7
.equ TOKEN_ELSE, 8
.equ TOKEN_WHILE, 9
.equ TOKEN_DO, 10
.equ TOKEN_PROC, 11
.equ TOKEN_BEGIN, 12
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
.equ TOKEN_UNION, 16 # Was a duplicate TOKEN_RECORD; keywords index 16 is "union".
.equ TOKEN_TRUE, 17
.equ TOKEN_FALSE, 18 # Was misspelled TOKEN_FASE; keywords index 18 is "false".
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
.equ TOKEN_DEFER, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26

View File

@ -4,6 +4,8 @@
# s1 - Contains the current position in the source text. # s1 - Contains the current position in the source text.
# s2 - Label counter. # s2 - Label counter.
.include "boot/definitions.inc"
.equ SOURCE_BUFFER_SIZE, 81920 .equ SOURCE_BUFFER_SIZE, 81920
.section .rodata .section .rodata
@ -55,20 +57,41 @@ source_code: .zero SOURCE_BUFFER_SIZE
.type _compile_import, @function .type _compile_import, @function
_compile_import: _compile_import:
# Prologue. # Prologue.
addi sp, sp, -8 addi sp, sp, -16
sw ra, 4(sp) sw ra, 12(sp)
sw s0, 0(sp) sw s0, 8(sp)
addi s0, sp, 8 addi s0, sp, 16
addi s1, s1, 6 .Lcompile_import_loop:
call _skip_comment
call _skip_spaces call _skip_spaces
call _read_token
add s1, s1, a0 # Skip the imported module name.
mv a0, s1
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_IMPORT
lw t1, 0(sp)
bne t0, t1, .Lcompile_import_end
# a0 is set from the previous _tokenize_next call. Skip the module name.
addi a1, sp, 0
call _tokenize_next
mv s1, a0
/* DEBUG
lw t0, 0(sp)
addi t0, t0, '0'
sw t0, 4(sp)
addi a0, sp, 4
li a1, 1
call _write_error*/
j .Lcompile_import_loop
.Lcompile_import_end:
# Epilogue. # Epilogue.
lw ra, 4(sp) lw ra, 12(sp)
lw s0, 0(sp) lw s0, 8(sp)
addi sp, sp, 8 addi sp, sp, 16
ret ret
.type _build_binary_expression, @function .type _build_binary_expression, @function
@ -943,40 +966,54 @@ _compile_assembly:
addi sp, sp, 16 addi sp, sp, 16
ret ret
.type _compile_program, @function .type _compile_module_declaration, @function
_compile_program: _compile_module_declaration:
# Prologue. # Prologue.
addi sp, sp, -8 addi sp, sp, -16
sw ra, 4(sp) sw ra, 12(sp)
sw s0, 0(sp) sw s0, 8(sp)
addi s0, sp, 8 addi s0, sp, 16
la a0, global_start la a0, global_start
li a1, GLOBAL_START_SIZE li a1, GLOBAL_START_SIZE
call _write_out call _write_out
addi s1, s1, 8 # program\n. # Skip "program".
call _skip_comment
mv a0, s1
addi a1, sp, 0
call _tokenize_next
mv s1, a0
# Epilogue. # Epilogue.
lw ra, 4(sp) lw ra, 12(sp)
lw s0, 0(sp) lw s0, 8(sp)
addi sp, sp, 8 addi sp, sp, 16
ret ret
.type _compile_constant_section, @function .type _compile_constant_section, @function
_compile_constant_section: _compile_constant_section:
# Prologue. # Prologue.
addi sp, sp, -8 addi sp, sp, -16
sw ra, 4(sp) sw ra, 12(sp)
sw s0, 0(sp) sw s0, 8(sp)
addi s0, sp, 8 addi s0, sp, 16
call _skip_comment
call _skip_spaces
mv a0, s1
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_CONST
lw t1, 0(sp)
bne t0, t1, .Lcompile_constant_section_end
mv s1, a0
la a0, section_rodata la a0, section_rodata
li a1, SECTION_RODATA_SIZE li a1, SECTION_RODATA_SIZE
call _write_out call _write_out
addi s1, s1, 6 # const\n.
.Lcompile_constant_section_item: .Lcompile_constant_section_item:
call _skip_spaces call _skip_spaces
lbu a0, (s1) lbu a0, (s1)
@ -988,9 +1025,9 @@ _compile_constant_section:
.Lcompile_constant_section_end: .Lcompile_constant_section_end:
# Epilogue. # Epilogue.
lw ra, 4(sp) lw ra, 12(sp)
lw s0, 0(sp) lw s0, 8(sp)
addi sp, sp, 8 addi sp, sp, 16
ret ret
.type _compile_constant, @function .type _compile_constant, @function
@ -1040,17 +1077,23 @@ _compile_constant:
.type _compile_variable_section, @function .type _compile_variable_section, @function
_compile_variable_section: _compile_variable_section:
# Prologue. # Prologue.
addi sp, sp, -8 addi sp, sp, -16
sw ra, 4(sp) sw ra, 12(sp)
sw s0, 0(sp) sw s0, 8(sp)
addi s0, sp, 8 addi s0, sp, 16
mv a0, s1
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_VAR
lw t1, 0(sp)
bne t0, t1, .Lcompile_variable_section_end
mv s1, a0
la a0, section_bss la a0, section_bss
li a1, SECTION_BSS_SIZE li a1, SECTION_BSS_SIZE
call _write_out call _write_out
addi s1, s1, 4 # var\n.
.Lcompile_variable_section_item: .Lcompile_variable_section_item:
call _skip_spaces call _skip_spaces
lbu a0, (s1) lbu a0, (s1)
@ -1062,9 +1105,9 @@ _compile_variable_section:
.Lcompile_variable_section_end: .Lcompile_variable_section_end:
# Epilogue. # Epilogue.
lw ra, 4(sp) lw ra, 12(sp)
lw s0, 0(sp) lw s0, 8(sp)
addi sp, sp, 8 addi sp, sp, 16
ret ret
.type _compile_variable, @function .type _compile_variable, @function
@ -1589,30 +1632,6 @@ _compile_line:
li t1, '(' li t1, '('
beq t0, t1, .Lcompile_line_comment beq t0, t1, .Lcompile_line_comment
li t0, 0x676f7270 # prog
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_program
li t0, 0x736e6f63 # cons
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_const
li t0, 0x0a726176 # var\n
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_var
li t0, 0x636f7270 # proc li t0, 0x636f7270 # proc
sw t0, 12(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
@ -1647,14 +1666,6 @@ _compile_line:
call _is_register_identifier call _is_register_identifier
bnez a0, .Lcompile_line_identifier bnez a0, .Lcompile_line_identifier
li t0, 0x6f706d69 # impo
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_import
li t0, 0x6f746f67 # goto li t0, 0x6f746f67 # goto
sw t0, 12(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
@ -1704,10 +1715,6 @@ _compile_line:
call _compile_goto call _compile_goto
j .Lcompile_line_section j .Lcompile_line_section
.Lcompile_line_import:
call _compile_import
j .Lcompile_line_section
.Lcompile_line_identifier: .Lcompile_line_identifier:
call _compile_identifier call _compile_identifier
j .Lcompile_line_section j .Lcompile_line_section
@ -1725,10 +1732,6 @@ _compile_line:
li a0, 1 li a0, 1
j .Lcompile_line_end j .Lcompile_line_end
.Lcompile_line_const:
call _compile_constant_section
j .Lcompile_line_section
.Lcompile_line_procedure: .Lcompile_line_procedure:
lw a1, 16(sp) lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_procedure bnez a1, .Lcompile_line_compile_procedure
@ -1738,14 +1741,6 @@ _compile_line:
li a0, 1 li a0, 1
j .Lcompile_line_end j .Lcompile_line_end
.Lcompile_line_var:
call _compile_variable_section
j .Lcompile_line_section
.Lcompile_line_program:
call _compile_program
j .Lcompile_line_section
.Lcompile_line_comment: .Lcompile_line_comment:
lw a0, 20(sp) lw a0, 20(sp)
call _skip_comment call _skip_comment
@ -1864,6 +1859,11 @@ _compile:
sw zero, 4(sp) # Whether the text section header was already emitted. sw zero, 4(sp) # Whether the text section header was already emitted.
call _compile_module_declaration
call _compile_import
call _compile_constant_section
call _compile_variable_section
.Lcompile_do: .Lcompile_do:
lbu t0, (s1) # t0 = Current character. lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character. beqz t0, .Lcompile_end # Exit the loop on the NUL character.
@ -1913,7 +1913,6 @@ _start:
call _read_file call _read_file
mv a0, s1 mv a0, s1
call _tokenize
call _main call _main
call _compile call _compile

View File

@ -1,4 +1,4 @@
.global _tokenize, classification, transitions .global _tokenize_next, classification, transitions, keywords
.section .rodata .section .rodata
@ -24,8 +24,10 @@
.equ CLASS_X, 0x0d .equ CLASS_X, 0x0d
.equ CLASS_EOF, 0x0e .equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f .equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
.equ CLASS_DOUBLE_QUOTE, 0x11
.equ CLASS_COUNT, 16 .equ CLASS_COUNT, 18
.type classification, @object .type classification, @object
.size classification, 128 .size classification, 128
@ -64,7 +66,7 @@ classification:
.byte CLASS_INVALID # 1F US .byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space .byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 ! .byte CLASS_SINGLE # 21 !
.byte 0x00 # 22 " .byte CLASS_DOUBLE_QUOTE # 22 "
.byte 0x00 # 23 # .byte 0x00 # 23 #
.byte 0x00 # 24 $ .byte 0x00 # 24 $
.byte CLASS_SINGLE # 25 % .byte CLASS_SINGLE # 25 %
@ -75,7 +77,7 @@ classification:
.byte CLASS_ASTERISK # 2A * .byte CLASS_ASTERISK # 2A *
.byte CLASS_SINGLE # 2B + .byte CLASS_SINGLE # 2B +
.byte CLASS_SINGLE # 2C , .byte CLASS_SINGLE # 2C ,
.byte 0x00 # 2D - .byte CLASS_MINUS # 2D -
.byte CLASS_DOT # 2E . .byte CLASS_DOT # 2E .
.byte CLASS_SINGLE # 2F / .byte CLASS_SINGLE # 2F /
.byte CLASS_ZERO # 30 0 .byte CLASS_ZERO # 30 0
@ -159,6 +161,67 @@ classification:
.byte CLASS_SINGLE # 7E ~ .byte CLASS_SINGLE # 7E ~
.byte CLASS_INVALID # 7F DEL .byte CLASS_INVALID # 7F DEL
#
# Textual keywords in the language.
#
# Layout: each entry is a 4-byte length followed by the unpadded keyword
# bytes, as walked by _strings_index. The 1-based entry positions must stay
# in sync with the TOKEN_* constants in boot/definitions.inc.
#
# The table contains 26 entries ("program" … "of"); the count must cover all
# of them, otherwise the trailing keywords are never matched and fall back
# to being treated as plain identifiers (index 0).
#
# NOTE(review): entries are unpadded, so the length words after odd-length
# keywords are unaligned — confirm the target tolerates unaligned lw loads.
.equ KEYWORDS_COUNT, 26
.type keywords, @object
keywords:
.word 7
.ascii "program"
.word 6
.ascii "import"
.word 5
.ascii "const"
.word 3
.ascii "var"
.word 2
.ascii "if"
.word 4
.ascii "then"
.word 5
.ascii "elsif"
.word 4
.ascii "else"
.word 5
.ascii "while"
.word 2
.ascii "do"
.word 4
.ascii "proc"
.word 5
.ascii "begin"
.word 3
.ascii "end"
.word 4
.ascii "type"
.word 6
.ascii "record"
.word 5
.ascii "union"
.word 4
.ascii "true"
.word 5
.ascii "false"
.word 3
.ascii "nil"
.word 3
.ascii "xor"
.word 2
.ascii "or"
.word 6
.ascii "return"
.word 4
.ascii "cast"
.word 5
.ascii "defer"
.word 4
.ascii "case"
.word 2
.ascii "of"
.size keywords, . - keywords
.section .data .section .data
# The transition table describes transitions from one state to another, given # The transition table describes transitions from one state to another, given
@ -173,58 +236,82 @@ classification:
# It specifies the target state. "ff" means that this is an end state and no # It specifies the target state. "ff" means that this is an end state and no
# transition is possible. # transition is possible.
# - The next byte is the action that should be performed when transitioning. # - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the _analyze_token function, which # For the meaning of actions see labels in the _tokenize_next function, which
# handles each action. # handles each action.
# #
.type transitions, @object .type transitions, @object
.size transitions, 13 * CLASS_COUNT # state count * CLASS_COUNT .size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT
transitions: transitions:
# Invalid Digit Alpha Space : = ( ) # Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL . # * _ Single Hex 0 x NUL .
# - "
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107 .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start .word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
.word 0x010f, 0x0110
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
.word 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier .word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier
.word 0x05ff, 0x05ff
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
.word 0x02ff, 0x02ff
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109 .word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
.word 0x0109, 0x0109
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff .word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
.word 0x0109, 0x0109
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
.word 0x02ff, 0x02ff
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
.word 0x02ff, 0x02ff
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal .word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal
.word 0x00ff, 0x02ff
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x2ff, 0x02ff # 0e Dot .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff
.word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string.
.word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110
.word 0x0110, 0x04ff
.section .text .section .text
@ -292,12 +379,42 @@ _next_state:
addi sp, sp, 16 addi sp, sp, 16
ret ret
# Takes an identifier and checks whether it's a keyword.
#
# Parameters:
#   a0 - Token length.
#   a1 - Token pointer.
#
# Sets a0 to the appropriate token type: the 1-based keyword index from
# _strings_index, or 0 when the identifier is not a keyword.
.type _classify_identifier, @function
_classify_identifier:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# _strings_index expects the needle in a2/a3 and the haystack in a0/a1,
	# so shift the incoming arguments over before loading the keyword table.
	mv a3, a1
	mv a2, a0
	la a1, keywords
	li a0, KEYWORDS_COUNT
	call _strings_index # The result in a0 is returned as-is.

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret
# Initializes the classification table. # Initializes the classification table.
# #
# Parameters: # Parameters:
# a0 - Source text pointer. # a0 - Source text pointer.
.type _analyze_token, @function # a1 - A pointer for output value, the token kind. 4 Bytes.
_analyze_token: #
# Sets a0 to the position of the next token.
.type _tokenize_next, @function
_tokenize_next:
# Prologue. # Prologue.
addi sp, sp, -24 addi sp, sp, -24
sw ra, 20(sp) sw ra, 20(sp)
@ -311,7 +428,10 @@ _analyze_token:
sw s2, 8(sp) # Preserve s2 containing the current state. sw s2, 8(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state. li s2, 0x00 # Initial, start state.
.Lanalyze_token_loop: sw a1, 0(sp)
sw zero, (a1) # Initialize.
.Ltokenize_next_loop:
mv a0, s2 mv a0, s2
lbu a1, (s1) lbu a1, (s1)
call _next_state call _next_state
@ -323,56 +443,43 @@ _analyze_token:
and t1, a0, t0 # Transition action. and t1, a0, t0 # Transition action.
srli t1, t1, 8 srli t1, t1, 8
# Perform the provided action.
li t0, 0x01 # Accumulate action. li t0, 0x01 # Accumulate action.
beq t1, t0, .Lanalyze_token_accumulate beq t1, t0, .Ltokenize_next_accumulate
li t0, 0x02 # Print action. li t0, 0x02 # Print action.
beq t1, t0, .Lanalyze_token_print beq t1, t0, .Ltokenize_next_print
li t0, 0x03 # Skip action. li t0, 0x03 # Skip action.
beq t1, t0, .Lanalyze_token_skip beq t1, t0, .Ltokenize_next_skip
li t0, 0x04 # Comment action. li t0, 0x04 # Comment action.
beq t1, t0, .Lanalyze_token_comment beq t1, t0, .Ltokenize_next_comment
/* DEBUG li t0, 0x05 # Finalize identifier.
mv s4, t1 beq t1, t0, .Ltokenize_next_identifier
addi t1, t1, '0'
sb t1, 0(sp)
li t1, ' '
sb t1, 1(sp)
addi t1, s2, '0'
sb t1, 2(sp)
addi a0, sp, 0 */
sw s1, 0(sp)
addi a0, s1, 0
li a1, 3
call _write_error
/* mv t1, s4
DEBUG */
j .Lanalyze_token_reject j .Ltokenize_next_reject
.Lanalyze_token_reject: .Ltokenize_next_reject:
addi s1, s1, 1 addi s1, s1, 1
j .Lanalyze_token_end j .Ltokenize_next_end
.Lanalyze_token_accumulate: .Ltokenize_next_accumulate:
addi s1, s1, 1 addi s1, s1, 1
j .Lanalyze_token_loop j .Ltokenize_next_loop
.Lanalyze_token_skip: .Ltokenize_next_skip:
addi s1, s1, 1 addi s1, s1, 1
lw t0, 4(sp) lw t0, 4(sp)
addi t0, t0, 1 addi t0, t0, 1
sw t0, 4(sp) sw t0, 4(sp)
j .Lanalyze_token_loop j .Ltokenize_next_loop
.Lanalyze_token_print: .Ltokenize_next_print:
/* DEBUG /* DEBUG
lw a0, 4(sp) lw a0, 4(sp)
mv a1, s1 mv a1, s1
@ -380,9 +487,9 @@ _analyze_token:
call _write_error call _write_error
DEBUG */ DEBUG */
j .Lanalyze_token_end j .Ltokenize_next_end
.Lanalyze_token_comment: .Ltokenize_next_comment:
addi s1, s1, 1 addi s1, s1, 1
/* DEBUG /* DEBUG
@ -392,9 +499,20 @@ _analyze_token:
call _write_error call _write_error
DEBUG */ DEBUG */
j .Lanalyze_token_end j .Ltokenize_next_end
.Lanalyze_token_end: .Ltokenize_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 4(sp)
sub a0, s1, a1
call _classify_identifier
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer. mv a0, s1 # Return the advanced text pointer.
# Restore saved registers. # Restore saved registers.
@ -406,27 +524,3 @@ _analyze_token:
lw s0, 16(sp) lw s0, 16(sp)
addi sp, sp, 24 addi sp, sp, 24
ret ret
# Tokenizes the whole source text by calling _analyze_token in a loop until
# the end of the input is reached.
#
# Parameters:
# a0 - Source text pointer.
.type _tokenize, @function
_tokenize:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
.Ltokenize_loop:
call _analyze_token # Returns the advanced text position in a0.
# NOTE(review): word-sized load for an end-of-input check — presumably a
# single NUL terminator is meant, which lbu would test; confirm whether the
# buffer guarantees a full zero word at the end.
lw t0, (a0)
bnez t0, .Ltokenize_loop # Continue while the terminator is not reached.
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret