summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Rakefile24
-rw-r--r--boot/common-boot.s67
-rw-r--r--boot/definitions.inc28
-rw-r--r--boot/stage1.s173
-rw-r--r--boot/tokenizer.s230
5 files changed, 357 insertions, 165 deletions
diff --git a/Rakefile b/Rakefile
index 4398ef8..2bc2683 100644
--- a/Rakefile
+++ b/Rakefile
@@ -36,11 +36,17 @@ end
directory 'build'
-desc 'Initial stage'
-file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t|
- source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
+Dir.glob('boot/*.s').each do |assembly_source|
+ target_object = Pathname.new('build') + Pathname.new(assembly_source).basename.sub_ext('.o')
+
+ file target_object.to_s => [assembly_source, 'build'] do |t|
+ sh CROSS_GCC, '-c', '-o', t.name, assembly_source
+ end
+end
- sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
+desc 'Initial stage'
+file 'build/stage1' => ['build/tokenizer.o', 'build/stage1.o', 'build/common-boot.o'] do |t|
+ sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
@@ -51,8 +57,10 @@ file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
end
end
-file 'build/stage2a' => ['build/stage2a.s', 'boot/common-boot.s'] do |t|
- sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
+['build/stage2a', 'build/stage2b'].each do |exe|
+ file exe => [exe.ext('.s'), 'build/common-boot.o'] do |t|
+ sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
+ end
end
file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
@@ -62,7 +70,3 @@ file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
assemble_stage output, exe, source
end
end
-
-file 'build/stage2b' => ['build/stage2b.s', 'boot/common-boot.s'] do |t|
- sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
-end
diff --git a/boot/common-boot.s b/boot/common-boot.s
index 0cf31f1..26dad8d 100644
--- a/boot/common-boot.s
+++ b/boot/common-boot.s
@@ -2,6 +2,7 @@
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit
+.global _strings_index
.section .rodata
@@ -424,3 +425,69 @@ _memcpy:
.Lmemcpy_end:
mv a0, t0
ret
+
+# Searches for a string in a string array.
+#
+# The array is a packed sequence of elements; each element is a 4-byte length
+# immediately followed by that many bytes of text.
+#
+# Parameters:
+# a0 - Number of elements in the string array.
+# a1 - String array.
+# a2 - Needle length.
+# a3 - Needle.
+#
+# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
+# element could not be found.
+.type _strings_index, @function
+_strings_index:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
+ # The arguments and the counter live in saved registers so that they survive
+ # the calls to _memcmp.
+ sw s1, 20(sp)
+ mv s1, a0 # Remaining element count.
+ sw s2, 16(sp)
+ mv s2, a1 # Current element in the haystack.
+ sw s3, 12(sp)
+ mv s3, a2 # Needle length.
+ sw s4, 8(sp)
+ mv s4, a3 # Needle.
+ sw s5, 4(sp)
+ li s5, 0 # Index counter.
+
+.Lstrings_index_loop:
+ addi s5, s5, 1
+ beqz s1, .Lstrings_index_missing
+
+ lw a2, (s2) # Read the length of the current element in the haystack.
+ bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.
+
+ addi a0, s2, 4
+ mv a1, s4
+ call _memcmp
+
+ beqz a0, .Lstrings_index_end
+
+.Lstrings_index_next:
+ # Reload the element length instead of reusing a2: a2 is a caller-saved
+ # register and is not guaranteed to survive the call to _memcmp.
+ lw t0, (s2)
+ addi s2, s2, 4
+ add s2, s2, t0
+ addi s1, s1, -1
+ j .Lstrings_index_loop
+
+.Lstrings_index_missing:
+ li s5, 0
+
+.Lstrings_index_end:
+ mv a0, s5
+
+ lw s1, 20(sp)
+ lw s2, 16(sp)
+ lw s3, 12(sp)
+ lw s4, 8(sp)
+ lw s5, 4(sp)
+
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
diff --git a/boot/definitions.inc b/boot/definitions.inc
new file mode 100644
index 0000000..0e2f54e
--- /dev/null
+++ b/boot/definitions.inc
@@ -0,0 +1,28 @@
+# Each constant must equal the keyword's 1-based index in the keywords array in tokenizer.s.
+
+.equ TOKEN_PROGRAM, 1
+.equ TOKEN_IMPORT, 2
+.equ TOKEN_CONST, 3
+.equ TOKEN_VAR, 4
+.equ TOKEN_IF, 5
+.equ TOKEN_THEN, 6
+.equ TOKEN_ELSIF, 7
+.equ TOKEN_ELSE, 8
+.equ TOKEN_WHILE, 9
+.equ TOKEN_DO, 10
+.equ TOKEN_PROC, 11
+.equ TOKEN_BEGIN, 12
+.equ TOKEN_END, 13
+.equ TOKEN_TYPE, 14
+.equ TOKEN_RECORD, 15
+.equ TOKEN_UNION, 16
+.equ TOKEN_TRUE, 17
+.equ TOKEN_FALSE, 18
+.equ TOKEN_NIL, 19
+.equ TOKEN_XOR, 20
+.equ TOKEN_OR, 21
+.equ TOKEN_RETURN, 22
+.equ TOKEN_CAST, 23
+.equ TOKEN_DEFER, 24
+.equ TOKEN_CASE, 25
+.equ TOKEN_OF, 26
diff --git a/boot/stage1.s b/boot/stage1.s
index 9b118d5..9ab072d 100644
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -4,6 +4,8 @@
# s1 - Contains the current position in the source text.
# s2 - Label counter.
+.include "boot/definitions.inc"
+
.equ SOURCE_BUFFER_SIZE, 81920
.section .rodata
@@ -55,20 +57,41 @@ source_code: .zero SOURCE_BUFFER_SIZE
.type _compile_import, @function
_compile_import:
# Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
- addi s1, s1, 6
+.Lcompile_import_loop:
+ call _skip_comment
call _skip_spaces
- call _read_token
- add s1, s1, a0 # Skip the imported module name.
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ li t0, TOKEN_IMPORT
+ lw t1, 0(sp)
+ bne t0, t1, .Lcompile_import_end
+ # a0 is set from the previous _tokenize_next call. Skip the module name.
+ addi a1, sp, 0
+ call _tokenize_next
+ mv s1, a0
+
+ /* DEBUG
+ lw t0, 0(sp)
+ addi t0, t0, '0'
+ sw t0, 4(sp)
+ addi a0, sp, 4
+ li a1, 1
+ call _write_error*/
+
+ j .Lcompile_import_loop
+
+.Lcompile_import_end:
# Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
ret
.type _build_binary_expression, @function
@@ -943,40 +966,54 @@ _compile_assembly:
addi sp, sp, 16
ret
-.type _compile_program, @function
-_compile_program:
+.type _compile_module_declaration, @function
+_compile_module_declaration:
# Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
la a0, global_start
li a1, GLOBAL_START_SIZE
call _write_out
- addi s1, s1, 8 # program\n.
+ # Skip "program".
+ call _skip_comment
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ mv s1, a0
# Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
ret
.type _compile_constant_section, @function
_compile_constant_section:
# Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ call _skip_comment
+ call _skip_spaces
+
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ li t0, TOKEN_CONST
+ lw t1, 0(sp)
+ bne t0, t1, .Lcompile_constant_section_end
+ mv s1, a0
la a0, section_rodata
li a1, SECTION_RODATA_SIZE
call _write_out
- addi s1, s1, 6 # const\n.
-
.Lcompile_constant_section_item:
call _skip_spaces
lbu a0, (s1)
@@ -988,9 +1025,9 @@ _compile_constant_section:
.Lcompile_constant_section_end:
# Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
ret
.type _compile_constant, @function
@@ -1040,17 +1077,23 @@ _compile_constant:
.type _compile_variable_section, @function
_compile_variable_section:
# Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ li t0, TOKEN_VAR
+ lw t1, 0(sp)
+ bne t0, t1, .Lcompile_variable_section_end
+ mv s1, a0
la a0, section_bss
li a1, SECTION_BSS_SIZE
call _write_out
- addi s1, s1, 4 # var\n.
-
.Lcompile_variable_section_item:
call _skip_spaces
lbu a0, (s1)
@@ -1062,9 +1105,9 @@ _compile_variable_section:
.Lcompile_variable_section_end:
# Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
ret
.type _compile_variable, @function
@@ -1589,30 +1632,6 @@ _compile_line:
li t1, '('
beq t0, t1, .Lcompile_line_comment
- li t0, 0x676f7270 # prog
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_program
-
- li t0, 0x736e6f63 # cons
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_const
-
- li t0, 0x0a726176 # var\n
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_var
-
li t0, 0x636f7270 # proc
sw t0, 12(sp)
mv a0, s1
@@ -1647,14 +1666,6 @@ _compile_line:
call _is_register_identifier
bnez a0, .Lcompile_line_identifier
- li t0, 0x6f706d69 # impo
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_import
-
li t0, 0x6f746f67 # goto
sw t0, 12(sp)
mv a0, s1
@@ -1704,10 +1715,6 @@ _compile_line:
call _compile_goto
j .Lcompile_line_section
-.Lcompile_line_import:
- call _compile_import
- j .Lcompile_line_section
-
.Lcompile_line_identifier:
call _compile_identifier
j .Lcompile_line_section
@@ -1725,10 +1732,6 @@ _compile_line:
li a0, 1
j .Lcompile_line_end
-.Lcompile_line_const:
- call _compile_constant_section
- j .Lcompile_line_section
-
.Lcompile_line_procedure:
lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_procedure
@@ -1738,14 +1741,6 @@ _compile_line:
li a0, 1
j .Lcompile_line_end
-.Lcompile_line_var:
- call _compile_variable_section
- j .Lcompile_line_section
-
-.Lcompile_line_program:
- call _compile_program
- j .Lcompile_line_section
-
.Lcompile_line_comment:
lw a0, 20(sp)
call _skip_comment
@@ -1864,6 +1859,11 @@ _compile:
sw zero, 4(sp) # Whether the text section header was already emitted.
+ call _compile_module_declaration
+ call _compile_import
+ call _compile_constant_section
+ call _compile_variable_section
+
.Lcompile_do:
lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
@@ -1913,7 +1913,6 @@ _start:
call _read_file
mv a0, s1
- call _tokenize
call _main
call _compile
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
index 5570031..4315f66 100644
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -1,4 +1,4 @@
-.global _tokenize, classification, transitions
+.global _tokenize_next, classification, transitions, keywords
.section .rodata
@@ -24,8 +24,10 @@
.equ CLASS_X, 0x0d
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
+.equ CLASS_MINUS, 0x10
+.equ CLASS_DOUBLE_QUOTE, 0x11
-.equ CLASS_COUNT, 16
+.equ CLASS_COUNT, 18
.type classification, @object
.size classification, 128
@@ -64,7 +66,7 @@ classification:
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
- .byte 0x00 # 22 "
+ .byte CLASS_DOUBLE_QUOTE # 22 "
.byte 0x00 # 23 #
.byte 0x00 # 24 $
.byte CLASS_SINGLE # 25 %
@@ -75,7 +77,7 @@ classification:
.byte CLASS_ASTERISK # 2A *
.byte CLASS_SINGLE # 2B +
.byte CLASS_SINGLE # 2C ,
- .byte 0x00 # 2D -
+ .byte CLASS_MINUS # 2D -
.byte CLASS_DOT # 2E .
.byte CLASS_SINGLE # 2F /
.byte CLASS_ZERO # 30 0
@@ -159,6 +161,67 @@ classification:
.byte CLASS_SINGLE # 7E ~
.byte CLASS_INVALID # 7F DEL
+# Textual keywords in the language. Each entry is a 4-byte length followed by
+# the keyword text. KEYWORDS_COUNT must equal the number of entries below, and
+# the entry order must match the TOKEN_* constants in boot/definitions.inc.
+.equ KEYWORDS_COUNT, 26
+
+.type keywords, @object
+keywords:
+ .word 7
+ .ascii "program"
+ .word 6
+ .ascii "import"
+ .word 5
+ .ascii "const"
+ .word 3
+ .ascii "var"
+ .word 2
+ .ascii "if"
+ .word 4
+ .ascii "then"
+ .word 5
+ .ascii "elsif"
+ .word 4
+ .ascii "else"
+ .word 5
+ .ascii "while"
+ .word 2
+ .ascii "do"
+ .word 4
+ .ascii "proc"
+ .word 5
+ .ascii "begin"
+ .word 3
+ .ascii "end"
+ .word 4
+ .ascii "type"
+ .word 6
+ .ascii "record"
+ .word 5
+ .ascii "union"
+ .word 4
+ .ascii "true"
+ .word 5
+ .ascii "false"
+ .word 3
+ .ascii "nil"
+ .word 3
+ .ascii "xor"
+ .word 2
+ .ascii "or"
+ .word 6
+ .ascii "return"
+ .word 4
+ .ascii "cast"
+ .word 5
+ .ascii "defer"
+ .word 4
+ .ascii "case"
+ .word 2
+ .ascii "of"
+.size keywords, . - keywords
+
.section .data
# The transition table describes transitions from one state to another, given
@@ -173,58 +236,82 @@ classification:
# It specifies the target state. "ff" means that this is an end state and no
# transition is possible.
# - The next byte is the action that should be performed when transitioning.
-# For the meaning of actions see labels in the _analyze_token function, which
+# For the meaning of actions see labels in the _tokenize_next function, which
# handles each action.
#
.type transitions, @object
-.size transitions, 13 * CLASS_COUNT # state count * CLASS_COUNT
+.size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
+ # - "
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
+ .word 0x010f, 0x0110
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
+ .word 0x02ff, 0x02ff
- .word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier
+ .word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
+ .word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier
+ .word 0x05ff, 0x05ff
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
+ .word 0x02ff, 0x02ff
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
+ .word 0x0109, 0x0109
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
+ .word 0x0109, 0x0109
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
+ .word 0x02ff, 0x02ff
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal
+ .word 0x00ff, 0x02ff
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x2ff, 0x02ff # 0e Dot
+ .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
+ .word 0x02ff, 0x02ff
+
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff
+
+ .word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string.
+ .word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110
+ .word 0x0110, 0x04ff
.section .text
@@ -292,12 +379,42 @@ _next_state:
addi sp, sp, 16
ret
+# Looks a token up in the keywords table to tell whether it is a keyword.
+#
+# Parameters:
+# a0 - Length of the token in bytes.
+# a1 - Address of the first byte of the token.
+#
+# Sets a0 to the value returned by _strings_index for the keywords array.
+.type _classify_identifier, @function
+_classify_identifier:
+ # Prologue.
+ addi sp, sp, -16
+ sw s0, 8(sp)
+ sw ra, 12(sp)
+ addi s0, sp, 16
+
+ mv a3, a1 # The token becomes the needle.
+ mv a2, a0 # Needle length, moved before a0 is overwritten below.
+ la a1, keywords
+ li a0, KEYWORDS_COUNT
+ call _strings_index
+
+ # Epilogue.
+ lw s0, 8(sp)
+ lw ra, 12(sp)
+ addi sp, sp, 16
+ ret
+
# Reads the next token from the source text.
#
# Parameters:
# a0 - Source text pointer.
-.type _analyze_token, @function
-_analyze_token:
+# a1 - A pointer for output value, the token kind. 4 Bytes.
+#
+# Sets a0 to the position of the next token.
+.type _tokenize_next, @function
+_tokenize_next:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
@@ -311,7 +428,10 @@ _analyze_token:
sw s2, 8(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
-.Lanalyze_token_loop:
+ sw a1, 0(sp)
+ sw zero, (a1) # Initialize.
+
+.Ltokenize_next_loop:
mv a0, s2
lbu a1, (s1)
call _next_state
@@ -323,56 +443,43 @@ _analyze_token:
and t1, a0, t0 # Transition action.
srli t1, t1, 8
-
+ # Perform the provided action.
li t0, 0x01 # Accumulate action.
- beq t1, t0, .Lanalyze_token_accumulate
+ beq t1, t0, .Ltokenize_next_accumulate
li t0, 0x02 # Print action.
- beq t1, t0, .Lanalyze_token_print
+ beq t1, t0, .Ltokenize_next_print
li t0, 0x03 # Skip action.
- beq t1, t0, .Lanalyze_token_skip
+ beq t1, t0, .Ltokenize_next_skip
li t0, 0x04 # Comment action.
- beq t1, t0, .Lanalyze_token_comment
+ beq t1, t0, .Ltokenize_next_comment
- /* DEBUG
- mv s4, t1
- addi t1, t1, '0'
- sb t1, 0(sp)
- li t1, ' '
- sb t1, 1(sp)
- addi t1, s2, '0'
- sb t1, 2(sp)
- addi a0, sp, 0 */
- sw s1, 0(sp)
- addi a0, s1, 0
- li a1, 3
- call _write_error
- /* mv t1, s4
- DEBUG */
+ li t0, 0x05 # Finalize identifier.
+ beq t1, t0, .Ltokenize_next_identifier
- j .Lanalyze_token_reject
+ j .Ltokenize_next_reject
-.Lanalyze_token_reject:
+.Ltokenize_next_reject:
addi s1, s1, 1
- j .Lanalyze_token_end
+ j .Ltokenize_next_end
-.Lanalyze_token_accumulate:
+.Ltokenize_next_accumulate:
addi s1, s1, 1
- j .Lanalyze_token_loop
+ j .Ltokenize_next_loop
-.Lanalyze_token_skip:
+.Ltokenize_next_skip:
addi s1, s1, 1
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
- j .Lanalyze_token_loop
+ j .Ltokenize_next_loop
-.Lanalyze_token_print:
+.Ltokenize_next_print:
/* DEBUG
lw a0, 4(sp)
mv a1, s1
@@ -380,9 +487,9 @@ _analyze_token:
call _write_error
DEBUG */
- j .Lanalyze_token_end
+ j .Ltokenize_next_end
-.Lanalyze_token_comment:
+.Ltokenize_next_comment:
addi s1, s1, 1
/* DEBUG
@@ -392,9 +499,20 @@ _analyze_token:
call _write_error
DEBUG */
- j .Lanalyze_token_end
+ j .Ltokenize_next_end
+
+.Ltokenize_next_identifier:
+ # An identifier can be a textual keyword.
+ # Check the kind of the token and write it into the output parameter.
+ lw a1, 4(sp)
+ sub a0, s1, a1
+ call _classify_identifier
+ lw a1, 0(sp)
+ sw a0, (a1)
+
+ j .Ltokenize_next_end
-.Lanalyze_token_end:
+.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
@@ -406,27 +524,3 @@ _analyze_token:
lw s0, 16(sp)
addi sp, sp, 24
ret
-
-# Initializes the lookup tables.
-#
-# Parameters:
-# a0 - Source text pointer.
-.type _tokenize, @function
-_tokenize:
- # Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
-
-.Ltokenize_loop:
- call _analyze_token
-
- lw t0, (a0)
- bnez t0, .Ltokenize_loop
-
- # Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
- ret