summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2025-04-29 23:08:46 +0200
committerEugen Wissner <belka@caraus.de>2025-04-29 23:08:46 +0200
commit23b7a1ab308442777a32c3bd123e68ad98369c42 (patch)
tree5b2ea7e933d50a9dd150568f1aafd3bad60db073
parent9c66cec171bafaf12713efb78ac6099ef1f23020 (diff)
downloadelna-23b7a1ab308442777a32c3bd123e68ad98369c42.tar.gz
Support preserved registers as identifiers
… in expressions
-rw-r--r--Rakefile2
-rw-r--r--boot/common-boot.s21
-rw-r--r--boot/stage1.s276
-rw-r--r--boot/stage2.elna888
-rw-r--r--boot/states.txt20
-rw-r--r--boot/tokenizer.s181
6 files changed, 868 insertions, 520 deletions
diff --git a/Rakefile b/Rakefile
index 3fa065e..ef74301 100644
--- a/Rakefile
+++ b/Rakefile
@@ -37,7 +37,7 @@ end
directory 'build'
desc 'Initial stage'
-file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'build'] do |t|
+file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t|
source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
diff --git a/boot/common-boot.s b/boot/common-boot.s
index e8eba52..e5796f1 100644
--- a/boot/common-boot.s
+++ b/boot/common-boot.s
@@ -1,5 +1,6 @@
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
-.global _write_out, _read_file, _memcmp, _write_error, _put_char, _printi
+.global _write_out, _read_file, _write_error, _put_char, _printi
+.global _get, _memcmp
.global _divide_by_zero_error, _exit
.section .rodata
@@ -187,7 +188,9 @@ _write_out:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
-# Returns the result in a0.
+# Sets s1 to the buffer passed in a0.
+#
+# Returns the amount of bytes written in a0.
.type _read_file, @function
_read_file:
# Prologue.
@@ -196,9 +199,11 @@ _read_file:
sw s0, 0(sp)
addi s0, sp, 8
- mv a2, a1
- mv a1, a0
+ mv s1, a0
+
li a0, STDIN
+ mv a2, a1
+ mv a1, s1
li a7, SYS_READ
ecall
@@ -293,3 +298,11 @@ _put_char:
lw s0, 8(sp)
add sp, sp, 16
ret
+
+# a0 - Pointer to an array to get the first element.
+#
+# Dereferences a pointer and returns what is on the address in a0.
+# The value is read with lw, so a whole word is returned, not a single byte.
+#
+# NOTE(review): callers such as _front in stage2.elna pass addresses computed
+# byte by byte; whether lw on a misaligned address is acceptable depends on the
+# execution environment - confirm.
+.type _get, @function
+_get:
+ lw a0, (a0) # a0 = word stored at the address given in a0.
+ ret
diff --git a/boot/stage1.s b/boot/stage1.s
index 61dcdec..7137edc 100644
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -207,6 +207,130 @@ _build_binary_expression:
addi sp, sp, 32
ret
+# Writes the instructions loading the identifier s1 points to into the
+# register aX, X being the character given in a1.
+#
+# - An identifier starting with "loca" produces "lw aX, <rest>(sp)", where
+#   <rest> is the identifier without the "loca" prefix.
+# - A two character identifier starting with 's' produces "mv aX, <identifier>".
+# - Any other identifier produces "la aX, <identifier>" and, if _is_upper
+#   returns non-zero for its first character, also "lw aX, (aX)".
+#
+# Parameters:
+# a0 - Identifier length.
+# a1 - Register number as character.
+.type _build_identifier_expression, @function
+_build_identifier_expression:
+ # Prologue.
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
+
+ sw a0, 20(sp) # Identifier length.
+ sw a1, 16(sp) # Register number as character.
+
+ # Does the identifier begin with "loca"?
+ li t0, 0x61636f6c # loca
+ sw t0, 12(sp)
+ mv a0, s1
+ addi a1, sp, 12
+ li a2, 4
+ call _memcmp
+ beqz a0, .Lbuild_identifier_expression_local
+
+ # Is it a two character identifier starting with 's'?
+ lbu a0, (s1)
+ lw t0, 20(sp)
+ addi t0, t0, -2
+ seqz t0, t0 # t0 = 1 if the identifier length is 2.
+ addi t1, a0, -'s'
+ seqz t1, t1 # t1 = 1 if the first character is 's'.
+ and t0, t0, t1
+ bnez t0, .Lbuild_identifier_expression_saved
+
+ # Global identifier.
+ # Build "la aX, " in the 8 bytes starting at 8(sp).
+ lw t1, 16(sp)
+ li t0, 0x00202c00 # \0,_
+ or t0, t0, t1 # Put the register character into the lowest byte.
+ sw t0, 12(sp)
+ li t0, 0x6120616c # la a
+ sw t0, 8(sp)
+ addi a0, sp, 8
+ li a1, 7 # "la aX, " is 7 bytes long.
+ call _write_out
+
+ # The identifier itself completes the la instruction.
+ mv a0, s1
+ lw a1, 20(sp)
+ call _write_out
+
+ li a0, '\n'
+ call _put_char
+
+ # Only if _is_upper accepts the first character the loaded address is
+ # dereferenced as well.
+ lbu a0, (s1)
+ call _is_upper
+ beqz a0, .Lbuild_identifier_expression_end
+
+ # Build "lw aX, (aX)\n" in the 12 bytes starting at 4(sp).
+ lw t1, 16(sp)
+ li t0, 0x0a290061 # a\0)\n
+ sll t2, t1, 8 # Move the register character into the \0 position.
+ or t0, t0, t2
+ sw t0, 12(sp)
+ li t0, 0x28202c00 # \0, (
+ or t0, t0, t1
+ sw t0, 8(sp)
+ li t0, 0x6120776c # lw a
+ sw t0, 4(sp)
+ addi a0, sp, 4
+ li a1, 12 # "lw aX, (aX)\n" is 12 bytes long.
+ call _write_out
+
+ j .Lbuild_identifier_expression_end
+
+.Lbuild_identifier_expression_saved:
+ # Build "mv aX, " in the 8 bytes starting at 8(sp).
+ li t0, 0x00202c00 # \0,_
+ lw t1, 16(sp)
+ or t0, t0, t1
+ sw t0, 12(sp)
+ li t0, 0x6120766d # mv a
+ sw t0, 8(sp)
+ addi a0, sp, 8
+ li a1, 7 # "mv aX, " is 7 bytes long.
+ call _write_out
+
+ # The identifier is written as is as the source operand.
+ mv a0, s1
+ lw a1, 20(sp)
+ call _write_out
+
+ li a0, '\n'
+ call _put_char
+
+ j .Lbuild_identifier_expression_end
+
+.Lbuild_identifier_expression_local:
+ # Build "lw aX, " in the 8 bytes starting at 8(sp).
+ lw t1, 16(sp)
+ li t0, 0x00202c00 # \0,_
+ or t0, t0, t1
+ sw t0, 12(sp)
+ li t0, 0x6120776c # lw a
+ sw t0, 8(sp)
+ addi a0, sp, 8
+ li a1, 7 # "lw aX, " is 7 bytes long.
+ call _write_out
+
+ # What follows "loca" in the identifier is written as the offset.
+ mv a0, s1
+ lw a1, 20(sp)
+ addi a0, a0, 4 # Skip the "loca" variable prefix.
+ addi a1, a1, -4 # Skip the "loca" variable prefix.
+ call _write_out
+
+ li t0, 0x29707328 # (sp)
+ sw t0, 12(sp)
+ addi a0, sp, 12
+ li a1, 4
+ call _write_out
+ li a0, '\n'
+ call _put_char
+
+ j .Lbuild_identifier_expression_end
+
+.Lbuild_identifier_expression_end:
+
+ # Epilogue.
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
+ ret
+
# Evaluates an expression and saves the result in a0.
#
# a0 - X in aX, the register number to save the result.
@@ -242,31 +366,9 @@ _build_expression:
li t0, '_'
beq a0, t0, .Lbuild_expression_call
- li t0, 0x61636f6c # loca
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 4
- call _memcmp
- beqz a0, .Lbuild_expression_identifier
-
- # Named identifier.
- lw t1, 28(sp)
- li t0, 0x00202c00 # \0,_
- or t0, t0, t1
- sw t0, 8(sp)
- li t0, 0x6120616c # la a
- sw t0, 4(sp)
- addi a0, sp, 4
- li a1, 7
- call _write_out
-
- lw a0, 24(sp)
- lw a1, 20(sp)
- call _write_out
-
- li a0, '\n'
- call _put_char
+ lw a0, 20(sp)
+ lw a1, 28(sp)
+ call _build_identifier_expression
j .Lbuild_expression_advance
@@ -313,33 +415,6 @@ _build_expression:
j .Lbuild_expression_advance
-.Lbuild_expression_identifier:
- lw t1, 28(sp)
- li t0, 0x00202c00 # \0,_
- or t0, t0, t1
- sw t0, 16(sp)
- li t0, 0x6120776c # lw a
- sw t0, 12(sp)
- addi a0, sp, 12
- li a1, 7
- call _write_out
-
- lw a0, 24(sp)
- lw a1, 20(sp)
- addi a0, a0, 4 # Skip the "loca" variable prefix.
- addi a1, a1, -4 # Skip the "loca" variable prefix.
- call _write_out
-
- li t0, '\n'
- sw t0, 16(sp)
- li t0, 0x29707328 # (sp)
- sw t0, 12(sp)
- addi a0, sp, 12
- li a1, 5
- call _write_out
-
- j .Lbuild_expression_advance
-
.Lbuild_expression_call:
lw a0, 24(sp)
lw a1, 20(sp)
@@ -683,24 +758,6 @@ _skip_spaces:
.Lspace_loop_end:
ret
-# Skips tabs at the line beginning.
-.type _skip_indentation, @function
-_skip_indentation:
-.Lskip_indentation_do:
- lbu t0, (s1)
-
- li t1, '\t'
- beq t0, t1, .Lskip_indentation_skip
-
- j .Lskip_indentation_end
-
-.Lskip_indentation_skip:
- addi s1, s1, 1
- j .Lskip_indentation_do
-
-.Lskip_indentation_end:
- ret
-
# Parameters:
# a0 - Line length.
.type _skip_comment, @function
@@ -1134,7 +1191,7 @@ _compile_procedure:
# Generate the body of the procedure.
.Lcompile_procedure_body:
- call _skip_indentation
+ call _skip_spaces
call _read_line
sw a0, 12(sp)
li t0, 0x0a646e65 # end\n
@@ -1427,23 +1484,19 @@ _compile_line:
li t1, '('
beq t0, t1, .Lcompile_line_comment
- li t0, 0x0a6d6172 # ram\n
- sw t0, 12(sp)
li t0, 0x676f7270 # prog
- sw t0, 8(sp)
+ sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 8
+ addi a1, sp, 12
+ li a2, 4
call _memcmp
beqz a0, .Lcompile_line_program
- li t0, 0x0a74 # t\n
- sw t0, 12(sp)
li t0, 0x736e6f63 # cons
- sw t0, 8(sp)
+ sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 6
+ addi a1, sp, 12
+ li a2, 4
call _memcmp
beqz a0, .Lcompile_line_const
@@ -1455,23 +1508,19 @@ _compile_line:
call _memcmp
beqz a0, .Lcompile_line_var
- li t0, 0x20 # _
- sw t0, 12(sp)
li t0, 0x636f7270 # proc
- sw t0, 8(sp)
+ sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 5
+ addi a1, sp, 12
+ li a2, 4
call _memcmp
beqz a0, .Lcompile_line_procedure
- li t0, 0x0a6e # n\n
- sw t0, 12(sp)
li t0, 0x69676562 # begi
- sw t0, 8(sp)
+ sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 6
+ addi a1, sp, 12
+ li a2, 4
call _memcmp
beqz a0, .Lcompile_line_begin
@@ -1491,13 +1540,11 @@ _compile_line:
call _memcmp
beqz a0, .Lcompile_line_identifier
- li t0, 0x7472 # rt
- sw t0, 12(sp)
li t0, 0x6f706d69 # impo
- sw t0, 8(sp)
+ sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 6
+ addi a1, sp, 12
+ li a2, 4
call _memcmp
beqz a0, .Lcompile_line_import
@@ -1509,13 +1556,11 @@ _compile_line:
call _memcmp
beqz a0, .Lcompile_line_goto
- li t0, 0x6e72 # rn
- sw t0, 12(sp)
li t0, 0x75746572 # retu
- sw t0, 8(sp)
+ sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 6
+ addi a1, sp, 12
+ li a2, 4
call _memcmp
beqz a0, .Lcompile_line_return
@@ -1716,7 +1761,7 @@ _compile:
lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
- call _skip_indentation
+ call _skip_spaces
call _read_line
lw a1, 4(sp)
call _compile_line
@@ -1736,16 +1781,35 @@ _compile:
addi sp, sp, 16
ret
-# Entry point.
-.type _start, @function
-_start:
+# Reads the source text from the standard input into source_code and
+# initializes s2 with 1.
+#
+# s1 isn't loaded here: _read_file copies the buffer pointer it gets in a0
+# into s1.
+# NOTE(review): s2 is presumably the label counter (stage2.elna increments it
+# after writing a label) - confirm.
+.type _main, @function
+_main:
+ # Prologue.
+ addi sp, sp, -8
+ sw ra, 4(sp)
+ sw s0, 0(sp)
+ addi s0, sp, 8
+
# Read the source from the standard input.
la a0, source_code
li a1, SOURCE_BUFFER_SIZE # Buffer size.
call _read_file
li s2, 1
- la s1, source_code # s1 = Source code position.
+
+ # Epilogue.
+ lw ra, 4(sp)
+ lw s0, 0(sp)
+ addi sp, sp, 8
+ ret
+
+# Entry point.
+.type _start, @function
+_start:
+ call _tokenizer_initialize
+ li a1, 50
+ call _write_error
+
+ call _main
call _compile
# Call exit.
diff --git a/boot/stage2.elna b/boot/stage2.elna
index 0eb713c..3cb2deb 100644
--- a/boot/stage2.elna
+++ b/boot/stage2.elna
@@ -39,7 +39,10 @@ begin
end
proc _build_binary_expression()
-var loca0, loca4, loca8, loca20: Word
+var
+ loca0, loca4, loca8, loca16, loca20: Word
+ loca12: ^Byte
+ loca24: Bool
begin
_build_expression(0)
@@ -48,60 +51,51 @@ begin
_skip_spaces()
loca20 := _read_token()
+ loca12 := _current()
+
+ loca16 := 0x26 (* & *)
+ loca24 := _token_compare(loca12, loca20, @loca16)
+ if loca24 = 0 then
+ goto .L_build_binary_expression_and
+ end
+
+ loca16 := 0x726f (* or *)
+ loca24 := _token_compare(loca12, loca20, @loca16)
+ if loca24 = 0 then
+ goto .L_build_binary_expression_or
+ end
+
+ loca16 := 0x3d (* = *)
+ loca24 := _token_compare(loca12, loca20, @loca16)
+ if loca24 = 0 then
+ goto .L_build_binary_expression_equal
+ end
+
+ loca16 := 0x2b (* + *)
+ loca24 := _token_compare(loca12, loca20, @loca16)
+ if loca24 = 0 then
+ goto .L_build_binary_expression_plus
+ end
+
+ loca16 := 0x2d (* - *)
+ loca24 := _token_compare(loca12, loca20, @loca16)
+ if loca24 = 0 then
+ goto .L_build_binary_expression_minus
+ end
- li t0, 0x26 # &
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_and
-
- li t0, 0x726f # or
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_or
-
- li t0, 0x3d # =
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_equal
-
- li t0, 0x2b # +
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_plus
-
- li t0, 0x2d # -
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_minus
-
- li t0, 0x2a # *
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_product
+ loca16 := 0x2a (* * *)
+ loca24 := _token_compare(loca12, loca20, @loca16)
+ if loca24 = 0 then
+ goto .L_build_binary_expression_product
+ end
goto .Lbuild_binary_expression_end
.L_build_binary_expression_equal
_advance(1) (* Skip =. *)
_build_expression(1)
+
+ loca0 := 0x627573(* sub *)
_write_out(@loca0, 3)
_write_out(@loca4, 4)
_write_out(@loca4, 4)
@@ -110,7 +104,8 @@ begin
loca0 := 0x7a716573 (* seqz *)
_write_out(@loca0, 4)
_write_out(@loca4, 4)
- _write_out(@loca4, 4)
+ _write_out(@loca4, 3)
+ _put_char(0x0a) (* \n *)
goto .Lbuild_binary_expression_end
@@ -172,6 +167,67 @@ begin
.Lbuild_binary_expression_end
end
+(*
+Writes the instructions loading the identifier at the current source position
+into the register aX.
+
+An identifier starting with "loca" is read from the stack with lw. A two
+character identifier starting with s is copied with mv. Any other identifier
+is a global one: its address is loaded with la and, if _is_upper accepts its
+first character, the value at that address is loaded with lw afterwards.
+
+Parameters:
+a0 - Identifier length (loca84).
+a1 - Register number as character, the X in aX (loca80).
+*)
+proc _build_identifier_expression(loca84: Word, loca80: Byte)
+begin
+ loca24 := _current()
+ loca0 := 0x61636f6c (* loca *)
+ loca0 := _memcmp(@loca0, loca24, 4)
+
+ if loca0 = 0 then
+ loca8 := 0x6120776c (* lw a *)
+ _write_out(@loca8, 4)
+ loca8 := 0x00202c00 or loca80 (* \0,_ *)
+ _write_out(@loca8, 3)
+
+ loca4 := loca24 + 4
+ loca0 := loca84 - 4
+ _write_out(loca4, loca0) (* Skip the "loca" variable prefix. *)
+
+ loca8 := 0x29707328 (* (sp) *)
+ _write_out(@loca8, 4)
+ _put_char(0x0a) (* \n *)
+
+ goto .Lbuild_identifier_expression_end
+ end
+ (* A two character identifier starting with s is handled as a register name. *)
+ loca0 := _front(loca24)
+ loca8 := loca84 = 2
+ loca12 := loca0 = 0x73
+ if loca8 & loca12 then
+ loca8 := 0x6120766d (* mv a *)
+ _write_out(@loca8, 4)
+ loca8 := 0x00202c00 or loca80 (* \0,_ *)
+ _write_out(@loca8, 3)
+ _write_out(loca24, loca84)
+ _put_char(0x0a) (* \n *)
+
+ goto .Lbuild_identifier_expression_end
+ end
+
+ (* Global identifier. *)
+ loca8 := 0x6120616c (* la a *)
+ _write_out(@loca8, 4)
+ loca8 := 0x00202c00 or loca80
+ _write_out(@loca8, 3)
+
+ _write_out(loca24, loca84)
+ _put_char(0x0a)
+
+ (*
+ The register character is the parameter loca80. loca28 is the register slot
+ of _build_expression and is never assigned in this procedure.
+ *)
+ if _is_upper(loca0) then
+ loca8 := 0x6120776c (* lw a *)
+ _write_out(@loca8, 4)
+ loca8 := 0x28202c00 or loca80 (* \0, ( *)
+ _write_out(@loca8, 4)
+ _put_char(0x61) (* a *)
+ _put_char(loca80)
+ _put_char(0x29) (* ) *)
+ _put_char(0x0a) (* \n *)
+
+ goto .Lbuild_identifier_expression_end
+ end
+
+ .Lbuild_identifier_expression_end
+end
+
(*
Evaluates an expression and saves the result in a0.
@@ -188,41 +244,28 @@ begin
_skip_spaces()
loca20 := _read_token()
loca24 := _current()
+ loca0 := _front(loca24)
- lbu a0, (s1)
- li t0, '-'
- beq a0, t0, .Lbuild_expression_negate
+ (* - *)
+ if loca0 = 0x2d then
+ goto .Lbuild_expression_negate
+ end
- lbu a0, (s1)
- li t0, '@'
- beq a0, t0, .Lbuild_expression_address
+ (* @ *)
+ if loca0 = 0x40 then
+ goto .Lbuild_expression_address
+ end
- loca0 := _front()
if _is_digit(loca0) then
goto .Lbuild_expression_literal
end
- lbu a0, (s1)
- li t0, '_'
- beq a0, t0, .Lbuild_expression_call
-
- li t0, 0x61636f6c # loca
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 4
- call _memcmp
- beqz a0, .Lbuild_expression_identifier
-
- (* Named identifier. *)
- loca8 := 0x6120616c (* la a *)
- _write_out(@loca8, 4)
- loca8 := 0x00202c00 or loca28
- _write_out(@loca8, 3)
-
- _write_out(loca24, loca20)
- _put_char(0x0a)
+ (* _ *)
+ if loca0 = 0x5f then
+ goto .Lbuild_expression_call
+ end
+ _build_identifier_expression(loca20, loca28);
goto .Lbuild_expression_advance
.Lbuild_expression_negate
@@ -263,22 +306,6 @@ begin
goto .Lbuild_expression_advance
- .Lbuild_expression_identifier
- loca8 := 0x6120776c (* lw a *)
- _write_out(@loca8, 4)
- loca8 := 0x00202c00 or loca28 # \0,_
- _write_out(@loca8, 3)
-
- loca4 := loca24 + 4
- loca0 := loca20 - 4
- _write_out(loca4, loca0) (* Skip the "loca" variable prefix. *)
-
- loca8 := 0x29707328 (* (sp) *)
- _write_out(@loca8, 4)
- _put_char(0x0a) (* \n *)
-
- goto .Lbuild_expression_advance
-
.Lbuild_expression_call
_advance(loca20)
_advance(1)
@@ -313,6 +340,7 @@ proc _compile_identifier()
var
loca0, loca16, loca8: Word
loca20, loca12: ^Byte
+ loca4: Bool
begin
(* Save the pointer to the identifier and its length on the stack. *)
loca20 := _current()
@@ -325,21 +353,18 @@ begin
loca12 := _current()
loca8 := _read_token()
- _advance(loca8) # Skip that token.
+ _advance(loca8) (* Skip that token. *)
_skip_spaces()
- li t0, 0x3d3a # :=
- sw t0, 4(sp)
- lw a0, 12(sp)
- lw a1, 8(sp)
- addi a2, sp, 4
- call _token_compare
- beqz a0, .Lcompile_identifier_assign
+ loca0 := 0x3d3a (* := *)
+ loca4 := _token_compare(loca12, loca8, @loca0)
+ if loca4 = 0 then
+ goto .Lcompile_identifier_assign
+ end
- lw t0, 12(sp)
- lbu t0, (t0)
- li t1, 0x28 # (
- beq t0, t1, .Lcompile_identifier_call
+ if _front(loca12) = 0x28 then
+ goto .Lcompile_identifier_call
+ end
goto .Lcompile_identifier_end
@@ -379,14 +404,16 @@ Returns the procedure result in a0.
proc _compile_call(loca84: ^Byte, loca80: Word)
var
loca0, loca4, loca12: Word
+ loca8: ^Byte
begin
loca12 := 0 (* Argument count for a procedure call. *)
.Lcompile_call_paren
_skip_spaces()
- lbu t0, (s1)
- li t1, 0x29 # )
- beq t0, t1, .Lcompile_call_complete
+ loca8 := _current()
+ if _front(loca8) = 0x29 then
+ goto .Lcompile_call_complete
+ end
.Lcompile_call_argument
_build_expression(0)
@@ -409,9 +436,11 @@ begin
_put_char(0x0a) (* \n *)
_skip_spaces()
- lbu t0, (s1)
- li t1, ','
- bne t0, t1, .Lcompile_call_paren
+ loca8 := _current()
+ loca0 := _front(loca8) = 0x2c
+ if loca0 = 0 then
+ goto .Lcompile_call_paren
+ end
loca12 := loca12 + 1 (* Argument count for a procedure call. *)
@@ -497,64 +526,97 @@ Reads a token and returns its length in a0.
_read_token doesn't change s1, it finds the length of the token s1 is pointing to.
*)
proc _read_token()
-var loca4: Word
+var
+ loca0, loca4: Word
+ loca8: ^Byte
begin
- lbu t0, (s1) # t0 = Current character.
+ loca8 := _current()
+ loca0 := _front(loca8) (* t0 = Current character. *)
loca4 := 0
- li t1, '.'
- beq t0, t1, .Ltoken_character_single
+ (* . *)
+ if loca0 = 0x2e then
+ goto .Ltoken_character_single
+ end
- li t1, ','
- beq t0, t1, .Ltoken_character_single
+ (* , *)
+ if loca0 = 0x2c then
+ goto .Ltoken_character_single
+ end
- li t1, ':'
- beq t0, t1, .Ltoken_character_colon
+ (* : *)
+ if loca0 = 0x3a then
+ goto .Ltoken_character_colon
+ end
- li t1, ';'
- beq t0, t1, .Ltoken_character_single
+ (* ; *)
+ if loca0 = 0x3b then
+ goto .Ltoken_character_single
+ end
- li t1, '('
- beq t0, t1, .Ltoken_character_single
+ (* ( *)
+ if loca0 = 0x28 then
+ goto .Ltoken_character_single
+ end
- li t1, ')'
- beq t0, t1, .Ltoken_character_single
+ (* ) *)
+ if loca0 = 0x29 then
+ goto .Ltoken_character_single
+ end
- li t1, '['
- beq t0, t1, .Ltoken_character_single
+ (* [ *)
+ if loca0 = 0x5b then
+ goto .Ltoken_character_single
+ end
- li t1, ']'
- beq t0, t1, .Ltoken_character_single
+ (* ] *)
+ if loca0 = 0x5d then
+ goto .Ltoken_character_single
+ end
+
+ (* ^ *)
+ if loca0 = 0x5e then
+ goto .Ltoken_character_single
+ end
- li t1, '^'
- beq t0, t1, .Ltoken_character_single
+ (* & *)
+ if loca0 = 0x26 then
+ goto .Ltoken_character_single
+ end
- li t1, '&'
- beq t0, t1, .Ltoken_character_single
+ (* = *)
+ if loca0 = 0x3d then
+ goto .Ltoken_character_single
+ end
- li t1, '='
- beq t0, t1, .Ltoken_character_single
+ (* + *)
+ if loca0 = 0x2b then
+ goto .Ltoken_character_single
+ end
- li t1, '+'
- beq t0, t1, .Ltoken_character_single
+ (* - *)
+ if loca0 = 0x2d then
+ goto .Ltoken_character_single
+ end
- li t1, '-'
- beq t0, t1, .Ltoken_character_single
+ (* * *)
+ if loca0 = 0x2a then
+ goto .Ltoken_character_single
+ end
- li t1, '*'
- beq t0, t1, .Ltoken_character_single
+ (* @ *)
+ if loca0 = 0x40 then
+ goto .Ltoken_character_single
+ end
- li t1, '@'
- beq t0, t1, .Ltoken_character_single
(* Expect an identifier or a number. *)
.Ltoken_character_loop_do
- lw t6, 4(sp)
- add t1, s1, t6
- lbu a0, (t1) # a0 = Current character.
+ loca0 := loca8 + loca4
+ loca0 := _front(loca0)
- call _is_alnum
-
- beqz a0, .Ltoken_character_end
+ if _is_alnum(loca0) = 0 then
+ goto .Ltoken_character_end
+ end
loca4 := loca4 + 1
goto .Ltoken_character_loop_do
@@ -563,31 +625,46 @@ begin
goto .Ltoken_character_end
.Ltoken_character_colon
- lbu t0, 1(s1) # t0 = The character after the colon.
+ loca0 := loca8 + 1
+ loca0 := _front(loca0) (* t0 = The character after the colon. *)
loca4 := loca4 + 1
- li t1, '='
- beq t0, t1, .Ltoken_character_single
+ (* = *)
+ if loca0 = 0x3d then
+ goto .Ltoken_character_single
+ end
goto .Ltoken_character_end
.Ltoken_character_end
- lw a0, 4(sp)
+ return loca4
end
(* Skips the spaces till the next non space character. *)
proc _skip_spaces()
+var
+ loca0: Byte
+ loca4: ^Byte
begin
.Lspace_loop_do
- lbu t0, (s1) # t0 = Current character.
+ loca4 := _current()
+ loca0 := _front(loca4) (* t0 = Current character. *)
- li t1, ' '
- beq t0, t1, .Lspace_loop_repeat
- li t1, '\t'
- beq t0, t1, .Lspace_loop_repeat
- li t1, '\n'
- beq t0, t1, .Lspace_loop_repeat
- li t1, '\r'
- beq t0, t1, .Lspace_loop_repeat
+ (* _ *)
+ if loca0 = 0x20 then
+ goto .Lspace_loop_repeat
+ end
+ (* \t *)
+ if loca0 = 0x09 then
+ goto .Lspace_loop_repeat
+ end
+ (* \n *)
+ if loca0 = 0x0a then
+ goto .Lspace_loop_repeat
+ end
+ (* \r *)
+ if loca0 = 0x0d then
+ goto .Lspace_loop_repeat
+ end
goto .Lspace_loop_end
.Lspace_loop_repeat
@@ -597,50 +674,37 @@ begin
.Lspace_loop_end
end
-(* Skips tabs at the line beginning. *)
-proc _skip_indentation()
-begin
- .Lskip_indentation_do
- lbu t0, (s1)
-
- li t1, '\t'
- beq t0, t1, .Lskip_indentation_skip
-
- goto .Lskip_indentation_end
-
- .Lskip_indentation_skip
- _advance(1)
- goto .Lskip_indentation_do
-
- .Lskip_indentation_end
-end
-
(*
Parameters:
a0 - Line length.
*)
proc _skip_comment(loca84: Word)
+var
+ loca0: ^Byte
+ loca4: Word
+ loca8: Int
begin
+ loca0 := _current()
+
(* Check whether this is a comment. *)
- li t0, 0x2a28 # (*
- sw t0, 4(sp)
- addi a0, sp, 4
- mv a1, s1
- li a2, 2
- call _memcmp
- bnez a0, .Lskip_comment_end
+ loca4 := 0x2a28 (* ( and * *)
+ loca8 := _memcmp(loca0, @loca4, 2)
+ if loca8 = 0 then
+ goto .Lskip_comment_continue
+ end
+ goto .Lskip_comment_end
+ .Lskip_comment_continue
_advance(2) (* Skip (*. *)
- li t0, 0x292a # *)
- sw t0, 4(sp)
+ loca4 := 0x292a (* ( and * *)
.Lskip_comment_loop
- addi a0, sp, 4
- mv a1, s1
- li a2, 2
- call _memcmp
- beqz a0, .Lskip_comment_close
+ loca0 := _current()
+ loca8 := _memcmp(loca0, @loca4, 2)
+ if loca8 = 0 then
+ goto .Lskip_comment_close
+ end
_advance(1)
@@ -687,7 +751,9 @@ begin
end
proc _compile_constant_section()
-var loca0: Word
+var
+ loca0: Word
+ loca4: ^Byte
begin
(* .section .rodata *)
loca0 := 0x6365732e (* .sec *)
@@ -705,9 +771,11 @@ begin
.Lcompile_constant_section_item
_skip_spaces()
- lbu a0, (s1)
- call _is_upper
- beqz a0, .Lcompile_constant_section_end
+ loca4 := _current()
+ loca0 := _front(loca4)
+ if _is_upper(loca0) = 0 then
+ goto .Lcompile_constant_section_end
+ end
_compile_constant()
goto .Lcompile_constant_section_item
@@ -746,7 +814,9 @@ begin
end
proc _compile_variable_section()
-var loca0: Word
+var
+ loca0: Word
+ loca4: ^Byte
begin
(* .section .bss *)
loca0 := 0x6365732e (* .sec *)
@@ -762,10 +832,12 @@ begin
.Lcompile_variable_section_item
_skip_spaces()
- lbu a0, (s1)
- call _is_lower
- beqz a0, .Lcompile_variable_section_end
+ loca4 := _current()
+ loca0 := _front(loca4)
+ if _is_lower(loca0) = 0 then
+ goto .Lcompile_variable_section_end
+ end
_compile_variable()
goto .Lcompile_variable_section_item
@@ -846,7 +918,7 @@ end
proc _compile_procedure()
var
loca0, loca4, loca8, loca12, loca16: Word
- loca20: ^Char
+ loca20, loca24: ^Byte
begin
_advance(5) (* Skip proc_ *)
loca16 := _read_token()
@@ -887,15 +959,15 @@ begin
*)
.Lcompile_procedure_begin
_skip_spaces()
- call _read_token
+ loca0 := _read_token()
- mv a1, a0
- mv a0, s1
- addi a2, sp, 8
- add s1, s1, a1
- call _token_compare
+ loca24 := _current()
+ _advance(loca0)
+ loca0 := _token_compare(loca24, loca0, @loca8)
- bnez a0, .Lcompile_procedure_begin
+ if loca0 = 1 then
+ goto .Lcompile_procedure_begin
+ end
(* Generate the procedure prologue with a predefined stack size. *)
loca0 := 0x69646461 (* addi *)
@@ -982,33 +1054,30 @@ begin
(* Generate the body of the procedure. *)
.Lcompile_procedure_body
- _skip_indentation()
- call _read_line
- sw a0, 12(sp)
- li t0, 0x0a646e65 # end\n
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 4
- call _memcmp
-
- beqz a0, .Lcompile_procedure_end
-
- lw a0, 12(sp)
- call _compile_line
+ _skip_spaces()
+ loca12 := _read_line()
+ loca8 := 0x0a646e65 (* end\n *)
+ loca24 := _current()
+ loca8 := _memcmp(loca24, @loca8, 4)
+
+ if loca8 = 0 then
+ goto .Lcompile_procedure_end
+ end
+
+ _compile_line(loca12)
goto .Lcompile_procedure_body
.Lcompile_procedure_end
_advance(4) (* Skip end\n. *)
(* Generate the procedure epilogue with a predefined stack size. *)
- loca0 := 0x7220776c # lw r
+ loca0 := 0x7220776c (* lw r *)
_write_out(@loca0, 4)
- loca0 := 0x39202c61 # a, 9
+ loca0 := 0x39202c61 (* a, 9 *)
_write_out(@loca0, 4)
- loca0 := 0x70732832 # 2(sp
+ loca0 := 0x70732832 (* 2(sp *)
_write_out(@loca0, 4)
- loca0 := 0x0a29 # )\n
+ loca0 := 0x0a29 (* )\n *)
_write_out(@loca0, 2)
loca0 := 0x7320776c (* lw s *)
@@ -1043,41 +1112,51 @@ Compares two string, which of one has a length, the other one is null-terminated
If the strings match sets a0 to 0, otherwise sets it to 1.
*)
-proc _token_compare()
+proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte)
+var
+ loca0: Bool
+ loca4, loca12: Byte
+ loca8: Word
begin
- addi t0, a0, 0
- addi t1, a1, 0
- addi t2, a2, 0
-
.Ltoken_compare_loop
- lbu t3, (t2)
+ loca4 := _front(loca76) (* Current character of the null-terminated string. *)
(*
Will only be 0 if the current character in the null-terminated string is \0 and the remaining length of the
other string is 0.
*)
- or t4, t3, t1
- beqz t4, .Ltoken_compare_equal
-
- beqz t1, .Ltoken_compare_not_equal
- beqz t3, .Ltoken_compare_not_equal
+ loca8 := loca4 or loca80
+ if loca8 = 0 then
+ goto .Ltoken_compare_equal
+ end
+ (* Only one of the strings has ended, so their lengths differ. *)
+ if loca80 = 0 then
+ goto .Ltoken_compare_not_equal
+ end
+ if loca4 = 0 then
+ goto .Ltoken_compare_not_equal
+ end
+ loca12 := _front(loca84) (* Current character of the string with a length. *)
+ if loca4 = loca12 then
+ goto .Ltoken_compare_continue
+ end
+ goto .Ltoken_compare_not_equal
- lbu t4, (t0)
- bne t3, t4, .Ltoken_compare_not_equal
+ .Ltoken_compare_continue
- addi t0, t0, 1
- addi t1, t1, -1
- addi t2, t2, 1
+ (* Step both pointers forward and shrink the remaining length. *)
+ loca84 := loca84 + 1
+ loca80 := loca80 - 1
+ loca76 := loca76 + 1
goto .Ltoken_compare_loop
.Ltoken_compare_not_equal
- li a0, 1
+ loca0 := 1
goto .Ltoken_compare_end
.Ltoken_compare_equal
- li a0, 0
+ loca0 := 0
.Ltoken_compare_end
+ return loca0
end
proc _compile_goto()
@@ -1112,15 +1191,13 @@ begin
loca0 := _current()
_write_out(loca0, loca84)
- lw t0, 84(sp) # Line length.
- mv t1, s1 # Line start.
-
- add t1, t1, t0
- addi t1, t1, -1 # Last character on the line.
+ loca0 := loca0 + loca84
+ loca0 := loca0 - 1 (* Last character on the line. *)
- lbu t1, (t1)
- li t2, ':'
- beq t1, t2, .Lcompile_label_colon
+ loca0 := _front(loca0)
+ if loca0 = 0x3a then
+ goto .Lcompile_label_colon
+ end
_put_char(0x3a) (* : *)
@@ -1161,8 +1238,7 @@ begin
(* Write the label *)
_write_out(@loca16, 4)
- mv a0, s2
- call _printi
+ _printi(s2)
_put_char(0x0a) (* \n *)
@@ -1182,13 +1258,13 @@ begin
(* Write the label *)
_write_out(@loca16, 4)
- mv a0, s2
- call _printi
+ _printi(s2)
loca12 := 0x0a3a0a3a (* :\n:\n *)
_write_out(@loca12, 2)
- addi s2, s2, 1 # Increment the label counter.
+ (* Increment the label counter. *)
+ addi s2, s2, 1
_advance(4) (* Skip the end with newline. *)
end
@@ -1202,119 +1278,99 @@ Returns 1 in a0 if the parsed line contained a text section element such a
procedure or the program entry point. Otherwise sets a0 to 0.
*)
proc _compile_line(loca84: Word, loca80: Bool)
+var
+ loca0: Char
+ loca4: Int
+ loca8: Bool
+ loca12: Word
+ loca16: ^Byte
begin
- beqz a0, .Lcompile_line_empty # Skip an empty line.
-
- lbu t0, (s1)
- li t1, '('
- beq t0, t1, .Lcompile_line_comment
-
- li t0, 0x0a6d6172 # ram\n
- sw t0, 12(sp)
- li t0, 0x676f7270 # prog
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 8
- call _memcmp
- beqz a0, .Lcompile_line_program
-
- li t0, 0x0a74 # t\n
- sw t0, 12(sp)
- li t0, 0x736e6f63 # cons
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 6
- call _memcmp
- beqz a0, .Lcompile_line_const
-
- li t0, 0x0a726176 # var\n
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_var
-
- li t0, 0x20 # _
- sw t0, 12(sp)
- li t0, 0x636f7270 # proc
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 5
- call _memcmp
- beqz a0, .Lcompile_line_procedure
-
- li t0, 0x0a6e # n\n
- sw t0, 12(sp)
- li t0, 0x69676562 # begi
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 6
- call _memcmp
- beqz a0, .Lcompile_line_begin
-
- li t0, 0x2e646e65 # end.
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_exit
-
- li t0, 0x61636f6c # loca
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_identifier
-
- li t0, 0x7472 # rt
- sw t0, 12(sp)
- li t0, 0x6f706d69 # impo
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 6
- call _memcmp
- beqz a0, .Lcompile_line_import
-
- li t0, 0x6f746f67 # goto
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_goto
-
- li t0, 0x6e72 # rn
- sw t0, 12(sp)
- li t0, 0x75746572 # retu
- sw t0, 8(sp)
- mv a0, s1
- addi a1, sp, 8
- li a2, 6
- call _memcmp
- beqz a0, .Lcompile_line_return
-
- li t0, 0x6669 # if
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 2
- call _memcmp
- beqz a0, .Lcompile_line_if
-
- lbu t0, (s1)
- li t1, '.'
- beq t0, t1, .Lcompile_line_label
- li t1, '_'
- beq t0, t1, .Lcompile_line_identifier
+ if loca84 = 0 then
+ goto .Lcompile_line_empty (* Skip an empty line. *)
+ end
+
+ loca16 := _current()
+ loca0 := _front(loca16)
+ (* ( *)
+ if loca0 = 0x28 then
+ goto .Lcompile_line_comment
+ end
+ loca16 := _current()
+
+ loca12 := 0x676f7270 (* prog *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_program
+ end
+
+ loca12 := 0x736e6f63 (* cons *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_const
+ end
+
+ loca12 := 0x0a726176 (* var\n *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_var
+ end
+
+ loca12 := 0x636f7270 (* proc *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_procedure
+ end
+
+ loca12 := 0x69676562 (* begi *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_begin
+ end
+
+ loca12 := 0x2e646e65 (* end. *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_exit
+ end
+
+ loca12 := 0x61636f6c (* loca *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_identifier
+ end
+
+ loca12 := 0x6f706d69 (* impo *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_import
+ end
+
+ loca12 := 0x6f746f67 (* goto *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_goto
+ end
+
+ loca12 := 0x75746572 (* retu *)
+ loca4 := _memcmp(loca16, @loca12, 4)
+ if loca4 = 0 then
+ goto .Lcompile_line_return
+ end
+ loca12 := 0x6669 (* if *)
+ loca4 := _memcmp(loca16, @loca12, 2)
+ if loca4 = 0 then
+ goto .Lcompile_line_if
+ end
+
+ (* . *)
+ if loca0 = 0x2e then
+ goto .Lcompile_line_label
+ end
+ (* _ *)
+ if loca0 = 0x5f then
+ goto .Lcompile_line_identifier
+ end
goto .Lcompile_line_unchanged (* Else. *)
.Lcompile_line_if:
@@ -1326,10 +1382,6 @@ begin
goto .Lcompile_line_section
.Lcompile_line_return
- (* DEBUG
- mv a0, s1
- li a1, 8
- call _write_error *)
_compile_return()
goto .Lcompile_line_section
@@ -1350,12 +1402,14 @@ begin
goto .Lcompile_line_section
.Lcompile_line_begin
- lw a1, 80(sp)
- bnez a1, .Lcompile_line_compile_entry
+
+ if loca80 = 1 then
+ goto .Lcompile_line_compile_entry
+ end
_compile_text_section()
.Lcompile_line_compile_entry
_compile_entry_point()
- li a0, 1
+ loca8 := 1
goto .Lcompile_line_end
.Lcompile_line_const
@@ -1363,12 +1417,13 @@ begin
goto .Lcompile_line_section
.Lcompile_line_procedure
- lw a1, 80(sp)
- bnez a1, .Lcompile_line_compile_procedure
+ if loca80 = 1 then
+ goto .Lcompile_line_compile_procedure
+ end
_compile_text_section()
.Lcompile_line_compile_procedure
_compile_procedure()
- li a0, 1
+ loca8 := 1
goto .Lcompile_line_end
.Lcompile_line_var
@@ -1392,13 +1447,13 @@ begin
goto .Lcompile_line_section
.Lcompile_line_section
- mv a0, zero
+ loca8 := 0
.Lcompile_line_end
- sw a0, 12(sp)
_skip_spaces()
_skip_comment()
- lw a0, 12(sp)
+
+ return loca8
end
(* Prints ".section .text" and exits. *)
@@ -1443,11 +1498,9 @@ end
proc _compile_exit()
var loca0: Word
begin
- (*
- li a0, 0
- li a7, SYS_EXIT
- ecall
- *)
+ (* li a0, 0 *)
+ (* li a7, SYS_EXIT *)
+ (* ecall *)
loca0 := 0x6120696c (* li a *)
_write_out(@loca0, 4)
loca0 := 0x30202c30 (* 0, 0 *)
@@ -1469,16 +1522,20 @@ end
(* Finds the end of the line and returns its length in a0. *)
proc _read_line()
+var
+ loca0: ^Byte
+ loca4: Byte
begin
loca0 := _current() (* Local position in the source text. *)
.Lread_line_do
- lw t0, 0(sp)
- lbu t1, (t0) # t1 = Current character.
- beqz t1, .Lread_line_end # Exit the loop on the NUL character.
- li t2, '\n'
- beq t1, t2, .Lread_line_end # Exit the loop on the new line.
-
+ loca4 := _front(loca0) (* loca4 = Current character. *)
+ if loca4 = 0 then
+ goto .Lread_line_end (* Exit the loop on the NUL character. *)
+ end
+ if loca4 = 0x0a then
+ goto .Lread_line_end (* Exit the loop on the new line. *)
+ end
loca0 := loca0 + 1
goto .Lread_line_do
@@ -1491,18 +1548,26 @@ proc _compile()
var
loca0, loca4: Word
loca8: Bool
+ loca12: Char
+ loca16: ^Byte
begin
loca4 := 0 (* Whether the text section header was already emitted. *)
.Lcompile_do
- lbu t0, (s1) # t0 = Current character.
- beqz t0, .Lcompile_end # Exit the loop on the NUL character.
+ loca16 := _current()
+ loca12 := _front(loca16) (* loca12 = Current character. *)
- _skip_indentation()
+ if loca12 = 0 then
+ goto .Lcompile_end (* Exit the loop on the NUL character. *)
+ end
+
+ _skip_spaces()
loca0 := _read_line()
loca8 := _compile_line(loca0, loca4)
- beqz a0, .Lcompile_do
+ if loca8 = 0 then
+ goto .Lcompile_do
+ end
(* Update whether the text section header was already emitted. *)
loca4 := loca4 or loca8
@@ -1513,30 +1578,35 @@ end
(* Returns the pointer to the current position in the source text (held in register s1) in a0. *)
proc _current()
begin
- mv a0, s1
+ return s1
end
(* a0 is the number of bytes to advance the source position in s1; the body reads a0 directly, not the declared parameter loca84. *)
-proc _advance()
+proc _advance(loca84: Word)
begin
add s1, s1, a0
end
-(* Returns the first character in the remaining source text. *)
-proc _front()
+(*
+a0 - Pointer to the array whose first element is read.
+
+Returns the low byte (& 0xff) of the value _get yields for that pointer.
+*)
+proc _front(loca84: ^Word)
begin
- lbu a0, (s1)
+ return _get(loca84) & 0xff
end
-(* Entry point. *)
+proc _main()
begin
(* Read the source from the standard input; _read_file also sets s1 to the buffer. *)
- la a0, source_code
- la a1, SOURCE_BUFFER_SIZE # Buffer size.
- lw a1, (a1)
- call _read_file
+ _read_file(source_code, SOURCE_BUFFER_SIZE)
+
+ addi s2, zero, 1
+end
- la s1, source_code # s1 = Source code position.
- li s2, 1
+(* Entry point: runs _main, then _compile. *)
+begin
+ _main()
_compile()
end.
diff --git a/boot/states.txt b/boot/states.txt
new file mode 100644
index 0000000..20d5966
--- /dev/null
+++ b/boot/states.txt
@@ -0,0 +1,20 @@
+- start
+digit: integer
+upper: identifier
+lower: identifier
+space: start
+invalid: error
+
+- identifier
+digit: identifier
+upper: identifier
+lower: identifier
+space: end
+invalid: end
+
+- integer
+digit: integer
+upper: end
+lower: end
+space: end
+invalid: end
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
new file mode 100644
index 0000000..2057c2e
--- /dev/null
+++ b/boot/tokenizer.s
@@ -0,0 +1,181 @@
+.global _tokenizer_initialize
+
+.section .rodata
+
+raw_classes:
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "space\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "digit\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "upper\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "lower\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+ .ascii "invalid\n"
+
+.section .bss
+.type class_names, @object
+.size class_names, 1024
+class_names: .zero 1024
+
+.section .data
+.type classes, @object
+.size classes, 512 # 128 characters * 4 bytes each.
+classes: .zero 512 # Reserve the 512 bytes that .size declares, as class_names does.
+
+.section .text
+
+# Initializes the classification table.
+# NOTE: currently a stub; the body only sets up and tears down its stack frame.
+# Parameters:
+# a0 - Raw input for the classification table (not read yet).
+.type _tokenizer_classes, @function
+_tokenizer_classes:
+ # Prologue: reserve 8 bytes, save ra and s0, set s0 to the caller's sp.
+ addi sp, sp, -8
+ sw ra, 4(sp)
+ sw s0, 0(sp)
+ addi s0, sp, 8
+
+ # Epilogue: restore ra and s0, release the 8-byte frame.
+ lw ra, 4(sp)
+ lw s0, 0(sp)
+ addi sp, sp, 8
+ ret
+
+# Initializes the lookup tables by passing raw_classes to _tokenizer_classes.
+.type _tokenizer_initialize, @function
+_tokenizer_initialize:
+ # Prologue: reserve 8 bytes, save ra and s0, set s0 to the caller's sp.
+ addi sp, sp, -8
+ sw ra, 4(sp)
+ sw s0, 0(sp)
+ addi s0, sp, 8
+
+ la a0, raw_classes # a0 = address of the raw class-name table.
+ call _tokenizer_classes
+
+ # Epilogue: restore ra and s0, release the 8-byte frame.
+ lw ra, 4(sp)
+ lw s0, 0(sp)
+ addi sp, sp, 8
+ ret