summaryrefslogtreecommitdiff
path: root/boot/tokenizer.s
diff options
context:
space:
mode:
Diffstat (limited to 'boot/tokenizer.s')
-rw-r--r--boot/tokenizer.s181
1 files changed, 181 insertions, 0 deletions
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
new file mode 100644
index 0000000..2057c2e
--- /dev/null
+++ b/boot/tokenizer.s
@@ -0,0 +1,181 @@
+.global _tokenizer_initialize
+
+.section .rodata
+
+# Character class names for the 128 ASCII codes: one newline-terminated
+# name per code, in ascending code order. Runs of identical names are
+# emitted with .rept; the assembled bytes are the same as spelling out
+# every entry on its own .ascii line.
+raw_classes:
+ # 0x00 - 0x1f: control characters.
+ .rept 32
+ .ascii "invalid\n"
+ .endr
+
+ # 0x20: the space character.
+ .ascii "space\n"
+
+ # 0x21 - 0x2f: '!' through '/'.
+ .rept 15
+ .ascii "invalid\n"
+ .endr
+
+ # 0x30 - 0x39: '0' through '9'.
+ .rept 10
+ .ascii "digit\n"
+ .endr
+
+ # 0x3a - 0x40: ':' through '@'.
+ .rept 7
+ .ascii "invalid\n"
+ .endr
+
+ # 0x41 - 0x5a: 'A' through 'Z'.
+ .rept 26
+ .ascii "upper\n"
+ .endr
+
+ # 0x5b - 0x60: '[' through '`'.
+ .rept 6
+ .ascii "invalid\n"
+ .endr
+
+ # 0x61 - 0x7a: 'a' through 'z'.
+ .rept 26
+ .ascii "lower\n"
+ .endr
+
+ # 0x7b - 0x7f: '{' through DEL.
+ .rept 5
+ .ascii "invalid\n"
+ .endr
+
+.section .bss
+
+# Zero-filled 1024-byte buffer. Nothing in this file reads or writes it
+# yet; presumably it will hold the parsed class names - TODO confirm.
+.type class_names, @object
+class_names:
+ .space 1024
+.size class_names, . - class_names
+
+.section .data
+
+# Classification table: one 4-byte entry per ASCII code (128 entries).
+# The storage is reserved explicitly; a bare label followed by a section
+# switch would leave "classes" pointing at zero bytes of data, so any
+# store through it would overwrite whatever is linked next in .data.
+.balign 4 # Entries are 4 bytes wide; keep them word aligned.
+.type classes, @object
+.size classes, 512 # 128 characters * 4 byte.
+classes:
+ .zero 512
+
+.section .text
+
+# Initializes the classification table.
+#
+# The body is still a placeholder: it only sets up and tears down its
+# stack frame and returns, without reading a0 or writing any table.
+#
+# Parameters:
+# a0 - Raw input for the classification table.
+.type _tokenizer_classes, @function
+_tokenizer_classes:
+ # Prologue. The frame is 16 bytes so that sp keeps the 16-byte
+ # alignment the standard RISC-V calling convention requires.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16 # s0 = frame pointer (sp value on entry).
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Initializes the lookup tables.
+#
+# Takes no parameters. Passes the address of raw_classes to
+# _tokenizer_classes in a0, so a0 is overwritten.
+.type _tokenizer_initialize, @function
+_tokenizer_initialize:
+ # Prologue. The frame is 16 bytes so that sp is still 16-byte
+ # aligned at the call below, as the standard RISC-V calling
+ # convention requires.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16 # s0 = frame pointer (sp value on entry).
+
+ la a0, raw_classes
+ call _tokenizer_classes
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret