summaryrefslogtreecommitdiff
path: root/boot/tokenizer.s
diff options
context:
space:
mode:
Diffstat (limited to 'boot/tokenizer.s')
-rw-r--r--boot/tokenizer.s319
1 files changed, 174 insertions, 145 deletions
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
index 2057c2e..b1ac11f 100644
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -1,165 +1,195 @@
.global _tokenizer_initialize
.section .rodata
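+#
+# Character classes, one byte per 7-bit ASCII code (the table index is the code):
+#
+# 0x00: Invalid (every code not listed below)
+# 0x01: Digit (0-9)
+# 0x02: Character (letters A-Z and a-z)
+# 0x03: Space (0x20 only)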
+.type classes, @object
+.size classes, 128
+classes:
+ .byte 0x00 # 00 NUL
+ .byte 0x00 # 01 SOH
+ .byte 0x00 # 02 STX
+ .byte 0x00 # 03 ETX
+ .byte 0x00 # 04 EOT
+ .byte 0x00 # 05 ENQ
+ .byte 0x00 # 06 ACK
+ .byte 0x00 # 07 BEL
+ .byte 0x00 # 08 BS
+ .byte 0x00 # 09 HT
+ .byte 0x00 # 0A LF
+ .byte 0x00 # 0B VT
+ .byte 0x00 # 0C FF
+ .byte 0x00 # 0D CR
+ .byte 0x00 # 0E SO
+ .byte 0x00 # 0F SI
+ .byte 0x00 # 10 DLE
+ .byte 0x00 # 11 DC1
+ .byte 0x00 # 12 DC2
+ .byte 0x00 # 13 DC3
+ .byte 0x00 # 14 DC4
+ .byte 0x00 # 15 NAK
+ .byte 0x00 # 16 SYN
+ .byte 0x00 # 17 ETB
+ .byte 0x00 # 18 CAN
+ .byte 0x00 # 19 EM
+ .byte 0x00 # 1A SUB
+ .byte 0x00 # 1B ESC
+ .byte 0x00 # 1C FS
+ .byte 0x00 # 1D GS
+ .byte 0x00 # 1E RS
+ .byte 0x00 # 1F US
+ .byte 0x03 # 20 Space
+ .byte 0x00 # 21 !
+ .byte 0x00 # 22 "
+ .byte 0x00 # 23 #
+ .byte 0x00 # 24 $
+ .byte 0x00 # 25 %
+ .byte 0x00 # 26 &
+ .byte 0x00 # 27 '
+ .byte 0x00 # 28 (
+ .byte 0x00 # 29 )
+ .byte 0x00 # 2A *
+ .byte 0x00 # 2B +
+ .byte 0x00 # 2C ,
+ .byte 0x00 # 2D -
+ .byte 0x00 # 2E .
+ .byte 0x00 # 2F /
+ .byte 0x01 # 30 0
+ .byte 0x01 # 31 1
+ .byte 0x01 # 32 2
+ .byte 0x01 # 33 3
+ .byte 0x01 # 34 4
+ .byte 0x01 # 35 5
+ .byte 0x01 # 36 6
+ .byte 0x01 # 37 7
+ .byte 0x01 # 38 8
+ .byte 0x01 # 39 9
+ .byte 0x00 # 3A :
+ .byte 0x00 # 3B ;
+ .byte 0x00 # 3C <
+ .byte 0x00 # 3D =
+ .byte 0x00 # 3E >
+ .byte 0x00 # 3F ?
+ .byte 0x00 # 40 @
+ .byte 0x02 # 41 A
+ .byte 0x02 # 42 B
+ .byte 0x02 # 43 C
+ .byte 0x02 # 44 D
+ .byte 0x02 # 45 E
+ .byte 0x02 # 46 F
+ .byte 0x02 # 47 G
+ .byte 0x02 # 48 H
+ .byte 0x02 # 49 I
+ .byte 0x02 # 4A J
+ .byte 0x02 # 4B K
+ .byte 0x02 # 4C L
+ .byte 0x02 # 4D M
+ .byte 0x02 # 4E N
+ .byte 0x02 # 4F O
+ .byte 0x02 # 50 P
+ .byte 0x02 # 51 Q
+ .byte 0x02 # 52 R
+ .byte 0x02 # 53 S
+ .byte 0x02 # 54 T
+ .byte 0x02 # 55 U
+ .byte 0x02 # 56 V
+ .byte 0x02 # 57 W
+ .byte 0x02 # 58 X
+ .byte 0x02 # 59 Y
+ .byte 0x02 # 5A Z
+ .byte 0x00 # 5B [
+ .byte 0x00 # 5C \
+ .byte 0x00 # 5D ]
+ .byte 0x00 # 5E ^
+ .byte 0x00 # 5F _
+ .byte 0x00 # 60 `
+ .byte 0x02 # 61 a
+ .byte 0x02 # 62 b
+ .byte 0x02 # 63 c
+ .byte 0x02 # 64 d
+ .byte 0x02 # 65 e
+ .byte 0x02 # 66 f
+ .byte 0x02 # 67 g
+ .byte 0x02 # 68 h
+ .byte 0x02 # 69 i
+ .byte 0x02 # 6A j
+ .byte 0x02 # 6B k
+ .byte 0x02 # 6C l
+ .byte 0x02 # 6D m
+ .byte 0x02 # 6E n
+ .byte 0x02 # 6F o
+ .byte 0x02 # 70 p
+ .byte 0x02 # 71 q
+ .byte 0x02 # 72 r
+ .byte 0x02 # 73 s
+ .byte 0x02 # 74 t
+ .byte 0x02 # 75 u
+ .byte 0x02 # 76 v
+ .byte 0x02 # 77 w
+ .byte 0x02 # 78 x
+ .byte 0x02 # 79 y
+ .byte 0x02 # 7A z
+ .byte 0x00 # 7B {
+ .byte 0x00 # 7C |
+ .byte 0x00 # 7D }
+ .byte 0x00 # 7E ~
+ .byte 0x00 # 7F DEL
-raw_classes:
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "space\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "digit\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "upper\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "lower\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
- .ascii "invalid\n"
+.section .data
.section .bss
.type class_names, @object
.size class_names, 1024
class_names: .zero 1024 # 1024 zero-filled bytes; presumably 128 entries of 8 bytes each - TODO confirm.
-.section .data
-.type classes, @object
-.size classes, 512 # 128 characters * 4 byte.
-classes:
-
.section .text
# Initializes the classification table.
#
# Parameters:
# None; the class table is read directly from the `classes` symbol, a0 is not read.
-.type _tokenizer_classes, @function
-_tokenizer_classes:
+.type _initialize_classes, @function
+_initialize_classes:
# Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
+ addi sp, sp, -24 # Frame layout: ra at 20, s0 at 16, s1 at 12, debug message at 8.
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
+
+ sw s1, 12(sp) # Preserve the s1 register used for the character counter.
+ li s1, 128 # 128 ASCII characters.
+
+.Linitialize_classes_loop:
+ addi s1, s1, -1 # Walk the table backwards, from index 127 down to 0.
+
+ la t0, classes
+ add t0, t0, s1
+ lbu t0, (t0) # t0 = classes[s1].
+ li t1, 0x01 # 0x01 is the "Digit" class.
+
+ bne t0, t1, .Linitialize_classes_step # Only digit entries produce output below.
+
+ /* DEBUG: emit a 4-byte marker for every digit entry. */
+ li a0, 0x69676964 # Bytes 64 69 67 69 ("digi") once stored little-endian.
+ sw a0, 8(sp) # Put the marker on the stack so it has an address to pass.
+ addi a0, sp, 8 # a0 = address of the marker.
+ li a1, 4 # a1 = 4; presumably the length in bytes - confirm against _write_error.
+ call _write_error
+
+.Linitialize_classes_step:
+ bnez s1, .Linitialize_classes_loop # Index 0 has already been handled when s1 reaches 0.
+
+ lw s1, 12(sp) # Restore the saved register.
# Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
# Initializes the lookup tables.
@@ -171,8 +201,7 @@ _tokenizer_initialize:
sw s0, 0(sp)
addi s0, sp, 8
- la a0, raw_classes
- call _tokenizer_classes
+ call _initialize_classes
# Epilogue.
lw ra, 4(sp)