Add semicolons separating the statements

This commit is contained in:
2025-05-01 01:32:45 +02:00
parent 23b7a1ab30
commit f3a8b2626a
5 changed files with 1341 additions and 1091 deletions

View File

@@ -1,165 +1,195 @@
.global _tokenizer_initialize
.section .rodata
#
# Character class lookup table: one byte per ASCII code 0x00-0x7F, indexed
# directly by the character value.
#
# Classes:
#
# 0x00: Invalid
# 0x01: Digit
# 0x02: Character
# 0x03: Space
.type classes, @object
.size classes, 128
classes:
# 00-1F: control characters NUL through US.
.fill 32, 1, 0x00
# 20: Space.
.byte 0x03
# 21-2F: punctuation from the exclamation mark through the slash.
.fill 15, 1, 0x00
# 30-39: decimal digits 0 through 9.
.fill 10, 1, 0x01
# 3A-40: punctuation from the colon through the at sign.
.fill 7, 1, 0x00
# 41-5A: upper case letters A through Z.
.fill 26, 1, 0x02
# 5B-60: punctuation from the opening bracket through the backtick.
.fill 6, 1, 0x00
# 61-7A: lower case letters a through z.
.fill 26, 1, 0x02
# 7B-7F: opening brace, vertical bar, closing brace, tilde and DEL.
.fill 5, 1, 0x00
# Textual class name for every ASCII code 0x00-0x7F, in code order. Each entry
# is terminated by a newline only (.ascii adds no trailing NUL), so entries
# have different lengths and must be scanned sequentially.
raw_classes:
# 00-1F: control characters.
.rept 32
.ascii "invalid\n"
.endr
# 20: Space.
.ascii "space\n"
# 21-2F: punctuation.
.rept 15
.ascii "invalid\n"
.endr
# 30-39: decimal digits.
.rept 10
.ascii "digit\n"
.endr
# 3A-40: punctuation.
.rept 7
.ascii "invalid\n"
.endr
# 41-5A: upper case letters.
.rept 26
.ascii "upper\n"
.endr
# 5B-60: punctuation.
.rept 6
.ascii "invalid\n"
.endr
# 61-7A: lower case letters.
.rept 26
.ascii "lower\n"
.endr
# 7B-7F: punctuation and DEL.
.rept 5
.ascii "invalid\n"
.endr
# NOTE(review): this switch to .data is overridden by the .bss switch on the
# next line before anything is emitted, so it has no effect.
.section .data
.section .bss
# Zero-initialized 1024-byte buffer.
# NOTE(review): presumably meant to hold class names at run time; nothing in
# the visible code reads or writes it - confirm against the callers.
.type class_names, @object
.size class_names, 1024
class_names: .zero 1024
.section .data
# NOTE(review): `classes` is already defined as the 128-byte table in .rodata
# earlier in this file. This second definition declares a size of 512 bytes but
# no data directive follows the label before the switch to .text, so it
# reserves no storage, and the assembler is expected to reject the duplicate
# label. It looks like a leftover from an earlier revision - confirm and
# remove.
.type classes, @object
.size classes, 512 # 128 characters * 4 bytes.
classes:
.section .text
# Initializes the classification table.
#
# Walks the 128-entry `classes` table from index 127 down to index 0. For
# every entry whose class is CLASS_DIGIT it currently only emits a 4-byte
# debug marker through _write_error; nothing else is produced yet.
#
# Parameters:
# a0 - Raw input for the classification table. The current body never reads
#      it; a0 is overwritten when the debug marker is emitted.
#
# Stack frame (32 bytes, so sp keeps whatever 16-byte alignment the caller
# established, as the standard RISC-V calling convention requires):
# 28(sp) - saved ra.
# 24(sp) - saved s0 (the caller's frame pointer).
# 20(sp) - saved s1 (used as the character counter).
# 16(sp) - 4-byte debug marker handed to _write_error by address.
#
# Both entry names are kept because _tokenizer_initialize still calls each of
# them.
.equ CLASS_DIGIT, 0x01 # Class value of the decimal digits in `classes`.

.type _tokenizer_classes, @function
_tokenizer_classes:
.type _initialize_classes, @function
_initialize_classes:
    # Prologue.
    addi sp, sp, -32
    sw ra, 28(sp)
    sw s0, 24(sp)
    addi s0, sp, 32
    sw s1, 20(sp) # Preserve the s1 register used for the character counter.

    li s1, 128 # 128 ASCII characters.

.Linitialize_classes_loop:
    addi s1, s1, -1 # Next lower index; the first one visited is 127.

    # t0 and t1 are caller-saved and may be clobbered by _write_error, so the
    # table address and the class constant are rebuilt on every iteration.
    la t0, classes
    add t0, t0, s1
    lbu t0, (t0) # t0 = classes[s1]
    li t1, CLASS_DIGIT
    bne t0, t1, .Linitialize_classes_step

    /* DEBUG */
    # 0x69676964 is "digi" when stored in little-endian byte order. The marker
    # is written to the stack so it can be passed by address.
    # NOTE(review): _write_error presumably takes a0 = buffer, a1 = length -
    # confirm against its definition.
    li a0, 0x69676964
    sw a0, 16(sp)
    addi a0, sp, 16
    li a1, 4
    call _write_error

.Linitialize_classes_step:
    bnez s1, .Linitialize_classes_loop # Index 0 was the last entry to visit.

    lw s1, 20(sp) # Restore the saved register.

    # Epilogue.
    lw ra, 28(sp)
    lw s0, 24(sp)
    addi sp, sp, 32
    ret
# Initializes the lookup tables.
@@ -171,8 +201,7 @@ _tokenizer_initialize:
sw s0, 0(sp)
addi s0, sp, 8
la a0, raw_classes
call _tokenizer_classes
call _initialize_classes
# Epilogue.
lw ra, 4(sp)