Create tokenization tables
This commit is contained in:
parent
f3a8b2626a
commit
768821c689
@ -1896,11 +1896,6 @@ _main:
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
|
||||
# Read the source from the standard input.
|
||||
la a0, source_code
|
||||
li a1, SOURCE_BUFFER_SIZE # Buffer size.
|
||||
call _read_file
|
||||
|
||||
li s2, 1
|
||||
|
||||
# Epilogue.
|
||||
@ -1912,7 +1907,13 @@ _main:
|
||||
# Entry point.
|
||||
.type _start, @function
|
||||
_start:
|
||||
call _tokenizer_initialize
|
||||
# Read the source from the standard input.
|
||||
la a0, source_code
|
||||
li a1, SOURCE_BUFFER_SIZE # Buffer size.
|
||||
call _read_file
|
||||
|
||||
mv a0, s1
|
||||
call _tokenize
|
||||
call _main
|
||||
call _compile
|
||||
|
||||
|
@ -1,20 +0,0 @@
|
||||
- start
|
||||
digit: integer
|
||||
upper: identifier
|
||||
lower: identifier
|
||||
space: start
|
||||
invalid: error
|
||||
|
||||
- identifier
|
||||
digit: identifier
|
||||
upper: identifier
|
||||
lower: identifier
|
||||
space: end
|
||||
invalid: end
|
||||
|
||||
- integer:
|
||||
digit: integer
|
||||
upper: end
|
||||
lower: end
|
||||
space: end
|
||||
invalid: end
|
526
boot/tokenizer.s
526
boot/tokenizer.s
@ -1,190 +1,405 @@
|
||||
.global _tokenizer_initialize
|
||||
.global _tokenize, classification, transitions
|
||||
|
||||
.section .rodata
|
||||
|
||||
#
|
||||
# Classes:
|
||||
# Classification table assigns each possible character to a group (class). All
|
||||
# characters of the same group are handled equivalently.
|
||||
#
|
||||
# 0x00: Invalid
|
||||
# 0x01: Digit
|
||||
# 0x02: Character
|
||||
# 0x03: Space
|
||||
.type classes, @object
|
||||
.size classes, 128
|
||||
classes:
|
||||
.byte 0x00 # 00 NUL
|
||||
.byte 0x00 # 01 SOH
|
||||
.byte 0x00 # 02 STX
|
||||
.byte 0x00 # 03 ETX
|
||||
.byte 0x00 # 04 EOT
|
||||
.byte 0x00 # 05 ENQ
|
||||
.byte 0x00 # 06 ACK
|
||||
.byte 0x00 # 07 BEL
|
||||
.byte 0x00 # 08 BS
|
||||
.byte 0x00 # 09 HT
|
||||
.byte 0x00 # 0A LF
|
||||
.byte 0x00 # 0B VT
|
||||
.byte 0x00 # 0C FF
|
||||
.byte 0x00 # 0D CR
|
||||
.byte 0x00 # 0E SO
|
||||
.byte 0x00 # 0F SI
|
||||
.byte 0x00 # 10 DLE
|
||||
.byte 0x00 # 11 DC1
|
||||
.byte 0x00 # 12 DC2
|
||||
.byte 0x00 # 13 DC3
|
||||
.byte 0x00 # 14 DC4
|
||||
.byte 0x00 # 15 NAK
|
||||
.byte 0x00 # 16 SYN
|
||||
.byte 0x00 # 17 ETB
|
||||
.byte 0x00 # 18 CAN
|
||||
.byte 0x00 # 19 EM
|
||||
.byte 0x00 # 1A SUB
|
||||
.byte 0x00 # 1B ESC
|
||||
.byte 0x00 # 1C FS
|
||||
.byte 0x00 # 1D GS
|
||||
.byte 0x00 # 1E RS
|
||||
.byte 0x00 # 1F US
|
||||
.byte 0x03 # 20 Space
|
||||
.byte 0x00 # 21 !
|
||||
# Classification:
|
||||
#
|
||||
.equ CLASS_INVALID, 0x0
|
||||
.equ CLASS_DIGIT, 0x01
|
||||
.equ CLASS_CHARACTER, 0x02
|
||||
.equ CLASS_SPACE, 0x03
|
||||
.equ CLASS_COLON, 0x04
|
||||
.equ CLASS_EQUALS, 0x05
|
||||
.equ CLASS_LEFT_PAREN, 0x06
|
||||
.equ CLASS_RIGHT_PAREN, 0x07
|
||||
.equ CLASS_ASTERISK, 0x08
|
||||
.equ CLASS_UNDERSCORE, 0x09
|
||||
.equ CLASS_SINGLE, 0x0a
|
||||
.equ CLASS_HEX, 0x0b
|
||||
.equ CLASS_ZERO, 0x0c
|
||||
.equ CLASS_X, 0x0d
|
||||
.equ CLASS_EOF, 0x0e
|
||||
.equ CLASS_DOT, 0x0f
|
||||
|
||||
.equ CLASS_COUNT, 16
|
||||
|
||||
.type classification, @object
|
||||
.size classification, 128
|
||||
classification:
|
||||
.byte CLASS_EOF # 00 NUL
|
||||
.byte CLASS_INVALID # 01 SOH
|
||||
.byte CLASS_INVALID # 02 STX
|
||||
.byte CLASS_INVALID # 03 ETX
|
||||
.byte CLASS_INVALID # 04 EOT
|
||||
.byte CLASS_INVALID # 05 ENQ
|
||||
.byte CLASS_INVALID # 06 ACK
|
||||
.byte CLASS_INVALID # 07 BEL
|
||||
.byte CLASS_INVALID # 08 BS
|
||||
.byte CLASS_SPACE # 09 HT
|
||||
.byte CLASS_SPACE # 0A LF
|
||||
.byte CLASS_INVALID # 0B VT
|
||||
.byte CLASS_INVALID # 0C FF
|
||||
.byte CLASS_SPACE # 0D CR
|
||||
.byte CLASS_INVALID # 0E SO
|
||||
.byte CLASS_INVALID # 0F SI
|
||||
.byte CLASS_INVALID # 10 DLE
|
||||
.byte CLASS_INVALID # 11 DC1
|
||||
.byte CLASS_INVALID # 12 DC2
|
||||
.byte CLASS_INVALID # 13 DC3
|
||||
.byte CLASS_INVALID # 14 DC4
|
||||
.byte CLASS_INVALID # 15 NAK
|
||||
.byte CLASS_INVALID # 16 SYN
|
||||
.byte CLASS_INVALID # 17 ETB
|
||||
.byte CLASS_INVALID # 18 CAN
|
||||
.byte CLASS_INVALID # 19 EM
|
||||
.byte CLASS_INVALID # 1A SUB
|
||||
.byte CLASS_INVALID # 1B ESC
|
||||
.byte CLASS_INVALID # 1C FS
|
||||
.byte CLASS_INVALID # 1D GS
|
||||
.byte CLASS_INVALID # 1E RS
|
||||
.byte CLASS_INVALID # 1F US
|
||||
.byte CLASS_SPACE # 20 Space
|
||||
.byte CLASS_SINGLE # 21 !
|
||||
.byte 0x00 # 22 "
|
||||
.byte 0x00 # 23 #
|
||||
.byte 0x00 # 24 $
|
||||
.byte 0x00 # 25 %
|
||||
.byte 0x00 # 26 &
|
||||
.byte CLASS_SINGLE # 25 %
|
||||
.byte CLASS_SINGLE # 26 &
|
||||
.byte 0x00 # 27 '
|
||||
.byte 0x00 # 28 (
|
||||
.byte 0x00 # 29 )
|
||||
.byte 0x00 # 2A *
|
||||
.byte 0x00 # 2B +
|
||||
.byte 0x00 # 2C ,
|
||||
.byte CLASS_LEFT_PAREN # 28 (
|
||||
.byte CLASS_RIGHT_PAREN # 29 )
|
||||
.byte CLASS_ASTERISK # 2A *
|
||||
.byte CLASS_SINGLE # 2B +
|
||||
.byte CLASS_SINGLE # 2C ,
|
||||
.byte 0x00 # 2D -
|
||||
.byte 0x00 # 2E .
|
||||
.byte 0x00 # 2F /
|
||||
.byte 0x01 # 30 0
|
||||
.byte 0x01 # 31 1
|
||||
.byte 0x01 # 32 2
|
||||
.byte 0x01 # 33 3
|
||||
.byte 0x01 # 34 4
|
||||
.byte 0x01 # 35 5
|
||||
.byte 0x01 # 36 6
|
||||
.byte 0x01 # 37 7
|
||||
.byte 0x01 # 38 8
|
||||
.byte 0x01 # 39 9
|
||||
.byte 0x00 # 3A :
|
||||
.byte 0x00 # 3B ;
|
||||
.byte CLASS_DOT # 2E .
|
||||
.byte CLASS_SINGLE # 2F /
|
||||
.byte CLASS_ZERO # 30 0
|
||||
.byte CLASS_DIGIT # 31 1
|
||||
.byte CLASS_DIGIT # 32 2
|
||||
.byte CLASS_DIGIT # 33 3
|
||||
.byte CLASS_DIGIT # 34 4
|
||||
.byte CLASS_DIGIT # 35 5
|
||||
.byte CLASS_DIGIT # 36 6
|
||||
.byte CLASS_DIGIT # 37 7
|
||||
.byte CLASS_DIGIT # 38 8
|
||||
.byte CLASS_DIGIT # 39 9
|
||||
.byte CLASS_COLON # 3A :
|
||||
.byte CLASS_SINGLE # 3B ;
|
||||
.byte 0x00 # 3C <
|
||||
.byte 0x00 # 3D =
|
||||
.byte CLASS_EQUALS # 3D =
|
||||
.byte 0x00 # 3E >
|
||||
.byte 0x00 # 3F ?
|
||||
.byte 0x00 # 40 @
|
||||
.byte 0x02 # 41 A
|
||||
.byte 0x02 # 42 B
|
||||
.byte 0x02 # 43 C
|
||||
.byte 0x02 # 44 D
|
||||
.byte 0x02 # 45 E
|
||||
.byte 0x02 # 46 F
|
||||
.byte 0x02 # 47 G
|
||||
.byte 0x02 # 48 H
|
||||
.byte 0x02 # 49 I
|
||||
.byte 0x02 # 4A J
|
||||
.byte 0x02 # 4B K
|
||||
.byte 0x02 # 4C L
|
||||
.byte 0x02 # 4D M
|
||||
.byte 0x02 # 4E N
|
||||
.byte 0x02 # 4F O
|
||||
.byte 0x02 # 50 P
|
||||
.byte 0x02 # 51 Q
|
||||
.byte 0x02 # 52 R
|
||||
.byte 0x02 # 53 S
|
||||
.byte 0x02 # 54 T
|
||||
.byte 0x02 # 55 U
|
||||
.byte 0x02 # 56 V
|
||||
.byte 0x02 # 57 W
|
||||
.byte 0x02 # 58 X
|
||||
.byte 0x02 # 59 Y
|
||||
.byte 0x02 # 5A Z
|
||||
.byte 0x00 # 5B [
|
||||
.byte CLASS_SINGLE # 40 @
|
||||
.byte CLASS_CHARACTER # 41 A
|
||||
.byte CLASS_CHARACTER # 42 B
|
||||
.byte CLASS_CHARACTER # 43 C
|
||||
.byte CLASS_CHARACTER # 44 D
|
||||
.byte CLASS_CHARACTER # 45 E
|
||||
.byte CLASS_CHARACTER # 46 F
|
||||
.byte CLASS_CHARACTER # 47 G
|
||||
.byte CLASS_CHARACTER # 48 H
|
||||
.byte CLASS_CHARACTER # 49 I
|
||||
.byte CLASS_CHARACTER # 4A J
|
||||
.byte CLASS_CHARACTER # 4B K
|
||||
.byte CLASS_CHARACTER # 4C L
|
||||
.byte CLASS_CHARACTER # 4D M
|
||||
.byte CLASS_CHARACTER # 4E N
|
||||
.byte CLASS_CHARACTER # 4F O
|
||||
.byte CLASS_CHARACTER # 50 P
|
||||
.byte CLASS_CHARACTER # 51 Q
|
||||
.byte CLASS_CHARACTER # 52 R
|
||||
.byte CLASS_CHARACTER # 53 S
|
||||
.byte CLASS_CHARACTER # 54 T
|
||||
.byte CLASS_CHARACTER # 55 U
|
||||
.byte CLASS_CHARACTER # 56 V
|
||||
.byte CLASS_CHARACTER # 57 W
|
||||
.byte CLASS_CHARACTER # 58 X
|
||||
.byte CLASS_CHARACTER # 59 Y
|
||||
.byte CLASS_CHARACTER # 5A Z
|
||||
.byte CLASS_SINGLE # 5B [
|
||||
.byte 0x00 # 5C \
|
||||
.byte 0x00 # 5D ]
|
||||
.byte 0x00 # 5E ^
|
||||
.byte 0x00 # 5F _
|
||||
.byte CLASS_SINGLE # 5D ]
|
||||
.byte CLASS_SINGLE # 5E ^
|
||||
.byte CLASS_UNDERSCORE # 5F _
|
||||
.byte 0x00 # 60 `
|
||||
.byte 0x02 # 61 a
|
||||
.byte 0x02 # 62 b
|
||||
.byte 0x02 # 63 c
|
||||
.byte 0x02 # 64 d
|
||||
.byte 0x02 # 65 e
|
||||
.byte 0x02 # 66 f
|
||||
.byte 0x02 # 67 g
|
||||
.byte 0x02 # 68 h
|
||||
.byte 0x02 # 69 i
|
||||
.byte 0x02 # 6A j
|
||||
.byte 0x02 # 6B k
|
||||
.byte 0x02 # 6C l
|
||||
.byte 0x02 # 6D m
|
||||
.byte 0x02 # 6E n
|
||||
.byte 0x02 # 6F o
|
||||
.byte 0x02 # 70 p
|
||||
.byte 0x02 # 71 q
|
||||
.byte 0x02 # 72 r
|
||||
.byte 0x02 # 73 s
|
||||
.byte 0x02 # 74 t
|
||||
.byte 0x02 # 75 u
|
||||
.byte 0x02 # 76 v
|
||||
.byte 0x02 # 77 w
|
||||
.byte 0x02 # 78 x
|
||||
.byte 0x02 # 79 y
|
||||
.byte 0x02 # 7A z
|
||||
.byte CLASS_HEX # 61 a
|
||||
.byte CLASS_HEX # 62 b
|
||||
.byte CLASS_HEX # 63 c
|
||||
.byte CLASS_HEX # 64 d
|
||||
.byte CLASS_HEX # 65 e
|
||||
.byte CLASS_HEX # 66 f
|
||||
.byte CLASS_CHARACTER # 67 g
|
||||
.byte CLASS_CHARACTER # 68 h
|
||||
.byte CLASS_CHARACTER # 69 i
|
||||
.byte CLASS_CHARACTER # 6A j
|
||||
.byte CLASS_CHARACTER # 6B k
|
||||
.byte CLASS_CHARACTER # 6C l
|
||||
.byte CLASS_CHARACTER # 6D m
|
||||
.byte CLASS_CHARACTER # 6E n
|
||||
.byte CLASS_CHARACTER # 6F o
|
||||
.byte CLASS_CHARACTER # 70 p
|
||||
.byte CLASS_CHARACTER # 71 q
|
||||
.byte CLASS_CHARACTER # 72 r
|
||||
.byte CLASS_CHARACTER # 73 s
|
||||
.byte CLASS_CHARACTER # 74 t
|
||||
.byte CLASS_CHARACTER # 75 u
|
||||
.byte CLASS_CHARACTER # 76 v
|
||||
.byte CLASS_CHARACTER # 77 w
|
||||
.byte CLASS_X # 78 x
|
||||
.byte CLASS_CHARACTER # 79 y
|
||||
.byte CLASS_CHARACTER # 7A z
|
||||
.byte 0x00 # 7B {
|
||||
.byte 0x00 # 7C |
|
||||
.byte CLASS_SINGLE # 7C |
|
||||
.byte 0x00 # 7D }
|
||||
.byte 0x00 # 7E ~
|
||||
.byte 0x00 # 7F DEL
|
||||
.byte CLASS_SINGLE # 7E ~
|
||||
.byte CLASS_INVALID # 7F DEL
|
||||
|
||||
.section .data
|
||||
|
||||
.section .bss
|
||||
.type class_names, @object
|
||||
.size class_names, 1024
|
||||
class_names: .zero 1024
|
||||
# The transition table describes transitions from one state to another, given
|
||||
# a symbol (character class).
|
||||
#
|
||||
# The table has m rows and n columns, where m is the amount of states and n is
|
||||
# the amount of classes. So given the current state and a classified character
|
||||
# the table can be used to look up the next state.
|
||||
#
|
||||
# Each cell is a word long.
|
||||
# - The least significant byte of the word is a row number (beginning with 0).
|
||||
# It specifies the target state. "ff" means that this is an end state and no
|
||||
# transition is possible.
|
||||
# - The next byte is the action that should be performed when transitioning.
|
||||
# For the meaning of actions see labels in the _analyze_token function, which
|
||||
# handles each action.
|
||||
#
|
||||
.type transitions, @object
transitions:
	# One row per state, one column (a 4-byte word) per character class.
	# In each cell the least significant byte is the target state (0xff marks
	# an end state) and the next byte is the action code.
	#
	# Column order (two .word lines per row):
	#     Invalid Digit   Alpha   Space   :       =       (       )
	#     *       _       Single  Hex     0       x       NUL     .
	.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
	.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon

	.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier

	.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk

	.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment

	.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment

	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token

	.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero

	.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x02ff, 0x02ff # 0d Hexadecimal

	.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
	.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot

	# Symbol size in bytes, computed from the rows actually emitted above
	# (15 states * CLASS_COUNT columns * 4 bytes per word), so it stays correct
	# when states are added or removed.
	.size transitions, . - transitions
|
||||
|
||||
.section .text
|
||||
|
||||
# Returns the class from the classification table for the given character.
#
# The classification table only has 128 entries (one per ASCII code), so any
# value of 128 or above is reported as CLASS_INVALID instead of being used as
# an index past the end of the table.
#
# Parameters:
# a0 - Character (unsigned byte value).
#
# Sets a0 to the class number. Uses t0 as a scratch register.
	.type _classify, @function
_classify:
	li t0, 128 # Number of entries in the classification table.
	bgeu a0, t0, .Lclassify_out_of_range

	la t0, classification
	add t0, t0, a0 # Character class pointer.
	lbu a0, (t0) # Character class.
	ret

.Lclassify_out_of_range:
	li a0, CLASS_INVALID # No table entry exists for this byte value.
	ret
|
||||
|
||||
# Looks up the transition table cell for a (state, character class) pair.
#
# The cell index is state * CLASS_COUNT + class; each cell is one 4-byte word.
#
# Parameters:
# a0 - Current state (row of the transition table).
# a1 - Character class (column of the transition table).
#
# Sets a0 to the complete table cell: the least significant byte is the next
# state and the byte above it is the transition action. Uses t0 as scratch.
	.type _lookup_state, @function
_lookup_state:
	li t0, CLASS_COUNT
	mul a0, a0, t0 # First cell of the row for the current state.
	add a0, a0, a1 # Cell index within the whole table.
	slli a0, a0, 2 # Cell index -> byte offset (4 bytes per word).

	la t0, transitions
	add t0, t0, a0 # Address of the selected cell.
	lw a0, (t0) # Transition cell (next state and action).

	ret
|
||||
|
||||
# Classifies a character and looks up the resulting transition, i.e. performs
# _lookup_state(state, _classify(character)).
#
# Parameters:
# a0 - Current state.
# a1 - Character.
#
# Sets a0 to the value produced by _lookup_state for the current state and the
# class of the given character.
	.type _next_state, @function
_next_state:
	# Prologue: 16-byte frame holding ra, s0 and the incoming state.
	addi sp, sp, -16
	sw s0, 8(sp)
	sw ra, 12(sp)
	addi s0, sp, 16

	# The state has to survive the _classify call, so park it on the stack.
	sw a0, 0(sp)

	# Character -> character class.
	mv a0, a1
	call _classify

	# (state, class) -> transition.
	mv a1, a0
	lw a0, 0(sp)
	call _lookup_state

	# Epilogue.
	lw s0, 8(sp)
	lw ra, 12(sp)
	addi sp, sp, 16
	ret
|
||||
|
||||
# Initializes the classification table.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Raw input for the classification table.
|
||||
.type _initialize_classes, @function
|
||||
_initialize_classes:
|
||||
# a0 - Source text pointer.
|
||||
.type _analyze_token, @function
|
||||
_analyze_token:
|
||||
# Prologue.
|
||||
addi sp, sp, -24
|
||||
sw ra, 20(sp)
|
||||
sw s0, 16(sp)
|
||||
addi s0, sp, 24
|
||||
|
||||
sw s1, 12(sp) # Preserve the s1 register used for the character counter.
|
||||
li s1, 128 # 128 ASCII characters.
|
||||
sw s1, 12(sp) # Preserve s1 used for current source text position.
|
||||
mv s1, a0
|
||||
sw a0, 4(sp) # Keeps a pointer to the beginning of a token.
|
||||
|
||||
.Linitialize_classes_loop:
|
||||
addi s1, s1, -1
|
||||
sw s2, 8(sp) # Preserve s2 containing the current state.
|
||||
li s2, 0x00 # Initial, start state.
|
||||
|
||||
la t0, classes
|
||||
add t0, t0, s1
|
||||
lbu t0, (t0)
|
||||
li t1, 0x01
|
||||
.Lanalyze_token_loop:
|
||||
mv a0, s2
|
||||
lbu a1, (s1)
|
||||
call _next_state
|
||||
|
||||
bne t0, t1, .Linitialize_classes_step
|
||||
li t0, 0xff
|
||||
and s2, a0, t0 # Next state.
|
||||
|
||||
/* DEBUG */
|
||||
li a0, 0x69676964
|
||||
sw a0, 8(sp) # Preserve the memory address.
|
||||
addi a0, sp, 8
|
||||
li a1, 4
|
||||
li t0, 0xff00
|
||||
and t1, a0, t0 # Transition action.
|
||||
srli t1, t1, 8
|
||||
|
||||
|
||||
li t0, 0x01 # Accumulate action.
|
||||
beq t1, t0, .Lanalyze_token_accumulate
|
||||
|
||||
li t0, 0x02 # Print action.
|
||||
beq t1, t0, .Lanalyze_token_print
|
||||
|
||||
li t0, 0x03 # Skip action.
|
||||
beq t1, t0, .Lanalyze_token_skip
|
||||
|
||||
li t0, 0x04 # Comment action.
|
||||
beq t1, t0, .Lanalyze_token_comment
|
||||
|
||||
/* DEBUG
|
||||
mv s4, t1
|
||||
addi t1, t1, '0'
|
||||
sb t1, 0(sp)
|
||||
li t1, ' '
|
||||
sb t1, 1(sp)
|
||||
addi t1, s2, '0'
|
||||
sb t1, 2(sp)
|
||||
addi a0, sp, 0 */
|
||||
sw s1, 0(sp)
|
||||
addi a0, s1, 0
|
||||
li a1, 3
|
||||
call _write_error
|
||||
/* mv t1, s4
|
||||
DEBUG */
|
||||
|
||||
.Linitialize_classes_step:
|
||||
bnez s1, .Linitialize_classes_loop
|
||||
j .Lanalyze_token_reject
|
||||
|
||||
lw s1, 12(sp) # Restore the saved register.
|
||||
.Lanalyze_token_reject:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Lanalyze_token_end
|
||||
|
||||
.Lanalyze_token_accumulate:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Lanalyze_token_loop
|
||||
|
||||
.Lanalyze_token_skip:
|
||||
addi s1, s1, 1
|
||||
lw t0, 4(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 4(sp)
|
||||
|
||||
j .Lanalyze_token_loop
|
||||
|
||||
.Lanalyze_token_print:
|
||||
/* DEBUG
|
||||
lw a0, 4(sp)
|
||||
mv a1, s1
|
||||
sub a1, a1, a0
|
||||
call _write_error
|
||||
DEBUG */
|
||||
|
||||
j .Lanalyze_token_end
|
||||
|
||||
.Lanalyze_token_comment:
|
||||
addi s1, s1, 1
|
||||
|
||||
/* DEBUG
|
||||
lw a0, 4(sp)
|
||||
mv a1, s1
|
||||
sub a1, a1, a0
|
||||
call _write_error
|
||||
DEBUG */
|
||||
|
||||
j .Lanalyze_token_end
|
||||
|
||||
.Lanalyze_token_end:
|
||||
mv a0, s1 # Return the advanced text pointer.
|
||||
|
||||
# Restore saved registers.
|
||||
lw s1, 12(sp)
|
||||
lw s2, 8(sp)
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 20(sp)
|
||||
@ -193,15 +408,22 @@ _initialize_classes:
|
||||
ret
|
||||
|
||||
# Initializes the lookup tables.
|
||||
.type _tokenizer_initialize, @function
|
||||
_tokenizer_initialize:
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Source text pointer.
|
||||
.type _tokenize, @function
|
||||
_tokenize:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
|
||||
call _initialize_classes
|
||||
.Ltokenize_loop:
|
||||
call _analyze_token
|
||||
|
||||
lw t0, (a0)
|
||||
bnez t0, .Ltokenize_loop
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
|
Loading…
x
Reference in New Issue
Block a user