617 lines
14 KiB
ArmAsm
617 lines
14 KiB
ArmAsm
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
# obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
.global _tokenize_next, classification, transitions, keywords, byte_keywords
|
|
|
|
.include "boot/definitions.inc"
|
|
|
|
.section .rodata
|
|
|
|
#
|
|
# Classification table assigns each possible character to a group (class). All
|
|
# characters of the same group a handled equivalently.
|
|
#
|
|
# Classification:
|
|
#
|
|
.equ CLASS_INVALID, 0x00
|
|
.equ CLASS_DIGIT, 0x01
|
|
.equ CLASS_CHARACTER, 0x02
|
|
.equ CLASS_SPACE, 0x03
|
|
.equ CLASS_COLON, 0x04
|
|
.equ CLASS_EQUALS, 0x05
|
|
.equ CLASS_LEFT_PAREN, 0x06
|
|
.equ CLASS_RIGHT_PAREN, 0x07
|
|
.equ CLASS_ASTERISK, 0x08
|
|
.equ CLASS_UNDERSCORE, 0x09
|
|
.equ CLASS_SINGLE, 0x0a
|
|
.equ CLASS_HEX, 0x0b
|
|
.equ CLASS_ZERO, 0x0c
|
|
.equ CLASS_X, 0x0d
|
|
.equ CLASS_EOF, 0x0e
|
|
.equ CLASS_DOT, 0x0f
|
|
.equ CLASS_MINUS, 0x10
|
|
.equ CLASS_QUOTE, 0x11
|
|
.equ CLASS_GREATER, 0x12
|
|
.equ CLASS_LESS, 0x13
|
|
|
|
.equ CLASS_COUNT, 20
|
|
|
|
.type classification, @object
|
|
classification:
|
|
.byte CLASS_EOF # 00 NUL
|
|
.byte CLASS_INVALID # 01 SOH
|
|
.byte CLASS_INVALID # 02 STX
|
|
.byte CLASS_INVALID # 03 ETX
|
|
.byte CLASS_INVALID # 04 EOT
|
|
.byte CLASS_INVALID # 05 ENQ
|
|
.byte CLASS_INVALID # 06 ACK
|
|
.byte CLASS_INVALID # 07 BEL
|
|
.byte CLASS_INVALID # 08 BS
|
|
.byte CLASS_SPACE # 09 HT
|
|
.byte CLASS_SPACE # 0A LF
|
|
.byte CLASS_INVALID # 0B VT
|
|
.byte CLASS_INVALID # 0C FF
|
|
.byte CLASS_SPACE # 0D CR
|
|
.byte CLASS_INVALID # 0E SO
|
|
.byte CLASS_INVALID # 0F SI
|
|
.byte CLASS_INVALID # 10 DLE
|
|
.byte CLASS_INVALID # 11 DC1
|
|
.byte CLASS_INVALID # 12 DC2
|
|
.byte CLASS_INVALID # 13 DC3
|
|
.byte CLASS_INVALID # 14 DC4
|
|
.byte CLASS_INVALID # 15 NAK
|
|
.byte CLASS_INVALID # 16 SYN
|
|
.byte CLASS_INVALID # 17 ETB
|
|
.byte CLASS_INVALID # 18 CAN
|
|
.byte CLASS_INVALID # 19 EM
|
|
.byte CLASS_INVALID # 1A SUB
|
|
.byte CLASS_INVALID # 1B ESC
|
|
.byte CLASS_INVALID # 1C FS
|
|
.byte CLASS_INVALID # 1D GS
|
|
.byte CLASS_INVALID # 1E RS
|
|
.byte CLASS_INVALID # 1F US
|
|
.byte CLASS_SPACE # 20 Space
|
|
.byte CLASS_SINGLE # 21 !
|
|
.byte CLASS_QUOTE # 22 "
|
|
.byte 0x00 # 23 #
|
|
.byte 0x00 # 24 $
|
|
.byte CLASS_SINGLE # 25 %
|
|
.byte CLASS_SINGLE # 26 &
|
|
.byte CLASS_QUOTE # 27 '
|
|
.byte CLASS_LEFT_PAREN # 28 (
|
|
.byte CLASS_RIGHT_PAREN # 29 )
|
|
.byte CLASS_ASTERISK # 2A *
|
|
.byte CLASS_SINGLE # 2B +
|
|
.byte CLASS_SINGLE # 2C ,
|
|
.byte CLASS_MINUS # 2D -
|
|
.byte CLASS_DOT # 2E .
|
|
.byte CLASS_SINGLE # 2F /
|
|
.byte CLASS_ZERO # 30 0
|
|
.byte CLASS_DIGIT # 31 1
|
|
.byte CLASS_DIGIT # 32 2
|
|
.byte CLASS_DIGIT # 33 3
|
|
.byte CLASS_DIGIT # 34 4
|
|
.byte CLASS_DIGIT # 35 5
|
|
.byte CLASS_DIGIT # 36 6
|
|
.byte CLASS_DIGIT # 37 7
|
|
.byte CLASS_DIGIT # 38 8
|
|
.byte CLASS_DIGIT # 39 9
|
|
.byte CLASS_COLON # 3A :
|
|
.byte CLASS_SINGLE # 3B ;
|
|
.byte CLASS_LESS # 3C <
|
|
.byte CLASS_EQUALS # 3D =
|
|
.byte CLASS_GREATER # 3E >
|
|
.byte 0x00 # 3F ?
|
|
.byte CLASS_SINGLE # 40 @
|
|
.byte CLASS_CHARACTER # 41 A
|
|
.byte CLASS_CHARACTER # 42 B
|
|
.byte CLASS_CHARACTER # 43 C
|
|
.byte CLASS_CHARACTER # 44 D
|
|
.byte CLASS_CHARACTER # 45 E
|
|
.byte CLASS_CHARACTER # 46 F
|
|
.byte CLASS_CHARACTER # 47 G
|
|
.byte CLASS_CHARACTER # 48 H
|
|
.byte CLASS_CHARACTER # 49 I
|
|
.byte CLASS_CHARACTER # 4A J
|
|
.byte CLASS_CHARACTER # 4B K
|
|
.byte CLASS_CHARACTER # 4C L
|
|
.byte CLASS_CHARACTER # 4D M
|
|
.byte CLASS_CHARACTER # 4E N
|
|
.byte CLASS_CHARACTER # 4F O
|
|
.byte CLASS_CHARACTER # 50 P
|
|
.byte CLASS_CHARACTER # 51 Q
|
|
.byte CLASS_CHARACTER # 52 R
|
|
.byte CLASS_CHARACTER # 53 S
|
|
.byte CLASS_CHARACTER # 54 T
|
|
.byte CLASS_CHARACTER # 55 U
|
|
.byte CLASS_CHARACTER # 56 V
|
|
.byte CLASS_CHARACTER # 57 W
|
|
.byte CLASS_CHARACTER # 58 X
|
|
.byte CLASS_CHARACTER # 59 Y
|
|
.byte CLASS_CHARACTER # 5A Z
|
|
.byte CLASS_SINGLE # 5B [
|
|
.byte 0x00 # 5C \
|
|
.byte CLASS_SINGLE # 5D ]
|
|
.byte CLASS_SINGLE # 5E ^
|
|
.byte CLASS_UNDERSCORE # 5F _
|
|
.byte 0x00 # 60 `
|
|
.byte CLASS_HEX # 61 a
|
|
.byte CLASS_HEX # 62 b
|
|
.byte CLASS_HEX # 63 c
|
|
.byte CLASS_HEX # 64 d
|
|
.byte CLASS_HEX # 65 e
|
|
.byte CLASS_HEX # 66 f
|
|
.byte CLASS_CHARACTER # 67 g
|
|
.byte CLASS_CHARACTER # 68 h
|
|
.byte CLASS_CHARACTER # 69 i
|
|
.byte CLASS_CHARACTER # 6A j
|
|
.byte CLASS_CHARACTER # 6B k
|
|
.byte CLASS_CHARACTER # 6C l
|
|
.byte CLASS_CHARACTER # 6D m
|
|
.byte CLASS_CHARACTER # 6E n
|
|
.byte CLASS_CHARACTER # 6F o
|
|
.byte CLASS_CHARACTER # 70 p
|
|
.byte CLASS_CHARACTER # 71 q
|
|
.byte CLASS_CHARACTER # 72 r
|
|
.byte CLASS_CHARACTER # 73 s
|
|
.byte CLASS_CHARACTER # 74 t
|
|
.byte CLASS_CHARACTER # 75 u
|
|
.byte CLASS_CHARACTER # 76 v
|
|
.byte CLASS_CHARACTER # 77 w
|
|
.byte CLASS_X # 78 x
|
|
.byte CLASS_CHARACTER # 79 y
|
|
.byte CLASS_CHARACTER # 7A z
|
|
.byte 0x00 # 7B {
|
|
.byte CLASS_SINGLE # 7C |
|
|
.byte 0x00 # 7D }
|
|
.byte CLASS_SINGLE # 7E ~
|
|
.byte CLASS_INVALID # 7F DEL
|
|
|
|
#
|
|
# Textual keywords in the language.
|
|
#
|
|
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
|
|
|
|
.type keywords, @object
|
|
keywords:
|
|
.word 7
|
|
.ascii "program"
|
|
.word 6
|
|
.ascii "import"
|
|
.word 5
|
|
.ascii "const"
|
|
.word 3
|
|
.ascii "var"
|
|
.word 2
|
|
.ascii "if"
|
|
.word 4
|
|
.ascii "then"
|
|
.word 5
|
|
.ascii "elsif"
|
|
.word 4
|
|
.ascii "else"
|
|
.word 5
|
|
.ascii "while"
|
|
.word 2
|
|
.ascii "do"
|
|
.word 4
|
|
.ascii "proc"
|
|
.word 5
|
|
.ascii "begin"
|
|
.word 3
|
|
.ascii "end"
|
|
.word 4
|
|
.ascii "type"
|
|
.word 6
|
|
.ascii "record"
|
|
.word 5
|
|
.ascii "union"
|
|
.word 4
|
|
.ascii "true"
|
|
.word 5
|
|
.ascii "false"
|
|
.word 3
|
|
.ascii "nil"
|
|
.word 3
|
|
.ascii "xor"
|
|
.word 2
|
|
.ascii "or"
|
|
.word 6
|
|
.ascii "return"
|
|
.word 4
|
|
.ascii "cast"
|
|
.word 4
|
|
.ascii "goto"
|
|
.word 4
|
|
.ascii "case"
|
|
.word 2
|
|
.ascii "of"
|
|
|
|
.type byte_keywords, @object
|
|
byte_keywords: .ascii "&.,:;()[]^=+-*@"
|
|
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
|
|
|
|
.section .data
|
|
|
|
# The transition table describes transitions from one state to another, given
|
|
# a symbol (character class).
|
|
#
|
|
# The table has m rows and n columns, where m is the amount of states and n is
|
|
# the amount of classes. So given the current state and a classified character
|
|
# the table can be used to look up the next state.
|
|
#
|
|
# Each cell is a word long.
|
|
# - The least significant byte of the word is a row number (beginning with 0).
|
|
# It specifies the target state. "ff" means that this is an end state and no
|
|
# transition is possible.
|
|
# - The next byte is the action that should be performed when transitioning.
|
|
# For the meaning of actions see labels in the _tokenize_next function, which
|
|
# handles each action.
|
|
#
|
|
.type transitions, @object
|
|
transitions:
|
|
# Invalid Digit Alpha Space : = ( )
|
|
# * _ Single Hex 0 x NUL .
|
|
# - " or ' > <
|
|
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
|
|
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
|
|
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
|
|
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
|
|
|
|
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
|
|
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
|
|
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
|
|
|
|
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
|
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
|
|
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
|
|
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
|
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
|
|
|
|
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
|
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
|
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
|
|
|
|
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
|
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
|
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
|
|
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
|
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
|
|
|
|
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
|
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
|
|
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
|
|
|
|
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
|
|
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
|
|
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
|
|
|
|
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
|
|
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
|
|
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
|
|
|
|
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
|
|
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
|
|
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
|
|
|
|
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
|
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
|
|
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
|
|
|
|
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
|
|
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
|
|
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
|
|
|
|
.section .text
|
|
|
|
# Returns the class from the classification table for the given character.
|
|
#
|
|
# Parameters:
|
|
# a0 - Character.
|
|
#
|
|
# Sets a0 to the class number.
|
|
.type _classify, @function
|
|
_classify:
|
|
la t0, classification
|
|
add t0, t0, a0 # Character class pointer.
|
|
lbu a0, (t0) # Character class.
|
|
ret
|
|
|
|
# Given the current state and a character class, calculates the next state.
|
|
|
|
# Parameters:
|
|
# a0 - Current state.
|
|
# a1 - Character class.
|
|
#
|
|
# Sets a0 to the next state.
|
|
.type _lookup_state, @function
|
|
_lookup_state:
|
|
li t0, CLASS_COUNT
|
|
mul a0, a0, t0 # Transition row.
|
|
add a0, a0, a1 # Transition column.
|
|
|
|
li t0, 4
|
|
mul a0, a0, t0 # Multiply by the word size.
|
|
|
|
la t0, transitions
|
|
add t0, t0, a0
|
|
lw a0, (t0) # Next state.
|
|
|
|
ret
|
|
|
|
# Chains _classify and _lookup_state.
|
|
#
|
|
# Parameters:
|
|
# a0 - Current state.
|
|
# a1 - Character.
|
|
#
|
|
# Sets a0 to the next state based on the given character.
|
|
.type _next_state, @function
|
|
_next_state:
|
|
# Prologue.
|
|
addi sp, sp, -16
|
|
sw ra, 12(sp)
|
|
sw s0, 8(sp)
|
|
addi s0, sp, 16
|
|
|
|
sw a0, 4(sp)
|
|
mv a0, a1
|
|
call _classify
|
|
|
|
mv a1, a0
|
|
lw a0, 4(sp)
|
|
call _lookup_state
|
|
|
|
# Epilogue.
|
|
lw ra, 12(sp)
|
|
lw s0, 8(sp)
|
|
addi sp, sp, 16
|
|
ret
|
|
|
|
# Takes an identifier and checks whether it's a keyword.
|
|
#
|
|
# Parameters:
|
|
# a0 - Token length.
|
|
# a1 - Token pointer.
|
|
#
|
|
# Sets a0 to the appropriate token type.
|
|
.type _classify_identifier, @function
|
|
_classify_identifier:
|
|
# Prologue.
|
|
addi sp, sp, -16
|
|
sw ra, 12(sp)
|
|
sw s0, 8(sp)
|
|
addi s0, sp, 16
|
|
|
|
mv a2, a0
|
|
mv a3, a1
|
|
li a0, KEYWORDS_COUNT
|
|
la a1, keywords
|
|
call _strings_index
|
|
|
|
bnez a0, .Lclassify_identifier_end
|
|
li a0, TOKEN_IDENTIFIER
|
|
|
|
.Lclassify_identifier_end:
|
|
# Epilogue.
|
|
lw ra, 12(sp)
|
|
lw s0, 8(sp)
|
|
addi sp, sp, 16
|
|
ret
|
|
|
|
# Takes a symbol and determines its type.
|
|
#
|
|
# Parameters:
|
|
# a0 - Token character.
|
|
#
|
|
# Sets a0 to the appropriate token type.
|
|
.type _classify_single, @function
|
|
_classify_single:
|
|
# Prologue.
|
|
addi sp, sp, -16
|
|
sw ra, 12(sp)
|
|
sw s0, 8(sp)
|
|
addi s0, sp, 16
|
|
|
|
mv a1, a0
|
|
li a2, BYTE_KEYWORDS_SIZE
|
|
la a0, byte_keywords
|
|
call _memchr
|
|
|
|
la a1, byte_keywords
|
|
sub a0, a0, a1
|
|
addi a0, a0, TOKEN_IDENTIFIER + 1
|
|
|
|
# Epilogue.
|
|
lw ra, 12(sp)
|
|
lw s0, 8(sp)
|
|
addi sp, sp, 16
|
|
ret
|
|
|
|
# Classified a symbol containing multiple characters (probably 2).
|
|
#
|
|
# Parameters:
|
|
# a0 - Token length.
|
|
# a1 - Token pointer.
|
|
#
|
|
# Sets a0 to the appropriate token type.
|
|
.type _classify_composite, @function
|
|
_classify_composite:
|
|
lbu t0, 0(a1)
|
|
li t1, ':'
|
|
beq t0, t1, .Lclassify_composite_assign
|
|
|
|
j .Lclassify_composite_end
|
|
|
|
.Lclassify_composite_assign:
|
|
li a0, TOKEN_ASSIGN
|
|
j .Lclassify_composite_end
|
|
|
|
.Lclassify_composite_end:
|
|
ret
|
|
|
|
# Initializes the classification table.
|
|
#
|
|
# Paramaters:
|
|
# a0 - Source text pointer.
|
|
# a1 - A pointer for output value, the token kind. 4 Bytes.
|
|
#
|
|
# Sets a0 to the position of the next token.
|
|
.type _tokenize_next, @function
|
|
_tokenize_next:
|
|
# Prologue.
|
|
addi sp, sp, -32
|
|
sw ra, 28(sp)
|
|
sw s0, 24(sp)
|
|
addi s0, sp, 32
|
|
|
|
sw s1, 20(sp) # Preserve s1 used for current source text position.
|
|
mv s1, a0
|
|
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
|
|
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
|
|
|
|
sw s2, 16(sp) # Preserve s2 containing the current state.
|
|
li s2, 0x00 # Initial, start state.
|
|
|
|
sw a1, 0(sp)
|
|
sw zero, (a1) # Initialize.
|
|
|
|
.Ltokenize_next_loop:
|
|
mv a0, s2
|
|
lbu a1, (s1)
|
|
call _next_state
|
|
|
|
li t0, 0xff
|
|
and s2, a0, t0 # Next state.
|
|
|
|
li t0, 0xff00
|
|
and t1, a0, t0 # Transition action.
|
|
srli t1, t1, 8
|
|
|
|
# Perform the provided action.
|
|
li t0, 0x01 # Accumulate action.
|
|
beq t1, t0, .Ltokenize_next_accumulate
|
|
|
|
li t0, 0x02 # Print action.
|
|
beq t1, t0, .Ltokenize_next_print
|
|
|
|
li t0, 0x03 # Skip action.
|
|
beq t1, t0, .Ltokenize_next_skip
|
|
|
|
li t0, 0x04 # Delimited string action.
|
|
beq t1, t0, .Ltokenize_next_comment
|
|
|
|
li t0, 0x05 # Finalize identifier.
|
|
beq t1, t0, .Ltokenize_next_identifier
|
|
|
|
li t0, 0x06 # Single character symbol action.
|
|
beq t1, t0, .Ltokenize_next_single
|
|
|
|
li t0, 0x07 # An action for symbols containing multiple characters.
|
|
beq t1, t0, .Ltokenize_next_composite
|
|
|
|
li t0, 0x08 # Integer action.
|
|
beq t1, t0, .Ltokenize_next_integer
|
|
|
|
j .Ltokenize_next_reject
|
|
|
|
.Ltokenize_next_reject:
|
|
addi s1, s1, 1
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_accumulate:
|
|
addi s1, s1, 1
|
|
|
|
j .Ltokenize_next_loop
|
|
|
|
.Ltokenize_next_skip:
|
|
addi s1, s1, 1
|
|
lw t0, 12(sp)
|
|
addi t0, t0, 1
|
|
sw t0, 12(sp)
|
|
|
|
j .Ltokenize_next_loop
|
|
|
|
.Ltokenize_next_print:
|
|
/* DEBUG
|
|
addi a0, a0, 21
|
|
sw a0, 0(sp)
|
|
addi a0, sp, 0
|
|
li a1, 1
|
|
call _write_error */
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_comment:
|
|
addi s1, s1, 1
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_identifier:
|
|
# An identifier can be a textual keyword.
|
|
# Check the kind of the token and write it into the output parameter.
|
|
lw a1, 12(sp)
|
|
sub a0, s1, a1
|
|
sw a0, 8(sp)
|
|
call _classify_identifier
|
|
sw a0, 4(sp)
|
|
lw a0, 0(sp)
|
|
addi a1, sp, 4
|
|
li a2, 12
|
|
call _memcpy
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_single:
|
|
lw a0, 12(sp)
|
|
addi s1, a0, 1
|
|
lbu a0, (a0)
|
|
call _classify_single
|
|
lw a1, 0(sp)
|
|
sw a0, (a1)
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_composite:
|
|
addi s1, s1, 1
|
|
lw a1, 12(sp)
|
|
sub a0, s1, a1
|
|
call _classify_composite
|
|
lw a1, 0(sp)
|
|
sw a0, (a1)
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_integer:
|
|
lw t0, 0(sp)
|
|
li t1, TOKEN_INTEGER
|
|
sw t1, 0(t0)
|
|
lw t1, 12(sp)
|
|
sw t1, 8(t0)
|
|
sub t1, s1, t1
|
|
sw t1, 4(t0)
|
|
|
|
j .Ltokenize_next_end
|
|
|
|
.Ltokenize_next_end:
|
|
mv a0, s1 # Return the advanced text pointer.
|
|
|
|
# Restore saved registers.
|
|
lw s1, 20(sp)
|
|
lw s2, 16(sp)
|
|
|
|
# Epilogue.
|
|
lw ra, 28(sp)
|
|
lw s0, 24(sp)
|
|
addi sp, sp, 32
|
|
ret
|