Properly tokenize declaration sections

This commit is contained in:
Eugen Wissner 2025-05-02 22:57:04 +02:00
parent 768821c689
commit dcfd6b1515
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
5 changed files with 357 additions and 165 deletions

View File

@ -36,11 +36,17 @@ end
directory 'build'
desc 'Initial stage'
file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t|
source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
Dir.glob('boot/*.s').each do |assembly_source|
target_object = Pathname.new('build') + Pathname.new(assembly_source).basename.sub_ext('.o')
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
file target_object.to_s => [assembly_source, 'build'] do |t|
sh CROSS_GCC, '-c', '-o', t.name, assembly_source
end
end
desc 'Initial stage'
file 'build/stage1' => ['build/tokenizer.o', 'build/stage1.o', 'build/common-boot.o'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
@ -51,8 +57,10 @@ file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
end
end
file 'build/stage2a' => ['build/stage2a.s', 'boot/common-boot.s'] do |t|
['build/stage2a', 'build/stage2b'].each do |exe|
file exe => [exe.ext('.s'), 'build/common-boot.o'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
end
file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
@ -62,7 +70,3 @@ file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
assemble_stage output, exe, source
end
end
file 'build/stage2b' => ['build/stage2b.s', 'boot/common-boot.s'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end

View File

@ -2,6 +2,7 @@
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit
.global _strings_index
.section .rodata
@ -424,3 +425,69 @@ _memcpy:
.Lmemcpy_end:
mv a0, t0
ret
# Searches for a string in a string array.
#
# The array is length-prefixed: each element is a 4-byte length word followed
# immediately by that many bytes of text, with the next element directly
# after (no padding, no terminator).
#
# Parameters:
# a0 - Number of elements in the string array.
# a1 - String array.
# a2 - Needle length.
# a3 - Needle.
#
# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
# element could not be found.
.type _strings_index, @function
_strings_index:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Save the callee-saved registers and move the arguments into them so they
# survive the _memcmp calls below.
sw s1, 20(sp)
mv s1, a0 # s1 = remaining element count.
sw s2, 16(sp)
mv s2, a1 # s2 = pointer to the current element's length word.
sw s3, 12(sp)
mv s3, a2 # s3 = needle length.
sw s4, 8(sp)
mv s4, a3 # s4 = needle pointer.
sw s5, 4(sp)
li s5, 0 # Index counter.
.Lstrings_index_loop:
addi s5, s5, 1 # Advance the 1-based index before the bounds check.
beqz s1, .Lstrings_index_missing # All elements consumed: not found.
lw a2, (s2) # Read the length of the current element in the haystack.
bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.
addi a0, s2, 4 # a0 = element text, just past the length word.
mv a1, s4
call _memcmp # NOTE(review): the skip path below reuses a2 — assumes _memcmp preserves it; verify.
beqz a0, .Lstrings_index_end # Contents match: s5 holds the answer.
.Lstrings_index_next:
addi s2, s2, 4 # Step over the length word...
add s2, s2, a2 # ...and over the element text to the next entry.
addi s1, s1, -1
j .Lstrings_index_loop
.Lstrings_index_missing:
li s5, 0 # Report 0 for "not found".
.Lstrings_index_end:
mv a0, s5
# Restore the callee-saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
lw s4, 8(sp)
lw s5, 4(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
add sp, sp, 32
ret

28
boot/definitions.inc Normal file
View File

@ -0,0 +1,28 @@
# Token kind constants shared between the tokenizer and the compiler.
# Each constant must match the 1-based position of the corresponding entry in
# the keywords array in tokenizer.s, since _strings_index reports that index.
.equ TOKEN_PROGRAM, 1
.equ TOKEN_IMPORT, 2
.equ TOKEN_CONST, 3
.equ TOKEN_VAR, 4
.equ TOKEN_IF, 5
.equ TOKEN_THEN, 6
.equ TOKEN_ELSIF, 7
.equ TOKEN_ELSE, 8
.equ TOKEN_WHILE, 9
.equ TOKEN_DO, 10
.equ TOKEN_PROC, 11
.equ TOKEN_BEGIN, 12
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
.equ TOKEN_UNION, 16 # Was a duplicate TOKEN_RECORD, which silently redefined it to 16.
.equ TOKEN_TRUE, 17
.equ TOKEN_FALSE, 18 # Was misspelled TOKEN_FASE.
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
.equ TOKEN_DEFER, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26

View File

@ -4,6 +4,8 @@
# s1 - Contains the current position in the source text.
# s2 - Label counter.
.include "boot/definitions.inc"
.equ SOURCE_BUFFER_SIZE, 81920
.section .rodata
@ -55,20 +57,41 @@ source_code: .zero SOURCE_BUFFER_SIZE
.type _compile_import, @function
_compile_import:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
addi s1, s1, 6
.Lcompile_import_loop:
call _skip_comment
call _skip_spaces
call _read_token
add s1, s1, a0 # Skip the imported module name.
mv a0, s1
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_IMPORT
lw t1, 0(sp)
bne t0, t1, .Lcompile_import_end
# a0 is set from the previous _tokenize_next call. Skip the module name.
addi a1, sp, 0
call _tokenize_next
mv s1, a0
/* DEBUG
lw t0, 0(sp)
addi t0, t0, '0'
sw t0, 4(sp)
addi a0, sp, 4
li a1, 1
call _write_error*/
j .Lcompile_import_loop
.Lcompile_import_end:
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
.type _build_binary_expression, @function
@ -943,40 +966,54 @@ _compile_assembly:
addi sp, sp, 16
ret
.type _compile_program, @function
_compile_program:
.type _compile_module_declaration, @function
_compile_module_declaration:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
la a0, global_start
li a1, GLOBAL_START_SIZE
call _write_out
addi s1, s1, 8 # program\n.
# Skip "program".
call _skip_comment
mv a0, s1
addi a1, sp, 0
call _tokenize_next
mv s1, a0
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
.type _compile_constant_section, @function
_compile_constant_section:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
call _skip_comment
call _skip_spaces
mv a0, s1
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_CONST
lw t1, 0(sp)
bne t0, t1, .Lcompile_constant_section_end
mv s1, a0
la a0, section_rodata
li a1, SECTION_RODATA_SIZE
call _write_out
addi s1, s1, 6 # const\n.
.Lcompile_constant_section_item:
call _skip_spaces
lbu a0, (s1)
@ -988,9 +1025,9 @@ _compile_constant_section:
.Lcompile_constant_section_end:
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
.type _compile_constant, @function
@ -1040,17 +1077,23 @@ _compile_constant:
.type _compile_variable_section, @function
_compile_variable_section:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
mv a0, s1
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_VAR
lw t1, 0(sp)
bne t0, t1, .Lcompile_variable_section_end
mv s1, a0
la a0, section_bss
li a1, SECTION_BSS_SIZE
call _write_out
addi s1, s1, 4 # var\n.
.Lcompile_variable_section_item:
call _skip_spaces
lbu a0, (s1)
@ -1062,9 +1105,9 @@ _compile_variable_section:
.Lcompile_variable_section_end:
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
.type _compile_variable, @function
@ -1589,30 +1632,6 @@ _compile_line:
li t1, '('
beq t0, t1, .Lcompile_line_comment
li t0, 0x676f7270 # prog
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_program
li t0, 0x736e6f63 # cons
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_const
li t0, 0x0a726176 # var\n
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_var
li t0, 0x636f7270 # proc
sw t0, 12(sp)
mv a0, s1
@ -1647,14 +1666,6 @@ _compile_line:
call _is_register_identifier
bnez a0, .Lcompile_line_identifier
li t0, 0x6f706d69 # impo
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_import
li t0, 0x6f746f67 # goto
sw t0, 12(sp)
mv a0, s1
@ -1704,10 +1715,6 @@ _compile_line:
call _compile_goto
j .Lcompile_line_section
.Lcompile_line_import:
call _compile_import
j .Lcompile_line_section
.Lcompile_line_identifier:
call _compile_identifier
j .Lcompile_line_section
@ -1725,10 +1732,6 @@ _compile_line:
li a0, 1
j .Lcompile_line_end
.Lcompile_line_const:
call _compile_constant_section
j .Lcompile_line_section
.Lcompile_line_procedure:
lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_procedure
@ -1738,14 +1741,6 @@ _compile_line:
li a0, 1
j .Lcompile_line_end
.Lcompile_line_var:
call _compile_variable_section
j .Lcompile_line_section
.Lcompile_line_program:
call _compile_program
j .Lcompile_line_section
.Lcompile_line_comment:
lw a0, 20(sp)
call _skip_comment
@ -1864,6 +1859,11 @@ _compile:
sw zero, 4(sp) # Whether the text section header was already emitted.
call _compile_module_declaration
call _compile_import
call _compile_constant_section
call _compile_variable_section
.Lcompile_do:
lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
@ -1913,7 +1913,6 @@ _start:
call _read_file
mv a0, s1
call _tokenize
call _main
call _compile

View File

@ -1,4 +1,4 @@
.global _tokenize, classification, transitions
.global _tokenize_next, classification, transitions, keywords
.section .rodata
@ -24,8 +24,10 @@
.equ CLASS_X, 0x0d
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
.equ CLASS_DOUBLE_QUOTE, 0x11
.equ CLASS_COUNT, 16
.equ CLASS_COUNT, 18
.type classification, @object
.size classification, 128
@ -64,7 +66,7 @@ classification:
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
.byte 0x00 # 22 "
.byte CLASS_DOUBLE_QUOTE # 22 "
.byte 0x00 # 23 #
.byte 0x00 # 24 $
.byte CLASS_SINGLE # 25 %
@ -75,7 +77,7 @@ classification:
.byte CLASS_ASTERISK # 2A *
.byte CLASS_SINGLE # 2B +
.byte CLASS_SINGLE # 2C ,
.byte 0x00 # 2D -
.byte CLASS_MINUS # 2D -
.byte CLASS_DOT # 2E .
.byte CLASS_SINGLE # 2F /
.byte CLASS_ZERO # 30 0
@ -159,6 +161,67 @@ classification:
.byte CLASS_SINGLE # 7E ~
.byte CLASS_INVALID # 7F DEL
#
# Textual keywords in the language.
#
# Each entry is a 4-byte length word followed by the unpadded keyword text.
# The 1-based position of an entry must match the TOKEN_* constant of the
# same name in boot/definitions.inc, because _strings_index returns that
# position as the token kind.
#
.equ KEYWORDS_COUNT, 26 # Was 21, which made return/cast/defer/case/of unreachable.
.type keywords, @object
keywords:
.word 7
.ascii "program"
.word 6
.ascii "import"
.word 5
.ascii "const"
.word 3
.ascii "var"
.word 2
.ascii "if"
.word 4
.ascii "then"
.word 5
.ascii "elsif"
.word 4
.ascii "else"
.word 5
.ascii "while"
.word 2
.ascii "do"
.word 4
.ascii "proc"
.word 5
.ascii "begin"
.word 3
.ascii "end"
.word 4
.ascii "type"
.word 6
.ascii "record"
.word 5
.ascii "union"
.word 4
.ascii "true"
.word 5
.ascii "false"
.word 3
.ascii "nil"
.word 3
.ascii "xor"
.word 2
.ascii "or"
.word 6
.ascii "return"
.word 4
.ascii "cast"
.word 5
.ascii "defer"
.word 4
.ascii "case"
.word 2
.ascii "of"
.size keywords, . - keywords
.section .data
# The transition table describes transitions from one state to another, given
@ -173,58 +236,82 @@ classification:
# It specifies the target state. "ff" means that this is an end state and no
# transition is possible.
# - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the _analyze_token function, which
# For the meaning of actions see labels in the _tokenize_next function, which
# handles each action.
#
.type transitions, @object
.size transitions, 13 * CLASS_COUNT # state count * CLASS_COUNT
.size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - "
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
.word 0x010f, 0x0110
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
.word 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier
.word 0x05ff, 0x05ff
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
.word 0x02ff, 0x02ff
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
.word 0x0109, 0x0109
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
.word 0x0109, 0x0109
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
.word 0x02ff, 0x02ff
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
.word 0x02ff, 0x02ff
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal
.word 0x00ff, 0x02ff
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x2ff, 0x02ff # 0e Dot
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff
.word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string.
.word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110
.word 0x0110, 0x04ff
.section .text
@ -292,12 +379,42 @@ _next_state:
addi sp, sp, 16
ret
# Takes an identifier and checks whether it's a keyword.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type: the 1-based position of the keyword
# in the keywords table (matching the TOKEN_* constants) or 0 when the token
# is a plain identifier.
.type _classify_identifier, @function
_classify_identifier:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
# Shuffle the arguments into the _strings_index convention: the incoming
# length/pointer become the needle (a2/a3), and a0/a1 receive the keyword
# table size and base address. The order matters: a0/a1 are read before
# they are overwritten.
mv a2, a0
mv a3, a1
li a0, KEYWORDS_COUNT
la a1, keywords
call _strings_index
# The _strings_index result in a0 is returned unchanged.
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Initializes the classification table.
#
# Parameters:
# a0 - Source text pointer.
.type _analyze_token, @function
_analyze_token:
# a1 - A pointer for output value, the token kind. 4 Bytes.
#
# Sets a0 to the position of the next token.
.type _tokenize_next, @function
_tokenize_next:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
@ -311,7 +428,10 @@ _analyze_token:
sw s2, 8(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
.Lanalyze_token_loop:
sw a1, 0(sp)
sw zero, (a1) # Initialize.
.Ltokenize_next_loop:
mv a0, s2
lbu a1, (s1)
call _next_state
@ -323,56 +443,43 @@ _analyze_token:
and t1, a0, t0 # Transition action.
srli t1, t1, 8
# Perform the provided action.
li t0, 0x01 # Accumulate action.
beq t1, t0, .Lanalyze_token_accumulate
beq t1, t0, .Ltokenize_next_accumulate
li t0, 0x02 # Print action.
beq t1, t0, .Lanalyze_token_print
beq t1, t0, .Ltokenize_next_print
li t0, 0x03 # Skip action.
beq t1, t0, .Lanalyze_token_skip
beq t1, t0, .Ltokenize_next_skip
li t0, 0x04 # Comment action.
beq t1, t0, .Lanalyze_token_comment
beq t1, t0, .Ltokenize_next_comment
/* DEBUG
mv s4, t1
addi t1, t1, '0'
sb t1, 0(sp)
li t1, ' '
sb t1, 1(sp)
addi t1, s2, '0'
sb t1, 2(sp)
addi a0, sp, 0 */
sw s1, 0(sp)
addi a0, s1, 0
li a1, 3
call _write_error
/* mv t1, s4
DEBUG */
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier
j .Lanalyze_token_reject
j .Ltokenize_next_reject
.Lanalyze_token_reject:
.Ltokenize_next_reject:
addi s1, s1, 1
j .Lanalyze_token_end
j .Ltokenize_next_end
.Lanalyze_token_accumulate:
.Ltokenize_next_accumulate:
addi s1, s1, 1
j .Lanalyze_token_loop
j .Ltokenize_next_loop
.Lanalyze_token_skip:
.Ltokenize_next_skip:
addi s1, s1, 1
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
j .Lanalyze_token_loop
j .Ltokenize_next_loop
.Lanalyze_token_print:
.Ltokenize_next_print:
/* DEBUG
lw a0, 4(sp)
mv a1, s1
@ -380,9 +487,9 @@ _analyze_token:
call _write_error
DEBUG */
j .Lanalyze_token_end
j .Ltokenize_next_end
.Lanalyze_token_comment:
.Ltokenize_next_comment:
addi s1, s1, 1
/* DEBUG
@ -392,9 +499,20 @@ _analyze_token:
call _write_error
DEBUG */
j .Lanalyze_token_end
j .Ltokenize_next_end
.Lanalyze_token_end:
.Ltokenize_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 4(sp)
sub a0, s1, a1
call _classify_identifier
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
@ -406,27 +524,3 @@ _analyze_token:
lw s0, 16(sp)
addi sp, sp, 24
ret
# Tokenizes the whole source text by calling _analyze_token in a loop until
# the end of the input is reached. (The old header comment, "Initializes the
# lookup tables", did not match the body.)
#
# Parameters:
# a0 - Source text pointer.
.type _tokenize, @function
_tokenize:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
.Ltokenize_loop:
call _analyze_token # a0 = position after the consumed token.
lw t0, (a0) # NOTE(review): loads a full word at the cursor; presumably a NUL-terminator check — lbu may be intended. Verify.
bnez t0, .Ltokenize_loop # Keep going until the terminator.
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret