Properly tokenize declaration sections
This commit is contained in:
parent
768821c689
commit
dcfd6b1515
22
Rakefile
22
Rakefile
@ -36,11 +36,17 @@ end
|
||||
|
||||
directory 'build'
|
||||
|
||||
desc 'Initial stage'
|
||||
file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t|
|
||||
source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
|
||||
Dir.glob('boot/*.s').each do |assembly_source|
|
||||
target_object = Pathname.new('build') + Pathname.new(assembly_source).basename.sub_ext('.o')
|
||||
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source
|
||||
file target_object.to_s => [assembly_source, 'build'] do |t|
|
||||
sh CROSS_GCC, '-c', '-o', t.name, assembly_source
|
||||
end
|
||||
end
|
||||
|
||||
desc 'Initial stage'
|
||||
file 'build/stage1' => ['build/tokenizer.o', 'build/stage1.o', 'build/common-boot.o'] do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
|
||||
file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
|
||||
@ -51,9 +57,11 @@ file 'build/stage2a.s' => ['build/stage1', 'boot/stage2.elna'] do |t|
|
||||
end
|
||||
end
|
||||
|
||||
file 'build/stage2a' => ['build/stage2a.s', 'boot/common-boot.s'] do |t|
|
||||
['build/stage2a', 'build/stage2b'].each do |exe|
|
||||
file exe => [exe.ext('.s'), 'build/common-boot.o'] do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
end
|
||||
|
||||
file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
|
||||
source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
|
||||
@ -62,7 +70,3 @@ file 'build/stage2b.s' => ['build/stage2a', 'boot/stage2.elna'] do |t|
|
||||
assemble_stage output, exe, source
|
||||
end
|
||||
end
|
||||
|
||||
file 'build/stage2b' => ['build/stage2b.s', 'boot/common-boot.s'] do |t|
|
||||
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
|
||||
end
|
||||
|
@ -2,6 +2,7 @@
|
||||
.global _write_out, _read_file, _write_error, _put_char, _printi
|
||||
.global _get, _memcmp, _memchr, _memmem, _memcpy
|
||||
.global _divide_by_zero_error, _exit
|
||||
.global _strings_index
|
||||
|
||||
.section .rodata
|
||||
|
||||
@ -424,3 +425,69 @@ _memcpy:
|
||||
.Lmemcpy_end:
|
||||
mv a0, t0
|
||||
ret
|
||||
|
||||
# Searches for a string in a string array.
#
# The array is a packed sequence of elements. Each element is a 4-byte length
# immediately followed by that many bytes of text.
#
# Parameters:
# a0 - Number of elements in the string array.
# a1 - String array.
# a2 - Needle length.
# a3 - Needle.
#
# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
# element could not be found.
.type _strings_index, @function
_strings_index:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Keep the loop state in callee-saved registers so it survives the
	# _memcmp call.
	sw s1, 20(sp)
	mv s1, a0 # Number of elements left to examine.
	sw s2, 16(sp)
	mv s2, a1 # Pointer to the current element (its length word).
	sw s3, 12(sp)
	mv s3, a2 # Needle length.
	sw s4, 8(sp)
	mv s4, a3 # Needle text.
	sw s5, 4(sp)
	li s5, 0 # Index counter.

.Lstrings_index_loop:
	addi s5, s5, 1 # The index is 1-based, so count before comparing.
	beqz s1, .Lstrings_index_missing

	lw a2, (s2) # Read the length of the current element in the haystack.
	bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.

	addi a0, s2, 4 # Text of the current element follows its length word.
	mv a1, s4
	call _memcmp

	beqz a0, .Lstrings_index_end # Zero means the texts are equal.

.Lstrings_index_next:
	# Reload the element length from memory instead of reusing a2: a2 is a
	# caller-saved argument register and may have been changed by _memcmp.
	lw t0, (s2)
	addi s2, s2, 4 # Skip the length word.
	add s2, s2, t0 # Skip the text.
	addi s1, s1, -1
	j .Lstrings_index_loop

.Lstrings_index_missing:
	li s5, 0

.Lstrings_index_end:
	mv a0, s5

	# Restore the callee-saved registers.
	lw s1, 20(sp)
	lw s2, 16(sp)
	lw s3, 12(sp)
	lw s4, 8(sp)
	lw s5, 4(sp)

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret
|
||||
|
28
boot/definitions.inc
Normal file
28
boot/definitions.inc
Normal file
@ -0,0 +1,28 @@
|
||||
# The constant should match the index in the keywords array in tokenizer.s.
# Indices are counted from 1.

.equ TOKEN_PROGRAM, 1
.equ TOKEN_IMPORT, 2
.equ TOKEN_CONST, 3
.equ TOKEN_VAR, 4
.equ TOKEN_IF, 5
.equ TOKEN_THEN, 6
.equ TOKEN_ELSIF, 7
.equ TOKEN_ELSE, 8
.equ TOKEN_WHILE, 9
.equ TOKEN_DO, 10
.equ TOKEN_PROC, 11
.equ TOKEN_BEGIN, 12
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
.equ TOKEN_UNION, 16
.equ TOKEN_TRUE, 17
.equ TOKEN_FALSE, 18
.equ TOKEN_FASE, TOKEN_FALSE # Misspelled former name, kept in case it is still referenced.
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
.equ TOKEN_DEFER, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
|
173
boot/stage1.s
173
boot/stage1.s
@ -4,6 +4,8 @@
|
||||
# s1 - Contains the current position in the source text.
|
||||
# s2 - Label counter.
|
||||
|
||||
.include "boot/definitions.inc"
|
||||
|
||||
.equ SOURCE_BUFFER_SIZE, 81920
|
||||
|
||||
.section .rodata
|
||||
@ -55,20 +57,41 @@ source_code: .zero SOURCE_BUFFER_SIZE
|
||||
.type _compile_import, @function
|
||||
_compile_import:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
addi s1, s1, 6
|
||||
.Lcompile_import_loop:
|
||||
call _skip_comment
|
||||
call _skip_spaces
|
||||
call _read_token
|
||||
add s1, s1, a0 # Skip the imported module name.
|
||||
|
||||
mv a0, s1
|
||||
addi a1, sp, 0
|
||||
call _tokenize_next
|
||||
li t0, TOKEN_IMPORT
|
||||
lw t1, 0(sp)
|
||||
bne t0, t1, .Lcompile_import_end
|
||||
# a0 is set from the previous _tokenize_next call. Skip the module name.
|
||||
addi a1, sp, 0
|
||||
call _tokenize_next
|
||||
mv s1, a0
|
||||
|
||||
/* DEBUG
|
||||
lw t0, 0(sp)
|
||||
addi t0, t0, '0'
|
||||
sw t0, 4(sp)
|
||||
addi a0, sp, 4
|
||||
li a1, 1
|
||||
call _write_error*/
|
||||
|
||||
j .Lcompile_import_loop
|
||||
|
||||
.Lcompile_import_end:
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.type _build_binary_expression, @function
|
||||
@ -943,40 +966,54 @@ _compile_assembly:
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.type _compile_program, @function
|
||||
_compile_program:
|
||||
.type _compile_module_declaration, @function
|
||||
_compile_module_declaration:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
la a0, global_start
|
||||
li a1, GLOBAL_START_SIZE
|
||||
call _write_out
|
||||
|
||||
addi s1, s1, 8 # program\n.
|
||||
# Skip "program".
|
||||
call _skip_comment
|
||||
mv a0, s1
|
||||
addi a1, sp, 0
|
||||
call _tokenize_next
|
||||
mv s1, a0
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.type _compile_constant_section, @function
|
||||
_compile_constant_section:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
call _skip_comment
|
||||
call _skip_spaces
|
||||
|
||||
mv a0, s1
|
||||
addi a1, sp, 0
|
||||
call _tokenize_next
|
||||
li t0, TOKEN_CONST
|
||||
lw t1, 0(sp)
|
||||
bne t0, t1, .Lcompile_constant_section_end
|
||||
mv s1, a0
|
||||
|
||||
la a0, section_rodata
|
||||
li a1, SECTION_RODATA_SIZE
|
||||
call _write_out
|
||||
|
||||
addi s1, s1, 6 # const\n.
|
||||
|
||||
.Lcompile_constant_section_item:
|
||||
call _skip_spaces
|
||||
lbu a0, (s1)
|
||||
@ -988,9 +1025,9 @@ _compile_constant_section:
|
||||
|
||||
.Lcompile_constant_section_end:
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.type _compile_constant, @function
|
||||
@ -1040,17 +1077,23 @@ _compile_constant:
|
||||
.type _compile_variable_section, @function
|
||||
_compile_variable_section:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
mv a0, s1
|
||||
addi a1, sp, 0
|
||||
call _tokenize_next
|
||||
li t0, TOKEN_VAR
|
||||
lw t1, 0(sp)
|
||||
bne t0, t1, .Lcompile_variable_section_end
|
||||
mv s1, a0
|
||||
|
||||
la a0, section_bss
|
||||
li a1, SECTION_BSS_SIZE
|
||||
call _write_out
|
||||
|
||||
addi s1, s1, 4 # var\n.
|
||||
|
||||
.Lcompile_variable_section_item:
|
||||
call _skip_spaces
|
||||
lbu a0, (s1)
|
||||
@ -1062,9 +1105,9 @@ _compile_variable_section:
|
||||
|
||||
.Lcompile_variable_section_end:
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.type _compile_variable, @function
|
||||
@ -1589,30 +1632,6 @@ _compile_line:
|
||||
li t1, '('
|
||||
beq t0, t1, .Lcompile_line_comment
|
||||
|
||||
li t0, 0x676f7270 # prog
|
||||
sw t0, 12(sp)
|
||||
mv a0, s1
|
||||
addi a1, sp, 12
|
||||
li a2, 4
|
||||
call _memcmp
|
||||
beqz a0, .Lcompile_line_program
|
||||
|
||||
li t0, 0x736e6f63 # cons
|
||||
sw t0, 12(sp)
|
||||
mv a0, s1
|
||||
addi a1, sp, 12
|
||||
li a2, 4
|
||||
call _memcmp
|
||||
beqz a0, .Lcompile_line_const
|
||||
|
||||
li t0, 0x0a726176 # var\n
|
||||
sw t0, 12(sp)
|
||||
mv a0, s1
|
||||
addi a1, sp, 12
|
||||
li a2, 4
|
||||
call _memcmp
|
||||
beqz a0, .Lcompile_line_var
|
||||
|
||||
li t0, 0x636f7270 # proc
|
||||
sw t0, 12(sp)
|
||||
mv a0, s1
|
||||
@ -1647,14 +1666,6 @@ _compile_line:
|
||||
call _is_register_identifier
|
||||
bnez a0, .Lcompile_line_identifier
|
||||
|
||||
li t0, 0x6f706d69 # impo
|
||||
sw t0, 12(sp)
|
||||
mv a0, s1
|
||||
addi a1, sp, 12
|
||||
li a2, 4
|
||||
call _memcmp
|
||||
beqz a0, .Lcompile_line_import
|
||||
|
||||
li t0, 0x6f746f67 # goto
|
||||
sw t0, 12(sp)
|
||||
mv a0, s1
|
||||
@ -1704,10 +1715,6 @@ _compile_line:
|
||||
call _compile_goto
|
||||
j .Lcompile_line_section
|
||||
|
||||
.Lcompile_line_import:
|
||||
call _compile_import
|
||||
j .Lcompile_line_section
|
||||
|
||||
.Lcompile_line_identifier:
|
||||
call _compile_identifier
|
||||
j .Lcompile_line_section
|
||||
@ -1725,10 +1732,6 @@ _compile_line:
|
||||
li a0, 1
|
||||
j .Lcompile_line_end
|
||||
|
||||
.Lcompile_line_const:
|
||||
call _compile_constant_section
|
||||
j .Lcompile_line_section
|
||||
|
||||
.Lcompile_line_procedure:
|
||||
lw a1, 16(sp)
|
||||
bnez a1, .Lcompile_line_compile_procedure
|
||||
@ -1738,14 +1741,6 @@ _compile_line:
|
||||
li a0, 1
|
||||
j .Lcompile_line_end
|
||||
|
||||
.Lcompile_line_var:
|
||||
call _compile_variable_section
|
||||
j .Lcompile_line_section
|
||||
|
||||
.Lcompile_line_program:
|
||||
call _compile_program
|
||||
j .Lcompile_line_section
|
||||
|
||||
.Lcompile_line_comment:
|
||||
lw a0, 20(sp)
|
||||
call _skip_comment
|
||||
@ -1864,6 +1859,11 @@ _compile:
|
||||
|
||||
sw zero, 4(sp) # Whether the text section header was already emitted.
|
||||
|
||||
call _compile_module_declaration
|
||||
call _compile_import
|
||||
call _compile_constant_section
|
||||
call _compile_variable_section
|
||||
|
||||
.Lcompile_do:
|
||||
lbu t0, (s1) # t0 = Current character.
|
||||
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
|
||||
@ -1913,7 +1913,6 @@ _start:
|
||||
call _read_file
|
||||
|
||||
mv a0, s1
|
||||
call _tokenize
|
||||
call _main
|
||||
call _compile
|
||||
|
||||
|
230
boot/tokenizer.s
230
boot/tokenizer.s
@ -1,4 +1,4 @@
|
||||
.global _tokenize, classification, transitions
|
||||
.global _tokenize_next, classification, transitions, keywords
|
||||
|
||||
.section .rodata
|
||||
|
||||
@ -24,8 +24,10 @@
|
||||
.equ CLASS_X, 0x0d
|
||||
.equ CLASS_EOF, 0x0e
|
||||
.equ CLASS_DOT, 0x0f
|
||||
.equ CLASS_MINUS, 0x10
|
||||
.equ CLASS_DOUBLE_QUOTE, 0x11
|
||||
|
||||
.equ CLASS_COUNT, 16
|
||||
.equ CLASS_COUNT, 18
|
||||
|
||||
.type classification, @object
|
||||
.size classification, 128
|
||||
@ -64,7 +66,7 @@ classification:
|
||||
.byte CLASS_INVALID # 1F US
|
||||
.byte CLASS_SPACE # 20 Space
|
||||
.byte CLASS_SINGLE # 21 !
|
||||
.byte 0x00 # 22 "
|
||||
.byte CLASS_DOUBLE_QUOTE # 22 "
|
||||
.byte 0x00 # 23 #
|
||||
.byte 0x00 # 24 $
|
||||
.byte CLASS_SINGLE # 25 %
|
||||
@ -75,7 +77,7 @@ classification:
|
||||
.byte CLASS_ASTERISK # 2A *
|
||||
.byte CLASS_SINGLE # 2B +
|
||||
.byte CLASS_SINGLE # 2C ,
|
||||
.byte 0x00 # 2D -
|
||||
.byte CLASS_MINUS # 2D -
|
||||
.byte CLASS_DOT # 2E .
|
||||
.byte CLASS_SINGLE # 2F /
|
||||
.byte CLASS_ZERO # 30 0
|
||||
@ -159,6 +161,67 @@ classification:
|
||||
.byte CLASS_SINGLE # 7E ~
|
||||
.byte CLASS_INVALID # 7F DEL
|
||||
|
||||
#
# Textual keywords in the language.
#
# Each entry is a 4-byte length followed by the keyword text without a
# terminator. The position of an entry, counted from 1, is the token kind of
# that keyword (see the TOKEN_* constants in boot/definitions.inc).
#
# KEYWORDS_COUNT must equal the number of entries in the table, otherwise the
# trailing keywords are never searched.
#
.equ KEYWORDS_COUNT, 26

.type keywords, @object
keywords:
	.word 7
	.ascii "program"
	.word 6
	.ascii "import"
	.word 5
	.ascii "const"
	.word 3
	.ascii "var"
	.word 2
	.ascii "if"
	.word 4
	.ascii "then"
	.word 5
	.ascii "elsif"
	.word 4
	.ascii "else"
	.word 5
	.ascii "while"
	.word 2
	.ascii "do"
	.word 4
	.ascii "proc"
	.word 5
	.ascii "begin"
	.word 3
	.ascii "end"
	.word 4
	.ascii "type"
	.word 6
	.ascii "record"
	.word 5
	.ascii "union"
	.word 4
	.ascii "true"
	.word 5
	.ascii "false"
	.word 3
	.ascii "nil"
	.word 3
	.ascii "xor"
	.word 2
	.ascii "or"
	.word 6
	.ascii "return"
	.word 4
	.ascii "cast"
	.word 5
	.ascii "defer"
	.word 4
	.ascii "case"
	.word 2
	.ascii "of"
	.size keywords, . - keywords
|
||||
|
||||
.section .data
|
||||
|
||||
# The transition table describes transitions from one state to another, given
|
||||
@ -173,58 +236,82 @@ classification:
|
||||
# It specifies the target state. "ff" means that this is an end state and no
|
||||
# transition is possible.
|
||||
# - The next byte is the action that should be performed when transitioning.
|
||||
# For the meaning of actions see labels in the _analyze_token function, which
|
||||
# For the meaning of actions see labels in the _tokenize_next function, which
|
||||
# handles each action.
|
||||
#
|
||||
.type transitions, @object
|
||||
.size transitions, 13 * CLASS_COUNT # state count * CLASS_COUNT
|
||||
.size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT
|
||||
transitions:
|
||||
# Invalid Digit Alpha Space : = ( )
|
||||
# * _ Single Hex 0 x NUL .
|
||||
# - "
|
||||
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
|
||||
.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
|
||||
.word 0x010f, 0x0110
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 02 Identifier
|
||||
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
|
||||
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier
|
||||
.word 0x05ff, 0x05ff
|
||||
|
||||
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
|
||||
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
|
||||
.word 0x0109, 0x0109
|
||||
|
||||
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
|
||||
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
|
||||
.word 0x0109, 0x0109
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal
|
||||
.word 0x00ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x2ff, 0x02ff # 0e Dot
|
||||
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff
|
||||
|
||||
.word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string.
|
||||
.word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110
|
||||
.word 0x0110, 0x04ff
|
||||
|
||||
.section .text
|
||||
|
||||
@ -292,12 +379,42 @@ _next_state:
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Looks an identifier up in the keywords table.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the result of _strings_index over the keywords table: the 1-based
# position of the matching keyword, or 0 if the identifier is not a keyword.
.type _classify_identifier, @function
_classify_identifier:
	# Prologue.
	addi sp, sp, -16
	sw s0, 8(sp)
	sw ra, 12(sp)
	addi s0, sp, 16

	# Move the token into the needle arguments first, since a0 and a1 are
	# about to be reused for the haystack.
	mv a3, a1 # Needle text.
	la a1, keywords # Haystack.
	mv a2, a0 # Needle length.
	li a0, KEYWORDS_COUNT # Haystack element count.
	call _strings_index

	# Epilogue. a0 already holds the lookup result.
	lw s0, 8(sp)
	lw ra, 12(sp)
	addi sp, sp, 16
	ret
|
||||
|
||||
# Reads the next token from the source text and determines its kind.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Source text pointer.
|
||||
.type _analyze_token, @function
|
||||
_analyze_token:
|
||||
# a1 - A pointer for output value, the token kind. 4 Bytes.
|
||||
#
|
||||
# Sets a0 to the position of the next token.
|
||||
.type _tokenize_next, @function
|
||||
_tokenize_next:
|
||||
# Prologue.
|
||||
addi sp, sp, -24
|
||||
sw ra, 20(sp)
|
||||
@ -311,7 +428,10 @@ _analyze_token:
|
||||
sw s2, 8(sp) # Preserve s2 containing the current state.
|
||||
li s2, 0x00 # Initial, start state.
|
||||
|
||||
.Lanalyze_token_loop:
|
||||
sw a1, 0(sp)
|
||||
sw zero, (a1) # Initialize.
|
||||
|
||||
.Ltokenize_next_loop:
|
||||
mv a0, s2
|
||||
lbu a1, (s1)
|
||||
call _next_state
|
||||
@ -323,56 +443,43 @@ _analyze_token:
|
||||
and t1, a0, t0 # Transition action.
|
||||
srli t1, t1, 8
|
||||
|
||||
|
||||
# Perform the provided action.
|
||||
li t0, 0x01 # Accumulate action.
|
||||
beq t1, t0, .Lanalyze_token_accumulate
|
||||
beq t1, t0, .Ltokenize_next_accumulate
|
||||
|
||||
li t0, 0x02 # Print action.
|
||||
beq t1, t0, .Lanalyze_token_print
|
||||
beq t1, t0, .Ltokenize_next_print
|
||||
|
||||
li t0, 0x03 # Skip action.
|
||||
beq t1, t0, .Lanalyze_token_skip
|
||||
beq t1, t0, .Ltokenize_next_skip
|
||||
|
||||
li t0, 0x04 # Comment action.
|
||||
beq t1, t0, .Lanalyze_token_comment
|
||||
beq t1, t0, .Ltokenize_next_comment
|
||||
|
||||
/* DEBUG
|
||||
mv s4, t1
|
||||
addi t1, t1, '0'
|
||||
sb t1, 0(sp)
|
||||
li t1, ' '
|
||||
sb t1, 1(sp)
|
||||
addi t1, s2, '0'
|
||||
sb t1, 2(sp)
|
||||
addi a0, sp, 0 */
|
||||
sw s1, 0(sp)
|
||||
addi a0, s1, 0
|
||||
li a1, 3
|
||||
call _write_error
|
||||
/* mv t1, s4
|
||||
DEBUG */
|
||||
li t0, 0x05 # Finalize identifier.
|
||||
beq t1, t0, .Ltokenize_next_identifier
|
||||
|
||||
j .Lanalyze_token_reject
|
||||
j .Ltokenize_next_reject
|
||||
|
||||
.Lanalyze_token_reject:
|
||||
.Ltokenize_next_reject:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Lanalyze_token_end
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Lanalyze_token_accumulate:
|
||||
.Ltokenize_next_accumulate:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Lanalyze_token_loop
|
||||
j .Ltokenize_next_loop
|
||||
|
||||
.Lanalyze_token_skip:
|
||||
.Ltokenize_next_skip:
|
||||
addi s1, s1, 1
|
||||
lw t0, 4(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 4(sp)
|
||||
|
||||
j .Lanalyze_token_loop
|
||||
j .Ltokenize_next_loop
|
||||
|
||||
.Lanalyze_token_print:
|
||||
.Ltokenize_next_print:
|
||||
/* DEBUG
|
||||
lw a0, 4(sp)
|
||||
mv a1, s1
|
||||
@ -380,9 +487,9 @@ _analyze_token:
|
||||
call _write_error
|
||||
DEBUG */
|
||||
|
||||
j .Lanalyze_token_end
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Lanalyze_token_comment:
|
||||
.Ltokenize_next_comment:
|
||||
addi s1, s1, 1
|
||||
|
||||
/* DEBUG
|
||||
@ -392,9 +499,20 @@ _analyze_token:
|
||||
call _write_error
|
||||
DEBUG */
|
||||
|
||||
j .Lanalyze_token_end
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Lanalyze_token_end:
|
||||
.Ltokenize_next_identifier:
|
||||
# An identifier can be a textual keyword.
|
||||
# Check the kind of the token and write it into the output parameter.
|
||||
lw a1, 4(sp)
|
||||
sub a0, s1, a1
|
||||
call _classify_identifier
|
||||
lw a1, 0(sp)
|
||||
sw a0, (a1)
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_end:
|
||||
mv a0, s1 # Return the advanced text pointer.
|
||||
|
||||
# Restore saved registers.
|
||||
@ -406,27 +524,3 @@ _analyze_token:
|
||||
lw s0, 16(sp)
|
||||
addi sp, sp, 24
|
||||
ret
|
||||
|
||||
# Initializes the lookup tables.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Source text pointer.
|
||||
.type _tokenize, @function
|
||||
_tokenize:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
|
||||
.Ltokenize_loop:
|
||||
call _analyze_token
|
||||
|
||||
lw t0, (a0)
|
||||
bnez t0, .Ltokenize_loop
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
ret
|
||||
|
Loading…
x
Reference in New Issue
Block a user