summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--boot/common-boot.s5
-rw-r--r--boot/definitions.inc39
-rw-r--r--boot/stage1.s484
-rw-r--r--boot/tokenizer.s65
4 files changed, 243 insertions, 350 deletions
diff --git a/boot/common-boot.s b/boot/common-boot.s
index a6fb04e..2a192d3 100644
--- a/boot/common-boot.s
+++ b/boot/common-boot.s
@@ -425,6 +425,9 @@ _memcpy:
addi a0, a0, 1
addi a1, a1, 1
+ addi a2, a2, -1
+
+ j .Lmemcpy_loop
.Lmemcpy_end:
mv a0, t0
@@ -473,6 +476,8 @@ _strings_index:
beqz a0, .Lstrings_index_end
.Lstrings_index_next:
+ # Advance the pointer, reduce the length.
+ lw a2, (s2)
addi s2, s2, 4
add s2, s2, a2
addi s1, s1, -1
diff --git a/boot/definitions.inc b/boot/definitions.inc
index 4d8ab9c..d61a5f6 100644
--- a/boot/definitions.inc
+++ b/boot/definitions.inc
@@ -19,34 +19,35 @@
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
-.equ TOKEN_RECORD, 16
+.equ TOKEN_UNION, 16
.equ TOKEN_TRUE, 17
-.equ TOKEN_FASE, 18
+.equ TOKEN_FALSE, 18
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
-.equ TOKEN_DEFER, 24
+.equ TOKEN_GOTO, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
+.equ TOKEN_IDENTIFIER, 27
# The constant should match the character index in the byte_keywords string.
-.equ TOKEN_AND, 27
-.equ TOKEN_DOT, 28
-.equ TOKEN_COMMA, 29
-.equ TOKEN_COLON, 30
-.equ TOKEN_SEMICOLON, 31
-.equ TOKEN_LEFT_PAREN, 32
-.equ TOKEN_RIGHT_PAREN, 33
-.equ TOKEN_LEFT_BRACKET, 34
-.equ TOKEN_RIGHT_BRACKET, 35
-.equ TOKEN_HAT, 36
-.equ TOKEN_EQUALS, 37
-.equ TOKEN_PLUS, 38
-.equ TOKEN_MINUS, 39
-.equ TOKEN_ASTERISK, 40
-.equ TOKEN_AT, 41
+.equ TOKEN_AND, 28
+.equ TOKEN_DOT, 29
+.equ TOKEN_COMMA, 30
+.equ TOKEN_COLON, 31
+.equ TOKEN_SEMICOLON, 32
+.equ TOKEN_LEFT_PAREN, 33
+.equ TOKEN_RIGHT_PAREN, 34
+.equ TOKEN_LEFT_BRACKET, 35
+.equ TOKEN_RIGHT_BRACKET, 36
+.equ TOKEN_HAT, 37
+.equ TOKEN_EQUALS, 38
+.equ TOKEN_PLUS, 39
+.equ TOKEN_MINUS, 40
+.equ TOKEN_ASTERISK, 41
+.equ TOKEN_AT, 42
-.equ TOKEN_ASSIGN, 42
+.equ TOKEN_ASSIGN, 43
diff --git a/boot/stage1.s b/boot/stage1.s
index 6761bb2..c86e872 100644
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -53,10 +53,12 @@ asm_type_object: .ascii ", @object\n"
asm_restore_parameters:
.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n"
.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters
+asm_preserve_parameters: # Written by _compile_procedure after the generated prologue: one store per argument register a0-a5.
+ .ascii "sw a0, 84(sp)\nsw a1, 80(sp)\nsw a2, 76(sp)\nsw a3, 72(sp)\nsw a4, 68(sp)\nsw a5, 64(sp)\n"
+.equ ASM_PRESERVE_PARAMETERS_SIZE, . - asm_preserve_parameters
.section .bss
.type source_code, @object
-.size source_code, SOURCE_BUFFER_SIZE
source_code: .zero SOURCE_BUFFER_SIZE
.section .text
@@ -65,10 +67,10 @@ source_code: .zero SOURCE_BUFFER_SIZE
.type _compile_import, @function
_compile_import:
# Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+ addi sp, sp, -24
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
.Lcompile_import_loop:
call _skip_comment
@@ -89,9 +91,9 @@ _compile_import:
.Lcompile_import_end:
# Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
.type _build_binary_expression, @function
@@ -107,9 +109,9 @@ _build_binary_expression:
call _skip_spaces
mv a0, s1
- addi a1, sp, 16
+ addi a1, sp, 12
call _tokenize_next
- lw t0, 16(sp)
+ lw t0, 12(sp)
li t1, TOKEN_AND
beq t0, t1, .L_build_binary_expression_and
@@ -146,11 +148,6 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_and:
- /* DEBUG
- addi a0, s1, 0
- li a1, 4
- call _write_error */
-
mv s1, a0 # Skip &.
li a0, 1
call _build_expression
@@ -385,25 +382,29 @@ _compile_identifier_expression:
.type _build_expression, @function
_build_expression:
# Prologue.
- addi sp, sp, -40
- sw ra, 36(sp)
- sw s0, 32(sp)
- addi s0, sp, 40
+ addi sp, sp, -48
+ sw ra, 44(sp)
+ sw s0, 40(sp)
+ addi s0, sp, 48
addi a0, a0, '0' # Make the register number to a character.
- sw a0, 28(sp) # And save it.
+ sw a0, 36(sp) # And save it.
+
+ mv a0, s1
+ addi a1, sp, 24
+ call _tokenize_next
call _skip_spaces
call _read_token
- sw s1, 24(sp)
- sw a0, 20(sp)
+ sw s1, 32(sp)
+ sw a0, 28(sp)
- lbu a0, (s1)
- li t0, '-'
+ lw a0, 24(sp)
+
+ li t0, TOKEN_MINUS
beq a0, t0, .Lbuild_expression_negate
- lbu a0, (s1)
- li t0, '@'
+ li t0, TOKEN_AT
beq a0, t0, .Lbuild_expression_address
lbu a0, (s1)
@@ -414,8 +415,8 @@ _build_expression:
li t0, '_'
beq a0, t0, .Lbuild_expression_call
- lw a0, 20(sp)
- lw a1, 28(sp)
+ lw a0, 28(sp)
+ lw a1, 36(sp)
call _compile_identifier_expression
j .Lbuild_expression_advance
@@ -432,7 +433,7 @@ _build_expression:
j .Lbuild_expression_advance
.Lbuild_expression_address:
- lw t1, 28(sp)
+ lw t1, 36(sp)
li t0, 0x20 # _
sw t0, 16(sp)
li t0, 0x2c707320 # _sp,
@@ -449,11 +450,11 @@ _build_expression:
addi s1, s1, 1 # Skip @.
call _skip_spaces
call _read_token
- sw s1, 24(sp)
- sw a0, 20(sp)
+ sw s1, 32(sp)
+ sw a0, 28(sp)
- lw a0, 24(sp)
- lw a1, 20(sp)
+ lw a0, 32(sp)
+ lw a1, 28(sp)
addi a0, a0, 4 # Skip the "loca" variable prefix.
addi a1, a1, -4 # Skip the "loca" variable prefix.
call _write_out
@@ -464,8 +465,8 @@ _build_expression:
j .Lbuild_expression_advance
.Lbuild_expression_call:
- lw a0, 24(sp)
- lw a1, 20(sp)
+ lw a0, 32(sp)
+ lw a1, 28(sp)
add s1, s1, a1
addi s1, s1, 1
call _compile_call
@@ -473,7 +474,7 @@ _build_expression:
j .Lbuild_expression_end
.Lbuild_expression_literal:
- lw t1, 28(sp)
+ lw t1, 36(sp)
li t0, 0x00202c00 # \0,_
or t0, t0, t1
sw t0, 16(sp)
@@ -483,8 +484,8 @@ _build_expression:
li a1, 7
call _write_out
- lw a0, 24(sp)
- lw a1, 20(sp)
+ lw a0, 32(sp)
+ lw a1, 28(sp)
call _write_out
li a0, '\n'
@@ -493,14 +494,14 @@ _build_expression:
j .Lbuild_expression_advance
.Lbuild_expression_advance:
- lw a0, 20(sp)
+ lw a0, 28(sp)
add s1, s1, a0
.Lbuild_expression_end:
# Epilogue.
- lw ra, 36(sp)
- lw s0, 32(sp)
- addi sp, sp, 40
+ lw ra, 44(sp)
+ lw s0, 40(sp)
+ addi sp, sp, 48
ret
# Compiles an lvalue.
@@ -595,34 +596,20 @@ _compile_identifier:
sw s0, 24(sp)
addi s0, sp, 32
- call _read_token
-
# Save the pointer to the identifier and its length on the stack.
- sw s1, 20(sp)
- sw a0, 16(sp)
-
- add s1, s1, a0
- call _skip_spaces
- call _read_token
-
- # Save the pointer and the length of the token following the identifier.
- sw s1, 12(sp)
- sw a0, 8(sp)
+ mv a0, s1
+ addi a1, sp, 12
+ call _tokenize_next
+ addi a1, sp, 0
+ call _tokenize_next
+ mv s1, a0
- add s1, s1, a0 # Skip that token.
- call _skip_spaces
+ lw t0, 0(sp)
- li t0, 0x3d3a # :=
- sw t0, 4(sp)
- lw a0, 12(sp)
- lw a1, 8(sp)
- addi a2, sp, 4
- call _token_compare
- beqz a0, .Lcompile_identifier_assign
+ li t1, TOKEN_ASSIGN
+ beq t0, t1, .Lcompile_identifier_assign
- lw t0, 12(sp)
- lbu t0, (t0)
- li t1, 0x28 # (
+ li t1, TOKEN_LEFT_PAREN
beq t0, t1, .Lcompile_identifier_call
j .Lcompile_identifier_end
@@ -917,10 +904,10 @@ _skip_comment:
.type _compile_procedure_section, @function
_compile_procedure_section:
# Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+ addi sp, sp, -24
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
.Lcompile_procedure_section_loop:
call _skip_spaces
@@ -928,10 +915,10 @@ _compile_procedure_section:
call _skip_spaces
mv a0, s1
- addi a1, sp, 0
+ addi a1, sp, 4
call _tokenize_next
li t0, TOKEN_PROC
- lw t1, 0(sp)
+ lw t1, 4(sp)
bne t0, t1, .Lcompile_procedure_section_end
call _compile_procedure
@@ -940,18 +927,18 @@ _compile_procedure_section:
.Lcompile_procedure_section_end:
# Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
.type _compile_module_declaration, @function
_compile_module_declaration:
# Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+ addi sp, sp, -24
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
la a0, global_start
li a1, GLOBAL_START_SIZE
@@ -960,32 +947,32 @@ _compile_module_declaration:
# Skip "program".
call _skip_comment
mv a0, s1
- addi a1, sp, 0
+ addi a1, sp, 4
call _tokenize_next
mv s1, a0
# Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
.type _compile_constant_section, @function
_compile_constant_section:
# Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+ addi sp, sp, -24
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
call _skip_comment
call _skip_spaces
mv a0, s1
- addi a1, sp, 0
+ addi a1, sp, 4
call _tokenize_next
li t0, TOKEN_CONST
- lw t1, 0(sp)
+ lw t1, 4(sp)
bne t0, t1, .Lcompile_constant_section_end
mv s1, a0
@@ -1004,21 +991,21 @@ _compile_constant_section:
.Lcompile_constant_section_end:
# Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
.type _compile_constant, @function
_compile_constant:
# Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+ addi sp, sp, -24
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
mv a0, s1
- addi a1, sp, 0
+ addi a1, sp, 4
call _tokenize_next
sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0.
@@ -1027,7 +1014,7 @@ _compile_constant:
call _write_out
mv a0, s1
- addi a1, sp, 0
+ addi a1, sp, 4
call _tokenize_next
mv s1, a0 # Skip the assignment sign.
@@ -1052,24 +1039,24 @@ _compile_constant:
call _put_char
# Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
.type _compile_variable_section, @function
_compile_variable_section:
# Prologue.
- addi sp, sp, -16
- sw ra, 12(sp)
- sw s0, 8(sp)
- addi s0, sp, 16
+ addi sp, sp, -24
+ sw ra, 20(sp)
+ sw s0, 16(sp)
+ addi s0, sp, 24
mv a0, s1
- addi a1, sp, 0
+ addi a1, sp, 4
call _tokenize_next
li t0, TOKEN_VAR
- lw t1, 0(sp)
+ lw t1, 4(sp)
bne t0, t1, .Lcompile_variable_section_end
mv s1, a0
@@ -1088,24 +1075,24 @@ _compile_variable_section:
.Lcompile_variable_section_end:
# Epilogue.
- lw ra, 12(sp)
- lw s0, 8(sp)
- addi sp, sp, 16
+ lw ra, 20(sp)
+ lw s0, 16(sp)
+ addi sp, sp, 24
ret
.type _compile_variable, @function
_compile_variable:
# Prologue.
- addi sp, sp, -40
- sw ra, 36(sp)
- sw s0, 32(sp)
- addi s0, sp, 40
+ addi sp, sp, -48
+ sw ra, 44(sp)
+ sw s0, 40(sp)
+ addi s0, sp, 48
call _read_token
# Save the identifier on the stack since it should emitted multiple times.
- sw s1, 28(sp)
- sw a0, 24(sp)
+ sw s1, 36(sp)
+ sw a0, 32(sp)
add s1, s1, a0
call _skip_spaces
@@ -1117,8 +1104,8 @@ _compile_variable:
call _read_token
# Save the array size on the stack since it has to be emitted multiple times.
- sw s1, 20(sp)
- sw a0, 16(sp)
+ sw s1, 28(sp)
+ sw a0, 24(sp)
add s1, s1, a0
call _skip_spaces
@@ -1133,8 +1120,8 @@ _compile_variable:
li a1, ASM_TYPE_SIZE
call _write_out
- lw a0, 28(sp)
- lw a1, 24(sp)
+ lw a0, 36(sp)
+ lw a1, 32(sp)
call _write_out
la a0, asm_type_object
@@ -1142,29 +1129,29 @@ _compile_variable:
call _write_out
# identifier: .zero size
- lw a0, 28(sp)
- lw a1, 24(sp)
+ lw a0, 36(sp)
+ lw a1, 32(sp)
call _write_out
li t0, 0x206f7265 # ero_
- sw t0, 12(sp)
+ sw t0, 20(sp)
li t0, 0x7a2e203a # : .z
- sw t0, 8(sp)
- addi a0, sp, 8
+ sw t0, 16(sp)
+ addi a0, sp, 16
li a1, 8
call _write_out
- lw a0, 20(sp)
- lw a1, 16(sp)
+ lw a0, 28(sp)
+ lw a1, 24(sp)
call _write_out
li a0, '\n'
call _put_char
# Epilogue.
- lw ra, 36(sp)
- lw s0, 32(sp)
- addi sp, sp, 40
+ lw ra, 44(sp)
+ lw s0, 40(sp)
+ addi sp, sp, 48
ret
.type _compile_procedure, @function
@@ -1175,11 +1162,12 @@ _compile_procedure:
sw s0, 24(sp)
addi s0, sp, 32
- addi s1, s1, 5 # Skip proc_
- call _read_token
- sw s1, 20(sp)
- sw a0, 16(sp)
- add s1, s1, a0
+ mv a0, s1
+ addi a1, sp, 12
+ call _tokenize_next # Skip proc.
+ addi a1, sp, 12
+ call _tokenize_next
+ mv s1, a0
# .type identifier, @function
la a0, asm_type
@@ -1204,29 +1192,16 @@ _compile_procedure:
li a1, 2
call _write_out
- call _skip_spaces
- addi s1, s1, 1 # Skip opening argument paren.
- call _skip_spaces
- addi s1, s1, 1 # Skip closing argument paren.
-
- li t0, 0x6e # n
- sw t0, 12(sp)
- li t0, 0x69676562 # begi
- sw t0, 8(sp)
-
# Skip all declarations until we find the "begin" keyword, denoting the
# beginning of the procedure body.
.Lcompile_procedure_begin:
- call _skip_spaces
- call _read_token
-
- mv a1, a0
mv a0, s1
- addi a2, sp, 8
- add s1, s1, a1
- call _token_compare
-
- bnez a0, .Lcompile_procedure_begin
+ addi a1, sp, 4
+ call _tokenize_next
+ mv s1, a0
+ lw t0, 4(sp)
+ li t1, TOKEN_BEGIN
+ bne t0, t1, .Lcompile_procedure_begin
# Generate the procedure prologue with a predefined stack size.
la a0, prologue
@@ -1234,56 +1209,8 @@ _compile_procedure:
call _write_out
# Save passed arguments on the stack.
- li t0, 0x0a29 # )\n
- sw t0, 12(sp)
- li t0, 0x70732834 # 4(sp
- sw t0, 8(sp)
- li t0, 0x38202c30 # 0, 8
- sw t0, 4(sp)
- li t0, 0x61207773 # sw a
- sw t0, 0(sp)
- addi a0, sp, 0
- li a1, 14
- call _write_out
-
- li t0, '0'
- sb t0, 8(sp)
- li t0, 0x38202c31 # 1, 8
- sw t0, 4(sp)
- addi a0, sp, 0
- li a1, 14
- call _write_out
-
- li t0, '6'
- sb t0, 8(sp)
- li t0, 0x37202c32 # 2, 7
- sw t0, 4(sp)
- addi a0, sp, 0
- li a1, 14
- call _write_out
-
- li t0, '2'
- sb t0, 8(sp)
- li t0, 0x37202c33 # 3, 7
- sw t0, 4(sp)
- addi a0, sp, 0
- li a1, 14
- call _write_out
-
- li t0, '8'
- sb t0, 8(sp)
- li t0, 0x36202c34 # 4, 6
- sw t0, 4(sp)
- addi a0, sp, 0
- li a1, 14
- call _write_out
-
- li t0, '4'
- sb t0, 8(sp)
- li t0, 0x36202c35 # 5, 6
- sw t0, 4(sp)
- addi a0, sp, 0
- li a1, 14
+ la a0, asm_preserve_parameters
+ li a1, ASM_PRESERVE_PARAMETERS_SIZE
call _write_out
# Generate the body of the procedure.
@@ -1318,48 +1245,6 @@ _compile_procedure:
addi sp, sp, 32
ret
-# Compares two string, which of one has a length, the other one is null-terminated.
-#
-# a0 - The address of the token string.
-# a1 - The length of the string in a0.
-# a2 - The address of the null-terminated string.
-#
-# If the strings match sets a0 to 0, otherwise sets it to 1.
-.type _token_compare, @function
-_token_compare:
- addi t0, a0, 0
- addi t1, a1, 0
- addi t2, a2, 0
-
-.Ltoken_compare_loop:
- lbu t3, (t2)
-
- # Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the
- # another string is 0.
- or t4, t3, t1
- beqz t4, .Ltoken_compare_equal
-
- beqz t1, .Ltoken_compare_not_equal
- beqz t3, .Ltoken_compare_not_equal
-
- lbu t4, (t0)
- bne t3, t4, .Ltoken_compare_not_equal
-
- addi t0, t0, 1
- addi t1, t1, -1
- addi t2, t2, 1
- j .Ltoken_compare_loop
-
-.Ltoken_compare_not_equal:
- li a0, 1
- j .Ltoken_compare_end
-
-.Ltoken_compare_equal:
- li a0, 0
-
-.Ltoken_compare_end:
- ret
-
.type _compile_goto, @function
_compile_goto:
# Prologue.
@@ -1475,30 +1360,30 @@ _compile_if:
sw s0, 24(sp)
addi s0, sp, 32
- addi s1, s1, 2 # Skip the if.
- call _skip_spaces
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ mv s1, a0 # Skip the if.
call _build_binary_expression
- call _skip_spaces
- addi s1, s1, 4 # Skip the then.
-
- # if end marker.
- li t0, 0x00646e65 # end\0
- sw t0, 20(sp)
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ mv s1, a0 # Skip the then.
# Label prefix.
li t0, 0x66694c2e # .Lif
- sw t0, 16(sp)
+ sw t0, 20(sp)
li t0, 0x202c3061 # a0,_
- sw t0, 12(sp)
+ sw t0, 16(sp)
li t0, 0x207a7165 # eqz_
- sw t0, 8(sp)
+ sw t0, 12(sp)
li t0, 0x62626262 # bbbb
- sb t0, 7(sp)
+ sb t0, 11(sp)
- addi a0, sp, 7
+ addi a0, sp, 11
li a1, 13
call _write_out
@@ -1510,15 +1395,13 @@ _compile_if:
call _put_char
.Lcompile_if_loop:
- call _skip_spaces
- call _read_token
-
- mv a1, a0
mv a0, s1
- addi a2, sp, 20
- call _token_compare
+ addi a1, sp, 0
+ call _tokenize_next
- beqz a0, .Lcompile_if_end
+ lw t0, 0(sp)
+ li t1, TOKEN_END
+ beq t0, t1, .Lcompile_if_end
call _read_line
li a1, 1
@@ -1527,8 +1410,10 @@ _compile_if:
j .Lcompile_if_loop
.Lcompile_if_end:
+ mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next.
+
# Write the label prefix.
- addi a0, sp, 16
+ addi a0, sp, 20
li a1, 4
call _write_out
@@ -1537,14 +1422,13 @@ _compile_if:
call _printi
# Finalize the label.
- li t0, 0x0a3a0a3a # :\n:\n
- sh t0, 12(sp)
- addi a0, sp, 12
+ li t0, 0x0a3a # :\n
+ sh t0, 16(sp)
+ addi a0, sp, 16
li a1, 2
call _write_out
addi s2, s2, 1 # Increment the label counter.
- addi s1, s1, 4 # Skip the end with newline.
# Epilogue.
lw ra, 28(sp)
@@ -1554,8 +1438,6 @@ _compile_if:
# Parameters:
# a0 - Line length.
-# a1 - Whether the section header was already emitted. If not it should be
-# emitted before any code is written.
#
# Returns 1 in a0 if the parsed line contained a text section element such a
# procedure or the program entry point. Otherwise sets a0 to 0.
@@ -1569,9 +1451,6 @@ _compile_statement:
# Preserve passed arguments.
sw a0, 20(sp)
- sw a1, 16(sp)
-
- call _skip_comment
mv a0, s1
lw a1, 20(sp)
@@ -1591,25 +1470,31 @@ _compile_statement:
call _memcmp
beqz a0, .Lcompile_statement_goto
- li t0, 0x75746572 # retu
- sw t0, 12(sp)
+ call _skip_comment
+ /* DEBUG
mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_statement_return
+ li a1, 4
+ call _write_error
+ mv a0, s1
+ li a1, 4
+ call _write_error
+ */
- li t0, 0x6669 # if
- sw t0, 12(sp)
mv a0, s1
- addi a1, sp, 12
- li a2, 2
- call _memcmp
- beqz a0, .Lcompile_statement_if
+ addi a1, sp, 0
+ call _tokenize_next
+ lw t0, 0(sp)
- lbu t0, (s1)
- li t1, '.'
+ li t1, TOKEN_RETURN
+ beq t0, t1, .Lcompile_statement_return
+
+ li t1, TOKEN_IF
+ beq t0, t1, .Lcompile_statement_if
+
+ li t1, TOKEN_DOT
beq t0, t1, .Lcompile_statement_label
+
+ lbu t0, (s1)
li t1, '_'
beq t0, t1, .Lcompile_statement_identifier
@@ -1688,30 +1573,25 @@ _compile_entry_point:
# Generate the body of the procedure.
.Lcompile_entry_point_body:
- call _skip_spaces
- call _read_line
- sw a0, 12(sp)
- li t0, 0x2e646e65 # end
- sw t0, 8(sp)
mv a0, s1
- addi a1, sp, 8
- li a2, 4
- call _memcmp
+ addi a1, sp, 4
+ call _tokenize_next
- beqz a0, .Lcompile_entry_point_end
+ lw t0, 4(sp)
+ li t1, TOKEN_END
+ beq t0, t1, .Lcompile_entry_point_end
lw a0, 12(sp)
call _compile_statement
j .Lcompile_entry_point_body
.Lcompile_entry_point_end:
+ mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call.
+
la a0, asm_exit
li a1, ASM_EXIT_SIZE
call _write_out
- addi s1, s1, 4 # Skip end.
- call _skip_spaces # Read the possible new line at the end of the file.
-
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
index 67b2602..e358b89 100644
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -38,7 +38,6 @@
.equ CLASS_COUNT, 20
.type classification, @object
-.size classification, 128
classification:
.byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH
@@ -172,7 +171,7 @@ classification:
#
# Textual keywords in the language.
#
-.equ KEYWORDS_COUNT, 21
+.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object
keywords:
@@ -222,8 +221,8 @@ keywords:
.ascii "return"
.word 4
.ascii "cast"
- .word 5
- .ascii "defer"
+ .word 4
+ .ascii "goto"
.word 4
.ascii "case"
.word 2
@@ -251,13 +250,12 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# handles each action.
#
.type transitions, @object
-.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
- .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108
+ .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
@@ -280,9 +278,9 @@ transitions:
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren
+ .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
+ .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
+ .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
@@ -399,6 +397,10 @@ _classify_identifier:
la a1, keywords
call _strings_index
+ bnez a0, .Lclassify_identifier_end
+ li a0, TOKEN_IDENTIFIER
+
+.Lclassify_identifier_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
@@ -426,7 +428,7 @@ _classify_single:
la a1, byte_keywords
sub a0, a0, a1
- addi a0, a0, 27
+ addi a0, a0, TOKEN_IDENTIFIER + 1
# Epilogue.
lw ra, 12(sp)
@@ -466,16 +468,17 @@ _classify_composite:
.type _tokenize_next, @function
_tokenize_next:
# Prologue.
- addi sp, sp, -24
- sw ra, 20(sp)
- sw s0, 16(sp)
- addi s0, sp, 24
+ addi sp, sp, -32
+ sw ra, 28(sp)
+ sw s0, 24(sp)
+ addi s0, sp, 32
- sw s1, 12(sp) # Preserve s1 used for current source text position.
+ sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0
- sw a0, 4(sp) # Keeps a pointer to the beginning of a token.
+ sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
+ # 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
- sw s2, 8(sp) # Preserve s2 containing the current state.
+ sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
sw a1, 0(sp)
@@ -529,9 +532,9 @@ _tokenize_next:
.Ltokenize_next_skip:
addi s1, s1, 1
- lw t0, 4(sp)
+ lw t0, 12(sp)
addi t0, t0, 1
- sw t0, 4(sp)
+ sw t0, 12(sp)
j .Ltokenize_next_loop
@@ -553,16 +556,20 @@ _tokenize_next:
.Ltokenize_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
- lw a1, 4(sp)
+ lw a1, 12(sp)
sub a0, s1, a1
+ sw a0, 8(sp)
call _classify_identifier
- lw a1, 0(sp)
- sw a0, (a1)
+ sw a0, 4(sp)
+ lw a0, 0(sp)
+ addi a1, sp, 4
+ li a2, 12
+ call _memcpy
j .Ltokenize_next_end
.Ltokenize_next_single:
- lw a0, 4(sp)
+ lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
@@ -573,7 +580,7 @@ _tokenize_next:
.Ltokenize_next_composite:
addi s1, s1, 1
- lw a1, 4(sp)
+ lw a1, 12(sp)
sub a0, s1, a1
call _classify_composite
lw a1, 0(sp)
@@ -585,11 +592,11 @@ _tokenize_next:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
- lw s1, 12(sp)
- lw s2, 8(sp)
+ lw s1, 20(sp)
+ lw s2, 16(sp)
# Epilogue.
- lw ra, 20(sp)
- lw s0, 16(sp)
- addi sp, sp, 24
+ lw ra, 28(sp)
+ lw s0, 24(sp)
+ addi sp, sp, 32
ret