From df1c0486c5c7e88e9a5a3bd8a4335fc74c4963fc Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sun, 4 May 2025 23:49:39 +0200 Subject: [PATCH] Fix strings search looping --- boot/common-boot.s | 5 + boot/definitions.inc | 39 ++-- boot/stage1.s | 484 ++++++++++++++++--------------------------- boot/tokenizer.s | 65 +++--- 4 files changed, 243 insertions(+), 350 deletions(-) diff --git a/boot/common-boot.s b/boot/common-boot.s index a6fb04e..2a192d3 100644 --- a/boot/common-boot.s +++ b/boot/common-boot.s @@ -425,6 +425,9 @@ _memcpy: addi a0, a0, 1 addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcpy_loop .Lmemcpy_end: mv a0, t0 @@ -473,6 +476,8 @@ _strings_index: beqz a0, .Lstrings_index_end .Lstrings_index_next: + # Advance the pointer, reduce the length. + lw a2, (s2) addi s2, s2, 4 add s2, s2, a2 addi s1, s1, -1 diff --git a/boot/definitions.inc b/boot/definitions.inc index 4d8ab9c..d61a5f6 100644 --- a/boot/definitions.inc +++ b/boot/definitions.inc @@ -19,34 +19,35 @@ .equ TOKEN_END, 13 .equ TOKEN_TYPE, 14 .equ TOKEN_RECORD, 15 -.equ TOKEN_RECORD, 16 +.equ TOKEN_UNION, 16 .equ TOKEN_TRUE, 17 -.equ TOKEN_FASE, 18 +.equ TOKEN_FALSE, 18 .equ TOKEN_NIL, 19 .equ TOKEN_XOR, 20 .equ TOKEN_OR, 21 .equ TOKEN_RETURN, 22 .equ TOKEN_CAST, 23 -.equ TOKEN_DEFER, 24 +.equ TOKEN_GOTO, 24 .equ TOKEN_CASE, 25 .equ TOKEN_OF, 26 +.equ TOKEN_IDENTIFIER, 27 # The constant should match the character index in the byte_keywords string. -.equ TOKEN_AND, 27 -.equ TOKEN_DOT, 28 -.equ TOKEN_COMMA, 29 -.equ TOKEN_COLON, 30 -.equ TOKEN_SEMICOLON, 31 -.equ TOKEN_LEFT_PAREN, 32 -.equ TOKEN_RIGHT_PAREN, 33 -.equ TOKEN_LEFT_BRACKET, 34 -.equ TOKEN_RIGHT_BRACKET, 35 -.equ TOKEN_HAT, 36 -.equ TOKEN_EQUALS, 37 -.equ TOKEN_PLUS, 38 -.equ TOKEN_MINUS, 39 -.equ TOKEN_ASTERISK, 40 -.equ TOKEN_AT, 41 +.equ TOKEN_AND, 28 +.equ TOKEN_DOT, 29 +.equ TOKEN_COMMA, 30 +.equ TOKEN_COLON, 31 +.equ TOKEN_SEMICOLON, 32 +.equ TOKEN_LEFT_PAREN, 33 +.equ TOKEN_RIGHT_PAREN, 34 +.equ TOKEN_LEFT_BRACKET, 35 +.equ TOKEN_RIGHT_BRACKET, 36 +.equ TOKEN_HAT, 37 +.equ TOKEN_EQUALS, 38 +.equ TOKEN_PLUS, 39 +.equ TOKEN_MINUS, 40 +.equ TOKEN_ASTERISK, 41 +.equ TOKEN_AT, 42 -.equ TOKEN_ASSIGN, 42 +.equ TOKEN_ASSIGN, 43 diff --git a/boot/stage1.s b/boot/stage1.s index 6761bb2..c86e872 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -53,10 +53,12 @@ asm_type_object: .ascii ", @object\n" asm_restore_parameters: .ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n" .equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters +asm_preserve_parameters: + .ascii "sw a0, 84(sp)\nsw a1, 80(sp)\nsw a2, 76(sp)\nsw a2, 76(sp)\nsw a3, 72(sp)\nsw a4, 68(sp)\nsw a5, 64(sp)\n" +.equ ASM_PRESERVE_PARAMETERS_SIZE, . - asm_preserve_parameters .section .bss .type source_code, @object -.size source_code, SOURCE_BUFFER_SIZE source_code: .zero SOURCE_BUFFER_SIZE .section .text @@ -65,10 +67,10 @@ source_code: .zero SOURCE_BUFFER_SIZE .type _compile_import, @function _compile_import: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 .Lcompile_import_loop: call _skip_comment @@ -89,9 +91,9 @@ _compile_import: .Lcompile_import_end: # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 ret .type _build_binary_expression, @function @@ -107,9 +109,9 @@ _build_binary_expression: call _skip_spaces mv a0, s1 - addi a1, sp, 16 + addi a1, sp, 12 call _tokenize_next - lw t0, 16(sp) + lw t0, 12(sp) li t1, TOKEN_AND beq t0, t1, .L_build_binary_expression_and @@ -146,11 +148,6 @@ _build_binary_expression: j .Lbuild_binary_expression_end .L_build_binary_expression_and: - /* DEBUG - addi a0, s1, 0 - li a1, 4 - call _write_error */ - mv s1, a0 # Skip &. li a0, 1 call _build_expression @@ -385,25 +382,29 @@ _compile_identifier_expression: .type _build_expression, @function _build_expression: # Prologue. - addi sp, sp, -40 - sw ra, 36(sp) - sw s0, 32(sp) - addi s0, sp, 40 + addi sp, sp, -48 + sw ra, 44(sp) + sw s0, 40(sp) + addi s0, sp, 48 addi a0, a0, '0' # Make the register number to a character. - sw a0, 28(sp) # And save it. + sw a0, 36(sp) # And save it. + + mv a0, s1 + addi a1, sp, 24 + call _tokenize_next call _skip_spaces call _read_token - sw s1, 24(sp) - sw a0, 20(sp) + sw s1, 32(sp) + sw a0, 28(sp) - lbu a0, (s1) - li t0, '-' + lw a0, 24(sp) + + li t0, TOKEN_MINUS beq a0, t0, .Lbuild_expression_negate - lbu a0, (s1) - li t0, '@' + li t0, TOKEN_AT beq a0, t0, .Lbuild_expression_address lbu a0, (s1) @@ -414,8 +415,8 @@ _build_expression: li t0, '_' beq a0, t0, .Lbuild_expression_call - lw a0, 20(sp) - lw a1, 28(sp) + lw a0, 28(sp) + lw a1, 36(sp) call _compile_identifier_expression j .Lbuild_expression_advance @@ -432,7 +433,7 @@ _build_expression: j .Lbuild_expression_advance .Lbuild_expression_address: - lw t1, 28(sp) + lw t1, 36(sp) li t0, 0x20 # _ sw t0, 16(sp) li t0, 0x2c707320 # _sp, @@ -449,11 +450,11 @@ _build_expression: addi s1, s1, 1 # Skip @. call _skip_spaces call _read_token - sw s1, 24(sp) - sw a0, 20(sp) + sw s1, 32(sp) + sw a0, 28(sp) - lw a0, 24(sp) - lw a1, 20(sp) + lw a0, 32(sp) + lw a1, 28(sp) addi a0, a0, 4 # Skip the "loca" variable prefix. addi a1, a1, -4 # Skip the "loca" variable prefix. call _write_out @@ -464,8 +465,8 @@ _build_expression: j .Lbuild_expression_advance .Lbuild_expression_call: - lw a0, 24(sp) - lw a1, 20(sp) + lw a0, 32(sp) + lw a1, 28(sp) add s1, s1, a1 addi s1, s1, 1 call _compile_call @@ -473,7 +474,7 @@ _build_expression: j .Lbuild_expression_end .Lbuild_expression_literal: - lw t1, 28(sp) + lw t1, 36(sp) li t0, 0x00202c00 # \0,_ or t0, t0, t1 sw t0, 16(sp) @@ -483,8 +484,8 @@ _build_expression: li a1, 7 call _write_out - lw a0, 24(sp) - lw a1, 20(sp) + lw a0, 32(sp) + lw a1, 28(sp) call _write_out li a0, '\n' @@ -493,14 +494,14 @@ _build_expression: j .Lbuild_expression_advance .Lbuild_expression_advance: - lw a0, 20(sp) + lw a0, 28(sp) add s1, s1, a0 .Lbuild_expression_end: # Epilogue. - lw ra, 36(sp) - lw s0, 32(sp) - addi sp, sp, 40 + lw ra, 44(sp) + lw s0, 40(sp) + addi sp, sp, 48 ret # Compiles an lvalue. @@ -595,34 +596,20 @@ _compile_identifier: sw s0, 24(sp) addi s0, sp, 32 - call _read_token - # Save the pointer to the identifier and its length on the stack. - sw s1, 20(sp) - sw a0, 16(sp) + mv a0, s1 + addi a1, sp, 12 + call _tokenize_next + addi a1, sp, 0 + call _tokenize_next + mv s1, a0 - add s1, s1, a0 - call _skip_spaces - call _read_token + lw t0, 0(sp) - # Save the pointer and the length of the token following the identifier. - sw s1, 12(sp) - sw a0, 8(sp) + li t1, TOKEN_ASSIGN + beq t0, t1, .Lcompile_identifier_assign - add s1, s1, a0 # Skip that token. - call _skip_spaces - - li t0, 0x3d3a # := - sw t0, 4(sp) - lw a0, 12(sp) - lw a1, 8(sp) - addi a2, sp, 4 - call _token_compare - beqz a0, .Lcompile_identifier_assign - - lw t0, 12(sp) - lbu t0, (t0) - li t1, 0x28 # ( + li t1, TOKEN_LEFT_PAREN beq t0, t1, .Lcompile_identifier_call j .Lcompile_identifier_end @@ -917,10 +904,10 @@ _skip_comment: .type _compile_procedure_section, @function _compile_procedure_section: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 .Lcompile_procedure_section_loop: call _skip_spaces @@ -928,10 +915,10 @@ _compile_procedure_section: call _skip_spaces mv a0, s1 - addi a1, sp, 0 + addi a1, sp, 4 call _tokenize_next li t0, TOKEN_PROC - lw t1, 0(sp) + lw t1, 4(sp) bne t0, t1, .Lcompile_procedure_section_end call _compile_procedure @@ -940,18 +927,18 @@ _compile_procedure_section: .Lcompile_procedure_section_end: # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 ret .type _compile_module_declaration, @function _compile_module_declaration: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 la a0, global_start li a1, GLOBAL_START_SIZE @@ -960,32 +947,32 @@ _compile_module_declaration: # Skip "program". call _skip_comment mv a0, s1 - addi a1, sp, 0 + addi a1, sp, 4 call _tokenize_next mv s1, a0 # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 ret .type _compile_constant_section, @function _compile_constant_section: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 call _skip_comment call _skip_spaces mv a0, s1 - addi a1, sp, 0 + addi a1, sp, 4 call _tokenize_next li t0, TOKEN_CONST - lw t1, 0(sp) + lw t1, 4(sp) bne t0, t1, .Lcompile_constant_section_end mv s1, a0 @@ -1004,21 +991,21 @@ _compile_constant_section: .Lcompile_constant_section_end: # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 ret .type _compile_constant, @function _compile_constant: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 mv a0, s1 - addi a1, sp, 0 + addi a1, sp, 4 call _tokenize_next sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0. @@ -1027,7 +1014,7 @@ _compile_constant: call _write_out mv a0, s1 - addi a1, sp, 0 + addi a1, sp, 4 call _tokenize_next mv s1, a0 # Skip the assignment sign. @@ -1052,24 +1039,24 @@ _compile_constant: call _put_char # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 ret .type _compile_variable_section, @function _compile_variable_section: # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 mv a0, s1 - addi a1, sp, 0 + addi a1, sp, 4 call _tokenize_next li t0, TOKEN_VAR - lw t1, 0(sp) + lw t1, 4(sp) bne t0, t1, .Lcompile_variable_section_end mv s1, a0 @@ -1088,24 +1075,24 @@ _compile_variable_section: .Lcompile_variable_section_end: # Epilogue. - lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 ret .type _compile_variable, @function _compile_variable: # Prologue. - addi sp, sp, -40 - sw ra, 36(sp) - sw s0, 32(sp) - addi s0, sp, 40 + addi sp, sp, -48 + sw ra, 44(sp) + sw s0, 40(sp) + addi s0, sp, 48 call _read_token # Save the identifier on the stack since it should emitted multiple times. - sw s1, 28(sp) - sw a0, 24(sp) + sw s1, 36(sp) + sw a0, 32(sp) add s1, s1, a0 call _skip_spaces @@ -1117,8 +1104,8 @@ _compile_variable: call _read_token # Save the array size on the stack since it has to be emitted multiple times. - sw s1, 20(sp) - sw a0, 16(sp) + sw s1, 28(sp) + sw a0, 24(sp) add s1, s1, a0 call _skip_spaces @@ -1133,8 +1120,8 @@ _compile_variable: li a1, ASM_TYPE_SIZE call _write_out - lw a0, 28(sp) - lw a1, 24(sp) + lw a0, 36(sp) + lw a1, 32(sp) call _write_out la a0, asm_type_object @@ -1142,29 +1129,29 @@ _compile_variable: call _write_out # identifier: .zero size - lw a0, 28(sp) - lw a1, 24(sp) + lw a0, 36(sp) + lw a1, 32(sp) call _write_out li t0, 0x206f7265 # ero_ - sw t0, 12(sp) + sw t0, 20(sp) li t0, 0x7a2e203a # : .z - sw t0, 8(sp) - addi a0, sp, 8 + sw t0, 16(sp) + addi a0, sp, 16 li a1, 8 call _write_out - lw a0, 20(sp) - lw a1, 16(sp) + lw a0, 28(sp) + lw a1, 24(sp) call _write_out li a0, '\n' call _put_char # Epilogue. - lw ra, 36(sp) - lw s0, 32(sp) - addi sp, sp, 40 + lw ra, 44(sp) + lw s0, 40(sp) + addi sp, sp, 48 ret .type _compile_procedure, @function @@ -1175,11 +1162,12 @@ _compile_procedure: sw s0, 24(sp) addi s0, sp, 32 - addi s1, s1, 5 # Skip proc_ - call _read_token - sw s1, 20(sp) - sw a0, 16(sp) - add s1, s1, a0 + mv a0, s1 + addi a1, sp, 12 + call _tokenize_next # Skip proc. + addi a1, sp, 12 + call _tokenize_next + mv s1, a0 # .type identifier, @function la a0, asm_type @@ -1204,29 +1192,16 @@ _compile_procedure: li a1, 2 call _write_out - call _skip_spaces - addi s1, s1, 1 # Skip opening argument paren. - call _skip_spaces - addi s1, s1, 1 # Skip closing argument paren. - - li t0, 0x6e # n - sw t0, 12(sp) - li t0, 0x69676562 # begi - sw t0, 8(sp) - # Skip all declarations until we find the "begin" keyword, denoting the # beginning of the procedure body. .Lcompile_procedure_begin: - call _skip_spaces - call _read_token - - mv a1, a0 mv a0, s1 - addi a2, sp, 8 - add s1, s1, a1 - call _token_compare - - bnez a0, .Lcompile_procedure_begin + addi a1, sp, 4 + call _tokenize_next + mv s1, a0 + lw t0, 4(sp) + li t1, TOKEN_BEGIN + bne t0, t1, .Lcompile_procedure_begin # Generate the procedure prologue with a predefined stack size. la a0, prologue @@ -1234,56 +1209,8 @@ _compile_procedure: call _write_out # Save passed arguments on the stack. - li t0, 0x0a29 # )\n - sw t0, 12(sp) - li t0, 0x70732834 # 4(sp - sw t0, 8(sp) - li t0, 0x38202c30 # 0, 8 - sw t0, 4(sp) - li t0, 0x61207773 # sw a - sw t0, 0(sp) - addi a0, sp, 0 - li a1, 14 - call _write_out - - li t0, '0' - sb t0, 8(sp) - li t0, 0x38202c31 # 1, 8 - sw t0, 4(sp) - addi a0, sp, 0 - li a1, 14 - call _write_out - - li t0, '6' - sb t0, 8(sp) - li t0, 0x37202c32 # 2, 7 - sw t0, 4(sp) - addi a0, sp, 0 - li a1, 14 - call _write_out - - li t0, '2' - sb t0, 8(sp) - li t0, 0x37202c33 # 3, 7 - sw t0, 4(sp) - addi a0, sp, 0 - li a1, 14 - call _write_out - - li t0, '8' - sb t0, 8(sp) - li t0, 0x36202c34 # 4, 6 - sw t0, 4(sp) - addi a0, sp, 0 - li a1, 14 - call _write_out - - li t0, '4' - sb t0, 8(sp) - li t0, 0x36202c35 # 5, 6 - sw t0, 4(sp) - addi a0, sp, 0 - li a1, 14 + la a0, asm_preserve_parameters + li a1, ASM_PRESERVE_PARAMETERS_SIZE call _write_out # Generate the body of the procedure. @@ -1318,48 +1245,6 @@ _compile_procedure: addi sp, sp, 32 ret -# Compares two string, which of one has a length, the other one is null-terminated. -# -# a0 - The address of the token string. -# a1 - The length of the string in a0. -# a2 - The address of the null-terminated string. -# -# If the strings match sets a0 to 0, otherwise sets it to 1. -.type _token_compare, @function -_token_compare: - addi t0, a0, 0 - addi t1, a1, 0 - addi t2, a2, 0 - -.Ltoken_compare_loop: - lbu t3, (t2) - - # Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the - # another string is 0. - or t4, t3, t1 - beqz t4, .Ltoken_compare_equal - - beqz t1, .Ltoken_compare_not_equal - beqz t3, .Ltoken_compare_not_equal - - lbu t4, (t0) - bne t3, t4, .Ltoken_compare_not_equal - - addi t0, t0, 1 - addi t1, t1, -1 - addi t2, t2, 1 - j .Ltoken_compare_loop - -.Ltoken_compare_not_equal: - li a0, 1 - j .Ltoken_compare_end - -.Ltoken_compare_equal: - li a0, 0 - -.Ltoken_compare_end: - ret - .type _compile_goto, @function _compile_goto: # Prologue. @@ -1475,30 +1360,30 @@ _compile_if: sw s0, 24(sp) addi s0, sp, 32 - addi s1, s1, 2 # Skip the if. - call _skip_spaces + mv a0, s1 + addi a1, sp, 0 + call _tokenize_next + mv s1, a0 # Skip the if. call _build_binary_expression - call _skip_spaces - addi s1, s1, 4 # Skip the then. - - # if end marker. - li t0, 0x00646e65 # end\0 - sw t0, 20(sp) + mv a0, s1 + addi a1, sp, 0 + call _tokenize_next + mv s1, a0 # Skip the then. # Label prefix. li t0, 0x66694c2e # .Lif - sw t0, 16(sp) + sw t0, 20(sp) li t0, 0x202c3061 # a0,_ - sw t0, 12(sp) + sw t0, 16(sp) li t0, 0x207a7165 # eqz_ - sw t0, 8(sp) + sw t0, 12(sp) li t0, 0x62626262 # bbbb - sb t0, 7(sp) + sb t0, 11(sp) - addi a0, sp, 7 + addi a0, sp, 11 li a1, 13 call _write_out @@ -1510,15 +1395,13 @@ _compile_if: call _put_char .Lcompile_if_loop: - call _skip_spaces - call _read_token - - mv a1, a0 mv a0, s1 - addi a2, sp, 20 - call _token_compare + addi a1, sp, 0 + call _tokenize_next - beqz a0, .Lcompile_if_end + lw t0, 0(sp) + li t1, TOKEN_END + beq t0, t1, .Lcompile_if_end call _read_line li a1, 1 @@ -1527,8 +1410,10 @@ _compile_if: j .Lcompile_if_loop .Lcompile_if_end: + mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next. + # Write the label prefix. - addi a0, sp, 16 + addi a0, sp, 20 li a1, 4 call _write_out @@ -1537,14 +1422,13 @@ _compile_if: call _printi # Finalize the label. - li t0, 0x0a3a0a3a # :\n:\n - sh t0, 12(sp) - addi a0, sp, 12 + li t0, 0x0a3a # :\n:\n + sh t0, 16(sp) + addi a0, sp, 16 li a1, 2 call _write_out addi s2, s2, 1 # Increment the label counter. - addi s1, s1, 4 # Skip the end with newline. # Epilogue. lw ra, 28(sp) @@ -1554,8 +1438,6 @@ _compile_if: # Parameters: # a0 - Line length. -# a1 - Whether the section header was already emitted. If not it should be -# emitted before any code is written. # # Returns 1 in a0 if the parsed line contained a text section element such a # procedure or the program entry point. Otherwise sets a0 to 0. @@ -1569,9 +1451,6 @@ _compile_statement: # Preserve passed arguments. sw a0, 20(sp) - sw a1, 16(sp) - - call _skip_comment mv a0, s1 lw a1, 20(sp) @@ -1591,25 +1470,31 @@ _compile_statement: call _memcmp beqz a0, .Lcompile_statement_goto - li t0, 0x75746572 # retu - sw t0, 12(sp) + call _skip_comment + /* DEBUG mv a0, s1 - addi a1, sp, 12 - li a2, 4 - call _memcmp - beqz a0, .Lcompile_statement_return + li a1, 4 + call _write_error + mv a0, s1 + li a1, 4 + call _write_error + */ - li t0, 0x6669 # if - sw t0, 12(sp) mv a0, s1 - addi a1, sp, 12 - li a2, 2 - call _memcmp - beqz a0, .Lcompile_statement_if + addi a1, sp, 0 + call _tokenize_next + lw t0, 0(sp) + + li t1, TOKEN_RETURN + beq t0, t1, .Lcompile_statement_return + + li t1, TOKEN_IF + beq t0, t1, .Lcompile_statement_if + + li t1, TOKEN_DOT + beq t0, t1, .Lcompile_statement_label lbu t0, (s1) - li t1, '.' - beq t0, t1, .Lcompile_statement_label li t1, '_' beq t0, t1, .Lcompile_statement_identifier @@ -1688,30 +1573,25 @@ _compile_entry_point: # Generate the body of the procedure. .Lcompile_entry_point_body: - call _skip_spaces - call _read_line - sw a0, 12(sp) - li t0, 0x2e646e65 # end - sw t0, 8(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 4 - call _memcmp + addi a1, sp, 4 + call _tokenize_next - beqz a0, .Lcompile_entry_point_end + lw t0, 4(sp) + li t1, TOKEN_END + beq t0, t1, .Lcompile_entry_point_end lw a0, 12(sp) call _compile_statement j .Lcompile_entry_point_body .Lcompile_entry_point_end: + mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call. + la a0, asm_exit li a1, ASM_EXIT_SIZE call _write_out - addi s1, s1, 4 # Skip end. - call _skip_spaces # Read the possible new line at the end of the file. - # Epilogue. lw ra, 4(sp) lw s0, 0(sp) diff --git a/boot/tokenizer.s b/boot/tokenizer.s index 67b2602..e358b89 100644 --- a/boot/tokenizer.s +++ b/boot/tokenizer.s @@ -38,7 +38,6 @@ .equ CLASS_COUNT, 20 .type classification, @object -.size classification, 128 classification: .byte CLASS_EOF # 00 NUL .byte CLASS_INVALID # 01 SOH @@ -172,7 +171,7 @@ classification: # # Textual keywords in the language. # -.equ KEYWORDS_COUNT, 21 +.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1 .type keywords, @object keywords: @@ -222,8 +221,8 @@ keywords: .ascii "return" .word 4 .ascii "cast" - .word 5 - .ascii "defer" + .word 4 + .ascii "goto" .word 4 .ascii "case" .word 2 @@ -251,13 +250,12 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@" # handles each action. # .type transitions, @object -.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT transitions: # Invalid Digit Alpha Space : = ( ) # * _ Single Hex 0 x NUL . # - " or ' > < .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff - .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108 + .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff .word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff @@ -280,9 +278,9 @@ transitions: .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff .word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff - .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren + .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff + .word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff + .word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff @@ -399,6 +397,10 @@ _classify_identifier: la a1, keywords call _strings_index + bnez a0, .Lclassify_identifier_end + li a0, TOKEN_IDENTIFIER + +.Lclassify_identifier_end: # Epilogue. lw ra, 12(sp) lw s0, 8(sp) @@ -426,7 +428,7 @@ _classify_single: la a1, byte_keywords sub a0, a0, a1 - addi a0, a0, 27 + addi a0, a0, TOKEN_IDENTIFIER + 1 # Epilogue. lw ra, 12(sp) @@ -466,16 +468,17 @@ _classify_composite: .type _tokenize_next, @function _tokenize_next: # Prologue. - addi sp, sp, -24 - sw ra, 20(sp) - sw s0, 16(sp) - addi s0, sp, 24 + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 - sw s1, 12(sp) # Preserve s1 used for current source text position. + sw s1, 20(sp) # Preserve s1 used for current source text position. mv s1, a0 - sw a0, 4(sp) # Keeps a pointer to the beginning of a token. + sw a0, 12(sp) # Keeps a pointer to the beginning of a token. + # 4(sp) and 8(sp) are reserved for the kind and length of the token if needed. - sw s2, 8(sp) # Preserve s2 containing the current state. + sw s2, 16(sp) # Preserve s2 containing the current state. li s2, 0x00 # Initial, start state. sw a1, 0(sp) @@ -529,9 +532,9 @@ _tokenize_next: .Ltokenize_next_skip: addi s1, s1, 1 - lw t0, 4(sp) + lw t0, 12(sp) addi t0, t0, 1 - sw t0, 4(sp) + sw t0, 12(sp) j .Ltokenize_next_loop @@ -553,16 +556,20 @@ _tokenize_next: .Ltokenize_next_identifier: # An identifier can be a textual keyword. # Check the kind of the token and write it into the output parameter. - lw a1, 4(sp) + lw a1, 12(sp) sub a0, s1, a1 + sw a0, 8(sp) call _classify_identifier - lw a1, 0(sp) - sw a0, (a1) + sw a0, 4(sp) + lw a0, 0(sp) + addi a1, sp, 4 + li a2, 12 + call _memcpy j .Ltokenize_next_end .Ltokenize_next_single: - lw a0, 4(sp) + lw a0, 12(sp) addi s1, a0, 1 lbu a0, (a0) call _classify_single @@ -573,7 +580,7 @@ _tokenize_next: .Ltokenize_next_composite: addi s1, s1, 1 - lw a1, 4(sp) + lw a1, 12(sp) sub a0, s1, a1 call _classify_composite lw a1, 0(sp) @@ -585,11 +592,11 @@ _tokenize_next: mv a0, s1 # Return the advanced text pointer. # Restore saved registers. - lw s1, 12(sp) - lw s2, 8(sp) + lw s1, 20(sp) + lw s2, 16(sp) # Epilogue. - lw ra, 20(sp) - lw s0, 16(sp) - addi sp, sp, 24 + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 ret