Tokenize single character symbols
This commit is contained in:
		
							
								
								
									
										3
									
								
								Gemfile
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								Gemfile
									
									
									
									
									
								
							| @@ -1,3 +1,6 @@ | |||||||
|  | # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||||
|  | # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||||
|  | # obtain one at https://mozilla.org/MPL/2.0/. | ||||||
| # frozen_string_literal: true | # frozen_string_literal: true | ||||||
|  |  | ||||||
| source 'https://rubygems.org' | source 'https://rubygems.org' | ||||||
|   | |||||||
							
								
								
									
										3
									
								
								Rakefile
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								Rakefile
									
									
									
									
									
								
							| @@ -1,3 +1,6 @@ | |||||||
|  | # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||||
|  | # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||||
|  | # obtain one at https://mozilla.org/MPL/2.0/. | ||||||
| # frozen_string_literal: true | # frozen_string_literal: true | ||||||
|  |  | ||||||
| require 'open3' | require 'open3' | ||||||
|   | |||||||
| @@ -1,3 +1,7 @@ | |||||||
|  | # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||||
|  | # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||||
|  | # obtain one at https://mozilla.org/MPL/2.0/. | ||||||
|  |  | ||||||
| .global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower | .global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower | ||||||
| .global _write_out, _read_file, _write_error, _put_char, _printi | .global _write_out, _read_file, _write_error, _put_char, _printi | ||||||
| .global _get, _memcmp, _memchr, _memmem, _memcpy | .global _get, _memcmp, _memchr, _memmem, _memcpy | ||||||
|   | |||||||
| @@ -1,3 +1,7 @@ | |||||||
|  | # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||||
|  | # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||||
|  | # obtain one at https://mozilla.org/MPL/2.0/. | ||||||
|  |  | ||||||
| # The constant should match the index in the keywords array in tokenizer.s. | # The constant should match the index in the keywords array in tokenizer.s. | ||||||
|  |  | ||||||
| .equ TOKEN_PROGRAM, 1 | .equ TOKEN_PROGRAM, 1 | ||||||
| @@ -26,3 +30,23 @@ | |||||||
| .equ TOKEN_DEFER, 24 | .equ TOKEN_DEFER, 24 | ||||||
| .equ TOKEN_CASE, 25 | .equ TOKEN_CASE, 25 | ||||||
| .equ TOKEN_OF, 26 | .equ TOKEN_OF, 26 | ||||||
|  |  | ||||||
|  | # Each constant is the character's index in the byte_keywords string plus TOKEN_AND (27). | ||||||
|  |  | ||||||
|  | .equ TOKEN_AND, 27 | ||||||
|  | .equ TOKEN_DOT, 28 | ||||||
|  | .equ TOKEN_COMMA, 29 | ||||||
|  | .equ TOKEN_COLON, 30 | ||||||
|  | .equ TOKEN_SEMICOLON, 31 | ||||||
|  | .equ TOKEN_LEFT_PAREN, 32 | ||||||
|  | .equ TOKEN_RIGHT_PAREN, 33 | ||||||
|  | .equ TOKEN_LEFT_BRACKET, 34 | ||||||
|  | .equ TOKEN_RIGHT_BRACKET, 35 | ||||||
|  | .equ TOKEN_HAT, 36 | ||||||
|  | .equ TOKEN_EQUALS, 37 | ||||||
|  | .equ TOKEN_PLUS, 38 | ||||||
|  | .equ TOKEN_MINUS, 39 | ||||||
|  | .equ TOKEN_ASTERISK, 40 | ||||||
|  | .equ TOKEN_AT, 41 | ||||||
|  |  | ||||||
|  | .equ TOKEN_ASSIGN, 42 | ||||||
|   | |||||||
							
								
								
									
										369
									
								
								boot/stage1.s
									
									
									
									
									
								
							
							
						
						
									
										369
									
								
								boot/stage1.s
									
									
									
									
									
								
							| @@ -1,6 +1,10 @@ | |||||||
|  | # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||||
|  | # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||||
|  | # obtain one at https://mozilla.org/MPL/2.0/. | ||||||
|  |  | ||||||
| .global _start # Program entry point. | .global _start # Program entry point. | ||||||
|  |  | ||||||
| # Global variables or registers. | # Registers used as global variables: | ||||||
| # s1 - Contains the current position in the source text. | # s1 - Contains the current position in the source text. | ||||||
| # s2 - Label counter. | # s2 - Label counter. | ||||||
|  |  | ||||||
| @@ -42,6 +46,10 @@ asm_neg_a0: .ascii "neg a0, a0\n" | |||||||
| .equ ASM_NEG_A0_SIZE, . - asm_neg_a0 | .equ ASM_NEG_A0_SIZE, . - asm_neg_a0 | ||||||
| asm_type: .ascii ".type " | asm_type: .ascii ".type " | ||||||
| .equ ASM_TYPE_SIZE, . - asm_type | .equ ASM_TYPE_SIZE, . - asm_type | ||||||
|  | asm_type_function: .ascii ", @function\n" | ||||||
|  | .equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function | ||||||
|  | asm_type_object: .ascii ", @object\n" | ||||||
|  | .equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object | ||||||
| asm_restore_parameters: | asm_restore_parameters: | ||||||
| 	.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n" | 	.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n" | ||||||
| .equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters | .equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters | ||||||
| @@ -77,14 +85,6 @@ _compile_import: | |||||||
| 	call _tokenize_next | 	call _tokenize_next | ||||||
| 	mv s1, a0 | 	mv s1, a0 | ||||||
|  |  | ||||||
| 	/* DEBUG  |  | ||||||
| 	lw t0, 0(sp) |  | ||||||
| 	addi t0, t0, '0' |  | ||||||
| 	sw t0, 4(sp) |  | ||||||
| 	addi a0, sp, 4 |  | ||||||
| 	li a1, 1 |  | ||||||
| 	call _write_error*/ |  | ||||||
|  |  | ||||||
| 	j .Lcompile_import_loop | 	j .Lcompile_import_loop | ||||||
|  |  | ||||||
| .Lcompile_import_end: | .Lcompile_import_end: | ||||||
| @@ -104,63 +104,35 @@ _build_binary_expression: | |||||||
|  |  | ||||||
| 	li a0, 0 | 	li a0, 0 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
|  |  | ||||||
| 	call _skip_spaces | 	call _skip_spaces | ||||||
| 	call _read_token |  | ||||||
| 	sw a0, 20(sp) |  | ||||||
|  |  | ||||||
| 	li t0, '&' |  | ||||||
| 	sw t0, 16(sp) |  | ||||||
| 	mv a0, s1 | 	mv a0, s1 | ||||||
| 	lw a1, 20(sp) | 	addi a1, sp, 16 | ||||||
| 	addi a2, sp, 16 | 	call _tokenize_next | ||||||
| 	call _token_compare | 	lw t0, 16(sp) | ||||||
| 	beqz a0, .L_build_binary_expression_and |  | ||||||
|  |  | ||||||
| 	li t0, 0x726f # or | 	li t1, TOKEN_AND | ||||||
| 	sw t0, 16(sp) | 	beq t0, t1, .L_build_binary_expression_and | ||||||
| 	mv a0, s1 |  | ||||||
| 	lw a1, 20(sp) |  | ||||||
| 	addi a2, sp, 16 |  | ||||||
| 	call _token_compare |  | ||||||
| 	beqz a0, .L_build_binary_expression_or |  | ||||||
|  |  | ||||||
| 	li t0, '=' | 	li t1, TOKEN_OR | ||||||
| 	sw t0, 16(sp) | 	beq t0, t1, .L_build_binary_expression_or | ||||||
| 	mv a0, s1 |  | ||||||
| 	lw a1, 20(sp) |  | ||||||
| 	addi a2, sp, 16 |  | ||||||
| 	call _token_compare |  | ||||||
| 	beqz a0, .L_build_binary_expression_equal |  | ||||||
|  |  | ||||||
| 	li t0, '+' | 	li t1, TOKEN_PLUS | ||||||
| 	sw t0, 16(sp) | 	beq t0, t1, .L_build_binary_expression_plus | ||||||
| 	mv a0, s1 |  | ||||||
| 	lw a1, 20(sp) |  | ||||||
| 	addi a2, sp, 16 |  | ||||||
| 	call _token_compare |  | ||||||
| 	beqz a0, .L_build_binary_expression_plus |  | ||||||
|  |  | ||||||
| 	li t0, '-' | 	li t1, TOKEN_EQUALS | ||||||
| 	sw t0, 16(sp) | 	beq t0, t1, .L_build_binary_expression_equal | ||||||
| 	mv a0, s1 |  | ||||||
| 	lw a1, 20(sp) |  | ||||||
| 	addi a2, sp, 16 |  | ||||||
| 	call _token_compare |  | ||||||
| 	beqz a0, .L_build_binary_expression_minus |  | ||||||
|  |  | ||||||
| 	li t0, '*' | 	li t1, TOKEN_ASTERISK | ||||||
| 	sw t0, 16(sp) | 	beq t0, t1, .L_build_binary_expression_product | ||||||
| 	mv a0, s1 |  | ||||||
| 	lw a1, 20(sp) | 	li t1, TOKEN_MINUS | ||||||
| 	addi a2, sp, 16 | 	beq t0, t1, .L_build_binary_expression_minus | ||||||
| 	call _token_compare |  | ||||||
| 	beqz a0, .L_build_binary_expression_product |  | ||||||
|  |  | ||||||
| 	j .Lbuild_binary_expression_end | 	j .Lbuild_binary_expression_end | ||||||
|  |  | ||||||
| .L_build_binary_expression_equal: | .L_build_binary_expression_equal: | ||||||
| 	addi s1, s1, 1 # Skip =. | 	mv s1, a0 # Skip =. | ||||||
| 	li a0, 1 | 	li a0, 1 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
| 	la a0, asm_sub_a0_a1 | 	la a0, asm_sub_a0_a1 | ||||||
| @@ -174,7 +146,12 @@ _build_binary_expression: | |||||||
| 	j .Lbuild_binary_expression_end | 	j .Lbuild_binary_expression_end | ||||||
|  |  | ||||||
| .L_build_binary_expression_and: | .L_build_binary_expression_and: | ||||||
| 	addi s1, s1, 1 # Skip &. | 	/* DEBUG | ||||||
|  | 	addi a0, s1, 0 | ||||||
|  | 	li a1, 4 | ||||||
|  | 	call _write_error */ | ||||||
|  |  | ||||||
|  | 	mv s1, a0 # Skip &. | ||||||
| 	li a0, 1 | 	li a0, 1 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
| 	la a0, asm_and_a0_a1 | 	la a0, asm_and_a0_a1 | ||||||
| @@ -184,7 +161,7 @@ _build_binary_expression: | |||||||
| 	j .Lbuild_binary_expression_end | 	j .Lbuild_binary_expression_end | ||||||
|  |  | ||||||
| .L_build_binary_expression_or: | .L_build_binary_expression_or: | ||||||
| 	addi s1, s1, 2 # Skip or. | 	mv s1, a0 # Skip or. | ||||||
| 	li a0, 1 | 	li a0, 1 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
| 	la a0, asm_or_a0_a1 | 	la a0, asm_or_a0_a1 | ||||||
| @@ -194,7 +171,7 @@ _build_binary_expression: | |||||||
| 	j .Lbuild_binary_expression_end | 	j .Lbuild_binary_expression_end | ||||||
|  |  | ||||||
| .L_build_binary_expression_plus: | .L_build_binary_expression_plus: | ||||||
| 	addi s1, s1, 1 # Skip +. | 	mv s1, a0 # Skip +. | ||||||
| 	li a0, 1 | 	li a0, 1 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
| 	la a0, asm_add_a0_a1 | 	la a0, asm_add_a0_a1 | ||||||
| @@ -204,7 +181,7 @@ _build_binary_expression: | |||||||
| 	j .Lbuild_binary_expression_end | 	j .Lbuild_binary_expression_end | ||||||
|  |  | ||||||
| .L_build_binary_expression_minus: | .L_build_binary_expression_minus: | ||||||
| 	addi s1, s1, 1 # Skip -. | 	mv s1, a0 # Skip -. | ||||||
| 	li a0, 1 | 	li a0, 1 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
| 	la a0, asm_sub_a0_a1 | 	la a0, asm_sub_a0_a1 | ||||||
| @@ -214,7 +191,7 @@ _build_binary_expression: | |||||||
| 	j .Lbuild_binary_expression_end | 	j .Lbuild_binary_expression_end | ||||||
|  |  | ||||||
| .L_build_binary_expression_product: | .L_build_binary_expression_product: | ||||||
| 	addi s1, s1, 1 # Skip *. | 	mv s1, a0 # Skip *. | ||||||
| 	li a0, 1 | 	li a0, 1 | ||||||
| 	call _build_expression | 	call _build_expression | ||||||
| 	la a0, asm_mul_a0_a1 | 	la a0, asm_mul_a0_a1 | ||||||
| @@ -937,29 +914,31 @@ _skip_comment: | |||||||
|  |  | ||||||
| # Parameters: | # Parameters: | ||||||
| # a0 - Line length. | # a0 - Line length. | ||||||
| .type _compile_assembly, @function | .type _compile_procedure_section, @function | ||||||
| _compile_assembly: | _compile_procedure_section: | ||||||
| 	# Prologue. | 	# Prologue. | ||||||
| 	addi sp, sp, -16 | 	addi sp, sp, -16 | ||||||
| 	sw ra, 12(sp) | 	sw ra, 12(sp) | ||||||
| 	sw s0, 8(sp) | 	sw s0, 8(sp) | ||||||
| 	addi s0, sp, 16 | 	addi s0, sp, 16 | ||||||
|  |  | ||||||
| 	sw a0, 4(sp) # a0 - Line length. | .Lcompile_procedure_section_loop: | ||||||
|  | 	call _skip_spaces | ||||||
|  | 	call _skip_comment | ||||||
|  | 	call _skip_spaces | ||||||
|  |  | ||||||
| 	# Write the source to the standard output. |  | ||||||
| 	mv a0, s1 | 	mv a0, s1 | ||||||
| 	lw a1, 4(sp) | 	addi a1, sp, 0 | ||||||
| 	call _write_out | 	call _tokenize_next | ||||||
|  | 	li t0, TOKEN_PROC | ||||||
|  | 	lw t1, 0(sp) | ||||||
|  | 	bne t0, t1, .Lcompile_procedure_section_end | ||||||
|  |  | ||||||
| 	lw t0, 4(sp) | 	call _compile_procedure | ||||||
| 	add s1, s1, t0 |  | ||||||
|  |  | ||||||
| 	li a0, '\n' | 	j .Lcompile_procedure_section_loop | ||||||
| 	call _put_char |  | ||||||
|  |  | ||||||
| 	addi s1, s1, 1 # Skip the new line. |  | ||||||
|  |  | ||||||
|  | .Lcompile_procedure_section_end: | ||||||
| 	# Epilogue. | 	# Epilogue. | ||||||
| 	lw ra, 12(sp) | 	lw ra, 12(sp) | ||||||
| 	lw s0, 8(sp) | 	lw s0, 8(sp) | ||||||
| @@ -1038,15 +1017,19 @@ _compile_constant: | |||||||
| 	sw s0, 8(sp) | 	sw s0, 8(sp) | ||||||
| 	addi s0, sp, 16 | 	addi s0, sp, 16 | ||||||
|  |  | ||||||
| 	call _read_token | 	mv a0, s1 | ||||||
|  | 	addi a1, sp, 0 | ||||||
|  | 	call _tokenize_next | ||||||
|  |  | ||||||
| 	mv a1, a0 # The identifier length from _read_token should be in a1. | 	sub a1, a0, s1 # a1 = identifier length; _tokenize_next returns the identifier end in a0. | ||||||
| 	mv a0, s1 # Save the identifier pointer before advancing it. | 	mv a0, s1 # Save the identifier pointer before advancing it. | ||||||
| 	add s1, s1, a1 | 	add s1, s1, a1 | ||||||
| 	call _write_out | 	call _write_out | ||||||
|  |  | ||||||
| 	call _skip_spaces | 	mv a0, s1 | ||||||
| 	addi s1, s1, 2 # Skip the assignment sign. | 	addi a1, sp, 0 | ||||||
|  | 	call _tokenize_next | ||||||
|  | 	mv s1, a0 # Skip the assignment sign. | ||||||
|  |  | ||||||
| 	# : .long | 	# : .long | ||||||
| 	li t0, 0x20676e6f # ong_ | 	li t0, 0x20676e6f # ong_ | ||||||
| @@ -1154,42 +1137,10 @@ _compile_variable: | |||||||
| 	lw a1, 24(sp) | 	lw a1, 24(sp) | ||||||
| 	call _write_out | 	call _write_out | ||||||
|  |  | ||||||
| 	li t0, 0x0a74 # t\n | 	la a0, asm_type_object | ||||||
| 	sw t0, 12(sp) | 	li a1, ASM_TYPE_OBJECT_SIZE | ||||||
| 	li t0, 0x63656a62 # bjec |  | ||||||
| 	sw t0, 8(sp) |  | ||||||
| 	li t0, 0x6f40202c # , @o |  | ||||||
| 	sw t0, 4(sp) |  | ||||||
| 	addi a0, sp, 4 |  | ||||||
| 	li a1, 10 |  | ||||||
| 	call _write_out | 	call _write_out | ||||||
|  |  | ||||||
| 	# .size identifier, size |  | ||||||
| 	li t0, 0x2065 # e_ |  | ||||||
| 	sw t0, 12(sp) |  | ||||||
| 	li t0, 0x7a69732e # .siz |  | ||||||
| 	sw t0, 8(sp) |  | ||||||
| 	addi a0, sp, 8 |  | ||||||
| 	li a1, 6 |  | ||||||
| 	call _write_out |  | ||||||
|  |  | ||||||
| 	lw a0, 28(sp) |  | ||||||
| 	lw a1, 24(sp) |  | ||||||
| 	call _write_out |  | ||||||
|  |  | ||||||
| 	li t0, 0x202c # ,_ |  | ||||||
| 	sw t0, 12(sp) |  | ||||||
| 	addi a0, sp, 12 |  | ||||||
| 	li a1, 2 |  | ||||||
| 	call _write_out |  | ||||||
|  |  | ||||||
| 	lw a0, 20(sp) |  | ||||||
| 	lw a1, 16(sp) |  | ||||||
| 	call _write_out |  | ||||||
|  |  | ||||||
| 	li a0, '\n' |  | ||||||
| 	call _put_char |  | ||||||
|  |  | ||||||
| 	# identifier: .zero size | 	# identifier: .zero size | ||||||
| 	lw a0, 28(sp) | 	lw a0, 28(sp) | ||||||
| 	lw a1, 24(sp) | 	lw a1, 24(sp) | ||||||
| @@ -1239,14 +1190,8 @@ _compile_procedure: | |||||||
| 	lw a1, 16(sp) | 	lw a1, 16(sp) | ||||||
| 	call _write_out | 	call _write_out | ||||||
|  |  | ||||||
| 	li t0, 0x0a6e6f69 # ion\n | 	la a0, asm_type_function | ||||||
| 	sw t0, 12(sp) | 	li a1, ASM_TYPE_FUNCTION_SIZE | ||||||
| 	li t0, 0x74636e75 # unct |  | ||||||
| 	sw t0, 8(sp) |  | ||||||
| 	li t0, 0x6640202c # , @f |  | ||||||
| 	sw t0, 4(sp) |  | ||||||
| 	addi a0, sp, 4 |  | ||||||
| 	li a1, 12 |  | ||||||
| 	call _write_out | 	call _write_out | ||||||
|  |  | ||||||
| 	lw a0, 20(sp) | 	lw a0, 20(sp) | ||||||
| @@ -1356,7 +1301,7 @@ _compile_procedure: | |||||||
| 	beqz a0, .Lcompile_procedure_end | 	beqz a0, .Lcompile_procedure_end | ||||||
|  |  | ||||||
| 	lw a0, 12(sp) | 	lw a0, 12(sp) | ||||||
| 	call _compile_line | 	call _compile_statement | ||||||
| 	j .Lcompile_procedure_body | 	j .Lcompile_procedure_body | ||||||
|  |  | ||||||
| .Lcompile_procedure_end: | .Lcompile_procedure_end: | ||||||
| @@ -1577,7 +1522,7 @@ _compile_if: | |||||||
|  |  | ||||||
| 	call _read_line | 	call _read_line | ||||||
| 	li a1, 1 | 	li a1, 1 | ||||||
| 	call _compile_line | 	call _compile_statement | ||||||
|  |  | ||||||
| 	j .Lcompile_if_loop | 	j .Lcompile_if_loop | ||||||
|  |  | ||||||
| @@ -1614,8 +1559,8 @@ _compile_if: | |||||||
| # | # | ||||||
| # Returns 1 in a0 if the parsed line contained a text section element such as a | # Returns 1 in a0 if the parsed line contained a text section element such as a | ||||||
| # procedure or the program entry point. Otherwise sets a0 to 0. | # procedure or the program entry point. Otherwise sets a0 to 0. | ||||||
| .type _compile_line, @function | .type _compile_statement, @function | ||||||
| _compile_line: | _compile_statement: | ||||||
| 	# Prologue. | 	# Prologue. | ||||||
| 	addi sp, sp, -32 | 	addi sp, sp, -32 | ||||||
| 	sw ra, 28(sp) | 	sw ra, 28(sp) | ||||||
| @@ -1626,45 +1571,17 @@ _compile_line: | |||||||
| 	sw a0, 20(sp) | 	sw a0, 20(sp) | ||||||
| 	sw a1, 16(sp) | 	sw a1, 16(sp) | ||||||
|  |  | ||||||
| 	beqz a0, .Lcompile_line_empty # Skip an empty line. | 	call _skip_comment | ||||||
|  |  | ||||||
| 	lbu t0, (s1) |  | ||||||
| 	li t1, '(' |  | ||||||
| 	beq t0, t1, .Lcompile_line_comment |  | ||||||
|  |  | ||||||
| 	li t0, 0x636f7270 # proc |  | ||||||
| 	sw t0, 12(sp) |  | ||||||
| 	mv a0, s1 |  | ||||||
| 	addi a1, sp, 12 |  | ||||||
| 	li a2, 4 |  | ||||||
| 	call _memcmp |  | ||||||
| 	beqz a0, .Lcompile_line_procedure |  | ||||||
|  |  | ||||||
| 	li t0, 0x69676562 # begi |  | ||||||
| 	sw t0, 12(sp) |  | ||||||
| 	mv a0, s1 |  | ||||||
| 	addi a1, sp, 12 |  | ||||||
| 	li a2, 4 |  | ||||||
| 	call _memcmp |  | ||||||
| 	beqz a0, .Lcompile_line_begin |  | ||||||
|  |  | ||||||
| 	li t0, 0x2e646e65 # end. |  | ||||||
| 	sw t0, 12(sp) |  | ||||||
| 	mv a0, s1 |  | ||||||
| 	addi a1, sp, 12 |  | ||||||
| 	li a2, 4 |  | ||||||
| 	call _memcmp |  | ||||||
| 	beqz a0, .Lcompile_line_exit |  | ||||||
|  |  | ||||||
| 	mv a0, s1 | 	mv a0, s1 | ||||||
| 	lw a1, 20(sp) | 	lw a1, 20(sp) | ||||||
| 	call _is_local_identifier | 	call _is_local_identifier | ||||||
| 	bnez a0, .Lcompile_line_identifier | 	bnez a0, .Lcompile_statement_identifier | ||||||
|  |  | ||||||
| 	mv a0, s1 | 	mv a0, s1 | ||||||
| 	li a1, 2 | 	li a1, 2 | ||||||
| 	call _is_register_identifier | 	call _is_register_identifier | ||||||
| 	bnez a0, .Lcompile_line_identifier | 	bnez a0, .Lcompile_statement_identifier | ||||||
|  |  | ||||||
| 	li t0, 0x6f746f67 # goto | 	li t0, 0x6f746f67 # goto | ||||||
| 	sw t0, 12(sp) | 	sw t0, 12(sp) | ||||||
| @@ -1672,7 +1589,7 @@ _compile_line: | |||||||
| 	addi a1, sp, 12 | 	addi a1, sp, 12 | ||||||
| 	li a2, 4 | 	li a2, 4 | ||||||
| 	call _memcmp | 	call _memcmp | ||||||
| 	beqz a0, .Lcompile_line_goto | 	beqz a0, .Lcompile_statement_goto | ||||||
|  |  | ||||||
| 	li t0, 0x75746572 # retu | 	li t0, 0x75746572 # retu | ||||||
| 	sw t0, 12(sp) | 	sw t0, 12(sp) | ||||||
| @@ -1680,7 +1597,7 @@ _compile_line: | |||||||
| 	addi a1, sp, 12 | 	addi a1, sp, 12 | ||||||
| 	li a2, 4 | 	li a2, 4 | ||||||
| 	call _memcmp | 	call _memcmp | ||||||
| 	beqz a0, .Lcompile_line_return | 	beqz a0, .Lcompile_statement_return | ||||||
|  |  | ||||||
| 	li t0, 0x6669 # if | 	li t0, 0x6669 # if | ||||||
| 	sw t0, 12(sp) | 	sw t0, 12(sp) | ||||||
| @@ -1688,77 +1605,42 @@ _compile_line: | |||||||
| 	addi a1, sp, 12 | 	addi a1, sp, 12 | ||||||
| 	li a2, 2 | 	li a2, 2 | ||||||
| 	call _memcmp | 	call _memcmp | ||||||
| 	beqz a0, .Lcompile_line_if | 	beqz a0, .Lcompile_statement_if | ||||||
|  |  | ||||||
| 	lbu t0, (s1) | 	lbu t0, (s1) | ||||||
| 	li t1, '.' | 	li t1, '.' | ||||||
| 	beq t0, t1, .Lcompile_line_label | 	beq t0, t1, .Lcompile_statement_label | ||||||
| 	li t1, '_' | 	li t1, '_' | ||||||
| 	beq t0, t1, .Lcompile_line_identifier | 	beq t0, t1, .Lcompile_statement_identifier | ||||||
|  |  | ||||||
| 	j .Lcompile_line_unchanged # Else. | 	j .Lcompile_statement_empty # Else. | ||||||
|  |  | ||||||
| .Lcompile_line_if: | .Lcompile_statement_if: | ||||||
| 	call _compile_if | 	call _compile_if | ||||||
| 	j .Lcompile_line_section | 	j .Lcompile_statement_end | ||||||
|  |  | ||||||
| .Lcompile_line_label: | .Lcompile_statement_label: | ||||||
| 	lw a0, 20(sp) | 	lw a0, 20(sp) | ||||||
| 	call _compile_label | 	call _compile_label | ||||||
| 	j .Lcompile_line_section | 	j .Lcompile_statement_end | ||||||
|  |  | ||||||
| .Lcompile_line_return: | .Lcompile_statement_return: | ||||||
| 	call _compile_return | 	call _compile_return | ||||||
| 	j .Lcompile_line_section | 	j .Lcompile_statement_end | ||||||
|  |  | ||||||
| .Lcompile_line_goto: | .Lcompile_statement_goto: | ||||||
| 	call _compile_goto | 	call _compile_goto | ||||||
| 	j .Lcompile_line_section | 	j .Lcompile_statement_end | ||||||
|  |  | ||||||
| .Lcompile_line_identifier: | .Lcompile_statement_identifier: | ||||||
| 	call _compile_identifier | 	call _compile_identifier | ||||||
| 	j .Lcompile_line_section | 	j .Lcompile_statement_end | ||||||
|  |  | ||||||
| .Lcompile_line_exit: | .Lcompile_statement_empty: | ||||||
| 	call _compile_exit |  | ||||||
| 	j .Lcompile_line_section |  | ||||||
|  |  | ||||||
| .Lcompile_line_begin: |  | ||||||
| 	lw a1, 16(sp) |  | ||||||
| 	bnez a1, .Lcompile_line_compile_entry |  | ||||||
| 	call _compile_text_section |  | ||||||
| .Lcompile_line_compile_entry: |  | ||||||
| 	call _compile_entry_point |  | ||||||
| 	li a0, 1 |  | ||||||
| 	j .Lcompile_line_end |  | ||||||
|  |  | ||||||
| .Lcompile_line_procedure: |  | ||||||
| 	lw a1, 16(sp) |  | ||||||
| 	bnez a1, .Lcompile_line_compile_procedure |  | ||||||
| 	call _compile_text_section |  | ||||||
| .Lcompile_line_compile_procedure: |  | ||||||
| 	call _compile_procedure |  | ||||||
| 	li a0, 1 |  | ||||||
| 	j .Lcompile_line_end |  | ||||||
|  |  | ||||||
| .Lcompile_line_comment: |  | ||||||
| 	lw a0, 20(sp) |  | ||||||
| 	call _skip_comment |  | ||||||
| 	j .Lcompile_line_section |  | ||||||
|  |  | ||||||
| .Lcompile_line_empty: |  | ||||||
| 	addi s1, s1, 1 | 	addi s1, s1, 1 | ||||||
| 	j .Lcompile_line_section | 	j .Lcompile_statement_end | ||||||
|  |  | ||||||
| .Lcompile_line_unchanged: | .Lcompile_statement_end: | ||||||
| 	lw a0, 20(sp) |  | ||||||
| 	call _compile_assembly |  | ||||||
| 	j .Lcompile_line_section |  | ||||||
|  |  | ||||||
| .Lcompile_line_section: |  | ||||||
| 	mv a0, zero |  | ||||||
|  |  | ||||||
| .Lcompile_line_end: |  | ||||||
| 	sw a0, 12(sp) | 	sw a0, 12(sp) | ||||||
| 	call _skip_spaces | 	call _skip_spaces | ||||||
| 	call _skip_comment | 	call _skip_comment | ||||||
| @@ -1804,20 +1686,25 @@ _compile_entry_point: | |||||||
|  |  | ||||||
| 	addi s1, s1, 6 # Skip begin\n. | 	addi s1, s1, 6 # Skip begin\n. | ||||||
|  |  | ||||||
| 	# Epilogue. | 	# Generate the body of the procedure. | ||||||
| 	lw ra, 4(sp) | .Lcompile_entry_point_body: | ||||||
| 	lw s0, 0(sp) | 	call _skip_spaces | ||||||
| 	addi sp, sp, 8 | 	call _read_line | ||||||
| 	ret | 	sw a0, 12(sp) | ||||||
|  | 	li t0, 0x2e646e65 # end. | ||||||
|  | 	sw t0, 8(sp) | ||||||
|  | 	mv a0, s1 | ||||||
|  | 	addi a1, sp, 8 | ||||||
|  | 	li a2, 4 | ||||||
|  | 	call _memcmp | ||||||
|  |  | ||||||
| .type _compile_exit, @function | 	beqz a0, .Lcompile_entry_point_end | ||||||
| _compile_exit: |  | ||||||
| 	# Prologue. |  | ||||||
| 	addi sp, sp, -8 |  | ||||||
| 	sw ra, 4(sp) |  | ||||||
| 	sw s0, 0(sp) |  | ||||||
| 	addi s0, sp, 8 |  | ||||||
|  |  | ||||||
|  | 	lw a0, 12(sp) | ||||||
|  | 	call _compile_statement | ||||||
|  | 	j .Lcompile_entry_point_body | ||||||
|  |  | ||||||
|  | .Lcompile_entry_point_end: | ||||||
| 	la a0, asm_exit | 	la a0, asm_exit | ||||||
| 	li a1, ASM_EXIT_SIZE | 	li a1, ASM_EXIT_SIZE | ||||||
| 	call _write_out | 	call _write_out | ||||||
| @@ -1857,30 +1744,13 @@ _compile: | |||||||
| 	sw s0, 8(sp) | 	sw s0, 8(sp) | ||||||
| 	addi s0, sp, 16 | 	addi s0, sp, 16 | ||||||
|  |  | ||||||
| 	sw zero, 4(sp) # Whether the text section header was already emitted. |  | ||||||
|  |  | ||||||
| 	call _compile_module_declaration | 	call _compile_module_declaration | ||||||
| 	call _compile_import | 	call _compile_import | ||||||
| 	call _compile_constant_section | 	call _compile_constant_section | ||||||
| 	call _compile_variable_section | 	call _compile_variable_section | ||||||
|  | 	call _compile_text_section | ||||||
| .Lcompile_do: | 	call _compile_procedure_section | ||||||
| 	lbu t0, (s1) # t0 = Current character. | 	call _compile_entry_point | ||||||
| 	beqz t0, .Lcompile_end # Exit the loop on the NUL character. |  | ||||||
|  |  | ||||||
| 	call _skip_spaces |  | ||||||
| 	call _read_line |  | ||||||
| 	lw a1, 4(sp) |  | ||||||
| 	call _compile_line |  | ||||||
|  |  | ||||||
| 	beqz a0, .Lcompile_do |  | ||||||
| 	# Update whether the text section header was already emitted. |  | ||||||
| 	lw t0, 4(sp) |  | ||||||
| 	or t0, t0, a0 |  | ||||||
| 	sw t0, 4(sp) |  | ||||||
|  |  | ||||||
| 	j .Lcompile_do |  | ||||||
| .Lcompile_end: |  | ||||||
|  |  | ||||||
| 	# Epilogue. | 	# Epilogue. | ||||||
| 	lw ra, 12(sp) | 	lw ra, 12(sp) | ||||||
| @@ -1888,22 +1758,6 @@ _compile: | |||||||
| 	addi sp, sp, 16 | 	addi sp, sp, 16 | ||||||
| 	ret | 	ret | ||||||
|  |  | ||||||
| .type _main, @function |  | ||||||
| _main: |  | ||||||
| 	# Prologue. |  | ||||||
| 	addi sp, sp, -8 |  | ||||||
| 	sw ra, 4(sp) |  | ||||||
| 	sw s0, 0(sp) |  | ||||||
| 	addi s0, sp, 8 |  | ||||||
|  |  | ||||||
| 	li s2, 1 |  | ||||||
|  |  | ||||||
| 	# Epilogue. |  | ||||||
| 	lw ra, 4(sp) |  | ||||||
| 	lw s0, 0(sp) |  | ||||||
| 	addi sp, sp, 8 |  | ||||||
| 	ret |  | ||||||
|  |  | ||||||
| # Entry point. | # Entry point. | ||||||
| .type _start, @function | .type _start, @function | ||||||
| _start: | _start: | ||||||
| @@ -1912,8 +1766,7 @@ _start: | |||||||
| 	li a1, SOURCE_BUFFER_SIZE # Buffer size. | 	li a1, SOURCE_BUFFER_SIZE # Buffer size. | ||||||
| 	call _read_file | 	call _read_file | ||||||
|  |  | ||||||
| 	mv a0, s1 | 	li s2, 1 | ||||||
| 	call _main |  | ||||||
| 	call _compile | 	call _compile | ||||||
|  |  | ||||||
| 	# Call exit. | 	# Call exit. | ||||||
|   | |||||||
							
								
								
									
										223
									
								
								boot/tokenizer.s
									
									
									
									
									
								
							
							
						
						
									
										223
									
								
								boot/tokenizer.s
									
									
									
									
									
								
							| @@ -1,4 +1,10 @@ | |||||||
| .global _tokenize_next, classification, transitions, keywords | # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||||
|  | # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||||
|  | # obtain one at https://mozilla.org/MPL/2.0/. | ||||||
|  |  | ||||||
|  | .global _tokenize_next, classification, transitions, keywords, byte_keywords | ||||||
|  |  | ||||||
|  | .include "boot/definitions.inc" | ||||||
|  |  | ||||||
| .section .rodata | .section .rodata | ||||||
|  |  | ||||||
| @@ -8,7 +14,7 @@ | |||||||
| # | # | ||||||
| # Classification: | # Classification: | ||||||
| # | # | ||||||
| .equ CLASS_INVALID, 0x0 | .equ CLASS_INVALID, 0x00 | ||||||
| .equ CLASS_DIGIT, 0x01 | .equ CLASS_DIGIT, 0x01 | ||||||
| .equ CLASS_CHARACTER, 0x02 | .equ CLASS_CHARACTER, 0x02 | ||||||
| .equ CLASS_SPACE, 0x03 | .equ CLASS_SPACE, 0x03 | ||||||
| @@ -25,9 +31,11 @@ | |||||||
| .equ CLASS_EOF, 0x0e | .equ CLASS_EOF, 0x0e | ||||||
| .equ CLASS_DOT, 0x0f | .equ CLASS_DOT, 0x0f | ||||||
| .equ CLASS_MINUS, 0x10 | .equ CLASS_MINUS, 0x10 | ||||||
| .equ CLASS_DOUBLE_QUOTE, 0x11 | .equ CLASS_QUOTE, 0x11 | ||||||
|  | .equ CLASS_GREATER, 0x12 | ||||||
|  | .equ CLASS_LESS, 0x13 | ||||||
|  |  | ||||||
| .equ CLASS_COUNT, 18 | .equ CLASS_COUNT, 20 | ||||||
|  |  | ||||||
| .type classification, @object | .type classification, @object | ||||||
| .size classification, 128 | .size classification, 128 | ||||||
| @@ -66,12 +74,12 @@ classification: | |||||||
| 	.byte CLASS_INVALID # 1F US | 	.byte CLASS_INVALID # 1F US | ||||||
| 	.byte CLASS_SPACE # 20 Space | 	.byte CLASS_SPACE # 20 Space | ||||||
| 	.byte CLASS_SINGLE # 21 ! | 	.byte CLASS_SINGLE # 21 ! | ||||||
| 	.byte CLASS_DOUBLE_QUOTE # 22 " | 	.byte CLASS_QUOTE # 22 " | ||||||
| 	.byte 0x00 # 23 # | 	.byte 0x00 # 23 # | ||||||
| 	.byte 0x00 # 24 $ | 	.byte 0x00 # 24 $ | ||||||
| 	.byte CLASS_SINGLE # 25 % | 	.byte CLASS_SINGLE # 25 % | ||||||
| 	.byte CLASS_SINGLE # 26 & | 	.byte CLASS_SINGLE # 26 & | ||||||
| 	.byte 0x00 # 27 ' | 	.byte CLASS_QUOTE # 27 ' | ||||||
| 	.byte CLASS_LEFT_PAREN # 28 ( | 	.byte CLASS_LEFT_PAREN # 28 ( | ||||||
| 	.byte CLASS_RIGHT_PAREN # 29 ) | 	.byte CLASS_RIGHT_PAREN # 29 ) | ||||||
| 	.byte CLASS_ASTERISK # 2A * | 	.byte CLASS_ASTERISK # 2A * | ||||||
| @@ -92,9 +100,9 @@ classification: | |||||||
| 	.byte CLASS_DIGIT # 39 9 | 	.byte CLASS_DIGIT # 39 9 | ||||||
| 	.byte CLASS_COLON # 3A : | 	.byte CLASS_COLON # 3A : | ||||||
| 	.byte CLASS_SINGLE # 3B ; | 	.byte CLASS_SINGLE # 3B ; | ||||||
| 	.byte 0x00 # 3C < | 	.byte CLASS_LESS # 3C < | ||||||
| 	.byte CLASS_EQUALS # 3D = | 	.byte CLASS_EQUALS # 3D = | ||||||
| 	.byte 0x00 # 3E > | 	.byte CLASS_GREATER # 3E > | ||||||
| 	.byte 0x00 # 3F ? | 	.byte 0x00 # 3F ? | ||||||
| 	.byte CLASS_SINGLE # 40 @ | 	.byte CLASS_SINGLE # 40 @ | ||||||
| 	.byte CLASS_CHARACTER # 41 A | 	.byte CLASS_CHARACTER # 41 A | ||||||
| @@ -220,7 +228,10 @@ keywords: | |||||||
| 	.ascii "case" | 	.ascii "case" | ||||||
| 	.word 2 | 	.word 2 | ||||||
| 	.ascii "of" | 	.ascii "of" | ||||||
| .size keywords, . - keywords |  | ||||||
|  | .type byte_keywords, @object | ||||||
|  | byte_keywords: .ascii "&.,:;()[]^=+-*@" | ||||||
|  | .equ BYTE_KEYWORDS_SIZE, . - byte_keywords | ||||||
|  |  | ||||||
| .section .data | .section .data | ||||||
|  |  | ||||||
| @@ -240,78 +251,66 @@ keywords: | |||||||
| #   handles each action. | #   handles each action. | ||||||
| # | # | ||||||
| .type transitions, @object | .type transitions, @object | ||||||
| .size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT | .size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT | ||||||
| transitions: | transitions: | ||||||
| 	#     Invalid Digit   Alpha   Space   :       =       (       )      | 	#     Invalid Digit   Alpha   Space   :       =       (       )      | ||||||
| 	#     *       _       Single  Hex     0       x       NUL     . | 	#     *       _       Single  Hex     0       x       NUL     . | ||||||
| 	#     -       " | 	#     -       " or '  >       < | ||||||
| 	.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107 | 	.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff | ||||||
| 	.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start | 	.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108 | ||||||
| 	.word 0x010f, 0x0110 | 	.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon | ||||||
|  |  | ||||||
| 	.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff | 	.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff | ||||||
| 	.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier | 	.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff | ||||||
| 	.word 0x05ff, 0x05ff | 	.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | 	.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer | 	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x03 Integer | ||||||
|  |  | ||||||
|  | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff | ||||||
|  | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
|  | 	.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater | ||||||
|  |  | ||||||
|  | 	.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff | ||||||
|  | 	.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff | ||||||
|  | 	.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign | 	.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Equals |  | ||||||
| 	.word 0x02ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren | 	.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less | ||||||
| 	.word 0x02ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren |  | ||||||
| 	.word 0x02ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk |  | ||||||
| 	.word 0x02ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109 |  | ||||||
| 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment |  | ||||||
| 	.word 0x0109, 0x0109 |  | ||||||
|  |  | ||||||
| 	.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff |  | ||||||
| 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment |  | ||||||
| 	.word 0x0109, 0x0109 |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token |  | ||||||
| 	.word 0x02ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero |  | ||||||
| 	.word 0x02ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal |  | ||||||
| 	.word 0x00ff, 0x02ff |  | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | 	.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot | 	.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff | ||||||
| 	.word 0x02ff, 0x02ff | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x08 Dot | ||||||
|  |  | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus | 	.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109 | ||||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 | ||||||
| 	.word 0x02ff, 0x02ff | 	.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment | ||||||
|  |  | ||||||
| 	.word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string. | 	.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff | ||||||
| 	.word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 | 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 | ||||||
| 	.word 0x0110, 0x04ff | 	.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment | ||||||
|  |  | ||||||
|  | 	.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110 | ||||||
|  | 	.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110 | ||||||
|  | 	.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String | ||||||
|  |  | ||||||
|  | 	.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
|  | 	.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff | ||||||
|  | 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x0c Zero | ||||||
|  |  | ||||||
|  | 	.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||||
|  | 	.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff | ||||||
|  | 	.word 0x00ff, 0x02ff, 0x02ff, 0x02ff # 0x0d Hexadecimal | ||||||
|  |  | ||||||
| .section .text | .section .text | ||||||
|  |  | ||||||
| @@ -406,6 +405,57 @@ _classify_identifier: | |||||||
| 	addi sp, sp, 16 | 	addi sp, sp, 16 | ||||||
| 	ret | 	ret | ||||||
|  |  | ||||||
|  | # Takes a symbol and determines its type. | ||||||
|  | # | ||||||
|  | # Parameters: | ||||||
|  | # a0 - Token character. | ||||||
|  | # | ||||||
|  | # Sets a0 to the appropriate token type. | ||||||
|  | .type _classify_single, @function | ||||||
|  | _classify_single: | ||||||
|  | 	# Prologue. | ||||||
|  | 	addi sp, sp, -16 | ||||||
|  | 	sw ra, 12(sp) | ||||||
|  | 	sw s0, 8(sp) | ||||||
|  | 	addi s0, sp, 16 | ||||||
|  |  | ||||||
|  | 	mv a1, a0 | ||||||
|  | 	li a2, BYTE_KEYWORDS_SIZE | ||||||
|  | 	la a0, byte_keywords | ||||||
|  | 	call _memchr | ||||||
|  |  | ||||||
|  | 	la a1, byte_keywords | ||||||
|  | 	sub a0, a0, a1 | ||||||
|  | 	addi a0, a0, 27 | ||||||
|  |  | ||||||
|  | 	# Epilogue. | ||||||
|  | 	lw ra, 12(sp) | ||||||
|  | 	lw s0, 8(sp) | ||||||
|  | 	addi sp, sp, 16 | ||||||
|  | 	ret | ||||||
|  |  | ||||||
|  | # Classifies a symbol containing multiple characters (probably 2). | ||||||
|  | # | ||||||
|  | # Parameters: | ||||||
|  | # a0 - Token length. | ||||||
|  | # a1 - Token pointer. | ||||||
|  | # | ||||||
|  | # Sets a0 to the appropriate token type. | ||||||
|  | .type _classify_composite, @function | ||||||
|  | _classify_composite: | ||||||
|  | 	lbu t0, 0(a1) | ||||||
|  | 	li t1, ':' | ||||||
|  | 	beq t0, t1, .Lclassify_composite_assign | ||||||
|  |  | ||||||
|  | 	j .Lclassify_composite_end | ||||||
|  |  | ||||||
|  | .Lclassify_composite_assign: | ||||||
|  | 	li a0, TOKEN_ASSIGN | ||||||
|  | 	j .Lclassify_composite_end | ||||||
|  |  | ||||||
|  | .Lclassify_composite_end: | ||||||
|  | 	ret | ||||||
|  |  | ||||||
| # Initializes the classification table. | # Initializes the classification table. | ||||||
| # | # | ||||||
| # Parameters: | # Parameters: | ||||||
| @@ -453,12 +503,18 @@ _tokenize_next: | |||||||
| 	li t0, 0x03 # Skip action. | 	li t0, 0x03 # Skip action. | ||||||
| 	beq t1, t0, .Ltokenize_next_skip | 	beq t1, t0, .Ltokenize_next_skip | ||||||
|  |  | ||||||
| 	li t0, 0x04 # Comment action. | 	li t0, 0x04 # Delimited string action. | ||||||
| 	beq t1, t0, .Ltokenize_next_comment | 	beq t1, t0, .Ltokenize_next_comment | ||||||
|  |  | ||||||
| 	li t0, 0x05 # Finalize identifier. | 	li t0, 0x05 # Finalize identifier. | ||||||
| 	beq t1, t0, .Ltokenize_next_identifier | 	beq t1, t0, .Ltokenize_next_identifier | ||||||
|  |  | ||||||
|  | 	li t0, 0x06 # Single character symbol action. | ||||||
|  | 	beq t1, t0, .Ltokenize_next_single | ||||||
|  |  | ||||||
|  | 	li t0, 0x07 # An action for symbols containing multiple characters. | ||||||
|  | 	beq t1, t0, .Ltokenize_next_composite | ||||||
|  |  | ||||||
| 	j .Ltokenize_next_reject | 	j .Ltokenize_next_reject | ||||||
|  |  | ||||||
| .Ltokenize_next_reject: | .Ltokenize_next_reject: | ||||||
| @@ -481,24 +537,17 @@ _tokenize_next: | |||||||
|  |  | ||||||
| .Ltokenize_next_print: | .Ltokenize_next_print: | ||||||
| 	/* DEBUG | 	/* DEBUG | ||||||
| 	lw a0, 4(sp) | 	addi a0, a0, 21 | ||||||
| 	mv a1, s1 | 	sw a0, 0(sp) | ||||||
| 	sub a1, a1, a0 | 	addi a0, sp, 0 | ||||||
| 	call _write_error | 	li a1, 1 | ||||||
| 	DEBUG */ | 	call _write_error */ | ||||||
|  |  | ||||||
| 	j .Ltokenize_next_end | 	j .Ltokenize_next_end | ||||||
|  |  | ||||||
| .Ltokenize_next_comment: | .Ltokenize_next_comment: | ||||||
| 	addi s1, s1, 1 | 	addi s1, s1, 1 | ||||||
|  |  | ||||||
| 	/* DEBUG |  | ||||||
| 	lw a0, 4(sp) |  | ||||||
| 	mv a1, s1 |  | ||||||
| 	sub a1, a1, a0 |  | ||||||
| 	call _write_error |  | ||||||
| 	DEBUG */ |  | ||||||
|  |  | ||||||
| 	j .Ltokenize_next_end | 	j .Ltokenize_next_end | ||||||
|  |  | ||||||
| .Ltokenize_next_identifier: | .Ltokenize_next_identifier: | ||||||
| @@ -512,6 +561,26 @@ _tokenize_next: | |||||||
|  |  | ||||||
| 	j .Ltokenize_next_end | 	j .Ltokenize_next_end | ||||||
|  |  | ||||||
|  | .Ltokenize_next_single: | ||||||
|  | 	lw a0, 4(sp) | ||||||
|  | 	addi s1, a0, 1 | ||||||
|  | 	lbu a0, (a0) | ||||||
|  | 	call _classify_single | ||||||
|  | 	lw a1, 0(sp) | ||||||
|  | 	sw a0, (a1) | ||||||
|  |  | ||||||
|  | 	j .Ltokenize_next_end | ||||||
|  |  | ||||||
|  | .Ltokenize_next_composite: | ||||||
|  | 	addi s1, s1, 1 | ||||||
|  | 	lw a1, 4(sp) | ||||||
|  | 	sub a0, s1, a1 | ||||||
|  | 	call _classify_composite | ||||||
|  | 	lw a1, 0(sp) | ||||||
|  | 	sw a0, (a1) | ||||||
|  |  | ||||||
|  | 	j .Ltokenize_next_end | ||||||
|  |  | ||||||
| .Ltokenize_next_end: | .Ltokenize_next_end: | ||||||
| 	mv a0, s1 # Return the advanced text pointer. | 	mv a0, s1 # Return the advanced text pointer. | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user