Start over
This commit is contained in:
		
							
								
								
									
										91
									
								
								Rakefile
									
									
									
									
									
								
							
							
						
						
									
										91
									
								
								Rakefile
									
									
									
									
									
								
							| @@ -5,34 +5,99 @@ | ||||
|  | ||||
| require 'open3' | ||||
| require 'rake/clean' | ||||
| require 'term/ansicolor' | ||||
|  | ||||
| CLEAN.include 'build/boot' | ||||
| CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' | ||||
| SYSROOT = '../eugenios/build/sysroot' | ||||
| QEMU = 'qemu-riscv32' | ||||
| STAGES = Dir.glob('boot/stage*.elna').collect { |stage| File.basename stage, '.elna' }.sort | ||||
|  | ||||
| CLEAN.include 'build/boot', 'build/valid' | ||||
|  | ||||
| directory 'build/boot' | ||||
| directory 'build/valid' | ||||
|  | ||||
| task default: :boot | ||||
|  | ||||
| desc 'Final stage' | ||||
| task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t| | ||||
|   exe, previous_output, source = t.prerequisites | ||||
| task boot: "build/valid/#{STAGES.last}" | ||||
| task boot: "build/valid/#{STAGES.last}.s" | ||||
| task boot: "boot/#{STAGES.last}.elna" do |t| | ||||
|   groupped = t.prerequisites.group_by { |stage| File.extname stage }.transform_values(&:first) | ||||
|   exe = groupped[''] | ||||
|   expected = groupped['.s'] | ||||
|   source = groupped['.elna'] | ||||
|  | ||||
|   cat_arguments = ['cat', source] | ||||
|   compiler_arguments = [QEMU, '-L', SYSROOT, exe] | ||||
|   diff_arguments = ['diff', '-Nur', '--text', previous_output, '-'] | ||||
|   diff_arguments = ['diff', '-Nur', '--text', expected, '-'] | ||||
|   Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments) | ||||
| end | ||||
|  | ||||
| file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t| | ||||
|   source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' } | ||||
| desc 'Convert previous stage language into the current stage language' | ||||
| task :convert do | ||||
|   File.open('boot/stage4.elna', 'w') do |current_stage| | ||||
|     li_value = nil | ||||
|  | ||||
|   File.open t.name, 'w' do |output| | ||||
|     assemble_stage output, exe, source | ||||
|     File.readlines('boot/stage3.elna').each do |line| | ||||
|       current_stage << line | ||||
|     end | ||||
|   end | ||||
| end | ||||
|  | ||||
| file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t| | ||||
|   sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites | ||||
| STAGES.each do |stage| | ||||
|   previous = stage.delete_prefix('stage').to_i.pred | ||||
|  | ||||
|   file "build/valid/#{stage}" => "build/valid/#{stage}.s" do |t| | ||||
|     sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites | ||||
|   end | ||||
|  | ||||
|   file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t| | ||||
|     exe, source = t.prerequisites | ||||
|  | ||||
|     cat_arguments = ['cat', source] | ||||
|     compiler_arguments = [QEMU, '-L', SYSROOT, exe] | ||||
|     last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) | ||||
|  | ||||
|     IO.copy_stream last_stdout, t.name | ||||
|   end | ||||
|  | ||||
|   file "build/boot/#{stage}" => "build/boot/#{stage}.s" do |t| | ||||
|     sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites | ||||
|   end | ||||
|  | ||||
|   file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t| | ||||
|     exe, source = t.prerequisites | ||||
|  | ||||
|     cat_arguments = ['cat', source] | ||||
|     compiler_arguments = [QEMU, '-L', SYSROOT, exe] | ||||
|     last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) | ||||
|  | ||||
|     IO.copy_stream last_stdout, t.name | ||||
|   end | ||||
| end | ||||
|  | ||||
| task test: 'build/boot/test' do |t| | ||||
|   sh QEMU, '-L', SYSROOT, t.prerequisites.first | ||||
| # | ||||
| # Stage 1. | ||||
| # | ||||
|  | ||||
| file 'build/valid/stage1' => ['build/valid', 'build/valid/stage1.s'] do |t| | ||||
|   source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } | ||||
|  | ||||
|   sh CROSS_GCC, '-nostdlib', '-o', t.name, *source | ||||
| end | ||||
|  | ||||
| file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t| | ||||
|   source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' } | ||||
|  | ||||
|   cat_arguments = ['cat', *source] | ||||
|   compiler_arguments = [QEMU, '-L', SYSROOT, *exe] | ||||
|   last_stdout, wait_threads = Open3.pipeline_r(cat_arguments, compiler_arguments) | ||||
|  | ||||
|   IO.copy_stream last_stdout, t.name | ||||
| end | ||||
|  | ||||
| file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t| | ||||
|   source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } | ||||
|  | ||||
|   sh CROSS_GCC, '-nostdlib', '-o', t.name, *source | ||||
| end | ||||
|   | ||||
| @@ -2,17 +2,15 @@ | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| .global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower | ||||
| .global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i | ||||
| .global _memcmp, _memchr, _memmem, _memcpy, _mmap | ||||
| .global _read_file, _write_error | ||||
| .global _memcmp, _memchr, _memmem, _mmap | ||||
| .global _current, _get, _advance, _label_counter | ||||
| .global _divide_by_zero_error, _exit, _strings_index, _string_equal | ||||
| .global _divide_by_zero_error, _strings_index, _string_equal | ||||
|  | ||||
| .section .rodata | ||||
|  | ||||
| .equ SYS_READ, 63 | ||||
| .equ SYS_WRITE, 64 | ||||
| .equ SYS_EXIT, 93 | ||||
| .equ SYS_MMAP2, 222 | ||||
| .equ STDIN, 0 | ||||
| .equ STDOUT, 1 | ||||
| @@ -77,128 +75,6 @@ _memcmp: | ||||
| .Lmemcmp_end: | ||||
| 	ret | ||||
|  | ||||
| # Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| .type _is_upper, @function | ||||
| _is_upper: | ||||
| 	li t0, 'A' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= 'A' | ||||
|  | ||||
| 	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' | ||||
| 	and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' | ||||
|  | ||||
| 	ret | ||||
|  | ||||
| # Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| .type _is_lower, @function | ||||
| _is_lower: | ||||
| 	li t0, 'a' - 1 | ||||
| 	sltu t2, t0, a0 # t2 = a0 >= 'a' | ||||
|  | ||||
| 	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' | ||||
| 	and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' | ||||
|  | ||||
| 	ret | ||||
|  | ||||
| # Detects if the passed character is a 7-bit alpha character or an underscore. | ||||
| # The character is passed in a0. | ||||
| # Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. | ||||
| .type _is_alpha, @function | ||||
| _is_alpha: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -16 | ||||
| 	sw ra, 12(sp) | ||||
| 	sw s0, 8(sp) | ||||
| 	addi s0, sp, 16 | ||||
|  | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	call _is_upper | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	lw a0, 4(sp) | ||||
| 	call _is_lower | ||||
|  | ||||
| 	lw t0, 4(sp) | ||||
| 	xori t1, t0, '_' | ||||
| 	seqz t1, t1 | ||||
|  | ||||
| 	lw t0, 0(sp) | ||||
| 	or a0, a0, t0 | ||||
| 	or a0, a0, t1 | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 12(sp) | ||||
| 	lw s0, 8(sp) | ||||
| 	addi sp, sp, 16 | ||||
| 	ret | ||||
|  | ||||
| # Detects whether the passed character is a digit | ||||
| # (a value between 0 and 9). | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Exemined value. | ||||
| # | ||||
| # Sets a0 to 1 if it is a digit, to 0 otherwise. | ||||
| .type _is_digit, @function | ||||
| _is_digit: | ||||
| 	li t0, '0' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= '0' | ||||
|  | ||||
| 	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' | ||||
|  | ||||
| 	and a0, t1, t2 | ||||
|  | ||||
| 	ret | ||||
|  | ||||
| .type _is_alnum, @function | ||||
| _is_alnum: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -16 | ||||
| 	sw ra, 12(sp) | ||||
| 	sw s0, 8(sp) | ||||
| 	addi s0, sp, 16 | ||||
|  | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	call _is_alpha | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	lw a0, 4(sp) | ||||
| 	call _is_digit | ||||
|  | ||||
| 	lw a1, 0(sp) | ||||
| 	or a0, a0, a1 | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 12(sp) | ||||
| 	lw s0, 8(sp) | ||||
| 	addi sp, sp, 16 | ||||
| 	ret | ||||
|  | ||||
| # Writes a string to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Length of the string. | ||||
| # a1 - String pointer. | ||||
| .type _write_s, @function | ||||
| _write_s: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -8 | ||||
| 	sw ra, 4(sp) | ||||
| 	sw s0, 0(sp) | ||||
| 	addi s0, sp, 8 | ||||
|  | ||||
| 	mv a2, a0 | ||||
| 	li a0, STDOUT | ||||
| 	li a7, SYS_WRITE | ||||
| 	ecall | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 4(sp) | ||||
| 	lw s0, 0(sp) | ||||
| 	addi sp, sp, 8 | ||||
| 	ret | ||||
|  | ||||
| # Reads standard input into a buffer. | ||||
| # a0 - Buffer pointer. | ||||
| # a1 - Buffer size. | ||||
| @@ -228,16 +104,6 @@ _read_file: | ||||
| 	addi sp, sp, 8 | ||||
| 	ret | ||||
|  | ||||
| # Terminates the program. a0 contains the return code. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Status code. | ||||
| .type _exit, @function | ||||
| _exit: | ||||
| 	li a7, SYS_EXIT | ||||
| 	ecall | ||||
| 	# ret | ||||
|  | ||||
| .type _divide_by_zero_error, @function | ||||
| _divide_by_zero_error: | ||||
|     addi a7, zero, 172 # getpid | ||||
| @@ -248,106 +114,6 @@ _divide_by_zero_error: | ||||
|     ecall | ||||
|     ret | ||||
|  | ||||
| # Writes a number to a string buffer. | ||||
| # | ||||
| # t0 - Local buffer. | ||||
| # t1 - Constant 10. | ||||
| # t2 - Current character. | ||||
| # t3 - Whether the number is negative. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| # a1 - Buffer pointer. | ||||
| # | ||||
| # Sets a0 to the length of the written number. | ||||
| .type _print_i, @function | ||||
| _print_i: | ||||
|     addi sp, sp, -32 | ||||
|     sw ra, 28(sp) | ||||
|     sw s0, 24(sp) | ||||
|     addi s0, sp, 32 | ||||
|  | ||||
|     li t1, 10 | ||||
|     addi t0, s0, -9 | ||||
|  | ||||
|     li t3, 0 | ||||
|     bgez a0, .Lprint_i_digit10 | ||||
|     li t3, 1 | ||||
|     neg a0, a0 | ||||
|  | ||||
| .Lprint_i_digit10: | ||||
|     rem t2, a0, t1 | ||||
|     addi t2, t2, '0' | ||||
|     sb t2, 0(t0) | ||||
|     div a0, a0, t1 | ||||
|     addi t0, t0, -1 | ||||
|     bne zero, a0, .Lprint_i_digit10 | ||||
|  | ||||
|     beq zero, t3, .Lprint_i_write_call | ||||
|     addi t2, zero, '-' | ||||
|     sb t2, 0(t0) | ||||
|     addi t0, t0, -1 | ||||
|  | ||||
| .Lprint_i_write_call: | ||||
| 	mv a0, a1 | ||||
| 	addi a1, t0, 1 | ||||
|     sub a2, s0, t0 | ||||
| 	addi a2, a2, -9 | ||||
| 	sw a2, 0(sp) | ||||
|  | ||||
| 	call _memcpy | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
|  | ||||
|     lw ra, 28(sp) | ||||
|     lw s0, 24(sp) | ||||
|     addi sp, sp, 32 | ||||
|     ret | ||||
|  | ||||
| # Writes a number to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| .type _write_i, @function | ||||
| _write_i: | ||||
|     addi sp, sp, -32 | ||||
|     sw ra, 28(sp) | ||||
|     sw s0, 24(sp) | ||||
|     addi s0, sp, 32 | ||||
|  | ||||
| 	addi a1, sp, 0 | ||||
| 	call _print_i | ||||
|  | ||||
| 	addi a1, sp, 0 | ||||
| 	call _write_s | ||||
|  | ||||
|     lw ra, 28(sp) | ||||
|     lw s0, 24(sp) | ||||
|     addi sp, sp, 32 | ||||
|     ret | ||||
|  | ||||
| # Writes a character from a0 into the standard output. | ||||
| .type _write_c, @function | ||||
| _write_c: | ||||
| 	# Prologue | ||||
| 	addi sp, sp, -16 | ||||
| 	sw ra, 12(sp) | ||||
| 	sw s0, 8(sp) | ||||
| 	addi s0, sp, 16 | ||||
|  | ||||
| 	sb a0, 4(sp) | ||||
| 	li a0, STDOUT | ||||
| 	addi a1, sp, 4 | ||||
| 	li a2, 1 | ||||
| 	li a7, SYS_WRITE | ||||
| 	ecall | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 12(sp) | ||||
| 	lw s0, 8(sp) | ||||
| 	add sp, sp, 16 | ||||
| 	ret | ||||
|  | ||||
| # a0 - Pointer to an array to get the first element. | ||||
| # | ||||
| # Dereferences a pointer and returns what is on the address in a0. | ||||
| @@ -448,34 +214,6 @@ _memmem: | ||||
| 	add sp, sp, 24 | ||||
| 	ret | ||||
|  | ||||
| # Copies memory. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Destination. | ||||
| # a1 - Source. | ||||
| # a2 - Size. | ||||
| # | ||||
| # Preserves a0. | ||||
| .type _memcpy, @function | ||||
| _memcpy: | ||||
| 	mv t0, a0 | ||||
|  | ||||
| .Lmemcpy_loop: | ||||
| 	beqz a2, .Lmemcpy_end | ||||
|  | ||||
| 	lbu t1, (a1) | ||||
| 	sb t1, (a0) | ||||
|  | ||||
| 	addi a0, a0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	j .Lmemcpy_loop | ||||
|  | ||||
| .Lmemcpy_end: | ||||
| 	mv a0, t0 | ||||
| 	ret | ||||
|  | ||||
| # Searches for a string in a string array. | ||||
| # | ||||
| # Parameters: | ||||
|   | ||||
| @@ -1,68 +0,0 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| # | ||||
| # Tokens. | ||||
| # | ||||
|  | ||||
| # The constant should match the index in the keywords array in tokenizer.s. | ||||
|  | ||||
| .equ TOKEN_PROGRAM, 1 | ||||
| .equ TOKEN_IMPORT, 2 | ||||
| .equ TOKEN_CONST, 3 | ||||
| .equ TOKEN_VAR, 4 | ||||
| .equ TOKEN_IF, 5 | ||||
| .equ TOKEN_THEN, 6 | ||||
| .equ TOKEN_ELSIF, 7 | ||||
| .equ TOKEN_ELSE, 8 | ||||
| .equ TOKEN_WHILE, 9 | ||||
| .equ TOKEN_DO, 10 | ||||
| .equ TOKEN_PROC, 11 | ||||
| .equ TOKEN_BEGIN, 12 | ||||
| .equ TOKEN_END, 13 | ||||
| .equ TOKEN_TYPE, 14 | ||||
| .equ TOKEN_RECORD, 15 | ||||
| .equ TOKEN_UNION, 16 | ||||
| .equ TOKEN_TRUE, 17 | ||||
| .equ TOKEN_FALSE, 18 | ||||
| .equ TOKEN_NIL, 19 | ||||
| .equ TOKEN_XOR, 20 | ||||
| .equ TOKEN_OR, 21 | ||||
| .equ TOKEN_RETURN, 22 | ||||
| .equ TOKEN_CAST, 23 | ||||
| .equ TOKEN_GOTO, 24 | ||||
| .equ TOKEN_CASE, 25 | ||||
| .equ TOKEN_OF, 26 | ||||
|  | ||||
| .equ TOKEN_IDENTIFIER, 27 | ||||
| # The constant should match the character index in the byte_keywords string. | ||||
|  | ||||
| .equ TOKEN_AND, TOKEN_IDENTIFIER + 1 | ||||
| .equ TOKEN_DOT, TOKEN_IDENTIFIER + 2 | ||||
| .equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3 | ||||
| .equ TOKEN_COLON, TOKEN_IDENTIFIER + 4 | ||||
| .equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5 | ||||
| .equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6 | ||||
| .equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7 | ||||
| .equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8 | ||||
| .equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9 | ||||
| .equ TOKEN_HAT, TOKEN_IDENTIFIER + 10 | ||||
| .equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11 | ||||
| .equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12 | ||||
| .equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13 | ||||
| .equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14 | ||||
| .equ TOKEN_AT, TOKEN_IDENTIFIER + 15 | ||||
|  | ||||
| .equ TOKEN_ASSIGN, 43 | ||||
| .equ TOKEN_INTEGER, 44 | ||||
|  | ||||
| # | ||||
| # Symbols. | ||||
| # | ||||
| .equ TYPE_PRIMITIVE, 0x01 | ||||
| .equ TYPE_POINTER, 0x02 | ||||
| .equ TYPE_PROCEDURE, 0x03 | ||||
| .equ INFO_PARAMETER, 0x10 | ||||
| .equ INFO_LOCAL, 0x20 | ||||
| .equ INFO_PROCEDURE, 0x30 | ||||
							
								
								
									
										2284
									
								
								boot/stage1.s
									
									
									
									
									
								
							
							
						
						
									
										2284
									
								
								boot/stage1.s
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2008
									
								
								boot/stage2.elna
									
									
									
									
									
								
							
							
						
						
									
										2008
									
								
								boot/stage2.elna
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										975
									
								
								boot/stage3.elna
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										975
									
								
								boot/stage3.elna
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,975 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| # Stage3 compiler. | ||||
| # | ||||
| # - Procedures with none or one argument. | ||||
| # - Goto statements. | ||||
| # - Character and integer literals. | ||||
| # - Passing local variables to procedures. | ||||
| # - Local variables should have the format: v00, | ||||
| #   where 00 is its offset from the sp register. | ||||
|  | ||||
| .section .rodata | ||||
|  | ||||
| .type keyword_section, @object | ||||
| keyword_section: .ascii ".section" | ||||
|  | ||||
| .type keyword_type, @object | ||||
| keyword_type: .ascii ".type" | ||||
|  | ||||
| .type keyword_ret, @object | ||||
| keyword_ret: .ascii "ret" | ||||
|  | ||||
| .type keyword_global, @object | ||||
| keyword_global: .ascii ".globl" | ||||
|  | ||||
| .type keyword_proc, @object | ||||
| keyword_proc: .ascii "proc " | ||||
|  | ||||
| .type keyword_end, @object | ||||
| keyword_end: .ascii "end" | ||||
|  | ||||
| .type keyword_begin, @object | ||||
| keyword_begin: .ascii "begin" | ||||
|  | ||||
| .type keyword_var, @object | ||||
| keyword_var: .ascii "var" | ||||
|  | ||||
| .type asm_prologue, @object | ||||
| asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" | ||||
|  | ||||
| .type asm_epilogue, @object | ||||
| asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" | ||||
|  | ||||
| .type asm_type_directive, @object | ||||
| asm_type_directive: .string ".type " | ||||
|  | ||||
| .type asm_type_function, @object | ||||
| asm_type_function: .string ", @function\n" | ||||
|  | ||||
| .type asm_colon, @object | ||||
| asm_colon: .string ":\n" | ||||
|  | ||||
| .type asm_call, @object | ||||
| asm_call: .string "\tcall " | ||||
|  | ||||
| .type asm_j, @object | ||||
| asm_j: .string "\tj " | ||||
|  | ||||
| .type asm_li, @object | ||||
| asm_li: .string "\tli " | ||||
|  | ||||
| .type asm_lw, @object | ||||
| asm_lw: .string "\tlw " | ||||
|  | ||||
| .type asm_sw, @object | ||||
| asm_sw: .string "\tsw " | ||||
|  | ||||
| .type asm_mv, @object | ||||
| asm_mv: .string "mv " | ||||
|  | ||||
| .type asm_t0, @object | ||||
| asm_t0: .string "t0" | ||||
|  | ||||
| .type asm_a0, @object | ||||
| asm_a0: .string "a0" | ||||
|  | ||||
| .type asm_comma, @object | ||||
| asm_comma: .string ", " | ||||
|  | ||||
| .type asm_sp, @object | ||||
| asm_sp: .string "(sp)" | ||||
|  | ||||
| .section .bss | ||||
|  | ||||
| # When modifying also change the read size in the entry point procedure. | ||||
| .type source_code, @object | ||||
| source_code: .zero 81920 | ||||
|  | ||||
| .section .data | ||||
|  | ||||
| .type source_code_position, @object | ||||
| source_code_position: .word source_code | ||||
|  | ||||
| .section .text | ||||
|  | ||||
| # Reads standard input into a buffer. | ||||
| # a0 - Buffer pointer. | ||||
| # a1 - Buffer size. | ||||
| # | ||||
| # Returns the amount of bytes written in a0. | ||||
| proc _read_file(); | ||||
| begin | ||||
| 	mv a2, a1 | ||||
| 	mv a1, a0 | ||||
| 	# STDIN. | ||||
| 	li a0, 0 | ||||
| 	li a7, 63 # SYS_READ. | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Writes to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Buffer. | ||||
| # a1 - Buffer length. | ||||
| proc _write_s(); | ||||
| begin | ||||
| 	mv a2, a1 | ||||
| 	mv a1, a0 | ||||
| 	# STDOUT. | ||||
| 	li a0, 1 | ||||
| 	li a7, 64 # SYS_WRITE. | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Writes a number to a string buffer. | ||||
| # | ||||
| # t0 - Local buffer. | ||||
| # t1 - Constant 10. | ||||
| # t2 - Current character. | ||||
| # t3 - Whether the number is negative. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| # a1 - Buffer pointer. | ||||
| # | ||||
| # Sets a0 to the length of the written number. | ||||
| proc _print_i(); | ||||
| begin | ||||
| 	li t1, 10 | ||||
| 	addi t0, s0, -9 | ||||
|  | ||||
| 	li t3, 0 | ||||
| 	bgez a0, .print_i_digit10 | ||||
| 	li t3, 1 | ||||
| 	neg a0, a0 | ||||
|  | ||||
| .print_i_digit10: | ||||
| 	rem t2, a0, t1 | ||||
| 	addi t2, t2, '0' | ||||
| 	sb t2, 0(t0) | ||||
| 	div a0, a0, t1 | ||||
| 	addi t0, t0, -1 | ||||
| 	bne zero, a0, .print_i_digit10 | ||||
|  | ||||
| 	beq zero, t3, .print_i_write_call | ||||
| 	addi t2, zero, '-' | ||||
| 	sb t2, 0(t0) | ||||
| 	addi t0, t0, -1 | ||||
|  | ||||
| .print_i_write_call: | ||||
| 	mv a0, a1 | ||||
| 	addi a1, t0, 1 | ||||
| 	sub a2, s0, t0 | ||||
| 	addi a2, a2, -9 | ||||
| 	sw a2, 0(sp) | ||||
|  | ||||
| 	_memcpy(); | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
| end; | ||||
|  | ||||
| # Writes a number to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| proc _write_i(); | ||||
| begin | ||||
| 	addi a1, sp, 0 | ||||
| 	_print_i(); | ||||
|  | ||||
| 	mv a1, a0 | ||||
| 	addi a0, sp, 0 | ||||
| 	_write_s(); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Writes a character from a0 into the standard output. | ||||
| proc _write_c(); | ||||
| begin | ||||
| 	sb a0, 0(sp) | ||||
| 	addi a0, sp, 0 | ||||
| 	li a1, 1 | ||||
| 	_write_s(); | ||||
| end; | ||||
|  | ||||
| # Write null terminated string. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - String. | ||||
| proc _write_z(); | ||||
| begin | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| .write_z_loop: | ||||
| 	# Check for 0 character. | ||||
| 	lb a0, (a0) | ||||
| 	beqz a0, .write_z_end | ||||
|  | ||||
| 	# Print a character. | ||||
| 	lw a0, 0(sp) | ||||
| 	lb a0, (a0) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a0, a0, 1 | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	goto .write_z_loop; | ||||
|  | ||||
| .write_z_end: | ||||
| end; | ||||
|  | ||||
| # Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| proc _is_upper(); | ||||
| begin | ||||
| 	li t0, 'A' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= 'A' | ||||
|  | ||||
| 	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' | ||||
| 	and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' | ||||
| end; | ||||
|  | ||||
| # Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| proc _is_lower(); | ||||
| begin | ||||
| 	li t0, 'a' - 1 | ||||
| 	sltu t2, t0, a0 # t2 = a0 >= 'a' | ||||
|  | ||||
| 	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' | ||||
| 	and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' | ||||
| end; | ||||
|  | ||||
| # Detects if the passed character is a 7-bit alpha character or an underscore. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Tested character. | ||||
| # | ||||
| # Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. | ||||
| proc _is_alpha(); | ||||
| begin | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_is_upper(); | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	_is_lower(v00); | ||||
|  | ||||
| 	lw t0, 0(sp) | ||||
| 	xori t1, t0, '_' | ||||
| 	seqz t1, t1 | ||||
|  | ||||
| 	lw t0, 4(sp) | ||||
| 	or a0, a0, t0 | ||||
| 	or a0, a0, t1 | ||||
| end; | ||||
|  | ||||
| # Detects whether the passed character is a digit | ||||
| # (a value between 0 and 9). | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Examined value. | ||||
| # | ||||
| # Sets a0 to 1 if it is a digit, to 0 otherwise. | ||||
| proc _is_digit(); | ||||
| begin | ||||
| 	li t0, '0' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= '0' | ||||
|  | ||||
| 	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' | ||||
|  | ||||
| 	and a0, t1, t2 | ||||
| end; | ||||
|  | ||||
| # Detects whether the passed character is accepted by _is_alpha or _is_digit. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Tested character. | ||||
| # | ||||
| # Sets a0 to the bitwise OR of the _is_alpha and _is_digit results. | ||||
| proc _is_alnum(); | ||||
| begin | ||||
| 	# Keep the character, a0 is overwritten by the first call. | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	_is_alpha(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	# v04 is the character saved at 4(sp). | ||||
| 	_is_digit(v04); | ||||
|  | ||||
| 	lw a1, 0(sp) | ||||
| 	or a0, a0, a1 | ||||
| end; | ||||
|  | ||||
| # Reads the next token. | ||||
| # | ||||
| # Returns token length in a0. | ||||
| # | ||||
| # The token starts at source_code_position and continues while the characters | ||||
| # are dots or are accepted by _is_alnum. source_code_position itself is not | ||||
| # changed, only a local copy of it is advanced. | ||||
| proc _read_token(); | ||||
| begin | ||||
| 	la t0, source_code_position # Token pointer. | ||||
| 	lw t0, (t0) | ||||
| 	sw t0, 0(sp) # Current token position. | ||||
| 	sw zero, 4(sp) # Token length. | ||||
|  | ||||
| .read_token_loop: | ||||
| 	lb t0, (t0) # Current character. | ||||
|  | ||||
| 	# First we try to read a directive. | ||||
| 	# A directive can contain a dot and characters. | ||||
| 	li t1, '.' | ||||
| 	beq t0, t1, .read_token_next | ||||
|  | ||||
| 	# Reload the character from the saved position and classify it. | ||||
| 	lw a0, 0(sp) | ||||
| 	lb a0, (a0) | ||||
| 	_is_alnum(); | ||||
| 	bnez a0, .read_token_next | ||||
|  | ||||
| 	goto .read_token_end; | ||||
|  | ||||
| .read_token_next: | ||||
| 	# Advance the source code position and token length. | ||||
| 	lw t0, 4(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	# t0 holds the new position when the loop is entered again. | ||||
| 	lw t0, 0(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 0(sp) | ||||
|  | ||||
| 	goto .read_token_loop; | ||||
|  | ||||
| .read_token_end: | ||||
| 	lw a0, 4(sp) | ||||
| end; | ||||
|  | ||||
| # a0 - First pointer. | ||||
| # a1 - Second pointer. | ||||
| # a2 - The length to compare. | ||||
| # | ||||
| # Returns 0 in a0 if memory regions are equal. | ||||
| proc _memcmp(); | ||||
| begin | ||||
| 	mv t0, a0 | ||||
| 	li a0, 0 | ||||
|  | ||||
| .memcmp_loop: | ||||
| 	beqz a2, .memcmp_end | ||||
|  | ||||
| 	lbu t1, (t0) | ||||
| 	lbu t2, (a1) | ||||
| 	sub a0, t1, t2 | ||||
|  | ||||
| 	bnez a0, .memcmp_end | ||||
|  | ||||
| 	addi t0, t0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	goto .memcmp_loop; | ||||
|  | ||||
| .memcmp_end: | ||||
| end; | ||||
|  | ||||
| # Copies memory. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Destination. | ||||
| # a1 - Source. | ||||
| # a2 - Size. | ||||
| # | ||||
| # Preserves a0. | ||||
| proc _memcpy(); | ||||
| begin | ||||
| 	# Remember the destination, a0 is used as the write cursor below. | ||||
| 	mv t0, a0 | ||||
|  | ||||
| .memcpy_loop: | ||||
| 	beqz a2, .memcpy_end | ||||
|  | ||||
| 	# Copy one byte. | ||||
| 	lbu t1, (a1) | ||||
| 	sb t1, (a0) | ||||
|  | ||||
| 	addi a0, a0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	# Terminated with a semicolon like every other goto statement in this file. | ||||
| 	goto .memcpy_loop; | ||||
|  | ||||
| .memcpy_end: | ||||
| 	# Restore the original destination pointer. | ||||
| 	mv a0, t0 | ||||
| end; | ||||
|  | ||||
| # Advances the token stream by a0 bytes. | ||||
| proc _advance_token(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
| 	add t1, t1, a0 | ||||
| 	sw t1, (t0) | ||||
| end; | ||||
|  | ||||
| # Prints the current token. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Token length. | ||||
| # | ||||
| # Returns a0 unchanged. | ||||
| proc _write_token(); | ||||
| begin | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	lw a1, 0(sp) | ||||
| 	_write_s(); | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
| end; | ||||
|  | ||||
| # Copies a ".section" directive together with its section name to the output | ||||
| # and advances the source code position past it. | ||||
| proc _compile_section(); | ||||
| begin | ||||
| 	# Print and skip the ".section" (8 characters) directive and a space after it. | ||||
| 	# _write_token returns a0 unchanged, so _advance_token skips the same 9 bytes. | ||||
| 	_write_token(9); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the section name. | ||||
| 	_read_token(); | ||||
| 	# One more byte for the character following the name (presumably the newline). | ||||
| 	addi a0, a0, 1 | ||||
|  | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
| end; | ||||
|  | ||||
| # Skips a line without printing it. | ||||
| proc _skip_comment(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
|  | ||||
| .skip_comment_loop: | ||||
| 	# Check for newline character. | ||||
| 	lb t2, (t1) | ||||
| 	li t3, '\n' | ||||
| 	beq t2, t3, .skip_comment_end | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| 	goto .skip_comment_loop; | ||||
|  | ||||
| .skip_comment_end: | ||||
| 	# Skip the newline. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
| end; | ||||
|  | ||||
| # Prints and skips a line. | ||||
| proc _compile_line(); | ||||
| begin | ||||
| .compile_line_loop: | ||||
| 	# a1 = pointer to the current character. | ||||
| 	la a0, source_code_position | ||||
| 	lw a1, (a0) | ||||
|  | ||||
| 	# Stop at the end of the line. | ||||
| 	lb t0, (a1) | ||||
| 	li t1, '\n' | ||||
| 	beq t0, t1, .compile_line_end | ||||
|  | ||||
| 	# Print a character. | ||||
| 	# Load a single byte: a word load from a byte pointer can be misaligned and | ||||
| 	# reads past the character. | ||||
| 	lb a0, (a1) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	goto .compile_line_loop; | ||||
|  | ||||
| .compile_line_end: | ||||
| 	# Print the newline and skip it in the input. | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	_advance_token(1); | ||||
| end; | ||||
|  | ||||
| # Compiles an integer literal at the current source code position. | ||||
| # | ||||
| # Writes "\tli a0, ", the literal token and a newline to the output and | ||||
| # advances the source code position past the token. | ||||
| proc _compile_integer_literal(); | ||||
| begin | ||||
| 	la a0, asm_li | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# The token length returned by _read_token is kept in a0 by _write_token | ||||
| 	# and then used by _advance_token. | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles a character literal at the current source code position. | ||||
| # | ||||
| # Writes "\tli a0, " followed by the quoted character (with its backslash if | ||||
| # it is escaped) and a newline, then advances the source code position past | ||||
| # the closing quote. | ||||
| proc _compile_character_literal(); | ||||
| begin | ||||
| 	la a0, asm_li | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the opening quote and skip it in the input. | ||||
|   li a0, '\'' | ||||
| 	_write_c(); | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	# Check whether the character is escaped with a backslash. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
| 	li t1, '\\' | ||||
| 	bne a0, t1, .compile_character_literal_end | ||||
| 	 | ||||
| 	# Print the backslash and skip it in the input. | ||||
|   li a0, '\\' | ||||
| 	_write_c(); | ||||
| 	_advance_token(1); | ||||
|  | ||||
| .compile_character_literal_end: | ||||
| 	# Print the character itself. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Print the closing quote. | ||||
| 	li a0, '\'' | ||||
| 	_write_c(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	# Skip the character and the closing quote. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Compiles a local variable reference at the current source code position. | ||||
| # | ||||
| # Skips the first character of the variable name, then writes "\tlw a0, ", | ||||
| # the following token, "(sp)" and a newline to the output. | ||||
| proc _compile_variable_expression(); | ||||
| begin | ||||
| 	la a0, asm_lw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the leading character (the "v" prefix) and copy the rest of the token, | ||||
| 	# which is used as the offset in front of "(sp)". | ||||
| 	_advance_token(1); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Compiles the expression at the current source code position. | ||||
| # | ||||
| # The first character selects the kind of the expression: a single quote | ||||
| # starts a character literal, "v" a local variable and a digit an integer | ||||
| # literal. Nothing is written for any other character. | ||||
| proc _compile_expression(); | ||||
| begin | ||||
| 	# a0 = first character of the expression. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
|  | ||||
| 	li t1, '\'' | ||||
| 	beq a0, t1, .compile_expression_character_literal | ||||
|  | ||||
| 	li t1, 'v' | ||||
| 	beq a0, t1, .compile_expression_variable | ||||
|  | ||||
| 	_is_digit(); | ||||
| 	bnez a0, .compile_expression_integer_literal | ||||
|  | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_character_literal: | ||||
| 	_compile_character_literal(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_integer_literal: | ||||
| 	_compile_integer_literal(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_variable: | ||||
| 	_compile_variable_expression(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_end: | ||||
| end; | ||||
|  | ||||
| # Compiles a procedure call. The source position must be at the procedure name. | ||||
| # | ||||
| # For every argument the expression code is emitted, followed by a store of a0 | ||||
| # at the stack offset 20 - 4 * argument index. Afterwards the stored values are | ||||
| # loaded into the registers a<index>, last argument first, and the call itself | ||||
| # is written without a trailing newline. | ||||
| # | ||||
| # NOTE(review): the argument count is not limited, the computed offset becomes | ||||
| # negative from the seventh argument on. | ||||
| proc _compile_call(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Procedure name length. | ||||
| 	# v4 - Procedure name pointer. | ||||
| 	# v8 - Argument count. | ||||
|  | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	sw zero, 8(sp) | ||||
|  | ||||
| 	# Skip the identifier and left paren. | ||||
| 	addi a0, a0, 1 | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# A right paren at this point means that there are no arguments. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_call_finalize | ||||
|  | ||||
| .compile_call_loop: | ||||
| 	_compile_expression(); | ||||
|  | ||||
| 	# Save the argument on the stack. | ||||
| 	la a0, asm_sw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Calculate the stack offset: 20 - (4 * argument_counter) | ||||
| 	lw t0, 8(sp) | ||||
| 	li t1, 4 | ||||
| 	mul t0, t0, t1 | ||||
| 	li t1, 20 | ||||
| 	sub a0, t1, t0 | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	# Add one to the argument counter. | ||||
| 	lw t0, 8(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	# A comma means that another argument follows. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, ',' | ||||
| 	bne t0, t1, .compile_call_finalize | ||||
|  | ||||
| 	# Skip the comma and the space after it. | ||||
| 	_advance_token(2); | ||||
| 	goto .compile_call_loop; | ||||
|  | ||||
| .compile_call_finalize: | ||||
| 	# Load the argument from the stack. | ||||
| 	# This part is repeated until the argument counter reaches zero. | ||||
|  | ||||
| 	lw t0, 8(sp) | ||||
| 	beqz t0, .compile_call_end | ||||
|  | ||||
| 	# Decrement the argument counter. | ||||
| 	lw t0, 8(sp) | ||||
| 	addi t0, t0, -1 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	la a0, asm_lw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# The register name is the letter a followed by the argument index. | ||||
| 	_write_c('a'); | ||||
| 	lw a0, 8(sp) | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Calculate the stack offset: 20 - (4 * argument_counter) | ||||
| 	lw t0, 8(sp) | ||||
| 	li t1, 4 | ||||
| 	mul t0, t0, t1 | ||||
| 	li t1, 20 | ||||
| 	sub a0, t1, t0 | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	goto .compile_call_finalize; | ||||
|  | ||||
| .compile_call_end: | ||||
| 	la a0, asm_call | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the procedure name saved at the beginning. | ||||
| 	lw a0, 4(sp) | ||||
| 	lw a1, 0(sp) | ||||
| 	_write_s(); | ||||
|  | ||||
| 	# Skip the right paren. | ||||
| 	_advance_token(1); | ||||
| end; | ||||
|  | ||||
| # Compiles a goto statement: emits asm_j followed by the label name. | ||||
| # The source position must be at the "goto" keyword. | ||||
| proc _compile_goto(); | ||||
| begin | ||||
| 	# Skip "goto" and the space after it. | ||||
| 	_advance_token(5); | ||||
|  | ||||
| 	# v0 - Label length. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	la a0, asm_j | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the label, then skip it using the value left in a0. | ||||
| 	_write_token(v00); | ||||
| 	_advance_token(); | ||||
| end; | ||||
|  | ||||
| # Compiles one line of a procedure body. | ||||
| # | ||||
| # The second character of the line selects the kind of the statement: "_" is | ||||
| # a procedure call and "g" is a goto statement. Every other line is passed to | ||||
| # _compile_line. | ||||
| proc _compile_statement(); | ||||
| begin | ||||
| 	# This is a call if the statement starts with an underscore. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	# First character after alignment tab. | ||||
| 	addi t0, t0, 1 | ||||
| 	lb t0, (t0) | ||||
| 	 | ||||
| 	li t1, '_' | ||||
| 	beq t0, t1, .compile_statement_call | ||||
|  | ||||
| 	li t1, 'g' | ||||
| 	beq t0, t1, .compile_statement_goto | ||||
|  | ||||
| 	_compile_line(); | ||||
| 	goto .compile_statement_end; | ||||
|  | ||||
| .compile_statement_call: | ||||
| 	# Skip the alignment tab. | ||||
| 	_advance_token(1); | ||||
| 	_compile_call(); | ||||
|  | ||||
| 	goto .compile_statement_semicolon; | ||||
|  | ||||
| .compile_statement_goto: | ||||
| 	# Skip the alignment tab. | ||||
| 	_advance_token(1); | ||||
| 	_compile_goto(); | ||||
|  | ||||
| 	goto .compile_statement_semicolon; | ||||
|  | ||||
| .compile_statement_semicolon: | ||||
| 	# Skip the two characters after the statement (semicolon and newline) | ||||
| 	# and end the generated line. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| .compile_statement_end: | ||||
| end; | ||||
|  | ||||
| # Compiles statements until the source position is at the "end" keyword. | ||||
| # The keyword itself is not skipped. | ||||
| proc _compile_procedure_body(); | ||||
| begin | ||||
| .compile_procedure_body_loop: | ||||
| 	# Compare the next 3 characters with "end". | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_end | ||||
| 	li a2, 3 # "end" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_procedure_body_epilogue | ||||
|  | ||||
| 	_compile_statement(); | ||||
| 	goto .compile_procedure_body_loop; | ||||
|  | ||||
| .compile_procedure_body_epilogue: | ||||
| end; | ||||
|  | ||||
| # Compiles a procedure definition. The source position must be at "proc". | ||||
| # | ||||
| # Writes asm_type_directive, the name and asm_type_function, then the name and | ||||
| # asm_colon, then asm_prologue, the compiled body and asm_epilogue. | ||||
| proc _compile_procedure(); | ||||
| begin | ||||
| 	# Skip "proc ". | ||||
| 	_advance_token(5); | ||||
|  | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) # Save the procedure name length. | ||||
|  | ||||
| 	# Write .type _procedure_name, @function. | ||||
| 	la a0, asm_type_directive | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_token(v00); | ||||
|  | ||||
| 	la a0, asm_type_function | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Write procedure label, _procedure_name: | ||||
| 	_write_token(v00); | ||||
|  | ||||
| 	la a0, asm_colon | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the function name and trailing parens, semicolon, "begin" and newline. | ||||
| 	# The 10 extra characters are "();", a newline, "begin" and a newline. | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a0, a0, 10 | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	la a0, asm_prologue | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_compile_procedure_body(); | ||||
|  | ||||
| 	# Write the epilogue. | ||||
| 	la a0, asm_epilogue | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the "end" keyword, semicolon and newline. | ||||
| 	_advance_token(5); | ||||
| end; | ||||
|  | ||||
| # Compiles a .type directive together with the definition on the line after it. | ||||
| # Both lines are copied to the output. | ||||
| proc _compile_type(); | ||||
| begin | ||||
| 	# Print and skip the ".type" (5 characters) directive and a space after it. | ||||
| 	_write_token(6); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol name. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol name, comma, space and @. | ||||
| 	addi a0, a0, 3 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol type. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol type and newline. | ||||
| 	addi a0, a0, 1 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Write the object definition itself. | ||||
| 	_compile_line(); | ||||
|  | ||||
| .compile_type_end: | ||||
| end; | ||||
|  | ||||
| # Advances the source position over consecutive newline characters. | ||||
| proc _skip_newlines(); | ||||
| begin | ||||
| 	# t0 - Address of the source position, t1 - the position itself. | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
|  | ||||
| .skip_newlines_loop: | ||||
| 	# Stop at the first character that is not a newline. A zero byte is not | ||||
| 	# a newline either, so it needs no separate check. | ||||
| 	lb t2, (t1) | ||||
| 	li t3, '\n' | ||||
| 	bne t2, t3, .skip_newlines_end | ||||
|  | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| 	goto .skip_newlines_loop; | ||||
|  | ||||
| .skip_newlines_end: | ||||
| end; | ||||
|  | ||||
| # Process the source code and print the generated code. | ||||
| # | ||||
| # Top level constructs are recognized by their first characters: comments, | ||||
| # .section, .type and .globl directives and procedures. Processing stops at | ||||
| # a zero byte or at anything that is not recognized. | ||||
| proc _compile(); | ||||
| begin | ||||
| .compile_loop: | ||||
| 	_skip_newlines(); | ||||
|  | ||||
| 	# A zero byte ends the processing, "#" starts a comment. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	beqz t0, .compile_end | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .compile_comment | ||||
|  | ||||
| 	# Compare the text at the source position with each known keyword. | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_section | ||||
| 	li a2, 8 # ".section" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_section | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_type | ||||
| 	li a2, 5 # ".type" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_type | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_proc | ||||
| 	li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_procedure | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_global | ||||
| 	li a2, 6 # ".globl" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_global | ||||
| 	# Not a known token, exit. | ||||
| 	goto .compile_end; | ||||
|  | ||||
| .compile_section: | ||||
| 	_compile_section(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_type: | ||||
| 	_compile_type(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_global: | ||||
| 	# The .globl line is copied to the output as is. | ||||
| 	_compile_line(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_comment: | ||||
| 	_skip_comment(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_procedure: | ||||
| 	_compile_procedure(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_end: | ||||
| end; | ||||
|  | ||||
| # Terminates the program. a0 contains the return code. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Status code. | ||||
| proc _exit(); | ||||
| begin | ||||
| 	# a0 is passed to the system call unchanged. | ||||
| 	li a7, 93 # SYS_EXIT | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Entry point. | ||||
| # | ||||
| # Reads the source into the source_code buffer, compiles it and exits with | ||||
| # the status 0. | ||||
| .globl _start | ||||
| proc _start(); | ||||
| begin | ||||
| 	# Read the source from the standard input. | ||||
| 	la a0, source_code | ||||
| 	li a1, 81920 # Buffer size. | ||||
| 	_read_file(); | ||||
| 	_compile(); | ||||
|  | ||||
| 	_exit(0); | ||||
|  | ||||
| end; | ||||
							
								
								
									
										969
									
								
								boot/stage4.elna
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										969
									
								
								boot/stage4.elna
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,969 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| # Stage3 compiler. | ||||
| # | ||||
| # - Procedures with none or one argument. | ||||
| # - Goto statements. | ||||
| # - Character and integer literals. | ||||
| # - Passing local variables to procedures. | ||||
| # - Local variables should have the format: v00, | ||||
| #   where 00 is its offset from the sp register. | ||||
|  | ||||
| .section .rodata | ||||
|  | ||||
| # Keywords and directives looked for in the source. They are stored without | ||||
| # a terminating zero. | ||||
| # NOTE(review): keyword_ret, keyword_begin and keyword_var are not referenced | ||||
| # anywhere else in this file. | ||||
|  | ||||
| .type keyword_section, @object | ||||
| keyword_section: .ascii ".section" | ||||
|  | ||||
| .type keyword_type, @object | ||||
| keyword_type: .ascii ".type" | ||||
|  | ||||
| .type keyword_ret, @object | ||||
| keyword_ret: .ascii "ret" | ||||
|  | ||||
| .type keyword_global, @object | ||||
| keyword_global: .ascii ".globl" | ||||
|  | ||||
| .type keyword_proc, @object | ||||
| keyword_proc: .ascii "proc " | ||||
|  | ||||
| .type keyword_end, @object | ||||
| keyword_end: .ascii "end" | ||||
|  | ||||
| .type keyword_begin, @object | ||||
| keyword_begin: .ascii "begin" | ||||
|  | ||||
| .type keyword_var, @object | ||||
| keyword_var: .ascii "var" | ||||
|  | ||||
| # Fragments of the generated assembly. These are zero terminated strings. | ||||
| # NOTE(review): asm_mv and asm_t0 are not referenced anywhere else in this file. | ||||
|  | ||||
| # Procedure prologue: reserves a 32 byte frame, saves ra and s0 and sets s0 | ||||
| # to the value sp had on entry. | ||||
| .type asm_prologue, @object | ||||
| asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" | ||||
|  | ||||
| # Procedure epilogue: restores ra and s0, releases the frame and returns. | ||||
| .type asm_epilogue, @object | ||||
| asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" | ||||
|  | ||||
| .type asm_type_directive, @object | ||||
| asm_type_directive: .string ".type " | ||||
|  | ||||
| .type asm_type_function, @object | ||||
| asm_type_function: .string ", @function\n" | ||||
|  | ||||
| .type asm_colon, @object | ||||
| asm_colon: .string ":\n" | ||||
|  | ||||
| .type asm_call, @object | ||||
| asm_call: .string "\tcall " | ||||
|  | ||||
| .type asm_j, @object | ||||
| asm_j: .string "\tj " | ||||
|  | ||||
| .type asm_li, @object | ||||
| asm_li: .string "\tli " | ||||
|  | ||||
| .type asm_lw, @object | ||||
| asm_lw: .string "\tlw " | ||||
|  | ||||
| .type asm_sw, @object | ||||
| asm_sw: .string "\tsw " | ||||
|  | ||||
| .type asm_mv, @object | ||||
| asm_mv: .string "mv " | ||||
|  | ||||
| .type asm_t0, @object | ||||
| asm_t0: .string "t0" | ||||
|  | ||||
| .type asm_a0, @object | ||||
| asm_a0: .string "a0" | ||||
|  | ||||
| .type asm_comma, @object | ||||
| asm_comma: .string ", " | ||||
|  | ||||
| .type asm_sp, @object | ||||
| asm_sp: .string "(sp)" | ||||
|  | ||||
| .section .bss | ||||
|  | ||||
| # Buffer holding the whole source text. | ||||
| # When modifying also change the read size in the entry point procedure. | ||||
| .type source_code, @object | ||||
| source_code: .zero 81920 | ||||
|  | ||||
| .section .data | ||||
|  | ||||
| # Pointer to the next unprocessed character, initially the buffer start. | ||||
| .type source_code_position, @object | ||||
| source_code_position: .word source_code | ||||
|  | ||||
| .section .text | ||||
|  | ||||
| # Reads standard input into a buffer. | ||||
| # | ||||
| # A single read may deliver fewer bytes than requested (for example when the | ||||
| # input is a pipe), so the read is repeated until the end of the input is | ||||
| # reached, an error occurs or the buffer is full. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Buffer pointer. | ||||
| # a1 - Buffer size. | ||||
| # | ||||
| # Returns the amount of bytes written in a0. If a read fails, the amount | ||||
| # read before the failure is returned. | ||||
| proc _read_file(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Next free position in the buffer. | ||||
| 	# v4 - Remaining buffer size. | ||||
| 	# v8 - Amount of bytes read so far. | ||||
| 	sw a0, 0(sp) | ||||
| 	sw a1, 4(sp) | ||||
| 	sw zero, 8(sp) | ||||
|  | ||||
| .read_file_loop: | ||||
| 	# Stop when the buffer is full. | ||||
| 	lw a2, 4(sp) | ||||
| 	beqz a2, .read_file_end | ||||
|  | ||||
| 	lw a1, 0(sp) | ||||
| 	# STDIN. | ||||
| 	li a0, 0 | ||||
| 	li a7, 63 # SYS_READ. | ||||
| 	ecall | ||||
|  | ||||
| 	# Zero means the end of the input, a negative value is an error. | ||||
| 	blez a0, .read_file_end | ||||
|  | ||||
| 	# Account for the bytes just read. | ||||
| 	lw t0, 0(sp) | ||||
| 	add t0, t0, a0 | ||||
| 	sw t0, 0(sp) | ||||
|  | ||||
| 	lw t0, 4(sp) | ||||
| 	sub t0, t0, a0 | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	lw t0, 8(sp) | ||||
| 	add t0, t0, a0 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	goto .read_file_loop; | ||||
|  | ||||
| .read_file_end: | ||||
| 	lw a0, 8(sp) | ||||
| end; | ||||
|  | ||||
| # Writes to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Buffer. | ||||
| # a1 - Buffer length. | ||||
| # | ||||
| # NOTE(review): the result of the system call is not checked here. | ||||
| proc _write_s(); | ||||
| begin | ||||
| 	# Shift the arguments by one register to make room for the descriptor. | ||||
| 	mv a2, a1 | ||||
| 	mv a1, a0 | ||||
| 	# STDOUT. | ||||
| 	li a0, 1 | ||||
| 	li a7, 64 # SYS_WRITE. | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Writes a number to a string buffer. | ||||
| # | ||||
| # The digits are produced starting with the last one into a local buffer that | ||||
| # is filled backwards. The text is then copied to the given buffer. | ||||
| # | ||||
| # t0 - Position in the local buffer. | ||||
| # t1 - Constant 10. | ||||
| # t2 - Current character. | ||||
| # t3 - Whether the number is negative. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| # a1 - Buffer pointer. | ||||
| # | ||||
| # Sets a0 to the length of the written number. | ||||
| # | ||||
| # NOTE(review): the most negative 32-bit value stays negative after neg, so it | ||||
| # looks like it is not printed correctly - confirm. | ||||
| proc _print_i(); | ||||
| begin | ||||
| 	li t1, 10 | ||||
| 	# The last character of the text goes to s0 - 9. | ||||
| 	addi t0, s0, -9 | ||||
|  | ||||
| 	li t3, 0 | ||||
| 	bgez a0, .print_i_digit10 | ||||
| 	# Remember the sign and continue with the negated number. | ||||
| 	li t3, 1 | ||||
| 	neg a0, a0 | ||||
|  | ||||
| .print_i_digit10: | ||||
| 	# Store the last decimal digit, then drop it from the number. | ||||
| 	rem t2, a0, t1 | ||||
| 	addi t2, t2, '0' | ||||
| 	sb t2, 0(t0) | ||||
| 	div a0, a0, t1 | ||||
| 	addi t0, t0, -1 | ||||
| 	bne zero, a0, .print_i_digit10 | ||||
|  | ||||
| 	# Put the minus sign in front of the digits of a negative number. | ||||
| 	beq zero, t3, .print_i_write_call | ||||
| 	addi t2, zero, '-' | ||||
| 	sb t2, 0(t0) | ||||
| 	addi t0, t0, -1 | ||||
|  | ||||
| .print_i_write_call: | ||||
| 	# t0 is one byte below the first character. Set up the copy: | ||||
| 	# a0 = destination, a1 = first character, a2 = s0 - 9 - t0 = length. | ||||
| 	mv a0, a1 | ||||
| 	addi a1, t0, 1 | ||||
| 	sub a2, s0, t0 | ||||
| 	addi a2, a2, -9 | ||||
| 	# v0 - Length of the text, saved for the return value. | ||||
| 	sw a2, 0(sp) | ||||
|  | ||||
| 	_memcpy(); | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
| end; | ||||
|  | ||||
| # Writes a number to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| proc _write_i(); | ||||
| begin | ||||
| 	# Convert the number to text in a buffer starting at sp. | ||||
| 	addi a1, sp, 0 | ||||
| 	_print_i(); | ||||
|  | ||||
| 	# _print_i left the text length in a0; print that many bytes. | ||||
| 	mv a1, a0 | ||||
| 	addi a0, sp, 0 | ||||
| 	_write_s(); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Writes a character from a0 into the standard output. | ||||
| proc _write_c(); | ||||
| begin | ||||
| 	# Store the character on the stack to have an address to print from. | ||||
| 	sb a0, 0(sp) | ||||
| 	addi a0, sp, 0 | ||||
| 	li a1, 1 | ||||
| 	_write_s(); | ||||
| end; | ||||
|  | ||||
| # Writes a null terminated string to the standard output, one character | ||||
| # at a time. The terminating zero is not written. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - String. | ||||
| proc _write_z(); | ||||
| begin | ||||
| 	# v0 - Pointer to the next character. | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| .write_z_loop: | ||||
| 	# Check for 0 character. | ||||
| 	lb a0, (a0) | ||||
| 	beqz a0, .write_z_end | ||||
|  | ||||
| 	# Print a character. | ||||
| 	lw a0, 0(sp) | ||||
| 	lb a0, (a0) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	# a0 holds the new pointer when the loop starts over. | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a0, a0, 1 | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	goto .write_z_loop; | ||||
|  | ||||
| .write_z_end: | ||||
| end; | ||||
|  | ||||
| # Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| # Both comparisons are unsigned. | ||||
| proc _is_upper(); | ||||
| begin | ||||
| 	li t0, 'A' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= 'A' | ||||
|  | ||||
| 	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' | ||||
| 	and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z' | ||||
| end; | ||||
|  | ||||
| # Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| # Both comparisons are unsigned. | ||||
| proc _is_lower(); | ||||
| begin | ||||
| 	li t0, 'a' - 1 | ||||
| 	sltu t2, t0, a0 # t2 = a0 >= 'a' | ||||
|  | ||||
| 	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' | ||||
| 	and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z' | ||||
| end; | ||||
|  | ||||
| # Detects if the passed character is a 7-bit alpha character or an underscore. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Tested character. | ||||
| # | ||||
| # Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. | ||||
| proc _is_alpha(); | ||||
| begin | ||||
| 	# v0 - Tested character. | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	# v4 - Result of the uppercase check. | ||||
| 	_is_upper(); | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	_is_lower(v00); | ||||
|  | ||||
| 	# t1 = 1 if the character is an underscore. | ||||
| 	lw t0, 0(sp) | ||||
| 	xori t1, t0, '_' | ||||
| 	seqz t1, t1 | ||||
|  | ||||
| 	# Combine the lowercase, uppercase and underscore checks. | ||||
| 	lw t0, 4(sp) | ||||
| 	or a0, a0, t0 | ||||
| 	or a0, a0, t1 | ||||
| end; | ||||
|  | ||||
| # Detects whether the passed character is a digit | ||||
| # (a character between '0' and '9'). | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Examined value. | ||||
| # | ||||
| # Sets a0 to 1 if it is a digit, to 0 otherwise. | ||||
| proc _is_digit(); | ||||
| begin | ||||
| 	li t0, '0' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= '0' | ||||
|  | ||||
| 	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' | ||||
|  | ||||
| 	and a0, t1, t2 | ||||
| end; | ||||
|  | ||||
| # Detects if the passed character is accepted by _is_alpha or by _is_digit. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Tested character. | ||||
| # | ||||
| # Sets a0 to 1 if so, to 0 otherwise. | ||||
| proc _is_alnum(); | ||||
| begin | ||||
| 	# v4 - Tested character. | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	# v0 - Result of the alpha check. | ||||
| 	_is_alpha(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_is_digit(v04); | ||||
|  | ||||
| 	lw a1, 0(sp) | ||||
| 	or a0, a0, a1 | ||||
| end; | ||||
|  | ||||
| # Reads the next token. | ||||
| # | ||||
| # A token is a run of dots and characters accepted by _is_alnum. Only the | ||||
| # length is measured, source_code_position is not changed. | ||||
| # | ||||
| # Returns token length in a0. | ||||
| proc _read_token(); | ||||
| begin | ||||
| 	la t0, source_code_position # Token pointer. | ||||
| 	lw t0, (t0) | ||||
| 	sw t0, 0(sp) # Current token position. | ||||
| 	sw zero, 4(sp) # Token length. | ||||
|  | ||||
| .read_token_loop: | ||||
| 	lb t0, (t0) # Current character. | ||||
|  | ||||
| 	# First we try to read a directive. | ||||
| 	# A directive can contain a dot and characters. | ||||
| 	li t1, '.' | ||||
| 	beq t0, t1, .read_token_next | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
| 	lb a0, (a0) | ||||
| 	_is_alnum(); | ||||
| 	bnez a0, .read_token_next | ||||
|  | ||||
| 	goto .read_token_end; | ||||
|  | ||||
| .read_token_next: | ||||
| 	# Advance the token length and the current token position. | ||||
| 	lw t0, 4(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	# t0 holds the new position when the loop starts over. | ||||
| 	lw t0, 0(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 0(sp) | ||||
|  | ||||
| 	goto .read_token_loop; | ||||
|  | ||||
| .read_token_end: | ||||
| 	lw a0, 4(sp) | ||||
| end; | ||||
|  | ||||
| # Compares two memory regions byte by byte. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - First pointer. | ||||
| # a1 - Second pointer. | ||||
| # a2 - The length to compare. | ||||
| # | ||||
| # Returns 0 in a0 if memory regions are equal. Otherwise a0 is the byte of | ||||
| # the first region minus the byte of the second region at the first mismatch. | ||||
| proc _memcmp(); | ||||
| begin | ||||
| 	# t0 walks the first region, a0 becomes the result. | ||||
| 	mv t0, a0 | ||||
| 	li a0, 0 | ||||
|  | ||||
| .memcmp_loop: | ||||
| 	beqz a2, .memcmp_end | ||||
|  | ||||
| 	lbu t1, (t0) | ||||
| 	lbu t2, (a1) | ||||
| 	sub a0, t1, t2 | ||||
|  | ||||
| 	bnez a0, .memcmp_end | ||||
|  | ||||
| 	addi t0, t0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	goto .memcmp_loop; | ||||
|  | ||||
| .memcmp_end: | ||||
| end; | ||||
|  | ||||
| # Copies memory byte by byte, starting at the lowest address. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Destination. | ||||
| # a1 - Source. | ||||
| # a2 - Size. | ||||
| # | ||||
| # Preserves a0. | ||||
| proc _memcpy(); | ||||
| begin | ||||
| 	# Keep the destination to restore it after the copy. | ||||
| 	mv t0, a0 | ||||
|  | ||||
| .memcpy_loop: | ||||
| 	beqz a2, .memcpy_end | ||||
|  | ||||
| 	lbu t1, (a1) | ||||
| 	sb t1, (a0) | ||||
|  | ||||
| 	addi a0, a0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	goto .memcpy_loop; | ||||
|  | ||||
| .memcpy_end: | ||||
| 	mv a0, t0 | ||||
| end; | ||||
|  | ||||
| # Advances the token stream by a0 bytes. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Amount of bytes added to source_code_position. | ||||
| proc _advance_token(); | ||||
| begin | ||||
| 	# Load the position, add the offset and store it back. | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
| 	add t1, t1, a0 | ||||
| 	sw t1, (t0) | ||||
| end; | ||||
|  | ||||
| # Prints the current token, that is a0 bytes starting at the source position. | ||||
| # The source position is not changed. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Token length. | ||||
| # | ||||
| # Returns a0 unchanged. | ||||
| proc _write_token(); | ||||
| begin | ||||
| 	# v0 - Token length. | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	lw a1, 0(sp) | ||||
| 	_write_s(); | ||||
|  | ||||
| 	# Restore the length for the caller. | ||||
| 	lw a0, 0(sp) | ||||
| end; | ||||
|  | ||||
| # Compiles a .section directive by copying its line to the output. | ||||
| proc _compile_section(); | ||||
| begin | ||||
| 	# Print and skip the ".section" (8 characters) directive and a space after it. | ||||
| 	_write_token(9); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the section name. | ||||
| 	_read_token(); | ||||
| 	# Include the character after the name (the newline). | ||||
| 	addi a0, a0, 1 | ||||
|  | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
| end; | ||||
|  | ||||
| # Skips a comment line without printing it. | ||||
| # | ||||
| # The source position is moved behind the next newline. If the input ends | ||||
| # before a newline is found, the position is left at the terminating zero | ||||
| # byte instead of running past the end of the source. | ||||
| proc _skip_comment(); | ||||
| begin | ||||
| 	# t0 - Address of the source position, t1 - the position itself. | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
|  | ||||
| .skip_comment_loop: | ||||
| 	# Check for the end of the input. | ||||
| 	lb t2, (t1) | ||||
| 	beqz t2, .skip_comment_end | ||||
|  | ||||
| 	# Check for newline character. | ||||
| 	li t3, '\n' | ||||
| 	beq t2, t3, .skip_comment_newline | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| 	goto .skip_comment_loop; | ||||
|  | ||||
| .skip_comment_newline: | ||||
| 	# Skip the newline. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| .skip_comment_end: | ||||
| end; | ||||
|  | ||||
| # Prints and skips a line. | ||||
| # | ||||
| # Copies the characters up to the next newline to the output, writes a newline | ||||
| # and moves the source position behind the newline. | ||||
| # | ||||
| # NOTE(review): the end of the input is not checked, the line is expected to | ||||
| # be terminated by a newline. | ||||
| proc _compile_line(); | ||||
| begin | ||||
| .compile_line_loop: | ||||
| 	la a0, source_code_position | ||||
| 	lw a1, (a0) | ||||
|  | ||||
| 	lb t0, (a1) | ||||
| 	li t1, '\n' | ||||
| 	beq t0, t1, .compile_line_end | ||||
|  | ||||
| 	# Print a character. Only a single byte is loaded, the source position | ||||
| 	# is not necessarily aligned to a word boundary. | ||||
| 	lb a0, (a1) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	goto .compile_line_loop; | ||||
|  | ||||
| .compile_line_end: | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	_advance_token(1); | ||||
| end; | ||||
|  | ||||
| # Compiles an integer literal: emits asm_li, asm_a0, asm_comma, the literal | ||||
| # copied from the source and a newline. | ||||
| proc _compile_integer_literal(); | ||||
| begin | ||||
| 	la a0, asm_li | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Measure, print and skip the literal. The length stays in a0 between | ||||
| 	# the three calls. | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles a character literal: emits asm_li, asm_a0, asm_comma, the literal | ||||
| # in single quotes and a newline. The source position must be at the opening | ||||
| # quote. A backslash after the opening quote is copied together with the | ||||
| # character following it. | ||||
| proc _compile_character_literal(); | ||||
| begin | ||||
| 	la a0, asm_li | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Write the opening quote and skip it in the source. | ||||
| 	_write_c('\''); | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	# Check whether the literal starts with a backslash. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
| 	li t1, '\\' | ||||
| 	bne a0, t1, .compile_character_literal_end | ||||
| 	 | ||||
| 	_write_c('\\'); | ||||
| 	_advance_token(1); | ||||
|  | ||||
| .compile_character_literal_end: | ||||
| 	# Write the character at the source position. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Write the closing quote and end the generated line. | ||||
| 	_write_c('\''); | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	# Skip the character and the closing quote. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Compiles a local variable expression (the letter v followed by its offset | ||||
| # from sp) into a load of that stack slot into a0. | ||||
| proc _compile_variable_expression(); | ||||
| begin | ||||
| 	# Emit the instruction prefix: mnemonic, destination register, separator. | ||||
| 	la a0, asm_lw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the leading "v", then copy the offset digits to the output. | ||||
| 	# The length returned by _read_token stays in a0 for the next two calls. | ||||
| 	_advance_token(1); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Close the operand and end the generated line. | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Compiles a single expression by dispatching on its first character: | ||||
| # a single quote starts a character literal, "v" a local variable and | ||||
| # a digit an integer literal. Anything else produces no output. | ||||
| proc _compile_expression(); | ||||
| begin | ||||
| 	# a0 = first character of the expression. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
|  | ||||
| 	li t1, '\'' | ||||
| 	beq a0, t1, .compile_expression_character_literal | ||||
|  | ||||
| 	li t1, 'v' | ||||
| 	beq a0, t1, .compile_expression_variable | ||||
|  | ||||
| 	# The digit check is done on the character still held in a0. | ||||
| 	_is_digit(); | ||||
| 	bnez a0, .compile_expression_integer_literal | ||||
|  | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_character_literal: | ||||
| 	_compile_character_literal(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_integer_literal: | ||||
| 	_compile_integer_literal(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_variable: | ||||
| 	_compile_variable_expression(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_end: | ||||
| end; | ||||
|  | ||||
| # Compiles a procedure call. The source position must be at the procedure name. | ||||
| # | ||||
| # For every argument the expression code is emitted, followed by a store of a0 | ||||
| # at the stack offset 20 - 4 * argument index. Afterwards the stored values are | ||||
| # loaded into the registers a<index>, last argument first, and the call itself | ||||
| # is written without a trailing newline. | ||||
| # | ||||
| # NOTE(review): the argument count is not limited, the computed offset becomes | ||||
| # negative from the seventh argument on. | ||||
| proc _compile_call(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Procedure name length. | ||||
| 	# v4 - Procedure name pointer. | ||||
| 	# v8 - Argument count. | ||||
|  | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	sw zero, 8(sp) | ||||
|  | ||||
| 	# Skip the identifier and left paren. | ||||
| 	addi a0, a0, 1 | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# A right paren at this point means that there are no arguments. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_call_finalize | ||||
|  | ||||
| .compile_call_loop: | ||||
| 	_compile_expression(); | ||||
|  | ||||
| 	# Save the argument on the stack. | ||||
| 	la a0, asm_sw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Calculate the stack offset: 20 - (4 * argument_counter) | ||||
| 	lw t0, 8(sp) | ||||
| 	li t1, 4 | ||||
| 	mul t0, t0, t1 | ||||
| 	li t1, 20 | ||||
| 	sub a0, t1, t0 | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	# Add one to the argument counter. | ||||
| 	lw t0, 8(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	# A comma means that another argument follows. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, ',' | ||||
| 	bne t0, t1, .compile_call_finalize | ||||
|  | ||||
| 	# Skip the comma and the space after it. | ||||
| 	_advance_token(2); | ||||
| 	goto .compile_call_loop; | ||||
|  | ||||
| .compile_call_finalize: | ||||
| 	# Load the argument from the stack. | ||||
| 	# This part is repeated until the argument counter reaches zero. | ||||
|  | ||||
| 	lw t0, 8(sp) | ||||
| 	beqz t0, .compile_call_end | ||||
|  | ||||
| 	# Decrement the argument counter. | ||||
| 	lw t0, 8(sp) | ||||
| 	addi t0, t0, -1 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	la a0, asm_lw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# The register name is the letter a followed by the argument index. | ||||
| 	_write_c('a'); | ||||
| 	lw a0, 8(sp) | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Calculate the stack offset: 20 - (4 * argument_counter) | ||||
| 	lw t0, 8(sp) | ||||
| 	li t1, 4 | ||||
| 	mul t0, t0, t1 | ||||
| 	li t1, 20 | ||||
| 	sub a0, t1, t0 | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	goto .compile_call_finalize; | ||||
|  | ||||
| .compile_call_end: | ||||
| 	la a0, asm_call | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the procedure name saved at the beginning (pointer, length). | ||||
| 	_write_s(v04, v00); | ||||
|  | ||||
| 	# Skip the right paren. | ||||
| 	_advance_token(1); | ||||
| end; | ||||
|  | ||||
| # Compiles a goto statement into a jump: emits asm_j followed by the label. | ||||
| # The source position must be at the "goto" keyword. | ||||
| proc _compile_goto(); | ||||
| begin | ||||
| 	# Skip "goto" and the space after it. | ||||
| 	_advance_token(5); | ||||
|  | ||||
| 	# v0 - Label length. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	la a0, asm_j | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the label, then skip it. _write_token returns the length in a0. | ||||
| 	_write_token(v00); | ||||
| 	_advance_token(); | ||||
| end; | ||||
|  | ||||
| # Compiles one line of a procedure body. | ||||
| # | ||||
| # The second character of the line selects the kind of the statement: "_" is | ||||
| # a procedure call and "g" is a goto statement. Every other line is copied to | ||||
| # the output unchanged. | ||||
| proc _compile_statement(); | ||||
| begin | ||||
| 	# This is a call if the statement starts with an underscore. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	# First character after alignment tab. | ||||
| 	addi t0, t0, 1 | ||||
| 	lb t0, (t0) | ||||
| 	 | ||||
| 	li t1, '_' | ||||
| 	beq t0, t1, .compile_statement_call | ||||
|  | ||||
| 	li t1, 'g' | ||||
| 	beq t0, t1, .compile_statement_goto | ||||
|  | ||||
| 	_compile_line(); | ||||
| 	goto .compile_statement_end; | ||||
|  | ||||
| .compile_statement_call: | ||||
| 	# Skip the alignment tab. | ||||
| 	_advance_token(1); | ||||
| 	_compile_call(); | ||||
|  | ||||
| 	goto .compile_statement_semicolon; | ||||
|  | ||||
| .compile_statement_goto: | ||||
| 	# Skip the alignment tab. | ||||
| 	_advance_token(1); | ||||
| 	_compile_goto(); | ||||
|  | ||||
| 	goto .compile_statement_semicolon; | ||||
|  | ||||
| .compile_statement_semicolon: | ||||
| 	# Skip the two characters after the statement (semicolon and newline) | ||||
| 	# and end the generated line. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| .compile_statement_end: | ||||
| end; | ||||
|  | ||||
| # Compiles statements until the source position is at the "end" keyword. | ||||
| # The keyword itself is not skipped. | ||||
| proc _compile_procedure_body(); | ||||
| begin | ||||
| .compile_procedure_body_loop: | ||||
| 	# Compare the next 3 characters with "end". | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_end | ||||
| 	li a2, 3 # "end" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_procedure_body_epilogue | ||||
|  | ||||
| 	_compile_statement(); | ||||
| 	goto .compile_procedure_body_loop; | ||||
|  | ||||
| .compile_procedure_body_epilogue: | ||||
| end; | ||||
|  | ||||
| # Compiles a procedure definition. The source position must be at "proc". | ||||
| # | ||||
| # Writes the .type directive and the label of the procedure, the prologue, | ||||
| # the compiled body and the epilogue. | ||||
| proc _compile_procedure(); | ||||
| begin | ||||
| 	# Skip "proc ". | ||||
| 	_advance_token(5); | ||||
|  | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) # Save the procedure name length. | ||||
|  | ||||
| 	# Write .type _procedure_name, @function. | ||||
| 	la a0, asm_type_directive | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_token(v00); | ||||
|  | ||||
| 	la a0, asm_type_function | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Write procedure label, _procedure_name: | ||||
| 	_write_token(v00); | ||||
|  | ||||
| 	la a0, asm_colon | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the function name and trailing parens, semicolon, "begin" and newline. | ||||
| 	# The 10 extra characters are "();", a newline, "begin" and a newline. | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a0, a0, 10 | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	la a0, asm_prologue | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_compile_procedure_body(); | ||||
|  | ||||
| 	# Write the epilogue. | ||||
| 	la a0, asm_epilogue | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the "end" keyword, semicolon and newline. | ||||
| 	_advance_token(5); | ||||
| end; | ||||
|  | ||||
| # Compiles a .type directive together with the definition on the line after it. | ||||
| # Both lines are copied to the output. | ||||
| proc _compile_type(); | ||||
| begin | ||||
| 	# Print and skip the ".type" (5 characters) directive and a space after it. | ||||
| 	_write_token(6); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol name. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol name, comma, space and @. | ||||
| 	addi a0, a0, 3 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol type. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol type and newline. | ||||
| 	addi a0, a0, 1 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Write the object definition itself. | ||||
| 	_compile_line(); | ||||
|  | ||||
| .compile_type_end: | ||||
| end; | ||||
|  | ||||
| # Advances the source position over consecutive newline characters. | ||||
| proc _skip_newlines(); | ||||
| begin | ||||
| 	# t0 - Address of the source position, t1 - the position itself. | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
|  | ||||
| .skip_newlines_loop: | ||||
| 	# Stop at the first character that is not a newline. A zero byte is not | ||||
| 	# a newline either, so it needs no separate check. | ||||
| 	lb t2, (t1) | ||||
| 	li t3, '\n' | ||||
| 	bne t2, t3, .skip_newlines_end | ||||
|  | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| 	goto .skip_newlines_loop; | ||||
|  | ||||
| .skip_newlines_end: | ||||
| end; | ||||
|  | ||||
| # Process the source code and print the generated code. | ||||
| # | ||||
| # Top level constructs are recognized by their first characters: comments, | ||||
| # .section, .type and .globl directives and procedures. Processing stops at | ||||
| # the zero byte after the source or at anything that is not recognized. | ||||
| proc _compile(); | ||||
| begin | ||||
| .compile_loop: | ||||
| 	_skip_newlines(); | ||||
|  | ||||
| 	# A zero byte ends the processing, "#" starts a comment. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	beqz t0, .compile_end | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .compile_comment | ||||
|  | ||||
| 	# Compare the text at the source position with each known keyword. | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_section | ||||
| 	li a2, 8 # ".section" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_section | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_type | ||||
| 	li a2, 5 # ".type" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_type | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_proc | ||||
| 	li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_procedure | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_global | ||||
| 	li a2, 6 # ".globl" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_global | ||||
| 	# Not a known token, exit. | ||||
| 	goto .compile_end; | ||||
|  | ||||
| .compile_section: | ||||
| 	_compile_section(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_type: | ||||
| 	_compile_type(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_global: | ||||
| 	# The .globl line is copied to the output as is. | ||||
| 	_compile_line(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_comment: | ||||
| 	_skip_comment(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_procedure: | ||||
| 	_compile_procedure(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_end: | ||||
| end; | ||||
|  | ||||
| # Terminates the program. a0 contains the return code. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Status code. | ||||
| proc _exit(); | ||||
| begin | ||||
| 	# The body leaves a0 untouched, so the caller's value is the exit status. | ||||
| 	li a7, 93 # SYS_EXIT | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Entry point. | ||||
| # | ||||
| # Reads the source from the standard input, compiles it and exits with status 0. | ||||
| .globl _start | ||||
| proc _start(); | ||||
| begin | ||||
| 	# Read the source from the standard input. | ||||
| 	la a0, source_code | ||||
| 	li a1, 81920 # Buffer size. | ||||
| 	_read_file(); | ||||
| 	_compile(); | ||||
|  | ||||
| 	_exit(0); | ||||
|  | ||||
| end; | ||||
							
								
								
									
										969
									
								
								boot/stage5.elna
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										969
									
								
								boot/stage5.elna
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,969 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| # Stage3 compiler. | ||||
| # | ||||
| # - Procedures with none or one argument. | ||||
| # - Goto statements. | ||||
| # - Character and integer literals. | ||||
| # - Passing local variables to procedures. | ||||
| # - Local variables should have the format: v00, | ||||
| #   where 00 is its offset from the sp register. | ||||
|  | ||||
| .section .rodata | ||||
|  | ||||
| # Source keywords, stored without a terminating zero. The ones that are used | ||||
| # in this file are compared with _memcmp using an explicit length. | ||||
| .type keyword_section, @object | ||||
| keyword_section: .ascii ".section" | ||||
|  | ||||
| .type keyword_type, @object | ||||
| keyword_type: .ascii ".type" | ||||
|  | ||||
| # Not referenced anywhere in this file. | ||||
| .type keyword_ret, @object | ||||
| keyword_ret: .ascii "ret" | ||||
|  | ||||
| .type keyword_global, @object | ||||
| keyword_global: .ascii ".globl" | ||||
|  | ||||
| .type keyword_proc, @object | ||||
| keyword_proc: .ascii "proc " | ||||
|  | ||||
| .type keyword_end, @object | ||||
| keyword_end: .ascii "end" | ||||
|  | ||||
| # Not referenced anywhere in this file. | ||||
| .type keyword_begin, @object | ||||
| keyword_begin: .ascii "begin" | ||||
|  | ||||
| # Not referenced anywhere in this file. | ||||
| .type keyword_var, @object | ||||
| keyword_var: .ascii "var" | ||||
|  | ||||
| # Zero-terminated fragments of the generated assembly. The ones that are used | ||||
| # in this file are printed with _write_z. | ||||
| # | ||||
| # The prologue reserves a 32 byte frame, saves ra and s0 in its two topmost | ||||
| # words and points s0 at the caller's sp. The epilogue undoes this and returns. | ||||
| .type asm_prologue, @object | ||||
| asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" | ||||
|  | ||||
| .type asm_epilogue, @object | ||||
| asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" | ||||
|  | ||||
| .type asm_type_directive, @object | ||||
| asm_type_directive: .string ".type " | ||||
|  | ||||
| .type asm_type_function, @object | ||||
| asm_type_function: .string ", @function\n" | ||||
|  | ||||
| .type asm_colon, @object | ||||
| asm_colon: .string ":\n" | ||||
|  | ||||
| .type asm_call, @object | ||||
| asm_call: .string "\tcall " | ||||
|  | ||||
| .type asm_j, @object | ||||
| asm_j: .string "\tj " | ||||
|  | ||||
| .type asm_li, @object | ||||
| asm_li: .string "\tli " | ||||
|  | ||||
| .type asm_lw, @object | ||||
| asm_lw: .string "\tlw " | ||||
|  | ||||
| .type asm_sw, @object | ||||
| asm_sw: .string "\tsw " | ||||
|  | ||||
| # Not referenced anywhere in this file. | ||||
| .type asm_mv, @object | ||||
| asm_mv: .string "mv " | ||||
|  | ||||
| # Not referenced anywhere in this file. | ||||
| .type asm_t0, @object | ||||
| asm_t0: .string "t0" | ||||
|  | ||||
| .type asm_a0, @object | ||||
| asm_a0: .string "a0" | ||||
|  | ||||
| .type asm_comma, @object | ||||
| asm_comma: .string ", " | ||||
|  | ||||
| .type asm_sp, @object | ||||
| asm_sp: .string "(sp)" | ||||
|  | ||||
| .section .bss | ||||
|  | ||||
| # Buffer the source text is read into. | ||||
| # When modifying also change the read size in the entry point procedure. | ||||
| .type source_code, @object | ||||
| source_code: .zero 81920 | ||||
|  | ||||
| .section .data | ||||
|  | ||||
| # Pointer to the current position in the source text, initially the | ||||
| # beginning of source_code. | ||||
| .type source_code_position, @object | ||||
| source_code_position: .word source_code | ||||
|  | ||||
| .section .text | ||||
|  | ||||
| # Reads standard input into a buffer until the buffer is full, the input ends | ||||
| # or a read fails. A single read call may return less than it was asked for | ||||
| # (for example when the input is a pipe), so the call is repeated. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Buffer pointer. | ||||
| # a1 - Buffer size. | ||||
| # | ||||
| # Returns the total amount of bytes stored in the buffer in a0. | ||||
| proc _read_file(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Next free position in the buffer. | ||||
| 	# v4 - Remaining buffer capacity. | ||||
| 	# v8 - Amount of bytes read so far. | ||||
| 	sw a0, 0(sp) | ||||
| 	sw a1, 4(sp) | ||||
| 	sw zero, 8(sp) | ||||
|  | ||||
| .read_file_loop: | ||||
| 	# Stop when there is no room left. | ||||
| 	lw a2, 4(sp) | ||||
| 	beqz a2, .read_file_end | ||||
|  | ||||
| 	lw a1, 0(sp) | ||||
| 	# STDIN. | ||||
| 	li a0, 0 | ||||
| 	li a7, 63 # SYS_READ. | ||||
| 	ecall | ||||
|  | ||||
| 	# Zero means the end of the input, a negative value is an error code. | ||||
| 	blez a0, .read_file_end | ||||
|  | ||||
| 	# Account for the bytes that were just read. | ||||
| 	lw t0, 0(sp) | ||||
| 	add t0, t0, a0 | ||||
| 	sw t0, 0(sp) | ||||
|  | ||||
| 	lw t0, 4(sp) | ||||
| 	sub t0, t0, a0 | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	lw t0, 8(sp) | ||||
| 	add t0, t0, a0 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	goto .read_file_loop; | ||||
|  | ||||
| .read_file_end: | ||||
| 	lw a0, 8(sp) | ||||
| end; | ||||
|  | ||||
| # Writes to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Buffer. | ||||
| # a1 - Buffer length. | ||||
| # | ||||
| # The result of the system call is left in a0 and is not checked here. | ||||
| proc _write_s(); | ||||
| begin | ||||
| 	# Move the arguments into the registers of the system call: | ||||
| 	# a1 - buffer, a2 - length. | ||||
| 	mv a2, a1 | ||||
| 	mv a1, a0 | ||||
| 	# STDOUT. | ||||
| 	li a0, 1 | ||||
| 	li a7, 64 # SYS_WRITE. | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Writes a number to a string buffer. | ||||
| # | ||||
| # The decimal digits are first produced from right to left in the local stack | ||||
| # frame and then copied to the destination with _memcpy. | ||||
| # | ||||
| # t0 - Local buffer. | ||||
| # t1 - Constant 10. | ||||
| # t2 - Current character. | ||||
| # t3 - Whether the number is negative. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| # a1 - Buffer pointer. | ||||
| # | ||||
| # Sets a0 to the length of the written number. | ||||
| proc _print_i(); | ||||
| begin | ||||
| 	li t1, 10 | ||||
| 	# s0 is sp + 32 after the prologue, so the last digit goes to 23(sp), | ||||
| 	# right below the saved s0. | ||||
| 	addi t0, s0, -9 | ||||
|  | ||||
| 	# Remember the sign and continue with the absolute value. | ||||
| 	li t3, 0 | ||||
| 	bgez a0, .print_i_digit10 | ||||
| 	li t3, 1 | ||||
| 	neg a0, a0 | ||||
|  | ||||
| .print_i_digit10: | ||||
| 	# Store the lowest decimal digit and drop it from the number. | ||||
| 	rem t2, a0, t1 | ||||
| 	addi t2, t2, '0' | ||||
| 	sb t2, 0(t0) | ||||
| 	div a0, a0, t1 | ||||
| 	addi t0, t0, -1 | ||||
| 	bne zero, a0, .print_i_digit10 | ||||
|  | ||||
| 	# Prepend the minus sign for negative numbers. | ||||
| 	beq zero, t3, .print_i_write_call | ||||
| 	addi t2, zero, '-' | ||||
| 	sb t2, 0(t0) | ||||
| 	addi t0, t0, -1 | ||||
|  | ||||
| .print_i_write_call: | ||||
| 	# t0 points one byte below the first character. The length is | ||||
| 	# (s0 - 9) - t0. It is saved in 0(sp) since _memcpy counts a2 down to zero. | ||||
| 	mv a0, a1 | ||||
| 	addi a1, t0, 1 | ||||
| 	sub a2, s0, t0 | ||||
| 	addi a2, a2, -9 | ||||
| 	sw a2, 0(sp) | ||||
|  | ||||
| 	_memcpy(); | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
| end; | ||||
|  | ||||
| # Writes a number to the standard output. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Whole number. | ||||
| proc _write_i(); | ||||
| begin | ||||
| 	# Format the number into the bottom of the local stack frame. | ||||
| 	addi a1, sp, 0 | ||||
| 	_print_i(); | ||||
|  | ||||
| 	# _print_i returned the text length in a0, pass it on as the buffer length. | ||||
| 	mv a1, a0 | ||||
| 	addi a0, sp, 0 | ||||
| 	_write_s(); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Writes a character from a0 into the standard output. | ||||
| # Only the lowest byte of a0 is written. | ||||
| proc _write_c(); | ||||
| begin | ||||
| 	# Put the character into the local frame to have an address to write from. | ||||
| 	sb a0, 0(sp) | ||||
| 	addi a0, sp, 0 | ||||
| 	li a1, 1 | ||||
| 	_write_s(); | ||||
| end; | ||||
|  | ||||
| # Writes a null terminated string to the standard output, one character at | ||||
| # a time. The terminating zero is not written. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - String. | ||||
| proc _write_z(); | ||||
| begin | ||||
| 	# 0(sp) holds the pointer to the current character. | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| .write_z_loop: | ||||
| 	# Check for 0 character. | ||||
| 	lb a0, (a0) | ||||
| 	beqz a0, .write_z_end | ||||
|  | ||||
| 	# Print a character. | ||||
| 	lw a0, 0(sp) | ||||
| 	lb a0, (a0) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a0, a0, 1 | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	goto .write_z_loop; | ||||
|  | ||||
| .write_z_end: | ||||
| end; | ||||
|  | ||||
| # Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| # Both comparisons are unsigned. | ||||
| proc _is_upper(); | ||||
| begin | ||||
| 	li t0, 'A' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= 'A' | ||||
|  | ||||
| 	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' | ||||
| 	and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z' | ||||
| end; | ||||
|  | ||||
| # Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. | ||||
| # Both comparisons are unsigned. | ||||
| proc _is_lower(); | ||||
| begin | ||||
| 	li t0, 'a' - 1 | ||||
| 	sltu t2, t0, a0 # t2 = a0 >= 'a' | ||||
|  | ||||
| 	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' | ||||
| 	and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z' | ||||
| end; | ||||
|  | ||||
| # Detects if the passed character is a 7-bit alpha character or an underscore. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Tested character. | ||||
| # | ||||
| # Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. | ||||
| proc _is_alpha(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Tested character. | ||||
| 	# v4 - Result of the uppercase check. | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_is_upper(); | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	# The character is passed again from the stack since a0 was overwritten. | ||||
| 	_is_lower(v00); | ||||
|  | ||||
| 	# t1 = character == '_' | ||||
| 	lw t0, 0(sp) | ||||
| 	xori t1, t0, '_' | ||||
| 	seqz t1, t1 | ||||
|  | ||||
| 	# Combine the lowercase, uppercase and underscore checks. | ||||
| 	lw t0, 4(sp) | ||||
| 	or a0, a0, t0 | ||||
| 	or a0, a0, t1 | ||||
| end; | ||||
|  | ||||
| # Detects whether the passed character is a digit | ||||
| # (a value between 0 and 9). | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Examined value. | ||||
| # | ||||
| # Sets a0 to 1 if it is a digit, to 0 otherwise. | ||||
| proc _is_digit(); | ||||
| begin | ||||
| 	li t0, '0' - 1 | ||||
| 	sltu t1, t0, a0 # t1 = a0 >= '0' | ||||
|  | ||||
| 	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' | ||||
|  | ||||
| 	and a0, t1, t2 | ||||
| end; | ||||
|  | ||||
| # Detects whether the passed character is a letter, an underscore or a digit. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Tested character. | ||||
| # | ||||
| # Sets a0 to 1 if so, to 0 otherwise. | ||||
| proc _is_alnum(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Result of the alpha check. | ||||
| 	# v4 - Tested character. | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	_is_alpha(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_is_digit(v04); | ||||
|  | ||||
| 	lw a1, 0(sp) | ||||
| 	or a0, a0, a1 | ||||
| end; | ||||
|  | ||||
| # Reads the next token. | ||||
| # | ||||
| # A token is the longest run of dots, letters, underscores and digits that | ||||
| # starts at source_code_position. The position itself is not changed. | ||||
| # | ||||
| # Returns token length in a0. | ||||
| proc _read_token(); | ||||
| begin | ||||
| 	la t0, source_code_position # Token pointer. | ||||
| 	lw t0, (t0) | ||||
| 	sw t0, 0(sp) # Current token position. | ||||
| 	sw zero, 4(sp) # Token length. | ||||
|  | ||||
| .read_token_loop: | ||||
| 	lb t0, (t0) # Current character. | ||||
|  | ||||
| 	# First we try to read a directive. | ||||
| 	# A directive can contain a dot and characters. | ||||
| 	li t1, '.' | ||||
| 	beq t0, t1, .read_token_next | ||||
|  | ||||
| 	# The character is loaded again for the call. | ||||
| 	lw a0, 0(sp) | ||||
| 	lb a0, (a0) | ||||
| 	_is_alnum(); | ||||
| 	bnez a0, .read_token_next | ||||
|  | ||||
| 	goto .read_token_end; | ||||
|  | ||||
| .read_token_next: | ||||
| 	# Advance the source code position and token length. | ||||
| 	lw t0, 4(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	lw t0, 0(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 0(sp) | ||||
|  | ||||
| 	goto .read_token_loop; | ||||
|  | ||||
| .read_token_end: | ||||
| 	lw a0, 4(sp) | ||||
| end; | ||||
|  | ||||
| # Compares two memory regions byte by byte. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - First pointer. | ||||
| # a1 - Second pointer. | ||||
| # a2 - The length to compare. | ||||
| # | ||||
| # Returns 0 in a0 if memory regions are equal. Otherwise a0 is the difference | ||||
| # between the first two bytes that differ, taken as unsigned values. | ||||
| proc _memcmp(); | ||||
| begin | ||||
| 	# a0 becomes the result, the first pointer is kept in t0. | ||||
| 	mv t0, a0 | ||||
| 	li a0, 0 | ||||
|  | ||||
| .memcmp_loop: | ||||
| 	beqz a2, .memcmp_end | ||||
|  | ||||
| 	lbu t1, (t0) | ||||
| 	lbu t2, (a1) | ||||
| 	sub a0, t1, t2 | ||||
|  | ||||
| 	bnez a0, .memcmp_end | ||||
|  | ||||
| 	addi t0, t0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	goto .memcmp_loop; | ||||
|  | ||||
| .memcmp_end: | ||||
| end; | ||||
|  | ||||
| # Copies memory byte by byte, from the lowest address upwards. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Destination. | ||||
| # a1 - Source. | ||||
| # a2 - Size. | ||||
| # | ||||
| # Preserves a0. a1 and a2 are modified: a2 is zero on return. | ||||
| proc _memcpy(); | ||||
| begin | ||||
| 	# Keep the destination to restore it at the end. | ||||
| 	mv t0, a0 | ||||
|  | ||||
| .memcpy_loop: | ||||
| 	beqz a2, .memcpy_end | ||||
|  | ||||
| 	lbu t1, (a1) | ||||
| 	sb t1, (a0) | ||||
|  | ||||
| 	addi a0, a0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	goto .memcpy_loop; | ||||
|  | ||||
| .memcpy_end: | ||||
| 	mv a0, t0 | ||||
| end; | ||||
|  | ||||
| # Advances the token stream by a0 bytes. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Amount of bytes to add to source_code_position. | ||||
| # | ||||
| # a0 is not modified. | ||||
| proc _advance_token(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
| 	add t1, t1, a0 | ||||
| 	sw t1, (t0) | ||||
| end; | ||||
|  | ||||
| # Prints the current token. | ||||
| # | ||||
| # Writes a0 bytes starting at source_code_position to the standard output. | ||||
| # The position is not changed. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Token length. | ||||
| # | ||||
| # Returns a0 unchanged. | ||||
| proc _write_token(); | ||||
| begin | ||||
| 	# The length is saved to be restored after the write call. | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	lw a1, 0(sp) | ||||
| 	_write_s(); | ||||
|  | ||||
| 	lw a0, 0(sp) | ||||
| end; | ||||
|  | ||||
| # Copies a ".section" directive with its section name to the output. | ||||
| # The token length travels in a0 from one call to the next. | ||||
| proc _compile_section(); | ||||
| begin | ||||
| 	# Print and skip the ".section" (8 characters) directive and a space after it. | ||||
| 	_write_token(9); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the section name. | ||||
| 	_read_token(); | ||||
| 	# One more character for the newline after the name. | ||||
| 	addi a0, a0, 1 | ||||
|  | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
| end; | ||||
|  | ||||
| # Skips a comment line, including its newline, without printing anything. | ||||
| # If the input ends before a newline is found, the position is left at the | ||||
| # terminating zero. | ||||
| proc _skip_comment(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
|  | ||||
| .skip_comment_loop: | ||||
| 	lb t2, (t1) | ||||
| 	# Do not run past the end of the input. | ||||
| 	beqz t2, .skip_comment_eof | ||||
|  | ||||
| 	# Check for newline character. | ||||
| 	li t3, '\n' | ||||
| 	beq t2, t3, .skip_comment_end | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| 	goto .skip_comment_loop; | ||||
|  | ||||
| .skip_comment_end: | ||||
| 	# Skip the newline. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| .skip_comment_eof: | ||||
| end; | ||||
|  | ||||
| # Prints and skips a line. | ||||
| # | ||||
| # Everything from source_code_position up to and including the next newline | ||||
| # is written to the standard output, the position is moved behind the newline. | ||||
| proc _compile_line(); | ||||
| begin | ||||
| .compile_line_loop: | ||||
| 	la a0, source_code_position | ||||
| 	lw a1, (a0) | ||||
|  | ||||
| 	lb t0, (a1) | ||||
| 	li t1, '\n' | ||||
| 	beq t0, t1, .compile_line_end | ||||
|  | ||||
| 	# Print a character. Only this one byte is loaded, a word load could be | ||||
| 	# misaligned and reach past the end of the buffer. | ||||
| 	lb a0, (a1) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	# Advance the input string by one byte. | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	goto .compile_line_loop; | ||||
|  | ||||
| .compile_line_end: | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	_advance_token(1); | ||||
| end; | ||||
|  | ||||
| # Compiles an integer literal at the current position into "li a0, <literal>" | ||||
| # and skips the literal. | ||||
| proc _compile_integer_literal(); | ||||
| begin | ||||
| 	la a0, asm_li | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# The literal is copied as is. Its length is passed along in a0. | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles a character literal at the current position into "li a0, '<c>'" | ||||
| # and skips the literal. A backslash escape is copied together with the | ||||
| # character that follows it. | ||||
| proc _compile_character_literal(); | ||||
| begin | ||||
| 	la a0, asm_li | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the opening quote and skip it in the source. | ||||
| 	_write_c('\''); | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	# Check whether the character is escaped. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
| 	li t1, '\\' | ||||
| 	bne a0, t1, .compile_character_literal_end | ||||
| 	 | ||||
| 	_write_c('\\'); | ||||
| 	_advance_token(1); | ||||
|  | ||||
| .compile_character_literal_end: | ||||
| 	# Print the character itself. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
| 	_write_c(); | ||||
|  | ||||
| 	_write_c('\''); | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	# Skip the character and the closing quote. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Compiles a local variable reference at the current position, for example | ||||
| # v04, into "lw a0, 04(sp)" and skips it. | ||||
| proc _compile_variable_expression(); | ||||
| begin | ||||
| 	la a0, asm_lw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the leading "v", the rest of the name is the stack offset. | ||||
| 	_advance_token(1); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| end; | ||||
|  | ||||
| # Compiles the expression at the current position. The generated code leaves | ||||
| # the value of the expression in a0. | ||||
| # | ||||
| # The kind of the expression is chosen by its first character: a single quote | ||||
| # starts a character literal, "v" a local variable, a digit an integer | ||||
| # literal. Anything else is left untouched and produces no code. | ||||
| proc _compile_expression(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb a0, (t0) | ||||
|  | ||||
| 	li t1, '\'' | ||||
| 	beq a0, t1, .compile_expression_character_literal | ||||
|  | ||||
| 	li t1, 'v' | ||||
| 	beq a0, t1, .compile_expression_variable | ||||
|  | ||||
| 	_is_digit(); | ||||
| 	bnez a0, .compile_expression_integer_literal | ||||
|  | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_character_literal: | ||||
| 	_compile_character_literal(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_integer_literal: | ||||
| 	_compile_integer_literal(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_variable: | ||||
| 	_compile_variable_expression(); | ||||
| 	goto .compile_expression_end; | ||||
|  | ||||
| .compile_expression_end: | ||||
| end; | ||||
|  | ||||
| # Compiles a procedure call. The position is expected at the procedure name. | ||||
| # | ||||
| # Every argument is evaluated into a0 and saved in the stack frame of the | ||||
| # generated code, the first one at 20(sp), the next ones 4 bytes lower each. | ||||
| # After that the saved values are loaded into a0, a1 and so on, and the call | ||||
| # instruction is written. The trailing semicolon is left to the caller. | ||||
| proc _compile_call(); | ||||
| begin | ||||
| 	# Stack variables: | ||||
| 	# v0 - Procedure name length. | ||||
| 	# v4 - Procedure name pointer. | ||||
| 	# v8 - Argument count. | ||||
|  | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	sw t0, 4(sp) | ||||
|  | ||||
| 	sw zero, 8(sp) | ||||
|  | ||||
| 	# Skip the identifier and left paren. | ||||
| 	addi a0, a0, 1 | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# A right paren at this point means that there are no arguments. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_call_finalize | ||||
|  | ||||
| .compile_call_loop: | ||||
| 	_compile_expression(); | ||||
|  | ||||
| 	# Save the argument on the stack. | ||||
| 	la a0, asm_sw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_a0 | ||||
| 	_write_z(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Calculate the stack offset: 20 - (4 * argument_counter) | ||||
| 	lw t0, 8(sp) | ||||
| 	li t1, 4 | ||||
| 	mul t0, t0, t1 | ||||
| 	li t1, 20 | ||||
| 	sub a0, t1, t0 | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	# Add one to the argument counter. | ||||
| 	lw t0, 8(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	# A comma means that another argument follows. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, ',' | ||||
| 	bne t0, t1, .compile_call_finalize | ||||
|  | ||||
| 	# Skip the comma and the space after it. | ||||
| 	_advance_token(2); | ||||
| 	goto .compile_call_loop; | ||||
|  | ||||
| .compile_call_finalize: | ||||
| 	# Load the argument from the stack. | ||||
| 	# The arguments are loaded from the last one to the first one. | ||||
|  | ||||
| 	lw t0, 8(sp) | ||||
| 	beqz t0, .compile_call_end | ||||
|  | ||||
| 	# Decrement the argument counter. | ||||
| 	lw t0, 8(sp) | ||||
| 	addi t0, t0, -1 | ||||
| 	sw t0, 8(sp) | ||||
|  | ||||
| 	la a0, asm_lw | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# The target register is "a" followed by the argument index. | ||||
| 	_write_c('a'); | ||||
| 	lw a0, 8(sp) | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_comma | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Calculate the stack offset: 20 - (4 * argument_counter) | ||||
| 	lw t0, 8(sp) | ||||
| 	li t1, 4 | ||||
| 	mul t0, t0, t1 | ||||
| 	li t1, 20 | ||||
| 	sub a0, t1, t0 | ||||
| 	_write_i(); | ||||
|  | ||||
| 	la a0, asm_sp | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| 	goto .compile_call_finalize; | ||||
|  | ||||
| .compile_call_end: | ||||
| 	la a0, asm_call | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print the procedure name saved at the beginning. | ||||
| 	_write_s(v04, v00); | ||||
|  | ||||
| 	# Skip the right paren. | ||||
| 	_advance_token(1); | ||||
| end; | ||||
|  | ||||
| # Compiles a goto statement into a "j <label>" instruction. The position is | ||||
| # expected at the "goto" keyword. The trailing semicolon and the line end are | ||||
| # left to the caller. | ||||
| proc _compile_goto(); | ||||
| begin | ||||
| 	# Skip "goto" and the space after it. | ||||
| 	_advance_token(5); | ||||
|  | ||||
| 	# Save the label length. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	la a0, asm_j | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Print and skip the label. | ||||
| 	_write_token(v00); | ||||
| 	_advance_token(); | ||||
| end; | ||||
|  | ||||
| # Compiles one line of a procedure body. | ||||
| # | ||||
| # The kind of the statement is chosen by the second character of the line: | ||||
| # an underscore starts a procedure call and a "g" a goto statement. Any other | ||||
| # line is copied to the output unchanged. | ||||
| proc _compile_statement(); | ||||
| begin | ||||
| 	# This is a call if the statement starts with an underscore. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	# First character after alignment tab. | ||||
| 	addi t0, t0, 1 | ||||
| 	lb t0, (t0) | ||||
| 	 | ||||
| 	li t1, '_' | ||||
| 	beq t0, t1, .compile_statement_call | ||||
|  | ||||
| 	li t1, 'g' | ||||
| 	beq t0, t1, .compile_statement_goto | ||||
|  | ||||
| 	_compile_line(); | ||||
| 	goto .compile_statement_end; | ||||
|  | ||||
| .compile_statement_call: | ||||
| 	# Skip the alignment tab. | ||||
| 	_advance_token(1); | ||||
| 	_compile_call(); | ||||
|  | ||||
| 	goto .compile_statement_semicolon; | ||||
|  | ||||
| .compile_statement_goto: | ||||
| 	# Skip the alignment tab. | ||||
| 	_advance_token(1); | ||||
| 	_compile_goto(); | ||||
|  | ||||
| 	goto .compile_statement_semicolon; | ||||
|  | ||||
| .compile_statement_semicolon: | ||||
| 	# Two characters are skipped without looking at them, they are expected | ||||
| 	# to be the semicolon and the newline. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	_write_c('\n'); | ||||
|  | ||||
| .compile_statement_end: | ||||
| end; | ||||
|  | ||||
| # Compiles the statements of a procedure until a line that begins with "end" | ||||
| # is found. The "end" itself is not consumed. | ||||
| proc _compile_procedure_body(); | ||||
| begin | ||||
| .compile_procedure_body_loop: | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_end | ||||
| 	li a2, 3 # "end" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	# Zero means that the current position matches "end". | ||||
| 	beqz a0, .compile_procedure_body_epilogue | ||||
|  | ||||
| 	_compile_statement(); | ||||
| 	goto .compile_procedure_body_loop; | ||||
|  | ||||
| .compile_procedure_body_epilogue: | ||||
| end; | ||||
|  | ||||
| # Compiles a procedure definition. The position is expected at the "proc" | ||||
| # keyword. | ||||
| # | ||||
| # Writes the ".type" directive, the label, the prologue, the compiled body | ||||
| # and the epilogue. The header is skipped by length and is not validated. | ||||
| proc _compile_procedure(); | ||||
| begin | ||||
| 	# Skip "proc ". | ||||
| 	_advance_token(5); | ||||
|  | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) # Save the procedure name length. | ||||
|  | ||||
| 	# Write .type _procedure_name, @function. | ||||
| 	la a0, asm_type_directive | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_write_token(v00); | ||||
|  | ||||
| 	la a0, asm_type_function | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Write procedure label, _procedure_name: | ||||
| 	_write_token(v00); | ||||
|  | ||||
| 	la a0, asm_colon | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the function name and trailing parens, semicolon, "begin" and newline. | ||||
| 	# These are 10 characters besides the name, including the newline before "begin". | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a0, a0, 10 | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	la a0, asm_prologue | ||||
| 	_write_z(); | ||||
|  | ||||
| 	_compile_procedure_body(); | ||||
|  | ||||
| 	# Write the epilogue. | ||||
| 	la a0, asm_epilogue | ||||
| 	_write_z(); | ||||
|  | ||||
| 	# Skip the "end" keyword, semicolon and newline. | ||||
| 	_advance_token(5); | ||||
| end; | ||||
|  | ||||
| # Copies a ".type" directive and the definition line that follows it to the | ||||
| # output. The token length travels in a0 from one call to the next: | ||||
| # _read_token returns it, _write_token and _advance_token leave it unchanged. | ||||
| proc _compile_type(); | ||||
| begin | ||||
| 	# Print and skip the ".type" (5 characters) directive and a space after it. | ||||
| 	_write_token(6); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol name. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol name, comma, space and @. | ||||
| 	addi a0, a0, 3 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol type. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print the symbol type and newline. | ||||
| 	addi a0, a0, 1 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Write the object definition itself. | ||||
| 	_compile_line(); | ||||
| end; | ||||
|  | ||||
| # Advances source_code_position past consecutive newline characters. | ||||
| # Any other character, the terminating zero included, ends the loop. | ||||
| proc _skip_newlines(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t1, (t0) | ||||
| 	li t3, '\n' | ||||
|  | ||||
| .skip_newlines_loop: | ||||
| 	lb t2, (t1) | ||||
| 	bne t2, t3, .skip_newlines_end | ||||
|  | ||||
| 	# Step over the newline and store the new position. | ||||
| 	addi t1, t1, 1 | ||||
| 	sw t1, (t0) | ||||
|  | ||||
| 	goto .skip_newlines_loop; | ||||
|  | ||||
| .skip_newlines_end: | ||||
| end; | ||||
|  | ||||
| # Process the source code and print the generated code. | ||||
| # | ||||
| # Looks at the beginning of every top-level line and dispatches on it: | ||||
| # comments are skipped, ".section" and ".type" directives and "proc" | ||||
| # definitions are compiled, ".globl" lines are copied unchanged. | ||||
| # Stops at a zero byte or at the first line that matches none of these. | ||||
| proc _compile(); | ||||
| begin | ||||
| .compile_loop: | ||||
| 	_skip_newlines(); | ||||
|  | ||||
| 	# A zero byte ends the input, a '#' starts a comment. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	beqz t0, .compile_end | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .compile_comment | ||||
|  | ||||
| 	# _memcmp returns zero when the text at the current position matches. | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_section | ||||
| 	li a2, 8 # ".section" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_section | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_type | ||||
| 	li a2, 5 # ".type" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_type | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_proc | ||||
| 	li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_procedure | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	la a1, keyword_global | ||||
| 	li a2, 6 # ".globl" length. | ||||
| 	_memcmp(); | ||||
|  | ||||
| 	beqz a0, .compile_global | ||||
| 	# Not a known token, exit. | ||||
| 	goto .compile_end; | ||||
|  | ||||
| .compile_section: | ||||
| 	_compile_section(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_type: | ||||
| 	_compile_type(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_global: | ||||
| 	# A ".globl" line is copied to the output as is. | ||||
| 	_compile_line(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_comment: | ||||
| 	_skip_comment(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_procedure: | ||||
| 	_compile_procedure(); | ||||
|  | ||||
| 	goto .compile_loop; | ||||
|  | ||||
| .compile_end: | ||||
| end; | ||||
|  | ||||
| # Terminates the program. a0 contains the return code. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Status code. | ||||
| proc _exit(); | ||||
| begin | ||||
| 	# Neither the prologue nor this body changes a0, so the caller's value | ||||
| 	# is the exit status. | ||||
| 	li a7, 93 # SYS_EXIT | ||||
| 	ecall | ||||
| end; | ||||
|  | ||||
| # Entry point. | ||||
| # | ||||
| # Reads the source from the standard input, compiles it and exits with status 0. | ||||
| .globl _start | ||||
| proc _start(); | ||||
| begin | ||||
| 	# Read the source from the standard input. | ||||
| 	la a0, source_code | ||||
| 	li a1, 81920 # Buffer size. | ||||
| 	_read_file(); | ||||
| 	_compile(); | ||||
|  | ||||
| 	_exit(0); | ||||
|  | ||||
| end; | ||||
| @@ -1,14 +0,0 @@ | ||||
| program | ||||
|  | ||||
| proc main(x: Word, y: Word) | ||||
| begin | ||||
| 	_write_s(4, @x); | ||||
| 	_write_s(4, @y); | ||||
|  | ||||
| 	y := 0x0a2c3063; | ||||
| 	_write_s(4, @y) | ||||
| end | ||||
|  | ||||
| begin | ||||
| 	main(0x0a2c3061, 0x0a2c3062) | ||||
| end. | ||||
							
								
								
									
										616
									
								
								boot/tokenizer.s
									
									
									
									
									
								
							
							
						
						
									
										616
									
								
								boot/tokenizer.s
									
									
									
									
									
								
							| @@ -1,616 +0,0 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| .global lex_next, classification, transitions, keywords, byte_keywords | ||||
|  | ||||
| .include "boot/definitions.inc" | ||||
|  | ||||
| .section .rodata | ||||
|  | ||||
| # | ||||
| # Classification table assigns each possible character to a group (class). All | ||||
| # characters of the same group are handled equivalently. | ||||
| # | ||||
| # Classification: | ||||
| # | ||||
| .equ CLASS_INVALID, 0x00 | ||||
| .equ CLASS_DIGIT, 0x01 | ||||
| .equ CLASS_CHARACTER, 0x02 | ||||
| .equ CLASS_SPACE, 0x03 | ||||
| .equ CLASS_COLON, 0x04 | ||||
| .equ CLASS_EQUALS, 0x05 | ||||
| .equ CLASS_LEFT_PAREN, 0x06 | ||||
| .equ CLASS_RIGHT_PAREN, 0x07 | ||||
| .equ CLASS_ASTERISK, 0x08 | ||||
| .equ CLASS_UNDERSCORE, 0x09 | ||||
| .equ CLASS_SINGLE, 0x0a | ||||
| .equ CLASS_HEX, 0x0b | ||||
| .equ CLASS_ZERO, 0x0c | ||||
| .equ CLASS_X, 0x0d | ||||
| .equ CLASS_EOF, 0x0e | ||||
| .equ CLASS_DOT, 0x0f | ||||
| .equ CLASS_MINUS, 0x10 | ||||
| .equ CLASS_QUOTE, 0x11 | ||||
| .equ CLASS_GREATER, 0x12 | ||||
| .equ CLASS_LESS, 0x13 | ||||
|  | ||||
| # Number of the classes defined above (0x00 through 0x13). | ||||
| .equ CLASS_COUNT, 20 | ||||
|  | ||||
| # One class byte for each of the 128 7-bit ASCII codes, in the order of the | ||||
| # codes (presumably the table is indexed by the character code). Characters | ||||
| # without a class of their own are CLASS_INVALID. | ||||
| .type classification, @object | ||||
| classification: | ||||
| 	.byte CLASS_EOF # 00 NUL | ||||
| 	.byte CLASS_INVALID # 01 SOH | ||||
| 	.byte CLASS_INVALID # 02 STX | ||||
| 	.byte CLASS_INVALID # 03 ETX | ||||
| 	.byte CLASS_INVALID # 04 EOT | ||||
| 	.byte CLASS_INVALID # 05 ENQ | ||||
| 	.byte CLASS_INVALID # 06 ACK | ||||
| 	.byte CLASS_INVALID # 07 BEL | ||||
| 	.byte CLASS_INVALID # 08 BS | ||||
| 	.byte CLASS_SPACE # 09 HT | ||||
| 	.byte CLASS_SPACE # 0A LF | ||||
| 	.byte CLASS_INVALID # 0B VT | ||||
| 	.byte CLASS_INVALID # 0C FF | ||||
| 	.byte CLASS_SPACE # 0D CR | ||||
| 	.byte CLASS_INVALID # 0E SO | ||||
| 	.byte CLASS_INVALID # 0F SI | ||||
| 	.byte CLASS_INVALID # 10 DLE | ||||
| 	.byte CLASS_INVALID # 11 DC1 | ||||
| 	.byte CLASS_INVALID # 12 DC2 | ||||
| 	.byte CLASS_INVALID # 13 DC3 | ||||
| 	.byte CLASS_INVALID # 14 DC4 | ||||
| 	.byte CLASS_INVALID # 15 NAK | ||||
| 	.byte CLASS_INVALID # 16 SYN | ||||
| 	.byte CLASS_INVALID # 17 ETB | ||||
| 	.byte CLASS_INVALID # 18 CAN | ||||
| 	.byte CLASS_INVALID # 19 EM | ||||
| 	.byte CLASS_INVALID # 1A SUB | ||||
| 	.byte CLASS_INVALID # 1B ESC | ||||
| 	.byte CLASS_INVALID # 1C FS | ||||
| 	.byte CLASS_INVALID # 1D GS | ||||
| 	.byte CLASS_INVALID # 1E RS | ||||
| 	.byte CLASS_INVALID # 1F US | ||||
| 	.byte CLASS_SPACE # 20 Space | ||||
| 	.byte CLASS_SINGLE # 21 ! | ||||
| 	.byte CLASS_QUOTE # 22 " | ||||
| 	.byte CLASS_INVALID # 23 # | ||||
| 	.byte CLASS_INVALID # 24 $ | ||||
| 	.byte CLASS_SINGLE # 25 % | ||||
| 	.byte CLASS_SINGLE # 26 & | ||||
| 	.byte CLASS_QUOTE # 27 ' | ||||
| 	.byte CLASS_LEFT_PAREN # 28 ( | ||||
| 	.byte CLASS_RIGHT_PAREN # 29 ) | ||||
| 	.byte CLASS_ASTERISK # 2A * | ||||
| 	.byte CLASS_SINGLE # 2B + | ||||
| 	.byte CLASS_SINGLE # 2C , | ||||
| 	.byte CLASS_MINUS # 2D - | ||||
| 	.byte CLASS_DOT # 2E . | ||||
| 	.byte CLASS_SINGLE # 2F / | ||||
| 	.byte CLASS_ZERO # 30 0 | ||||
| 	.byte CLASS_DIGIT # 31 1 | ||||
| 	.byte CLASS_DIGIT # 32 2 | ||||
| 	.byte CLASS_DIGIT # 33 3 | ||||
| 	.byte CLASS_DIGIT # 34 4 | ||||
| 	.byte CLASS_DIGIT # 35 5 | ||||
| 	.byte CLASS_DIGIT # 36 6 | ||||
| 	.byte CLASS_DIGIT # 37 7 | ||||
| 	.byte CLASS_DIGIT # 38 8 | ||||
| 	.byte CLASS_DIGIT # 39 9 | ||||
| 	.byte CLASS_COLON # 3A : | ||||
| 	.byte CLASS_SINGLE # 3B ; | ||||
| 	.byte CLASS_LESS # 3C < | ||||
| 	.byte CLASS_EQUALS # 3D = | ||||
| 	.byte CLASS_GREATER # 3E > | ||||
| 	.byte CLASS_INVALID # 3F ? | ||||
| 	.byte CLASS_SINGLE # 40 @ | ||||
| 	.byte CLASS_CHARACTER # 41 A | ||||
| 	.byte CLASS_CHARACTER # 42 B | ||||
| 	.byte CLASS_CHARACTER # 43 C | ||||
| 	.byte CLASS_CHARACTER # 44 D | ||||
| 	.byte CLASS_CHARACTER # 45 E | ||||
| 	.byte CLASS_CHARACTER # 46 F | ||||
| 	.byte CLASS_CHARACTER # 47 G | ||||
| 	.byte CLASS_CHARACTER # 48 H | ||||
| 	.byte CLASS_CHARACTER # 49 I | ||||
| 	.byte CLASS_CHARACTER # 4A J | ||||
| 	.byte CLASS_CHARACTER # 4B K | ||||
| 	.byte CLASS_CHARACTER # 4C L | ||||
| 	.byte CLASS_CHARACTER # 4D M | ||||
| 	.byte CLASS_CHARACTER # 4E N | ||||
| 	.byte CLASS_CHARACTER # 4F O | ||||
| 	.byte CLASS_CHARACTER # 50 P | ||||
| 	.byte CLASS_CHARACTER # 51 Q | ||||
| 	.byte CLASS_CHARACTER # 52 R | ||||
| 	.byte CLASS_CHARACTER # 53 S | ||||
| 	.byte CLASS_CHARACTER # 54 T | ||||
| 	.byte CLASS_CHARACTER # 55 U | ||||
| 	.byte CLASS_CHARACTER # 56 V | ||||
| 	.byte CLASS_CHARACTER # 57 W | ||||
| 	.byte CLASS_CHARACTER # 58 X | ||||
| 	.byte CLASS_CHARACTER # 59 Y | ||||
| 	.byte CLASS_CHARACTER # 5A Z | ||||
| 	.byte CLASS_SINGLE # 5B [ | ||||
| 	.byte CLASS_INVALID # 5C \ | ||||
| 	.byte CLASS_SINGLE # 5D ] | ||||
| 	.byte CLASS_SINGLE # 5E ^ | ||||
| 	.byte CLASS_UNDERSCORE # 5F _ | ||||
| 	.byte CLASS_INVALID # 60 ` | ||||
| 	.byte CLASS_HEX # 61 a | ||||
| 	.byte CLASS_HEX # 62 b | ||||
| 	.byte CLASS_HEX # 63 c | ||||
| 	.byte CLASS_HEX # 64 d | ||||
| 	.byte CLASS_HEX # 65 e | ||||
| 	.byte CLASS_HEX # 66 f | ||||
| 	.byte CLASS_CHARACTER # 67 g | ||||
| 	.byte CLASS_CHARACTER # 68 h | ||||
| 	.byte CLASS_CHARACTER # 69 i | ||||
| 	.byte CLASS_CHARACTER # 6A j | ||||
| 	.byte CLASS_CHARACTER # 6B k | ||||
| 	.byte CLASS_CHARACTER # 6C l | ||||
| 	.byte CLASS_CHARACTER # 6D m | ||||
| 	.byte CLASS_CHARACTER # 6E n | ||||
| 	.byte CLASS_CHARACTER # 6F o | ||||
| 	.byte CLASS_CHARACTER # 70 p | ||||
| 	.byte CLASS_CHARACTER # 71 q | ||||
| 	.byte CLASS_CHARACTER # 72 r | ||||
| 	.byte CLASS_CHARACTER # 73 s | ||||
| 	.byte CLASS_CHARACTER # 74 t | ||||
| 	.byte CLASS_CHARACTER # 75 u | ||||
| 	.byte CLASS_CHARACTER # 76 v | ||||
| 	.byte CLASS_CHARACTER # 77 w | ||||
| 	.byte CLASS_X # 78 x | ||||
| 	.byte CLASS_CHARACTER # 79 y | ||||
| 	.byte CLASS_CHARACTER # 7A z | ||||
| 	.byte CLASS_INVALID # 7B { | ||||
| 	.byte CLASS_SINGLE # 7C | | ||||
| 	.byte CLASS_INVALID # 7D } | ||||
| 	.byte CLASS_SINGLE # 7E ~ | ||||
| 	.byte CLASS_INVALID # 7F DEL | ||||
|  | ||||
| # Textual keywords in the language. Each entry below is a word holding the | ||||
| # keyword length, immediately followed by the keyword text (no terminator). | ||||
| # classify_identifier passes KEYWORDS_COUNT and this table to _strings_index. | ||||
| .equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1 | ||||
|  | ||||
| .type keywords, @object | ||||
| keywords: | ||||
| 	.word 7 | ||||
| 	.ascii "program" | ||||
| 	.word 6 | ||||
| 	.ascii "import" | ||||
| 	.word 5 | ||||
| 	.ascii "const" | ||||
| 	.word 3 | ||||
| 	.ascii "var" | ||||
| 	.word 2 | ||||
| 	.ascii "if" | ||||
| 	.word 4 | ||||
| 	.ascii "then" | ||||
| 	.word 5 | ||||
| 	.ascii "elsif" | ||||
| 	.word 4 | ||||
| 	.ascii "else" | ||||
| 	.word 5 | ||||
| 	.ascii "while" | ||||
| 	.word 2 | ||||
| 	.ascii "do" | ||||
| 	.word 4 | ||||
| 	.ascii "proc" | ||||
| 	.word 5 | ||||
| 	.ascii "begin" | ||||
| 	.word 3 | ||||
| 	.ascii "end" | ||||
| 	.word 4 | ||||
| 	.ascii "type" | ||||
| 	.word 6 | ||||
| 	.ascii "record" | ||||
| 	.word 5 | ||||
| 	.ascii "union" | ||||
| 	.word 4 | ||||
| 	.ascii "true" | ||||
| 	.word 5 | ||||
| 	.ascii "false" | ||||
| 	.word 3 | ||||
| 	.ascii "nil" | ||||
| 	.word 3 | ||||
| 	.ascii "xor" | ||||
| 	.word 2 | ||||
| 	.ascii "or" | ||||
| 	.word 6 | ||||
| 	.ascii "return" | ||||
| 	.word 4 | ||||
| 	.ascii "cast" | ||||
| 	.word 4 | ||||
| 	.ascii "goto" | ||||
| 	.word 4 | ||||
| 	.ascii "case" | ||||
| 	.word 2 | ||||
| 	.ascii "of" | ||||
|  | ||||
| .type byte_keywords, @object | ||||
| byte_keywords: .ascii "&.,:;()[]^=+-*@" | ||||
| .equ BYTE_KEYWORDS_SIZE, . - byte_keywords | ||||
|  | ||||
| .section .data | ||||
|  | ||||
| # The transition table describes transitions from one state to another, given | ||||
| # a symbol (character class). | ||||
| # | ||||
| # The table has m rows and n columns, where m is the number of states and n is | ||||
| # the number of classes (every row below has 20 cells). lookup_state reads the | ||||
| # cell at (state * CLASS_COUNT + class) * 4 to find out what to do next. | ||||
| # | ||||
| # Each cell is a word long. | ||||
| # - The least significant byte of the word is a row number (beginning with 0). | ||||
| #   It specifies the target state. "ff" means that this is an end state and no | ||||
| #   transition is possible. | ||||
| # - The next byte is the action that should be performed when transitioning. | ||||
| #   lex_next dispatches on the values 0x01 to 0x08 and treats anything else | ||||
| #   (0x00 in this table) as a rejection; see its labels for each action. | ||||
| # | ||||
| .type transitions, @object | ||||
| transitions: | ||||
| 	# Columns, in the order of the cells in each row: | ||||
| 	#     Invalid Digit   Alpha   Space   :       =       (       ) | ||||
| 	#     *       _       Single  Hex     0       x       NUL     . | ||||
| 	.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff | ||||
| 	.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff | ||||
| 	.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start | ||||
|  | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon | ||||
|  | ||||
| 	.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff | ||||
| 	.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff | ||||
| 	.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier | ||||
|  | ||||
| 	.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff | ||||
| 	.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff | ||||
| 	.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal | ||||
|  | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||
| 	.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater | ||||
|  | ||||
| 	.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff | ||||
| 	.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff | ||||
| 	.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus | ||||
|  | ||||
| 	.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff | ||||
| 	.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff | ||||
| 	.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren | ||||
|  | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff | ||||
| 	.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less | ||||
|  | ||||
| 	.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff | ||||
| 	.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff | ||||
| 	.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x. | ||||
|  | ||||
| 	.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109 | ||||
| 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 | ||||
| 	.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment | ||||
|  | ||||
| 	.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff | ||||
| 	.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 | ||||
| 	.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment | ||||
|  | ||||
| 	.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110 | ||||
| 	.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110 | ||||
| 	.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String | ||||
|  | ||||
| 	.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff | ||||
| 	.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff | ||||
| 	.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero | ||||
|  | ||||
| 	.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff | ||||
| 	.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff | ||||
| 	.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal | ||||
|  | ||||
| .section .text | ||||
|  | ||||
| # Returns the class from the classification table for the given character. | ||||
| # | ||||
| # The classification table ends with the entry for 0x7F (DEL), so a byte from | ||||
| # 0x80 upwards (for example a part of a UTF-8 sequence) would be looked up | ||||
| # past its end. Such bytes are reported as CLASS_INVALID instead. This assumes | ||||
| # the table starts with the entry for 0x00, i.e. has 0x80 entries. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Character, an unsigned byte value. | ||||
| # | ||||
| # Sets a0 to the class number. | ||||
| .type classify, @function | ||||
| classify: | ||||
| 	li t0, 0x80 # First character code without a table entry. | ||||
| 	bgeu a0, t0, .Lclassify_invalid | ||||
|  | ||||
| 	la t0, classification | ||||
| 	add t0, t0, a0 # Character class pointer. | ||||
| 	lbu a0, (t0) # Character class. | ||||
| 	ret | ||||
|  | ||||
| .Lclassify_invalid: | ||||
| 	li a0, CLASS_INVALID | ||||
| 	ret | ||||
|  | ||||
| # Given the current state and a character class, looks up the transition cell. | ||||
|  | ||||
| # Parameters: | ||||
| # a0 - Current state (row of the transition table). | ||||
| # a1 - Character class (column of the transition table). | ||||
| # | ||||
| # Sets a0 to the table cell: next state in the low byte, action in the next one. | ||||
| .type lookup_state, @function | ||||
| lookup_state: | ||||
| 	li t0, CLASS_COUNT | ||||
| 	mul a0, a0, t0 # Transition row: index of its first cell. | ||||
| 	add a0, a0, a1 # Transition column: index of the wanted cell. | ||||
|  | ||||
| 	li t0, 4 | ||||
| 	mul a0, a0, t0 # Multiply by the word size to get a byte offset. | ||||
|  | ||||
| 	la t0, transitions | ||||
| 	add t0, t0, a0 | ||||
| 	lw a0, (t0) # Transition cell (next state and action). | ||||
|  | ||||
| 	ret | ||||
|  | ||||
| # Chains classify and lookup_state. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Current state. | ||||
| # a1 - Character. | ||||
| # | ||||
| # Sets a0 to the transition cell lookup_state returns for the given character. | ||||
| .type _next_state, @function | ||||
| _next_state: | ||||
| 	# Prologue. 4(sp) keeps the current state across the call to classify. | ||||
| 	addi sp, sp, -16 | ||||
| 	sw ra, 12(sp) | ||||
| 	sw s0, 8(sp) | ||||
| 	addi s0, sp, 16 | ||||
|  | ||||
| 	sw a0, 4(sp) | ||||
| 	mv a0, a1 | ||||
| 	call classify | ||||
|  | ||||
| 	mv a1, a0 | ||||
| 	lw a0, 4(sp) | ||||
| 	call lookup_state | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 12(sp) | ||||
| 	lw s0, 8(sp) | ||||
| 	addi sp, sp, 16 | ||||
| 	ret | ||||
|  | ||||
| # Takes an identifier and checks whether it's a keyword. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Token length. | ||||
| # a1 - Token pointer. | ||||
| # | ||||
| # Sets a0 to the non-zero result of _strings_index, else to TOKEN_IDENTIFIER. | ||||
| .type classify_identifier, @function | ||||
| classify_identifier: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -16 | ||||
| 	sw ra, 12(sp) | ||||
| 	sw s0, 8(sp) | ||||
| 	addi s0, sp, 16 | ||||
|  | ||||
| 	mv a2, a0 | ||||
| 	mv a3, a1 | ||||
| 	li a0, KEYWORDS_COUNT | ||||
| 	la a1, keywords | ||||
| 	call _strings_index | ||||
|  | ||||
| 	bnez a0, .Lclassify_identifier_end | ||||
| 	li a0, TOKEN_IDENTIFIER | ||||
|  | ||||
| .Lclassify_identifier_end: | ||||
| 	# Epilogue. | ||||
| 	lw ra, 12(sp) | ||||
| 	lw s0, 8(sp) | ||||
| 	addi sp, sp, 16 | ||||
| 	ret | ||||
|  | ||||
| # Takes a symbol and determines its type by its position in byte_keywords. | ||||
| # NOTE(review): the _memchr result is not checked; presumably only characters | ||||
| # present in byte_keywords get here - confirm against the transition table. | ||||
| # Parameters: | ||||
| # a0 - Token character. | ||||
| # Sets a0 to TOKEN_IDENTIFIER + 1 plus the offset of the character in the list. | ||||
| .type classify_single, @function | ||||
| classify_single: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -16 | ||||
| 	sw ra, 12(sp) | ||||
| 	sw s0, 8(sp) | ||||
| 	addi s0, sp, 16 | ||||
|  | ||||
| 	mv a1, a0 | ||||
| 	li a2, BYTE_KEYWORDS_SIZE | ||||
| 	la a0, byte_keywords | ||||
| 	call _memchr | ||||
|  | ||||
| 	la a1, byte_keywords | ||||
| 	sub a0, a0, a1 | ||||
| 	addi a0, a0, TOKEN_IDENTIFIER + 1 | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 12(sp) | ||||
| 	lw s0, 8(sp) | ||||
| 	addi sp, sp, 16 | ||||
| 	ret | ||||
|  | ||||
| # Classifies a symbol containing multiple characters (probably 2). | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Token length. | ||||
| # a1 - Token pointer. | ||||
| # | ||||
| # Sets a0 to TOKEN_ASSIGN if the token starts with ':', else leaves a0 as is. | ||||
| .type classify_composite, @function | ||||
| classify_composite: | ||||
| 	lbu t0, 0(a1) | ||||
| 	li t1, ':' | ||||
| 	beq t0, t1, .Lclassify_composite_assign | ||||
|  | ||||
| 	j .Lclassify_composite_end | ||||
|  | ||||
| .Lclassify_composite_assign: | ||||
| 	li a0, TOKEN_ASSIGN | ||||
| 	j .Lclassify_composite_end | ||||
|  | ||||
| .Lclassify_composite_end: | ||||
| 	ret | ||||
|  | ||||
| # Reads the next token from the source text, driven by the transition table. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Source text pointer. | ||||
| # a1 - Output pointer: the token kind word, for identifiers and integers also | ||||
| #      followed by the token length and start pointer (12 bytes in total). | ||||
| # Sets a0 to the position of the next token. | ||||
| .type lex_next, @function | ||||
| lex_next: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -32 | ||||
| 	sw ra, 28(sp) | ||||
| 	sw s0, 24(sp) | ||||
| 	addi s0, sp, 32 | ||||
|  | ||||
| 	sw s1, 20(sp) # Preserve s1 used for current source text position. | ||||
| 	mv s1, a0 | ||||
| 	sw a0, 12(sp) # Keeps a pointer to the beginning of a token. | ||||
| 	# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed. | ||||
|  | ||||
| 	sw s2, 16(sp) # Preserve s2 containing the current state. | ||||
| 	li s2, 0x00 # Initial, start state. | ||||
|  | ||||
| 	sw a1, 0(sp) | ||||
| 	sw zero, (a1) # Initialize the token kind with zero. | ||||
|  | ||||
| .Llex_next_loop: | ||||
| 	mv a0, s2 | ||||
| 	lbu a1, (s1) | ||||
| 	call _next_state | ||||
|  | ||||
| 	li t0, 0xff | ||||
| 	and s2, a0, t0 # Next state. | ||||
|  | ||||
| 	li t0, 0xff00 | ||||
| 	and t1, a0, t0 # Transition action. | ||||
| 	srli t1, t1, 8 | ||||
|  | ||||
| 	# Perform the provided action. | ||||
| 	li t0, 0x01 # Accumulate action. | ||||
| 	beq t1, t0, .Llex_next_accumulate | ||||
|  | ||||
| 	li t0, 0x02 # Print action; ends the token, the debug output is disabled. | ||||
| 	beq t1, t0, .Llex_next_print | ||||
|  | ||||
| 	li t0, 0x03 # Skip action. | ||||
| 	beq t1, t0, .Llex_next_skip | ||||
|  | ||||
| 	li t0, 0x04 # Delimited string action; consumes the last character and ends. | ||||
| 	beq t1, t0, .Llex_next_comment | ||||
|  | ||||
| 	li t0, 0x05 # Finalize identifier. | ||||
| 	beq t1, t0, .Llex_next_identifier | ||||
|  | ||||
| 	li t0, 0x06 # Single character symbol action. | ||||
| 	beq t1, t0, .Llex_next_single | ||||
|  | ||||
| 	li t0, 0x07 # An action for symbols containing multiple characters. | ||||
| 	beq t1, t0, .Llex_next_composite | ||||
|  | ||||
| 	li t0, 0x08 # Integer action. | ||||
| 	beq t1, t0, .Llex_next_integer | ||||
|  | ||||
| 	j .Llex_next_reject | ||||
|  | ||||
| .Llex_next_reject: | ||||
| 	addi s1, s1, 1 | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_accumulate: | ||||
| 	addi s1, s1, 1 | ||||
|  | ||||
| 	j .Llex_next_loop | ||||
|  | ||||
| .Llex_next_skip: | ||||
| 	addi s1, s1, 1 | ||||
| 	lw t0, 12(sp) | ||||
| 	addi t0, t0, 1 | ||||
| 	sw t0, 12(sp) | ||||
|  | ||||
| 	j .Llex_next_loop | ||||
|  | ||||
| .Llex_next_print: | ||||
| 	/* DEBUG | ||||
| 	addi a0, a0, 21 | ||||
| 	sw a0, 0(sp) | ||||
| 	addi a0, sp, 0 | ||||
| 	li a1, 1 | ||||
| 	call _write_error */ | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_comment: | ||||
| 	addi s1, s1, 1 | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_identifier: | ||||
| 	# An identifier can be a textual keyword. Check the kind of the token and | ||||
| 	# copy kind, length and start pointer, 4(sp) to 12(sp), into the output. | ||||
| 	lw a1, 12(sp) | ||||
| 	sub a0, s1, a1 | ||||
| 	sw a0, 8(sp) | ||||
| 	call classify_identifier | ||||
| 	sw a0, 4(sp) | ||||
| 	lw a0, 0(sp) | ||||
| 	addi a1, sp, 4 | ||||
| 	li a2, 12 | ||||
| 	call _memcpy | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_single: | ||||
| 	lw a0, 12(sp) | ||||
| 	addi s1, a0, 1 | ||||
| 	lbu a0, (a0) | ||||
| 	call classify_single | ||||
| 	lw a1, 0(sp) | ||||
| 	sw a0, (a1) | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_composite: | ||||
| 	addi s1, s1, 1 | ||||
| 	lw a1, 12(sp) | ||||
| 	sub a0, s1, a1 | ||||
| 	call classify_composite | ||||
| 	lw a1, 0(sp) | ||||
| 	sw a0, (a1) | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_integer: | ||||
| 	lw t0, 0(sp) | ||||
| 	li t1, TOKEN_INTEGER | ||||
| 	sw t1, 0(t0) | ||||
| 	lw t1, 12(sp) | ||||
| 	sw t1, 8(t0) | ||||
| 	sub t1, s1, t1 | ||||
| 	sw t1, 4(t0) | ||||
|  | ||||
| 	j .Llex_next_end | ||||
|  | ||||
| .Llex_next_end: | ||||
| 	mv a0, s1 # Return the advanced text pointer. | ||||
|  | ||||
| 	# Restore saved registers. | ||||
| 	lw s1, 20(sp) | ||||
| 	lw s2, 16(sp) | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 28(sp) | ||||
| 	lw s0, 24(sp) | ||||
| 	addi sp, sp, 32 | ||||
| 	ret | ||||
| @@ -1,61 +0,0 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
| # frozen_string_literal: true | ||||
|  | ||||
| CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' | ||||
| SYSROOT = 'build/sysroot' | ||||
| QEMU = 'qemu-riscv32' | ||||
|  | ||||
| def assemble_stage(output, compiler, source) | ||||
|   arguments = [QEMU, '-L', SYSROOT, *compiler] | ||||
|  | ||||
|   puts Term::ANSIColor.green(arguments * ' ') | ||||
|   puts | ||||
|   Open3.popen2(*arguments) do |qemu_in, qemu_out| | ||||
|     qemu_in.write File.read(*source) | ||||
|     qemu_in.close | ||||
|  | ||||
|     IO.copy_stream qemu_out, output | ||||
|     qemu_out.close | ||||
|   end | ||||
| end | ||||
|  | ||||
| library = [] | ||||
|  | ||||
| Dir.glob('boot/*.s').each do |assembly_source| | ||||
|   source_basename = Pathname.new(assembly_source).basename | ||||
|   target_object = Pathname.new('build/boot') + source_basename.sub_ext('.o') | ||||
|  | ||||
|   file target_object.to_s => [assembly_source, 'build/boot'] do |t| | ||||
|     sh CROSS_GCC, '-c', '-o', t.name, assembly_source | ||||
|   end | ||||
|   library << assembly_source unless source_basename.to_s.start_with? 'stage' | ||||
| end | ||||
|  | ||||
| desc 'Initial stage' | ||||
| file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t| | ||||
|   sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites | ||||
| end | ||||
|  | ||||
| file 'build/boot/stage2a.s' => ['build/boot/stage1', 'boot/stage2.elna'] do |t| | ||||
|   source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' } | ||||
|  | ||||
|   File.open t.name, 'w' do |output| | ||||
|     assemble_stage output, exe, source | ||||
|   end | ||||
| end | ||||
|  | ||||
| ['build/boot/stage2a', 'build/boot/stage2b'].each do |exe| | ||||
|   file exe => [exe.ext('.s'), *library] do |t| | ||||
|     sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites | ||||
|   end | ||||
| end | ||||
|  | ||||
| file 'build/boot/stage2b.s' => ['build/boot/stage2a', 'boot/stage2.elna'] do |t| | ||||
|   source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' } | ||||
|  | ||||
|   File.open t.name, 'w' do |output| | ||||
|     assemble_stage output, exe, source | ||||
|   end | ||||
| end | ||||
		Reference in New Issue
	
	Block a user