Compile static initializers
This commit is contained in:
		
							
								
								
									
										41
									
								
								Rakefile
									
									
									
									
									
								
							
							
						
						
									
										41
									
								
								Rakefile
									
									
									
									
									
								
							| @@ -39,11 +39,44 @@ end | ||||
|  | ||||
| desc 'Convert previous stage language into the current stage language' | ||||
| task :convert do | ||||
|   File.open('boot/stage4.elna', 'w') do |current_stage| | ||||
|     li_value = nil | ||||
|   File.open('boot/stage8.elna', 'w') do |current_stage| | ||||
|     File.readlines('boot/stage7.elna').each do |line| | ||||
|       if line == ".section .bss\n" | ||||
|         current_stage << <<~SECTION | ||||
|           const | ||||
|           	symbol_builtin_name_int := "Int"; | ||||
|           	symbol_builtin_name_word := "Word"; | ||||
|           	symbol_builtin_name_pointer := "Pointer"; | ||||
|           	symbol_builtin_name_char := "Char"; | ||||
|           	symbol_builtin_name_bool := "Bool"; | ||||
|  | ||||
|     File.readlines('boot/stage3.elna').each do |line| | ||||
|       current_stage << line | ||||
|           	# Every type info starts with a word describing what type it is. | ||||
| 		# | ||||
| 		# PRIMITIVE_TYPE = 1 | ||||
| 		# | ||||
|           	# Primitive types have only type size. | ||||
|           	symbol_builtin_type_int := S(1, 4); | ||||
|           	symbol_builtin_type_word := S(1, 4); | ||||
|           	symbol_builtin_type_pointer := S(1, 4); | ||||
|           	symbol_builtin_type_char := S(1, 1); | ||||
|           	symbol_builtin_type_bool := S(1, 1); | ||||
|  | ||||
| 		# Info objects start with a word describing its type. | ||||
| 		# | ||||
| 		# INFO_TYPE = 1 | ||||
| 		# | ||||
| 		# Type info has the type it belongs to. | ||||
|           	symbol_type_info_int := S(1, @symbol_builtin_type_int); | ||||
|           	symbol_type_info_word := S(1, @symbol_builtin_type_word); | ||||
|           	symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); | ||||
|           	symbol_type_info_char := S(1, @symbol_builtin_type_char); | ||||
|           	symbol_type_info_bool := S(1, @symbol_builtin_type_bool); | ||||
|         SECTION | ||||
|       elsif line == ".section .data\n" | ||||
|         current_stage << "var\n" | ||||
|       elsif !(line == ".section .text\n" || line == ".globl _start\n") | ||||
|         current_stage << line | ||||
|       end | ||||
|     end | ||||
|   end | ||||
| end | ||||
|   | ||||
| @@ -1,368 +0,0 @@ | ||||
| # This Source Code Form is subject to the terms of the Mozilla Public License, | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| .global _read_file, _write_error | ||||
| .global _memcmp, _memchr, _memmem, _mmap | ||||
| .global _current, _get, _advance, _label_counter | ||||
| .global _divide_by_zero_error, _strings_index, _string_equal | ||||
|  | ||||
| .section .rodata | ||||
|  | ||||
| .equ SYS_READ, 63 | ||||
| .equ SYS_WRITE, 64 | ||||
| .equ SYS_MMAP2, 222 | ||||
| .equ STDIN, 0 | ||||
| .equ STDOUT, 1 | ||||
| .equ STDERR, 2 | ||||
| .equ PROT_READ, 0x1 | ||||
| .equ PROT_WRITE, 0x2 | ||||
| .equ MAP_PRIVATE, 0x02 | ||||
| .equ MAP_ANONYMOUS, 0x20 | ||||
|  | ||||
| new_line: .ascii "\n" | ||||
|  | ||||
| .section .text | ||||
|  | ||||
| # Write the current token to stderr. Ends the output with a newline. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - String pointer. | ||||
| # a1 - String length. | ||||
| # | ||||
| # Issues two write system calls: one for the string, one for the newline. | ||||
| # Overwrites t0, t1, a0, a1, a2 and a7. | ||||
| .type _write_error, @function | ||||
| _write_error: | ||||
| 	# Keep the arguments aside, a0 and a1 are reused for the system call. | ||||
| 	mv t0, a0 | ||||
| 	mv t1, a1 | ||||
|  | ||||
| 	# Write the string itself. | ||||
| 	li a0, STDERR | ||||
| 	mv a1, t0 | ||||
| 	mv a2, t1 | ||||
| 	li a7, SYS_WRITE | ||||
| 	ecall | ||||
|  | ||||
| 	# Write the terminating newline (1 byte). | ||||
| 	li a0, STDERR | ||||
| 	la a1, new_line | ||||
| 	li a2, 1 | ||||
| 	li a7, SYS_WRITE | ||||
| 	ecall | ||||
|  | ||||
| 	ret | ||||
|  | ||||
| # Compares two memory regions byte by byte. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - First pointer. | ||||
| # a1 - Second pointer. | ||||
| # a2 - The length to compare. | ||||
| # | ||||
| # Returns 0 in a0 if memory regions are equal (also if the length is 0), | ||||
| # otherwise the difference between the first pair of bytes that don't match. | ||||
| # | ||||
| # Overwrites t0, t1, t2, a1 and a2. | ||||
| .type _memcmp, @function | ||||
| _memcmp: | ||||
| 	# a0 becomes the result, so the first pointer is moved to t0. | ||||
| 	mv t0, a0 | ||||
| 	li a0, 0 | ||||
|  | ||||
| .Lmemcmp_loop: | ||||
| 	beqz a2, .Lmemcmp_end | ||||
|  | ||||
| 	# Bytes are loaded unsigned, the result is their difference. | ||||
| 	lbu t1, (t0) | ||||
| 	lbu t2, (a1) | ||||
| 	sub a0, t1, t2 | ||||
|  | ||||
| 	bnez a0, .Lmemcmp_end | ||||
|  | ||||
| 	addi t0, t0, 1 | ||||
| 	addi a1, a1, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	j .Lmemcmp_loop | ||||
|  | ||||
| .Lmemcmp_end: | ||||
| 	ret | ||||
|  | ||||
| # Reads standard input into a buffer. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Buffer pointer. | ||||
| # a1 - Buffer size. | ||||
| # | ||||
| # Sets s1 to the buffer passed in a0. s1 is overwritten on purpose and is | ||||
| # not restored before returning. | ||||
| # | ||||
| # Returns the amount of bytes written into the buffer in a0 (the result of | ||||
| # the read system call). | ||||
| .type _read_file, @function | ||||
| _read_file: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -8 | ||||
| 	sw ra, 4(sp) | ||||
| 	sw s0, 0(sp) | ||||
| 	addi s0, sp, 8 | ||||
|  | ||||
| 	mv s1, a0 | ||||
|  | ||||
| 	# read(STDIN, buffer, size) | ||||
| 	li a0, STDIN | ||||
| 	mv a2, a1 | ||||
| 	mv a1, s1 | ||||
| 	li a7, SYS_READ | ||||
| 	ecall | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 4(sp) | ||||
| 	lw s0, 0(sp) | ||||
| 	addi sp, sp, 8 | ||||
| 	ret | ||||
|  | ||||
| # Sends SIGFPE to the current process. | ||||
| # | ||||
| # Takes no parameters. Overwrites a0, a1 and a7. | ||||
| .type _divide_by_zero_error, @function | ||||
| _divide_by_zero_error: | ||||
|     addi a7, zero, 172 # getpid | ||||
|     ecall | ||||
|  | ||||
|     # a0 is not touched between the two calls, so the result of getpid is | ||||
|     # passed on as the first argument of kill. | ||||
|     addi a1, zero, 8 # SIGFPE | ||||
|     addi a7, zero, 129 # kill | ||||
|     ecall | ||||
|     ret | ||||
|  | ||||
| # Parameters: | ||||
| # a0 - Pointer to an array to get the first element. | ||||
| # | ||||
| # Dereferences a pointer and returns what is on the address in a0. | ||||
| # The element is loaded as a whole word (lw). | ||||
| .type _get, @function | ||||
| _get: | ||||
| 	lw a0, (a0) | ||||
| 	ret | ||||
|  | ||||
| # Searches for the first occurrence of a character in the given memory block. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Memory block. | ||||
| # a1 - Needle. | ||||
| # a2 - Memory size. | ||||
| # | ||||
| # Sets a0 to the pointer to the found character or to null if the character | ||||
| # doesn't occur in the memory block. | ||||
| # | ||||
| # Overwrites t0 and a2. | ||||
| .type _memchr, @function | ||||
| _memchr: | ||||
| .Lmemchr_loop: | ||||
| 	beqz a2, .Lmemchr_nil # Exit if the length is 0. | ||||
|  | ||||
| 	lbu t0, (a0) # Load the character from the memory block. | ||||
| 	beq t0, a1, .Lmemchr_end # Exit if the character was found. | ||||
|  | ||||
| 	# Otherwise, continue with the next character. | ||||
| 	addi a0, a0, 1 | ||||
| 	addi a2, a2, -1 | ||||
|  | ||||
| 	j .Lmemchr_loop | ||||
|  | ||||
| .Lmemchr_nil: | ||||
| 	li a0, 0 | ||||
|  | ||||
| .Lmemchr_end: | ||||
| 	ret | ||||
|  | ||||
| # Locates a substring. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Haystack. | ||||
| # a1 - Haystack size. | ||||
| # a2 - Needle. | ||||
| # a3 - Needle size. | ||||
| # | ||||
| # Sets a0 to the pointer to the beginning of the substring in memory or to 0 | ||||
| # if the substring doesn't occur in the block. | ||||
| .type _memmem, @function | ||||
| _memmem: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -24 | ||||
| 	sw ra, 20(sp) | ||||
| 	sw s0, 16(sp) | ||||
| 	addi s0, sp, 24 | ||||
|  | ||||
| 	# Save preserved registers. They are used to keep arguments. | ||||
| 	sw s1, 12(sp) | ||||
| 	sw s2, 8(sp) | ||||
| 	sw s3, 4(sp) | ||||
| 	sw s4, 0(sp) | ||||
|  | ||||
| 	mv s1, a0 # Current position in the haystack. | ||||
| 	mv s2, a1 # Remaining haystack size. | ||||
| 	mv s3, a2 # Needle pointer. | ||||
| 	mv s4, a3 # Needle size. | ||||
|  | ||||
| .Lmemmem_loop: | ||||
| 	# Exit if the needle is longer than the rest of the haystack. The remaining | ||||
| 	# size (s2) has to be compared with the needle size (s4), not with the | ||||
| 	# needle pointer (s3), otherwise _memcmp may read past the haystack. | ||||
| 	blt s2, s4, .Lmemmem_nil | ||||
|  | ||||
| 	mv a0, s1 | ||||
| 	mv a1, s3 | ||||
| 	mv a2, s4 | ||||
| 	call _memcmp | ||||
|  | ||||
| 	mv t0, a0 # memcmp result. | ||||
| 	mv a0, s1 # Memory pointer for the case the substring was found. | ||||
| 	beqz t0, .Lmemmem_end | ||||
|  | ||||
| 	# Try again one byte further. | ||||
| 	addi s1, s1, 1 | ||||
| 	addi s2, s2, -1 | ||||
|  | ||||
| 	j .Lmemmem_loop | ||||
|  | ||||
| .Lmemmem_nil: | ||||
| 	li a0, 0 | ||||
|  | ||||
| .Lmemmem_end: | ||||
|  | ||||
| 	# Restore the preserved registers. | ||||
| 	lw s1, 12(sp) | ||||
| 	lw s2, 8(sp) | ||||
| 	lw s3, 4(sp) | ||||
| 	lw s4, 0(sp) | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 20(sp) | ||||
| 	lw s0, 16(sp) | ||||
| 	addi sp, sp, 24 | ||||
| 	ret | ||||
|  | ||||
| # Searches for a string in a string array. | ||||
| # | ||||
| # Each element of the array is a 4-byte length followed by that many bytes | ||||
| # of string data; the next element starts right after the data. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Number of elements in the string array. | ||||
| # a1 - String array. | ||||
| # a2 - Needle length. | ||||
| # a3 - Needle. | ||||
| # | ||||
| # Sets a0 to the 1-based index of the needle in the haystack or to 0 if the | ||||
| # element could not be found. | ||||
| .type _strings_index, @function | ||||
| _strings_index: | ||||
| 	# Prologue. | ||||
| 	addi sp, sp, -32 | ||||
| 	sw ra, 28(sp) | ||||
| 	sw s0, 24(sp) | ||||
| 	addi s0, sp, 32 | ||||
|  | ||||
| 	# Save the preserved registers and keep the arguments in them. | ||||
| 	sw s1, 20(sp) | ||||
| 	mv s1, a0 | ||||
| 	sw s2, 16(sp) | ||||
| 	mv s2, a1 | ||||
| 	sw s3, 12(sp) | ||||
| 	mv s3, a2 | ||||
| 	sw s4, 8(sp) | ||||
| 	mv s4, a3 | ||||
| 	sw s5, 4(sp) | ||||
| 	li s5, 0 # Index counter. | ||||
|  | ||||
| .Lstrings_index_loop: | ||||
| 	# The index is incremented before the check, so it is 1-based on a match. | ||||
| 	addi s5, s5, 1 | ||||
| 	beqz s1, .Lstrings_index_missing | ||||
|  | ||||
| 	lw a2, (s2) # Read the length of the current element in the haystack. | ||||
| 	bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration. | ||||
|  | ||||
| 	# Compare the element data (after the length word) with the needle. | ||||
| 	addi a0, s2, 4 | ||||
| 	mv a1, s4 | ||||
| 	call _memcmp | ||||
|  | ||||
| 	beqz a0, .Lstrings_index_end | ||||
|  | ||||
| .Lstrings_index_next: | ||||
| 	# Advance the pointer, reduce the length. | ||||
| 	# The element length is read again since _memcmp changes a2. | ||||
| 	lw a2, (s2) | ||||
| 	addi s2, s2, 4 | ||||
| 	add s2, s2, a2 | ||||
| 	addi s1, s1, -1 | ||||
| 	j .Lstrings_index_loop | ||||
|  | ||||
| .Lstrings_index_missing: | ||||
| 	li s5, 0 | ||||
|  | ||||
| .Lstrings_index_end: | ||||
| 	mv a0, s5 | ||||
|  | ||||
| 	# Restore the preserved registers. | ||||
| 	lw s1, 20(sp) | ||||
| 	lw s2, 16(sp) | ||||
| 	lw s3, 12(sp) | ||||
| 	lw s4, 8(sp) | ||||
| 	lw s5, 4(sp) | ||||
|  | ||||
| 	# Epilogue. | ||||
| 	lw ra, 28(sp) | ||||
| 	lw s0, 24(sp) | ||||
| 	add sp, sp, 32 | ||||
| 	ret | ||||
|  | ||||
| # Compares two strings for equality. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - Length of the first string. | ||||
| # a1 - Pointer to the first string. | ||||
| # a2 - Length of the second string. | ||||
| # a3 - Pointer to the second string. | ||||
| # | ||||
| # Sets a0 to 1 if the strings are equal, to 0 if not. | ||||
| .type _string_equal, @function | ||||
| _string_equal: | ||||
| 	# Prologue. Only ra and s0 are stored in the 32 bytes big frame. | ||||
| 	addi sp, sp, -32 | ||||
| 	sw ra, 28(sp) | ||||
| 	sw s0, 24(sp) | ||||
| 	addi s0, sp, 32 | ||||
|  | ||||
| 	# Compare string lengths. | ||||
| 	bne a0, a2, .Lstring_equal_not_found | ||||
|  | ||||
| 	# If lengths match, compare the content. | ||||
| 	mv a0, a1 | ||||
| 	mv a1, a3 | ||||
| 	# a2 is already set to the length. | ||||
| 	call _memcmp | ||||
|  | ||||
| 	# _memcmp returns 0 for equal regions, the result has to be inverted. | ||||
| 	bnez a0, .Lstring_equal_not_found | ||||
|  | ||||
| 	li a0, 1 | ||||
| 	j .Lstring_equal_end | ||||
|  | ||||
| .Lstring_equal_not_found: | ||||
| 	mv a0, zero | ||||
|  | ||||
| .Lstring_equal_end: | ||||
| 	# Epilogue. | ||||
| 	lw ra, 28(sp) | ||||
| 	lw s0, 24(sp) | ||||
| 	addi sp, sp, 32 | ||||
| 	ret | ||||
|  | ||||
| # Creates an anonymous, private, readable and writable mapping of 4096 bytes. | ||||
| # | ||||
| # Takes no parameters. | ||||
| # | ||||
| # Sets a0 to the mapping address. The result of the system call is returned | ||||
| # as is, without checking it for an error. | ||||
| .type _mmap, @function | ||||
| _mmap: | ||||
| 	li a0, 0 # Address at which to create the mapping. | ||||
| 	li a1, 4096 # The length of the mapping. | ||||
| 	li a2, PROT_READ | PROT_WRITE # Protection flags. | ||||
| 	li a3, MAP_ANONYMOUS | MAP_PRIVATE # The mapping is not backed by a file. | ||||
| 	li a4, -1 # File descriptor. | ||||
| 	li a5, 0 # Page offset. | ||||
| 	li a7, SYS_MMAP2 | ||||
| 	ecall | ||||
|  | ||||
| 	ret | ||||
|  | ||||
| # Sets a0 to the current position in the source text (s1). | ||||
| # | ||||
| # Takes no parameters, s1 is left unchanged. | ||||
| .type _current, @function | ||||
| _current: | ||||
| 	mv a0, s1 | ||||
| 	ret | ||||
|  | ||||
| # Advances the position of the source text, which is kept in s1. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - The number of bytes to advance. | ||||
| # | ||||
| # No bounds are checked. | ||||
| .type _advance, @function | ||||
| _advance: | ||||
| 	add s1, s1, a0 | ||||
| 	ret | ||||
|  | ||||
| # Advances the global label counter (kept in s2) by 1 setting a0 to the | ||||
| # previous value. | ||||
| # | ||||
| # Parameters: | ||||
| # a0 - If it is 0, the counter is reset first: 0 is returned and the counter | ||||
| #      is left at 1. | ||||
| .type _label_counter, @function | ||||
| _label_counter: | ||||
| 	bnez a0, .Llabel_counter_advance | ||||
| 	li s2, 0 | ||||
|  | ||||
| .Llabel_counter_advance: | ||||
| 	mv a0, s2 | ||||
| 	addi s2, s2, 1 | ||||
|  | ||||
| 	ret | ||||
							
								
								
									
										304
									
								
								boot/stage7.elna
									
									
									
									
									
								
							
							
						
						
									
										304
									
								
								boot/stage7.elna
									
									
									
									
									
								
							| @@ -4,7 +4,9 @@ | ||||
|  | ||||
| # Stage 7 compiler. | ||||
| # | ||||
| # - String literals. | ||||
| # - Static global variable and constant initialization. | ||||
| # - Object sections are determined automatically. | ||||
| # - _start is always exported. | ||||
|  | ||||
| .section .bss | ||||
|  | ||||
| @@ -1098,34 +1100,6 @@ begin | ||||
| 	_advance_token(5); | ||||
| end; | ||||
|  | ||||
| proc _compile_type(); | ||||
| begin | ||||
| 	# Print and skip the ".type" (5 characters) directive and a space after it. | ||||
| 	_write_token(6); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read and print the symbol name. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol name, comma, space and @. | ||||
| 	addi a0, a0, 3 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol type. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print the symbol type and newline. | ||||
| 	addi a0, a0, 1 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Write the object definition itself. | ||||
| 	_compile_line(); | ||||
|  | ||||
| .compile_type_end: | ||||
| end; | ||||
|  | ||||
| proc _skip_newlines(); | ||||
| begin | ||||
| 	# Skip newlines. | ||||
| @@ -1146,9 +1120,271 @@ begin | ||||
| .skip_newlines_end: | ||||
| end; | ||||
|  | ||||
| # Skip newlines and comments. | ||||
| # | ||||
| # Looks at the character at the current source position: a '#' is handed to | ||||
| # _skip_comment(), a newline is skipped as a single character, and anything | ||||
| # else ends the procedure leaving the position where it is. | ||||
| proc _skip_empty_lines(); | ||||
| begin | ||||
| .skip_empty_lines_loop: | ||||
| 	# Load the current character. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .skip_empty_lines_comment | ||||
|  | ||||
| 	li t1, '\n' | ||||
| 	beq t0, t1, .skip_empty_lines_newline | ||||
|  | ||||
| 	goto .skip_empty_lines_end; | ||||
|  | ||||
| .skip_empty_lines_comment: | ||||
| 	_skip_comment(); | ||||
| 	goto .skip_empty_lines_loop; | ||||
|  | ||||
| .skip_empty_lines_newline: | ||||
| 	_advance_token(1); | ||||
| 	goto .skip_empty_lines_loop; | ||||
|  | ||||
| .skip_empty_lines_end: | ||||
| end; | ||||
|  | ||||
| # Compiles a static initializer at the current source position into data | ||||
| # directives. | ||||
| # | ||||
| # The first character selects the kind of the initializer: | ||||
| # - '"' starts a string literal, emitted as ".word strings + " followed by | ||||
| #   the value returned by _add_string(). | ||||
| # - 'S' starts a record "S(a, b, ...)", each element is compiled recursively. | ||||
| # - '@' takes the address of a symbol, emitted as ".word " and the symbol. | ||||
| # - A digit starts a number, emitted as ".word " and the number. | ||||
| # | ||||
| # Anything else is not supported and traps. | ||||
| proc _compile_global_initializer(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, '"' | ||||
| 	beq t0, t1, .compile_global_initializer_string | ||||
|  | ||||
| 	li t1, 'S' | ||||
| 	beq t0, t1, .compile_global_initializer_record | ||||
|  | ||||
| 	li t1, '@' | ||||
| 	beq t0, t1, .compile_global_initializer_pointer | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	lb a0, (a0) | ||||
| 	_is_digit(); | ||||
| 	bnez a0, .compile_global_initializer_number | ||||
|  | ||||
| 	# Unknown initializer. | ||||
| 	unimp | ||||
|  | ||||
| .compile_global_initializer_pointer: | ||||
| 	# Skip @. | ||||
| 	_advance_token(1); | ||||
| 	_write_z("\n\t.word \0"); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_number: | ||||
| 	_write_z("\n\t.word \0"); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	# Skip the whole number and not only its first digit, the same way the | ||||
| 	# symbol name is skipped in the pointer case. | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_record: | ||||
| 	# Skip "S(". | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	# An empty record, "S()", has no elements to compile. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_global_initializer_closing | ||||
|  | ||||
| .compile_global_initializer_loop: | ||||
| 	_compile_global_initializer(); | ||||
|  | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_global_initializer_closing | ||||
|  | ||||
| 	# Skip comma and whitespace after it. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	goto .compile_global_initializer_loop; | ||||
|  | ||||
| .compile_global_initializer_closing: | ||||
| 	# Skip ")" | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_string: | ||||
| 	_write_z("\n\t.word strings + \0"); | ||||
| 	# Keep the string length at 4(sp), it is needed after the next calls. | ||||
| 	_string_length(source_code_position); | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	_add_string(source_code_position); | ||||
| 	_write_i(); | ||||
|  | ||||
| 	# Skip the quoted string. | ||||
| 	_advance_token(v4 + 2); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_end: | ||||
| end; | ||||
|  | ||||
| # Compiles one constant declaration of the form "name := initializer;". | ||||
| # | ||||
| # Emits a ".type name, @object" directive and the "name:" label, followed by | ||||
| # whatever _compile_global_initializer() writes for the initializer. | ||||
| proc _compile_constant_declaration(); | ||||
| begin | ||||
| 	# Keep the name length at 0(sp); v0 below presumably refers to this slot. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_write_z(".type \0"); | ||||
| 	_write_token(v0); | ||||
| 	_write_z(", @object\n\0"); | ||||
|  | ||||
| 	_write_token(v0); | ||||
| 	_write_c(':'); | ||||
|  | ||||
| 	# Skip the constant name with assignment sign and surrounding whitespaces. | ||||
| 	_advance_token(v0 + 4); | ||||
| 	_compile_global_initializer(); | ||||
| 	# Skip semicolon and newline. | ||||
| 	_advance_token(2); | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles the optional "const" part of a module into the .rodata section. | ||||
| # | ||||
| # Leading empty lines and comments are skipped in any case. If the source | ||||
| # doesn't continue with "const" nothing else is done. Declarations are | ||||
| # expected to be indented with a tab; the first line that isn't ends the part. | ||||
| proc _compile_const_part(); | ||||
| begin | ||||
| 	_skip_empty_lines(); | ||||
|  | ||||
| 	_memcmp(source_code_position, "const\0", 5); | ||||
| 	bnez a0, .compile_const_part_end | ||||
|  | ||||
| 	# Skip "const" with the newline after it. | ||||
| 	_advance_token(6); | ||||
| 	_write_z(".section .rodata # Compiled from const section.\n\n\0"); | ||||
|  | ||||
| .compile_const_part_loop: | ||||
| 	_skip_empty_lines(); | ||||
|  | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	# If the character at the line beginning is not indentation, | ||||
| 	# it is probably the next code section. | ||||
| 	li t1, '\t' | ||||
| 	bne t0, t1, .compile_const_part_end | ||||
|  | ||||
| 	# Skip the indentation. | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	# An indented comment is left to _skip_empty_lines() at the loop start. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .compile_const_part_loop | ||||
|  | ||||
| 	_compile_constant_declaration(); | ||||
| 	goto .compile_const_part_loop; | ||||
|  | ||||
| .compile_const_part_end: | ||||
| end; | ||||
|  | ||||
| # Compiles one variable declaration of the form "name: Type" with an | ||||
| # optional " := initializer" after the type. | ||||
| # | ||||
| # Emits a ".type name, @object" directive and the "name:" label. The type | ||||
| # name is skipped without being looked at. Without an initializer the | ||||
| # variable is emitted as 81920 zeroed bytes. | ||||
| proc _compile_variable_declaration(); | ||||
| begin | ||||
| 	# Keep the name length at 0(sp); v0 below presumably refers to this slot. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_write_z(".type \0"); | ||||
| 	_write_token(v0); | ||||
| 	_write_z(", @object\n\0"); | ||||
|  | ||||
| 	_write_token(v0); | ||||
| 	_write_c(':'); | ||||
|  | ||||
| 	# Skip the variable name and colon with space before the type. | ||||
| 	_advance_token(v0 + 2); | ||||
|  | ||||
| 	# Skip the type name. | ||||
| 	_read_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# A space after the type name means that an initializer follows. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, ' ' | ||||
| 	beq t0, t1, .compile_variable_declaration_initializer | ||||
|  | ||||
| 	# Else we assume this is a zeroed 81920 bytes big array. | ||||
| 	_write_z(" .zero 81920\0"); | ||||
| 	goto .compile_variable_declaration_finalize; | ||||
|  | ||||
| .compile_variable_declaration_initializer: | ||||
| 	# Skip the assignment sign with surrounding whitespaces. | ||||
| 	_advance_token(4); | ||||
| 	_compile_global_initializer(); | ||||
| 	goto .compile_variable_declaration_finalize; | ||||
|  | ||||
| .compile_variable_declaration_finalize: | ||||
| 	# Skip semicolon and newline. | ||||
| 	_advance_token(2); | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles the optional "var" part of a module into the .data section. | ||||
| # | ||||
| # Does nothing if the source doesn't continue with "var". Lines starting with | ||||
| # a tab are compiled as variable declarations, a line starting with 'p' ends | ||||
| # the part, any other line is handed to _compile_line(). | ||||
| proc _compile_var_part(); | ||||
| begin | ||||
| 	_memcmp(source_code_position, "var\0", 3); | ||||
| 	bnez a0, .compile_var_part_end | ||||
|  | ||||
| 	# Skip "var" and newline. | ||||
| 	_advance_token(4); | ||||
| 	_write_z(".section .data\n\0"); | ||||
|  | ||||
| .compile_var_part_loop: | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	# Presumably the 'p' of the first "proc" after the variables. | ||||
| 	li t1, 'p' | ||||
| 	beq t0, t1, .compile_var_part_end | ||||
|  | ||||
| 	li t1, '\t' | ||||
| 	beq t0, t1, .compile_var_part_declaration | ||||
|  | ||||
| 	_compile_line(); | ||||
| 	goto .compile_var_part_loop; | ||||
|  | ||||
| .compile_var_part_declaration: | ||||
| 	# Skip the indentation. | ||||
| 	_advance_token(1); | ||||
| 	_compile_variable_declaration(); | ||||
| 	goto .compile_var_part_loop; | ||||
|  | ||||
| .compile_var_part_end: | ||||
| end; | ||||
|  | ||||
| # Process the source code and print the generated code. | ||||
| proc _compile_module(); | ||||
| begin | ||||
| 	_compile_const_part(); | ||||
| 	_write_z(".section .bss\n\0"); | ||||
|  | ||||
| .compile_module_bss: | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, 'v' | ||||
| 	beq t0, t1, .compile_module_code | ||||
|  | ||||
| 	li t1, 'p' | ||||
| 	beq t0, t1, .compile_module_code | ||||
|  | ||||
| 	_compile_line(); | ||||
| 	goto .compile_module_bss; | ||||
|  | ||||
| .compile_module_code: | ||||
| 	_compile_var_part(); | ||||
| 	_write_z(".section .text\n\0"); | ||||
| .compile_module_loop: | ||||
| 	_skip_newlines(); | ||||
|  | ||||
| @@ -1163,10 +1399,6 @@ begin | ||||
| 	_memcmp(source_code_position, ".section", 8); | ||||
| 	beqz a0, .compile_module_section | ||||
|  | ||||
| 	# 5 is ".type" length. | ||||
| 	_memcmp(source_code_position, ".type", 5); | ||||
| 	beqz a0, .compile_module_type | ||||
|  | ||||
| 	# 5 is "proc " length. Space is needed to distinguish from "procedure". | ||||
| 	_memcmp(source_code_position, "proc ", 5); | ||||
| 	beqz a0, .compile_module_procedure | ||||
| @@ -1183,11 +1415,6 @@ begin | ||||
|  | ||||
| 	goto .compile_module_loop; | ||||
|  | ||||
| .compile_module_type: | ||||
| 	_compile_type(); | ||||
|  | ||||
| 	goto .compile_module_loop; | ||||
|  | ||||
| .compile_module_global: | ||||
| 	_compile_line(); | ||||
|  | ||||
| @@ -1208,6 +1435,7 @@ end; | ||||
|  | ||||
| proc _compile(); | ||||
| begin | ||||
| 	_write_z(".globl _start\n\n\0"); | ||||
| 	_compile_module(); | ||||
|  | ||||
| 	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); | ||||
|   | ||||
							
								
								
									
										349
									
								
								boot/stage8.elna
									
									
									
									
									
								
							
							
						
						
									
										349
									
								
								boot/stage8.elna
									
									
									
									
									
								
							| @@ -2,11 +2,37 @@ | ||||
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||||
| # obtain one at https://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| # Stage 7 compiler. | ||||
| # Stage 8 compiler. | ||||
| # | ||||
| # - String literals. | ||||
|  | ||||
| .section .bss | ||||
| const | ||||
| 	symbol_builtin_name_int := "Int"; | ||||
| 	symbol_builtin_name_word := "Word"; | ||||
| 	symbol_builtin_name_pointer := "Pointer"; | ||||
| 	symbol_builtin_name_char := "Char"; | ||||
| 	symbol_builtin_name_bool := "Bool"; | ||||
|  | ||||
| 	# Every type info starts with a word describing what type it is. | ||||
| 	# | ||||
| 	# PRIMITIVE_TYPE = 1 | ||||
| 	# | ||||
| 	# Primitive types have only type size. | ||||
| 	symbol_builtin_type_int := S(1, 4); | ||||
| 	symbol_builtin_type_word := S(1, 4); | ||||
| 	symbol_builtin_type_pointer := S(1, 4); | ||||
| 	symbol_builtin_type_char := S(1, 1); | ||||
| 	symbol_builtin_type_bool := S(1, 1); | ||||
|  | ||||
| 	# Info objects start with a word describing its type. | ||||
| 	# | ||||
| 	# INFO_TYPE = 1 | ||||
| 	# | ||||
| 	# Type info has the type it belongs to. | ||||
| 	symbol_type_info_int := S(1, @symbol_builtin_type_int); | ||||
| 	symbol_type_info_word := S(1, @symbol_builtin_type_word); | ||||
| 	symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); | ||||
| 	symbol_type_info_char := S(1, @symbol_builtin_type_char); | ||||
| 	symbol_type_info_bool := S(1, @symbol_builtin_type_bool); | ||||
|  | ||||
| # When modifying also change the read size in the entry point procedure. | ||||
| .type source_code, @object | ||||
| @@ -15,18 +41,10 @@ source_code: .zero 81920 | ||||
| .type compiler_strings, @object | ||||
| compiler_strings: .zero 8192 | ||||
|  | ||||
| .section .data | ||||
|  | ||||
| .type compiler_strings_position, @object | ||||
| compiler_strings_position: .word compiler_strings | ||||
|  | ||||
| .type compiler_strings_length, @object | ||||
| compiler_strings_length: .word 0 | ||||
|  | ||||
| .type source_code_position, @object | ||||
| source_code_position: .word source_code | ||||
|  | ||||
| .section .text | ||||
| var | ||||
| 	compiler_strings_position: Pointer := @compiler_strings; | ||||
| 	compiler_strings_length: Word := 0; | ||||
| 	source_code_position: Pointer := @source_code; | ||||
|  | ||||
| # Calculates and returns the string token length between quotes, including the | ||||
| # escaping slash characters. | ||||
| @@ -1098,34 +1116,6 @@ begin | ||||
| 	_advance_token(5); | ||||
| end; | ||||
|  | ||||
| proc _compile_type(); | ||||
| begin | ||||
| 	# Print and skip the ".type" (5 characters) directive and a space after it. | ||||
| 	_write_token(6); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read and print the symbol name. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print and skip the symbol name, comma, space and @. | ||||
| 	addi a0, a0, 3 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Read the symbol type. | ||||
| 	_read_token(); | ||||
|  | ||||
| 	# Print the symbol type and newline. | ||||
| 	addi a0, a0, 1 | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# Write the object definition itself. | ||||
| 	_compile_line(); | ||||
|  | ||||
| .compile_type_end: | ||||
| end; | ||||
|  | ||||
| proc _skip_newlines(); | ||||
| begin | ||||
| 	# Skip newlines. | ||||
| @@ -1146,9 +1136,271 @@ begin | ||||
| .skip_newlines_end: | ||||
| end; | ||||
|  | ||||
| # Skip newlines and comments. | ||||
| # | ||||
| # Looks at the character at the current source position: a '#' is handed to | ||||
| # _skip_comment(), a newline is skipped as a single character, and anything | ||||
| # else ends the procedure leaving the position where it is. | ||||
| proc _skip_empty_lines(); | ||||
| begin | ||||
| .skip_empty_lines_loop: | ||||
| 	# Load the current character. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .skip_empty_lines_comment | ||||
|  | ||||
| 	li t1, '\n' | ||||
| 	beq t0, t1, .skip_empty_lines_newline | ||||
|  | ||||
| 	goto .skip_empty_lines_end; | ||||
|  | ||||
| .skip_empty_lines_comment: | ||||
| 	_skip_comment(); | ||||
| 	goto .skip_empty_lines_loop; | ||||
|  | ||||
| .skip_empty_lines_newline: | ||||
| 	_advance_token(1); | ||||
| 	goto .skip_empty_lines_loop; | ||||
|  | ||||
| .skip_empty_lines_end: | ||||
| end; | ||||
|  | ||||
| # Compiles a static initializer at the current source position into data | ||||
| # directives. | ||||
| # | ||||
| # The first character selects the kind of the initializer: | ||||
| # - '"' starts a string literal, emitted as ".word strings + " followed by | ||||
| #   the value returned by _add_string(). | ||||
| # - 'S' starts a record "S(a, b, ...)", each element is compiled recursively. | ||||
| # - '@' takes the address of a symbol, emitted as ".word " and the symbol. | ||||
| # - A digit starts a number, emitted as ".word " and the number. | ||||
| # | ||||
| # Anything else is not supported and traps. | ||||
| proc _compile_global_initializer(); | ||||
| begin | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	li t1, '"' | ||||
| 	beq t0, t1, .compile_global_initializer_string | ||||
|  | ||||
| 	li t1, 'S' | ||||
| 	beq t0, t1, .compile_global_initializer_record | ||||
|  | ||||
| 	li t1, '@' | ||||
| 	beq t0, t1, .compile_global_initializer_pointer | ||||
|  | ||||
| 	la a0, source_code_position | ||||
| 	lw a0, (a0) | ||||
| 	lb a0, (a0) | ||||
| 	_is_digit(); | ||||
| 	bnez a0, .compile_global_initializer_number | ||||
|  | ||||
| 	# Unknown initializer. | ||||
| 	unimp | ||||
|  | ||||
| .compile_global_initializer_pointer: | ||||
| 	# Skip @. | ||||
| 	_advance_token(1); | ||||
| 	_write_z("\n\t.word \0"); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_number: | ||||
| 	_write_z("\n\t.word \0"); | ||||
| 	_read_token(); | ||||
| 	_write_token(); | ||||
| 	# Skip the whole number and not only its first digit, the same way the | ||||
| 	# symbol name is skipped in the pointer case. | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_record: | ||||
| 	# Skip "S(". | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	# An empty record, "S()", has no elements to compile. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_global_initializer_closing | ||||
|  | ||||
| .compile_global_initializer_loop: | ||||
| 	_compile_global_initializer(); | ||||
|  | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, ')' | ||||
| 	beq t0, t1, .compile_global_initializer_closing | ||||
|  | ||||
| 	# Skip comma and whitespace after it. | ||||
| 	_advance_token(2); | ||||
|  | ||||
| 	goto .compile_global_initializer_loop; | ||||
|  | ||||
| .compile_global_initializer_closing: | ||||
| 	# Skip ")" | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_string: | ||||
| 	_write_z("\n\t.word strings + \0"); | ||||
| 	# Keep the string length at 4(sp), it is needed after the next calls. | ||||
| 	_string_length(source_code_position); | ||||
| 	sw a0, 4(sp) | ||||
|  | ||||
| 	_add_string(source_code_position); | ||||
| 	_write_i(); | ||||
|  | ||||
| 	# Skip the quoted string. | ||||
| 	_advance_token(v4 + 2); | ||||
|  | ||||
| 	goto .compile_global_initializer_end; | ||||
|  | ||||
| .compile_global_initializer_end: | ||||
| end; | ||||
|  | ||||
| # Compiles one constant declaration of the form "name := initializer;". | ||||
| # | ||||
| # Emits a ".type name, @object" directive and the "name:" label, followed by | ||||
| # whatever _compile_global_initializer() writes for the initializer. | ||||
| proc _compile_constant_declaration(); | ||||
| begin | ||||
| 	# Keep the name length at 0(sp); v0 below presumably refers to this slot. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_write_z(".type \0"); | ||||
| 	_write_token(v0); | ||||
| 	_write_z(", @object\n\0"); | ||||
|  | ||||
| 	_write_token(v0); | ||||
| 	_write_c(':'); | ||||
|  | ||||
| 	# Skip the constant name with assignment sign and surrounding whitespaces. | ||||
| 	_advance_token(v0 + 4); | ||||
| 	_compile_global_initializer(); | ||||
| 	# Skip semicolon and newline. | ||||
| 	_advance_token(2); | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles the optional "const" part of a module into the .rodata section. | ||||
| # | ||||
| # Leading empty lines and comments are skipped in any case. If the source | ||||
| # doesn't continue with "const" nothing else is done. Declarations are | ||||
| # expected to be indented with a tab; the first line that isn't ends the part. | ||||
| proc _compile_const_part(); | ||||
| begin | ||||
| 	_skip_empty_lines(); | ||||
|  | ||||
| 	_memcmp(source_code_position, "const\0", 5); | ||||
| 	bnez a0, .compile_const_part_end | ||||
|  | ||||
| 	# Skip "const" with the newline after it. | ||||
| 	_advance_token(6); | ||||
| 	_write_z(".section .rodata # Compiled from const section.\n\n\0"); | ||||
|  | ||||
| .compile_const_part_loop: | ||||
| 	_skip_empty_lines(); | ||||
|  | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	# If the character at the line beginning is not indentation, | ||||
| 	# it is probably the next code section. | ||||
| 	li t1, '\t' | ||||
| 	bne t0, t1, .compile_const_part_end | ||||
|  | ||||
| 	# Skip the indentation. | ||||
| 	_advance_token(1); | ||||
|  | ||||
| 	# An indented comment is left to _skip_empty_lines() at the loop start. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, '#' | ||||
| 	beq t0, t1, .compile_const_part_loop | ||||
|  | ||||
| 	_compile_constant_declaration(); | ||||
| 	goto .compile_const_part_loop; | ||||
|  | ||||
| .compile_const_part_end: | ||||
| end; | ||||
|  | ||||
| # Compiles one variable declaration of the form "name: Type" with an | ||||
| # optional " := initializer" after the type. | ||||
| # | ||||
| # Emits a ".type name, @object" directive and the "name:" label. The type | ||||
| # name is skipped without being looked at. Without an initializer the | ||||
| # variable is emitted as 81920 zeroed bytes. | ||||
| proc _compile_variable_declaration(); | ||||
| begin | ||||
| 	# Keep the name length at 0(sp); v0 below presumably refers to this slot. | ||||
| 	_read_token(); | ||||
| 	sw a0, 0(sp) | ||||
|  | ||||
| 	_write_z(".type \0"); | ||||
| 	_write_token(v0); | ||||
| 	_write_z(", @object\n\0"); | ||||
|  | ||||
| 	_write_token(v0); | ||||
| 	_write_c(':'); | ||||
|  | ||||
| 	# Skip the variable name and colon with space before the type. | ||||
| 	_advance_token(v0 + 2); | ||||
|  | ||||
| 	# Skip the type name. | ||||
| 	_read_token(); | ||||
| 	_advance_token(); | ||||
|  | ||||
| 	# A space after the type name means that an initializer follows. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, ' ' | ||||
| 	beq t0, t1, .compile_variable_declaration_initializer | ||||
|  | ||||
| 	# Else we assume this is a zeroed 81920 bytes big array. | ||||
| 	_write_z(" .zero 81920\0"); | ||||
| 	goto .compile_variable_declaration_finalize; | ||||
|  | ||||
| .compile_variable_declaration_initializer: | ||||
| 	# Skip the assignment sign with surrounding whitespaces. | ||||
| 	_advance_token(4); | ||||
| 	_compile_global_initializer(); | ||||
| 	goto .compile_variable_declaration_finalize; | ||||
|  | ||||
| .compile_variable_declaration_finalize: | ||||
| 	# Skip semicolon and newline. | ||||
| 	_advance_token(2); | ||||
| 	_write_c('\n'); | ||||
| end; | ||||
|  | ||||
| # Compiles the optional "var" part of a module. | ||||
| # | ||||
| # If the source at the current position does not start with "var", the | ||||
| # procedure does nothing. Otherwise a .data section header is written and | ||||
| # the following lines are processed one by one: tab-indented lines are | ||||
| # compiled as variable declarations, any other line is passed to | ||||
| # _compile_line(), and a line starting with 'p' ends the part. | ||||
| proc _compile_var_part(); | ||||
| begin | ||||
| 	# 3 is "var" length. A non-zero result in a0 means there is no var part. | ||||
| 	_memcmp(source_code_position, "var\0", 3); | ||||
| 	bnez a0, .compile_var_part_end | ||||
|  | ||||
| 	# Skip "var" and newline. | ||||
| 	_advance_token(4); | ||||
| 	_write_z(".section .data\n\0"); | ||||
|  | ||||
| .compile_var_part_loop: | ||||
| 	# Load the character at the current source position into t0. | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
|  | ||||
| 	# A line starting with 'p' ends the var part. | ||||
| 	# NOTE(review): presumably this is the first "proc" - confirm. | ||||
| 	li t1, 'p' | ||||
| 	beq t0, t1, .compile_var_part_end | ||||
|  | ||||
| 	# Indented lines are variable declarations. | ||||
| 	li t1, '\t' | ||||
| 	beq t0, t1, .compile_var_part_declaration | ||||
|  | ||||
| 	# Anything else is handed to the generic line compiler. | ||||
| 	_compile_line(); | ||||
| 	goto .compile_var_part_loop; | ||||
|  | ||||
| .compile_var_part_declaration: | ||||
| 	# Skip the indentation tab before the variable name. | ||||
| 	_advance_token(1); | ||||
| 	_compile_variable_declaration(); | ||||
| 	goto .compile_var_part_loop; | ||||
|  | ||||
| .compile_var_part_end: | ||||
| end; | ||||
|  | ||||
| # Process the source code and print the generated code. | ||||
| proc _compile_module(); | ||||
| begin | ||||
| 	_compile_const_part(); | ||||
| 	_write_z(".section .bss\n\0"); | ||||
|  | ||||
| .compile_module_bss: | ||||
| 	la t0, source_code_position | ||||
| 	lw t0, (t0) | ||||
| 	lb t0, (t0) | ||||
| 	li t1, 'v' | ||||
| 	beq t0, t1, .compile_module_code | ||||
|  | ||||
| 	li t1, 'p' | ||||
| 	beq t0, t1, .compile_module_code | ||||
|  | ||||
| 	_compile_line(); | ||||
| 	goto .compile_module_bss; | ||||
|  | ||||
| .compile_module_code: | ||||
| 	_compile_var_part(); | ||||
| 	_write_z(".section .text\n\0"); | ||||
| .compile_module_loop: | ||||
| 	_skip_newlines(); | ||||
|  | ||||
| @@ -1163,10 +1415,6 @@ begin | ||||
| 	_memcmp(source_code_position, ".section", 8); | ||||
| 	beqz a0, .compile_module_section | ||||
|  | ||||
| 	# 5 is ".type" length. | ||||
| 	_memcmp(source_code_position, ".type", 5); | ||||
| 	beqz a0, .compile_module_type | ||||
|  | ||||
| 	# 5 is "proc " length. Space is needed to distinguish from "procedure". | ||||
| 	_memcmp(source_code_position, "proc ", 5); | ||||
| 	beqz a0, .compile_module_procedure | ||||
| @@ -1183,11 +1431,6 @@ begin | ||||
|  | ||||
| 	goto .compile_module_loop; | ||||
|  | ||||
| .compile_module_type: | ||||
| 	_compile_type(); | ||||
|  | ||||
| 	goto .compile_module_loop; | ||||
|  | ||||
| .compile_module_global: | ||||
| 	_compile_line(); | ||||
|  | ||||
| @@ -1208,6 +1451,7 @@ end; | ||||
|  | ||||
| proc _compile(); | ||||
| begin | ||||
| 	_write_z(".globl _start\n\n\0"); | ||||
| 	_compile_module(); | ||||
|  | ||||
| 	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); | ||||
| @@ -1247,7 +1491,6 @@ begin | ||||
| end; | ||||
|  | ||||
| # Entry point. | ||||
| .globl _start | ||||
| proc _start(); | ||||
| begin | ||||
| 	# Read the source from the standard input. | ||||
|   | ||||
							
								
								
									
										1503
									
								
								boot/stage9.elna
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1503
									
								
								boot/stage9.elna
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user