Compile static initializers
This commit is contained in:
39
Rakefile
39
Rakefile
@@ -39,14 +39,47 @@ end
|
||||
|
||||
desc 'Convert previous stage language into the current stage language'
|
||||
task :convert do
|
||||
File.open('boot/stage4.elna', 'w') do |current_stage|
|
||||
li_value = nil
|
||||
File.open('boot/stage8.elna', 'w') do |current_stage|
|
||||
File.readlines('boot/stage7.elna').each do |line|
|
||||
if line == ".section .bss\n"
|
||||
current_stage << <<~SECTION
|
||||
const
|
||||
symbol_builtin_name_int := "Int";
|
||||
symbol_builtin_name_word := "Word";
|
||||
symbol_builtin_name_pointer := "Pointer";
|
||||
symbol_builtin_name_char := "Char";
|
||||
symbol_builtin_name_bool := "Bool";
|
||||
|
||||
File.readlines('boot/stage3.elna').each do |line|
|
||||
# Every type info starts with a word describing what type it is.
|
||||
#
|
||||
# PRIMITIVE_TYPE = 1
|
||||
#
|
||||
# Primitive types have only type size.
|
||||
symbol_builtin_type_int := S(1, 4);
|
||||
symbol_builtin_type_word := S(1, 4);
|
||||
symbol_builtin_type_pointer := S(1, 4);
|
||||
symbol_builtin_type_char := S(1, 1);
|
||||
symbol_builtin_type_bool := S(1, 1);
|
||||
|
||||
# Info objects start with a word describing its type.
|
||||
#
|
||||
# INFO_TYPE = 1
|
||||
#
|
||||
# Type info has the type it belongs to.
|
||||
symbol_type_info_int := S(1, @symbol_builtin_type_int);
|
||||
symbol_type_info_word := S(1, @symbol_builtin_type_word);
|
||||
symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer);
|
||||
symbol_type_info_char := S(1, @symbol_builtin_type_char);
|
||||
symbol_type_info_bool := S(1, @symbol_builtin_type_bool);
|
||||
SECTION
|
||||
elsif line == ".section .data\n"
|
||||
current_stage << "var\n"
|
||||
elsif !(line == ".section .text\n" || line == ".globl _start\n")
|
||||
current_stage << line
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
rule /^build\/[[:alpha:]]+\/stage[[:digit:]]+$/ => ->(match) {
|
||||
"#{match}.s"
|
||||
|
@@ -1,368 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
.global _read_file, _write_error
|
||||
.global _memcmp, _memchr, _memmem, _mmap
|
||||
.global _current, _get, _advance, _label_counter
|
||||
.global _divide_by_zero_error, _strings_index, _string_equal
|
||||
|
||||
.section .rodata
|
||||
|
||||
.equ SYS_READ, 63
|
||||
.equ SYS_WRITE, 64
|
||||
.equ SYS_MMAP2, 222
|
||||
.equ STDIN, 0
|
||||
.equ STDOUT, 1
|
||||
.equ STDERR, 2
|
||||
.equ PROT_READ, 0x1
|
||||
.equ PROT_WRITE, 0x2
|
||||
.equ MAP_PRIVATE, 0x02
|
||||
.equ MAP_ANONYMOUS, 0x20
|
||||
|
||||
new_line: .ascii "\n"
|
||||
|
||||
.section .text
|
||||
|
||||
# Prints a string to the standard error stream and terminates the output with
# a newline character.
#
# Parameters:
# a0 - String pointer.
# a1 - String length.
#
# Two write system calls are issued; on return a0 holds the result of the
# second one. t0 and t1 are overwritten with the original arguments.
.type _write_error, @function
_write_error:
	# a0 is needed for the file descriptor, so park the arguments first.
	mv t1, a1
	mv t0, a0

	# write(STDERR, string, length)
	mv a2, t1
	mv a1, t0
	li a7, SYS_WRITE
	li a0, STDERR
	ecall

	# write(STDERR, new_line, 1)
	li a7, SYS_WRITE
	li a2, 1
	la a1, new_line
	li a0, STDERR
	ecall

	ret
|
||||
|
||||
# Compares two memory regions byte by byte.
#
# Parameters:
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if the memory regions are equal (or the length is 0).
# Otherwise a0 is the difference between the first pair of differing bytes,
# both taken as unsigned values (first minus second).
#
# Overwrites t0, t1, t2, a1 and a2.
.type _memcmp, @function
_memcmp:
	mv t0, a0 # Cursor into the first region; a0 becomes the result.
	mv a0, zero
	j .Lmemcmp_check

.Lmemcmp_step:
	lbu t1, 0(t0)
	lbu t2, 0(a1)
	sub a0, t1, t2

	# A non-zero difference is the final result.
	bnez a0, .Lmemcmp_done

	addi a2, a2, -1
	addi a1, a1, 1
	addi t0, t0, 1

.Lmemcmp_check:
	# Keep going while there are bytes left to compare.
	bnez a2, .Lmemcmp_step

.Lmemcmp_done:
	ret
|
||||
|
||||
# Reads the standard input into a buffer with a single read system call.
#
# Parameters:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Sets s1 to the buffer passed in a0. s1 is deliberately not restored.
#
# Returns the result of the read system call in a0.
.type _read_file, @function
_read_file:
	# Prologue.
	addi sp, sp, -8
	sw s0, 0(sp)
	sw ra, 4(sp)
	addi s0, sp, 8

	# read(STDIN, buffer, size)
	mv a2, a1
	mv s1, a0
	mv a1, s1
	li a7, SYS_READ
	li a0, STDIN
	ecall

	# Epilogue.
	lw s0, 0(sp)
	lw ra, 4(sp)
	addi sp, sp, 8
	ret
|
||||
|
||||
# Sends SIGFPE to the current process: the process ID returned by getpid is
# passed on in a0 as the first argument of kill.
.equ SYS_GETPID, 172
.equ SYS_KILL, 129
.equ SIGFPE, 8

.type _divide_by_zero_error, @function
_divide_by_zero_error:
	# a0 = getpid()
	li a7, SYS_GETPID
	ecall

	# kill(a0, SIGFPE)
	li a1, SIGFPE
	li a7, SYS_KILL
	ecall
	ret
|
||||
|
||||
# Dereferences a pointer.
#
# Parameters:
# a0 - Address to read from, e.g. the start of an array to get its first
#      element.
#
# Returns the 32-bit word stored at that address in a0.
.type _get, @function
_get:
	lw a0, 0(a0)
	ret
|
||||
|
||||
# Searches for the first occurrence of a character in the given memory block.
#
# Parameters:
# a0 - Memory block.
# a1 - Needle. It is compared against each byte zero-extended, so it has to
#      be in the range 0..255 to ever match.
# a2 - Memory size.
#
# Sets a0 to the pointer to the found character or to null if the character
# doesn't occur in the memory block. Overwrites t0 and a2.
.type _memchr, @function
_memchr:
	j .Lmemchr_check

.Lmemchr_scan:
	lbu t0, 0(a0) # Current byte of the memory block.
	beq t0, a1, .Lmemchr_done # Found: a0 already points at it.

	# Move on to the next byte.
	addi a2, a2, -1
	addi a0, a0, 1

.Lmemchr_check:
	bnez a2, .Lmemchr_scan # Bytes left to look at.

	# The block is exhausted without a match.
	mv a0, zero

.Lmemchr_done:
	ret
|
||||
|
||||
# Locates a substring.
#
# Parameters:
# a0 - Haystack.
# a1 - Haystack size.
# a2 - Needle.
# a3 - Needle size.
#
# Sets a0 to the pointer to the beginning of the substring in memory or to 0
# if the substring doesn't occur in the block. A needle of size 0 matches at
# the beginning of the haystack.
.type _memmem, @function
_memmem:
	# Prologue.
	addi sp, sp, -24
	sw ra, 20(sp)
	sw s0, 16(sp)
	addi s0, sp, 24

	# Save preserved registers. They are used to keep arguments.
	sw s1, 12(sp)
	sw s2, 8(sp)
	sw s3, 4(sp)
	sw s4, 0(sp)

	mv s1, a0 # Current position in the haystack.
	mv s2, a1 # Haystack bytes left starting at s1.
	mv s3, a2 # Needle pointer.
	mv s4, a3 # Needle size.

.Lmemmem_loop:
	# Exit if the needle is longer than the rest of the haystack. The sizes
	# are compared unsigned, and against the needle size (s4), not against
	# the needle pointer (s3).
	bltu s2, s4, .Lmemmem_nil

	mv a0, s1
	mv a1, s3
	mv a2, s4
	call _memcmp

	mv t0, a0 # memcmp result.
	mv a0, s1 # Memory pointer for the case the substring was found.
	beqz t0, .Lmemmem_end

	# No match at this position, try the next one.
	addi s1, s1, 1
	addi s2, s2, -1

	j .Lmemmem_loop

.Lmemmem_nil:
	li a0, 0

.Lmemmem_end:

	# Restore the preserved registers.
	lw s1, 12(sp)
	lw s2, 8(sp)
	lw s3, 4(sp)
	lw s4, 0(sp)

	# Epilogue.
	lw ra, 20(sp)
	lw s0, 16(sp)
	addi sp, sp, 24
	ret
|
||||
|
||||
# Looks a string up in a string array.
#
# Every array element is a 4-byte length word immediately followed by that
# many bytes of string data; the next element starts right after the data.
#
# Parameters:
# a0 - Number of elements in the string array.
# a1 - String array.
# a2 - Needle length.
# a3 - Needle.
#
# Sets a0 to the 1-based index of the first element equal to the needle or to
# 0 if the element could not be found.
.type _strings_index, @function
_strings_index:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# The state lives in saved registers since it has to survive _memcmp.
	sw s1, 20(sp)
	sw s2, 16(sp)
	sw s3, 12(sp)
	sw s4, 8(sp)
	sw s5, 4(sp)

	mv s1, a0 # Elements left to look at.
	mv s2, a1 # Current element.
	mv s3, a2 # Needle length.
	mv s4, a3 # Needle.
	mv s5, zero # Index counter.

.Lstrings_index_scan:
	addi s5, s5, 1
	beqz s1, .Lstrings_index_absent

	# Elements with a different length cannot match.
	lw a2, 0(s2)
	bne a2, s3, .Lstrings_index_skip

	# Same length: compare the element data with the needle.
	mv a1, s4
	addi a0, s2, 4
	call _memcmp

	beqz a0, .Lstrings_index_return

.Lstrings_index_skip:
	# Step over the length word and the data of the current element.
	addi s1, s1, -1
	lw a2, 0(s2)
	addi s2, s2, 4
	add s2, s2, a2
	j .Lstrings_index_scan

.Lstrings_index_absent:
	mv s5, zero

.Lstrings_index_return:
	mv a0, s5

	lw s5, 4(sp)
	lw s4, 8(sp)
	lw s3, 12(sp)
	lw s2, 16(sp)
	lw s1, 20(sp)

	# Epilogue.
	lw s0, 24(sp)
	lw ra, 28(sp)
	addi sp, sp, 32
	ret
|
||||
|
||||
# Tests two strings for equality.
#
# Parameters:
# a0 - Length of the first string.
# a1 - Pointer to the first string.
# a2 - Length of the second string.
# a3 - Pointer to the second string.
#
# Sets a0 to 1 if the strings are equal, to 0 if not.
.type _string_equal, @function
_string_equal:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Strings of different lengths are never equal.
	bne a0, a2, .Lstring_equal_differ

	# Same length, so the content decides. a2 already holds the length.
	mv a0, a1
	mv a1, a3
	call _memcmp

	# _memcmp returns 0 for equal regions; turn that into 1, anything else
	# into 0.
	seqz a0, a0
	j .Lstring_equal_return

.Lstring_equal_differ:
	li a0, 0

.Lstring_equal_return:
	# Epilogue.
	lw s0, 24(sp)
	lw ra, 28(sp)
	addi sp, sp, 32
	ret
|
||||
|
||||
# Requests a private, anonymous, readable and writable mapping of 4096 bytes
# through the mmap2 system call. Takes no parameters.
#
# Sets a0 to the system call result, that is the mapping address.
.type _mmap, @function
_mmap:
	li a7, SYS_MMAP2
	li a5, 0 # Page offset.
	li a4, -1 # File descriptor.
	li a3, MAP_ANONYMOUS | MAP_PRIVATE # The mapping is not backed by a file.
	li a2, PROT_READ | PROT_WRITE # Protection flags.
	li a1, 4096 # The length of the mapping.
	li a0, 0 # Address at which to create the mapping.
	ecall

	ret
|
||||
|
||||
# Returns the current position in the source text, which is kept in s1.
#
# Sets a0 to the value of s1.
.type _current, @function
_current:
	addi a0, s1, 0
	ret
|
||||
|
||||
# Moves the position in the source text (s1) forward.
#
# Parameters:
# a0 - The number of bytes to advance.
.type _advance, @function
_advance:
	add s1, s1, a0 # s1 += a0
	ret
|
||||
|
||||
# Advances the global label counter, which is kept in s2, by 1 and sets a0 to
# the value the counter had before the increment.
#
# Parameters:
# a0 - If it is 0, the counter is set to 0 first, so 0 is returned and the
#      counter is left at 1.
.type _label_counter, @function
_label_counter:
	beqz a0, .Llabel_counter_reset

.Llabel_counter_take:
	mv a0, s2
	addi s2, s2, 1

	ret

.Llabel_counter_reset:
	mv s2, zero
	j .Llabel_counter_take
|
304
boot/stage7.elna
304
boot/stage7.elna
@@ -4,7 +4,9 @@
|
||||
|
||||
# Stage 7 compiler.
|
||||
#
|
||||
# - String literals.
|
||||
# - Static global variable and constant initialization.
|
||||
# - Object sections are determined automatically.
|
||||
# - _start is always exported.
|
||||
|
||||
.section .bss
|
||||
|
||||
@@ -1098,34 +1100,6 @@ begin
|
||||
_advance_token(5);
|
||||
end;
|
||||
|
||||
proc _compile_type();
begin
	# The line starts with ".type " (directive plus a space, 6 characters):
	# copy it to the output and step over it. The calls without arguments
	# take whatever the previous call left in a0.
	_write_token(6);
	_advance_token();

	# Read the symbol name.
	_read_token();

	# Copy the name together with the 3 characters following it: comma,
	# space and @.
	addi a0, a0, 3
	_write_token();
	_advance_token();

	# Read the symbol type.
	_read_token();

	# Copy the symbol type together with the newline ending the line.
	addi a0, a0, 1
	_write_token();
	_advance_token();

	# The object definition itself follows on the next line.
	_compile_line();

.compile_type_end:
end;
|
||||
|
||||
proc _skip_newlines();
|
||||
begin
|
||||
# Skip newlines.
|
||||
@@ -1146,9 +1120,271 @@ begin
|
||||
.skip_newlines_end:
|
||||
end;
|
||||
|
||||
# Moves the source position past any run of newline characters and comments.
# Stops at the first character that is neither a newline nor a '#'.
proc _skip_empty_lines();
begin
.skip_empty_lines_again:
	# t0 = character at the current source position.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	li t1, '#'
	beq t0, t1, .skip_empty_lines_hash

	li t1, '\n'
	bne t0, t1, .skip_empty_lines_done

	# A newline: step over it.
	_advance_token(1);
	goto .skip_empty_lines_again;

.skip_empty_lines_hash:
	# A comment: let _skip_comment deal with it.
	_skip_comment();
	goto .skip_empty_lines_again;

.skip_empty_lines_done:
end;
|
||||
|
||||
# Compiles one static initializer at the current source position. The first
# character selects the kind:
#
# "  - string literal, written as ".word strings + <value from _add_string>".
# S  - record "S(a, b, ...)", every element is compiled recursively.
# @  - address of a symbol, written as ".word <symbol>".
# digit - number, written as ".word <number>".
#
# Anything else ends up in an unimp instruction.
proc _compile_global_initializer();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	li t1, '"'
	beq t0, t1, .compile_global_initializer_literal

	li t1, 'S'
	beq t0, t1, .compile_global_initializer_struct

	li t1, '@'
	beq t0, t1, .compile_global_initializer_address

	la a0, source_code_position
	lw a0, (a0)
	lb a0, (a0)
	_is_digit();
	bnez a0, .compile_global_initializer_integer

	# Unsupported initializer.
	unimp

.compile_global_initializer_address:
	# Skip @.
	_advance_token(1);
	_write_z("\n\t.word \0");

	# Copy the symbol name and step over it. The length is handed from call
	# to call in a0.
	_read_token();
	_write_token();
	_advance_token();

	goto .compile_global_initializer_done;

.compile_global_initializer_integer:
	_write_z("\n\t.word \0");
	_read_token();
	_write_token();
	# NOTE(review): only one character is skipped here, while the address
	# case above skips the whole token. Looks like numbers with more than
	# one digit are not skipped completely - confirm against _read_token.
	_advance_token(1);

	goto .compile_global_initializer_done;

.compile_global_initializer_struct:
	# Skip "S(".
	_advance_token(2);

	# An empty element list is closed right away.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ')'
	beq t0, t1, .compile_global_initializer_close

.compile_global_initializer_field:
	_compile_global_initializer();

	# The element is followed either by ")" or by a comma.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ')'
	beq t0, t1, .compile_global_initializer_close

	# Skip comma and whitespace after it.
	_advance_token(2);

	goto .compile_global_initializer_field;

.compile_global_initializer_close:
	# Skip ")"
	_advance_token(1);

	goto .compile_global_initializer_done;

.compile_global_initializer_literal:
	_write_z("\n\t.word strings + \0");

	# Keep the string length in the local slot at 4(sp); it is read back
	# below as v4.
	_string_length(source_code_position);
	sw a0, 4(sp)

	# Write the value left in a0 by _add_string.
	_add_string(source_code_position);
	_write_i();

	# Skip the quoted string: its length plus the two quotes.
	_advance_token(v4 + 2);

.compile_global_initializer_done:
end;
|
||||
|
||||
# Compiles a single "name := initializer;" line of the const section into
#
# .type name, @object
# name:<words written by _compile_global_initializer>
proc _compile_constant_declaration();
begin
	# Keep the length of the constant name in the local slot at 0(sp); it is
	# read back below as v0.
	_read_token();
	sw a0, 0(sp)

	_write_z(".type \0");
	_write_token(v0);
	_write_z(", @object\n\0");

	# The label of the object.
	_write_token(v0);
	_write_c(':');

	# Step over the name and the 4 characters of " := ".
	_advance_token(v0 + 4);
	_compile_global_initializer();

	# Skip semicolon and newline.
	_advance_token(2);
	_write_c('\n');
end;
|
||||
|
||||
# Compiles the optional const section of a module into a .rodata section.
# Does nothing besides skipping empty lines if the source doesn't continue
# with "const".
proc _compile_const_part();
begin
	_skip_empty_lines();

	_memcmp(source_code_position, "const\0", 5);
	bnez a0, .compile_const_part_done

	# Skip "const" with the newline after it.
	_advance_token(6);
	_write_z(".section .rodata # Compiled from const section.\n\n\0");

.compile_const_part_next:
	_skip_empty_lines();

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	# Declarations are indented with a tab. A line beginning with anything
	# else is taken as the start of the next code section.
	li t1, '\t'
	bne t0, t1, .compile_const_part_done

	# Skip the indentation.
	_advance_token(1);

	# An indented comment is left to _skip_empty_lines at the top of the
	# loop.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, '#'
	beq t0, t1, .compile_const_part_next

	_compile_constant_declaration();
	goto .compile_const_part_next;

.compile_const_part_done:
end;
|
||||
|
||||
# Compiles a single "name: Type;" or "name: Type := initializer;" line of the
# var section into an object definition. The type name itself is ignored.
proc _compile_variable_declaration();
begin
	# Keep the length of the variable name in the local slot at 0(sp); it is
	# read back below as v0.
	_read_token();
	sw a0, 0(sp)

	_write_z(".type \0");
	_write_token(v0);
	_write_z(", @object\n\0");

	# The label of the object.
	_write_token(v0);
	_write_c(':');

	# Skip the variable name and colon with space before the type.
	_advance_token(v0 + 2);

	# Skip the type name.
	_read_token();
	_advance_token();

	# A space after the type name means an initializer follows.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ' '
	beq t0, t1, .compile_variable_declaration_value

	# Else we assume this is a zeroed 81920 bytes big array.
	_write_z(" .zero 81920\0");
	goto .compile_variable_declaration_finish;

.compile_variable_declaration_value:
	# Skip the assignment sign with surrounding whitespaces.
	_advance_token(4);
	_compile_global_initializer();

.compile_variable_declaration_finish:
	# Skip semicolon and newline.
	_advance_token(2);
	_write_c('\n');
end;
|
||||
|
||||
# Compiles the optional var section of a module into a .data section. Does
# nothing if the source doesn't continue with "var".
proc _compile_var_part();
begin
	_memcmp(source_code_position, "var\0", 3);
	bnez a0, .compile_var_part_done

	# Skip "var" and newline.
	_advance_token(4);
	_write_z(".section .data\n\0");

.compile_var_part_next:
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	# A line beginning with 'p' ends the section.
	li t1, 'p'
	beq t0, t1, .compile_var_part_done

	# Only lines indented with a tab are declarations.
	li t1, '\t'
	bne t0, t1, .compile_var_part_other

	# Skip the indentation and compile the declaration.
	_advance_token(1);
	_compile_variable_declaration();
	goto .compile_var_part_next;

.compile_var_part_other:
	# Every other line is handed to _compile_line.
	_compile_line();
	goto .compile_var_part_next;

.compile_var_part_done:
end;
|
||||
|
||||
# Process the source code and print the generated code.
|
||||
proc _compile_module();
|
||||
begin
|
||||
_compile_const_part();
|
||||
_write_z(".section .bss\n\0");
|
||||
|
||||
.compile_module_bss:
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
li t1, 'v'
|
||||
beq t0, t1, .compile_module_code
|
||||
|
||||
li t1, 'p'
|
||||
beq t0, t1, .compile_module_code
|
||||
|
||||
_compile_line();
|
||||
goto .compile_module_bss;
|
||||
|
||||
.compile_module_code:
|
||||
_compile_var_part();
|
||||
_write_z(".section .text\n\0");
|
||||
.compile_module_loop:
|
||||
_skip_newlines();
|
||||
|
||||
@@ -1163,10 +1399,6 @@ begin
|
||||
_memcmp(source_code_position, ".section", 8);
|
||||
beqz a0, .compile_module_section
|
||||
|
||||
# 5 is ".type" length.
|
||||
_memcmp(source_code_position, ".type", 5);
|
||||
beqz a0, .compile_module_type
|
||||
|
||||
# 5 is "proc " length. Space is needed to distinguish from "procedure".
|
||||
_memcmp(source_code_position, "proc ", 5);
|
||||
beqz a0, .compile_module_procedure
|
||||
@@ -1183,11 +1415,6 @@ begin
|
||||
|
||||
goto .compile_module_loop;
|
||||
|
||||
.compile_module_type:
|
||||
_compile_type();
|
||||
|
||||
goto .compile_module_loop;
|
||||
|
||||
.compile_module_global:
|
||||
_compile_line();
|
||||
|
||||
@@ -1208,6 +1435,7 @@ end;
|
||||
|
||||
proc _compile();
|
||||
begin
|
||||
_write_z(".globl _start\n\n\0");
|
||||
_compile_module();
|
||||
|
||||
_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
|
||||
|
349
boot/stage8.elna
349
boot/stage8.elna
@@ -2,11 +2,37 @@
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Stage 7 compiler.
|
||||
# Stage 8 compiler.
|
||||
#
|
||||
# - String literals.
|
||||
|
||||
.section .bss
|
||||
const
|
||||
symbol_builtin_name_int := "Int";
|
||||
symbol_builtin_name_word := "Word";
|
||||
symbol_builtin_name_pointer := "Pointer";
|
||||
symbol_builtin_name_char := "Char";
|
||||
symbol_builtin_name_bool := "Bool";
|
||||
|
||||
# Every type info starts with a word describing what type it is.
|
||||
#
|
||||
# PRIMITIVE_TYPE = 1
|
||||
#
|
||||
# Primitive types have only type size.
|
||||
symbol_builtin_type_int := S(1, 4);
|
||||
symbol_builtin_type_word := S(1, 4);
|
||||
symbol_builtin_type_pointer := S(1, 4);
|
||||
symbol_builtin_type_char := S(1, 1);
|
||||
symbol_builtin_type_bool := S(1, 1);
|
||||
|
||||
# Info objects start with a word describing its type.
|
||||
#
|
||||
# INFO_TYPE = 1
|
||||
#
|
||||
# Type info has the type it belongs to.
|
||||
symbol_type_info_int := S(1, @symbol_builtin_type_int);
|
||||
symbol_type_info_word := S(1, @symbol_builtin_type_word);
|
||||
symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer);
|
||||
symbol_type_info_char := S(1, @symbol_builtin_type_char);
|
||||
symbol_type_info_bool := S(1, @symbol_builtin_type_bool);
|
||||
|
||||
# When modifying, also change the read size in the entry point procedure.
|
||||
.type source_code, @object
|
||||
@@ -15,18 +41,10 @@ source_code: .zero 81920
|
||||
.type compiler_strings, @object
|
||||
compiler_strings: .zero 8192
|
||||
|
||||
.section .data
|
||||
|
||||
.type compiler_strings_position, @object
|
||||
compiler_strings_position: .word compiler_strings
|
||||
|
||||
.type compiler_strings_length, @object
|
||||
compiler_strings_length: .word 0
|
||||
|
||||
.type source_code_position, @object
|
||||
source_code_position: .word source_code
|
||||
|
||||
.section .text
|
||||
var
|
||||
compiler_strings_position: Pointer := @compiler_strings;
|
||||
compiler_strings_length: Word := 0;
|
||||
source_code_position: Pointer := @source_code;
|
||||
|
||||
# Calculates and returns the string token length between quotes, including the
|
||||
# escaping slash characters.
|
||||
@@ -1098,34 +1116,6 @@ begin
|
||||
_advance_token(5);
|
||||
end;
|
||||
|
||||
proc _compile_type();
begin
	# The line starts with ".type " (directive plus a space, 6 characters):
	# copy it to the output and step over it. The calls without arguments
	# take whatever the previous call left in a0.
	_write_token(6);
	_advance_token();

	# Read the symbol name.
	_read_token();

	# Copy the name together with the 3 characters following it: comma,
	# space and @.
	addi a0, a0, 3
	_write_token();
	_advance_token();

	# Read the symbol type.
	_read_token();

	# Copy the symbol type together with the newline ending the line.
	addi a0, a0, 1
	_write_token();
	_advance_token();

	# The object definition itself follows on the next line.
	_compile_line();

.compile_type_end:
end;
|
||||
|
||||
proc _skip_newlines();
|
||||
begin
|
||||
# Skip newlines.
|
||||
@@ -1146,9 +1136,271 @@ begin
|
||||
.skip_newlines_end:
|
||||
end;
|
||||
|
||||
# Moves the source position past any run of newline characters and comments.
# Stops at the first character that is neither a newline nor a '#'.
proc _skip_empty_lines();
begin
.skip_empty_lines_again:
	# t0 = character at the current source position.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	li t1, '#'
	beq t0, t1, .skip_empty_lines_hash

	li t1, '\n'
	bne t0, t1, .skip_empty_lines_done

	# A newline: step over it.
	_advance_token(1);
	goto .skip_empty_lines_again;

.skip_empty_lines_hash:
	# A comment: let _skip_comment deal with it.
	_skip_comment();
	goto .skip_empty_lines_again;

.skip_empty_lines_done:
end;
|
||||
|
||||
# Compiles one static initializer at the current source position. The first
# character selects the kind:
#
# "  - string literal, written as ".word strings + <value from _add_string>".
# S  - record "S(a, b, ...)", every element is compiled recursively.
# @  - address of a symbol, written as ".word <symbol>".
# digit - number, written as ".word <number>".
#
# Anything else ends up in an unimp instruction.
proc _compile_global_initializer();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	li t1, '"'
	beq t0, t1, .compile_global_initializer_literal

	li t1, 'S'
	beq t0, t1, .compile_global_initializer_struct

	li t1, '@'
	beq t0, t1, .compile_global_initializer_address

	la a0, source_code_position
	lw a0, (a0)
	lb a0, (a0)
	_is_digit();
	bnez a0, .compile_global_initializer_integer

	# Unsupported initializer.
	unimp

.compile_global_initializer_address:
	# Skip @.
	_advance_token(1);
	_write_z("\n\t.word \0");

	# Copy the symbol name and step over it. The length is handed from call
	# to call in a0.
	_read_token();
	_write_token();
	_advance_token();

	goto .compile_global_initializer_done;

.compile_global_initializer_integer:
	_write_z("\n\t.word \0");
	_read_token();
	_write_token();
	# NOTE(review): only one character is skipped here, while the address
	# case above skips the whole token. Looks like numbers with more than
	# one digit are not skipped completely - confirm against _read_token.
	_advance_token(1);

	goto .compile_global_initializer_done;

.compile_global_initializer_struct:
	# Skip "S(".
	_advance_token(2);

	# An empty element list is closed right away.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ')'
	beq t0, t1, .compile_global_initializer_close

.compile_global_initializer_field:
	_compile_global_initializer();

	# The element is followed either by ")" or by a comma.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ')'
	beq t0, t1, .compile_global_initializer_close

	# Skip comma and whitespace after it.
	_advance_token(2);

	goto .compile_global_initializer_field;

.compile_global_initializer_close:
	# Skip ")"
	_advance_token(1);

	goto .compile_global_initializer_done;

.compile_global_initializer_literal:
	_write_z("\n\t.word strings + \0");

	# Keep the string length in the local slot at 4(sp); it is read back
	# below as v4.
	_string_length(source_code_position);
	sw a0, 4(sp)

	# Write the value left in a0 by _add_string.
	_add_string(source_code_position);
	_write_i();

	# Skip the quoted string: its length plus the two quotes.
	_advance_token(v4 + 2);

.compile_global_initializer_done:
end;
|
||||
|
||||
# Compiles a single "name := initializer;" line of the const section into
#
# .type name, @object
# name:<words written by _compile_global_initializer>
proc _compile_constant_declaration();
begin
	# Keep the length of the constant name in the local slot at 0(sp); it is
	# read back below as v0.
	_read_token();
	sw a0, 0(sp)

	_write_z(".type \0");
	_write_token(v0);
	_write_z(", @object\n\0");

	# The label of the object.
	_write_token(v0);
	_write_c(':');

	# Step over the name and the 4 characters of " := ".
	_advance_token(v0 + 4);
	_compile_global_initializer();

	# Skip semicolon and newline.
	_advance_token(2);
	_write_c('\n');
end;
|
||||
|
||||
# Compiles the optional const section of a module into a .rodata section.
# Does nothing besides skipping empty lines if the source doesn't continue
# with "const".
proc _compile_const_part();
begin
	_skip_empty_lines();

	_memcmp(source_code_position, "const\0", 5);
	bnez a0, .compile_const_part_done

	# Skip "const" with the newline after it.
	_advance_token(6);
	_write_z(".section .rodata # Compiled from const section.\n\n\0");

.compile_const_part_next:
	_skip_empty_lines();

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	# Declarations are indented with a tab. A line beginning with anything
	# else is taken as the start of the next code section.
	li t1, '\t'
	bne t0, t1, .compile_const_part_done

	# Skip the indentation.
	_advance_token(1);

	# An indented comment is left to _skip_empty_lines at the top of the
	# loop.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, '#'
	beq t0, t1, .compile_const_part_next

	_compile_constant_declaration();
	goto .compile_const_part_next;

.compile_const_part_done:
end;
|
||||
|
||||
# Compiles a single "name: Type;" or "name: Type := initializer;" line of the
# var section into an object definition. The type name itself is ignored.
proc _compile_variable_declaration();
begin
	# Keep the length of the variable name in the local slot at 0(sp); it is
	# read back below as v0.
	_read_token();
	sw a0, 0(sp)

	_write_z(".type \0");
	_write_token(v0);
	_write_z(", @object\n\0");

	# The label of the object.
	_write_token(v0);
	_write_c(':');

	# Skip the variable name and colon with space before the type.
	_advance_token(v0 + 2);

	# Skip the type name.
	_read_token();
	_advance_token();

	# A space after the type name means an initializer follows.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ' '
	beq t0, t1, .compile_variable_declaration_value

	# Else we assume this is a zeroed 81920 bytes big array.
	_write_z(" .zero 81920\0");
	goto .compile_variable_declaration_finish;

.compile_variable_declaration_value:
	# Skip the assignment sign with surrounding whitespaces.
	_advance_token(4);
	_compile_global_initializer();

.compile_variable_declaration_finish:
	# Skip semicolon and newline.
	_advance_token(2);
	_write_c('\n');
end;
|
||||
|
||||
# Compiles the optional var section of a module into a .data section. Does
# nothing if the source doesn't continue with "var".
proc _compile_var_part();
begin
	_memcmp(source_code_position, "var\0", 3);
	bnez a0, .compile_var_part_done

	# Skip "var" and newline.
	_advance_token(4);
	_write_z(".section .data\n\0");

.compile_var_part_next:
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	# A line beginning with 'p' ends the section.
	li t1, 'p'
	beq t0, t1, .compile_var_part_done

	# Only lines indented with a tab are declarations.
	li t1, '\t'
	bne t0, t1, .compile_var_part_other

	# Skip the indentation and compile the declaration.
	_advance_token(1);
	_compile_variable_declaration();
	goto .compile_var_part_next;

.compile_var_part_other:
	# Every other line is handed to _compile_line.
	_compile_line();
	goto .compile_var_part_next;

.compile_var_part_done:
end;
|
||||
|
||||
# Process the source code and print the generated code.
|
||||
proc _compile_module();
|
||||
begin
|
||||
_compile_const_part();
|
||||
_write_z(".section .bss\n\0");
|
||||
|
||||
.compile_module_bss:
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
li t1, 'v'
|
||||
beq t0, t1, .compile_module_code
|
||||
|
||||
li t1, 'p'
|
||||
beq t0, t1, .compile_module_code
|
||||
|
||||
_compile_line();
|
||||
goto .compile_module_bss;
|
||||
|
||||
.compile_module_code:
|
||||
_compile_var_part();
|
||||
_write_z(".section .text\n\0");
|
||||
.compile_module_loop:
|
||||
_skip_newlines();
|
||||
|
||||
@@ -1163,10 +1415,6 @@ begin
|
||||
_memcmp(source_code_position, ".section", 8);
|
||||
beqz a0, .compile_module_section
|
||||
|
||||
# 5 is ".type" length.
|
||||
_memcmp(source_code_position, ".type", 5);
|
||||
beqz a0, .compile_module_type
|
||||
|
||||
# 5 is "proc " length. Space is needed to distinguish from "procedure".
|
||||
_memcmp(source_code_position, "proc ", 5);
|
||||
beqz a0, .compile_module_procedure
|
||||
@@ -1183,11 +1431,6 @@ begin
|
||||
|
||||
goto .compile_module_loop;
|
||||
|
||||
.compile_module_type:
|
||||
_compile_type();
|
||||
|
||||
goto .compile_module_loop;
|
||||
|
||||
.compile_module_global:
|
||||
_compile_line();
|
||||
|
||||
@@ -1208,6 +1451,7 @@ end;
|
||||
|
||||
proc _compile();
|
||||
begin
|
||||
_write_z(".globl _start\n\n\0");
|
||||
_compile_module();
|
||||
|
||||
_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
|
||||
@@ -1247,7 +1491,6 @@ begin
|
||||
end;
|
||||
|
||||
# Entry point.
|
||||
.globl _start
|
||||
proc _start();
|
||||
begin
|
||||
# Read the source from the standard input.
|
||||
|
1503
boot/stage9.elna
Normal file
1503
boot/stage9.elna
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user