Compile static initializers

This commit is contained in:
2025-09-07 20:51:39 +02:00
parent e0ac57dc1d
commit d16ec370dc
5 changed files with 2102 additions and 463 deletions

View File

@@ -2,11 +2,37 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage 7 compiler.
# Stage 8 compiler.
#
# - String literals.
.section .bss
const
# Names, type descriptors and info objects of the built-in types
# Int, Word, Pointer, Char and Bool.
symbol_builtin_name_int := "Int";
symbol_builtin_name_word := "Word";
symbol_builtin_name_pointer := "Pointer";
symbol_builtin_name_char := "Char";
symbol_builtin_name_bool := "Bool";
# Every type descriptor starts with a word telling what kind of type it is.
#
# PRIMITIVE_TYPE = 1
#
# A primitive type stores nothing but its size after the kind word.
symbol_builtin_type_int := S(1, 4);
symbol_builtin_type_word := S(1, 4);
symbol_builtin_type_pointer := S(1, 4);
symbol_builtin_type_char := S(1, 1);
symbol_builtin_type_bool := S(1, 1);
# Every info object starts with a word telling what kind of info it is.
#
# INFO_TYPE = 1
#
# A type info stores the address of the type descriptor it belongs to.
symbol_type_info_int := S(1, @symbol_builtin_type_int);
symbol_type_info_word := S(1, @symbol_builtin_type_word);
symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer);
symbol_type_info_char := S(1, @symbol_builtin_type_char);
symbol_type_info_bool := S(1, @symbol_builtin_type_bool);
# When modifying, also change the read size in the entry point procedure.
.type source_code, @object
@@ -15,18 +41,10 @@ source_code: .zero 81920
.type compiler_strings, @object
compiler_strings: .zero 8192
.section .data
.type compiler_strings_position, @object
compiler_strings_position: .word compiler_strings
.type compiler_strings_length, @object
compiler_strings_length: .word 0
.type source_code_position, @object
source_code_position: .word source_code
.section .text
var
# Mutable compiler state. The two positions start out as the addresses of the
# compiler_strings and source_code buffers, the length starts at zero.
compiler_strings_position: Pointer := @compiler_strings;
compiler_strings_length: Word := 0;
source_code_position: Pointer := @source_code;
# Calculates and returns the string token length between quotes, including the
# escaping slash characters.
@@ -1098,34 +1116,6 @@ begin
_advance_token(5);
end;
# Compiles a ".type" directive: copies the directive, the symbol name and the
# symbol type to the output and then hands the following object definition
# line over to _compile_line.
#
# NOTE(review): calls written without arguments appear to reuse the value left
# in a0 by the preceding call or addi instruction - confirm against the
# calling convention of this stage.
proc _compile_type();
begin
# Print and skip the ".type" (5 characters) directive and a space after it.
_write_token(6);
_advance_token();
# Read and print the symbol name.
_read_token();
# Print and skip the symbol name, comma, space and @.
addi a0, a0, 3
_write_token();
_advance_token();
# Read the symbol type.
_read_token();
# Print the symbol type and newline.
addi a0, a0, 1
_write_token();
_advance_token();
# Write the object definition itself.
_compile_line();
# This label is not referenced anywhere inside the procedure.
.compile_type_end:
end;
proc _skip_newlines();
begin
# Skip newlines.
@@ -1146,9 +1136,271 @@ begin
.skip_newlines_end:
end;
# Skips newlines and comments.
#
# Looks at the character at source_code_position in a loop: a '#' is consumed
# with _skip_comment, a newline is consumed with _advance_token(1), and any
# other character ends the procedure without being consumed.
proc _skip_empty_lines();
begin
.skip_empty_lines_loop:
# Load the current source character into t0.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, '#'
beq t0, t1, .skip_empty_lines_comment
li t1, '\n'
beq t0, t1, .skip_empty_lines_newline
# Neither a comment nor a newline: nothing left to skip.
goto .skip_empty_lines_end;
.skip_empty_lines_comment:
_skip_comment();
goto .skip_empty_lines_loop;
.skip_empty_lines_newline:
_advance_token(1);
goto .skip_empty_lines_loop;
.skip_empty_lines_end:
end;
# Compiles the initializer of a global constant or variable that starts at
# source_code_position and writes it as assembler data directives.
#
# The kind of initializer is chosen by its first character:
# - '"' - string literal, written as "\n\t.word strings + " followed by the
#   value produced by _add_string;
# - 'S' - record "S(a, b, ...)", each element is compiled by a recursive call;
# - '@' - address of a symbol, written as "\n\t.word " and the symbol name;
# - a digit - number literal, written as "\n\t.word " and the number.
# Any other character ends in unimp.
proc _compile_global_initializer();
begin
# Load the first character of the initializer into t0.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, '"'
beq t0, t1, .compile_global_initializer_string
li t1, 'S'
beq t0, t1, .compile_global_initializer_record
li t1, '@'
beq t0, t1, .compile_global_initializer_pointer
# Reload the character into a0 to pass it to _is_digit.
la a0, source_code_position
lw a0, (a0)
lb a0, (a0)
_is_digit();
bnez a0, .compile_global_initializer_number
# Unsupported initializer.
unimp
.compile_global_initializer_pointer:
# Skip @.
_advance_token(1);
_write_z("\n\t.word \0");
_read_token();
_write_token();
_advance_token();
goto .compile_global_initializer_end;
.compile_global_initializer_number:
_write_z("\n\t.word \0");
_read_token();
_write_token();
# Skip the whole number token the same way the pointer branch skips the
# symbol name. Skipping exactly one character would leave the remaining
# digits of a multi-digit literal in the input.
_advance_token();
goto .compile_global_initializer_end;
.compile_global_initializer_record:
# Skip "S(".
_advance_token(2);
# An empty record "S()" has no elements to compile.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ')'
beq t0, t1, .compile_global_initializer_closing
.compile_global_initializer_loop:
# Compile one record element.
_compile_global_initializer();
# The element is followed either by ")" or by a comma.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ')'
beq t0, t1, .compile_global_initializer_closing
# Skip comma and whitespace after it.
_advance_token(2);
goto .compile_global_initializer_loop;
.compile_global_initializer_closing:
# Skip ")"
_advance_token(1);
goto .compile_global_initializer_end;
.compile_global_initializer_string:
_write_z("\n\t.word strings + \0");
# Remember the string length in the stack slot read back below as v4.
_string_length(source_code_position);
sw a0, 4(sp)
_add_string(source_code_position);
_write_i();
# Skip the quoted string.
_advance_token(v4 + 2);
goto .compile_global_initializer_end;
.compile_global_initializer_end:
end;
# Compiles one constant declaration of the form "name := initializer;".
#
# Writes ".type name, @object", a newline, the "name:" label, the data produced
# by _compile_global_initializer and a final newline.
proc _compile_constant_declaration();
begin
# Read the constant name and keep its length in the stack slot used as v0.
_read_token();
sw a0, 0(sp)
_write_z(".type \0");
_write_token(v0);
_write_z(", @object\n\0");
_write_token(v0);
_write_c(':');
# Skip the constant name with assignment sign and surrounding whitespaces.
_advance_token(v0 + 4);
_compile_global_initializer();
# Skip semicolon and newline.
_advance_token(2);
_write_c('\n');
end;
# Compiles the optional "const" section.
#
# After skipping empty lines and comments the source has to start with "const",
# otherwise nothing is written. Each tab-indented line of the section is either
# a comment or a declaration handed to _compile_constant_declaration. The
# section ends at the first line that does not begin with a tab.
proc _compile_const_part();
begin
_skip_empty_lines();
# A nonzero result is treated as "the source does not start with const".
_memcmp(source_code_position, "const\0", 5);
bnez a0, .compile_const_part_end
# Skip "const" with the newline after it.
_advance_token(6);
_write_z(".section .rodata # Compiled from const section.\n\n\0");
.compile_const_part_loop:
_skip_empty_lines();
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
# If the character at the line beginning is not indentation,
# it is probably the next code section.
li t1, '\t'
bne t0, t1, .compile_const_part_end
# Skip the indentation.
_advance_token(1);
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
# An indented comment is left for _skip_empty_lines at the loop start.
li t1, '#'
beq t0, t1, .compile_const_part_loop
_compile_constant_declaration();
goto .compile_const_part_loop;
.compile_const_part_end:
end;
# Compiles one variable declaration of the form "name: Type := initializer;"
# or "name: Type;".
#
# Writes ".type name, @object", a newline and the "name:" label. The type name
# is skipped without being used. If a space follows the type, the initializer
# is compiled with _compile_global_initializer, otherwise " .zero 81920" is
# written. A newline is written at the end in both cases.
proc _compile_variable_declaration();
begin
# Read the variable name and keep its length in the stack slot used as v0.
_read_token();
sw a0, 0(sp)
_write_z(".type \0");
_write_token(v0);
_write_z(", @object\n\0");
_write_token(v0);
_write_c(':');
# Skip the variable name and colon with space before the type.
_advance_token(v0 + 2);
# Skip the type name.
_read_token();
_advance_token();
# A space after the type name means an initializer follows.
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ' '
beq t0, t1, .compile_variable_declaration_initializer
# Else we assume this is a zeroed 81920 bytes big array.
_write_z(" .zero 81920\0");
goto .compile_variable_declaration_finalize;
.compile_variable_declaration_initializer:
# Skip the assignment sign with surrounding whitespaces.
_advance_token(4);
_compile_global_initializer();
goto .compile_variable_declaration_finalize;
.compile_variable_declaration_finalize:
# Skip semicolon and newline.
_advance_token(2);
_write_c('\n');
end;
# Compiles the optional "var" section.
#
# Nothing is written unless the source at the current position starts with
# "var". Otherwise ".section .data" is written and the lines are processed
# until one begins with 'p': tab-indented lines are compiled as variable
# declarations, all other lines are passed to _compile_line.
proc _compile_var_part();
begin
# A nonzero result is treated as "the source does not start with var".
_memcmp(source_code_position, "var\0", 3);
bnez a0, .compile_var_part_end
# Skip "var" and newline.
_advance_token(4);
_write_z(".section .data\n\0");
.compile_var_part_loop:
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
# NOTE(review): 'p' presumably is the first letter of "proc", that is the
# beginning of the code section - confirm.
li t1, 'p'
beq t0, t1, .compile_var_part_end
li t1, '\t'
beq t0, t1, .compile_var_part_declaration
_compile_line();
goto .compile_var_part_loop;
.compile_var_part_declaration:
# Skip the indentation.
_advance_token(1);
_compile_variable_declaration();
goto .compile_var_part_loop;
.compile_var_part_end:
end;
# Process the source code and print the generated code.
proc _compile_module();
begin
_compile_const_part();
_write_z(".section .bss\n\0");
.compile_module_bss:
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, 'v'
beq t0, t1, .compile_module_code
li t1, 'p'
beq t0, t1, .compile_module_code
_compile_line();
goto .compile_module_bss;
.compile_module_code:
_compile_var_part();
_write_z(".section .text\n\0");
.compile_module_loop:
_skip_newlines();
@@ -1163,10 +1415,6 @@ begin
_memcmp(source_code_position, ".section", 8);
beqz a0, .compile_module_section
# 5 is ".type" length.
_memcmp(source_code_position, ".type", 5);
beqz a0, .compile_module_type
# 5 is "proc " length. Space is needed to distinguish from "procedure".
_memcmp(source_code_position, "proc ", 5);
beqz a0, .compile_module_procedure
@@ -1183,11 +1431,6 @@ begin
goto .compile_module_loop;
.compile_module_type:
_compile_type();
goto .compile_module_loop;
.compile_module_global:
_compile_line();
@@ -1208,6 +1451,7 @@ end;
proc _compile();
begin
_write_z(".globl _start\n\n\0");
_compile_module();
_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
@@ -1247,7 +1491,6 @@ begin
end;
# Entry point.
.globl _start
proc _start();
begin
# Read the source from the standard input.