diff options
Diffstat (limited to 'boot/stage8.elna')
| -rw-r--r-- | boot/stage8.elna | 347 |
1 files changed, 295 insertions, 52 deletions
diff --git a/boot/stage8.elna b/boot/stage8.elna index 6973963..c88e19d 100644 --- a/boot/stage8.elna +++ b/boot/stage8.elna @@ -2,11 +2,37 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -# Stage 7 compiler. +# Stage 8 compiler. # -# - String literals. -.section .bss +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + # Every type info starts with a word describing what type it is. + # + # PRIMITIVE_TYPE = 1 + # + # Primitive types have only type size. + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + # Info objects start with a word describing its type. + # + # INFO_TYPE = 1 + # + # Type info has the type it belongs to. + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); # When modifiying also change the read size in the entry point procedure. .type source_code, @object @@ -15,18 +41,10 @@ source_code: .zero 81920 .type compiler_strings, @object compiler_strings: .zero 8192 -.section .data - -.type compiler_strings_position, @object -compiler_strings_position: .word compiler_strings - -.type compiler_strings_length, @object -compiler_strings_length: .word 0 - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text +var + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + source_code_position: Pointer := @source_code; # Calculates and returns the string token length between quotes, including the # escaping slash characters. @@ -1098,57 +1116,291 @@ begin _advance_token(5); end; -proc _compile_type(); +proc _skip_newlines(); begin - # Print and skip the ".type" (5 characters) directive and a space after it. - _write_token(6); - _advance_token(); + # Skip newlines. + la t0, source_code_position + lw t1, (t0) - # Read and print the symbol name. - _read_token(); +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; - # Print and skip the symbol name, comma, space and @. - addi a0, a0, 3 +# Skip newlines and comments. +proc _skip_empty_lines(); +begin +.skip_empty_lines_loop: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '#' + beq t0, t1, .skip_empty_lines_comment + + li t1, '\n' + beq t0, t1, .skip_empty_lines_newline + + goto .skip_empty_lines_end; + +.skip_empty_lines_comment: + _skip_comment(); + goto .skip_empty_lines_loop; + +.skip_empty_lines_newline: + _advance_token(1); + goto .skip_empty_lines_loop; + +.skip_empty_lines_end: +end; + +proc _compile_global_initializer(); +begin + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .compile_global_initializer_string + + li t1, 'S' + beq t0, t1, .compile_global_initializer_record + + li t1, '@' + beq t0, t1, .compile_global_initializer_pointer + + la a0, source_code_position + lw a0, (a0) + lb a0, (a0) + _is_digit(); + bnez a0, .compile_global_initializer_number + + unimp + +.compile_global_initializer_pointer: + # Skip @. + _advance_token(1); + _write_z("\n\t.word \0"); + _read_token(); _write_token(); _advance_token(); - # Read the symbol type. - _read_token(); + goto .compile_global_initializer_end; - # Print the symbol type and newline. - addi a0, a0, 1 +.compile_global_initializer_number: + _write_z("\n\t.word \0"); + _read_token(); _write_token(); + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_record: + # Skip "S(". + _advance_token(2); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + +.compile_global_initializer_loop: + _compile_global_initializer(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + + # Skip comma and whitespace after it. + _advance_token(2); + + goto .compile_global_initializer_loop; + +.compile_global_initializer_closing: + # Skip ")" + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_string: + _write_z("\n\t.word strings + \0"); + _string_length(source_code_position); + sw a0, 4(sp) + + _add_string(source_code_position); + _write_i(); + + # Skip the quoted string. + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + +.compile_global_initializer_end: +end; + +proc _compile_constant_declaration(); +begin + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the constant name with assignment sign and surrounding whitespaces. + _advance_token(v0 + 4); + _compile_global_initializer(); + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_const_part(); +begin + _skip_empty_lines(); + + _memcmp(source_code_position, "const\0", 5); + bnez a0, .compile_const_part_end + + # Skip "const" with the newline after it. + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + +.compile_const_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + # If the character at the line beginning is not indentation, + # it is probably the next code section. + li t1, '\t' + bne t0, t1, .compile_const_part_end + + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, '#' + beq t0, t1, .compile_const_part_loop + + _compile_constant_declaration(); + goto .compile_const_part_loop; + +.compile_const_part_end: +end; + +proc _compile_variable_declaration(); +begin + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the variable name and colon with space before the type. + _advance_token(v0 + 2); + + # Skip the type name. + _read_token(); _advance_token(); - # Write the object definition itself. - _compile_line(); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ' ' + beq t0, t1, .compile_variable_declaration_initializer -.compile_type_end: + # Else we assume this is a zeroed 81920 bytes big array. + _write_z(" .zero 81920\0"); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_initializer: + # Skip the assignment sign with surrounding whitespaces. + _advance_token(4); + _compile_global_initializer(); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_finalize: + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); end; -proc _skip_newlines(); +proc _compile_var_part(); begin - # Skip newlines. + _memcmp(source_code_position, "var\0", 3); + bnez a0, .compile_var_part_end + + # Skip "var" and newline. + _advance_token(4); + _write_z(".section .data\n\0"); + +.compile_var_part_loop: la t0, source_code_position - lw t1, (t0) + lw t0, (t0) + lb t0, (t0) -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end + li t1, 'p' + beq t0, t1, .compile_var_part_end - addi t1, t1, 1 - sw t1, (t0) + li t1, '\t' + beq t0, t1, .compile_var_part_declaration - goto .skip_newlines_loop; + _compile_line(); + goto .compile_var_part_loop; -.skip_newlines_end: +.compile_var_part_declaration: + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + +.compile_var_part_end: end; # Process the source code and print the generated code. proc _compile_module(); begin + _compile_const_part(); + _write_z(".section .bss\n\0"); + +.compile_module_bss: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, 'v' + beq t0, t1, .compile_module_code + + li t1, 'p' + beq t0, t1, .compile_module_code + + _compile_line(); + goto .compile_module_bss; + +.compile_module_code: + _compile_var_part(); + _write_z(".section .text\n\0"); .compile_module_loop: _skip_newlines(); @@ -1163,10 +1415,6 @@ begin _memcmp(source_code_position, ".section", 8); beqz a0, .compile_module_section - # 5 is ".type" length. - _memcmp(source_code_position, ".type", 5); - beqz a0, .compile_module_type - # 5 is "proc " length. Space is needed to distinguish from "procedure". _memcmp(source_code_position, "proc ", 5); beqz a0, .compile_module_procedure @@ -1183,11 +1431,6 @@ begin goto .compile_module_loop; -.compile_module_type: - _compile_type(); - - goto .compile_module_loop; - .compile_module_global: _compile_line(); @@ -1208,6 +1451,7 @@ end; proc _compile(); begin + _write_z(".globl _start\n\n\0"); _compile_module(); _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); @@ -1247,7 +1491,6 @@ begin end; # Entry point. -.globl _start proc _start(); begin # Read the source from the standard input. |
