diff options
Diffstat (limited to 'boot/stage7.elna')
| -rw-r--r-- | boot/stage7.elna | 302 |
1 files changed, 265 insertions, 37 deletions
diff --git a/boot/stage7.elna b/boot/stage7.elna index 6973963..f83a8a5 100644 --- a/boot/stage7.elna +++ b/boot/stage7.elna @@ -4,7 +4,9 @@ # Stage 7 compiler. # -# - String literals. +# - Static global variable and constant initialization. +# - Objct sections are determined automatically. +# - _start is always exported. .section .bss @@ -1098,57 +1100,291 @@ begin _advance_token(5); end; -proc _compile_type(); +proc _skip_newlines(); begin - # Print and skip the ".type" (5 characters) directive and a space after it. - _write_token(6); - _advance_token(); + # Skip newlines. + la t0, source_code_position + lw t1, (t0) - # Read and print the symbol name. - _read_token(); +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Skip newlines and comments. +proc _skip_empty_lines(); +begin +.skip_empty_lines_loop: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '#' + beq t0, t1, .skip_empty_lines_comment + + li t1, '\n' + beq t0, t1, .skip_empty_lines_newline + + goto .skip_empty_lines_end; + +.skip_empty_lines_comment: + _skip_comment(); + goto .skip_empty_lines_loop; + +.skip_empty_lines_newline: + _advance_token(1); + goto .skip_empty_lines_loop; - # Print and skip the symbol name, comma, space and @. - addi a0, a0, 3 +.skip_empty_lines_end: +end; + +proc _compile_global_initializer(); +begin + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .compile_global_initializer_string + + li t1, 'S' + beq t0, t1, .compile_global_initializer_record + + li t1, '@' + beq t0, t1, .compile_global_initializer_pointer + + la a0, source_code_position + lw a0, (a0) + lb a0, (a0) + _is_digit(); + bnez a0, .compile_global_initializer_number + + unimp + +.compile_global_initializer_pointer: + # Skip @. + _advance_token(1); + _write_z("\n\t.word \0"); + _read_token(); _write_token(); _advance_token(); - # Read the symbol type. - _read_token(); + goto .compile_global_initializer_end; - # Print the symbol type and newline. - addi a0, a0, 1 +.compile_global_initializer_number: + _write_z("\n\t.word \0"); + _read_token(); _write_token(); + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_record: + # Skip "S(". + _advance_token(2); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + +.compile_global_initializer_loop: + _compile_global_initializer(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + + # Skip comma and whitespace after it. + _advance_token(2); + + goto .compile_global_initializer_loop; + +.compile_global_initializer_closing: + # Skip ")" + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_string: + _write_z("\n\t.word strings + \0"); + _string_length(source_code_position); + sw a0, 4(sp) + + _add_string(source_code_position); + _write_i(); + + # Skip the quoted string. + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + +.compile_global_initializer_end: +end; + +proc _compile_constant_declaration(); +begin + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the constant name with assignment sign and surrounding whitespaces. + _advance_token(v0 + 4); + _compile_global_initializer(); + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_const_part(); +begin + _skip_empty_lines(); + + _memcmp(source_code_position, "const\0", 5); + bnez a0, .compile_const_part_end + + # Skip "const" with the newline after it. + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + +.compile_const_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + # If the character at the line beginning is not indentation, + # it is probably the next code section. + li t1, '\t' + bne t0, t1, .compile_const_part_end + + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, '#' + beq t0, t1, .compile_const_part_loop + + _compile_constant_declaration(); + goto .compile_const_part_loop; + +.compile_const_part_end: +end; + +proc _compile_variable_declaration(); +begin + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the variable name and colon with space before the type. + _advance_token(v0 + 2); + + # Skip the type name. + _read_token(); _advance_token(); - # Write the object definition itself. - _compile_line(); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ' ' + beq t0, t1, .compile_variable_declaration_initializer + + # Else we assume this is a zeroed 81920 bytes big array. + _write_z(" .zero 81920\0"); + goto .compile_variable_declaration_finalize; -.compile_type_end: +.compile_variable_declaration_initializer: + # Skip the assignment sign with surrounding whitespaces. + _advance_token(4); + _compile_global_initializer(); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_finalize: + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); end; -proc _skip_newlines(); +proc _compile_var_part(); begin - # Skip newlines. + _memcmp(source_code_position, "var\0", 3); + bnez a0, .compile_var_part_end + + # Skip "var" and newline. + _advance_token(4); + _write_z(".section .data\n\0"); + +.compile_var_part_loop: la t0, source_code_position - lw t1, (t0) + lw t0, (t0) + lb t0, (t0) -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end + li t1, 'p' + beq t0, t1, .compile_var_part_end - addi t1, t1, 1 - sw t1, (t0) + li t1, '\t' + beq t0, t1, .compile_var_part_declaration - goto .skip_newlines_loop; + _compile_line(); + goto .compile_var_part_loop; -.skip_newlines_end: +.compile_var_part_declaration: + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + +.compile_var_part_end: end; # Process the source code and print the generated code. proc _compile_module(); begin + _compile_const_part(); + _write_z(".section .bss\n\0"); + +.compile_module_bss: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, 'v' + beq t0, t1, .compile_module_code + + li t1, 'p' + beq t0, t1, .compile_module_code + + _compile_line(); + goto .compile_module_bss; + +.compile_module_code: + _compile_var_part(); + _write_z(".section .text\n\0"); .compile_module_loop: _skip_newlines(); @@ -1163,10 +1399,6 @@ begin _memcmp(source_code_position, ".section", 8); beqz a0, .compile_module_section - # 5 is ".type" length. - _memcmp(source_code_position, ".type", 5); - beqz a0, .compile_module_type - # 5 is "proc " length. Space is needed to distinguish from "procedure". _memcmp(source_code_position, "proc ", 5); beqz a0, .compile_module_procedure @@ -1183,11 +1415,6 @@ begin goto .compile_module_loop; -.compile_module_type: - _compile_type(); - - goto .compile_module_loop; - .compile_module_global: _compile_line(); @@ -1208,6 +1435,7 @@ end; proc _compile(); begin + _write_z(".globl _start\n\n\0"); _compile_module(); _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); |
