diff options
Diffstat (limited to 'boot')
| -rw-r--r-- | boot/definitions.inc | 9 | ||||
| -rw-r--r-- | boot/stage1.s | 265 | ||||
| -rw-r--r-- | boot/stage2.elna | 63 | ||||
| -rw-r--r-- | boot/symbol.s | 104 |
4 files changed, 239 insertions, 202 deletions
diff --git a/boot/definitions.inc b/boot/definitions.inc index 97f6601..42a7943 100644 --- a/boot/definitions.inc +++ b/boot/definitions.inc @@ -2,6 +2,10 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +# +# Tokens. +# + # The constant should match the index in the keywords array in tokenizer.s. .equ TOKEN_PROGRAM, 1 @@ -52,3 +56,8 @@ .equ TOKEN_ASSIGN, 43 .equ TOKEN_INTEGER, 44 + +# +# Symbols. +# +.equ TYPE_PRIMITIVE, 1 diff --git a/boot/stage1.s b/boot/stage1.s index 525da11..b39f5bc 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -411,14 +411,6 @@ _build_expression: call _tokenize_next sw a0, 20(sp) - /* DEBUG - lw a0, 32(sp) - lw a1, 28(sp) - call _write_error - lw a0, 28(sp) - li a1, 8 - call _write_error */ - lw a0, 24(sp) li t0, TOKEN_MINUS @@ -754,74 +746,6 @@ _compile_call: addi sp, sp, 32 ret -# Skips the spaces till the next non space character. -.type _skip_spaces, @function -_skip_spaces: -.Lspace_loop_do: - lbu t0, (s1) # t0 = Current character. - - li t1, ' ' - beq t0, t1, .Lspace_loop_repeat - li t1, '\t' - beq t0, t1, .Lspace_loop_repeat - li t1, '\n' - beq t0, t1, .Lspace_loop_repeat - li t1, '\r' - beq t0, t1, .Lspace_loop_repeat - - j .Lspace_loop_end -.Lspace_loop_repeat: - addi s1, s1, 1 - j .Lspace_loop_do - -.Lspace_loop_end: - ret - -# Parameters: -# a0 - Line length. -.type _skip_comment, @function -_skip_comment: - # Prologue. - addi sp, sp, -16 - sw ra, 12(sp) - sw s0, 8(sp) - addi s0, sp, 16 - - # Check whether this is a comment. - li t0, 0x2a28 # (* - sw t0, 4(sp) - addi a0, sp, 4 - mv a1, s1 - li a2, 2 - call _memcmp - bnez a0, .Lskip_comment_end - - addi s1, s1, 2 # Skip (*. - - li t0, 0x292a # *) - sw t0, 4(sp) - -.Lskip_comment_loop: - addi a0, sp, 4 - mv a1, s1 - li a2, 2 - call _memcmp - beqz a0, .Lskip_comment_close - - addi s1, s1, 1 - - j .Lskip_comment_loop - -.Lskip_comment_close: - addi s1, s1, 2 # Skip *). - -.Lskip_comment_end: - # Epilogue. 
- lw ra, 12(sp) - lw s0, 8(sp) - addi sp, sp, 16 - ret - # Walks through the procedure definitions. .type _compile_procedure_section, @function _compile_procedure_section: @@ -832,9 +756,6 @@ _compile_procedure_section: addi s0, sp, 32 .Lcompile_procedure_section_loop: - call _skip_spaces - call _skip_comment - mv a0, s1 addi a1, sp, 4 call _tokenize_next @@ -1083,28 +1004,9 @@ _compile_procedure: call _tokenize_next mv s1, a0 - # .type identifier, @function - la a0, asm_type - li a1, ASM_TYPE_SIZE - call _write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call _write_out - - la a0, asm_type_function - li a1, ASM_TYPE_FUNCTION_SIZE - call _write_out - - lw a0, 20(sp) - lw a1, 16(sp) - call _write_out - - li t0, 0x0a3a # :\n - sw t0, 12(sp) - addi a0, sp, 12 - li a1, 2 - call _write_out + lw a0, 16(sp) + lw a1, 20(sp) + call _write_procedure_head # Skip all declarations until we find the "begin" keyword, denoting the # beginning of the procedure body. @@ -1128,21 +1030,24 @@ _compile_procedure: call _write_out # Generate the body of the procedure. -.Lcompile_procedure_body: - li t0, 0x0a646e65 # end\n - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 4 - call _memcmp - - beqz a0, .Lcompile_procedure_end - - call _compile_statement - j .Lcompile_procedure_body + call _compile_statements + mv s1, a0 # Skip end. -.Lcompile_procedure_end: - add s1, s1, 4 # Skip end\n. + /* DEBUG + sw a0, 8(sp) + lw a1, 12(sp) + li a2, TOKEN_END + sub a1, a1, a2 + seqz a1, a1 + seqz a0, a0 + addi a0, a0, '0' + addi a1, a1, '0' + sb a0, 4(sp) + sb a1, 5(sp) + addi a0, sp, 4 + li a1, 2 + call _write_error + lw a0, 8(sp) */ # Generate the procedure epilogue with a predefined stack size. 
la a0, epilogue @@ -1288,21 +1193,8 @@ _compile_if: li a0, '\n' call _put_char -.Lcompile_if_loop: - mv a0, s1 - addi a1, sp, 0 - call _tokenize_next - - lw t0, 0(sp) - li t1, TOKEN_END - beq t0, t1, .Lcompile_if_end - - call _compile_statement - - j .Lcompile_if_loop - -.Lcompile_if_end: - mv s1, a0 # Skip the end with newline. a0 is set by the last call to _tokenize_next. + call _compile_statements + mv s1, a0 # Skip end. # Write the label prefix. addi a0, sp, 20 @@ -1328,6 +1220,95 @@ _compile_if: addi sp, sp, 32 ret +# Writes: +# .type identifier, @function +# identifier: +# +# Parameters: +# a0 - Identifier length. +# a1 - Identifier pointer. +.type _write_procedure_head, @function +_write_procedure_head: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 16(sp) + sw a1, 20(sp) + + # .type identifier, @function + la a0, asm_type + li a1, ASM_TYPE_SIZE + call _write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call _write_out + + la a0, asm_type_function + li a1, ASM_TYPE_FUNCTION_SIZE + call _write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call _write_out + + li t0, 0x0a3a # :\n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 2 + call _write_out + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Compiles a list of statements delimited by semicolons. +# +# Sets a0 to the end of the token finishing the list +# (should be the "end" token in a valid program). +.type _compile_statements, @function +_compile_statements: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # An immediate "end" token means the statement list is empty. 
+ mv a0, s1 + addi a1, sp, 0 + call _tokenize_next + lw t0, 0(sp) + li t1, TOKEN_END + + beq t0, t1, .Lcompile_statements_end + +.Lcompile_statements_body: + call _compile_statement + + mv a0, s1 + addi a1, sp, 0 + call _tokenize_next + lw t0, 0(sp) + li t1, TOKEN_SEMICOLON + + bne t0, t1, .Lcompile_statements_end + mv s1, a0 + + j .Lcompile_statements_body + +.Lcompile_statements_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + # Checks for the type of the current statement and compiles it. .type _compile_statement, @function _compile_statement: @@ -1357,7 +1338,7 @@ _compile_statement: li t1, TOKEN_DOT beq t0, t1, .Lcompile_statement_label - j .Lcompile_statement_empty # Else. + unimp # Else. .Lcompile_statement_if: call _compile_if @@ -1379,10 +1360,6 @@ _compile_statement: call _compile_identifier j .Lcompile_statement_end -.Lcompile_statement_empty: - addi s1, s1, 1 - j .Lcompile_statement_end - .Lcompile_statement_end: # Epilogue. lw ra, 28(sp) @@ -1422,23 +1399,14 @@ _compile_entry_point: li a1, ASM_START_SIZE call _write_out - addi s1, s1, 6 # Skip begin\n. - - # Generate the body of the procedure. -.Lcompile_entry_point_body: mv a0, s1 addi a1, sp, 4 call _tokenize_next + mv s1, a0 # Skip begin. - lw t0, 4(sp) - li t1, TOKEN_END - beq t0, t1, .Lcompile_entry_point_end - - call _compile_statement - j .Lcompile_entry_point_body - -.Lcompile_entry_point_end: - mv s1, a0 # Skip end. a0 is set by the last _tokenize_next call. + # Generate the body of the procedure. + call _compile_statements + mv s1, a0 # Skip end. la a0, asm_exit li a1, ASM_EXIT_SIZE @@ -1481,6 +1449,7 @@ _start: call _read_file li s2, 1 + call symbol_table_build call _compile # Call exit. diff --git a/boot/stage2.elna b/boot/stage2.elna index 62bd307..695f52a 100644 --- a/boot/stage2.elna +++ b/boot/stage2.elna @@ -8,7 +8,6 @@ const var source_code: [81920]Byte -(* Ignores the import. 
*) proc _compile_import() var loca0: Word begin @@ -61,7 +60,7 @@ begin loca24 := _token_compare(loca12, loca20, @loca16); if loca24 = 0 then goto .L_build_binary_expression_minus - end + end; loca16 := 0x2a; loca24 := _token_compare(loca12, loca20, @loca16); @@ -181,7 +180,7 @@ begin _put_char(0x0a); goto .Lcompile_identifier_expression_end - end + end; loca8 := 0x6120616c; _write_out(@loca8, 4); @@ -409,7 +408,7 @@ begin loca0 := _front(loca8) = 0x2c; if loca0 = 0 then goto .Lcompile_call_paren - end + end; loca12 := loca12 + 1; @@ -567,7 +566,7 @@ begin loca0 := _front(loca0); loca4 := loca4 + 1; - if loca0 = 0x3d then + if loca0 = 0x3d then goto .Ltoken_character_single end end; @@ -605,43 +604,6 @@ begin .Lspace_loop_end end -proc _skip_comment(loca84: Word) -var - loca0: ^Byte - loca4: Word - loca8: Int -begin - loca0 := _current(); - - loca4 := 0x2a28; - loca8 := _memcmp(loca0, @loca4, 2); - if loca8 = 0 then - goto .Lskip_comment_continue - end; - goto .Lskip_comment_end; - - .Lskip_comment_continue; - _advance(2); - - loca4 := 0x292a; - - .Lskip_comment_loop; - loca0 := _current(); - loca8 := _memcmp(loca0, @loca4, 2); - if loca8 = 0 then - goto .Lskip_comment_close - end; - - _advance(1); - - goto .Lskip_comment_loop; - - .Lskip_comment_close; - _advance(2); - - .Lskip_comment_end -end - proc _compile_assembly(loca84: Word) var loca0: ^Byte begin @@ -748,9 +710,11 @@ begin .Lcompile_variable_section_item; _skip_spaces(); loca4 := _current(); - loca0 := _front(loca4); - if _is_lower(loca0) = 0 then + loca0 := 0x636f7270; + loca0 := _memcmp(@loca0, loca4, 4); + + if loca0 = 0 then goto .Lcompile_variable_section_end end; _compile_variable(); @@ -1096,7 +1060,7 @@ proc _compile_return() begin _advance(6); _skip_spaces(); - _build_binary_expression(); + _build_binary_expression() end proc _compile_if() @@ -1164,10 +1128,6 @@ begin loca16 := _current(); loca0 := _front(loca16); - if loca0 = 0x28 then - goto .Lcompile_line_comment - end; - loca16 := _current(); 
loca12 := 0x676f7270; loca4 := _memcmp(loca16, @loca12, 4); @@ -1308,10 +1268,6 @@ begin _compile_program(); goto .Lcompile_line_section; - .Lcompile_line_comment; - _skip_comment(loca84); - goto .Lcompile_line_section; - .Lcompile_line_empty; _advance(1); goto .Lcompile_line_section; @@ -1325,7 +1281,6 @@ begin .Lcompile_line_end; _skip_spaces(); - _skip_comment(); return loca8 end diff --git a/boot/symbol.s b/boot/symbol.s new file mode 100644 index 0000000..66409aa --- /dev/null +++ b/boot/symbol.s @@ -0,0 +1,104 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +.global symbol_table_build + +.include "boot/definitions.inc" + +.equ SYMBOL_PRIME, 1543 + +.section .rodata + +.type symbol_builtin_name_int, @object +symbol_builtin_name_int: .ascii "Int" +.type symbol_builtin_name_word, @object +symbol_builtin_name_word: .ascii "Word" +.type symbol_builtin_name_byte, @object +symbol_builtin_name_byte: .ascii "Byte" +.type symbol_builtin_name_char, @object +symbol_builtin_name_char: .ascii "Char" + +# Every type info starts with a word describing what type it is. + +# Primitive types have only type size. +.type symbol_builtin_type_int, @object +symbol_builtin_type_int: .word TYPE_PRIMITIVE + .word 4 +.type symbol_builtin_type_word, @object +symbol_builtin_type_word: .word TYPE_PRIMITIVE + .word 4 +.type symbol_builtin_type_byte, @object +symbol_builtin_type_byte: .word TYPE_PRIMITIVE + .word 1 +.type symbol_builtin_type_char, @object +symbol_builtin_type_char: .word TYPE_PRIMITIVE + .word 1 + +.section .bss + +# The first word of the symbol table is its length. +# Then a list of type infos follows: +# +# record +# name: String +# info: ^TypeInfo +# end +.type symbol_table, @object +symbol_table: .zero SYMBOL_PRIME + +.section .text + +# Build the initial symbols. 
+# Appends Int, Word, Byte and Char as 12-byte entries: name length, name pointer, type info pointer.
+# Sets a0 to the pointer to the global symbol table. Clobbers t0 and t1.
+.type symbol_table_build, @function
+symbol_table_build:
+    la a0, symbol_table
+    addi t0, a0, 4 # t0 = first entry slot; the word at 0(a0) is the entry count.
+
+    li t1, 3 # Length of the word "Int".
+    sw t1, 0(t0) # Entry name length.
+    la t1, symbol_builtin_name_int
+    sw t1, 4(t0) # Entry name pointer.
+    la t1, symbol_builtin_type_int
+    sw t1, 8(t0) # Entry type info pointer.
+    lw t1, 0(a0) # Increment the entry count.
+    addi t1, t1, 1
+    sw t1, 0(a0)
+    addi t0, t0, 12 # Advance to the next entry slot.
+
+    li t1, 4 # Length of the word "Word".
+    sw t1, 0(t0) # Entry name length.
+    la t1, symbol_builtin_name_word
+    sw t1, 4(t0) # Entry name pointer.
+    la t1, symbol_builtin_type_word
+    sw t1, 8(t0) # Entry type info pointer.
+    lw t1, 0(a0) # Increment the entry count.
+    addi t1, t1, 1
+    sw t1, 0(a0)
+    addi t0, t0, 12 # Advance to the next entry slot.
+
+    li t1, 4 # Length of the word "Byte".
+    sw t1, 0(t0) # Entry name length.
+    la t1, symbol_builtin_name_byte
+    sw t1, 4(t0) # Entry name pointer.
+    la t1, symbol_builtin_type_byte
+    sw t1, 8(t0) # Entry type info pointer.
+    lw t1, 0(a0) # Increment the entry count.
+    addi t1, t1, 1
+    sw t1, 0(a0)
+    addi t0, t0, 12 # Advance to the next entry slot.
+
+    li t1, 4 # Length of the word "Char".
+    sw t1, 0(t0) # Entry name length.
+    la t1, symbol_builtin_name_char
+    sw t1, 4(t0) # Entry name pointer.
+    la t1, symbol_builtin_type_char
+    sw t1, 8(t0) # Entry type info pointer.
+    lw t1, 0(a0) # Increment the entry count.
+    addi t1, t1, 1
+    sw t1, 0(a0)
+    addi t0, t0, 12 # Leave t0 pointing at the first free entry slot.
+
+    ret
