diff options
Diffstat (limited to 'boot/stage4/cl.elna')
| -rw-r--r-- | boot/stage4/cl.elna | 1129 |
1 files changed, 1129 insertions, 0 deletions
diff --git a/boot/stage4/cl.elna b/boot/stage4/cl.elna
new file mode 100644
index 0000000..d873b9a
--- /dev/null
+++ b/boot/stage4/cl.elna
@@ -0,0 +1,1129 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+# Stage 4 compiler.
+#
+# - Taking value of local and global variables. Variables that don't begin
+#   with "v" are considered global.
+# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable;
+#   7 words on the stack, 28 - 56, are reserved for procedure arguments (caller side).
+# - Take address unary operation "@".
+
+.section .rodata
+
+# Source keywords. They are emitted with .ascii, i.e. without a terminating
+# zero byte, so they must be compared with an explicit length.
+.type keyword_section, @object
+keyword_section: .ascii ".section"
+
+.type keyword_type, @object
+keyword_type: .ascii ".type"
+
+.type keyword_ret, @object
+keyword_ret: .ascii "ret"
+
+.type keyword_global, @object
+keyword_global: .ascii ".globl"
+
+.type keyword_proc, @object
+keyword_proc: .ascii "proc "
+
+.type keyword_end, @object
+keyword_end: .ascii "end"
+
+.type keyword_begin, @object
+keyword_begin: .ascii "begin"
+
+.type keyword_var, @object
+keyword_var: .ascii "var"
+
+# Generated procedure prologue: reserves a 64 byte frame, saves ra at 60(sp)
+# and s0 at 56(sp) and lets s0 point to the top of the frame.
+.type asm_prologue, @object
+asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n"
+
+# Generated procedure epilogue: restores ra and s0, releases the frame and
+# returns.
+.type asm_epilogue, @object
+asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n"
+
+# Null-terminated fragments of the generated assembly.
+.type asm_type_directive, @object
+asm_type_directive: .string ".type "
+
+.type asm_type_function, @object
+asm_type_function: .string ", @function\n"
+
+.type asm_colon, @object
+asm_colon: .string ":\n"
+
+.type asm_call, @object
+asm_call: .string "\tcall "
+
+.type asm_j, @object
+asm_j: .string "\tj "
+
+.type asm_li, @object
+asm_li: .string "\tli "
+
+.type asm_lw, @object
+asm_lw: .string "\tlw "
+
+.type asm_la, @object
+asm_la: .string "\tla "
+
+.type asm_sw, @object
+asm_sw: .string "\tsw "
+
+.type asm_addi, @object
+asm_addi: .string "\taddi "
+
+.type asm_t0, @object
+asm_t0: .string "t0"
+
+.type asm_t1, @object
+asm_t1: .string "t1"
+
+.type asm_comma, @object
+asm_comma: .string ", "
+
+.type asm_sp, @object
+asm_sp: .string "sp"
+
+.section .bss
+
+# Buffer holding the whole source text.
+# When modifying also change the read size in the entry point procedure.
+.type source_code, @object
+source_code: .zero 81920
+
+.section .data
+
+# Pointer to the next unprocessed byte of the source text. Initially points
+# to the beginning of the source buffer.
+.type source_code_position, @object
+source_code_position: .word source_code
+
+.section .text
+
+# Reads standard input into a buffer.
+# a0 - Buffer pointer.
+# a1 - Buffer size.
+#
+# Returns the amount of bytes written in a0.
+#
+# NOTE(review): only one read call is issued; presumably a short read (e.g.
+# from a pipe) would leave the rest of the input unread - confirm.
+proc _read_file();
+begin
+	# Shift the arguments into the system call order: descriptor, buffer, size.
+	mv a2, a1
+	mv a1, a0
+	# STDIN.
+	li a0, 0
+	li a7, 63 # SYS_READ.
+	ecall
+end;
+
+# Writes to the standard output.
+#
+# Parameters:
+# a0 - Buffer.
+# a1 - Buffer length.
+proc _write_s();
+begin
+	# Shift the arguments into the system call order: descriptor, buffer, size.
+	mv a2, a1
+	mv a1, a0
+	# STDOUT.
+	li a0, 1
+	li a7, 64 # SYS_WRITE.
+	ecall
+end;
+
+# Writes a number to a string buffer.
+#
+# The digits are first produced from the least significant one into a local
+# buffer that grows downwards from s0 - 9, the byte right below the saved s0,
+# and are then copied to the target buffer with _memcpy.
+#
+# t0 - Local buffer.
+# t1 - Constant 10.
+# t2 - Current character.
+# t3 - Whether the number is negative.
+#
+# Parameters:
+# a0 - Whole number.
+# a1 - Buffer pointer.
+#
+# Sets a0 to the length of the written number.
+#
+# NOTE(review): negating the most negative 32-bit value yields the same value,
+# so that input presumably produces wrong digits - confirm.
+proc _print_i();
+begin
+	li t1, 10
+	addi t0, s0, -9
+
+	# Remember the sign and continue with the absolute value.
+	li t3, 0
+	bgez a0, .print_i_digit10
+	li t3, 1
+	neg a0, a0
+
+.print_i_digit10:
+	# Store the last decimal digit and drop it from the number.
+	rem t2, a0, t1
+	addi t2, t2, '0'
+	sb t2, 0(t0)
+	div a0, a0, t1
+	addi t0, t0, -1
+	bne zero, a0, .print_i_digit10
+
+	# Prepend the minus sign for negative numbers.
+	beq zero, t3, .print_i_write_call
+	addi t2, zero, '-'
+	sb t2, 0(t0)
+	addi t0, t0, -1
+
+.print_i_write_call:
+	# t0 points one byte below the first character, so the text starts at
+	# t0 + 1 and its length is (s0 - 9) - t0.
+	mv a0, a1
+	addi a1, t0, 1
+	sub a2, s0, t0
+	addi a2, a2, -9
+	# Keep the length, it is the return value.
+	sw a2, 0(sp)
+
+	_memcpy();
+
+	lw a0, 0(sp)
+end;
+
+# Writes a number to the standard output.
+#
+# Parameters:
+# a0 - Whole number.
+proc _write_i();
+begin
+	# The bottom of the own frame serves as the text buffer.
+	addi a1, sp, 0
+	_print_i();
+
+	# a0 is the text length returned by _print_i.
+	mv a1, a0
+	addi a0, sp, 0
+	_write_s();
+
+end;
+
+# Writes a character from a0 into the standard output.
+proc _write_c();
+begin
+	# Spill the character into the frame and write it as a 1 byte buffer.
+	sb a0, 0(sp)
+	addi a0, sp, 0
+	li a1, 1
+	_write_s();
+end;
+
+# Write null terminated string.
+#
+# The string is written character by character with _write_c. The pointer to
+# the current character is kept in 0(sp).
+#
+# Parameters:
+# a0 - String.
+proc _write_z();
+begin
+	sw a0, 0(sp)
+
+.write_z_loop:
+	# Check for 0 character.
+	lb a0, (a0)
+	beqz a0, .write_z_end
+
+	# Print a character.
+	lw a0, 0(sp)
+	lb a0, (a0)
+	_write_c();
+
+	# Advance the input string by one byte.
+	lw a0, 0(sp)
+	addi a0, a0, 1
+	sw a0, 0(sp)
+
+	goto .write_z_loop;
+
+.write_z_end:
+end;
+
+# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_upper();
+begin
+	li t0, 'A' - 1
+	sltu t1, t0, a0 # t1 = a0 >= 'A'
+
+	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
+	and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
+end;
+
+# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_lower();
+begin
+	li t0, 'a' - 1
+	sltu t2, t0, a0 # t2 = a0 >= 'a'
+
+	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
+	and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
+end;
+
+# Detects if the passed character is a 7-bit alpha character or an underscore.
+#
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
+proc _is_alpha();
+begin
+	# 0(sp) keeps the tested character, 4(sp) the uppercase test result.
+	sw a0, 0(sp)
+
+	_is_upper();
+	sw a0, 4(sp)
+
+	_is_lower(v0);
+
+	# t1 = character == '_'
+	lw t0, 0(sp)
+	xori t1, t0, '_'
+	seqz t1, t1
+
+	# Combine the lowercase, uppercase and underscore results.
+	lw t0, 4(sp)
+	or a0, a0, t0
+	or a0, a0, t1
+end;
+
+# Detects whether the passed character is a digit
+# (a value between 0 and 9).
+#
+# Parameters:
+# a0 - Examined value.
+#
+# Sets a0 to 1 if it is a digit, to 0 otherwise.
+proc _is_digit();
+begin
+	li t0, '0' - 1
+	sltu t1, t0, a0 # t1 = a0 >= '0'
+
+	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
+
+	and a0, t1, t2
+end;
+
+# Detects whether the passed character is a letter, an underscore or a digit.
+#
+# Parameters:
+# a0 - Tested character.
+#
+# Sets a0 to 1 if _is_alpha or _is_digit accepts the character, to 0 otherwise.
+proc _is_alnum();
+begin
+	# 4(sp) keeps the tested character, 0(sp) the _is_alpha result.
+	sw a0, 4(sp)
+
+	_is_alpha();
+	sw a0, 0(sp)
+
+	_is_digit(v4);
+
+	lw a1, 0(sp)
+	or a0, a0, a1
+end;
+
+# Reads the next token.
+#
+# Returns token length in a0.
+proc _read_token();
+begin
+	la t0, source_code_position # Token pointer.
+	lw t0, (t0)
+	sw t0, 0(sp) # Current token position.
+	sw zero, 4(sp) # Token length.
+
+.read_token_loop:
+	lb t0, (t0) # Current character.
+
+	# First we try to read a directive.
+	# A directive can contain a dot and characters.
+	li t1, '.'
+	beq t0, t1, .read_token_next
+
+	lw a0, 0(sp)
+	lb a0, (a0)
+	_is_alnum();
+	bnez a0, .read_token_next
+
+	goto .read_token_end;
+
+.read_token_next:
+	# Advance the source code position and token length.
+	lw t0, 4(sp)
+	addi t0, t0, 1
+	sw t0, 4(sp)
+
+	lw t0, 0(sp)
+	addi t0, t0, 1
+	sw t0, 0(sp)
+
+	goto .read_token_loop;
+
+.read_token_end:
+	# Only the length is returned, source_code_position is not changed.
+	lw a0, 4(sp)
+end;
+
+# Compares two memory regions byte by byte.
+#
+# a0 - First pointer.
+# a1 - Second pointer.
+# a2 - The length to compare.
+#
+# Returns 0 in a0 if memory regions are equal, otherwise the difference
+# between the first two differing bytes (taken as unsigned values).
+proc _memcmp();
+begin
+	mv t0, a0
+	li a0, 0
+
+.memcmp_loop:
+	beqz a2, .memcmp_end
+
+	lbu t1, (t0)
+	lbu t2, (a1)
+	sub a0, t1, t2
+
+	bnez a0, .memcmp_end
+
+	addi t0, t0, 1
+	addi a1, a1, 1
+	addi a2, a2, -1
+
+	goto .memcmp_loop;
+
+.memcmp_end:
+end;
+
+# Copies memory.
+#
+# Parameters:
+# a0 - Destination.
+# a1 - Source.
+# a2 - Size.
+#
+# Preserves a0.
+proc _memcpy();
+begin
+	# Keep the destination to restore it at the end.
+	mv t0, a0
+
+.memcpy_loop:
+	beqz a2, .memcpy_end
+
+	lbu t1, (a1)
+	sb t1, (a0)
+
+	addi a0, a0, 1
+	addi a1, a1, 1
+	addi a2, a2, -1
+
+	goto .memcpy_loop;
+
+.memcpy_end:
+	mv a0, t0
+end;
+
+# Advances the token stream by a0 bytes.
+proc _advance_token();
+begin
+	la t0, source_code_position
+	lw t1, (t0)
+	add t1, t1, a0
+	sw t1, (t0)
+end;
+
+# Prints the current token.
+#
+# Parameters:
+# a0 - Token length.
+#
+# Returns a0 unchanged.
+proc _write_token();
+begin
+	sw a0, 0(sp)
+
+	la a0, source_code_position
+	lw a0, (a0)
+	lw a1, 0(sp)
+	_write_s();
+
+	lw a0, 0(sp)
+end;
+
+# Copies a ".section" directive line to the output.
+proc _compile_section();
+begin
+	# Print and skip the ".section" (8 characters) directive and a space after it.
+	_write_token(9);
+	_advance_token();
+
+	# Read the section name.
+	_read_token();
+	# Include the newline after the name.
+	addi a0, a0, 1
+
+	_write_token();
+	_advance_token();
+end;
+
+# Skips a line without printing it.
+proc _skip_comment();
+begin
+	la t0, source_code_position
+	lw t1, (t0)
+
+.skip_comment_loop:
+	# Check for newline character.
+	lb t2, (t1)
+	li t3, '\n'
+	beq t2, t3, .skip_comment_end
+
+	# Advance the input string by one byte.
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_comment_loop;
+
+.skip_comment_end:
+	# Skip the newline.
+	addi t1, t1, 1
+	sw t1, (t0)
+end;
+
+# Prints and skips a line.
+proc _compile_line();
+begin
+.compile_line_loop:
+	la a0, source_code_position
+	lw a1, (a0)
+
+	lb t0, (a1)
+	li t1, '\n'
+	beq t0, t1, .compile_line_end
+
+	# Print a character. Only one byte is loaded: the source position has an
+	# arbitrary alignment, so a word load could be misaligned and could read
+	# beyond the end of the buffer.
+	lb a0, (a1)
+	_write_c();
+
+	# Advance the input string by one byte.
+	_advance_token(1);
+
+	goto .compile_line_loop;
+
+.compile_line_end:
+	_write_c('\n');
+
+	_advance_token(1);
+end;
+
+# Compiles an integer literal into "li t0, <literal>".
+proc _compile_integer_literal();
+begin
+	la a0, asm_li
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	_write_c('\n');
+end;
+
+# Compiles a character literal into "li t0, '<character>'". The character can
+# be preceded by a backslash, which is copied as is.
+proc _compile_character_literal();
+begin
+	la a0, asm_li
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Print and skip the opening quote.
+	_write_c('\'');
+	_advance_token(1);
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	li t1, '\\'
+	bne a0, t1, .compile_character_literal_end
+
+	_write_c('\\');
+	_advance_token(1);
+
+.compile_character_literal_end:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	_write_c();
+
+	_write_c('\'');
+	_write_c('\n');
+
+	# Skip the character and the closing quote.
+	_advance_token(2);
+
+end;
+
+# Compiles taking the value of a variable: the designator puts the variable
+# address into t0, then "lw t0, (t0)" replaces it with the value.
+proc _compile_variable_expression();
+begin
+	_compile_designator();
+
+	la a0, asm_lw
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_write_c('(');
+	la a0, asm_t0
+	_write_z();
+
+	_write_c(')');
+	_write_c('\n');
+
+end;
+
+# Compiles the take address operation. The designator already leaves the
+# address in t0.
+proc _compile_address_expression();
+begin
+	# Skip the "@" sign.
+	_advance_token(1);
+	_compile_designator();
+
+end;
+
+# Compiles an expression. The first character selects the expression kind:
+# a quote starts a character literal, "@" the take address operation, a digit
+# an integer literal, everything else is a variable.
+proc _compile_expression();
+begin
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+	sw a0, 0(sp)
+
+	li t1, '\''
+	beq a0, t1, .compile_expression_character_literal
+
+	li t1, '@'
+	beq a0, t1, .compile_expression_address
+
+	_is_digit(v0);
+	bnez a0, .compile_expression_integer_literal
+
+	goto .compile_expression_variable;
+
+.compile_expression_character_literal:
+	_compile_character_literal();
+	goto .compile_expression_end;
+
+.compile_expression_integer_literal:
+	_compile_integer_literal();
+	goto .compile_expression_end;
+
+.compile_expression_address:
+	_compile_address_expression();
+	goto .compile_expression_end;
+
+.compile_expression_variable:
+	_compile_variable_expression();
+	goto .compile_expression_end;
+
+.compile_expression_end:
+end;
+
+# Compiles a procedure call. Each argument is evaluated into t0 and saved in
+# the caller frame at 52 - 4 * n (sp), where n is the argument index. After
+# the last argument the saved values are loaded into a<n> and the call is
+# written.
+proc _compile_call();
+begin
+	# Stack variables:
+	# v0 - Procedure name length.
+	# v4 - Procedure name pointer.
+	# v8 - Argument count.
+
+	_read_token();
+	sw a0, 0(sp)
+	la t0, source_code_position
+	lw t0, (t0)
+	sw t0, 4(sp)
+
+	sw zero, 8(sp)
+
+	# Skip the identifier and left paren.
+	addi a0, a0, 1
+	_advance_token();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, ')'
+	beq t0, t1, .compile_call_finalize
+
+.compile_call_loop:
+	_compile_expression();
+
+	# Save the argument on the stack.
+	la a0, asm_sw
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Calculate the stack offset: 52 - (4 * argument_counter)
+	lw t0, 8(sp)
+	li t1, 4
+	mul t0, t0, t1
+	li t1, 52
+	sub a0, t1, t0
+	_write_i();
+
+	_write_c('(');
+	la a0, asm_sp
+	_write_z();
+	_write_c(')');
+
+	_write_c('\n');
+
+	# Add one to the argument counter.
+	lw t0, 8(sp)
+	addi t0, t0, 1
+	sw t0, 8(sp)
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, ','
+	bne t0, t1, .compile_call_finalize
+
+	# Skip the comma and the space after it.
+	_advance_token(2);
+	goto .compile_call_loop;
+
+.compile_call_finalize:
+	# Load the argument from the stack.
+
+	lw t0, 8(sp)
+	beqz t0, .compile_call_end
+
+	# Decrement the argument counter.
+	lw t0, 8(sp)
+	addi t0, t0, -1
+	sw t0, 8(sp)
+
+	la a0, asm_lw
+	_write_z();
+
+	# The target register is "a" followed by the argument index.
+	_write_c('a');
+	lw a0, 8(sp)
+	_write_i();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Calculate the stack offset: 52 - (4 * argument_counter)
+	lw t0, 8(sp)
+	li t1, 4
+	mul t0, t0, t1
+	li t1, 52
+	sub a0, t1, t0
+	_write_i();
+
+	_write_c('(');
+	la a0, asm_sp
+	_write_z();
+
+	_write_c(')');
+	_write_c('\n');
+
+	goto .compile_call_finalize;
+
+.compile_call_end:
+	la a0, asm_call
+	_write_z();
+
+	# Write the procedure name saved at the beginning.
+	_write_s(v4, v0);
+
+	# Skip the right paren.
+	_advance_token(1);
+end;
+
+# Compiles "goto <label>" into "j <label>".
+proc _compile_goto();
+begin
+	# Skip "goto" and the space after it.
+	_advance_token(5);
+
+	_read_token();
+	sw a0, 0(sp)
+
+	la a0, asm_j
+	_write_z();
+
+	_write_token(v0);
+	_advance_token();
+end;
+
+# Compiles a local variable designator into "addi t0, sp, <offset>", where
+# the offset is the text following "v" in the variable name.
+proc _compile_local_designator();
+begin
+	# Skip "v" in the local variable name.
+	_advance_token(1);
+
+	la a0, asm_addi
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	la a0, asm_sp
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	# Read local variable stack offset and print it.
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	_write_c('\n');
+
+end;
+
+# Compiles a global variable designator into "la t0, <name>".
+proc _compile_global_designator();
+begin
+	la a0, asm_la
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_read_token();
+	_write_token();
+	_advance_token();
+
+	_write_c('\n');
+
+end;
+
+# Compiles a designator. Names beginning with "v" are local variables,
+# all other names are global.
+proc _compile_designator();
+begin
+	la t0, source_code_position
+	lw t0, (t0)
+	lb a0, (t0)
+
+	li t1, 'v'
+	beq a0, t1, .compile_designator_local
+
+	goto .compile_designator_global;
+
+.compile_designator_local:
+	_compile_local_designator();
+	goto .compile_designator_end;
+
+.compile_designator_global:
+	_compile_global_designator();
+	goto .compile_designator_end;
+
+.compile_designator_end:
+end;
+
+# Compiles an assignment. The generated code saves the assignee address in
+# 20(sp), evaluates the expression into t0, reloads the address into t1 and
+# stores t0 there. The newline after the last instruction is written by
+# _compile_statement.
+proc _compile_assignment();
+begin
+	_compile_designator();
+
+	# Save the assignee address on the stack.
+	la a0, asm_sw
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_write_i(20);
+	_write_c('(');
+	la a0, asm_sp
+	_write_z();
+	_write_c(')');
+	_write_c('\n');
+	# Skip the assignment sign (:=) with surrounding whitespaces.
+	_advance_token(4);
+
+	# Compile the assignment.
+	_compile_expression();
+
+	la a0, asm_lw
+	_write_z();
+
+	la a0, asm_t1
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_write_i(20);
+	_write_c('(');
+	la a0, asm_sp
+	_write_z();
+	_write_c(')');
+	_write_c('\n');
+
+	la a0, asm_sw
+	_write_z();
+
+	la a0, asm_t0
+	_write_z();
+
+	la a0, asm_comma
+	_write_z();
+
+	_write_c('(');
+	la a0, asm_t1
+	_write_z();
+	_write_c(')');
+end;
+
+# Compiles one line of a procedure body. The character after the first one
+# selects the statement kind: "_" is a call, "g" a goto, "v" an assignment.
+# Any other line is copied to the output unchanged.
+proc _compile_statement();
+begin
+	# This is a call if the statement starts with an underscore.
+	la t0, source_code_position
+	lw t0, (t0)
+	# First character after alignment tab.
+	addi t0, t0, 1
+	lb t0, (t0)
+
+	li t1, '_'
+	beq t0, t1, .compile_statement_call
+
+	li t1, 'g'
+	beq t0, t1, .compile_statement_goto
+
+	li t1, 'v'
+	beq t0, t1, .compile_statement_assignment
+
+	_compile_line();
+	goto .compile_statement_end;
+
+.compile_statement_call:
+	# Skip the alignment tab.
+	_advance_token(1);
+	_compile_call();
+
+	goto .compile_statement_semicolon;
+
+.compile_statement_goto:
+	# Skip the alignment tab.
+	_advance_token(1);
+	_compile_goto();
+
+	goto .compile_statement_semicolon;
+
+.compile_statement_assignment:
+	# Skip the alignment tab.
+	_advance_token(1);
+	_compile_assignment();
+
+	goto .compile_statement_semicolon;
+
+.compile_statement_semicolon:
+	# Skip the semicolon and the newline after it.
+	_advance_token(2);
+
+	_write_c('\n');
+
+.compile_statement_end:
+end;
+
+# Compiles statements until a line beginning with "end" is reached.
+proc _compile_procedure_body();
+begin
+.compile_procedure_body_loop:
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_end
+	li a2, 3 # "end" length.
+	_memcmp();
+
+	beqz a0, .compile_procedure_body_epilogue
+
+	_compile_statement();
+	goto .compile_procedure_body_loop;
+
+.compile_procedure_body_epilogue:
+end;
+
+# Compiles a procedure: writes the .type directive, the label and the
+# prologue, compiles the body and writes the epilogue.
+proc _compile_procedure();
+begin
+	# Skip "proc ".
+	_advance_token(5);
+
+	_read_token();
+	sw a0, 0(sp) # Save the procedure name length.
+
+	# Write .type _procedure_name, @function.
+	la a0, asm_type_directive
+	_write_z();
+
+	_write_token(v0);
+
+	la a0, asm_type_function
+	_write_z();
+
+	# Write procedure label, _procedure_name:
+	_write_token(v0);
+
+	la a0, asm_colon
+	_write_z();
+
+	# Skip the function name and trailing parens, semicolon, "begin" and newline.
+	lw a0, 0(sp)
+	addi a0, a0, 10
+	_advance_token();
+
+	la a0, asm_prologue
+	_write_z();
+
+	_compile_procedure_body();
+
+	# Write the epilogue.
+	la a0, asm_epilogue
+	_write_z();
+
+	# Skip the "end" keyword, semicolon and newline.
+	_advance_token(5);
+end;
+
+# Copies a ".type" directive and the definition on the following line to the
+# output.
+proc _compile_type();
+begin
+	# Print and skip the ".type" (5 characters) directive and a space after it.
+	_write_token(6);
+	_advance_token();
+
+	# Read and print the symbol name.
+	_read_token();
+
+	# Print and skip the symbol name, comma, space and @.
+	addi a0, a0, 3
+	_write_token();
+	_advance_token();
+
+	# Read the symbol type.
+	_read_token();
+
+	# Print the symbol type and newline.
+	addi a0, a0, 1
+	_write_token();
+	_advance_token();
+
+	# Write the object definition itself.
+	_compile_line();
+end;
+
+# Advances the source position to the first character that is not a newline.
+proc _skip_newlines();
+begin
+	# Skip newlines.
+	la t0, source_code_position
+	lw t1, (t0)
+
+.skip_newlines_loop:
+	# Any other character, including the terminating zero, stops the loop.
+	lb t2, (t1)
+	li t3, '\n'
+	bne t2, t3, .skip_newlines_end
+
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Process the source code and print the generated code.
+proc _compile();
+begin
+.compile_loop:
+	_skip_newlines();
+
+	# A zero byte ends the source text.
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	beqz t0, .compile_end
+	li t1, '#'
+	beq t0, t1, .compile_comment
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_section
+	li a2, 8 # ".section" length.
+	_memcmp();
+
+	beqz a0, .compile_section
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_type
+	li a2, 5 # ".type" length.
+	_memcmp();
+
+	beqz a0, .compile_type
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_proc
+	li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
+	_memcmp();
+
+	beqz a0, .compile_procedure
+
+	la a0, source_code_position
+	lw a0, (a0)
+	la a1, keyword_global
+	li a2, 6 # ".globl" length.
+	_memcmp();
+
+	beqz a0, .compile_global
+	# Not a known token, exit.
+	goto .compile_end;
+
+.compile_section:
+	_compile_section();
+
+	goto .compile_loop;
+
+.compile_type:
+	_compile_type();
+
+	goto .compile_loop;
+
+.compile_global:
+	_compile_line();
+
+	goto .compile_loop;
+
+.compile_comment:
+	_skip_comment();
+
+	goto .compile_loop;
+
+.compile_procedure:
+	_compile_procedure();
+
+	goto .compile_loop;
+
+.compile_end:
+end;
+
+# Terminates the program. a0 contains the return code.
+#
+# Parameters:
+# a0 - Status code.
+proc _exit();
+begin
+	li a7, 93 # SYS_EXIT
+	ecall
+end;
+
+# Entry point.
+.globl _start
+proc _start();
+begin
+	# Read the source from the standard input.
+	la a0, source_code
+	li a1, 81920 # Buffer size.
+	_read_file();
+	_compile();
+
+	_exit(0);
+
+end;
|
