# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

# Stage4 compiler.
#
# - Taking value of local and global variables. Variables that don't begin
# with "v" are considered global.
# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable;
# 7 words on the stack, 28 - 56, are reserved for procedure arguments (caller side).
# - Take address unary operation "@".

.section .rodata

# Keywords of the source language. They are not null terminated and are
# compared with an explicit length.

.type keyword_section, @object
keyword_section: .ascii ".section"

.type keyword_type, @object
keyword_type: .ascii ".type"

.type keyword_ret, @object
keyword_ret: .ascii "ret"

.type keyword_global, @object
keyword_global: .ascii ".globl"

.type keyword_proc, @object
keyword_proc: .ascii "proc "

.type keyword_end, @object
keyword_end: .ascii "end"

.type keyword_begin, @object
keyword_begin: .ascii "begin"

.type keyword_var, @object
keyword_var: .ascii "var"

# Null terminated fragments of the generated assembly.

# The generated procedures get a 64 byte stack frame. The return address is
# saved at 60(sp) and the frame pointer at 56(sp).

.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n"

.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n"

.type asm_type_directive, @object
asm_type_directive: .string ".type "

.type asm_type_function, @object
asm_type_function: .string ", @function\n"

.type asm_colon, @object
asm_colon: .string ":\n"

.type asm_call, @object
asm_call: .string "\tcall "

.type asm_j, @object
asm_j: .string "\tj "

.type asm_li, @object
asm_li: .string "\tli "

.type asm_lw, @object
asm_lw: .string "\tlw "

.type asm_la, @object
asm_la: .string "\tla "

.type asm_sw, @object
asm_sw: .string "\tsw "

.type asm_addi, @object
asm_addi: .string "\taddi "

.type asm_t0, @object
asm_t0: .string "t0"

.type asm_t1, @object
asm_t1: .string "t1"

.type asm_comma, @object
asm_comma: .string ", "

.type asm_sp, @object
asm_sp: .string "sp"

.section .bss

# When modifying also change the
# read size in the entry point procedure.
.type source_code, @object
source_code: .zero 81920

.section .data

# Pointer to the not yet processed part of the source code.
.type source_code_position, @object
source_code_position: .word source_code

.section .text

# Reads standard input into a buffer.
#
# Parameters:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the result of the read system call (the amount of bytes read) in a0.
proc _read_file();
begin
	mv a2, a1
	mv a1, a0
	# STDIN.
	li a0, 0
	li a7, 63 # SYS_READ.
	ecall
end;

# Writes to the standard output.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write_s();
begin
	mv a2, a1
	mv a1, a0
	# STDOUT.
	li a0, 1
	li a7, 64 # SYS_WRITE.
	ecall
end;

# Writes a number to a string buffer.
#
# The digits are produced from the least significant one into a local buffer
# that grows downwards from s0 - 9 and are then copied to the target buffer.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
proc _print_i();
begin
	li t1, 10
	addi t0, s0, -9

	li t3, 0
	bgez a0, .print_i_digit10
	li t3, 1
	neg a0, a0

.print_i_digit10:
	# The magnitude is treated as unsigned. Negating the smallest negative
	# number yields the same bit pattern, which is only a valid magnitude if it
	# is divided as an unsigned value.
	remu t2, a0, t1
	addi t2, t2, '0'
	sb t2, 0(t0)
	divu a0, a0, t1
	addi t0, t0, -1
	bnez a0, .print_i_digit10

	beqz t3, .print_i_write_call
	li t2, '-'
	sb t2, 0(t0)
	addi t0, t0, -1

.print_i_write_call:
	# t0 points one byte below the first written character.
	mv a0, a1
	addi a1, t0, 1
	# Length = s0 - 9 - t0.
	sub a2, s0, t0
	addi a2, a2, -9
	# Save the length, it is the return value.
	sw a2, 0(sp)
	_memcpy();
	lw a0, 0(sp)
end;

# Writes a number to the standard output.
#
# Parameters:
# a0 - Whole number.
proc _write_i();
begin
	# The number is printed into the beginning of the own stack frame.
	mv a1, sp
	_print_i();
	mv a1, a0
	mv a0, sp
	_write_s();
end;

# Writes a character from a0 into the standard output.
proc _write_c();
begin
	sb a0, 0(sp)
	addi a0, sp, 0
	li a1, 1
	_write_s();
end;

# Write null terminated string.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
	# 0(sp) holds the pointer to the next character to print.
	sw a0, 0(sp)

.write_z_loop:
	# Check for 0 character.
	lb a0, (a0)
	beqz a0, .write_z_end

	# Print the character, it is already in a0.
	_write_c();

	# Advance the input string by one byte.
	lw a0, 0(sp)
	addi a0, a0, 1
	sw a0, 0(sp)

	goto .write_z_loop;

.write_z_end:
end;

# Detects if a0 is an uppercase character.
# Sets a0 to 1 if so, otherwise to 0.
proc _is_upper();
begin
	sltiu t1, a0, 'Z' + 1 # t1 = a0 <= 'Z'
	li t2, 'A' - 1
	sltu t2, t2, a0 # t2 = a0 >= 'A'
	and a0, t2, t1 # a0 = a0 >= 'A' & a0 <= 'Z'
end;

# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower();
begin
	sltiu t1, a0, 'z' + 1 # t1 = a0 <= 'z'
	li t2, 'a' - 1
	sltu t2, t2, a0 # t2 = a0 >= 'a'
	and a0, t2, t1 # a0 = a0 >= 'a' & a0 <= 'z'
end;

# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
	# 0(sp) - Tested character.
	# 4(sp) - Whether the character is an uppercase letter.
	sw a0, 0(sp)

	_is_upper();
	sw a0, 4(sp)

	_is_lower(v0);

	# Combine the lowercase result with the uppercase one.
	lw t1, 4(sp)
	or a0, a0, t1

	# t2 = character == '_'
	lw t2, 0(sp)
	xori t2, t2, '_'
	seqz t2, t2
	or a0, a0, t2
end;

# Detects whether the passed character is a digit
# (a character between '0' and '9').
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
	sltiu t1, a0, '9' + 1 # t1 = a0 <= '9'
	li t2, '0' - 1
	sltu t2, t2, a0 # t2 = a0 >= '0'
	and a0, t2, t1
end;

# Detects whether the passed character is a letter, an underscore or a digit.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if so, to 0 otherwise.
proc _is_alnum();
begin
	# 0(sp) - Tested character.
	# 4(sp) - Result of the alpha check.
	sw a0, 0(sp)

	_is_alpha();
	sw a0, 4(sp)

	_is_digit(v0);

	lw t0, 4(sp)
	or a0, a0, t0
end;

# Reads the next token.
#
# A token is a sequence of dots, letters, digits and underscores beginning at
# the current source code position. The source code position itself is not
# changed.
#
# Returns token length in a0.
proc _read_token();
begin
	# 0(sp) - Current token position.
	# 4(sp) - Token length.
	sw zero, 4(sp)
	la t0, source_code_position
	lw t0, (t0)
	sw t0, 0(sp)

.read_token_loop:
	# Current character.
	lw a0, 0(sp)
	lb a0, (a0)

	# A directive can contain a dot in addition to the alphanumeric characters.
	li t1, '.'
	beq a0, t1, .read_token_next

	_is_alnum();
	beqz a0, .read_token_end

.read_token_next:
	# Advance the token position and the token length.
	lw t0, 0(sp)
	addi t0, t0, 1
	sw t0, 0(sp)

	lw t0, 4(sp)
	addi t0, t0, 1
	sw t0, 4(sp)

	goto .read_token_loop;

.read_token_end:
	lw a0, 4(sp)
end;

# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal, otherwise the difference
# between the first two bytes that differ.
proc _memcmp();
begin
	mv t0, a0
	li a0, 0

.memcmp_loop:
	beqz a2, .memcmp_end

	lbu t1, (t0)
	lbu t2, (a1)
	sub a0, t1, t2

	bnez a0, .memcmp_end

	addi t0, t0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	goto .memcmp_loop;

.memcmp_end:
end;

# Copies memory.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
proc _memcpy();
begin
	mv t0, a0

.memcpy_loop:
	beqz a2, .memcpy_end

	lbu t1, (a1)
	sb t1, (a0)

	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	goto .memcpy_loop;

.memcpy_end:
	mv a0, t0
end;

# Advances the token stream by a0 bytes.
proc _advance_token();
begin
	la t0, source_code_position
	lw t1, (t0)
	add t1, t1, a0
	sw t1, (t0)
end;

# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
	sw a0, 0(sp)

	la a0, source_code_position
	lw a0, (a0)
	lw a1, 0(sp)
	_write_s();

	lw a0, 0(sp)
end;

# Copies a ".section" directive with its name into the output.
proc _compile_section();
begin
	# Print and skip the ".section" (8 characters) directive and a space after it.
	_write_token(9);
	_advance_token();

	# Read the section name.
	_read_token();
	# Print and skip the section name together with the newline.
	addi a0, a0, 1
	_write_token();
	_advance_token();
end;

# Skips a line without printing it.
proc _skip_comment();
begin
	la t0, source_code_position
	lw t1, (t0)

.skip_comment_loop:
	# Check for newline character.
	lb t2, (t1)
	li t3, '\n'
	beq t2, t3, .skip_comment_end

	# Advance the input string by one byte.
	addi t1, t1, 1
	sw t1, (t0)

	goto .skip_comment_loop;

.skip_comment_end:
	# Skip the newline.
	addi t1, t1, 1
	sw t1, (t0)
end;

# Prints and skips a line.
proc _compile_line();
begin
.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)
	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end

	# Print a character. Only a single byte is loaded, the source code
	# position is not necessarily word aligned.
	lb a0, (a1)
	_write_c();

	# Advance the input string by one byte.
	_advance_token(1);

	goto .compile_line_loop;

.compile_line_end:
	_write_c('\n');
	_advance_token(1);
end;

# Compiles an integer literal into "li t0, <literal>".
proc _compile_integer_literal();
begin
	la a0, asm_li
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();

	_read_token();
	_write_token();
	_advance_token();

	_write_c('\n');
end;

# Compiles a character literal into "li t0, '<character>'". The character can
# be escaped with a single backslash.
proc _compile_character_literal();
begin
	la a0, asm_li
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();

	# Print and skip the opening quote.
	_write_c('\'');
	_advance_token(1);

	# Copy the backslash if the character is escaped.
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	li t1, '\\'
	bne a0, t1, .compile_character_literal_end

	_write_c('\\');
	_advance_token(1);

.compile_character_literal_end:
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	_write_c();
	_write_c('\'');
	_write_c('\n');

	# Skip the character and the closing quote.
	_advance_token(2);
end;

# Compiles taking the value of a variable: the address is put into t0 by the
# designator and then replaced with the word at this address, "lw t0, (t0)".
proc _compile_variable_expression();
begin
	_compile_designator();

	la a0, asm_lw
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();
	_write_c('(');
	la a0, asm_t0
	_write_z();
	_write_c(')');
	_write_c('\n');
end;

# Compiles the address operation. The generated code leaves the address of the
# designator in t0.
proc _compile_address_expression();
begin
	# Skip the "@" sign.
	_advance_token(1);
	_compile_designator();
end;

# Compiles an expression. The kind of the expression is determined by its
# first character: a quote begins a character literal, "@" an address
# expression, a digit an integer literal. Everything else is a variable.
#
# The generated code leaves the value of the expression in t0.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	# Save the first character to pass it to _is_digit.
	sw a0, 0(sp)

	li t1, '\''
	beq a0, t1, .compile_expression_character_literal

	li t1, '@'
	beq a0, t1, .compile_expression_address

	_is_digit(v0);
	bnez a0, .compile_expression_integer_literal

	goto .compile_expression_variable;

.compile_expression_character_literal:
	_compile_character_literal();
	goto .compile_expression_end;

.compile_expression_integer_literal:
	_compile_integer_literal();
	goto .compile_expression_end;

.compile_expression_address:
	_compile_address_expression();
	goto .compile_expression_end;

.compile_expression_variable:
	_compile_variable_expression();
	goto .compile_expression_end;

.compile_expression_end:
end;

# Compiles a procedure call.
#
# Each argument is evaluated into t0 and saved on the stack of the generated
# code at 52 - 4 * argument index. After all arguments are evaluated they are
# loaded into the argument registers beginning with the last one, then the
# call instruction is written.
proc _compile_call();
begin
	# Stack variables:
	# v0 - Procedure name length.
	# v4 - Procedure name pointer.
	# v8 - Argument count.
	_read_token();
	sw a0, 0(sp)
	la t0, source_code_position
	lw t0, (t0)
	sw t0, 4(sp)
	sw zero, 8(sp)

	# Skip the identifier and left paren.
	addi a0, a0, 1
	_advance_token();

	# No arguments if the right paren follows immediately.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ')'
	beq t0, t1, .compile_call_finalize

.compile_call_loop:
	_compile_expression();

	# Save the argument on the stack.
	la a0, asm_sw
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();

	# Calculate the stack offset: 52 - (4 * argument_counter)
	lw t0, 8(sp)
	li t1, 4
	mul t0, t0, t1
	li t1, 52
	sub a0, t1, t0
	_write_i();

	_write_c('(');
	la a0, asm_sp
	_write_z();
	_write_c(')');
	_write_c('\n');

	# Add one to the argument counter.
	lw t0, 8(sp)
	addi t0, t0, 1
	sw t0, 8(sp)

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ','
	bne t0, t1, .compile_call_finalize

	# Skip the comma and the space after it.
	_advance_token(2);
	goto .compile_call_loop;

.compile_call_finalize:
	# Load the argument from the stack.
	lw t0, 8(sp)
	beqz t0, .compile_call_end

	# Decrement the argument counter.
	lw t0, 8(sp)
	addi t0, t0, -1
	sw t0, 8(sp)

	# The register name is "a" followed by the argument index.
	la a0, asm_lw
	_write_z();
	_write_c('a');
	lw a0, 8(sp)
	_write_i();
	la a0, asm_comma
	_write_z();

	# Calculate the stack offset: 52 - (4 * argument_counter)
	lw t0, 8(sp)
	li t1, 4
	mul t0, t0, t1
	li t1, 52
	sub a0, t1, t0
	_write_i();

	_write_c('(');
	la a0, asm_sp
	_write_z();
	_write_c(')');
	_write_c('\n');

	goto .compile_call_finalize;

.compile_call_end:
	la a0, asm_call
	_write_z();
	_write_s(v4, v0);

	# Skip the right paren.
	_advance_token(1);
end;

# Compiles a goto statement into "j <label>".
proc _compile_goto();
begin
	# Skip "goto ".
	_advance_token(5);

	# Save the label length.
	_read_token();
	sw a0, 0(sp)

	la a0, asm_j
	_write_z();

	# _write_token returns the token length, it is passed to _advance_token.
	_write_token(v0);
	_advance_token();
end;

# Compiles a local variable name into "addi t0, sp, <offset>", where the
# offset is the part of the name after "v".
proc _compile_local_designator();
begin
	# Skip "v" in the local variable name.
	_advance_token(1);

	la a0, asm_addi
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();
	la a0, asm_sp
	_write_z();
	la a0, asm_comma
	_write_z();

	# Print and skip the local variable stack offset.
	_read_token();
	_write_token();
	_advance_token();

	_write_c('\n');
end;

# Compiles a global variable name into "la t0, <name>".
proc _compile_global_designator();
begin
	la a0, asm_la
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();

	_read_token();
	_write_token();
	_advance_token();

	_write_c('\n');
end;

# Compiles a designator. Names beginning with "v" are local variables, all
# other names are global.
#
# The generated code leaves the address of the variable in t0.
proc _compile_designator();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)

	li t1, 'v'
	beq a0, t1, .compile_designator_local

	goto .compile_designator_global;

.compile_designator_local:
	_compile_local_designator();
	goto .compile_designator_end;

.compile_designator_global:
	_compile_global_designator();
	goto .compile_designator_end;

.compile_designator_end:
end;

# Compiles an assignment. The generated code saves the assignee address at
# 20(sp), evaluates the expression into t0, restores the address into t1 and
# stores t0 at this address.
#
# The last generated instruction is not terminated with a newline, the newline
# is written by _compile_statement.
proc _compile_assignment();
begin
	_compile_designator();

	# Save the assignee address on the stack.
	la a0, asm_sw
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();
	_write_i(20);
	_write_c('(');
	la a0, asm_sp
	_write_z();
	_write_c(')');
	_write_c('\n');

	# Skip the assignment sign (:=) with surrounding whitespaces.
	_advance_token(4);

	# Compile the assignment.
	_compile_expression();

	# Restore the assignee address into t1.
	la a0, asm_lw
	_write_z();
	la a0, asm_t1
	_write_z();
	la a0, asm_comma
	_write_z();
	_write_i(20);
	_write_c('(');
	la a0, asm_sp
	_write_z();
	_write_c(')');
	_write_c('\n');

	# Store the value at the assignee address.
	la a0, asm_sw
	_write_z();
	la a0, asm_t0
	_write_z();
	la a0, asm_comma
	_write_z();
	_write_c('(');
	la a0, asm_t1
	_write_z();
	_write_c(')');
end;

# Compiles a statement. The statement kind is determined by the second
# character of the line, which follows a one character alignment: "_" begins a
# call, "g" a goto and "v" an assignment. All other lines are copied into the
# output as they are.
proc _compile_statement();
begin
	# This is a call if the statement starts with an underscore.
	la t0, source_code_position
	lw t0, (t0)
	# First character after alignment tab.
	addi t0, t0, 1
	lb t0, (t0)

	li t1, '_'
	beq t0, t1, .compile_statement_call

	li t1, 'g'
	beq t0, t1, .compile_statement_goto

	li t1, 'v'
	beq t0, t1, .compile_statement_assignment

	_compile_line();
	goto .compile_statement_end;

.compile_statement_call:
	# Skip the alignment.
	_advance_token(1);
	_compile_call();

	goto .compile_statement_semicolon;

.compile_statement_goto:
	# Skip the alignment.
	_advance_token(1);
	_compile_goto();

	goto .compile_statement_semicolon;

.compile_statement_assignment:
	# Skip the alignment.
	_advance_token(1);
	_compile_assignment();

	goto .compile_statement_semicolon;

.compile_statement_semicolon:
	# Skip the semicolon and the newline after it.
	_advance_token(2);
	_write_c('\n');

.compile_statement_end:
end;

# Compiles statements until a line beginning with "end" is found.
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_end
	li a2, 3 # "end" length.
	_memcmp();

	beqz a0, .compile_procedure_body_epilogue

	_compile_statement();
	goto .compile_procedure_body_loop;

.compile_procedure_body_epilogue:
end;

# Compiles a procedure: writes the type directive, the label, the prologue,
# the compiled body and the epilogue.
proc _compile_procedure();
begin
	# Skip "proc ".
	_advance_token(5);

	_read_token();
	sw a0, 0(sp) # Save the procedure name length.

	# Write .type _procedure_name, @function.
	la a0, asm_type_directive
	_write_z();

	_write_token(v0);

	la a0, asm_type_function
	_write_z();

	# Write procedure label, _procedure_name:
	_write_token(v0);

	la a0, asm_colon
	_write_z();

	# Skip the function name and trailing parens, semicolon, "begin" and newline.
	lw a0, 0(sp)
	addi a0, a0, 10
	_advance_token();

	la a0, asm_prologue
	_write_z();

	_compile_procedure_body();

	# Write the epilogue.
	la a0, asm_epilogue
	_write_z();

	# Skip the "end" keyword, semicolon and newline.
	_advance_token(5);
end;

# Copies a ".type" directive and the line following it into the output.
proc _compile_type();
begin
	# Print and skip the ".type" (5 characters) directive and a space after it.
	_write_token(6);
	_advance_token();

	# Read and print the symbol name.
	_read_token();

	# Print and skip the symbol name, comma, space and @.
	addi a0, a0, 3
	_write_token();
	_advance_token();

	# Read the symbol type.
	_read_token();

	# Print the symbol type and newline.
	addi a0, a0, 1
	_write_token();
	_advance_token();

	# Write the object definition itself.
	_compile_line();
end;

# Advances the source code position to the next character that is not a
# newline. The end of the input is not a newline and stops the loop as well.
proc _skip_newlines();
begin
	la t0, source_code_position
	lw t1, (t0)

.skip_newlines_loop:
	lb t2, (t1)
	li t3, '\n'
	bne t2, t3, .skip_newlines_end

	addi t1, t1, 1
	sw t1, (t0)

	goto .skip_newlines_loop;

.skip_newlines_end:
end;

# Process the source code and print the generated code.
#
# Processing stops at the zero byte or at the first line that does not begin
# with a comment or one of the known directives or keywords.
proc _compile();
begin
.compile_loop:
	_skip_newlines();

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	beqz t0, .compile_end

	li t1, '#'
	beq t0, t1, .compile_comment

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_section
	li a2, 8 # ".section" length.
	_memcmp();
	beqz a0, .compile_section

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_type
	li a2, 5 # ".type" length.
	_memcmp();
	beqz a0, .compile_type

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_proc
	li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
	_memcmp();
	beqz a0, .compile_procedure

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_global
	li a2, 6 # ".globl" length.
	_memcmp();
	beqz a0, .compile_global

	# Not a known token, exit.
	goto .compile_end;

.compile_section:
	_compile_section();
	goto .compile_loop;

.compile_type:
	_compile_type();
	goto .compile_loop;

.compile_global:
	_compile_line();
	goto .compile_loop;

.compile_comment:
	_skip_comment();
	goto .compile_loop;

.compile_procedure:
	_compile_procedure();
	goto .compile_loop;

.compile_end:
end;

# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
proc _exit();
begin
	li a7, 93 # SYS_EXIT
	ecall
end;

# Entry point.
.globl _start
proc _start();
begin
	# Read the source from the standard input.
	la a0, source_code
	li a1, 81920 # Buffer size.
	_read_file();

	_compile();

	_exit(0);
end;