# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

# Stage2 compiler.
#
# It supports declaring and calling procedures without arguments.
# A procedure name should start with an underscore.

.section .rodata

# Keyword strings and their lengths in bytes. They are emitted with .ascii,
# so they carry no terminating NUL and are always used together with a size.
.type keyword_equ, @object
keyword_equ: .ascii ".equ"
.equ KEYWORD_EQU_SIZE, 4

.type keyword_section, @object
keyword_section: .ascii ".section"
.equ KEYWORD_SECTION_SIZE, 8

.type keyword_type, @object
keyword_type: .ascii ".type"
.equ KEYWORD_TYPE_SIZE, 5

.type keyword_ret, @object
keyword_ret: .ascii "ret"
.equ KEYWORD_RET_SIZE, 3

.type keyword_global, @object
keyword_global: .ascii ".globl"
.equ KEYWORD_GLOBAL_SIZE, 6

.type keyword_proc, @object
keyword_proc: .ascii "proc "
.equ KEYWORD_PROC_SIZE, 5

.type keyword_end, @object
keyword_end: .ascii "end"
.equ KEYWORD_END_SIZE, 3

.type keyword_begin, @object
keyword_begin: .ascii "begin"
.equ KEYWORD_BEGIN_SIZE, 5

.type keyword_var, @object
keyword_var: .ascii "var"
.equ KEYWORD_VAR_SIZE, 3

# NUL-terminated pieces of the generated assembly, printed with _write_z.
#
# asm_prologue and asm_epilogue wrap every compiled procedure. They reserve
# a 32-byte stack frame and keep ra at 28(sp) and s0 at 24(sp); the
# procedures in this file use the slots below 24(sp) as their locals.
.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"

.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"

.type asm_type_directive, @object
asm_type_directive: .string ".type "

.type asm_type_function, @object
asm_type_function: .string ", @function\n"

.type asm_colon, @object
asm_colon: .string ":\n"

.type asm_call, @object
asm_call: .string "\tcall "

.type asm_j, @object
asm_j: .string "\tj "

.type asm_li, @object
asm_li: .string "\tli "

.type asm_lw, @object
asm_lw: .string "\tlw "

.type asm_t0, @object
asm_t0: .string "t0"

.type asm_a0, @object
asm_a0: .string "a0"

.type asm_comma, @object
asm_comma: .string ", "

.type asm_sp, @object
asm_sp: .string "(sp)"

.section .bss

# Buffer that receives the source text read from the standard input.
.equ SOURCE_BUFFER_SIZE, 81920
.type source_code, @object
source_code: .zero SOURCE_BUFFER_SIZE

.section .data

# Address of the next unprocessed byte inside source_code.
.type source_code_position, @object
source_code_position: .word source_code

.section .text

# Reads from the standard input into a buffer with a single read system call.
#
# Parameters:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns in a0 whatever the system call returned (the amount of bytes read).
proc _read_file();
begin
	li a7, 63 # SYS_READ.
	# Shift the arguments into the system call argument order.
	mv a2, a1
	mv a1, a0
	li a0, 0 # STDIN.
	ecall
end;

# Writes a buffer to the standard output with a single write system call.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write();
begin
	li a7, 64 # SYS_WRITE.
	# Shift the arguments into the system call argument order.
	mv a2, a1
	mv a1, a0
	li a0, 1 # STDOUT.
	ecall
end;

# Writes the character passed in a0 to the standard output.
proc _write_c();
begin
	# _write needs an address, so the character goes through a stack slot.
	sb a0, 20(sp)
	li a1, 1
	addi a0, sp, 20
	_write();
end;

# Writes a null terminated string, one character at a time.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
	# 20(sp) holds the address of the next character to print.
	sw a0, 20(sp)

.write_z_next:
	# Fetch the current character and stop at the terminating 0.
	lw a0, 20(sp)
	lb a0, 0(a0)
	beqz a0, .write_z_done

	_write_c();

	# Move on to the following character.
	lw a0, 20(sp)
	addi a0, a0, 1
	sw a0, 20(sp)

	j .write_z_next

.write_z_done:
end;

# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_upper();
begin
	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'

	li t0, 'A' - 1
	sltu t1, t0, a0 # t1 = a0 >= 'A'

	and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
end;

# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower();
begin
	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'

	li t0, 'a' - 1
	sltu t2, t0, a0 # t2 = a0 >= 'a'

	and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
end;

# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
	# 20(sp) keeps the tested character, 16(sp) the uppercase test result.
	sw a0, 20(sp)

	_is_upper();
	sw a0, 16(sp)

	lw a0, 20(sp)
	_is_lower();

	# a0 = lowercase | uppercase
	lw t0, 16(sp)
	or a0, a0, t0

	# t1 = character == '_'
	lw t1, 20(sp)
	xori t1, t1, '_'
	seqz t1, t1

	or a0, a0, t1
end;

# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'

	li t0, '0' - 1
	sltu t1, t0, a0 # t1 = a0 >= '0'

	and a0, t1, t2 # a0 = a0 >= '0' & a0 <= '9'
end;

# Measures the token that starts at the current source position. A token is
# a run of dots, alpha characters (underscore included) and digits. The source
# position itself is not changed.
#
# Returns token length in a0.
proc _read_token();
begin
	# 20(sp) is the scanning position, 16(sp) the length counted so far.
	la t0, source_code_position
	lw t0, 0(t0)
	sw t0, 20(sp)
	sw zero, 16(sp)

.read_token_scan:
	lw t0, 20(sp)
	lb t0, 0(t0) # Current character.

	# A directive can contain a dot and characters.
	li t1, '.'
	beq t0, t1, .read_token_accept

	mv a0, t0
	_is_alpha();
	bnez a0, .read_token_accept

	lw a0, 20(sp)
	lb a0, 0(a0)
	_is_digit();
	# Anything that is neither a dot, an alpha character nor a digit ends the token.
	beqz a0, .read_token_finish

.read_token_accept:
	# Count the character and step to the next one.
	lw t0, 16(sp)
	addi t0, t0, 1
	sw t0, 16(sp)

	lw t0, 20(sp)
	addi t0, t0, 1
	sw t0, 20(sp)

	j .read_token_scan

.read_token_finish:
	lw a0, 16(sp)
end;

# Compares two memory regions byte by byte.
#
# Parameters:
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal, otherwise the difference
# between the first pair of bytes that differ.
proc _memcmp();
begin
	# a0 becomes the result, so the first pointer moves to t0.
	mv t0, a0
	li a0, 0

.Lmemcmp_next:
	# Nothing left to compare: the regions are equal and a0 is 0.
	beqz a2, .Lmemcmp_done

	lbu t1, 0(t0)
	lbu t2, 0(a1)
	sub a0, t1, t2
	bnez a0, .Lmemcmp_done

	addi a2, a2, -1
	addi t0, t0, 1
	addi a1, a1, 1

	j .Lmemcmp_next

.Lmemcmp_done:
end;

# Advances the token stream by a0 bytes.
proc _advance_token();
begin
	la t0, source_code_position
	lw t1, 0(t0)
	add t1, a0, t1
	sw t1, 0(t0)
end;

# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
	# Keep the length, it has to be handed back to the caller.
	sw a0, 20(sp)
	mv a1, a0

	la a0, source_code_position
	lw a0, 0(a0)
	_write();

	lw a0, 20(sp)
end;

# Copies a .section directive to the output.
proc _compile_section();
begin
	# The directive itself and the space following it.
	li a0, KEYWORD_SECTION_SIZE + 1
	_write_token();
	_advance_token();

	# The section name and the newline following it.
	_read_token();
	addi a0, a0, 1
	_write_token();
	_advance_token();
end;

# Skips the rest of the current line without printing it.
proc _skip_comment();
begin
	# Moves the source position past the current line, newline included,
	# and prints nothing. If a NUL is met before the newline, the position
	# is left on the NUL, so a comment on the last line without a trailing
	# newline does not make the scan run past the end of the source text.
	la t0, source_code_position
	lw t1, (t0)

.skip_comment_loop:
	lb t2, (t1)
	beqz t2, .skip_comment_eof

	# Check for newline character.
	li t3, '\n'
	beq t2, t3, .skip_comment_end

	# Advance the input string by one byte.
	addi t1, t1, 1
	sw t1, (t0)

	j .skip_comment_loop

.skip_comment_end:
	# Skip the newline.
	addi t1, t1, 1
	sw t1, (t0)

.skip_comment_eof:
end;

# Prints and skips a line, including its newline.
#
# NOTE: the line is expected to be terminated by a newline, a NUL does not
# stop the loop.
proc _compile_line();
begin
.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)

	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end

	# Print the character that was just loaded. Only this one byte is
	# needed; loading a word from the source position would be a misaligned
	# access and could read past the end of the buffer.
	mv a0, t0
	_write_c();

	# Advance the input string by one byte.
	li a0, 1
	_advance_token();

	j .compile_line_loop

.compile_line_end:
	li a0, '\n'
	_write_c();

	li a0, 1
	_advance_token();
end;

# Compiles the integer token at the current position into "li a0, <token>"
# and skips the token.
proc _compile_integer_literal();
begin
	la a0, asm_li
	_write_z();

	la a0, asm_a0
	_write_z();

	la a0, asm_comma
	_write_z();

	_read_token();
	_write_token();
	_advance_token();

	li a0, '\n'
	_write_c();
end;

# Compiles a character literal into "li a0, '<characters>'".
#
# The byte at the current position (the opening quote) and everything up to
# the next single quote is copied to the output, then the closing quote is
# printed and skipped.
#
# NOTE: the scan stops at the first quote after the opening one, so a
# backslash-escaped quote inside the literal is not handled.
proc _compile_character_literal();
begin
	la a0, asm_li
	_write_z();

	la a0, asm_a0
	_write_z();

	la a0, asm_comma
	_write_z();

.compile_character_literal_loop:
	# Copy one byte of the literal.
	la a0, source_code_position
	lw a0, (a0)
	li a1, 1
	_write();

	li a0, 1
	_advance_token();

	# Continue until the closing quote is the current byte.
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	li t1, '\''
	bne a0, t1, .compile_character_literal_loop

	li a0, '\''
	_write_c();

	li a0, '\n'
	_write_c();

	# Skip the closing quote.
	li a0, 1
	_advance_token();
end;

# Compiles a variable reference into "lw a0, NN(sp)".
#
# The reference is three bytes long: one leading character (the caller
# dispatches on 'v') followed by two characters which are copied verbatim
# as the stack offset.
proc _compile_variable_expression();
begin
	la a0, asm_lw
	_write_z();

	la a0, asm_a0
	_write_z();

	la a0, asm_comma
	_write_z();

	# Print the two characters after the leading one.
	la a0, source_code_position
	lw a0, (a0)
	addi a0, a0, 1
	li a1, 2
	_write();

	la a0, asm_sp
	_write_z();

	li a0, '\n'
	_write_c();

	# Skip the whole reference.
	li a0, 3
	_advance_token();
end;

# Compiles the expression at the current position so that its value ends up
# in a0 of the generated code. The kind of the expression is chosen by its
# first character: a single quote starts a character literal, 'v' a variable
# reference and a digit an integer literal. Anything else is left untouched.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)

	li t1, '\''
	beq a0, t1, .compile_expression_character_literal

	li t1, 'v'
	beq a0, t1, .compile_expression_variable

	_is_digit();
	bnez a0, .compile_expression_integer_literal

	j .compile_expression_end

.compile_expression_character_literal:
	_compile_character_literal();
	j .compile_expression_end

.compile_expression_integer_literal:
	_compile_integer_literal();
	j .compile_expression_end

.compile_expression_variable:
	_compile_variable_expression();
	j .compile_expression_end

.compile_expression_end:
end;

# Compiles a procedure call of the form "_name(<optional expression>)".
#
# The current position has to be on the first character of the procedure
# name. The optional argument is compiled first, then "call _name" is
# printed. The position ends up right after the closing paren.
proc _compile_call();
begin
	# 20(sp) is the length of the procedure name, 16(sp) its address.
	_read_token();
	sw a0, 20(sp)

	la t0, source_code_position
	lw t0, (t0)
	sw t0, 16(sp)

	# Skip the identifier and left paren.
	addi a0, a0, 1
	_advance_token();

	# An immediately following right paren means there is no argument.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	li t1, ')'
	beq t0, t1, .compile_call_finalize

	_compile_expression();

.compile_call_finalize:
	la a0, asm_call
	_write_z();

	lw a0, 16(sp)
	lw a1, 20(sp)
	_write();

	# Skip the right paren.
	li a0, 1
	_advance_token();
end;

# Compiles a jump statement into "j <label>".
#
# The first 5 bytes at the current position are skipped without being
# looked at (presumably the "goto " keyword, the caller only checks its
# first letter), the token after them is used as the jump target.
proc _compile_goto();
begin
	li a0, 5
	_advance_token();

	# 20(sp) is the length of the label.
	_read_token();
	sw a0, 20(sp)

	la a0, asm_j
	_write_z();

	lw a0, 20(sp)
	_write_token();
	_advance_token();
end;

# Compiles one line of a procedure body.
#
# The second character of the line (the first one after the alignment tab)
# selects the statement kind: '_' is a procedure call, 'g' is a jump, every
# other line is copied to the output unchanged.
proc _compile_statement();
begin
	la t0, source_code_position
	lw t0, (t0)
	# First character after alignment tab.
	addi t0, t0, 1
	lb t0, (t0)

	li t1, '_'
	beq t0, t1, .compile_statement_call

	li t1, 'g'
	beq t0, t1, .compile_statement_goto

	_compile_line();
	j .compile_statement_end

.compile_statement_call:
	# Skip the alignment tab.
	li a0, 1
	_advance_token();
	_compile_call();

	j .compile_statement_semicolon

.compile_statement_goto:
	# Skip the alignment tab.
	li a0, 1
	_advance_token();
	_compile_goto();

	j .compile_statement_semicolon

.compile_statement_semicolon:
	# Skip the semicolon and the newline, end the generated line.
	li a0, 2
	_advance_token();

	li a0, '\n'
	_write_c();

.compile_statement_end:
end;

# Compiles statements until the current position starts with "end".
# The "end" itself is not consumed.
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_end
	li a2, KEYWORD_END_SIZE
	_memcmp();

	beqz a0, .compile_procedure_body_epilogue

	_compile_statement();
	j .compile_procedure_body_loop

.compile_procedure_body_epilogue:
end;

# Compiles a procedure declaration.
#
# Prints the .type directive and the label of the procedure, then the
# prologue, the compiled body and the epilogue.
proc _compile_procedure();
begin
	# Skip "proc ".
	li a0, KEYWORD_PROC_SIZE
	_advance_token();

	_read_token();
	sw a0, 20(sp) # Save the procedure name length.

	# Write .type _procedure_name, @function.
	la a0, asm_type_directive
	_write_z();

	lw a0, 20(sp)
	_write_token();

	la a0, asm_type_function
	_write_z();

	# Write procedure label, _procedure_name:
	lw a0, 20(sp)
	_write_token();

	la a0, asm_colon
	_write_z();

	# Skip the function name and trailing parens, semicolon, "begin" and newline.
	lw a0, 20(sp)
	addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4
	_advance_token();

	la a0, asm_prologue
	_write_z();

	_compile_procedure_body();

	# Write the epilogue.
	la a0, asm_epilogue
	_write_z();

	# Skip "end", the semicolon and the newline.
	li a0, KEYWORD_END_SIZE + 2
	_advance_token();
end;

# Copies a .type directive and the line following it to the output.
proc _compile_type();
begin
	# Print and skip the .type directive and a space after it.
	li a0, KEYWORD_TYPE_SIZE + 1
	_write_token();
	_advance_token();

	# Print and skip the symbol name, comma, space and @.
	_read_token();
	addi a0, a0, 3
	_write_token();
	_advance_token();

	# Print and skip the symbol type and newline.
	_read_token();
	addi a0, a0, 1
	_write_token();
	_advance_token();

	# Write the object definition itself.
	_compile_line();
end;

# Copies an .equ directive to the output.
proc _compile_equ();
begin
	# Print and skip the .equ directive and a space after it.
	li a0, KEYWORD_EQU_SIZE + 1
	_write_token();
	_advance_token();

	# Print and skip the constant name, comma and space.
	_read_token();
	addi a0, a0, 2
	_write_token();
	_advance_token();

	# Print and skip the constant value and newline.
	_read_token();
	addi a0, a0, 1
	_write_token();
	_advance_token();
end;

# Moves the source position to the first byte that is not a newline.
proc _skip_newlines();
begin
	la t0, source_code_position
	lw t1, (t0)

.skip_newlines_loop:
	# Any byte other than a newline, the terminating NUL included, ends the loop.
	lb t2, (t1)
	li t3, '\n'
	bne t2, t3, .skip_newlines_end

	addi t1, t1, 1
	sw t1, (t0)

	j .skip_newlines_loop

.skip_newlines_end:
end;

# Process the source code and print the generated code.
#
# Each iteration handles one top level construct: a comment, an .equ,
# .section, .type or .globl directive, or a procedure. Compilation stops at
# the terminating NUL, and also, without any message, at the first construct
# that is not recognized.
proc _compile();
begin
.compile_loop:
	_skip_newlines();

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	beqz t0, .compile_end

	li t1, '#'
	beq t0, t1, .compile_comment

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_equ
	li a2, KEYWORD_EQU_SIZE
	_memcmp();
	beqz a0, .compile_equ

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_section
	li a2, KEYWORD_SECTION_SIZE
	_memcmp();
	beqz a0, .compile_section

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_type
	li a2, KEYWORD_TYPE_SIZE
	_memcmp();
	beqz a0, .compile_type

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_proc
	li a2, KEYWORD_PROC_SIZE
	_memcmp();
	beqz a0, .compile_procedure

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_global
	li a2, KEYWORD_GLOBAL_SIZE
	_memcmp();
	beqz a0, .compile_global

	# Not a known token, exit.
	j .compile_end

.compile_equ:
	_compile_equ();
	j .compile_loop

.compile_section:
	_compile_section();
	j .compile_loop

.compile_type:
	_compile_type();
	j .compile_loop

.compile_global:
	# A .globl directive is copied to the output as it is.
	_compile_line();
	j .compile_loop

.compile_comment:
	_skip_comment();
	j .compile_loop

.compile_procedure:
	_compile_procedure();
	j .compile_loop

.compile_end:
end;

# Entry point.
.globl _start
proc _start();
begin
	# Read the source from the standard input. The last byte of the zero
	# initialized buffer is never written, so the text is always followed
	# by the NUL the compiler stops on.
	la a0, source_code
	li a1, SOURCE_BUFFER_SIZE - 1 # Buffer size without the terminator.
	_read_file();

	_compile();

	# Call exit.
	li a0, 0 # Use 0 return code.
	li a7, 93 # SYS_EXIT.
	ecall
end;