# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

# A tiny compiler that reads its source from the standard input and prints
# assembly to the standard output.
#
# The code is written for 32-bit RISC-V (the return address and pointers are
# saved with sw/lw) and talks to the kernel with ecall, passing the system
# call number in a7 and the arguments in a0-a2.
#
# Every function uses the same 32 byte stack frame: ra at 28(sp), s0 at 24(sp)
# and local variables below that.
#
# The source code is kept in a zero initialized buffer, so the end of the input
# is marked by a NUL byte. source_code_position always points to the next
# unprocessed byte.

.equ SOURCE_BUFFER_SIZE, 81920
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2

.section .rodata

.type keyword_equ, @object
keyword_equ: .ascii ".equ"
.equ KEYWORD_EQU_SIZE, 4

.type keyword_section, @object
keyword_section: .ascii ".section"
.equ KEYWORD_SECTION_SIZE, 8

.type keyword_type, @object
keyword_type: .ascii ".type"
.equ KEYWORD_TYPE_SIZE, 5

.type keyword_type_object, @object
keyword_type_object: .ascii "object"
.equ KEYWORD_TYPE_OBJECT_SIZE, 6

.type keyword_type_function, @object
keyword_type_function: .ascii "function"
.equ KEYWORD_TYPE_FUNCTION_SIZE, 8

.type keyword_ret, @object
keyword_ret: .ascii "ret"
.equ KEYWORD_RET_SIZE, 3

.type keyword_global, @object
keyword_global: .ascii ".globl"
.equ KEYWORD_GLOBAL_SIZE, 6

.type keyword_proc, @object
keyword_proc: .ascii "proc "
.equ KEYWORD_PROC_SIZE, 5

.type keyword_end, @object
keyword_end: .ascii "end"
.equ KEYWORD_END_SIZE, 3

.type keyword_begin, @object
keyword_begin: .ascii "begin"
.equ KEYWORD_BEGIN_SIZE, 5

.type keyword_var, @object
keyword_var: .ascii "var"
.equ KEYWORD_VAR_SIZE, 3

.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"

.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"

.type asm_type_directive, @object
asm_type_directive: .string ".type "

.type asm_type_function, @object
asm_type_function: .string ", @function\n"

.type asm_colon, @object
asm_colon: .string ":\n"

.type asm_call, @object
asm_call: .string "\tcall "

.section .bss

.type source_code, @object
source_code: .zero SOURCE_BUFFER_SIZE

.type source_code_position, @object
source_code_position: .word 0

.section .text

# Reads standard input into a buffer. Reading is repeated until the end of the
# input is reached, an error occurs or the buffer is full, because a single
# read system call is allowed to return less data than is available.
#
# Parameters:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the total amount of bytes stored in the buffer in a0.
.type _read_file, @function
_read_file:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	sw a0, 20(sp) # Next free byte in the buffer.
	sw a1, 16(sp) # Remaining capacity.
	sw zero, 12(sp) # Bytes read so far.

.read_file_loop:
	lw a2, 16(sp)
	beqz a2, .read_file_end # The buffer is full.

	li a0, STDIN
	lw a1, 20(sp)
	li a7, SYS_READ
	ecall

	# 0 means end of input, a negative value is an error code.
	blez a0, .read_file_end

	# Account for the bytes that were just read.
	lw t0, 20(sp)
	add t0, t0, a0
	sw t0, 20(sp)

	lw t0, 16(sp)
	sub t0, t0, a0
	sw t0, 16(sp)

	lw t0, 12(sp)
	add t0, t0, a0
	sw t0, 12(sp)

	j .read_file_loop

.read_file_end:
	lw a0, 12(sp)

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Writes a character from a0 into the standard output.
.type _write_c, @function
_write_c:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# The write system call needs a pointer, so spill the character
	# onto the stack first.
	sb a0, 20(sp)
	li a0, STDOUT
	addi a1, sp, 20
	li a2, 1
	li a7, SYS_WRITE
	ecall

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Writes a null terminated string into the standard output.
# The terminating null byte is not written.
#
# Parameters:
# a0 - String.
.type _write_z, @function
_write_z:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	mv a1, a0 # Start of the string.

.write_z_loop:
	# Move a0 forward until it points to the 0 character.
	lb t0, (a0)
	beqz t0, .write_z_end
	addi a0, a0, 1
	j .write_z_loop

.write_z_end:
	# Write the whole string with a single system call.
	sub a2, a0, a1
	beqz a2, .write_z_epilogue # Nothing to write for an empty string.
	li a0, STDOUT
	li a7, SYS_WRITE
	ecall

.write_z_epilogue:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
.type _is_upper, @function
_is_upper:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	li t0, 'A' - 1
	sltu t1, t0, a0 # t1 = a0 >= 'A'
	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
	and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
.type _is_lower, @function
_is_lower:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	li t0, 'a' - 1
	sltu t2, t0, a0 # t2 = a0 >= 'a'
	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
	and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
.type _is_alpha, @function
_is_alpha:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	sw a0, 20(sp) # Tested character.

	call _is_upper
	sw a0, 16(sp) # Whether the character is an uppercase letter.

	lw a0, 20(sp)
	call _is_lower

	# t1 = character == '_'
	lw t0, 20(sp)
	xori t1, t0, '_'
	seqz t1, t1

	# Combine the three checks.
	lw t0, 16(sp)
	or a0, a0, t0
	or a0, a0, t1

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
.type _is_digit, @function
_is_digit:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	li t0, '0' - 1
	sltu t1, t0, a0 # t1 = a0 >= '0'
	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
	and a0, t1, t2

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Measures the token starting at the current source code position.
# A token is a run of dots, letters, underscores and digits.
# The source code position itself is not changed.
#
# Returns token length in a0.
.type _read_token, @function
_read_token:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	la t0, source_code_position
	lw t0, (t0)
	sw t0, 20(sp) # Current position inside the token.
	sw zero, 16(sp) # Token length.

.read_token_loop:
	lw t0, 20(sp)
	lb t0, (t0) # Current character.

	# First we try to read a directive.
	# A directive can contain a dot and characters.
	li t1, '.'
	beq t0, t1, .read_token_next

	lw a0, 20(sp)
	lb a0, (a0)
	call _is_alpha
	bnez a0, .read_token_next

	lw a0, 20(sp)
	lb a0, (a0)
	call _is_digit
	bnez a0, .read_token_next

	j .read_token_end

.read_token_next:
	# Advance the position inside the token and the token length.
	lw t0, 16(sp)
	addi t0, t0, 1
	sw t0, 16(sp)

	lw t0, 20(sp)
	addi t0, t0, 1
	sw t0, 20(sp)

	j .read_token_loop

.read_token_end:
	lw a0, 16(sp)

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compares two memory regions byte by byte.
#
# Parameters:
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal, otherwise the difference
# between the first two bytes that do not match.
.type _memcmp, @function
_memcmp:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	mv t0, a0
	li a0, 0 # Regions of length 0 are equal.

.Lmemcmp_loop:
	beqz a2, .Lmemcmp_end

	lbu t1, (t0)
	lbu t2, (a1)
	sub a0, t1, t2
	bnez a0, .Lmemcmp_end

	addi t0, t0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	j .Lmemcmp_loop

.Lmemcmp_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Advances the token stream by a0 bytes.
.type _advance_token, @function
_advance_token:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# source_code_position += a0
	la t0, source_code_position
	lw t1, (t0)
	add t1, t1, a0
	sw t1, (t0)

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Copies a .section directive with its section name to the output.
.type _compile_section, @function
_compile_section:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Print the .section directive and a space after it.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	li a2, KEYWORD_SECTION_SIZE + 1
	li a7, SYS_WRITE
	ecall

	# Skip the .section directive and the space.
	li a0, KEYWORD_SECTION_SIZE + 1
	call _advance_token

	# Read the section name.
	call _read_token
	sw a0, 16(sp)

	# Print the section name and newline.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 16(sp)
	addi a2, a2, 1
	li a7, SYS_WRITE
	ecall

	# Skip the section name and newline.
	lw a0, 16(sp)
	addi a0, a0, 1
	call _advance_token

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Skips a line without printing it. The position is left after the newline,
# or on the terminating 0 character if the line is the last one and has no
# newline.
.type _skip_comment, @function
_skip_comment:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	la t0, source_code_position
	lw t1, (t0)
	li t3, '\n'

.skip_comment_loop:
	lb t2, (t1)
	# Do not run past the end of the source code.
	beqz t2, .skip_comment_end

	# Advance the input string by one byte, this also skips the newline.
	addi t1, t1, 1
	bne t2, t3, .skip_comment_loop

.skip_comment_end:
	sw t1, (t0)

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Prints and skips a line including its newline. If the source code ends
# before a newline is found, everything up to the end is printed.
.type _compile_line, @function
_compile_line:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	la t0, source_code_position
	lw a1, (t0) # Start of the line.
	mv t1, a1 # Scanning cursor.
	li t2, '\n'

.compile_line_loop:
	lb t3, (t1)
	# End of the source code, there is no newline to print.
	beqz t3, .compile_line_end

	# Advance the cursor by one byte, this also steps over the newline.
	addi t1, t1, 1
	bne t3, t2, .compile_line_loop

.compile_line_end:
	# The cursor is the new source code position.
	sw t1, (t0)

	# Print the whole line with a single system call.
	sub a2, t1, a1
	beqz a2, .compile_line_epilogue
	li a0, STDOUT
	li a7, SYS_WRITE
	ecall

.compile_line_epilogue:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles an object definition, which is the single line following
# the .type directive. The line is printed as is.
.type _compile_object, @function
_compile_object:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	call _compile_line

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Prints the lines of a function up to and including the first line that has
# the ret keyword after its first character.
.type _compile_function_statements, @function
_compile_function_statements:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

.compile_function_statements_loop:
	la t0, source_code_position
	lw t1, (t0)

	# Stop at the end of the source code if the function is not terminated.
	lb t2, (t1)
	beqz t2, .compile_function_statements_epilogue

	addi a0, t1, 1 # Skip the tab.
	la a1, keyword_ret
	li a2, KEYWORD_RET_SIZE
	call _memcmp
	beqz a0, .compile_function_statements_end

	call _compile_line
	j .compile_function_statements_loop

.compile_function_statements_end:
	# Print the line with the return statement.
	call _compile_line

.compile_function_statements_epilogue:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles a procedure call. The source code position should point
# to the name of the called procedure.
.type _compile_call, @function
_compile_call:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	call _read_token
	sw a0, 20(sp) # Length of the procedure name.

	la a0, asm_call
	call _write_z

	# Print the procedure name.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 20(sp)
	li a7, SYS_WRITE
	ecall

	# Skip the name, parens, semicolon and newline.
	lw a0, 20(sp)
	addi a0, a0, 4
	call _advance_token

	li a0, '\n'
	call _write_c

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles a single statement of a procedure body. A statement is either
# a procedure call or a line that is printed as is.
.type _compile_statement, @function
_compile_statement:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# This is a call if the statement starts with an underscore.
	la t0, source_code_position
	lw t0, (t0)
	# First character after alignment tab.
	addi t0, t0, 1
	lb t0, (t0)

	li t1, '_'
	beq t0, t1, .compile_statement_call

	call _compile_line
	j .compile_statement_end

.compile_statement_call:
	# Skip the alignment tab.
	li a0, 1
	call _advance_token
	call _compile_call

.compile_statement_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles the statements of a procedure until the end keyword is found
# at the beginning of a line. The end keyword itself is not skipped.
.type _compile_procedure_body, @function
_compile_procedure_body:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

.compile_procedure_body_loop:
	la a0, source_code_position
	lw a0, (a0)

	# Stop at the end of the source code if the procedure is not terminated.
	lb t0, (a0)
	beqz t0, .compile_procedure_body_epilogue

	la a1, keyword_end
	li a2, KEYWORD_END_SIZE
	call _memcmp
	beqz a0, .compile_procedure_body_epilogue

	call _compile_statement
	j .compile_procedure_body_loop

.compile_procedure_body_epilogue:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles a procedure definition into a function with the standard
# prologue and epilogue.
.type _compile_procedure, @function
_compile_procedure:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Skip "proc ".
	li a0, KEYWORD_PROC_SIZE
	call _advance_token

	call _read_token
	sw a0, 20(sp) # Save the procedure name length.

	# Write .type _procedure_name, @function.
	la a0, asm_type_directive
	call _write_z

	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 20(sp)
	li a7, SYS_WRITE
	ecall

	la a0, asm_type_function
	call _write_z

	# Write procedure label, _procedure_name:
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 20(sp)
	li a7, SYS_WRITE
	ecall

	la a0, asm_colon
	call _write_z

	# Skip the function name and trailing parens, semicolon, "begin" and newline.
	lw a0, 20(sp)
	addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4
	call _advance_token

	la a0, asm_prologue
	call _write_z

	call _compile_procedure_body

	# Write the epilogue.
	la a0, asm_epilogue
	call _write_z

	# There is no end keyword to skip if the source code ended inside
	# of the procedure body.
	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	beqz t0, .compile_procedure_end

	# Skip the end keyword and the two characters following it.
	li a0, KEYWORD_END_SIZE + 2
	call _advance_token

.compile_procedure_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles a function written in assembly. The function is printed as is.
.type _compile_function, @function
_compile_function:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Write the function header.
	call _compile_line

	call _compile_function_statements

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Copies a .type directive to the output and compiles the object
# or function definition that follows it.
.type _compile_type, @function
_compile_type:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Print the .type directive and a space after it.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	li a2, KEYWORD_TYPE_SIZE + 1
	li a7, SYS_WRITE
	ecall

	# Skip the .type directive and the space.
	li a0, KEYWORD_TYPE_SIZE + 1
	call _advance_token

	# Read the symbol name.
	call _read_token
	sw a0, 20(sp)

	# Print the symbol name, comma, space and @.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 20(sp)
	addi a2, a2, 3
	li a7, SYS_WRITE
	ecall

	# Skip the symbol name, comma, space and @.
	lw a0, 20(sp)
	addi a0, a0, 3
	call _advance_token

	# Read the symbol type.
	call _read_token
	sw a0, 16(sp)

	# Remember where the symbol type begins, it is examined
	# after the position has been advanced.
	la t0, source_code_position
	lw t0, (t0)
	sw t0, 12(sp)

	# Print the symbol type and newline.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 16(sp)
	addi a2, a2, 1
	li a7, SYS_WRITE
	ecall

	# Skip the symbol type and newline.
	lw a0, 16(sp)
	addi a0, a0, 1
	call _advance_token

	lw a0, 12(sp)
	la a1, keyword_type_object
	li a2, KEYWORD_TYPE_OBJECT_SIZE
	call _memcmp
	beqz a0, .compile_type_object

	lw a0, 12(sp)
	la a1, keyword_type_function
	li a2, KEYWORD_TYPE_FUNCTION_SIZE
	call _memcmp
	beqz a0, .compile_type_function

	# Unknown symbol type, nothing follows the directive.
	j .compile_type_end

.compile_type_object:
	call _compile_object
	j .compile_type_end

.compile_type_function:
	call _compile_function

.compile_type_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Copies an .equ directive with the constant name and value to the output.
.type _compile_equ, @function
_compile_equ:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Print the .equ directive and a space after it.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	li a2, KEYWORD_EQU_SIZE + 1
	li a7, SYS_WRITE
	ecall

	# Skip the .equ directive and the space.
	li a0, KEYWORD_EQU_SIZE + 1
	call _advance_token

	# Read the constant name.
	call _read_token
	sw a0, 20(sp)

	# Print the constant name, comma and space.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 20(sp)
	addi a2, a2, 2
	li a7, SYS_WRITE
	ecall

	# Skip the constant name, comma and the space after it.
	lw a0, 20(sp)
	addi a0, a0, 2
	call _advance_token

	# Read the constant value.
	call _read_token
	sw a0, 16(sp)

	# Print the constant value and newline.
	li a0, STDOUT
	la a1, source_code_position
	lw a1, (a1)
	lw a2, 16(sp)
	addi a2, a2, 1
	li a7, SYS_WRITE
	ecall

	# Skip the constant value and newline. The length is passed explicitly
	# in a0 instead of relying on the value returned by the system call.
	lw a0, 16(sp)
	addi a0, a0, 1
	call _advance_token

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Moves the source code position to the next character that is not a newline.
.type _skip_newlines, @function
_skip_newlines:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	la t0, source_code_position
	lw t1, (t0)
	li t3, '\n'

.skip_newlines_loop:
	# Any other character, including the terminating 0, ends the loop.
	lb t2, (t1)
	bne t2, t3, .skip_newlines_end

	addi t1, t1, 1
	sw t1, (t0)

	j .skip_newlines_loop

.skip_newlines_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Process the source code and print the generated code.
# Processing stops at the end of the source code or at the first line
# that does not begin with a known keyword.
.type _compile, @function
_compile:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

.compile_loop:
	call _skip_newlines

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	beqz t0, .compile_end # End of the source code.

	li t1, '#'
	beq t0, t1, .compile_comment

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_equ
	li a2, KEYWORD_EQU_SIZE
	call _memcmp
	beqz a0, .compile_equ

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_section
	li a2, KEYWORD_SECTION_SIZE
	call _memcmp
	beqz a0, .compile_section

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_type
	li a2, KEYWORD_TYPE_SIZE
	call _memcmp
	beqz a0, .compile_type

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_proc
	li a2, KEYWORD_PROC_SIZE
	call _memcmp
	beqz a0, .compile_procedure

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_global
	li a2, KEYWORD_GLOBAL_SIZE
	call _memcmp
	beqz a0, .compile_global

	j .compile_end # Not a known token, exit.

.compile_equ:
	call _compile_equ
	j .compile_loop

.compile_section:
	call _compile_section
	j .compile_loop

.compile_type:
	call _compile_type
	j .compile_loop

.compile_global:
	call _compile_line
	j .compile_loop

.compile_comment:
	call _skip_comment
	j .compile_loop

.compile_procedure:
	call _compile_procedure
	j .compile_loop

.compile_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Entry point.
.globl _start
.type _start, @function
_start:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Read the source from the standard input. The last byte of the buffer
	# is never written to, so the source is always terminated by a 0 byte.
	la a0, source_code
	li a1, SOURCE_BUFFER_SIZE - 1 # Buffer size.
	call _read_file

	# Save the pointer to the beginning of the source code in a global variable.
	la t0, source_code
	la t1, source_code_position
	sw t0, (t1)

	call _compile

	# Call exit.
	li a0, 0 # Use 0 return code.
	li a7, SYS_EXIT
	ecall

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret