# s1 - Contains the current position in the source text.
#
# Every routine below reads the source through s1 and most of them advance it.
# Output is produced by calling write_out (defined elsewhere) with
# a0 = address of the bytes and a1 = number of bytes.

.global _start # Program entry point.

.section .rodata

.equ SOURCE_BUFFER_SIZE, 2048

# Fixed assembler fragments that are emitted verbatim.
asm_begin: .ascii ".text\n.global _start\n_start:\naddi sp, sp, -64\nsw ra, 60(sp)\nsw s0, 56(sp)\naddi s0, sp, 64\n"
.equ ASM_BEGIN_SIZE, . - asm_begin
asm_end: .ascii "addi a0, zero, 0\naddi a7, zero, 93\necall\nlw ra, 60(sp)\nlw s0, 56(sp)\naddi sp, sp, 64\nret\n"
.equ ASM_END_SIZE, . - asm_end
asm_program: .ascii ".bss\n"
.equ ASM_PROGRAM_SIZE, . - asm_program
asm_type: .ascii ".type "
.equ ASM_TYPE_SIZE, . - asm_type
asm_object: .ascii ", @object\n"
.equ ASM_OBJECT_SIZE, . - asm_object
asm_size: .ascii ".size "
.equ ASM_SIZE_SIZE, . - asm_size
asm_zero: .ascii ".zero "
.equ ASM_ZERO_SIZE, . - asm_zero
asm_global: .ascii ".global "
.equ ASM_GLOBAL_SIZE, . - asm_global

# Null-terminated tokens of the source language, compared with _token_compare.
token_begin: .string "begin"
token_end: .string "end"
token_import: .string "import"
token_open_paren: .string "("
token_close_paren: .string ")"
token_open_square: .string "["
token_assign: .string ":="
token_var: .string "var"
token_comma: .string ","

# Unterminated pieces of output text; the length is passed explicitly at each use.
space: .ascii " "
comma: .ascii ","
new_line: .ascii "\n"
colon: .ascii ":"
digit_zero: .ascii "0"

instruction_la: .ascii "la"
instruction_call: .ascii "call"
instruction_addi: .ascii "addi"
instruction_li: .ascii "li"
instruction_sw: .ascii "sw"
instruction_lw: .ascii "lw"

register_a0: .ascii "a0"
register_sp: .ascii "sp"
register_a: .ascii "a"

.section .bss

# Buffer that _start hands to read_file and that s1 walks over.
.global source_code
.type source_code, @object
.size source_code, SOURCE_BUFFER_SIZE
source_code: .zero SOURCE_BUFFER_SIZE

.section .text

# Advances s1 past spaces, tabs, line feeds and carriage returns.
#
# Leaf routine: touches only s1, t0 and t1.
.type _skip_spaces, @function
_skip_spaces:
.Lspace_loop_do:
	lbu t0, (s1) # t0 = Current character.

	li t1, ' '
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\t'
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\n'
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\r'
	beq t0, t1, .Lspace_loop_repeat

	j .Lspace_loop_end

.Lspace_loop_repeat:
	addi s1, s1, 1
	j .Lspace_loop_do

.Lspace_loop_end:
	ret

# Compares two strings, one of which has an explicit length while the other one
# is null-terminated.
#
# a0 - The address of the token string.
# a1 - The length of the string in a0.
# a2 - The address of the null-terminated string.
#
# If the strings match sets a0 to 0, otherwise sets it to 1.
# Leaf routine: uses t0-t4 as scratch.
.type _token_compare, @function
_token_compare:
	addi t0, a0, 0 # t0 = Cursor in the counted string.
	addi t1, a1, 0 # t1 = Characters left in the counted string.
	addi t2, a2, 0 # t2 = Cursor in the null-terminated string.

.Ltoken_compare_loop:
	lbu t3, (t2)

	# Will only be 0 if the current character in the null-terminated string
	# is \0 and the remaining length of the other string is 0.
	or t4, t3, t1
	beqz t4, .Ltoken_compare_equal

	# Exactly one of the strings has ended: they differ in length.
	beqz t1, .Ltoken_compare_not_equal
	beqz t3, .Ltoken_compare_not_equal

	lbu t4, (t0)
	bne t3, t4, .Ltoken_compare_not_equal

	addi t0, t0, 1
	addi t1, t1, -1
	addi t2, t2, 1
	j .Ltoken_compare_loop

.Ltoken_compare_not_equal:
	li a0, 1
	j .Ltoken_compare_end

.Ltoken_compare_equal:
	li a0, 0

.Ltoken_compare_end:
	ret

# Reads a token and returns its length in a0.
# _read_token doesn't change s1, it finds the length of the token s1 is pointing to.
#
# One of . , ; ( ) [ ] is a token of length 1, ":" is a token of length 1 and
# ":=" a token of length 2. Anything else is measured as the run of characters
# for which is_alnum (defined elsewhere) returns non-zero; that run can be empty,
# in which case 0 is returned.
#
# Stack frame: 12(sp) = ra, 8(sp) = s0, 4(sp) = token length so far. The length
# lives on the stack because is_alnum is called inside the loop.
.type _read_token, @function
_read_token:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	lbu t0, (s1) # t0 = Current character.
	sw zero, 4(sp)

	li t1, '.'
	beq t0, t1, .Ltoken_character_single
	li t1, ','
	beq t0, t1, .Ltoken_character_single
	li t1, ':'
	beq t0, t1, .Ltoken_character_colon
	li t1, ';'
	beq t0, t1, .Ltoken_character_single
	li t1, '('
	beq t0, t1, .Ltoken_character_single
	li t1, ')'
	beq t0, t1, .Ltoken_character_single
	li t1, '['
	beq t0, t1, .Ltoken_character_single
	li t1, ']'
	beq t0, t1, .Ltoken_character_single

.Ltoken_character_loop_do: # Expect an identifier or a number.
	lw t6, 4(sp)
	add t1, s1, t6
	lbu a0, (t1) # a0 = Current character.
	call is_alnum

	beqz a0, .Ltoken_character_end

	lw t6, 4(sp)
	addi t6, t6, 1
	sw t6, 4(sp)
	j .Ltoken_character_loop_do

.Ltoken_character_single:
	lw t6, 4(sp)
	addi t6, t6, 1
	sw t6, 4(sp)
	j .Ltoken_character_end

.Ltoken_character_colon:
	lbu t0, 1(s1) # t0 = The character after the colon.

	# Count the colon itself.
	lw t6, 4(sp)
	addi t6, t6, 1
	sw t6, 4(sp)

	# ":=" is one token: let the single-character path count the "=" as well.
	li t1, '='
	beq t0, t1, .Ltoken_character_single
	j .Ltoken_character_end

.Ltoken_character_end:
	lw a0, 4(sp)

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Generate entry point symbol.
#
# Emits asm_begin and advances s1 over the 5 characters of "begin".
.type _compile_begin, @function
_compile_begin:
	# Prologue. The frame is 16 bytes so that sp stays 16-byte aligned
	# across the call below.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Write initial assembler.
	la a0, asm_begin
	addi a1, zero, ASM_BEGIN_SIZE
	call write_out

	addi s1, s1, 5 # Skip the "begin" keyword.

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Generate program termination code.
#
# Emits asm_end and advances s1 over the 3 characters of "end".
.type _compile_end, @function
_compile_end:
	# Prologue. The frame is 16 bytes so that sp stays 16-byte aligned
	# across the call below.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Write closing assembler.
	la a0, asm_end
	addi a1, zero, ASM_END_SIZE
	call write_out

	addi s1, s1, 3 # Skip the "end" keyword.

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Ignores the import.
#
# Advances s1 over the 6 characters of "import", the following whitespace and
# one more token. Nothing is emitted.
.type _compile_import, @function
_compile_import:
	# Prologue. The frame is 16 bytes so that sp stays 16-byte aligned
	# across the calls below.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	addi s1, s1, 6 # Skip the "import" keyword.
	call _skip_spaces
	call _read_token
	add s1, s1, a0 # Skip the imported module name.

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Compiles a procedure call. Expects s1 to point to the first argument.
# a0 - Pointer to the procedure name.
# a1 - Length of the procedure name.
#
# Returns the procedure result in a0.
#
# For every argument the generated code evaluates it into a0 and stores it at
# (52 - 4 * index)(sp). After the argument list the generated code reloads
# a0-a7 from those eight slots and calls the procedure.
#
# Stack frame: 28(sp) = ra, 24(sp) = s0, 20(sp) = name pointer,
# 16(sp) = name length, 12(sp) = argument index / register index,
# 8(sp) and 4(sp) = one-character scratch buffers for the offset digits.
.type _compile_call, @function
_compile_call:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	sw a0, 20(sp)
	sw a1, 16(sp)
	sw zero, 12(sp) # Argument count for a procedure call.

.Lcompile_call_paren:
	# A closing paren ends the argument list.
	call _skip_spaces
	call _read_token
	addi a1, a0, 0
	addi a0, s1, 0
	la a2, token_close_paren
	call _token_compare
	beqz a0, .Lcompile_call_complete

.Lcompile_call_argument:
	call _build_expression

	# Emit "sw a0,NN(sp)\n".
	la a0, instruction_sw
	li a1, 2
	call write_out

	la a0, space
	li a1, 1
	call write_out

	la a0, register_a0
	li a1, 2
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	lw t0, 12(sp) # Argument count for a procedure call.

	# Only 8 arguments are supported with a0-a7.
	# Save all arguments on the stack so they aren't overridden afterwards.
	# The offset on the stack always has two digits in this case.
	li t1, -4
	mul t1, t0, t1
	addi t1, t1, 52 # t1 = 52 - 4 * argument index.
	li t2, 10
	div t3, t1, t2 # t3 = Tens digit.
	rem t4, t1, t2 # t4 = Ones digit.
	addi t3, t3, '0'
	addi t4, t4, '0'
	sw t3, 8(sp)
	sw t4, 4(sp)

	addi a0, sp, 8
	li a1, 1
	call write_out

	addi a0, sp, 4
	li a1, 1
	call write_out

	la a0, token_open_paren
	li a1, 1
	call write_out

	la a0, register_sp
	li a1, 2
	call write_out

	la a0, token_close_paren
	li a1, 1
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	# Is the argument followed by a comma?
	call _skip_spaces
	call _read_token
	addi a1, a0, 0
	addi a0, s1, 0
	la a2, token_comma
	call _token_compare

	/* DEBUG. Write the current token to stderr.
	addi a0, zero, STDERR
	addi a1, s1, 0
	li a2, 4 #(sp)
	addi a7, zero, SYS_WRITE
	ecall

	addi a0, zero, STDERR
	la a1, token_open_square
	li a2, 1
	addi a7, zero, SYS_WRITE
	ecall
	DEBUG. End */

	# NOTE(review): if the token is neither "," nor ")", control returns to
	# the ")" check above and the token is compiled as another argument
	# without incrementing the argument count.
	bnez a0, .Lcompile_call_paren

	lw t0, 12(sp) # Argument count for a procedure call.
	addi t0, t0, 1
	sw t0, 12(sp)

	addi s1, s1, 1 # Skip the comma between the arguments.
	j .Lcompile_call_argument

.Lcompile_call_complete:
	sw zero, 12(sp) # Reuse the slot as the register index 0-7.

.Lcompile_call_restore:
	# Just go through all a0-a7 registers and read them from stack.
	# If this stack value contains garbage, the procedure just shouldn't use it.
	lw t0, 12(sp)
	li t1, 7
	bgt t0, t1, .Lcompile_call_perform

	# Emit "lw aN,NN(sp)\n".
	la a0, instruction_lw
	li a1, 2
	call write_out

	la a0, space
	li a1, 1
	call write_out

	la a0, register_a
	li a1, 1
	call write_out

	lw t0, 12(sp)
	addi t0, t0, '0' # Register index as an ASCII digit.
	sw t0, 8(sp)
	addi a0, sp, 8
	li a1, 1
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	lw t0, 12(sp) # Argument count for a procedure call.

	# Same offset computation as when the argument was stored.
	li t1, -4
	mul t1, t0, t1
	addi t1, t1, 52
	li t2, 10
	div t3, t1, t2
	rem t4, t1, t2
	addi t3, t3, '0'
	addi t4, t4, '0'
	sw t3, 8(sp)
	sw t4, 4(sp)

	addi a0, sp, 8
	li a1, 1
	call write_out

	addi a0, sp, 4
	li a1, 1
	call write_out

	la a0, token_open_paren
	li a1, 1
	call write_out

	la a0, register_sp
	li a1, 2
	call write_out

	la a0, token_close_paren
	li a1, 1
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	lw t0, 12(sp) # Increment.
	addi t0, t0, 1
	sw t0, 12(sp)

	j .Lcompile_call_restore

.Lcompile_call_perform:
	# Emit "call <name>\n".
	la a0, instruction_call
	li a1, 4
	call write_out

	la a0, space
	li a1, 1
	call write_out

	lw a0, 20(sp)
	lw a1, 16(sp)
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	call _skip_spaces
	addi s1, s1, 1 # Skip the close paren.

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles the source text s1 points to.
#
# Emits asm_program, skips leading whitespace plus 7 characters, then reads
# tokens and passes each one to _handle_token until a null byte or a
# zero-length token is reached.
#
# Stack frame: 12(sp) = ra, 8(sp) = s0, 4(sp) = length of the current token.
.type compile, @function
compile:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Write .bss section header for global variables.
	la a0, asm_program
	addi a1, zero, ASM_PROGRAM_SIZE
	call write_out

	call _skip_spaces
	addi s1, s1, 7 # Skip "program" keyword.

.Lcharacter_loop_do:
	call _skip_spaces
	lbu t0, (s1) # t0 = Current character.
	beqz t0, .Lcharacter_loop_end

	call _read_token
	sw a0, 4(sp) # Save the token length on the stack.
	beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input.

	lw a0, 4(sp)
	call _handle_token

	j .Lcharacter_loop_do

.Lcharacter_loop_end:
	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Evaluates an expression and saves the result in a0.
#
# Reads one token at s1. If its first character satisfies is_digit (defined
# elsewhere) "li a0,<token>\n" is emitted, otherwise "la a0,<token>\n".
# s1 is advanced past the token.
#
# Stack frame: 12(sp) = ra, 8(sp) = s0, 4(sp) = token start, 0(sp) = token length.
.type _build_expression, @function
_build_expression:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	call _skip_spaces
	call _read_token
	sw s1, 4(sp)
	sw a0, 0(sp)

	# Integer literal.
	# The character is loaded zero-extended, like everywhere else in this file.
	addi a0, s1, 0
	lbu a0, (a0)
	call is_digit
	bnez a0, .Lbuild_expression_number_literal

	# Named identifier.
	la a0, instruction_la
	li a1, 2
	call write_out

	la a0, space
	li a1, 1
	call write_out

	la a0, register_a0
	li a1, 2
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	lw a0, 4(sp)
	lw a1, 0(sp)
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	j .Lbuild_expression_end

.Lbuild_expression_number_literal:
	la a0, instruction_li
	li a1, 2
	call write_out

	la a0, space
	li a1, 1
	call write_out

	la a0, register_a0
	li a1, 2
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	lw a0, 4(sp)
	lw a1, 0(sp)
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	j .Lbuild_expression_end

.Lbuild_expression_end:
	# Consume the token that has just been compiled.
	lw a0, 0(sp)
	add s1, s1, a0

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Compiles a statement beginning with an identifier.
#
# a0 - Pointer to the identifier.
# a1 - Length of the identifier.
#
# The token after the identifier selects the statement kind: ":=" compiles an
# assignment, "(" compiles a procedure call, any other token is consumed and
# nothing is emitted.
#
# Stack frame: 28(sp) = ra, 24(sp) = s0, 20(sp) = identifier pointer,
# 16(sp) = identifier length, 12(sp) = next token pointer, 8(sp) = next token length.
.type _compile_identifier, @function
_compile_identifier:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Save the pointer to the identifier and its length on the stack.
	sw a0, 20(sp)
	sw a1, 16(sp)

	add s1, s1, a1 # Skip the identifier.
	call _skip_spaces
	call _read_token

	# Save the pointer and the length of the token following the identifier.
	sw s1, 12(sp)
	sw a0, 8(sp)

	add s1, s1, a0 # Skip that token.
	call _skip_spaces

	lw a0, 12(sp)
	lw a1, 8(sp)
	la a2, token_assign
	call _token_compare
	beqz a0, .Lcompile_identifier_assign

	lw a0, 12(sp)
	lw a1, 8(sp)
	la a2, token_open_paren
	call _token_compare
	beqz a0, .Lcompile_identifier_call

	j .Lcompile_identifier_end

.Lcompile_identifier_call:
	lw a0, 20(sp)
	lw a1, 16(sp)
	call _compile_call

	j .Lcompile_identifier_end

.Lcompile_identifier_assign:
	call _build_expression

	# Emit "addi <identifier>,a0,0\n".
	la a0, instruction_addi
	li a1, 4
	call write_out

	la a0, space
	li a1, 1
	call write_out

	lw a0, 20(sp)
	lw a1, 16(sp)
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	la a0, register_a0
	li a1, 2
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	la a0, digit_zero
	li a1, 1
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	j .Lcompile_identifier_end

.Lcompile_identifier_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles a variable declaration.
#
# Skips "var", the variable name and the token after it. If the next token is
# "[", the token inside the brackets is used as the size and the following is
# emitted:
#
#   .type <name>, @object
#   .size <name>,<size>
#   <name>:.zero <size>
#   .global <name>
#
# Otherwise nothing is emitted. In both cases the last token read is skipped
# before returning.
#
# Stack frame: 28(sp) = ra, 24(sp) = s0, 20(sp) = name pointer,
# 16(sp) = name length, 12(sp) = length of the token to skip on exit,
# 8(sp) = length of the size token.
.type _compile_var, @function
_compile_var:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Variable name.
	addi s1, s1, 3 # Skip the "var" keyword.
	call _skip_spaces
	call _read_token
	sw s1, 20(sp)
	sw a0, 16(sp)
	add s1, s1, a0

	# Skip the colon.
	call _skip_spaces
	call _read_token
	add s1, s1, a0

	call _skip_spaces
	call _read_token
	sw a0, 12(sp)

	addi a0, s1, 0
	lw a1, 12(sp)
	la a2, token_open_square
	call _token_compare
	beqz a0, .Lcompile_var_array

	j .Lcompile_var_end

.Lcompile_var_array:
	call _skip_spaces
	addi s1, s1, 1 # Skip the opening square bracket.

	# s1 now points to the size token, 8(sp) holds its length.
	call _skip_spaces
	call _read_token
	sw a0, 8(sp)

	# .type <name>, @object
	la a0, asm_type
	li a1, ASM_TYPE_SIZE
	call write_out

	lw a0, 20(sp)
	lw a1, 16(sp)
	call write_out

	la a0, asm_object
	li a1, ASM_OBJECT_SIZE
	call write_out

	# .size <name>,<size>
	la a0, asm_size
	li a1, ASM_SIZE_SIZE
	call write_out

	lw a0, 20(sp)
	lw a1, 16(sp)
	call write_out

	la a0, comma
	li a1, 1
	call write_out

	addi a0, s1, 0
	lw a1, 8(sp)
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	# <name>:.zero <size>
	lw a0, 20(sp)
	lw a1, 16(sp)
	call write_out

	la a0, colon
	li a1, 1
	call write_out

	la a0, asm_zero
	li a1, ASM_ZERO_SIZE
	call write_out

	addi a0, s1, 0
	lw a1, 8(sp)
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	# .global <name>
	la a0, asm_global
	li a1, ASM_GLOBAL_SIZE
	call write_out

	lw a0, 20(sp)
	lw a1, 16(sp)
	call write_out

	la a0, new_line
	li a1, 1
	call write_out

	# Skip the size token.
	lw a0, 8(sp)
	add s1, s1, a0

	call _skip_spaces
	addi s1, s1, 1 # Skip the closing square bracket.

	# Measure the token after the bracket; it is skipped at the end label.
	call _skip_spaces
	call _read_token
	sw a0, 12(sp)

	j .Lcompile_var_end

.Lcompile_var_end:
	lw a0, 12(sp)
	add s1, s1, a0

	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Dispatches on the token s1 points to.
#
# a0 - Length of the token.
#
# "begin", "end", "import" and "var" go to their _compile_* routine. A token
# whose first character satisfies is_alpha (defined elsewhere) is compiled as
# an identifier statement. Any other token is skipped.
#
# Stack frame: 28(sp) = ra, 24(sp) = s0, 20(sp) = token length.
.type _handle_token, @function
_handle_token:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	sw a0, 20(sp)

	# Detect what token has been read.
	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_begin
	call _token_compare
	beqz a0, .Lhandle_token_begin

	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_end
	call _token_compare
	beqz a0, .Lhandle_token_end

	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_import
	call _token_compare
	beqz a0, .Lhandle_token_import

	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_var
	call _token_compare
	beqz a0, .Lhandle_token_var

	# If the first symbol in the token is a character, assume an identifier.
	# The character is loaded zero-extended, like everywhere else in this file.
	addi a0, s1, 0
	lbu a0, (a0)
	call is_alpha
	bnez a0, .Lhandle_token_identifier

	# Ignore the unknown token.
	lw t0, 20(sp)
	add s1, s1, t0

	j .Lhandle_token_return

.Lhandle_token_begin:
	call _compile_begin
	j .Lhandle_token_return

.Lhandle_token_end:
	call _compile_end
	j .Lhandle_token_return

.Lhandle_token_import:
	call _compile_import
	j .Lhandle_token_return

.Lhandle_token_var:
	call _compile_var
	j .Lhandle_token_return

.Lhandle_token_identifier:
	addi a0, s1, 0
	lw a1, 20(sp)
	call _compile_identifier

	j .Lhandle_token_return

.Lhandle_token_return:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Program entry point: fills source_code through read_file, compiles it and
# calls exit with status 0. read_file and exit are defined elsewhere.
_start:
	# Read the source from the standard input.
	la a0, source_code
	li a1, SOURCE_BUFFER_SIZE # Buffer size.
	call read_file

	la s1, source_code # s1 = Source code position.
	call compile

	# Call exit.
	li a0, 0 # Use 0 return code.
	call exit