# Tiny compiler front end for RISC-V (RV32) Linux, GNU assembler syntax.
#
# The program reads source text from stdin into source_code, echoes every
# token it recognizes to stderr and writes the generated assembler to stdout.
#
# Global register convention:
# s1 - Contains the current position in the source text. Every routine that
#      consumes input advances s1; it is deliberately shared between functions.

.section .rodata

.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93

.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2

.equ SOURCE_BUFFER_SIZE, 2048

# Text emitted before anything else: reserves the source buffer of the
# generated program.
asm_program: .ascii ".bss\n.type source_code, @object\n.size source_code, 2048\nsource_code: .zero 2048\n"
.equ ASM_PROGRAM_SIZE, . - asm_program

# Text emitted for the "begin" keyword: entry point of the generated program.
asm_begin: .ascii ".text\n.global _start\n_start:\n\taddi a0, zero, 0\n\tla a1, source_code\n\tli a2, 2048\n\taddi a7, zero, 63\n\tecall\n"
.equ ASM_BEGIN_SIZE, . - asm_begin

# Text emitted for the "end" keyword: exit(0) in the generated program.
asm_end: .ascii "\taddi a0, zero, 0\n\taddi a7, zero, 93\n\tecall\n"
.equ ASM_END_SIZE, . - asm_end

# Null-terminated keyword and punctuation spellings used by _token_compare.
token_begin: .string "begin"
token_end: .string "end"
token_import: .string "import"
token_dot: .string "."
token_comma: .string ","
token_assign: .string ":="

.bss

.type source_code, @object
.size source_code, SOURCE_BUFFER_SIZE
source_code: .zero SOURCE_BUFFER_SIZE

.text
.global _start # Program entry point.

# Advances s1 past spaces, tabs, line feeds and carriage returns.
#
# Clobbers t0, t1.
.type _skip_spaces, @function
_skip_spaces:
.Lspace_loop_do:
	lbu t0, (s1) # t0 = Current character.
	li t1, ' '
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\t'
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\n'
	beq t0, t1, .Lspace_loop_repeat
	li t1, '\r'
	beq t0, t1, .Lspace_loop_repeat

	j .Lspace_loop_end
.Lspace_loop_repeat:
	addi s1, s1, 1
	j .Lspace_loop_do

.Lspace_loop_end:
	ret

# Compares two strings: one has an explicit length, the other one is
# null-terminated.
#
# a0 - The address of the token string.
# a1 - The length of the string in a0.
# a2 - The address of the null-terminated string.
#
# If the strings match sets a0 to 0, otherwise sets it to 1.
# Clobbers t0-t4.
.type _token_compare, @function
_token_compare:
	addi t0, a0, 0 # t0 = Cursor in the length-delimited string.
	addi t1, a1, 0 # t1 = Characters left in the length-delimited string.
	addi t2, a2, 0 # t2 = Cursor in the null-terminated string.

.Ltoken_compare_loop:
	lbu t3, (t2)

	# Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the
	# other string is 0.
	or t4, t3, t1
	beqz t4, .Ltoken_compare_equal

	# Exactly one of the strings has ended, so the lengths differ.
	beqz t1, .Ltoken_compare_not_equal
	beqz t3, .Ltoken_compare_not_equal

	lbu t4, (t0)
	bne t3, t4, .Ltoken_compare_not_equal

	addi t0, t0, 1
	addi t1, t1, -1
	addi t2, t2, 1
	j .Ltoken_compare_loop

.Ltoken_compare_not_equal:
	li a0, 1
	j .Ltoken_compare_end

.Ltoken_compare_equal:
	li a0, 0

.Ltoken_compare_end:
	ret

# Detects if the passed character is a 7-bit alpha character or an underscore.
# The character is passed in a0.
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
# Clobbers t0-t3.
.type _is_alpha, @function
_is_alpha:
	li t0, 'A' - 1
	sltu t1, t0, a0 # t1 = a0 >= 'A'
	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
	and t1, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'

	li t0, 'a' - 1
	sltu t2, t0, a0 # t2 = a0 >= 'a'
	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
	and t2, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'

	xori t3, a0, '_'
	seqz t3, t3 # t3 = a0 == '_'

	or a0, t1, t2
	or a0, a0, t3

	ret

# Detects if the character passed in a0 is a decimal digit.
# Sets a0 to 1 if it is, to 0 otherwise.
# Clobbers t0-t2.
.type _is_digit, @function
_is_digit:
	li t0, '0' - 1
	sltu t1, t0, a0 # t1 = a0 >= '0'
	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'

	and a0, t1, t2

	ret

# Detects if the character passed in a0 is a letter, an underscore or a digit.
# Sets a0 to 1 if it is, to 0 otherwise.
.type _is_alnum, @function
_is_alnum:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	sw a0, 4(sp) # Keep the character, _is_alpha overwrites a0.

	call _is_alpha
	sw a0, 0(sp) # Keep the _is_alpha result across the next call.

	lw a0, 4(sp)
	call _is_digit
	lw a1, 0(sp)
	or a0, a0, a1

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Reads a token starting at s1 and returns its length in a0.
# s1 itself is not advanced. A length of 0 means that the character at s1
# does not start any known token.
#
# Stack slot 4(sp) holds the length accumulated so far.
.type _read_token, @function
_read_token:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	lbu t0, (s1) # t0 = Current character.
	sw zero, 4(sp)

	li t1, '.'
	beq t0, t1, .Ltoken_character_single
	li t1, ','
	beq t0, t1, .Ltoken_character_single
	li t1, ':'
	beq t0, t1, .Ltoken_character_colon
	li t1, ';'
	beq t0, t1, .Ltoken_character_single
	li t1, '('
	beq t0, t1, .Ltoken_character_single
	li t1, ')'
	beq t0, t1, .Ltoken_character_single
	li t1, '['
	beq t0, t1, .Ltoken_character_single
	li t1, ']'
	beq t0, t1, .Ltoken_character_single

.Ltoken_character_loop_do: # Expect an identifier or a number.
	lw t6, 4(sp)
	add t1, s1, t6
	lbu a0, (t1) # a0 = Current character.
	call _is_alnum

	beqz a0, .Ltoken_character_end
	lw t6, 4(sp)
	addi t6, t6, 1
	sw t6, 4(sp)
	j .Ltoken_character_loop_do

.Ltoken_character_single:
	lw t6, 4(sp)
	addi t6, t6, 1
	sw t6, 4(sp)
	j .Ltoken_character_end

.Ltoken_character_colon:
	lbu t0, 1(s1) # t0 = The character after the colon.
	lw t6, 4(sp)
	addi t6, t6, 1
	sw t6, 4(sp)

	# ":=" is a two character token: count the '=' as well.
	li t1, '='
	beq t0, t1, .Ltoken_character_single
	j .Ltoken_character_end

.Ltoken_character_end:
	lw a0, 4(sp)

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Generate entry point symbol.
# Writes asm_begin to stdout and advances s1 past the "begin" keyword.
.type _compile_begin, @function
_compile_begin:
	# Write initial assembler.
	addi a0, zero, STDOUT
	la a1, asm_begin
	addi a2, zero, ASM_BEGIN_SIZE
	addi a7, zero, SYS_WRITE
	ecall

	addi s1, s1, 5 # Skip "begin".

	ret

# Generate program termination code.
# Writes asm_end to stdout and advances s1 past the "end" keyword.
.type _compile_end, @function
_compile_end:
	# Write closing assembler.
	addi a0, zero, STDOUT
	la a1, asm_end
	addi a2, zero, ASM_END_SIZE
	addi a7, zero, SYS_WRITE
	ecall

	addi s1, s1, 3 # Skip "end".

	ret

# Ignores the import: skips the "import" keyword and the single token that
# follows it.
.type _compile_import, @function
_compile_import:
	# Prologue. The frame is 16 bytes to keep sp 16-byte aligned at the calls
	# below, like every other frame in this file.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	addi s1, s1, 6 # Skip "import".
	call _skip_spaces
	call _read_token
	add s1, s1, a0 # Skip the imported name.

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Handles an identifier token. No code is generated for identifiers yet; the
# token is only consumed so that the caller makes progress.
#
# a0 - The address of the identifier (equals s1 at the call site).
# a1 - The length of the identifier.
.type _compile_identifier, @function
_compile_identifier:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	add s1, s1, a1 # Skip the identifier.

	# Epilogue. Offsets and frame size must mirror the prologue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Dispatches on the token that starts at s1.
#
# a0 - The length of the token.
#
# Stack slot 20(sp) holds the token length across the helper calls.
.type _handle_token, @function
_handle_token:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	sw a0, 20(sp)

	# Detect what token has been read.
	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_begin
	call _token_compare
	beqz a0, .Lhandle_token_begin

	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_end
	call _token_compare
	beqz a0, .Lhandle_token_end

	addi a0, s1, 0
	lw a1, 20(sp)
	la a2, token_import
	call _token_compare
	beqz a0, .Lhandle_token_import

	# If the first symbol in the token is a character, assume an identifier.
	# Only a single byte is loaded: _is_alpha expects one character in a0.
	lbu a0, (s1)
	call _is_alpha
	bnez a0, .Lhandle_token_identifier

	# Ignore the unknown token.
	lw t0, 20(sp)
	add s1, s1, t0
	j .Lhandle_token_return

.Lhandle_token_begin:
	call _compile_begin
	j .Lhandle_token_return

.Lhandle_token_end:
	call _compile_end
	j .Lhandle_token_return

.Lhandle_token_import:
	call _compile_import
	j .Lhandle_token_return

.Lhandle_token_identifier:
	addi a0, s1, 0
	lw a1, 20(sp)
	call _compile_identifier
	j .Lhandle_token_return

.Lhandle_token_return:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret

# Compiles the text in source_code: writes the .bss header to stdout, then
# reads tokens until a null byte or an unrecognized character is found.
# Every token is echoed to stderr before it is handled.
#
# Stack slot 4(sp) holds the length of the current token.
.type _compile, @function
_compile:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	la s1, source_code # s1 = Source code position.

	# Write .bss section header for global variables.
	addi a0, zero, STDOUT
	la a1, asm_program
	addi a2, zero, ASM_PROGRAM_SIZE
	addi a7, zero, SYS_WRITE
	ecall

	call _skip_spaces
	addi s1, s1, 7 # Skip "program" keyword.

.Lcharacter_loop_do:
	call _skip_spaces
	lbu t0, (s1) # t0 = Current character.
	beqz t0, .Lcharacter_loop_end

	call _read_token
	sw a0, 4(sp) # Save the token length on the stack.
	beqz a0, .Lcharacter_loop_end # No token read, there is unrecognized input.

	# Write the current token to stderr.
	addi a0, zero, STDERR
	addi a1, s1, 0
	lw a2, 4(sp)
	addi a7, zero, SYS_WRITE
	ecall

	lw a0, 4(sp)
	call _handle_token

	j .Lcharacter_loop_do

.Lcharacter_loop_end:
	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Program entry point: reads the source, compiles it and exits with status 0.
_start:
	# Read the source from the standard input. One byte less than the buffer
	# size is requested so the zero-initialized buffer always ends with a null
	# byte, which the scanner relies on to detect the end of the input.
	addi a0, zero, STDIN
	la a1, source_code
	li a2, SOURCE_BUFFER_SIZE - 1
	addi a7, zero, SYS_READ
	ecall

	call _compile

	# Call exit.
	addi a0, zero, 0 # Use 0 return code.
	addi a7, zero, SYS_EXIT
	ecall