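# Listing metadata left by the file viewer: 421 lines, 7.6 KiB, highlighted as "ArmAsm".
# The code below is RISC-V assembly in GNU assembler syntax.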
# s1 - Contains the current position in the source text.
|
|
|
|
# Constants and read-only data.
#
# Nothing defined here is written at run time, so it is placed in .rodata instead of the writable .data section.
.section .rodata

# System call numbers; the code loads them into a7 before executing ecall.
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93

# Standard file descriptors.
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2

# Size in bytes of the source_code buffer.
# The templates below spell the same number (2048) out literally in the text they emit.
.equ SOURCE_BUFFER_SIZE, 2048

# Output templates. They are written with an explicit length, so they use .ascii and carry no terminator.

# Written first by _compile: declares the source_code buffer of the generated program.
asm_program: .ascii ".bss\n.type source_code, @object\n.size source_code, 2048\nsource_code: .zero 2048\n"
.equ ASM_PROGRAM_SIZE, . - asm_program

# Written by _compile_begin: entry point of the generated program, which starts by reading standard input.
asm_begin: .ascii ".text\n.global _start\n_start:\n\taddi a0, zero, 0\n\tla a1, source_code\n\tli a2, 2048\n\taddi a7, zero, 63\n\tecall\n"
.equ ASM_BEGIN_SIZE, . - asm_begin

# Written by _compile_end: makes the generated program exit with status 0.
asm_end: .ascii "\taddi a0, zero, 0\n\taddi a7, zero, 93\n\tecall\n"
.equ ASM_END_SIZE, . - asm_end

# Null-terminated token spellings, used as the third argument of _token_compare.
token_begin: .string "begin"
token_end: .string "end"
token_import: .string "import"

# NOTE(review): the three tokens below are not referenced by any code visible in this file.
token_dot: .string "."
token_comma: .string ","
token_assign: .string ":="
|
|
|
# Zero-filled storage: the buffer that holds the source text to compile.
.section .bss

.type source_code, @object
.size source_code, SOURCE_BUFFER_SIZE
source_code:
    .space SOURCE_BUFFER_SIZE
|
|
|
|
# Code section. Only the entry point is exported; every other routine stays local to this file.
.section .text
.globl _start                           # Program entry point.
|
|
|
|
# Moves s1 forward over a run of blanks: space, tab, line feed and carriage return.
#
# s1 - Current position in the source text. On return it addresses the first byte that is none of those
#      four characters; it is left untouched when it already does.
#
# Clobbers t0 and t1.
.type _skip_spaces, @function
_skip_spaces:
    j .Lskip_spaces_test

.Lskip_spaces_next:
    addi s1, s1, 1                      # Consume the blank.

.Lskip_spaces_test:
    lbu t0, (s1)                        # t0 = byte under the cursor.

    li t1, ' '
    beq t0, t1, .Lskip_spaces_next
    li t1, '\t'
    beq t0, t1, .Lskip_spaces_next
    li t1, '\n'
    beq t0, t1, .Lskip_spaces_next
    li t1, '\r'
    beq t0, t1, .Lskip_spaces_next

    ret                                 # Not a blank: done.
|
|
|
|
# Compares two strings, one of which is given with an explicit length while the other is null-terminated.
#
# a0 - The address of the token string.
# a1 - The length of the string in a0.
# a2 - The address of the null-terminated string.
#
# Sets a0 to 0 if both strings have the same length and the same bytes, otherwise sets it to 1.
# Clobbers t0-t4.
.type _token_compare, @function
_token_compare:
    mv t0, a0                           # t0 = cursor in the token.
    mv t1, a1                           # t1 = token bytes still to compare.
    mv t2, a2                           # t2 = cursor in the null-terminated string.

.Ltoken_compare_next:
    lbu t3, (t2)                        # t3 = next byte of the null-terminated string.
    beqz t1, .Ltoken_compare_token_done

    # The token still has bytes left, so the other string must not end here.
    beqz t3, .Ltoken_compare_differ

    lbu t4, (t0)                        # t4 = next byte of the token.
    bne t3, t4, .Ltoken_compare_differ

    addi t0, t0, 1
    addi t2, t2, 1
    addi t1, t1, -1
    j .Ltoken_compare_next

.Ltoken_compare_token_done:
    # The token is exhausted; the strings match only if the other one ends at the same place.
    snez a0, t3                         # a0 = 0 when t3 is the terminator, 1 otherwise.
    ret

.Ltoken_compare_differ:
    li a0, 1
    ret
|
|
|
|
# Tells whether a character is a 7-bit letter or an underscore.
#
# a0 - The character to classify.
#
# Sets a0 to 1 for 'A'-'Z', 'a'-'z' and '_', and to 0 for every other value.
# Clobbers t0-t2.
.type _is_alpha, @function
_is_alpha:
    # Range checks use one unsigned comparison each:
    # lo <= c <= hi holds exactly when (c - lo), taken as unsigned, is below (hi - lo + 1).
    li t0, 'A'
    sub t0, a0, t0
    sltiu t0, t0, 'Z' - 'A' + 1         # t0 = 'A' <= a0 <= 'Z'

    li t1, 'a'
    sub t1, a0, t1
    sltiu t1, t1, 'z' - 'a' + 1         # t1 = 'a' <= a0 <= 'z'

    li t2, '_'
    sub t2, a0, t2
    seqz t2, t2                         # t2 = a0 == '_'

    or t0, t0, t1
    or a0, t0, t2
    ret
|
|
|
|
# Tells whether a character is a decimal digit.
#
# a0 - The character to classify.
#
# Sets a0 to 1 for '0'-'9' and to 0 for every other value.
# Clobbers t0.
.type _is_digit, @function
_is_digit:
    # '0' <= c <= '9' holds exactly when (c - '0'), taken as unsigned, is below 10.
    li t0, '0'
    sub t0, a0, t0
    sltiu a0, t0, '9' - '0' + 1
    ret
|
|
|
|
# Tells whether a character is accepted by _is_alpha or by _is_digit.
#
# a0 - The character to classify.
#
# Sets a0 to the bitwise OR of the two verdicts: non-zero if either routine accepts the character.
#
# Frame layout (16 bytes): 12(sp) = ra, 8(sp) = s0, 4(sp) = digit verdict, 0(sp) = the character.
.type _is_alnum, @function
_is_alnum:
    # Prologue.
    addi sp, sp, -16
    sw ra, 12(sp)
    sw s0, 8(sp)
    addi s0, sp, 16

    sw a0, 0(sp)                        # The character has to survive the first call.

    call _is_digit
    sw a0, 4(sp)                        # Keep the digit verdict across the second call.

    lw a0, 0(sp)
    call _is_alpha

    lw t0, 4(sp)
    or a0, a0, t0                       # Letter/underscore or digit.

    # Epilogue.
    lw ra, 12(sp)
    lw s0, 8(sp)
    addi sp, sp, 16
    ret
|
|
|
|
# Measures the token that starts at s1 and returns its length in a0. s1 itself is not moved.
#
# Recognised tokens:
#   - one of the single characters . , ; ( ) [ ] or a lone ':'  -> length 1
#   - the two-character sequence ":="                           -> length 2
#   - otherwise the longest run of characters accepted by _is_alnum, which may be empty (length 0).
#
# Frame layout (16 bytes): 12(sp) = ra, 8(sp) = s0, 4(sp) = token length so far.
.type _read_token, @function
_read_token:
    # Prologue.
    addi sp, sp, -16
    sw ra, 12(sp)
    sw s0, 8(sp)
    addi s0, sp, 16

    sw zero, 4(sp)                      # Nothing counted yet.
    lbu t0, (s1)                        # t0 = first character of the token.

    li t1, ':'
    beq t0, t1, .Lread_token_colon

    li t1, '.'
    beq t0, t1, .Lread_token_one
    li t1, ','
    beq t0, t1, .Lread_token_one
    li t1, ';'
    beq t0, t1, .Lread_token_one
    li t1, '('
    beq t0, t1, .Lread_token_one
    li t1, ')'
    beq t0, t1, .Lread_token_one
    li t1, '['
    beq t0, t1, .Lread_token_one
    li t1, ']'
    beq t0, t1, .Lread_token_one

.Lread_token_word:                      # Expect an identifier or a number.
    lw t2, 4(sp)
    add t2, s1, t2
    lbu a0, (t2)                        # a0 = character right after what was counted so far.
    call _is_alnum
    beqz a0, .Lread_token_done

    lw t2, 4(sp)                        # Reloaded: the call may have clobbered temporaries.
    addi t2, t2, 1
    sw t2, 4(sp)
    j .Lread_token_word

.Lread_token_colon:
    li t2, 1
    sw t2, 4(sp)                        # The colon itself.

    lbu t0, 1(s1)                       # t0 = the character after the colon.
    li t1, '='
    bne t0, t1, .Lread_token_done
    # ":=" - fall through to count the '=' as well.

.Lread_token_one:
    lw t2, 4(sp)
    addi t2, t2, 1
    sw t2, 4(sp)

.Lread_token_done:
    lw a0, 4(sp)

    # Epilogue.
    lw ra, 12(sp)
    lw s0, 8(sp)
    addi sp, sp, 16
    ret
|
|
|
|
# Handles the "begin" keyword: writes the asm_begin template (the entry point of the generated program)
# to standard output and moves s1 past the keyword.
#
# s1 - Current position in the source text; advanced by 5.
#
# Clobbers a0, a1, a2 and a7. The result of the write is not checked.
.type _compile_begin, @function
_compile_begin:
    li a7, SYS_WRITE
    li a0, STDOUT
    la a1, asm_begin
    addi a2, zero, ASM_BEGIN_SIZE
    ecall

    addi s1, s1, 5                      # Length of "begin".
    ret
|
|
|
|
# Handles the "end" keyword: writes the asm_end template (the termination code of the generated program)
# to standard output and moves s1 past the keyword.
#
# s1 - Current position in the source text; advanced by 3.
#
# Clobbers a0, a1, a2 and a7. The result of the write is not checked.
.type _compile_end, @function
_compile_end:
    li a7, SYS_WRITE
    li a0, STDOUT
    la a1, asm_end
    addi a2, zero, ASM_END_SIZE
    ecall

    addi s1, s1, 3                      # Length of "end".
    ret
|
|
|
|
# Ignores an import: skips the "import" keyword, the blanks after it and the single token that follows.
# Nothing is written to the output.
#
# s1 - Current position in the source text, pointing at the "import" keyword on entry.
#      On return it points just past the token that followed the keyword.
#
# Frame layout (16 bytes): 12(sp) = ra, 8(sp) = s0.
# The frame is 16 bytes, like every other frame in this file, so that sp keeps its 16-byte alignment
# across the calls below.
.type _compile_import, @function
_compile_import:
    # Prologue.
    addi sp, sp, -16
    sw ra, 12(sp)
    sw s0, 8(sp)
    addi s0, sp, 16

    addi s1, s1, 6                      # Length of "import".
    call _skip_spaces
    call _read_token                    # a0 = length of the token after the keyword.
    add s1, s1, a0                      # Skip that token as well.

    # Epilogue.
    lw ra, 12(sp)
    lw s0, 8(sp)
    addi sp, sp, 16
    ret
|
|
|
|
# Handles a token that starts with a letter or an underscore.
#
# a0 - The address of the token (the caller passes the current value of s1).
# a1 - The length of the token.
# s1 - Current position in the source text; advanced past the token.
#
# No code is generated for identifiers yet; the token is only consumed. Consuming it matters: every other
# handler advances s1 itself, and the caller's loop would otherwise read the same token again forever.
#
# Frame layout (16 bytes): 12(sp) = ra, 8(sp) = s0. The epilogue has to mirror the prologue exactly;
# restoring from other offsets or releasing a different amount corrupts ra, s0 and sp.
.type _compile_identifier, @function
_compile_identifier:
    # Prologue.
    addi sp, sp, -16
    sw ra, 12(sp)
    sw s0, 8(sp)
    addi s0, sp, 16

    add s1, s1, a1                      # Consume the identifier.

    # Epilogue.
    lw ra, 12(sp)
    lw s0, 8(sp)
    addi sp, sp, 16
    ret
|
|
|
|
# Dispatches on the token that starts at s1.
#
# a0 - The length of the token.
# s1 - Current position in the source text; the selected handler (or the fallback below) advances it.
#
# The token is compared with the keywords "begin", "end" and "import" in that order. If none matches and
# the first character is a letter or an underscore, the token is treated as an identifier. Any other
# token is skipped.
#
# Frame layout (32 bytes): 28(sp) = ra, 24(sp) = s0, 20(sp) = token length.
.type _handle_token, @function
_handle_token:
    # Prologue.
    addi sp, sp, -32
    sw ra, 28(sp)
    sw s0, 24(sp)
    addi s0, sp, 32

    sw a0, 20(sp)                       # The length is reloaded after every call.

    # Detect what token has been read.
    addi a0, s1, 0
    lw a1, 20(sp)
    la a2, token_begin
    call _token_compare
    beqz a0, .Lhandle_token_begin

    addi a0, s1, 0
    lw a1, 20(sp)
    la a2, token_end
    call _token_compare
    beqz a0, .Lhandle_token_end

    addi a0, s1, 0
    lw a1, 20(sp)
    la a2, token_import
    call _token_compare
    beqz a0, .Lhandle_token_import

    # If the first symbol in the token is a letter or an underscore, assume an identifier.
    # Load a single byte: _is_alpha classifies one character, and a word load would also pull in the
    # three bytes that follow it (from an address that need not be word-aligned).
    lbu a0, (s1)
    call _is_alpha
    bnez a0, .Lhandle_token_identifier

    # Ignore the unknown token.
    lw t0, 20(sp)
    add s1, s1, t0
    j .Lhandle_token_return

.Lhandle_token_begin:
    call _compile_begin
    j .Lhandle_token_return

.Lhandle_token_end:
    call _compile_end
    j .Lhandle_token_return

.Lhandle_token_import:
    call _compile_import
    j .Lhandle_token_return

.Lhandle_token_identifier:
    addi a0, s1, 0                      # a0 = address of the token.
    lw a1, 20(sp)                       # a1 = length of the token.
    call _compile_identifier

.Lhandle_token_return:
    # Epilogue.
    lw ra, 28(sp)
    lw s0, 24(sp)
    addi sp, sp, 32
    ret
|
|
|
|
# Compiles the text stored in source_code.
#
# Writes the asm_program header to standard output, skips leading blanks and the 7 characters of the
# "program" keyword (without checking that the keyword is really there), then reads tokens one at a time.
# Each token is echoed to standard error and passed to _handle_token. The loop stops at the terminating
# null byte or at the first position where _read_token finds no token.
#
# s1 is set up here and holds the current position in the source text for all the routines called.
#
# Frame layout (16 bytes): 12(sp) = ra, 8(sp) = s0, 4(sp) = length of the current token.
.type _compile, @function
_compile:
    # Prologue.
    addi sp, sp, -16
    sw ra, 12(sp)
    sw s0, 8(sp)
    addi s0, sp, 16

    la s1, source_code                  # s1 = Source code position.

    # Write .bss section header for global variables.
    li a7, SYS_WRITE
    li a0, STDOUT
    la a1, asm_program
    addi a2, zero, ASM_PROGRAM_SIZE
    ecall

    call _skip_spaces
    addi s1, s1, 7                      # Skip "program" keyword.

.Lcompile_next_token:
    call _skip_spaces

    lbu t0, (s1)                        # t0 = Current character.
    beqz t0, .Lcompile_done             # End of the source text.

    call _read_token
    sw a0, 4(sp)                        # Keep the token length across the system call below.
    beqz a0, .Lcompile_done             # No token read, there is unrecognized input.

    # Echo the current token to stderr.
    li a7, SYS_WRITE
    li a0, STDERR
    mv a1, s1
    lw a2, 4(sp)
    ecall

    lw a0, 4(sp)
    call _handle_token

    j .Lcompile_next_token

.Lcompile_done:
    # Epilogue.
    lw ra, 12(sp)
    lw s0, 8(sp)
    addi sp, sp, 16
    ret
|
|
|
|
# Program entry point: reads the source text from standard input into source_code, compiles it and
# exits with status 0.
.type _start, @function
_start:
    # Read the source from the standard input.
    # At most SOURCE_BUFFER_SIZE - 1 bytes are requested. The buffer is zero-filled, so its last byte
    # then always stays 0, and the scanning code, which stops only on a null byte, cannot run past
    # the end of the buffer.
    addi a0, zero, STDIN
    la a1, source_code
    li a2, SOURCE_BUFFER_SIZE - 1       # Leave room for the terminating null byte.
    addi a7, zero, SYS_READ
    ecall                               # The result (byte count or error) is not checked.

    call _compile

    # Call exit.
    addi a0, zero, 0                    # Use 0 return code.
    addi a7, zero, SYS_EXIT
    ecall
|