From 5aaf9ded3646f5312e26fc7b3502f141c2e556f5 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Tue, 22 Apr 2025 22:51:11 +0200 Subject: [PATCH] Support global variables and constants --- boot/asm-boot.s | 13 - boot/common-boot.s | 108 +++++++- boot/echo-boot.s | 656 +++++++++++++++++++++++++++++++++++++++++++- boot/stage2.elna | 666 +++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 1386 insertions(+), 57 deletions(-) diff --git a/boot/asm-boot.s b/boot/asm-boot.s index 80167a2..fca0d3f 100644 --- a/boot/asm-boot.s +++ b/boot/asm-boot.s @@ -354,19 +354,6 @@ _compile_call: la a2, token_comma call _token_compare - /* DEBUG. Write the current token to stderr. - addi a0, zero, STDERR - addi a1, s1, 0 - li a2, 4 #(sp) - addi a7, zero, SYS_WRITE - ecall - addi a0, zero, STDERR - la a1, token_open_square - li a2, 1 - addi a7, zero, SYS_WRITE - ecall - DEBUG. End */ - bnez a0, .Lcompile_call_paren lw t0, 12(sp) # Argument count for a procedure call. diff --git a/boot/common-boot.s b/boot/common-boot.s index bfd1dbb..2228a57 100644 --- a/boot/common-boot.s +++ b/boot/common-boot.s @@ -1,4 +1,4 @@ -.global is_alpha, is_digit, is_alnum, write_out, read_file, exit +.global is_alpha, is_digit, is_alnum, is_upper, is_lower, write_out, read_file, exit, memcmp, write_error .section .rodata @@ -9,30 +9,112 @@ .equ STDOUT, 1 .equ STDERR, 2 +new_line: .ascii "\n" + .section .text +# Write the current token to stderr. +# a0 - String pointer. +# a1 - String length. +.type write_error, @function +write_error: + mv t0, a0 + mv t1, a1 + + li a0, STDERR + mv a1, t0 + mv a2, t1 + li a7, SYS_WRITE + ecall + + li a0, STDERR + la a1, new_line + li a2, 1 + li a7, SYS_WRITE + ecall + + ret + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +.type memcmp, @function +memcmp: + mv t0, a0 + li a0, 0 + +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .Lmemcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcmp_loop + +.Lmemcmp_end: + ret + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +is_upper: + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' + + ret + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +.type is_lower, @function +is_lower: + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' + + ret + # Detects if the passed character is a 7-bit alpha character or an underscore. # The character is passed in a0. # Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. .type is_alpha, @function is_alpha: - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and t1, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' + sw a0, 4(sp) - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' + call is_upper + sw a0, 0(sp) - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and t2, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' + lw a0, 4(sp) + call is_lower - xori t3, a0, '_' - seqz t3, t3 + lw t0, 4(sp) + xori t1, t0, '_' + seqz t1, t1 - or a0, t1, t2 - or a0, a0, t3 + lw t0, 0(sp) + or a0, a0, t0 + or a0, a0, t1 + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 ret .type is_digit, @function diff --git a/boot/echo-boot.s b/boot/echo-boot.s index b6e65f7..a8faecb 100644 --- a/boot/echo-boot.s +++ b/boot/echo-boot.s @@ -1,35 +1,665 @@ -.global _start, source_code +.global _start +# s1 - Contains the current position in the source text. .section .rodata -.type SOURCE_BUFFER_SIZE, @object -.size SOURCE_BUFFER_SIZE, 4 -SOURCE_BUFFER_SIZE: .long 4096 +SOURCE_BUFFER_SIZE: .long 20480 .section .bss .type source_code, @object -.size source_code, 4096 -source_code: .zero 4096 +.size source_code, 20480 +source_code: .zero 20480 .section .text -_compile: +# Reads a token and returns its length in a0. +# _read_token doesn't change s1, it finds the length of the token s1 is pointing to. +.type _read_token, @function +_read_token: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + lbu t0, (s1) # t0 = Current character. + sw zero, 4(sp) + + li t1, '.' + beq t0, t1, .Ltoken_character_single + + li t1, ',' + beq t0, t1, .Ltoken_character_single + + li t1, ':' + beq t0, t1, .Ltoken_character_colon + + li t1, ';' + beq t0, t1, .Ltoken_character_single + + li t1, '(' + beq t0, t1, .Ltoken_character_single + + li t1, ')' + beq t0, t1, .Ltoken_character_single + + li t1, '[' + beq t0, t1, .Ltoken_character_single + + li t1, ']' + beq t0, t1, .Ltoken_character_single + +.Ltoken_character_loop_do: # Expect an identifier or a number. + lw t6, 4(sp) + add t1, s1, t6 + lbu a0, (t1) # a0 = Current character. + + call is_alnum + + beqz a0, .Ltoken_character_end + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + j .Ltoken_character_loop_do + +.Ltoken_character_single: + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + j .Ltoken_character_end + +.Ltoken_character_colon: + lbu t0, 1(s1) # t0 = The character after the colon. + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + + li t1, '=' + beq t0, t1, .Ltoken_character_single + j .Ltoken_character_end + +.Ltoken_character_end: + lw a0, 4(sp) + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 ret +# Skips the spaces till the next non space character. +.type _skip_spaces, @function +_skip_spaces: +.Lspace_loop_do: + lbu t0, (s1) # t0 = Current character. + + li t1, ' ' + beq t0, t1, .Lspace_loop_repeat + li t1, '\t' + beq t0, t1, .Lspace_loop_repeat + li t1, '\n' + beq t0, t1, .Lspace_loop_repeat + li t1, '\r' + beq t0, t1, .Lspace_loop_repeat + + j .Lspace_loop_end +.Lspace_loop_repeat: + addi s1, s1, 1 + j .Lspace_loop_do + +.Lspace_loop_end: + ret + +# Skips tabs at the line beginning. +.type _skip_indentation, @function +_skip_indentation: +.Lskip_indentation_do: + lbu t0, (s1) + + li t1, '\t' + beq t0, t1, .Lskip_indentation_skip + + j .Lskip_indentation_end + +.Lskip_indentation_skip: + addi s1, s1, 1 + j .Lskip_indentation_do + +.Lskip_indentation_end: + ret + +# Parameters: +# a0 - Line length. +.type _skip_comment, @function +_skip_comment: + add s1, s1, a0 + addi s1, s1, 1 # Skip the new line. + ret + +# Parameters: +# a0 - Line length. +.type _compile_assembly, @function +_compile_assembly: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + sw a0, 4(sp) # a0 - Line length. + + # Write the source to the standard output. + mv a0, s1 + lw a1, 4(sp) + call write_out + + lw t0, 4(sp) + add s1, s1, t0 + + li t0, '\n' + sb t0, 0(sp) + addi a0, sp, 0 + li a1, 1 + call write_out + + addi s1, s1, 1 # Skip the new line. + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +.type _compile_program, @function +_compile_program: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # .global _start + li t0, 0x0a7472 # rt\n + sw t0, 20(sp) + li t0, 0x6174735f # _sta + sw t0, 16(sp) + li t0, 0x206c6162 # bal_ + sw t0, 12(sp) + li t0, 0x6f6c672e # .glo + sw t0, 8(sp) + + addi a0, sp, 8 + li a1, 16 + call write_out + + addi s1, s1, 8 # program\n. + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_constant_section, @function +_compile_constant_section: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # .section .rodata + li t0, 0x0a # \n + sw t0, 20(sp) + li t0, 0x61746164 # data + sw t0, 16(sp) + li t0, 0x6f722e20 # _.ro + sw t0, 12(sp) + li t0, 0x6e6f6974 # tion + sw t0, 8(sp) + li t0, 0x6365732e # .sec + sw t0, 4(sp) + + addi a0, sp, 4 + li a1, 17 + call write_out + + addi s1, s1, 6 # const\n. + +.Lcompile_constant_section_item: + call _skip_spaces + lbu a0, (s1) + call is_upper + beqz a0, .Lcompile_constant_section_end + + call _compile_constant + j .Lcompile_constant_section_item + +.Lcompile_constant_section_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_constant, @function +_compile_constant: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + call _read_token + + mv a1, a0 # The identifier length from _read_token should be in a1. + mv a0, s1 # Save the identifier pointer before advancing it. + add s1, s1, a1 + call write_out + + call _skip_spaces + call _read_token + addi s1, s1, 2 # Skip the assignment sign. + + # : .long + li t0, 0x20676e6f # ong_ + sw t0, 4(sp) + li t0, 0x6c2e203a # : .l + sw t0, 0(sp) + mv a0, sp + li a1, 8 + call write_out + + call _skip_spaces + call _read_token + + mv a1, a0 # The literal length from _read_token should be in a1. + mv a0, s1 # Save the literal pointer before advancing it. + add s1, s1, a1 + call write_out + + li t0, '\n' + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 1 + call write_out + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +.type _compile_variable_section, @function +_compile_variable_section: + # Prologue. + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 + + # .section .bss + li t0, 0x0a73 # s\n + sw t0, 12(sp) + li t0, 0x73622e20 # _.bs + sw t0, 8(sp) + li t0, 0x6e6f6974 # tion + sw t0, 4(sp) + li t0, 0x6365732e # .sec + sw t0, 0(sp) + + addi a0, sp, 0 + li a1, 14 + call write_out + + addi s1, s1, 4 # var\n. + +.Lcompile_variable_section_item: + call _skip_spaces + lbu a0, (s1) + call is_lower + beqz a0, .Lcompile_variable_section_end + + call _compile_variable + j .Lcompile_variable_section_item + +.Lcompile_variable_section_end: + # Epilogue. + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 + ret + +.type _compile_variable, @function +_compile_variable: + # Prologue. + addi sp, sp, -40 + sw ra, 36(sp) + sw s0, 32(sp) + addi s0, sp, 40 + + call _read_token + + # Save the identifier on the stack since it should emitted multiple times. + sw s1, 28(sp) + sw a0, 24(sp) + add s1, s1, a0 + + call _skip_spaces + addi s1, s1, 1 # Skip the colon in front of the type. + + call _skip_spaces + addi s1, s1, 1 # Skip the opening bracket. + + call _read_token + + # Save the array size on the stack since it has to be emitted multiple times. + sw s1, 20(sp) + sw a0, 16(sp) + add s1, s1, a0 + + call _skip_spaces + addi s1, s1, 1 # Skip the closing bracket. + + call _skip_spaces + call _read_token + add s1, s1, a0 # Skip the type. + + # .type identifier, @object + li t0, 0x2065 # e_ + sw t0, 12(sp) + li t0, 0x7079742e # .typ + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 6 + call write_out + + lw a0, 28(sp) + lw a1, 24(sp) + call write_out + + li t0, 0x0a74 # t\n + sw t0, 12(sp) + li t0, 0x63656a62 # bjec + sw t0, 8(sp) + li t0, 0x6f40202c # , @o + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 10 + call write_out + + # .size identifier, size + li t0, 0x2065 # e_ + sw t0, 12(sp) + li t0, 0x7a69732e # .siz + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 6 + call write_out + + lw a0, 28(sp) + lw a1, 24(sp) + call write_out + + li t0, 0x202c # , + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 2 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a # \n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 1 + call write_out + + # identifier: .zero size + lw a0, 28(sp) + lw a1, 24(sp) + call write_out + + li t0, 0x206f7265 # ero_ + sw t0, 12(sp) + li t0, 0x7a2e203a # : .z + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 8 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a # \n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 1 + call write_out + + # Epilogue. + lw ra, 36(sp) + lw s0, 32(sp) + addi sp, sp, 40 + ret + +.type _compile_procedure, @function +_compile_procedure: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + addi s1, s1, 5 # Skip proc_ + call _read_token + sw s1, 20(sp) + sw a0, 16(sp) + add s1, s1, a0 + + # .type identifier, @function + li t0, 0x2065 # e_ + sw t0, 12(sp) + li t0, 0x7079742e # .typ + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 6 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a6e6f69 # ion\n + sw t0, 12(sp) + li t0, 0x74636e75 # unct + sw t0, 8(sp) + li t0, 0x6640202c # , @f + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 12 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a3a # :\n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 2 + call write_out + + call _skip_spaces + addi s1, s1, 1 # Skip opening argument paren. + call _skip_spaces + addi s1, s1, 1 # Skip closing argument paren. + call _skip_spaces + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Parameters: +# a0 - Line length. +.type _compile_line, @function +_compile_line: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) # a0 - Line length. + + beqz a0, .Lcompile_line_empty # Skip an empty line. + + lbu t0, (s1) + li t1, '#' + beq t0, t1, .Lcompile_line_comment + + li t0, 0x0a6d6172 # ram\n + sw t0, 16(sp) + li t0, 0x676f7270 # prog + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 8 + call memcmp + beqz a0, .Lcompile_line_program + + li t0, 0x0a74 # t\n + sw t0, 16(sp) + li t0, 0x736e6f63 # cons + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 6 + call memcmp + beqz a0, .Lcompile_line_const + + li t0, 0x0a726176 # var\n + sw t0, 16(sp) + mv a0, s1 + addi a1, sp, 16 + li a2, 4 + call memcmp + beqz a0, .Lcompile_line_var + + li t0, 0x20 # _ + sw t0, 16(sp) + li t0, 0x636f7270 # proc + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 5 + call memcmp + beqz a0, .Lcompile_line_procedure + + j .Lcompile_line_unchanged # Else. + +.Lcompile_line_const: + call _compile_constant_section + j .Lcompile_line_end + +.Lcompile_line_procedure: + call _compile_procedure + j .Lcompile_line_end + +.Lcompile_line_var: + + /* DEBUG + mv a0, s1 + li a1, 20 + call write_error */ + + call _compile_variable_section + j .Lcompile_line_end + +.Lcompile_line_program: + call _compile_program + j .Lcompile_line_end + +.Lcompile_line_comment: + lw a0, 20(sp) + call _skip_comment + j .Lcompile_line_end + +.Lcompile_line_empty: + addi s1, s1, 1 + j .Lcompile_line_end + +.Lcompile_line_unchanged: + lw a0, 20(sp) + call _compile_assembly + j .Lcompile_line_end + +.Lcompile_line_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Finds the end of the line and returns its length in a0. +.type _read_line, @function +_read_line: + mv t0, s1 # Local position in the source text. + +.Lread_line_do: + lbu t1, (t0) # t1 = Current character. + beqz t1, .Lread_line_end # Exit the loop on the NUL character. + li t2, '\n' + beq t1, t2, .Lread_line_end # Exit the loop on the new line. + + addi t0, t0, 1 + j .Lread_line_do + +.Lread_line_end: + sub a0, t0, s1 # Return the line length. + ret + +.type _compile, @function +_compile: + # Prologue. + addi sp, sp, -8 + sw ra, 4(sp) + sw s0, 0(sp) + addi s0, sp, 8 + +.Lcompile_do: + lbu t0, (s1) # t0 = Current character. + beqz t0, .Lcompile_end # Exit the loop on the NUL character. + + call _skip_indentation + call _read_line + call _compile_line + + j .Lcompile_do +.Lcompile_end: + + # Epilogue. + lw ra, 4(sp) + lw s0, 0(sp) + addi sp, sp, 8 + ret + +# Entry point. +.type _start, @function _start: # Read the source from the standard input. la a0, source_code la a1, SOURCE_BUFFER_SIZE # Buffer size. lw a1, (a1) call read_file - mv s1, a0 + la s1, source_code # s1 = Source code position. call _compile - # Write the source to the standard output. - la a0, source_code - mv a1, s1 - call write_out - # Call exit. li a0, 0 # Use 0 return code. call exit diff --git a/boot/stage2.elna b/boot/stage2.elna index a119f4a..4338ad8 100644 --- a/boot/stage2.elna +++ b/boot/stage2.elna @@ -1,31 +1,661 @@ -.global _start, source_code +program +# s1 - Contains the current position in the source text. -.equ SYS_READ, 63 -.equ SYS_WRITE, 64 -.equ SYS_EXIT, 93 -.equ STDIN, 0 -.equ STDOUT, 1 -.equ STDERR, 2 +const + SOURCE_BUFFER_SIZE := 20480 -.equ SOURCE_BUFFER_SIZE, 2048 - -.section .bss -.type source_code, @object -.size source_code, SOURCE_BUFFER_SIZE -source_code: .zero SOURCE_BUFFER_SIZE +var + source_code: [20480]Byte .section .text +# Reads a token and returns its length in a0. +# _read_token doesn't change s1, it finds the length of the token s1 is pointing to. +proc _read_token() + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + lbu t0, (s1) # t0 = Current character. + sw zero, 4(sp) + + li t1, '.' + beq t0, t1, .Ltoken_character_single + + li t1, ',' + beq t0, t1, .Ltoken_character_single + + li t1, ':' + beq t0, t1, .Ltoken_character_colon + + li t1, ';' + beq t0, t1, .Ltoken_character_single + + li t1, '(' + beq t0, t1, .Ltoken_character_single + + li t1, ')' + beq t0, t1, .Ltoken_character_single + + li t1, '[' + beq t0, t1, .Ltoken_character_single + + li t1, ']' + beq t0, t1, .Ltoken_character_single + +.Ltoken_character_loop_do: # Expect an identifier or a number. + lw t6, 4(sp) + add t1, s1, t6 + lbu a0, (t1) # a0 = Current character. + + call is_alnum + + beqz a0, .Ltoken_character_end + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + j .Ltoken_character_loop_do + +.Ltoken_character_single: + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + j .Ltoken_character_end + +.Ltoken_character_colon: + lbu t0, 1(s1) # t0 = The character after the colon. + lw t6, 4(sp) + addi t6, t6, 1 + sw t6, 4(sp) + + li t1, '=' + beq t0, t1, .Ltoken_character_single + j .Ltoken_character_end + +.Ltoken_character_end: + lw a0, 4(sp) + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +# Skips the spaces till the next non space character. +.type _skip_spaces, @function +_skip_spaces: +.Lspace_loop_do: + lbu t0, (s1) # t0 = Current character. + + li t1, ' ' + beq t0, t1, .Lspace_loop_repeat + li t1, '\t' + beq t0, t1, .Lspace_loop_repeat + li t1, '\n' + beq t0, t1, .Lspace_loop_repeat + li t1, '\r' + beq t0, t1, .Lspace_loop_repeat + + j .Lspace_loop_end +.Lspace_loop_repeat: + addi s1, s1, 1 + j .Lspace_loop_do + +.Lspace_loop_end: + ret + +# Skips tabs at the line beginning. +.type _skip_indentation, @function +_skip_indentation: +.Lskip_indentation_do: + lbu t0, (s1) + + li t1, '\t' + beq t0, t1, .Lskip_indentation_skip + + j .Lskip_indentation_end + +.Lskip_indentation_skip: + addi s1, s1, 1 + j .Lskip_indentation_do + +.Lskip_indentation_end: + ret + +# Parameters: +# a0 - Line length. +.type _skip_comment, @function +_skip_comment: + add s1, s1, a0 + addi s1, s1, 1 # Skip the new line. + ret + +# Parameters: +# a0 - Line length. +.type _compile_assembly, @function +_compile_assembly: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + sw a0, 4(sp) # a0 - Line length. + + # Write the source to the standard output. + mv a0, s1 + lw a1, 4(sp) + call write_out + + lw t0, 4(sp) + add s1, s1, t0 + + li t0, '\n' + sb t0, 0(sp) + addi a0, sp, 0 + li a1, 1 + call write_out + + addi s1, s1, 1 # Skip the new line. + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +.type _compile_program, @function +_compile_program: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # .global _start + li t0, 0x0a7472 # rt\n + sw t0, 20(sp) + li t0, 0x6174735f # _sta + sw t0, 16(sp) + li t0, 0x206c6162 # bal_ + sw t0, 12(sp) + li t0, 0x6f6c672e # .glo + sw t0, 8(sp) + + addi a0, sp, 8 + li a1, 16 + call write_out + + addi s1, s1, 8 # program\n. + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_constant_section, @function +_compile_constant_section: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + # .section .rodata + li t0, 0x0a # \n + sw t0, 20(sp) + li t0, 0x61746164 # data + sw t0, 16(sp) + li t0, 0x6f722e20 # _.ro + sw t0, 12(sp) + li t0, 0x6e6f6974 # tion + sw t0, 8(sp) + li t0, 0x6365732e # .sec + sw t0, 4(sp) + + addi a0, sp, 4 + li a1, 17 + call write_out + + addi s1, s1, 6 # const\n. + +.Lcompile_constant_section_item: + call _skip_spaces + lbu a0, (s1) + call is_upper + beqz a0, .Lcompile_constant_section_end + + call _compile_constant + j .Lcompile_constant_section_item + +.Lcompile_constant_section_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +.type _compile_constant, @function +_compile_constant: + # Prologue. + addi sp, sp, -16 + sw ra, 12(sp) + sw s0, 8(sp) + addi s0, sp, 16 + + call _read_token + + mv a1, a0 # The identifier length from _read_token should be in a1. + mv a0, s1 # Save the identifier pointer before advancing it. + add s1, s1, a1 + call write_out + + call _skip_spaces + call _read_token + addi s1, s1, 2 # Skip the assignment sign. + + # : .long + li t0, 0x20676e6f # ong_ + sw t0, 4(sp) + li t0, 0x6c2e203a # : .l + sw t0, 0(sp) + mv a0, sp + li a1, 8 + call write_out + + call _skip_spaces + call _read_token + + mv a1, a0 # The literal length from _read_token should be in a1. + mv a0, s1 # Save the literal pointer before advancing it. + add s1, s1, a1 + call write_out + + li t0, '\n' + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 1 + call write_out + + # Epilogue. + lw ra, 12(sp) + lw s0, 8(sp) + addi sp, sp, 16 + ret + +.type _compile_variable_section, @function +_compile_variable_section: + # Prologue. + addi sp, sp, -24 + sw ra, 20(sp) + sw s0, 16(sp) + addi s0, sp, 24 + + # .section .bss + li t0, 0x0a73 # s\n + sw t0, 12(sp) + li t0, 0x73622e20 # _.bs + sw t0, 8(sp) + li t0, 0x6e6f6974 # tion + sw t0, 4(sp) + li t0, 0x6365732e # .sec + sw t0, 0(sp) + + addi a0, sp, 0 + li a1, 14 + call write_out + + addi s1, s1, 4 # var\n. + +.Lcompile_variable_section_item: + call _skip_spaces + lbu a0, (s1) + call is_lower + beqz a0, .Lcompile_variable_section_end + + call _compile_variable + j .Lcompile_variable_section_item + +.Lcompile_variable_section_end: + # Epilogue. + lw ra, 20(sp) + lw s0, 16(sp) + addi sp, sp, 24 + ret + +.type _compile_variable, @function +_compile_variable: + # Prologue. + addi sp, sp, -40 + sw ra, 36(sp) + sw s0, 32(sp) + addi s0, sp, 40 + + call _read_token + + # Save the identifier on the stack since it should emitted multiple times. + sw s1, 28(sp) + sw a0, 24(sp) + add s1, s1, a0 + + call _skip_spaces + addi s1, s1, 1 # Skip the colon in front of the type. + + call _skip_spaces + addi s1, s1, 1 # Skip the opening bracket. + + call _read_token + + # Save the array size on the stack since it has to be emitted multiple times. + sw s1, 20(sp) + sw a0, 16(sp) + add s1, s1, a0 + + call _skip_spaces + addi s1, s1, 1 # Skip the closing bracket. + + call _skip_spaces + call _read_token + add s1, s1, a0 # Skip the type. + + # .type identifier, @object + li t0, 0x2065 # e_ + sw t0, 12(sp) + li t0, 0x7079742e # .typ + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 6 + call write_out + + lw a0, 28(sp) + lw a1, 24(sp) + call write_out + + li t0, 0x0a74 # t\n + sw t0, 12(sp) + li t0, 0x63656a62 # bjec + sw t0, 8(sp) + li t0, 0x6f40202c # , @o + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 10 + call write_out + + # .size identifier, size + li t0, 0x2065 # e_ + sw t0, 12(sp) + li t0, 0x7a69732e # .siz + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 6 + call write_out + + lw a0, 28(sp) + lw a1, 24(sp) + call write_out + + li t0, 0x202c # , + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 2 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a # \n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 1 + call write_out + + # identifier: .zero size + lw a0, 28(sp) + lw a1, 24(sp) + call write_out + + li t0, 0x206f7265 # ero_ + sw t0, 12(sp) + li t0, 0x7a2e203a # : .z + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 8 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a # \n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 1 + call write_out + + # Epilogue. + lw ra, 36(sp) + lw s0, 32(sp) + addi sp, sp, 40 + ret + +.type _compile_procedure, @function +_compile_procedure: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + addi s1, s1, 5 # Skip proc_ + call _read_token + sw s1, 20(sp) + sw a0, 16(sp) + add s1, s1, a0 + + # .type identifier, @function + li t0, 0x2065 # e_ + sw t0, 12(sp) + li t0, 0x7079742e # .typ + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 6 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a6e6f69 # ion\n + sw t0, 12(sp) + li t0, 0x74636e75 # unct + sw t0, 8(sp) + li t0, 0x6640202c # , @f + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 12 + call write_out + + lw a0, 20(sp) + lw a1, 16(sp) + call write_out + + li t0, 0x0a3a # :\n + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 2 + call write_out + + call _skip_spaces + addi s1, s1, 1 # Skip opening argument paren. + call _skip_spaces + addi s1, s1, 1 # Skip closing argument paren. + call _skip_spaces + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Parameters: +# a0 - Line length. +.type _compile_line, @function +_compile_line: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) # a0 - Line length. + + beqz a0, .Lcompile_line_empty # Skip an empty line. + + lbu t0, (s1) + li t1, '#' + beq t0, t1, .Lcompile_line_comment + + li t0, 0x0a6d6172 # ram\n + sw t0, 16(sp) + li t0, 0x676f7270 # prog + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 8 + call memcmp + beqz a0, .Lcompile_line_program + + li t0, 0x0a74 # t\n + sw t0, 16(sp) + li t0, 0x736e6f63 # cons + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 6 + call memcmp + beqz a0, .Lcompile_line_const + + li t0, 0x0a726176 # var\n + sw t0, 16(sp) + mv a0, s1 + addi a1, sp, 16 + li a2, 4 + call memcmp + beqz a0, .Lcompile_line_var + + li t0, 0x20 # _ + sw t0, 16(sp) + li t0, 0x636f7270 # proc + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 5 + call memcmp + beqz a0, .Lcompile_line_procedure + + j .Lcompile_line_unchanged # Else. + +.Lcompile_line_const: + call _compile_constant_section + j .Lcompile_line_end + +.Lcompile_line_procedure: + call _compile_procedure + j .Lcompile_line_end + +.Lcompile_line_var: + + /* DEBUG + mv a0, s1 + li a1, 20 + call write_error */ + + call _compile_variable_section + j .Lcompile_line_end + +.Lcompile_line_program: + call _compile_program + j .Lcompile_line_end + +.Lcompile_line_comment: + lw a0, 20(sp) + call _skip_comment + j .Lcompile_line_end + +.Lcompile_line_empty: + addi s1, s1, 1 + j .Lcompile_line_end + +.Lcompile_line_unchanged: + lw a0, 20(sp) + call _compile_assembly + j .Lcompile_line_end + +.Lcompile_line_end: + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + +# Finds the end of the line and returns its length in a0. +.type _read_line, @function +_read_line: + mv t0, s1 # Local position in the source text. + +.Lread_line_do: + lbu t1, (t0) # t1 = Current character. + beqz t1, .Lread_line_end # Exit the loop on the NUL character. + li t2, '\n' + beq t1, t2, .Lread_line_end # Exit the loop on the new line. + + addi t0, t0, 1 + j .Lread_line_do + +.Lread_line_end: + sub a0, t0, s1 # Return the line length. + ret + +.type _compile, @function +_compile: + # Prologue. + addi sp, sp, -8 + sw ra, 4(sp) + sw s0, 0(sp) + addi s0, sp, 8 + +.Lcompile_do: + lbu t0, (s1) # t0 = Current character. + beqz t0, .Lcompile_end # Exit the loop on the NUL character. + + call _skip_indentation + call _read_line + call _compile_line + + j .Lcompile_do +.Lcompile_end: + + # Epilogue. + lw ra, 4(sp) + lw s0, 0(sp) + addi sp, sp, 8 + ret + +# Entry point. +.type _start, @function _start: # Read the source from the standard input. la a0, source_code - li a1, SOURCE_BUFFER_SIZE # Buffer size. + la a1, SOURCE_BUFFER_SIZE # Buffer size. + lw a1, (a1) call read_file - # Write the source to the standard output. - mv a1, a0 - la a0, source_code - call write_out + la s1, source_code # s1 = Source code position. + call _compile # Call exit. li a0, 0 # Use 0 return code.