diff options
| author | Eugen Wissner <belka@caraus.de> | 2025-11-08 11:07:39 +0100 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2025-11-08 11:07:39 +0100 |
| commit | d144cb21012c911135d5047059449195a89ea239 (patch) | |
| tree | db8d8a69092192c07b2ab2c76bfd51848e3884fd /boot/stage2/cl.elna | |
| parent | 0b516345666b52d29bb10521b4d3c2c2420b3368 (diff) | |
| download | elna-d144cb21012c911135d5047059449195a89ea239.tar.gz | |
Move stages into subdirectories
Diffstat (limited to 'boot/stage2/cl.elna')
| -rw-r--r-- | boot/stage2/cl.elna | 859 |
1 files changed, 859 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

# Stage 2 compiler.
#
# - Procedures with none or one argument.
# - Goto statements.
# - Character and integer literals.
# - Passing local variables to procedures.
# - Local variables should have the format: v00,
#   where 00 is its offset from the sp register.
#
# The compiler reads the whole source from the standard input into
# source_code, then walks it with the cursor stored in source_code_position
# and writes the generated assembly to the standard output.
#
# Procedures below keep their temporaries in the stack slots 12(sp), 16(sp)
# and 20(sp). NOTE(review): this presumes the previous stage emits the same
# 32-byte frame as asm_prologue below; confirm against the stage 1 sources.

.section .rodata

# Keywords recognized in the source together with their lengths.

.type keyword_equ, @object
keyword_equ: .ascii ".equ"
.equ KEYWORD_EQU_SIZE, 4

.type keyword_section, @object
keyword_section: .ascii ".section"
.equ KEYWORD_SECTION_SIZE, 8

.type keyword_type, @object
keyword_type: .ascii ".type"
.equ KEYWORD_TYPE_SIZE, 5

.type keyword_ret, @object
keyword_ret: .ascii "ret"
.equ KEYWORD_RET_SIZE, 3

.type keyword_global, @object
keyword_global: .ascii ".globl"
.equ KEYWORD_GLOBAL_SIZE, 6

.type keyword_proc, @object
keyword_proc: .ascii "proc "
.equ KEYWORD_PROC_SIZE, 5

.type keyword_end, @object
keyword_end: .ascii "end"
.equ KEYWORD_END_SIZE, 3

.type keyword_begin, @object
keyword_begin: .ascii "begin"
.equ KEYWORD_BEGIN_SIZE, 5

.type keyword_var, @object
keyword_var: .ascii "var"
.equ KEYWORD_VAR_SIZE, 3

# Null terminated fragments of the generated assembly.

.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"

.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"

.type asm_type_directive, @object
asm_type_directive: .string ".type "

.type asm_type_function, @object
asm_type_function: .string ", @function\n"

.type asm_colon, @object
asm_colon: .string ":\n"

.type asm_call, @object
asm_call: .string "\tcall "

.type asm_j, @object
asm_j: .string "\tj "

.type asm_li, @object
asm_li: .string "\tli "

.type asm_lw, @object
asm_lw: .string "\tlw "

.type asm_t0, @object
asm_t0: .string "t0"

.type asm_a0, @object
asm_a0: .string "a0"

.type asm_comma, @object
asm_comma: .string ", "

.type asm_sp, @object
asm_sp: .string "(sp)"

.section .bss

# Buffer holding the complete source text. It lives in .bss, so every byte
# that is not overwritten by the read stays zero and terminates the input.
.equ SOURCE_BUFFER_SIZE, 81920
.type source_code, @object
source_code: .zero SOURCE_BUFFER_SIZE

.section .data

# Pointer to the next unprocessed byte of source_code.
.type source_code_position, @object
source_code_position: .word source_code

.section .text

# Reads standard input into a buffer.
#
# Parameters:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the result of the read system call in a0, that is the amount
# of bytes read.
proc _read_file();
begin
	mv a2, a1
	mv a1, a0
	# STDIN.
	li a0, 0
	li a7, 63 # SYS_READ.
	ecall
end;

# Writes to the standard output.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write();
begin
	mv a2, a1
	mv a1, a0
	# STDOUT.
	li a0, 1
	li a7, 64 # SYS_WRITE.
	ecall
end;

# Writes a character from a0 into the standard output.
#
# The character is spilled into the local slot at 20(sp) because _write
# expects a pointer to the data.
proc _write_c();
begin
	sb a0, 20(sp)
	addi a0, sp, 20
	li a1, 1
	_write();
end;

# Write null terminated string.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
	# 20(sp) holds the pointer to the next character to print.
	sw a0, 20(sp)

.write_z_loop:
	# Check for 0 character.
	lb a0, (a0)
	beqz a0, .write_z_end

	# Print a character.
	lw a0, 20(sp)
	lb a0, (a0)
	_write_c();

	# Advance the input string by one byte.
	lw a0, 20(sp)
	addi a0, a0, 1
	sw a0, 20(sp)

	j .write_z_loop

.write_z_end:
end;

# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_upper();
begin
	li t0, 'A' - 1
	sltu t1, t0, a0 # t1 = a0 >= 'A'

	sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
	and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
end;

# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
proc _is_lower();
begin
	li t0, 'a' - 1
	sltu t2, t0, a0 # t2 = a0 >= 'a'

	sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
	and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
end;

# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
	# 20(sp) keeps the tested character, 16(sp) the uppercase check result.
	sw a0, 20(sp)

	_is_upper();
	sw a0, 16(sp)

	lw a0, 20(sp)
	_is_lower();

	# t1 = character == '_'
	lw t0, 20(sp)
	xori t1, t0, '_'
	seqz t1, t1

	# Combine the lowercase, uppercase and underscore checks.
	lw t0, 16(sp)
	or a0, a0, t0
	or a0, a0, t1
end;

# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
	li t0, '0' - 1
	sltu t1, t0, a0 # t1 = a0 >= '0'

	sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'

	and a0, t1, t2
end;

# Reads the next token.
#
# A token is a run of dots, alpha characters, underscores and digits starting
# at source_code_position. The position itself is not changed, only a local
# copy of it is advanced.
#
# Returns token length in a0.
proc _read_token();
begin
	la t0, source_code_position # Token pointer.
	lw t0, (t0)
	sw t0, 20(sp) # Current token position.
	sw zero, 16(sp) # Token length.

.read_token_loop:
	lb t0, (t0) # Current character.

	# First we try to read a directive.
	# A directive can contain a dot and characters.
	li t1, '.'
	beq t0, t1, .read_token_next

	lw a0, 20(sp)
	lb a0, (a0)
	_is_alpha();
	bnez a0, .read_token_next

	lw a0, 20(sp)
	lb a0, (a0)
	_is_digit();
	bnez a0, .read_token_next

	j .read_token_end

.read_token_next:
	# Advance the source code position and token length.
	lw t0, 16(sp)
	addi t0, t0, 1
	sw t0, 16(sp)

	# t0 is left pointing at the next character for the loop head.
	lw t0, 20(sp)
	addi t0, t0, 1
	sw t0, 20(sp)

	j .read_token_loop

.read_token_end:
	lw a0, 16(sp)
end;

# Compares two memory regions byte by byte.
#
# Parameters:
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal, otherwise the difference
# between the first pair of bytes that differ.
proc _memcmp();
begin
	mv t0, a0
	li a0, 0

.Lmemcmp_loop:
	beqz a2, .Lmemcmp_end

	lbu t1, (t0)
	lbu t2, (a1)
	sub a0, t1, t2

	bnez a0, .Lmemcmp_end

	addi t0, t0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	j .Lmemcmp_loop

.Lmemcmp_end:
end;

# Advances the token stream by a0 bytes.
proc _advance_token();
begin
	la t0, source_code_position
	lw t1, (t0)
	add t1, t1, a0
	sw t1, (t0)
end;

# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
	# Keep the length, _write overwrites a0.
	sw a0, 20(sp)

	la a0, source_code_position
	lw a0, (a0)
	lw a1, 20(sp)
	_write();

	lw a0, 20(sp)
end;

# Copies a .section directive with its section name to the output.
proc _compile_section();
begin
	# Print and skip the .section directive and a space after it.
	li a0, KEYWORD_SECTION_SIZE + 1
	_write_token();
	_advance_token();

	# Read the section name, the extra byte is the character following it.
	_read_token();
	addi a0, a0, 1

	_write_token();
	_advance_token();
end;

# Skips a comment line without printing it.
#
# Stops either after the terminating newline or at the 0 byte that ends
# the input, so a last line without a newline does not run past the source.
proc _skip_comment();
begin
	la t0, source_code_position
	lw t1, (t0)

.skip_comment_loop:
	lb t2, (t1)

	# Check for the end of the input, the 0 byte is left unconsumed.
	beqz t2, .skip_comment_end

	# Check for newline character.
	li t3, '\n'
	beq t2, t3, .skip_comment_newline

	# Advance the input string by one byte.
	addi t1, t1, 1
	sw t1, (t0)

	j .skip_comment_loop

.skip_comment_newline:
	# Skip the newline.
	addi t1, t1, 1
	sw t1, (t0)

.skip_comment_end:
end;

# Prints and skips a line.
proc _compile_line();
begin
.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)

	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end

	# Print a character. Only one byte is loaded, the source position
	# is not necessarily word aligned.
	lb a0, (a1)
	_write_c();

	# Advance the input string by one byte.
	li a0, 1
	_advance_token();

	j .compile_line_loop

.compile_line_end:
	li a0, '\n'
	_write_c();

	li a0, 1
	_advance_token();
end;

# Generates "li a0, <literal>" for the integer literal at the current position.
proc _compile_integer_literal();
begin
	la a0, asm_li
	_write_z();

	la a0, asm_a0
	_write_z();

	la a0, asm_comma
	_write_z();

	# Copy the literal itself.
	_read_token();
	_write_token();
	_advance_token();

	li a0, '\n'
	_write_c();
end;

# Generates "li a0, <literal>" for the character literal at the current
# position. The position should point to the opening quote.
#
# The literal is copied up to the next quote character, so an escaped
# quote ('\'') is cut at the escaped quote and is not supported.
proc _compile_character_literal();
begin
	la a0, asm_li
	_write_z();

	la a0, asm_a0
	_write_z();

	la a0, asm_comma
	_write_z();

.compile_character_literal_loop:
	# Copy one character, starting with the opening quote.
	la a0, source_code_position
	lw a0, (a0)
	li a1, 1
	_write();
	li a0, 1
	_advance_token();

	# Stop when the next character is a quote.
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	li t1, '\''
	beq a0, t1, .compile_character_literal_end

	j .compile_character_literal_loop

.compile_character_literal_end:
	li a0, '\''
	_write_c();

	li a0, '\n'
	_write_c();

	# Skip the closing quote.
	li a0, 1
	_advance_token();
end;

# Generates "lw a0, 00(sp)" for the local variable v00 at the current position.
proc _compile_variable_expression();
begin
	la a0, asm_lw
	_write_z();

	la a0, asm_a0
	_write_z();

	la a0, asm_comma
	_write_z();

	# Print the two digit offset following the "v".
	la a0, source_code_position
	lw a0, (a0)
	addi a0, a0, 1
	li a1, 2
	_write();

	la a0, asm_sp
	_write_z();

	li a0, '\n'
	_write_c();

	# Skip the "v" and both digits.
	li a0, 3
	_advance_token();
end;

# Compiles the expression at the current position. The generated code
# leaves the value in a0.
#
# A quote starts a character literal, "v" a local variable and a digit
# an integer literal. Anything else generates nothing and is not skipped.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)

	li t1, '\''
	beq a0, t1, .compile_expression_character_literal

	li t1, 'v'
	beq a0, t1, .compile_expression_variable

	_is_digit();
	bnez a0, .compile_expression_integer_literal

	j .compile_expression_end

.compile_expression_character_literal:
	_compile_character_literal();
	j .compile_expression_end

.compile_expression_integer_literal:
	_compile_integer_literal();
	j .compile_expression_end

.compile_expression_variable:
	_compile_variable_expression();
	j .compile_expression_end

.compile_expression_end:
end;

# Compiles a procedure call with none or one argument. The position should
# point to the procedure name.
#
# The code for the argument, if there is one, is generated first, then
# the call instruction. The trailing newline is not printed here.
proc _compile_call();
begin
	# 20(sp) keeps the procedure name length, 16(sp) the pointer to the name.
	_read_token();
	sw a0, 20(sp)
	la t0, source_code_position
	lw t0, (t0)
	sw t0, 16(sp)

	# Skip the identifier and left paren.
	addi a0, a0, 1
	_advance_token();

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)

	# A right paren right after the left one means there is no argument.
	li t1, ')'
	beq t0, t1, .compile_call_finalize

	_compile_expression();

.compile_call_finalize:
	la a0, asm_call
	_write_z();

	lw a0, 16(sp)
	lw a1, 20(sp)
	_write();

	# Skip the right paren.
	li a0, 1
	_advance_token();
end;

# Compiles a goto statement into a j instruction. The position should point
# to the "goto" keyword. The trailing newline is not printed here.
proc _compile_goto();
begin
	# Skip "goto" and the space after it.
	li a0, 5
	_advance_token();

	# Read the label.
	_read_token();
	sw a0, 20(sp)

	la a0, asm_j
	_write_z();

	lw a0, 20(sp)
	_write_token();
	_advance_token();
end;

# Compiles one line of a procedure body.
#
# The decision is made on the second character of the line, that is
# the first character after the alignment tab: an underscore starts a call,
# "g" starts a goto. Every other line is copied to the output unchanged.
proc _compile_statement();
begin
	la t0, source_code_position
	lw t0, (t0)
	# First character after alignment tab.
	addi t0, t0, 1
	lb t0, (t0)

	# This is a call if the statement starts with an underscore.
	li t1, '_'
	beq t0, t1, .compile_statement_call

	li t1, 'g'
	beq t0, t1, .compile_statement_goto

	_compile_line();
	j .compile_statement_end

.compile_statement_call:
	# Skip the alignment tab.
	li a0, 1
	_advance_token();
	_compile_call();

	j .compile_statement_semicolon

.compile_statement_goto:
	# Skip the alignment tab.
	li a0, 1
	_advance_token();
	_compile_goto();

	j .compile_statement_semicolon

.compile_statement_semicolon:
	# Skip the semicolon and the newline.
	li a0, 2
	_advance_token();

	li a0, '\n'
	_write_c();

.compile_statement_end:
end;

# Compiles statements until a line starting with "end" is reached.
# The "end" keyword itself is not skipped.
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_end
	li a2, KEYWORD_END_SIZE
	_memcmp();

	beqz a0, .compile_procedure_body_epilogue

	_compile_statement();
	j .compile_procedure_body_loop

.compile_procedure_body_epilogue:
end;

# Compiles a procedure definition. The position should point to the "proc"
# keyword.
#
# Generates the .type directive, the label, the prologue, the body and
# the epilogue of the procedure.
proc _compile_procedure();
begin
	# Skip "proc ".
	li a0, KEYWORD_PROC_SIZE
	_advance_token();

	_read_token();
	sw a0, 20(sp) # Save the procedure name length.

	# Write .type _procedure_name, @function.
	la a0, asm_type_directive
	_write_z();

	lw a0, 20(sp)
	_write_token();

	la a0, asm_type_function
	_write_z();

	# Write procedure label, _procedure_name:
	lw a0, 20(sp)
	_write_token();

	la a0, asm_colon
	_write_z();

	# Skip the function name and trailing parens, semicolon, "begin" and newline.
	lw a0, 20(sp)
	addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4
	_advance_token();

	la a0, asm_prologue
	_write_z();

	_compile_procedure_body();

	# Write the epilogue.
	la a0, asm_epilogue
	_write_z();

	# Skip "end", the semicolon and the newline.
	li a0, KEYWORD_END_SIZE + 2
	_advance_token();
end;

# Copies a .type directive and the line following it, which should be
# the definition of the declared object, to the output.
proc _compile_type();
begin
	# Print and skip the .type directive and a space after it.
	li a0, KEYWORD_TYPE_SIZE + 1
	_write_token();
	_advance_token();

	# Read and print the symbol name.
	_read_token();
	sw a0, 20(sp)

	# Print and skip the symbol name, comma, space and @.
	lw a0, 20(sp)
	addi a0, a0, 3
	_write_token();
	_advance_token();

	# Read the symbol type.
	_read_token();
	sw a0, 16(sp)

	# Print the symbol type and newline.
	lw a0, 16(sp)
	addi a0, a0, 1
	_write_token();
	_advance_token();

	# Write the object definition itself.
	_compile_line();
end;

# Copies an .equ directive to the output.
proc _compile_equ();
begin
	# Print and skip the .equ directive and a space after it.
	li a0, KEYWORD_EQU_SIZE + 1
	_write_token();
	_advance_token();

	# Read and print the constant name.
	_read_token();
	sw a0, 20(sp)

	# Print and skip the constant name, comma and space.
	lw a0, 20(sp)
	addi a0, a0, 2
	_write_token();
	_advance_token();

	# Read the constant value.
	_read_token();
	sw a0, 16(sp)

	# Print and skip the constant value and newline.
	lw a0, 16(sp)
	addi a0, a0, 1
	_write_token();
	_advance_token();
end;

# Moves the source position to the first character that is not a newline.
proc _skip_newlines();
begin
	la t0, source_code_position
	lw t1, (t0)

.skip_newlines_loop:
	# Any other character, including the 0 byte ending the input, stops the loop.
	lb t2, (t1)
	li t3, '\n'
	bne t2, t3, .skip_newlines_end

	addi t1, t1, 1
	sw t1, (t0)

	j .skip_newlines_loop

.skip_newlines_end:
end;

# Process the source code and print the generated code.
#
# Each top level construct is recognized by its leading characters: a comment,
# .equ, .section, .type, proc or .globl. Processing stops at the 0 byte ending
# the input or at the first construct that is not recognized.
proc _compile();
begin
.compile_loop:
	_skip_newlines();

	la t0, source_code_position
	lw t0, (t0)
	lb t0, (t0)
	beqz t0, .compile_end
	li t1, '#'
	beq t0, t1, .compile_comment

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_equ
	li a2, KEYWORD_EQU_SIZE
	_memcmp();

	beqz a0, .compile_equ

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_section
	li a2, KEYWORD_SECTION_SIZE
	_memcmp();

	beqz a0, .compile_section

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_type
	li a2, KEYWORD_TYPE_SIZE
	_memcmp();

	beqz a0, .compile_type

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_proc
	li a2, KEYWORD_PROC_SIZE
	_memcmp();

	beqz a0, .compile_procedure

	la a0, source_code_position
	lw a0, (a0)
	la a1, keyword_global
	li a2, KEYWORD_GLOBAL_SIZE
	_memcmp();

	beqz a0, .compile_global
	# Not a known token, exit.
	j .compile_end

.compile_equ:
	_compile_equ();

	j .compile_loop

.compile_section:
	_compile_section();

	j .compile_loop

.compile_type:
	_compile_type();

	j .compile_loop

.compile_global:
	# The .globl directive is copied as is.
	_compile_line();

	j .compile_loop

.compile_comment:
	_skip_comment();

	j .compile_loop

.compile_procedure:
	_compile_procedure();

	j .compile_loop

.compile_end:
end;

# Entry point.
.globl _start
proc _start();
begin
	# Read the source from the standard input. The last byte of the buffer
	# is never written to, so the source is always followed by a 0 byte.
	la a0, source_code
	li a1, SOURCE_BUFFER_SIZE - 1 # Buffer size without the terminator.
	_read_file();
	_compile();

	# Call exit.
	li a0, 0 # Use 0 return code.
	li a7, 93 # SYS_EXIT.
	ecall
end;
