From d144cb21012c911135d5047059449195a89ea239 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sat, 8 Nov 2025 11:07:39 +0100 Subject: Move stages into subdirectories --- boot/stage10.elna | 1754 --------------- boot/stage10/cl.elna | 1754 +++++++++++++++ boot/stage11.elna | 1738 --------------- boot/stage11/cl.elna | 1738 +++++++++++++++ boot/stage12.elna | 1944 ----------------- boot/stage12/cl.elna | 1944 +++++++++++++++++ boot/stage13.elna | 2870 ------------------------- boot/stage13/cl.elna | 2870 +++++++++++++++++++++++++ boot/stage14.elna | 3053 -------------------------- boot/stage14/cl.elna | 3053 ++++++++++++++++++++++++++ boot/stage15.elna | 5403 ---------------------------------------------- boot/stage15/cl.elna | 5408 +++++++++++++++++++++++++++++++++++++++++++++++ boot/stage16.elna | 4772 ----------------------------------------- boot/stage16/cl.elna | 4821 ++++++++++++++++++++++++++++++++++++++++++ boot/stage16/linker.arg | 0 boot/stage2.elna | 859 -------- boot/stage2/cl.elna | 859 ++++++++ boot/stage3.elna | 971 --------- boot/stage3/cl.elna | 971 +++++++++ boot/stage4.elna | 1129 ---------- boot/stage4/cl.elna | 1129 ++++++++++ boot/stage5.elna | 1487 ------------- boot/stage5/cl.elna | 1487 +++++++++++++ boot/stage6.elna | 1588 -------------- boot/stage6/cl.elna | 1588 ++++++++++++++ boot/stage7.elna | 1488 ------------- boot/stage7/cl.elna | 1488 +++++++++++++ boot/stage8.elna | 1989 ----------------- boot/stage8/cl.elna | 1989 +++++++++++++++++ boot/stage9.elna | 1993 ----------------- boot/stage9/cl.elna | 1993 +++++++++++++++++ boot/symbol.s | 297 --- 32 files changed, 33092 insertions(+), 33335 deletions(-) delete mode 100644 boot/stage10.elna create mode 100644 boot/stage10/cl.elna delete mode 100644 boot/stage11.elna create mode 100644 boot/stage11/cl.elna delete mode 100644 boot/stage12.elna create mode 100644 boot/stage12/cl.elna delete mode 100644 boot/stage13.elna create mode 100644 boot/stage13/cl.elna delete mode 100644 
boot/stage14.elna create mode 100644 boot/stage14/cl.elna delete mode 100644 boot/stage15.elna create mode 100644 boot/stage15/cl.elna delete mode 100644 boot/stage16.elna create mode 100644 boot/stage16/cl.elna create mode 100644 boot/stage16/linker.arg delete mode 100644 boot/stage2.elna create mode 100644 boot/stage2/cl.elna delete mode 100644 boot/stage3.elna create mode 100644 boot/stage3/cl.elna delete mode 100644 boot/stage4.elna create mode 100644 boot/stage4/cl.elna delete mode 100644 boot/stage5.elna create mode 100644 boot/stage5/cl.elna delete mode 100644 boot/stage6.elna create mode 100644 boot/stage6/cl.elna delete mode 100644 boot/stage7.elna create mode 100644 boot/stage7/cl.elna delete mode 100644 boot/stage8.elna create mode 100644 boot/stage8/cl.elna delete mode 100644 boot/stage9.elna create mode 100644 boot/stage9/cl.elna delete mode 100644 boot/symbol.s (limited to 'boot') diff --git a/boot/stage10.elna b/boot/stage10.elna deleted file mode 100644 index cc99d9e..0000000 --- a/boot/stage10.elna +++ /dev/null @@ -1,1754 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) - -(* Stage 10 compiler. *) - -(* - Integer division (/). *) -(* - Remainder operation (%). *) -(* - Label declaration statement. A label starts with a dot and *) -(* the statement ends with a semicolon like any other statement. *) -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - (* Every type info starts with a word describing what type it is. *) - - (* PRIMITIVE_TYPE = 1 *) - - (* Primitive types have only type size. 
*) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - (* Info objects start with a word describing its type. *) - - (* INFO_TYPE = 1 *) - - (* Type info has the type it belongs to. *) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - source_code_position: Pointer := @source_code; - -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. *) - -(* Parameters: *) -(* a0 - String token pointer. *) - -(* Returns the length in a0. *) -proc _string_length(); -begin - (* Reset the counter. *) - v0 := 0; - -.string_length_loop: - v88 := v88 + 1; - - if _load_byte(v88) <> '"' then - v0 := v0 + 1; - goto .string_length_loop; - end; - - return v0 -end; - -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* a0 - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. 
*) -proc _add_string(); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - -.add_string_loop: - if _load_byte(v0) <> '"' then - v8 := _load_byte(v0); - _store_byte(v8, compiler_strings_position); - _store_word(compiler_strings_position + 1, @compiler_strings_position); - v0 := v0 + 1; - - if v8 <> '\\' then - _store_word(compiler_strings_length + 1, @compiler_strings_length); - end; - goto .add_string_loop; - end; - - return v4 -end; - -(* Reads standard input into a buffer. *) -(* a0 - Buffer pointer. *) -(* a1 - Buffer size. *) - -(* Returns the amount of bytes written in a0. *) -proc _read_file(); -begin - _syscall(0, v88, v84, 0, 0, 0, 63); -end; - -(* Writes to the standard output. *) - -(* Parameters: *) -(* a0 - Buffer. *) -(* a1 - Buffer length. *) -proc _write_s(); -begin - _syscall(1, v88, v84, 0, 0, 0, 64); -end; - -(* Writes a number to a string buffer. *) - -(* t0 - Local buffer. *) -(* t1 - Constant 10. *) -(* t2 - Current character. *) -(* t3 - Whether the number is negative. *) - -(* Parameters: *) -(* a0 - Whole number. *) -(* a1 - Buffer pointer. *) - -(* Sets a0 to the length of the written number. *) -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - return v0 -end; - -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* a0 - Whole number. *) -proc _write_i(); -begin - v4 := _print_i(v88, @v0); - _write_s(@v0, v4); -end; - -(* Writes a character from a0 into the standard output. *) -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -(* Write null terminated string. 
*) - -(* Parameters: *) -(* a0 - String. *) -proc _write_z(); -begin - (* Check for 0 character. *) - v0 := _load_byte(v88); - - if v0 <> 0 then - (* Print a character. *) - _write_c(v0); - - (* Advance the input string by one byte. *) - _write_z(v88 + 1); - end; -end; - -(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -(* Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Paramters: *) -(* a0 - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) -proc _is_alpha(); -begin - v0 := _is_upper(v88); - v4 := _is_lower(v88); - v8 := v88 = '_'; - - v12 := v0 or v4; - return v12 or v8 -end; - -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* a0 - Exemined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(); -begin - v0 := _is_alpha(v88); - v4 := _is_digit(v88); - - return v0 or v4 -end; - -(* Reads the next token. *) - -(* Returns token length in a0. *) -proc _read_token(); -begin - (* Current token position. *) - v0 := source_code_position; - (* Token length. *) - v4 := 0; - -.read_token_loop: - (* Current character. *) - v8 := _load_byte(v0); - - (* First we try to read a derictive. *) - (* A derictive can contain a dot and characters. *) - v12 := v8 = '.'; - v16 := _is_alnum(v8); - - if v12 or v16 then - (* Advance the source code position and token length. *) - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - end; - - return v4 -end; - -(* a0 - First pointer. 
*) -(* a1 - Second pointer. *) -(* a2 - The length to compare. *) - -(* Returns 0 in a0 if memory regions are equal. *) -proc _memcmp(); -begin - v8 := 0; - -.memcmp_loop: - if v80 <> 0 then - v0 := _load_byte(v88); - v4 := _load_byte(v84); - v8 := v0 + -v4; - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - - if v8 = 0 then - goto .memcmp_loop; - end; - end; - - return v8 -end; - -(* Copies memory. *) - -(* Parameters: *) -(* a0 - Destination. *) -(* a1 - Source. *) -(* a2 - Size. *) - -(* Preserves a0. *) -proc _memcpy(); -begin -.memcpy_loop: - if v80 <> 0 then - v0 := _load_byte(v84); - _store_byte(v0, v88); - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - goto .memcpy_loop; - end; - - return v88 -end; - -(* Advances the token stream by a0 bytes. *) -proc _advance_token(); -begin - _store_word(source_code_position + v88, @source_code_position); -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* a0 - Token length. *) - -(* Returns a0 unchanged. *) -proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -(* Prints and skips a line. *) -proc _compile_line(); -begin -.compile_line_loop: - v0 := _load_byte(source_code_position); - - if v0 <> '\n' then - (* Print a character. *) - _write_c(v0); - - (* Advance the input string by one byte. 
*) - _advance_token(1); - - goto .compile_line_loop; - end; - -.compile_line_end: - _write_c('\n'); - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - _write_c('\''); - _advance_token(1); - - v0 := _load_byte(source_code_position); - if v0 = '\\' then - _write_c('\\'); - _advance_token(1); - end; - - v0 := _load_byte(source_code_position); - _write_c(v0); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - (* Skip the "@" sign. *) - _advance_token(1); - _compile_designator(); -end; - -proc _compile_negate_expression(); -begin - (* Skip the "-" sign. *) - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - (* Skip the "~" sign. 
*) - _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -begin - v0 := _string_length(source_code_position); - v4 := _add_string(source_code_position); - - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '\'' then - _compile_character_literal(); - end; - if v0 = '@' then - _compile_address_expression(); - end; - if v0 = '-' then - _compile_negate_expression(); - end; - if v0 = '~' then - _compile_not_expression(); - end; - if v0 = '"' then - _compile_string_literal(); - end; - if v0 = '_' then - _compile_call(); - _write_z("\nmv t0, a0\n\0"); - end; - if _is_digit(v0) = 1 then - _compile_integer_literal(); - end; - if _is_lower(v0) = 1 then - _compile_variable_expression(); - end; -end; - -proc _compile_binary_rhs(); -begin - (* Skip the whitespace after the binary operator. *) - _advance_token(1); - _compile_term(); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -proc _compile_expression(); -begin - _compile_term(); - v0 := _load_byte(source_code_position); - - if v0 <> ' ' then - goto .compile_expression_end; - end; - (* It is a binary expression. *) - - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 24(sp)\n\0"); - - (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '+' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '*' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '&' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'o' then - _advance_token(2); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'x' then - _advance_token(3); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '%' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("rem t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '/' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("div t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '<' then - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '>' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '>' then - _advance_token(1); - v0 := _load_byte(source_code_position); - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - (* Stack variables: *) - (* v0 - Procedure name length. *) - (* v4 - Procedure name pointer. *) - (* v8 - Argument count. *) - - v0 := _read_token(); - v4 := source_code_position; - v8 := 0; - - (* Skip the identifier and left paren. *) - _advance_token(v0 + 1); - v12 := _load_byte(source_code_position); - - if v12 = ')' then - goto .compile_call_finalize - end; -.compile_call_loop: - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - v8 := v8 + 1; - - v12 := _load_byte(source_code_position); - - if v12 <> ',' then - goto .compile_call_finalize; - end; - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - (* Load the argument from the stack. *) - if v8 <> 0 then - (* Decrement the argument counter. *) - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - end; - -.compile_call_end: - _write_z("\tcall \0"); - _write_s(v4, v0); - - (* Skip the right paren. *) - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - v0 := _read_token(); - _write_z("\tj \0"); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - (* Skip "v" in the local variable name. 
*) - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - (* Read local variable stack offset and save it. *) - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - _write_c('\n'); -end; - -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - if _load_byte(source_code_position) = 'v' then - _compile_local_designator(); - else - _compile_global_designator(); - end; -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 60(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); - - (* Compile the assignment. *) - _compile_expression(); - - _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -begin - (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* a0 - Label counter. *) -proc _write_label(); -begin - _write_z(".L\0"); - _write_i(v88); -end; - -proc _compile_if(); -begin - (* Skip "if ". *) - _advance_token(3); - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _advance_token(6); - - (* v0 is the label after the if statement. *) - v0 := label_counter; - _store_word(label_counter + 1, @label_counter); - (* v4 is the label in front of the next elsif condition or end. 
*) - v4 := label_counter; - _store_word(label_counter + 1, @label_counter); - - _write_z("\tbeqz t0, \0"); - _write_label(v4); - _write_c('\n'); - - _compile_procedure_body(); - - _write_z("\tj \0"); - _write_label(v0); - _write_c('\n'); - - _write_label(v4); - _write_z(":\n\0"); - - if _memcmp(source_code_position, "end", 3) = 0 then - goto .compile_if_end; - end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else - end; -.compile_if_else: - (* Skip "else" and newline. *) - _advance_token(5); - _compile_procedure_body(); - -.compile_if_end: - (* Skip "end". *) - _advance_token(3); - - _write_label(v0); - _write_z(":\n\0"); -end; - -proc _compile_label_declaration(); -begin - (* Skip the dot. *) - _advance_token(1); - v0 := _read_token(); - _write_c('.'); - _write_s(source_code_position, v0); - _write_z(":\n\0"); - _advance_token(v0); -end; - -proc _compile_statement(); -begin - _skip_spaces(); - v0 := _load_byte(source_code_position); - - (* This is a call if the statement starts with an underscore. *) - if v0 = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if v0 = 'g' then - _compile_goto(); - goto .compile_statement_semicolon; - end; - if v0 = 'v' then - _compile_assignment(); - goto .compile_statement_semicolon; - end; - if v0 = 'i' then - _compile_if(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "return", 6) = 0 then - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - end; - if v0 = '.' 
then - _compile_label_declaration(); - - goto .compile_statement_semicolon; - end; - _compile_line(); - goto .compile_statement_end; - -.compile_statement_semicolon: - _advance_token(2); - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - _skip_empty_lines(); - _skip_spaces(); - - v0 := _memcmp(source_code_position, "end", 3) = 0; - v4 := _memcmp(source_code_position, "else", 4) = 0; - v4 := v0 or v4; - - if v4 = 0 then - _compile_statement(); - goto .compile_procedure_body_loop; - end; -end; - -(* Writes a regster name to the standard output. *) - -(* Parameters: *) -(* a0 - Register character. *) -(* a1 - Register number. *) -proc _write_register(); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _compile_procedure_prologue(); -begin - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - v0 := 0; - -.compile_procedure_prologue_loop: - _write_z("\tsw a\0"); - _write_i(v0); - _write_z(", \0"); - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_z("(sp)\n\0"); - - v0 := v0 + 1; - if v0 <> 8 then - goto .compile_procedure_prologue_loop; - end; -end; - -proc _compile_procedure(); -begin - (* Skip "proc ". *) - _advance_token(5); - - (* Save the procedure name length. *) - v0 := _read_token(); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_token(v0); - _write_z(":\n\0"); - - (* Skip the function name and trailing parens, semicolon, "begin" and newline. *) - _advance_token(v0 + 10); - - _compile_procedure_prologue(); - _compile_procedure_body(); - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. 
*) - _advance_token(5); -end; - -proc _skip_spaces(); -begin - v0 := _load_byte(source_code_position); - if v0 = '\t' then - _advance_token(1); - _skip_spaces(); - end; -end; - -(* Prints and skips a line. *) -proc _skip_comment(); -begin -.skip_comment_loop: - v0 := _load_byte(source_code_position); - - (* Check for newline character. *) - if v0 <> '\n' then - (* Advance the input string by one byte. *) - _advance_token(1); - - goto .skip_comment_loop; - end; - (* Skip the newline. *) - _advance_token(1); -end; - -(* Skip newlines and comments. *) -proc _skip_empty_lines(); -begin -.skip_empty_lines_rerun: - v0 := source_code_position; - -.skip_empty_lines_loop: - v4 := _load_byte(v0); - - if v4 = '\n' then - goto .skip_empty_lines_newline; - end; - if v4 = '\t' then - goto .skip_empty_lines_tab; - end; - if v4 <> '(' then - goto .skip_empty_lines_end; - end; - v4 := v0 + 1; - - if _load_byte(v4) = '*' then - goto .skip_empty_lines_comment - end; - goto .skip_empty_lines_end; - -.skip_empty_lines_comment: - _store_word(v0, @source_code_position); - _skip_comment(); - goto .skip_empty_lines_rerun; - -.skip_empty_lines_newline: - _store_word(v0 + 1, @source_code_position); - goto .skip_empty_lines_rerun; - -.skip_empty_lines_tab: - v0 := v0 + 1; - goto .skip_empty_lines_loop - -.skip_empty_lines_end: -end; - -proc _compile_global_initializer(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '"' then - _write_z("\n\t.word strings + \0"); - v4 := _string_length(source_code_position); - - _add_string(source_code_position); - _write_i(); - - (* Skip the quoted string. *) - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - end; - if v0 = 'S' then - (* Skip "S(". *) - _advance_token(2); - - if _load_byte(source_code_position) = ')' then - goto .compile_global_initializer_closing; - end; - goto .compile_global_initializer_loop; - end; - if v0 = '@' then - (* Skip @. 
*) - _advance_token(1); - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - goto .compile_global_initializer_end; - end; - if _is_digit(v0) = 1 then - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(1); - - goto .compile_global_initializer_end; - end; - -.compile_global_initializer_loop: - _compile_global_initializer(); - - if _load_byte(source_code_position) <> ')' then - (* Skip comma and whitespace after it. *) - _advance_token(2); - - goto .compile_global_initializer_loop; - end; - -.compile_global_initializer_closing: - (* Skip ")" *) - _advance_token(1); - - goto .compile_global_initializer_end; - -.compile_global_initializer_end: -end; - -proc _compile_constant_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(v0 + 4); - _compile_global_initializer(); - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_const_part(); -begin - _skip_empty_lines(); - - if _memcmp(source_code_position, "const\0", 5) <> 0 then - goto .compile_const_part_end; - end; - (* Skip "const" with the newline after it. *) - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - -.compile_const_part_loop: - _skip_empty_lines(); - - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section. 
*) - if _load_byte(source_code_position) = '\t' then - _advance_token(1); - - _compile_constant_declaration(); - goto .compile_const_part_loop; - end; - -.compile_const_part_end: -end; - -proc _compile_variable_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _advance_token(v0 + 2); - - (* Skip the type name. *) - v4 := _read_token(); - _advance_token(v4); - - if _load_byte(source_code_position) <> ' ' then - (* Else we assume this is a zeroed 81920 bytes big array. *) - _write_z(" .zero 81920\0"); - else - (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); - _compile_global_initializer(); - end; - - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_var_part(); -begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then - goto .compile_var_part_end; - end; - (* Skip "var" and newline. *) - _advance_token(4); - _write_z(".section .data\n\0"); - -.compile_var_part_loop: - _skip_empty_lines(); - v0 := _load_byte(source_code_position); - - if v0 = '\t' then - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - end; - -.compile_var_part_end: -end; - -(* Process the source code and print the generated code. 
*) -proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - -.compile_module_loop: - _skip_empty_lines(); - - if _load_byte(source_code_position) <> 0 then - (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) - if _memcmp(source_code_position, "proc ", 5) = 0 then - _compile_procedure(); - goto .compile_module_loop; - end; - end; -.compile_module_end: -end; - -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - v0 := @compiler_strings; - v4 := compiler_strings_position; - -.compile_loop: - if v0 < v4 then - v8 := _load_byte(v0); - v0 := v0 + 1; - _write_c(v8); - - goto .compile_loop; - end; - _write_c('"'); - _write_c('\n'); -end; - -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) -proc _exit(); -begin - _syscall(0, 0, 0, 0, 0, 0, 93); -end; - -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* a0 - Symbol pointer. *) -(* a1 - Symbol name length. *) -(* a2 - Symbol name pointer. *) -(* a3 - Symbol table. *) -proc _symbol_table_enter(); -begin - (* The first word in the symbol table is its length, get it. *) - v0 := _load_word(v76); - - (* Calculate the offset for the new symbol. *) - v4 := v0 * 4; - v4 := v4 + 4; - v4 := v76 + 4; - - _memcpy(v4, @v80, 12); - - (* Increment the symbol table length. 
*) - v0 := v0 + 1; - _store_word(v0, v76); -end; - -proc _symbol_table_build(); -begin - _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); -end; - - -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group a handled equivalently. *) - -(* Classification: *) - -(* TransitionClass = ( *) -(* transitionClassInvalid = 1, *) -(* transitionClassDigit = 2, *) -(* transitionClassAlpha = 3, *) -(* transitionClassSpace = 4, *) -(* transitionClassColon = 5, *) -(* transitionClassEquals = 6, *) -(* transitionClassLeftParen = 7, *) -(* transitionClassRightParen = 8, *) -(* transitionClassAsterisk = 9, *) -(* transitionClassUnderscore = 10, *) -(* transitionClassSingle = 11, *) -(* transitionClassHex = 12, *) -(* transitionClassZero = 13, *) -(* transitionClassX = 14, *) -(* transitionClassEof = 15, *) -(* transitionClassDot = 16, *) -(* transitionClassMinus = 17, *) -(* transitionClassSingleQuote = 18, *) -(* transitionClassDoubleQuote = 19, *) -(* transitionClassGreater = 20, *) -(* transitionClassLess = 21, *) -(* transitionClassOther = 22 *) -(* ); *) -(* TransitionState = ( *) -(* transitionStateStart = 1, *) -(* transitionStateColon = 2, *) -(* transitionStateIdentifier = 3, *) -(* transitionStateDecimal = 4, *) -(* transitionStateGreater = 5, *) -(* transitionStateMinus = 6, *) -(* transitionStateLeftParen = 7, *) -(* transitionStateLess = 8, *) -(* transitionStateDot = 9, *) -(* transitionStateComment = 10, *) -(* transitionStateClosingComment = 11, *) -(* 
transitionStateCharacter = 12, *) -(* transitionStateString = 13, *) -(* transitionStateLeadingZero = 14, *) -(* transitionStateDecimalSuffix = 15, *) -(* transitionStateEnd = 16 *) -(* ); *) -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) -(* TransitionAction = ( *) -(* none = 1, *) -(* accumulate = 2, *) -(* skip = 3, *) -(* single = 4, *) -(* eof = 5, *) -(* finalize = 6, *) -(* composite = 7, *) -(* key_id = 8, *) -(* integer = 9, *) -(* delimited = 10 *) -(* ); *) - -(* Assigns some value to at array index. *) - -(* Parameters: *) -(* a0 - Array pointer. *) -(* a1 - Index (word offset into the array). *) -(* a2 - Data to assign. *) -proc _assign_at(); -begin - v0 := v84 + -1; - v0 := v0 * 4; - v0 := v88 + v0; - - _store_word(v80, v0); -end; - -proc _create_classification(); -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - 
_assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - 
_assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); - - v0 := 129; - -(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) -.create_classification_loop: - _assign_at(@classification, v0, 22); - v0 := v0 + 1; - - if v0 < 257 then - goto .create_classification_loop; - end; -end; - -(* Parameters: *) -(* a0 - Current state (first index into transitions table). *) -(* a1 - Transition (second index into transitions table).. *) -(* a2 - Action to assign. *) -(* a3 - Next state to assign. *) -proc _set_transition(); -begin - (* Transitions start at offset in classification array. Save the transitions start in v0. *) - v0 := @classification + 256 - - (* Each state is 8 bytes long (2 words: action and next state). *) - (* There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. *) - - v4 := v88 + -1; - v4 := v4 * 128; - - v8 := v84 + -1; - v8 := v8 * 8; - - v12 := v0 + v4; - v12 := v12 + v8; - - _store_word(v80, v12); - v12 := v12 + 4; - _store_word(v76, v12); -end; - -(* Parameters: *) -(* a0 - Current state (Transition state enumeration). *) -(* a1 - Default action (Callback). *) -(* a2 - Next state (Transition state enumeration). 
*) -proc _set_default_transition(); -begin - _set_transition(v88, 1, v84, v80); - _set_transition(v88, 2, v84, v80); - _set_transition(v88, 3, v84, v80); - _set_transition(v88, 4, v84, v80); - _set_transition(v88, 5, v84, v80); - _set_transition(v88, 6, v84, v80); - _set_transition(v88, 7, v84, v80); - _set_transition(v88, 8, v84, v80); - _set_transition(v88, 9, v84, v80); - _set_transition(v88, 10, v84, v80); - _set_transition(v88, 11, v84, v80); - _set_transition(v88, 12, v84, v80); - _set_transition(v88, 13, v84, v80); - _set_transition(v88, 14, v84, v80); - _set_transition(v88, 15, v84, v80); - _set_transition(v88, 16, v84, v80); - _set_transition(v88, 17, v84, v80); - _set_transition(v88, 18, v84, v80); - _set_transition(v88, 19, v84, v80); - _set_transition(v88, 20, v84, v80); - _set_transition(v88, 21, v84, v80); - _set_transition(v88, 22, v84, v80); -end; - - -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) - -(* Each cell is a word long. *) -(* - The least significant byte of the word is a row number (beginning with 0). *) -(* It specifies the target state. "ff" means that this is an end state and no *) -(* transition is possible. *) -(* - The next byte is the action that should be performed when transitioning. *) -(* For the meaning of actions see labels in the lex_next function, which *) -(* handles each action. *) -proc _create_transitions(); -begin - (* Start state. 
*) - _set_transition(1, 1, 1, 16); - _set_transition(1, 2, 2, 4); - _set_transition(1, 3, 2, 3); - _set_transition(1, 4, 3, 1); - _set_transition(1, 5, 2, 5); - _set_transition(1, 6, 4, 16); - _set_transition(1, 7, 2, 7); - _set_transition(1, 8, 4, 16); - _set_transition(1, 9, 4, 16); - _set_transition(1, 10, 2, 3); - _set_transition(1, 11, 4, 16); - _set_transition(1, 12, 2, 3); - _set_transition(1, 13, 2, 14); - _set_transition(1, 14, 2, 3); - _set_transition(1, 15, 5, 16); - _set_transition(1, 16, 2, 9); - _set_transition(1, 17, 2, 6); - _set_transition(1, 18, 2, 12); - _set_transition(1, 19, 2, 13); - _set_transition(1, 20, 2, 5); - _set_transition(1, 21, 2, 8); - _set_transition(1, 22, 1, 16); - - (* Colon state. *) - _set_default_transition(2, 6, 16); - _set_transition(2, 6, 7, 16); - - (* Identifier state. *) - _set_default_transition(3, 8, 16); - _set_transition(3, 2, 2, 3); - _set_transition(3, 3, 2, 3); - _set_transition(3, 10, 2, 3); - _set_transition(3, 12, 2, 3); - _set_transition(3, 13, 2, 3); - _set_transition(3, 14, 2, 3); - - (* Decimal state. *) - _set_default_transition(4, 9, 16); - _set_transition(4, 2, 2, 4); - _set_transition(4, 3, 2, 15); - _set_transition(4, 10, 1, 16); - _set_transition(4, 12, 2, 15); - _set_transition(4, 13, 2, 4); - _set_transition(4, 14, 2, 15); - - (* Greater state. *) - _set_default_transition(5, 6, 16); - _set_transition(5, 6, 7, 16); - - (* Minus state. *) - _set_default_transition(6, 6, 16); - _set_transition(6, 20, 7, 16); - - (* Left paren state. *) - _set_default_transition(7, 6, 16); - _set_transition(7, 9, 2, 10); - - (* Less state. *) - _set_default_transition(8, 6, 16); - _set_transition(8, 6, 7, 16); - _set_transition(8, 20, 7, 16); - - (* Hexadecimal after 0x. *) - _set_default_transition(9, 6, 16); - _set_transition(9, 16, 7, 16); - - (* Comment. *) - _set_default_transition(10, 2, 10); - _set_transition(10, 9, 2, 11); - _set_transition(10, 15, 1, 16); - - (* Closing comment. 
*) - _set_default_transition(11, 2, 10); - _set_transition(11, 1, 1, 16); - _set_transition(11, 8, 10, 16); - _set_transition(11, 9, 2, 11); - _set_transition(11, 15, 1, 16); - - (* Character. *) - _set_default_transition(12, 2, 12); - _set_transition(12, 1, 1, 16); - _set_transition(12, 15, 1, 16); - _set_transition(12, 18, 10, 16); - - (* String. *) - _set_default_transition(13, 2, 13); - _set_transition(13, 1, 1, 16); - _set_transition(13, 15, 1, 16); - _set_transition(13, 19, 10, 16); - - (* Leading zero. *) - _set_default_transition(14, 9, 16); - _set_transition(14, 2, 1, 16); - _set_transition(14, 3, 1, 16); - _set_transition(14, 10, 1, 16); - _set_transition(14, 12, 1, 16); - _set_transition(14, 13, 1, 16); - _set_transition(14, 14, 1, 16); - - (* Digit with a character suffix. *) - _set_default_transition(15, 9, 16); - _set_transition(15, 3, 1, 16); - _set_transition(15, 2, 1, 16); - _set_transition(15, 12, 1, 16); - _set_transition(15, 13, 1, 16); - _set_transition(15, 14, 1, 16); -end; - -proc _lexer_get_state(); -begin - (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) - v0 := @classification; - v4 := 16 * 22; - v0 := v0 + 256; - - return v0 + v4 -end; - -(* Gets pointer to the current source text. *) -proc _lexer_get_current(); -begin - return _lexer_get_state() + 4; -end; - -(* Resets the lexer state for reading the next token. *) -proc _lexer_reset(); -begin - (* Transition start state is 1. *) - v0 := _lexer_get_state(); - _store_word(1, v4); - - (* Text pointer to the beginning of the currently read token. *) - v4 := _lexer_get_current(); - _store_word(source_code_position, v4); - - (* Initial length of the token is 0. *) - _store_word(0, source_code_position + 4); -end; - -(* One time lexer initialization. *) -proc _lexer_initialize(); -begin - _create_classification(); - _create_transitions(); -end; - -(* Entry point. 
*) -proc _start(); -begin - _lexer_initialize(); - _symbol_table_build(); - - (* Read the source from the standard input. *) - (* Second argument is buffer size. Modifying update the source_code definition. *) - _read_file(@source_code, 81920); - _compile(); - - _exit(0); -end; diff --git a/boot/stage10/cl.elna b/boot/stage10/cl.elna new file mode 100644 index 0000000..cc99d9e --- /dev/null +++ b/boot/stage10/cl.elna @@ -0,0 +1,1754 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, *) +(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) +(* obtain one at https://mozilla.org/MPL/2.0/. *) + +(* Stage 10 compiler. *) + +(* - Integer division (/). *) +(* - Remainder operation (%). *) +(* - Label declaration statement. A label starts with a dot and *) +(* the statement ends with a semicolon like any other statement. *) +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + (* Every type info starts with a word describing what type it is. *) + + (* PRIMITIVE_TYPE = 1 *) + + (* Primitive types have only type size. *) + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + (* Info objects start with a word describing its type. *) + + (* INFO_TYPE = 1 *) + + (* Type info has the type it belongs to. 
*) + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + label_counter: Word := 0; + source_code_position: Pointer := @source_code; + +(* Calculates and returns the string token length between quotes, including the *) +(* escaping slash characters. *) + +(* Parameters: *) +(* a0 - String token pointer. *) + +(* Returns the length in a0. *) +proc _string_length(); +begin + (* Reset the counter. *) + v0 := 0; + +.string_length_loop: + v88 := v88 + 1; + + if _load_byte(v88) <> '"' then + v0 := v0 + 1; + goto .string_length_loop; + end; + + return v0 +end; + +(* Adds a string to the global, read-only string storage. *) + +(* Parameters: *) +(* a0 - String token. *) + +(* Returns the offset from the beginning of the storage to the new string in a0. *) +proc _add_string(); +begin + v0 := v88 + 1; + v4 := compiler_strings_length; + +.add_string_loop: + if _load_byte(v0) <> '"' then + v8 := _load_byte(v0); + _store_byte(v8, compiler_strings_position); + _store_word(compiler_strings_position + 1, @compiler_strings_position); + v0 := v0 + 1; + + if v8 <> '\\' then + _store_word(compiler_strings_length + 1, @compiler_strings_length); + end; + goto .add_string_loop; + end; + + return v4 +end; + +(* Reads standard input into a buffer. *) +(* a0 - Buffer pointer. *) +(* a1 - Buffer size. *) + +(* Returns the amount of bytes written in a0. *) +proc _read_file(); +begin + _syscall(0, v88, v84, 0, 0, 0, 63); +end; + +(* Writes to the standard output. *) + +(* Parameters: *) +(* a0 - Buffer. 
*) +(* a1 - Buffer length. *) +proc _write_s(); +begin + _syscall(1, v88, v84, 0, 0, 0, 64); +end; + +(* Writes a number to a string buffer. *) + +(* t0 - Local buffer. *) +(* t1 - Constant 10. *) +(* t2 - Current character. *) +(* t3 - Whether the number is negative. *) + +(* Parameters: *) +(* a0 - Whole number. *) +(* a1 - Buffer pointer. *) + +(* Sets a0 to the length of the written number. *) +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + return v0 +end; + +(* Writes a number to the standard output. *) + +(* Parameters: *) +(* a0 - Whole number. *) +proc _write_i(); +begin + v4 := _print_i(v88, @v0); + _write_s(@v0, v4); +end; + +(* Writes a character from a0 into the standard output. *) +proc _write_c(); +begin + _write_s(@v88, 1); +end; + +(* Write null terminated string. *) + +(* Parameters: *) +(* a0 - String. *) +proc _write_z(); +begin + (* Check for 0 character. *) + v0 := _load_byte(v88); + + if v0 <> 0 then + (* Print a character. *) + _write_c(v0); + + (* Advance the input string by one byte. *) + _write_z(v88 + 1); + end; +end; + +(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_upper(); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +(* Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_lower(); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +(* Detects if the passed character is a 7-bit alpha character or an underscore. *) + +(* Paramters: *) +(* a0 - Tested character. 
*) + +(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) +proc _is_alpha(); +begin + v0 := _is_upper(v88); + v4 := _is_lower(v88); + v8 := v88 = '_'; + + v12 := v0 or v4; + return v12 or v8 +end; + +(* Detects whether the passed character is a digit *) +(* (a value between 0 and 9). *) + +(* Parameters: *) +(* a0 - Exemined value. *) + +(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) +proc _is_digit(); +begin + v0 := v88 >= '0'; + v4 := v88 <= '9'; + + return v0 & v4 +end; + +proc _is_alnum(); +begin + v0 := _is_alpha(v88); + v4 := _is_digit(v88); + + return v0 or v4 +end; + +(* Reads the next token. *) + +(* Returns token length in a0. *) +proc _read_token(); +begin + (* Current token position. *) + v0 := source_code_position; + (* Token length. *) + v4 := 0; + +.read_token_loop: + (* Current character. *) + v8 := _load_byte(v0); + + (* First we try to read a derictive. *) + (* A derictive can contain a dot and characters. *) + v12 := v8 = '.'; + v16 := _is_alnum(v8); + + if v12 or v16 then + (* Advance the source code position and token length. *) + v4 := v4 + 1; + v0 := v0 + 1; + + goto .read_token_loop; + end; + + return v4 +end; + +(* a0 - First pointer. *) +(* a1 - Second pointer. *) +(* a2 - The length to compare. *) + +(* Returns 0 in a0 if memory regions are equal. *) +proc _memcmp(); +begin + v8 := 0; + +.memcmp_loop: + if v80 <> 0 then + v0 := _load_byte(v88); + v4 := _load_byte(v84); + v8 := v0 + -v4; + + v88 := v88 + 1; + v84 := v84 + 1; + v80 := v80 + -1; + + if v8 = 0 then + goto .memcmp_loop; + end; + end; + + return v8 +end; + +(* Copies memory. *) + +(* Parameters: *) +(* a0 - Destination. *) +(* a1 - Source. *) +(* a2 - Size. *) + +(* Preserves a0. 
*) +proc _memcpy(); +begin +.memcpy_loop: + if v80 <> 0 then + v0 := _load_byte(v84); + _store_byte(v0, v88); + + v88 := v88 + 1; + v84 := v84 + 1; + v80 := v80 + -1; + goto .memcpy_loop; + end; + + return v88 +end; + +(* Advances the token stream by a0 bytes. *) +proc _advance_token(); +begin + _store_word(source_code_position + v88, @source_code_position); +end; + +(* Prints the current token. *) + +(* Parameters: *) +(* a0 - Token length. *) + +(* Returns a0 unchanged. *) +proc _write_token(); +begin + _write_s(source_code_position, v88); + return v88 +end; + +(* Prints and skips a line. *) +proc _compile_line(); +begin +.compile_line_loop: + v0 := _load_byte(source_code_position); + + if v0 <> '\n' then + (* Print a character. *) + _write_c(v0); + + (* Advance the input string by one byte. *) + _advance_token(1); + + goto .compile_line_loop; + end; + +.compile_line_end: + _write_c('\n'); + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + _write_z("\tli t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + _write_z("\tli t0, \0"); + + _write_c('\''); + _advance_token(1); + + v0 := _load_byte(source_code_position); + if v0 = '\\' then + _write_c('\\'); + _advance_token(1); + end; + + v0 := _load_byte(source_code_position); + _write_c(v0); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +proc _compile_address_expression(); +begin + (* Skip the "@" sign. *) + _advance_token(1); + _compile_designator(); +end; + +proc _compile_negate_expression(); +begin + (* Skip the "-" sign. *) + _advance_token(1); + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +proc _compile_not_expression(); +begin + (* Skip the "~" sign. 
*) + _advance_token(1); + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +proc _compile_string_literal(); +begin + v0 := _string_length(source_code_position); + v4 := _add_string(source_code_position); + + _advance_token(v0 + 2); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(v4); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +proc _compile_term(); +begin + v0 := _load_byte(source_code_position); + + if v0 = '\'' then + _compile_character_literal(); + end; + if v0 = '@' then + _compile_address_expression(); + end; + if v0 = '-' then + _compile_negate_expression(); + end; + if v0 = '~' then + _compile_not_expression(); + end; + if v0 = '"' then + _compile_string_literal(); + end; + if v0 = '_' then + _compile_call(); + _write_z("\nmv t0, a0\n\0"); + end; + if _is_digit(v0) = 1 then + _compile_integer_literal(); + end; + if _is_lower(v0) = 1 then + _compile_variable_expression(); + end; +end; + +proc _compile_binary_rhs(); +begin + (* Skip the whitespace after the binary operator. *) + _advance_token(1); + _compile_term(); + + (* Load the left expression from the stack; *) + _write_z("\tlw t1, 24(sp)\n\0"); +end; + +proc _compile_expression(); +begin + _compile_term(); + v0 := _load_byte(source_code_position); + + if v0 <> ' ' then + goto .compile_expression_end; + end; + (* It is a binary expression. *) + + (* Save the value of the left expression on the stack. *) + _write_z("sw t0, 24(sp)\n\0"); + + (* Skip surrounding whitespace in front of the operator. *) + _advance_token(1); + v0 := _load_byte(source_code_position); + + if v0 = '+' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '*' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '&' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = 'o' then + _advance_token(2); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = 'x' then + _advance_token(3); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '%' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("rem t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '/' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("div t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '<' then + _advance_token(1); + v0 := _load_byte(source_code_position); + + if v0 = '>' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("slt t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '>' then + _advance_token(1); + v0 := _load_byte(source_code_position); + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + (* Stack variables: *) + (* v0 - Procedure name length. *) + (* v4 - Procedure name pointer. *) + (* v8 - Argument count. *) + + v0 := _read_token(); + v4 := source_code_position; + v8 := 0; + + (* Skip the identifier and left paren. *) + _advance_token(v0 + 1); + v12 := _load_byte(source_code_position); + + if v12 = ')' then + goto .compile_call_finalize + end; +.compile_call_loop: + _compile_expression(); + + (* Save the argument on the stack. *) + _write_z("\tsw t0, \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + (* Add one to the argument counter. *) + v8 := v8 + 1; + + v12 := _load_byte(source_code_position); + + if v12 <> ',' then + goto .compile_call_finalize; + end; + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + (* Load the argument from the stack. *) + if v8 <> 0 then + (* Decrement the argument counter. *) + v8 := v8 + -1; + + _write_z("\tlw a\0"); + _write_i(v8); + + _write_z(", \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize; + end; + +.compile_call_end: + _write_z("\tcall \0"); + _write_s(v4, v0); + + (* Skip the right paren. *) + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + v0 := _read_token(); + _write_z("\tj \0"); + + _write_token(v0); + _advance_token(); +end; + +proc _compile_local_designator(); +begin + (* Skip "v" in the local variable name. 
*) + _advance_token(1); + _write_z("\t addi t0, sp, \0"); + + (* Read local variable stack offset and save it. *) + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + _write_c('\n'); +end; + +proc _compile_global_designator(); +begin + _write_z("\tla t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +proc _compile_designator(); +begin + if _load_byte(source_code_position) = 'v' then + _compile_local_designator(); + else + _compile_global_designator(); + end; +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + (* Save the assignee address on the stack. *) + _write_z("\tsw t0, 60(sp)\n\0"); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + _advance_token(4); + + (* Compile the assignment. *) + _compile_expression(); + + _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +begin + (* Skip "return" keyword and whitespace after it. *) + _advance_token(7); + _compile_expression(); + + _write_z("mv a0, t0\n\0"); +end; + +(* Writes a label, .Ln, where n is a unique number. *) + +(* Parameters: *) +(* a0 - Label counter. *) +proc _write_label(); +begin + _write_z(".L\0"); + _write_i(v88); +end; + +proc _compile_if(); +begin + (* Skip "if ". *) + _advance_token(3); + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _advance_token(6); + + (* v0 is the label after the if statement. *) + v0 := label_counter; + _store_word(label_counter + 1, @label_counter); + (* v4 is the label in front of the next elsif condition or end. 
*) + v4 := label_counter; + _store_word(label_counter + 1, @label_counter); + + _write_z("\tbeqz t0, \0"); + _write_label(v4); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(v0); + _write_c('\n'); + + _write_label(v4); + _write_z(":\n\0"); + + if _memcmp(source_code_position, "end", 3) = 0 then + goto .compile_if_end; + end; + if _memcmp(source_code_position, "else", 3) = 0 then + goto .compile_if_else + end; +.compile_if_else: + (* Skip "else" and newline. *) + _advance_token(5); + _compile_procedure_body(); + +.compile_if_end: + (* Skip "end". *) + _advance_token(3); + + _write_label(v0); + _write_z(":\n\0"); +end; + +proc _compile_label_declaration(); +begin + (* Skip the dot. *) + _advance_token(1); + v0 := _read_token(); + _write_c('.'); + _write_s(source_code_position, v0); + _write_z(":\n\0"); + _advance_token(v0); +end; + +proc _compile_statement(); +begin + _skip_spaces(); + v0 := _load_byte(source_code_position); + + (* This is a call if the statement starts with an underscore. *) + if v0 = '_' then + _compile_call(); + goto .compile_statement_semicolon; + end; + if v0 = 'g' then + _compile_goto(); + goto .compile_statement_semicolon; + end; + if v0 = 'v' then + _compile_assignment(); + goto .compile_statement_semicolon; + end; + if v0 = 'i' then + _compile_if(); + goto .compile_statement_semicolon; + end; + if _memcmp(source_code_position, "return", 6) = 0 then + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + end; + if v0 = '.' 
then + _compile_label_declaration(); + + goto .compile_statement_semicolon; + end; + _compile_line(); + goto .compile_statement_end; + +.compile_statement_semicolon: + _advance_token(2); + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + _skip_empty_lines(); + _skip_spaces(); + + v0 := _memcmp(source_code_position, "end", 3) = 0; + v4 := _memcmp(source_code_position, "else", 4) = 0; + v4 := v0 or v4; + + if v4 = 0 then + _compile_statement(); + goto .compile_procedure_body_loop; + end; +end; + +(* Writes a regster name to the standard output. *) + +(* Parameters: *) +(* a0 - Register character. *) +(* a1 - Register number. *) +proc _write_register(); +begin + _write_c(v88); + v84 := v84 + '0'; + _write_c(v84); +end; + +proc _compile_procedure_prologue(); +begin + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z("\tsw a\0"); + _write_i(v0); + _write_z(", \0"); + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); + + _write_z("(sp)\n\0"); + + v0 := v0 + 1; + if v0 <> 8 then + goto .compile_procedure_prologue_loop; + end; +end; + +proc _compile_procedure(); +begin + (* Skip "proc ". *) + _advance_token(5); + + (* Save the procedure name length. *) + v0 := _read_token(); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_token(v0); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_token(v0); + _write_z(":\n\0"); + + (* Skip the function name and trailing parens, semicolon, "begin" and newline. *) + _advance_token(v0 + 10); + + _compile_procedure_prologue(); + _compile_procedure_body(); + + (* Write the epilogue. *) + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + (* Skip the "end" keyword, semicolon and newline. 
*) + _advance_token(5); +end; + +proc _skip_spaces(); +begin + v0 := _load_byte(source_code_position); + if v0 = '\t' then + _advance_token(1); + _skip_spaces(); + end; +end; + +(* Prints and skips a line. *) +proc _skip_comment(); +begin +.skip_comment_loop: + v0 := _load_byte(source_code_position); + + (* Check for newline character. *) + if v0 <> '\n' then + (* Advance the input string by one byte. *) + _advance_token(1); + + goto .skip_comment_loop; + end; + (* Skip the newline. *) + _advance_token(1); +end; + +(* Skip newlines and comments. *) +proc _skip_empty_lines(); +begin +.skip_empty_lines_rerun: + v0 := source_code_position; + +.skip_empty_lines_loop: + v4 := _load_byte(v0); + + if v4 = '\n' then + goto .skip_empty_lines_newline; + end; + if v4 = '\t' then + goto .skip_empty_lines_tab; + end; + if v4 <> '(' then + goto .skip_empty_lines_end; + end; + v4 := v0 + 1; + + if _load_byte(v4) = '*' then + goto .skip_empty_lines_comment + end; + goto .skip_empty_lines_end; + +.skip_empty_lines_comment: + _store_word(v0, @source_code_position); + _skip_comment(); + goto .skip_empty_lines_rerun; + +.skip_empty_lines_newline: + _store_word(v0 + 1, @source_code_position); + goto .skip_empty_lines_rerun; + +.skip_empty_lines_tab: + v0 := v0 + 1; + goto .skip_empty_lines_loop + +.skip_empty_lines_end: +end; + +proc _compile_global_initializer(); +begin + v0 := _load_byte(source_code_position); + + if v0 = '"' then + _write_z("\n\t.word strings + \0"); + v4 := _string_length(source_code_position); + + _add_string(source_code_position); + _write_i(); + + (* Skip the quoted string. *) + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + end; + if v0 = 'S' then + (* Skip "S(". *) + _advance_token(2); + + if _load_byte(source_code_position) = ')' then + goto .compile_global_initializer_closing; + end; + goto .compile_global_initializer_loop; + end; + if v0 = '@' then + (* Skip @. 
*) + _advance_token(1); + _write_z("\n\t.word \0"); + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + goto .compile_global_initializer_end; + end; + if _is_digit(v0) = 1 then + _write_z("\n\t.word \0"); + v0 := _read_token(); + _write_token(v0); + _advance_token(1); + + goto .compile_global_initializer_end; + end; + +.compile_global_initializer_loop: + _compile_global_initializer(); + + if _load_byte(source_code_position) <> ')' then + (* Skip comma and whitespace after it. *) + _advance_token(2); + + goto .compile_global_initializer_loop; + end; + +.compile_global_initializer_closing: + (* Skip ")" *) + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_end: +end; + +proc _compile_constant_declaration(); +begin + v0 := _read_token(); + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the constant name with assignment sign and surrounding whitespaces. *) + _advance_token(v0 + 4); + _compile_global_initializer(); + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_const_part(); +begin + _skip_empty_lines(); + + if _memcmp(source_code_position, "const\0", 5) <> 0 then + goto .compile_const_part_end; + end; + (* Skip "const" with the newline after it. *) + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + +.compile_const_part_loop: + _skip_empty_lines(); + + (* If the character at the line beginning is not indentation, *) + (* it is probably the next code section. 
*) + if _load_byte(source_code_position) = '\t' then + _advance_token(1); + + _compile_constant_declaration(); + goto .compile_const_part_loop; + end; + +.compile_const_part_end: +end; + +proc _compile_variable_declaration(); +begin + v0 := _read_token(); + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the variable name and colon with space before the type. *) + _advance_token(v0 + 2); + + (* Skip the type name. *) + v4 := _read_token(); + _advance_token(v4); + + if _load_byte(source_code_position) <> ' ' then + (* Else we assume this is a zeroed 81920 bytes big array. *) + _write_z(" .zero 81920\0"); + else + (* Skip the assignment sign with surrounding whitespaces. *) + _advance_token(4); + _compile_global_initializer(); + end; + + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_var_part(); +begin + if _memcmp(source_code_position, "var\0", 3) <> 0 then + goto .compile_var_part_end; + end; + (* Skip "var" and newline. *) + _advance_token(4); + _write_z(".section .data\n\0"); + +.compile_var_part_loop: + _skip_empty_lines(); + v0 := _load_byte(source_code_position); + + if v0 = '\t' then + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + end; + +.compile_var_part_end: +end; + +(* Process the source code and print the generated code. 
*) +proc _compile_module(); +begin + _compile_const_part(); + _skip_empty_lines(); + _compile_var_part(); + + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); + +.compile_module_loop: + _skip_empty_lines(); + + if _load_byte(source_code_position) <> 0 then + (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) + if _memcmp(source_code_position, "proc ", 5) = 0 then + _compile_procedure(); + goto .compile_module_loop; + end; + end; +.compile_module_end: +end; + +proc _compile(); +begin + _write_z(".globl _start\n\n\0"); + _compile_module(); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + v0 := @compiler_strings; + v4 := compiler_strings_position; + +.compile_loop: + if v0 < v4 then + v8 := _load_byte(v0); + v0 := v0 + 1; + _write_c(v8); + + goto .compile_loop; + end; + _write_c('"'); + _write_c('\n'); +end; + +(* Terminates the program. a0 contains the return code. *) + +(* Parameters: *) +(* a0 - Status code. *) +proc _exit(); +begin + _syscall(0, 0, 0, 0, 0, 0, 93); +end; + +(* Inserts a symbol into the table. *) + +(* Parameters: *) +(* a0 - Symbol pointer. *) +(* a1 - Symbol name length. *) +(* a2 - Symbol name pointer. *) +(* a3 - Symbol table. *) +proc _symbol_table_enter(); +begin + (* The first word in the symbol table is its length, get it. *) + v0 := _load_word(v76); + + (* Calculate the offset for the new symbol. *) + v4 := v0 * 4; + v4 := v4 + 4; + v4 := v76 + 4; + + _memcpy(v4, @v80, 12); + + (* Increment the symbol table length. 
*) + v0 := v0 + 1; + _store_word(v0, v76); +end; + +proc _symbol_table_build(); +begin + _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); +end; + + +(* Classification table assigns each possible character to a group (class). All *) +(* characters of the same group a handled equivalently. *) + +(* Classification: *) + +(* TransitionClass = ( *) +(* transitionClassInvalid = 1, *) +(* transitionClassDigit = 2, *) +(* transitionClassAlpha = 3, *) +(* transitionClassSpace = 4, *) +(* transitionClassColon = 5, *) +(* transitionClassEquals = 6, *) +(* transitionClassLeftParen = 7, *) +(* transitionClassRightParen = 8, *) +(* transitionClassAsterisk = 9, *) +(* transitionClassUnderscore = 10, *) +(* transitionClassSingle = 11, *) +(* transitionClassHex = 12, *) +(* transitionClassZero = 13, *) +(* transitionClassX = 14, *) +(* transitionClassEof = 15, *) +(* transitionClassDot = 16, *) +(* transitionClassMinus = 17, *) +(* transitionClassSingleQuote = 18, *) +(* transitionClassDoubleQuote = 19, *) +(* transitionClassGreater = 20, *) +(* transitionClassLess = 21, *) +(* transitionClassOther = 22 *) +(* ); *) +(* TransitionState = ( *) +(* transitionStateStart = 1, *) +(* transitionStateColon = 2, *) +(* transitionStateIdentifier = 3, *) +(* transitionStateDecimal = 4, *) +(* transitionStateGreater = 5, *) +(* transitionStateMinus = 6, *) +(* transitionStateLeftParen = 7, *) +(* transitionStateLess = 8, *) +(* transitionStateDot = 9, *) +(* transitionStateComment = 10, *) +(* transitionStateClosingComment = 11, *) +(* 
transitionStateCharacter = 12, *) +(* transitionStateString = 13, *) +(* transitionStateLeadingZero = 14, *) +(* transitionStateDecimalSuffix = 15, *) +(* transitionStateEnd = 16 *) +(* ); *) +(* Transition = record *) +(* action: TransitionAction; *) +(* next_state: TransitionState *) +(* end; *) +(* TransitionAction = ( *) +(* none = 1, *) +(* accumulate = 2, *) +(* skip = 3, *) +(* single = 4, *) +(* eof = 5, *) +(* finalize = 6, *) +(* composite = 7, *) +(* key_id = 8, *) +(* integer = 9, *) +(* delimited = 10 *) +(* ); *) + +(* Assigns some value to at array index. *) + +(* Parameters: *) +(* a0 - Array pointer. *) +(* a1 - Index (word offset into the array). *) +(* a2 - Data to assign. *) +proc _assign_at(); +begin + v0 := v84 + -1; + v0 := v0 * 4; + v0 := v88 + v0; + + _store_word(v80, v0); +end; + +proc _create_classification(); +begin + _assign_at(@classification, 1, 15); + _assign_at(@classification, 2, 1); + _assign_at(@classification, 3, 1); + _assign_at(@classification, 4, 1); + _assign_at(@classification, 5, 1); + _assign_at(@classification, 6, 1); + _assign_at(@classification, 7, 1); + _assign_at(@classification, 8, 1); + _assign_at(@classification, 9, 1); + _assign_at(@classification, 10, 4); + _assign_at(@classification, 11, 4); + _assign_at(@classification, 12, 1); + _assign_at(@classification, 13, 1); + _assign_at(@classification, 14, 4); + _assign_at(@classification, 15, 1); + _assign_at(@classification, 16, 1); + _assign_at(@classification, 17, 1); + _assign_at(@classification, 18, 1); + _assign_at(@classification, 19, 1); + _assign_at(@classification, 20, 1); + _assign_at(@classification, 21, 1); + _assign_at(@classification, 22, 1); + _assign_at(@classification, 23, 1); + _assign_at(@classification, 24, 1); + _assign_at(@classification, 25, 1); + _assign_at(@classification, 26, 1); + _assign_at(@classification, 27, 1); + _assign_at(@classification, 28, 1); + _assign_at(@classification, 29, 1); + _assign_at(@classification, 30, 1); + 
_assign_at(@classification, 31, 1); + _assign_at(@classification, 32, 1); + _assign_at(@classification, 33, 4); + _assign_at(@classification, 34, 11); + _assign_at(@classification, 35, 19); + _assign_at(@classification, 36, 22); + _assign_at(@classification, 37, 22); + _assign_at(@classification, 38, 11); + _assign_at(@classification, 39, 11); + _assign_at(@classification, 40, 18); + _assign_at(@classification, 41, 7); + _assign_at(@classification, 42, 8); + _assign_at(@classification, 43, 9); + _assign_at(@classification, 44, 11); + _assign_at(@classification, 45, 11); + _assign_at(@classification, 46, 17); + _assign_at(@classification, 47, 16); + _assign_at(@classification, 48, 11); + _assign_at(@classification, 49, 13); + _assign_at(@classification, 50, 2); + _assign_at(@classification, 51, 2); + _assign_at(@classification, 52, 2); + _assign_at(@classification, 53, 2); + _assign_at(@classification, 54, 2); + _assign_at(@classification, 55, 2); + _assign_at(@classification, 56, 2); + _assign_at(@classification, 57, 2); + _assign_at(@classification, 58, 2); + _assign_at(@classification, 59, 5); + _assign_at(@classification, 60, 11); + _assign_at(@classification, 61, 21); + _assign_at(@classification, 62, 6); + _assign_at(@classification, 63, 20); + _assign_at(@classification, 64, 22); + _assign_at(@classification, 65, 11); + _assign_at(@classification, 66, 3); + _assign_at(@classification, 67, 3); + _assign_at(@classification, 68, 3); + _assign_at(@classification, 69, 3); + _assign_at(@classification, 70, 3); + _assign_at(@classification, 71, 3); + _assign_at(@classification, 72, 3); + _assign_at(@classification, 73, 3); + _assign_at(@classification, 74, 3); + _assign_at(@classification, 75, 3); + _assign_at(@classification, 76, 3); + _assign_at(@classification, 77, 3); + _assign_at(@classification, 78, 3); + _assign_at(@classification, 79, 3); + _assign_at(@classification, 80, 3); + _assign_at(@classification, 81, 3); + _assign_at(@classification, 82, 3); + 
_assign_at(@classification, 83, 3); + _assign_at(@classification, 84, 3); + _assign_at(@classification, 85, 3); + _assign_at(@classification, 86, 3); + _assign_at(@classification, 87, 3); + _assign_at(@classification, 88, 3); + _assign_at(@classification, 89, 3); + _assign_at(@classification, 90, 3); + _assign_at(@classification, 91, 3); + _assign_at(@classification, 92, 11); + _assign_at(@classification, 93, 22); + _assign_at(@classification, 94, 11); + _assign_at(@classification, 95, 11); + _assign_at(@classification, 96, 10); + _assign_at(@classification, 97, 22); + _assign_at(@classification, 98, 12); + _assign_at(@classification, 99, 12); + _assign_at(@classification, 100, 12); + _assign_at(@classification, 101, 12); + _assign_at(@classification, 102, 12); + _assign_at(@classification, 103, 12); + _assign_at(@classification, 104, 3); + _assign_at(@classification, 105, 3); + _assign_at(@classification, 106, 3); + _assign_at(@classification, 107, 3); + _assign_at(@classification, 108, 3); + _assign_at(@classification, 109, 3); + _assign_at(@classification, 110, 3); + _assign_at(@classification, 111, 3); + _assign_at(@classification, 112, 3); + _assign_at(@classification, 113, 3); + _assign_at(@classification, 114, 3); + _assign_at(@classification, 115, 3); + _assign_at(@classification, 116, 3); + _assign_at(@classification, 117, 3); + _assign_at(@classification, 118, 3); + _assign_at(@classification, 119, 3); + _assign_at(@classification, 120, 3); + _assign_at(@classification, 121, 14); + _assign_at(@classification, 122, 3); + _assign_at(@classification, 123, 3); + _assign_at(@classification, 124, 22); + _assign_at(@classification, 125, 11); + _assign_at(@classification, 126, 22); + _assign_at(@classification, 127, 11); + _assign_at(@classification, 128, 1); + + v0 := 129; + +(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) +.create_classification_loop: + _assign_at(@classification, v0, 22); + v0 := v0 + 1; + + if v0 < 257 then + goto .create_classification_loop; + end; +end; + +(* Parameters: *) +(* a0 - Current state (first index into transitions table). *) +(* a1 - Transition (second index into transitions table).. *) +(* a2 - Action to assign. *) +(* a3 - Next state to assign. *) +proc _set_transition(); +begin + (* Transitions start at offset in classification array. Save the transitions start in v0. *) + v0 := @classification + 256 + + (* Each state is 8 bytes long (2 words: action and next state). *) + (* There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. *) + + v4 := v88 + -1; + v4 := v4 * 128; + + v8 := v84 + -1; + v8 := v8 * 8; + + v12 := v0 + v4; + v12 := v12 + v8; + + _store_word(v80, v12); + v12 := v12 + 4; + _store_word(v76, v12); +end; + +(* Parameters: *) +(* a0 - Current state (Transition state enumeration). *) +(* a1 - Default action (Callback). *) +(* a2 - Next state (Transition state enumeration). 
*) +proc _set_default_transition(); +begin + _set_transition(v88, 1, v84, v80); + _set_transition(v88, 2, v84, v80); + _set_transition(v88, 3, v84, v80); + _set_transition(v88, 4, v84, v80); + _set_transition(v88, 5, v84, v80); + _set_transition(v88, 6, v84, v80); + _set_transition(v88, 7, v84, v80); + _set_transition(v88, 8, v84, v80); + _set_transition(v88, 9, v84, v80); + _set_transition(v88, 10, v84, v80); + _set_transition(v88, 11, v84, v80); + _set_transition(v88, 12, v84, v80); + _set_transition(v88, 13, v84, v80); + _set_transition(v88, 14, v84, v80); + _set_transition(v88, 15, v84, v80); + _set_transition(v88, 16, v84, v80); + _set_transition(v88, 17, v84, v80); + _set_transition(v88, 18, v84, v80); + _set_transition(v88, 19, v84, v80); + _set_transition(v88, 20, v84, v80); + _set_transition(v88, 21, v84, v80); + _set_transition(v88, 22, v84, v80); +end; + + +(* The transition table describes transitions from one state to another, given *) +(* a symbol (character class). *) + +(* The table has m rows and n columns, where m is the amount of states and n is *) +(* the amount of classes. So given the current state and a classified character *) +(* the table can be used to look up the next state. *) + +(* Each cell is a word long. *) +(* - The least significant byte of the word is a row number (beginning with 0). *) +(* It specifies the target state. "ff" means that this is an end state and no *) +(* transition is possible. *) +(* - The next byte is the action that should be performed when transitioning. *) +(* For the meaning of actions see labels in the lex_next function, which *) +(* handles each action. *) +proc _create_transitions(); +begin + (* Start state. 
*) + _set_transition(1, 1, 1, 16); + _set_transition(1, 2, 2, 4); + _set_transition(1, 3, 2, 3); + _set_transition(1, 4, 3, 1); + _set_transition(1, 5, 2, 5); + _set_transition(1, 6, 4, 16); + _set_transition(1, 7, 2, 7); + _set_transition(1, 8, 4, 16); + _set_transition(1, 9, 4, 16); + _set_transition(1, 10, 2, 3); + _set_transition(1, 11, 4, 16); + _set_transition(1, 12, 2, 3); + _set_transition(1, 13, 2, 14); + _set_transition(1, 14, 2, 3); + _set_transition(1, 15, 5, 16); + _set_transition(1, 16, 2, 9); + _set_transition(1, 17, 2, 6); + _set_transition(1, 18, 2, 12); + _set_transition(1, 19, 2, 13); + _set_transition(1, 20, 2, 5); + _set_transition(1, 21, 2, 8); + _set_transition(1, 22, 1, 16); + + (* Colon state. *) + _set_default_transition(2, 6, 16); + _set_transition(2, 6, 7, 16); + + (* Identifier state. *) + _set_default_transition(3, 8, 16); + _set_transition(3, 2, 2, 3); + _set_transition(3, 3, 2, 3); + _set_transition(3, 10, 2, 3); + _set_transition(3, 12, 2, 3); + _set_transition(3, 13, 2, 3); + _set_transition(3, 14, 2, 3); + + (* Decimal state. *) + _set_default_transition(4, 9, 16); + _set_transition(4, 2, 2, 4); + _set_transition(4, 3, 2, 15); + _set_transition(4, 10, 1, 16); + _set_transition(4, 12, 2, 15); + _set_transition(4, 13, 2, 4); + _set_transition(4, 14, 2, 15); + + (* Greater state. *) + _set_default_transition(5, 6, 16); + _set_transition(5, 6, 7, 16); + + (* Minus state. *) + _set_default_transition(6, 6, 16); + _set_transition(6, 20, 7, 16); + + (* Left paren state. *) + _set_default_transition(7, 6, 16); + _set_transition(7, 9, 2, 10); + + (* Less state. *) + _set_default_transition(8, 6, 16); + _set_transition(8, 6, 7, 16); + _set_transition(8, 20, 7, 16); + + (* Hexadecimal after 0x. *) + _set_default_transition(9, 6, 16); + _set_transition(9, 16, 7, 16); + + (* Comment. *) + _set_default_transition(10, 2, 10); + _set_transition(10, 9, 2, 11); + _set_transition(10, 15, 1, 16); + + (* Closing comment. 
*) + _set_default_transition(11, 2, 10); + _set_transition(11, 1, 1, 16); + _set_transition(11, 8, 10, 16); + _set_transition(11, 9, 2, 11); + _set_transition(11, 15, 1, 16); + + (* Character. *) + _set_default_transition(12, 2, 12); + _set_transition(12, 1, 1, 16); + _set_transition(12, 15, 1, 16); + _set_transition(12, 18, 10, 16); + + (* String. *) + _set_default_transition(13, 2, 13); + _set_transition(13, 1, 1, 16); + _set_transition(13, 15, 1, 16); + _set_transition(13, 19, 10, 16); + + (* Leading zero. *) + _set_default_transition(14, 9, 16); + _set_transition(14, 2, 1, 16); + _set_transition(14, 3, 1, 16); + _set_transition(14, 10, 1, 16); + _set_transition(14, 12, 1, 16); + _set_transition(14, 13, 1, 16); + _set_transition(14, 14, 1, 16); + + (* Digit with a character suffix. *) + _set_default_transition(15, 9, 16); + _set_transition(15, 3, 1, 16); + _set_transition(15, 2, 1, 16); + _set_transition(15, 12, 1, 16); + _set_transition(15, 13, 1, 16); + _set_transition(15, 14, 1, 16); +end; + +proc _lexer_get_state(); +begin + (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) + v0 := @classification; + v4 := 16 * 22; + v0 := v0 + 256; + + return v0 + v4 +end; + +(* Gets pointer to the current source text. *) +proc _lexer_get_current(); +begin + return _lexer_get_state() + 4; +end; + +(* Resets the lexer state for reading the next token. *) +proc _lexer_reset(); +begin + (* Transition start state is 1. *) + v0 := _lexer_get_state(); + _store_word(1, v4); + + (* Text pointer to the beginning of the currently read token. *) + v4 := _lexer_get_current(); + _store_word(source_code_position, v4); + + (* Initial length of the token is 0. *) + _store_word(0, source_code_position + 4); +end; + +(* One time lexer initialization. *) +proc _lexer_initialize(); +begin + _create_classification(); + _create_transitions(); +end; + +(* Entry point. 
*) +proc _start(); +begin + _lexer_initialize(); + _symbol_table_build(); + + (* Read the source from the standard input. *) + (* Second argument is buffer size. Modifying update the source_code definition. *) + _read_file(@source_code, 81920); + _compile(); + + _exit(0); +end; diff --git a/boot/stage11.elna b/boot/stage11.elna deleted file mode 100644 index 74447d6..0000000 --- a/boot/stage11.elna +++ /dev/null @@ -1,1738 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) - -(* Stage 11 compiler. *) - -(* - Removed support for inline assembly statements. *) -(* - Assignment to global variables. *) -(* - In procedure declarations skip everything between parameter parens. *) - -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - (* Every type info starts with a word describing what type it is. *) - - (* PRIMITIVE_TYPE = 1 *) - - (* Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - (* Info objects start with a word describing its type. *) - - (* INFO_TYPE = 1 *) - - (* Type info has the type it belongs to. 
*) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - source_code_position: Pointer := @source_code; - -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. *) - -(* Parameters: *) -(* a0 - String token pointer. *) - -(* Returns the length in a0. *) -proc _string_length(); -begin - (* Reset the counter. *) - v0 := 0; - - .string_length_loop; - v88 := v88 + 1; - - if _load_byte(v88) <> '"' then - v0 := v0 + 1; - goto .string_length_loop; - end; - - return v0 -end; - -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* a0 - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. *) -proc _add_string(); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - - .add_string_loop; - if _load_byte(v0) <> '"' then - v8 := _load_byte(v0); - _store_byte(v8, compiler_strings_position); - _store_word(compiler_strings_position + 1, @compiler_strings_position); - v0 := v0 + 1; - - if v8 <> '\\' then - _store_word(compiler_strings_length + 1, @compiler_strings_length); - end; - goto .add_string_loop; - end; - - return v4 -end; - -(* Reads standard input into a buffer. *) -(* a0 - Buffer pointer. *) -(* a1 - Buffer size. *) - -(* Returns the amount of bytes written in a0. *) -proc _read_file(); -begin - _syscall(0, v88, v84, 0, 0, 0, 63); -end; - -(* Writes to the standard output. *) - -(* Parameters: *) -(* a0 - Buffer. 
*) -(* a1 - Buffer length. *) -proc _write_s(); -begin - _syscall(1, v88, v84, 0, 0, 0, 64); -end; - -(* Writes a number to a string buffer. *) - -(* t0 - Local buffer. *) -(* t1 - Constant 10. *) -(* t2 - Current character. *) -(* t3 - Whether the number is negative. *) - -(* Parameters: *) -(* a0 - Whole number. *) -(* a1 - Buffer pointer. *) - -(* Sets a0 to the length of the written number. *) -proc _print_i(); -begin - v0 := @v23; - - if v88 >= 0 then - v4 := 0; - else - v88 = -v88; - v4 := 1; - end; - - .print_i_digit10; - v8 := v88 % 10; - _store_byte(v8 + '0', v0); - - v88 := v88 / 10; - v0 := v0 + -1; - - if v88 <> 0 then - goto .print_i_digit10; - end; - if v4 = 1 then - _store_byte('-', v0); - v0 := v0 + -1; - end; - v4 := @v23 + -v0; - _memcpy(v84, v0 + 1, v4); - - return v4 -end; - -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* a0 - Whole number. *) -proc _write_i(); -begin - v4 := _print_i(v88, @v0); - _write_s(@v0, v4); -end; - -(* Writes a character from a0 into the standard output. *) -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -(* Write null terminated string. *) - -(* Parameters: *) -(* a0 - String. *) -proc _write_z(); -begin - (* Check for 0 character. *) - v0 := _load_byte(v88); - - if v0 <> 0 then - (* Print a character. *) - _write_c(v0); - - (* Advance the input string by one byte. *) - _write_z(v88 + 1); - end; -end; - -(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -(* Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Paramters: *) -(* a0 - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. 
*) -proc _is_alpha(); -begin - v0 := _is_upper(v88); - v4 := _is_lower(v88); - v8 := v88 = '_'; - - v12 := v0 or v4; - return v12 or v8 -end; - -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* a0 - Exemined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(); -begin - v0 := _is_alpha(v88); - v4 := _is_digit(v88); - - return v0 or v4 -end; - -(* Reads the next token. *) - -(* Returns token length in a0. *) -proc _read_token(); -begin - (* Current token position. *) - v0 := source_code_position; - (* Token length. *) - v4 := 0; - - .read_token_loop; - (* Current character. *) - v8 := _load_byte(v0); - - (* First we try to read a derictive. *) - (* A derictive can contain a dot and characters. *) - v12 := v8 = '.'; - v16 := _is_alnum(v8); - - if v12 or v16 then - (* Advance the source code position and token length. *) - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - end; - - return v4 -end; - -(* a0 - First pointer. *) -(* a1 - Second pointer. *) -(* a2 - The length to compare. *) - -(* Returns 0 in a0 if memory regions are equal. *) -proc _memcmp(); -begin - v8 := 0; - - .memcmp_loop; - if v80 <> 0 then - v0 := _load_byte(v88); - v4 := _load_byte(v84); - v8 := v0 + -v4; - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - - if v8 = 0 then - goto .memcmp_loop; - end; - end; - - return v8 -end; - -(* Copies memory. *) - -(* Parameters: *) -(* a0 - Destination. *) -(* a1 - Source. *) -(* a2 - Size. *) - -(* Preserves a0. *) -proc _memcpy(); -begin - .memcpy_loop; - if v80 <> 0 then - v0 := _load_byte(v84); - _store_byte(v0, v88); - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - goto .memcpy_loop; - end; - - return v88 -end; - -(* Advances the token stream by a0 bytes. 
*) -proc _advance_token(); -begin - _store_word(source_code_position + v88, @source_code_position); -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* a0 - Token length. *) - -(* Returns a0 unchanged. *) -proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - _write_c('\''); - _advance_token(1); - - v0 := _load_byte(source_code_position); - if v0 = '\\' then - _write_c('\\'); - _advance_token(1); - end; - - v0 := _load_byte(source_code_position); - _write_c(v0); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - (* Skip the "@" sign. *) - _advance_token(1); - _compile_designator(); -end; - -proc _compile_negate_expression(); -begin - (* Skip the "-" sign. *) - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - (* Skip the "~" sign. 
*) - _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -begin - v0 := _string_length(source_code_position); - v4 := _add_string(source_code_position); - - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '\'' then - _compile_character_literal(); - end; - if v0 = '@' then - _compile_address_expression(); - end; - if v0 = '-' then - _compile_negate_expression(); - end; - if v0 = '~' then - _compile_not_expression(); - end; - if v0 = '"' then - _compile_string_literal(); - end; - if v0 = '_' then - _compile_call(); - _write_z("\nmv t0, a0\n\0"); - end; - if _is_digit(v0) = 1 then - _compile_integer_literal(); - end; - if _is_lower(v0) = 1 then - _compile_variable_expression(); - end; -end; - -proc _compile_binary_rhs(); -begin - (* Skip the whitespace after the binary operator. *) - _advance_token(1); - _compile_term(); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -proc _compile_expression(); -begin - _compile_term(); - v0 := _load_byte(source_code_position); - - if v0 <> ' ' then - goto .compile_expression_end; - end; - (* It is a binary expression. *) - - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 24(sp)\n\0"); - - (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '+' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '*' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '&' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'o' then - _advance_token(2); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'x' then - _advance_token(3); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '%' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("rem t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '/' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("div t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '<' then - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '>' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '>' then - _advance_token(1); - v0 := _load_byte(source_code_position); - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - - .compile_expression_end; -end; - -proc _compile_call(); -begin - (* Stack variables: *) - (* v0 - Procedure name length. *) - (* v4 - Procedure name pointer. *) - (* v8 - Argument count. *) - - v0 := _read_token(); - v4 := source_code_position; - v8 := 0; - - (* Skip the identifier and left paren. *) - _advance_token(v0 + 1); - v12 := _load_byte(source_code_position); - - if v12 = ')' then - goto .compile_call_finalize - end; - .compile_call_loop; - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - v8 := v8 + 1; - - v12 := _load_byte(source_code_position); - - if v12 <> ',' then - goto .compile_call_finalize; - end; - _advance_token(2); - goto .compile_call_loop; - - .compile_call_finalize; - (* Load the argument from the stack. *) - if v8 <> 0 then - (* Decrement the argument counter. *) - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - end; - - .compile_call_end; - _write_z("\tcall \0"); - _write_s(v4, v0); - - (* Skip the right paren. *) - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - v0 := _read_token(); - _write_z("\tj \0"); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - (* Skip "v" in the local variable name. 
*) - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - (* Read local variable stack offset and save it. *) - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - _write_c('\n'); -end; - -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - if _load_byte(source_code_position) = 'v' then - _compile_local_designator(); - else - _compile_global_designator(); - end; -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 60(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); - - (* Compile the assignment. *) - _compile_expression(); - - _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -begin - (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* a0 - Label counter. *) -proc _write_label(); -begin - _write_z(".L\0"); - _write_i(v88); -end; - -proc _compile_if(); -begin - (* Skip "if ". *) - _advance_token(3); - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _advance_token(6); - - (* v0 is the label after the if statement. *) - v0 := label_counter; - _store_word(label_counter + 1, @label_counter); - (* v4 is the label in front of the next elsif condition or end. 
*) - v4 := label_counter; - _store_word(label_counter + 1, @label_counter); - - _write_z("\tbeqz t0, \0"); - _write_label(v4); - _write_c('\n'); - - _compile_procedure_body(); - - _write_z("\tj \0"); - _write_label(v0); - _write_c('\n'); - - _write_label(v4); - _write_z(":\n\0"); - - if _memcmp(source_code_position, "end", 3) = 0 then - goto .compile_if_end; - end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else - end; - .compile_if_else; - (* Skip "else" and newline. *) - _advance_token(5); - _compile_procedure_body(); - - .compile_if_end; - (* Skip "end". *) - _advance_token(3); - - _write_label(v0); - _write_z(":\n\0"); -end; - -proc _compile_label_declaration(); -begin - (* Skip the dot. *) - _advance_token(1); - v0 := _read_token(); - _write_c('.'); - _write_s(source_code_position, v0); - _write_z(":\n\0"); - _advance_token(v0); -end; - -proc _compile_statement(); -begin - _skip_spaces(); - v0 := _load_byte(source_code_position); - - (* This is a call if the statement starts with an underscore. *) - if v0 = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if v0 = 'g' then - _compile_goto(); - goto .compile_statement_semicolon; - end; - if v0 = 'i' then - _compile_if(); - goto .compile_statement_semicolon; - end; - if v0 = 'r' then - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - end; - if v0 = '.' 
then - _compile_label_declaration(); - - goto .compile_statement_semicolon; - end; - _compile_assignment(); - goto .compile_statement_semicolon; - - .compile_statement_semicolon; - _advance_token(2); - _write_c('\n'); - - .compile_statement_end; -end; - -proc _compile_procedure_body(); -begin - .compile_procedure_body_loop; - _skip_empty_lines(); - _skip_spaces(); - - v0 := _memcmp(source_code_position, "end", 3) = 0; - v4 := _memcmp(source_code_position, "else", 4) = 0; - v4 := v0 or v4; - - if v4 = 0 then - _compile_statement(); - goto .compile_procedure_body_loop; - end; -end; - -(* Writes a regster name to the standard output. *) - -(* Parameters: *) -(* a0 - Register character. *) -(* a1 - Register number. *) -proc _write_register(); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _compile_procedure_prologue(); -begin - (* Skip open paren. *) - _advance_token(1); - v0 := 0; - - .compile_procedure_prologue_skip; - if _load_byte(source_code_position) <> ')' then - _advance_token(1); - goto .compile_procedure_prologue_skip; - end; - - .compile_procedure_prologue_loop; - _write_z("\tsw a\0"); - _write_i(v0); - _write_z(", \0"); - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_z("(sp)\n\0"); - - v0 := v0 + 1; - if v0 <> 8 then - goto .compile_procedure_prologue_loop; - end; - (* Skip close paren. *) - _advance_token(1); -end; - -proc _compile_procedure(); -begin - (* Skip "proc ". *) - _advance_token(5); - - (* Save the procedure name length. *) - v0 := _read_token(); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_token(v0); - _write_z(":\n\0"); - - (* Skip procedure name. 
*) - _advance_token(v0); - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - _compile_procedure_prologue(); - - (* Skip semicolon, "begin" and newline. *) - _advance_token(8); - - _compile_procedure_body(); - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); -end; - -proc _skip_spaces(); -begin - v0 := _load_byte(source_code_position); - if v0 = '\t' then - _advance_token(1); - _skip_spaces(); - end; -end; - -(* Prints and skips a line. *) -proc _skip_comment(); -begin - .skip_comment_loop; - v0 := _load_byte(source_code_position); - - (* Check for newline character. *) - if v0 <> '\n' then - (* Advance the input string by one byte. *) - _advance_token(1); - - goto .skip_comment_loop; - end; - (* Skip the newline. *) - _advance_token(1); -end; - -(* Skip newlines and comments. *) -proc _skip_empty_lines(); -begin - .skip_empty_lines_rerun; - v0 := source_code_position; - - .skip_empty_lines_loop; - v4 := _load_byte(v0); - - if v4 = '\n' then - goto .skip_empty_lines_newline; - end; - if v4 = '\t' then - goto .skip_empty_lines_tab; - end; - if v4 <> '(' then - goto .skip_empty_lines_end; - end; - v4 := v0 + 1; - - if _load_byte(v4) = '*' then - goto .skip_empty_lines_comment - end; - goto .skip_empty_lines_end; - - .skip_empty_lines_comment; - _store_word(v0, @source_code_position); - _skip_comment(); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_newline; - _store_word(v0 + 1, @source_code_position); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_tab; - v0 := v0 + 1; - goto .skip_empty_lines_loop - - .skip_empty_lines_end; -end; - -proc _compile_global_initializer(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '"' then - _write_z("\n\t.word strings + \0"); - v4 := _string_length(source_code_position); - - _add_string(source_code_position); - _write_i(); - 
- (* Skip the quoted string. *) - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - end; - if v0 = 'S' then - (* Skip "S(". *) - _advance_token(2); - - if _load_byte(source_code_position) = ')' then - goto .compile_global_initializer_closing; - end; - goto .compile_global_initializer_loop; - end; - if v0 = '@' then - (* Skip @. *) - _advance_token(1); - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - goto .compile_global_initializer_end; - end; - if _is_digit(v0) = 1 then - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(1); - - goto .compile_global_initializer_end; - end; - - .compile_global_initializer_loop; - _compile_global_initializer(); - - if _load_byte(source_code_position) <> ')' then - (* Skip comma and whitespace after it. *) - _advance_token(2); - - goto .compile_global_initializer_loop; - end; - - .compile_global_initializer_closing; - (* Skip ")" *) - _advance_token(1); - - goto .compile_global_initializer_end; - - .compile_global_initializer_end; -end; - -proc _compile_constant_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(v0 + 4); - _compile_global_initializer(); - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_const_part(); -begin - _skip_empty_lines(); - - if _memcmp(source_code_position, "const\0", 5) <> 0 then - goto .compile_const_part_end; - end; - (* Skip "const" with the newline after it. *) - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - - .compile_const_part_loop; - _skip_empty_lines(); - - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section. 
*) - if _load_byte(source_code_position) = '\t' then - _advance_token(1); - - _compile_constant_declaration(); - goto .compile_const_part_loop; - end; - - .compile_const_part_end; -end; - -proc _compile_variable_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _advance_token(v0 + 2); - - (* Skip the type name. *) - v4 := _read_token(); - _advance_token(v4); - - if _load_byte(source_code_position) <> ' ' then - (* Else we assume this is a zeroed 81920 bytes big array. *) - _write_z(" .zero 81920\0"); - else - (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); - _compile_global_initializer(); - end; - - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_var_part(); -begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then - goto .compile_var_part_end; - end; - (* Skip "var" and newline. *) - _advance_token(4); - _write_z(".section .data\n\0"); - - .compile_var_part_loop; - _skip_empty_lines(); - v0 := _load_byte(source_code_position); - - if v0 = '\t' then - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - end; - - .compile_var_part_end; -end; - -(* Process the source code and print the generated code. 
*) -proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - - .compile_module_loop; - _skip_empty_lines(); - - if _load_byte(source_code_position) <> 0 then - (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) - if _memcmp(source_code_position, "proc ", 5) = 0 then - _compile_procedure(); - goto .compile_module_loop; - end; - end; - .compile_module_end; -end; - -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - v0 := @compiler_strings; - v4 := compiler_strings_position; - - .compile_loop; - if v0 < v4 then - v8 := _load_byte(v0); - v0 := v0 + 1; - _write_c(v8); - - goto .compile_loop; - end; - _write_c('"'); - _write_c('\n'); -end; - -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) -proc _exit(); -begin - _syscall(0, 0, 0, 0, 0, 0, 93); -end; - -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* a0 - Symbol pointer. *) -(* a1 - Symbol name length. *) -(* a2 - Symbol name pointer. *) -(* a3 - Symbol table. *) -proc _symbol_table_enter(); -begin - (* The first word in the symbol table is its length, get it. *) - v0 := _load_word(v76); - - (* Calculate the offset for the new symbol. *) - v4 := v0 * 4; - v4 := v4 + 4; - v4 := v76 + 4; - - _memcpy(v4, @v80, 12); - - (* Increment the symbol table length. 
*) - v0 := v0 + 1; - _store_word(v0, v76); -end; - -proc _symbol_table_build(); -begin - _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); -end; - - -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group a handled equivalently. *) - -(* Classification: *) - -(* TransitionClass = ( *) -(* transitionClassInvalid = 1, *) -(* transitionClassDigit = 2, *) -(* transitionClassAlpha = 3, *) -(* transitionClassSpace = 4, *) -(* transitionClassColon = 5, *) -(* transitionClassEquals = 6, *) -(* transitionClassLeftParen = 7, *) -(* transitionClassRightParen = 8, *) -(* transitionClassAsterisk = 9, *) -(* transitionClassUnderscore = 10, *) -(* transitionClassSingle = 11, *) -(* transitionClassHex = 12, *) -(* transitionClassZero = 13, *) -(* transitionClassX = 14, *) -(* transitionClassEof = 15, *) -(* transitionClassDot = 16, *) -(* transitionClassMinus = 17, *) -(* transitionClassSingleQuote = 18, *) -(* transitionClassDoubleQuote = 19, *) -(* transitionClassGreater = 20, *) -(* transitionClassLess = 21, *) -(* transitionClassOther = 22 *) -(* ); *) -(* TransitionState = ( *) -(* transitionStateStart = 1, *) -(* transitionStateColon = 2, *) -(* transitionStateIdentifier = 3, *) -(* transitionStateDecimal = 4, *) -(* transitionStateGreater = 5, *) -(* transitionStateMinus = 6, *) -(* transitionStateLeftParen = 7, *) -(* transitionStateLess = 8, *) -(* transitionStateDot = 9, *) -(* transitionStateComment = 10, *) -(* transitionStateClosingComment = 11, *) -(* 
transitionStateCharacter = 12, *) -(* transitionStateString = 13, *) -(* transitionStateLeadingZero = 14, *) -(* transitionStateDecimalSuffix = 15, *) -(* transitionStateEnd = 16 *) -(* ); *) -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) -(* TransitionAction = ( *) -(* none = 1, *) -(* accumulate = 2, *) -(* skip = 3, *) -(* single = 4, *) -(* eof = 5, *) -(* finalize = 6, *) -(* composite = 7, *) -(* key_id = 8, *) -(* integer = 9, *) -(* delimited = 10 *) -(* ); *) - -(* Assigns some value to at array index. *) - -(* Parameters: *) -(* a0 - Array pointer. *) -(* a1 - Index (word offset into the array). *) -(* a2 - Data to assign. *) -proc _assign_at(); -begin - v0 := v84 + -1; - v0 := v0 * 4; - v0 := v88 + v0; - - _store_word(v80, v0); -end; - -proc _create_classification(); -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - 
_assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - 
_assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); - - v0 := 129; - -(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) - .create_classification_loop; - _assign_at(@classification, v0, 22); - v0 := v0 + 1; - - if v0 < 257 then - goto .create_classification_loop; - end; -end; - -(* Parameters: *) -(* a0 - Current state (first index into transitions table). *) -(* a1 - Transition (second index into transitions table).. *) -(* a2 - Action to assign. *) -(* a3 - Next state to assign. *) -proc _set_transition(); -begin - (* Transitions start at offset in classification array. Save the transitions start in v0. *) - v0 := @classification + 256 - - (* Each state is 8 bytes long (2 words: action and next state). *) - (* There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. *) - - v4 := v88 + -1; - v4 := v4 * 128; - - v8 := v84 + -1; - v8 := v8 * 8; - - v12 := v0 + v4; - v12 := v12 + v8; - - _store_word(v80, v12); - v12 := v12 + 4; - _store_word(v76, v12); -end; - -(* Parameters: *) -(* a0 - Current state (Transition state enumeration). *) -(* a1 - Default action (Callback). *) -(* a2 - Next state (Transition state enumeration). 
*) -proc _set_default_transition(); -begin - _set_transition(v88, 1, v84, v80); - _set_transition(v88, 2, v84, v80); - _set_transition(v88, 3, v84, v80); - _set_transition(v88, 4, v84, v80); - _set_transition(v88, 5, v84, v80); - _set_transition(v88, 6, v84, v80); - _set_transition(v88, 7, v84, v80); - _set_transition(v88, 8, v84, v80); - _set_transition(v88, 9, v84, v80); - _set_transition(v88, 10, v84, v80); - _set_transition(v88, 11, v84, v80); - _set_transition(v88, 12, v84, v80); - _set_transition(v88, 13, v84, v80); - _set_transition(v88, 14, v84, v80); - _set_transition(v88, 15, v84, v80); - _set_transition(v88, 16, v84, v80); - _set_transition(v88, 17, v84, v80); - _set_transition(v88, 18, v84, v80); - _set_transition(v88, 19, v84, v80); - _set_transition(v88, 20, v84, v80); - _set_transition(v88, 21, v84, v80); - _set_transition(v88, 22, v84, v80); -end; - - -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) - -(* Each cell is a word long. *) -(* - The least significant byte of the word is a row number (beginning with 0). *) -(* It specifies the target state. "ff" means that this is an end state and no *) -(* transition is possible. *) -(* - The next byte is the action that should be performed when transitioning. *) -(* For the meaning of actions see labels in the lex_next function, which *) -(* handles each action. *) -proc _create_transitions(); -begin - (* Start state. 
*) - _set_transition(1, 1, 1, 16); - _set_transition(1, 2, 2, 4); - _set_transition(1, 3, 2, 3); - _set_transition(1, 4, 3, 1); - _set_transition(1, 5, 2, 5); - _set_transition(1, 6, 4, 16); - _set_transition(1, 7, 2, 7); - _set_transition(1, 8, 4, 16); - _set_transition(1, 9, 4, 16); - _set_transition(1, 10, 2, 3); - _set_transition(1, 11, 4, 16); - _set_transition(1, 12, 2, 3); - _set_transition(1, 13, 2, 14); - _set_transition(1, 14, 2, 3); - _set_transition(1, 15, 5, 16); - _set_transition(1, 16, 2, 9); - _set_transition(1, 17, 2, 6); - _set_transition(1, 18, 2, 12); - _set_transition(1, 19, 2, 13); - _set_transition(1, 20, 2, 5); - _set_transition(1, 21, 2, 8); - _set_transition(1, 22, 1, 16); - - (* Colon state. *) - _set_default_transition(2, 6, 16); - _set_transition(2, 6, 7, 16); - - (* Identifier state. *) - _set_default_transition(3, 8, 16); - _set_transition(3, 2, 2, 3); - _set_transition(3, 3, 2, 3); - _set_transition(3, 10, 2, 3); - _set_transition(3, 12, 2, 3); - _set_transition(3, 13, 2, 3); - _set_transition(3, 14, 2, 3); - - (* Decimal state. *) - _set_default_transition(4, 9, 16); - _set_transition(4, 2, 2, 4); - _set_transition(4, 3, 2, 15); - _set_transition(4, 10, 1, 16); - _set_transition(4, 12, 2, 15); - _set_transition(4, 13, 2, 4); - _set_transition(4, 14, 2, 15); - - (* Greater state. *) - _set_default_transition(5, 6, 16); - _set_transition(5, 6, 7, 16); - - (* Minus state. *) - _set_default_transition(6, 6, 16); - _set_transition(6, 20, 7, 16); - - (* Left paren state. *) - _set_default_transition(7, 6, 16); - _set_transition(7, 9, 2, 10); - - (* Less state. *) - _set_default_transition(8, 6, 16); - _set_transition(8, 6, 7, 16); - _set_transition(8, 20, 7, 16); - - (* Hexadecimal after 0x. *) - _set_default_transition(9, 6, 16); - _set_transition(9, 16, 7, 16); - - (* Comment. *) - _set_default_transition(10, 2, 10); - _set_transition(10, 9, 2, 11); - _set_transition(10, 15, 1, 16); - - (* Closing comment. 
*) - _set_default_transition(11, 2, 10); - _set_transition(11, 1, 1, 16); - _set_transition(11, 8, 10, 16); - _set_transition(11, 9, 2, 11); - _set_transition(11, 15, 1, 16); - - (* Character. *) - _set_default_transition(12, 2, 12); - _set_transition(12, 1, 1, 16); - _set_transition(12, 15, 1, 16); - _set_transition(12, 18, 10, 16); - - (* String. *) - _set_default_transition(13, 2, 13); - _set_transition(13, 1, 1, 16); - _set_transition(13, 15, 1, 16); - _set_transition(13, 19, 10, 16); - - (* Leading zero. *) - _set_default_transition(14, 9, 16); - _set_transition(14, 2, 1, 16); - _set_transition(14, 3, 1, 16); - _set_transition(14, 10, 1, 16); - _set_transition(14, 12, 1, 16); - _set_transition(14, 13, 1, 16); - _set_transition(14, 14, 1, 16); - - (* Digit with a character suffix. *) - _set_default_transition(15, 9, 16); - _set_transition(15, 3, 1, 16); - _set_transition(15, 2, 1, 16); - _set_transition(15, 12, 1, 16); - _set_transition(15, 13, 1, 16); - _set_transition(15, 14, 1, 16); -end; - -proc _lexer_get_state(); -begin - (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) - v0 := @classification; - v4 := 16 * 22; - v0 := v0 + 256; - - return v0 + v4 -end; - -(* Gets pointer to the current source text. *) -proc _lexer_get_current(); -begin - return _lexer_get_state() + 4 -end; - -(* Resets the lexer state for reading the next token. *) -proc _lexer_reset(); -begin - (* Transition start state is 1. *) - v0 := _lexer_get_state(); - _store_word(1, v4); - - (* Text pointer to the beginning of the currently read token. *) - v4 := _lexer_get_current(); - _store_word(source_code_position, v4); - - (* Initial length of the token is 0. *) - _store_word(0, source_code_position + 4); -end; - -(* One time lexer initialization. *) -proc _lexer_initialize(); -begin - _create_classification(); - _create_transitions(); -end; - -(* Entry point. 
*) -proc _start(); -begin - _lexer_initialize(); - _symbol_table_build(); - - (* Read the source from the standard input. *) - (* Second argument is buffer size. Modifying update the source_code definition. *) - _read_file(@source_code, 81920); - _compile(); - - _exit(0); -end; diff --git a/boot/stage11/cl.elna b/boot/stage11/cl.elna new file mode 100644 index 0000000..74447d6 --- /dev/null +++ b/boot/stage11/cl.elna @@ -0,0 +1,1738 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, *) +(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) +(* obtain one at https://mozilla.org/MPL/2.0/. *) + +(* Stage 11 compiler. *) + +(* - Removed support for inline assembly statements. *) +(* - Assignment to global variables. *) +(* - In procedure declarations skip everything between parameter parens. *) + +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + (* Every type info starts with a word describing what type it is. *) + + (* PRIMITIVE_TYPE = 1 *) + + (* Primitive types have only type size. *) + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + (* Info objects start with a word describing its type. *) + + (* INFO_TYPE = 1 *) + + (* Type info has the type it belongs to. 
*) + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + label_counter: Word := 0; + source_code_position: Pointer := @source_code; + +(* Calculates and returns the string token length between quotes, including the *) +(* escaping slash characters. *) + +(* Parameters: *) +(* a0 - String token pointer. *) + +(* Returns the length in a0. *) +proc _string_length(); +begin + (* Reset the counter. *) + v0 := 0; + + .string_length_loop; + v88 := v88 + 1; + + if _load_byte(v88) <> '"' then + v0 := v0 + 1; + goto .string_length_loop; + end; + + return v0 +end; + +(* Adds a string to the global, read-only string storage. *) + +(* Parameters: *) +(* a0 - String token. *) + +(* Returns the offset from the beginning of the storage to the new string in a0. *) +proc _add_string(); +begin + v0 := v88 + 1; + v4 := compiler_strings_length; + + .add_string_loop; + if _load_byte(v0) <> '"' then + v8 := _load_byte(v0); + _store_byte(v8, compiler_strings_position); + _store_word(compiler_strings_position + 1, @compiler_strings_position); + v0 := v0 + 1; + + if v8 <> '\\' then + _store_word(compiler_strings_length + 1, @compiler_strings_length); + end; + goto .add_string_loop; + end; + + return v4 +end; + +(* Reads standard input into a buffer. *) +(* a0 - Buffer pointer. *) +(* a1 - Buffer size. *) + +(* Returns the amount of bytes written in a0. *) +proc _read_file(); +begin + _syscall(0, v88, v84, 0, 0, 0, 63); +end; + +(* Writes to the standard output. *) + +(* Parameters: *) +(* a0 - Buffer. 
*) +(* a1 - Buffer length. *) +proc _write_s(); +begin + _syscall(1, v88, v84, 0, 0, 0, 64); +end; + +(* Writes a number to a string buffer. *) + +(* t0 - Local buffer. *) +(* t1 - Constant 10. *) +(* t2 - Current character. *) +(* t3 - Whether the number is negative. *) + +(* Parameters: *) +(* a0 - Whole number. *) +(* a1 - Buffer pointer. *) + +(* Sets a0 to the length of the written number. *) +proc _print_i(); +begin + v0 := @v23; + + if v88 >= 0 then + v4 := 0; + else + v88 = -v88; + v4 := 1; + end; + + .print_i_digit10; + v8 := v88 % 10; + _store_byte(v8 + '0', v0); + + v88 := v88 / 10; + v0 := v0 + -1; + + if v88 <> 0 then + goto .print_i_digit10; + end; + if v4 = 1 then + _store_byte('-', v0); + v0 := v0 + -1; + end; + v4 := @v23 + -v0; + _memcpy(v84, v0 + 1, v4); + + return v4 +end; + +(* Writes a number to the standard output. *) + +(* Parameters: *) +(* a0 - Whole number. *) +proc _write_i(); +begin + v4 := _print_i(v88, @v0); + _write_s(@v0, v4); +end; + +(* Writes a character from a0 into the standard output. *) +proc _write_c(); +begin + _write_s(@v88, 1); +end; + +(* Write null terminated string. *) + +(* Parameters: *) +(* a0 - String. *) +proc _write_z(); +begin + (* Check for 0 character. *) + v0 := _load_byte(v88); + + if v0 <> 0 then + (* Print a character. *) + _write_c(v0); + + (* Advance the input string by one byte. *) + _write_z(v88 + 1); + end; +end; + +(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_upper(); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +(* Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_lower(); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +(* Detects if the passed character is a 7-bit alpha character or an underscore. *) + +(* Paramters: *) +(* a0 - Tested character. *) + +(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. 
*)
+proc _is_alpha();
+begin
+	v0 := _is_upper(v88);
+	v4 := _is_lower(v88);
+	v8 := v88 = '_';
+
+	v12 := v0 or v4;
+	return v12 or v8
+end;
+
+(* Detects whether the passed character is a digit *)
+(* (a value between 0 and 9). *)
+
+(* Parameters: *)
+(* a0 - Examined value. *)
+
+(* Sets a0 to 1 if it is a digit, to 0 otherwise. *)
+proc _is_digit();
+begin
+	v0 := v88 >= '0';
+	v4 := v88 <= '9';
+
+	return v0 & v4
+end;
+
+(* Detects whether the passed character is a letter, an underscore or a digit. *)
+
+(* Parameters: *)
+(* a0 - Examined value. *)
+proc _is_alnum();
+begin
+	v0 := _is_alpha(v88);
+	v4 := _is_digit(v88);
+
+	return v0 or v4
+end;
+
+(* Reads the next token. The source code position is not changed. *)
+
+(* Returns token length in a0. *)
+proc _read_token();
+begin
+	(* Current token position. *)
+	v0 := source_code_position;
+	(* Token length. *)
+	v4 := 0;
+
+	.read_token_loop;
+	(* Current character. *)
+	v8 := _load_byte(v0);
+
+	(* First we try to read a directive. *)
+	(* A directive can contain a dot and characters. *)
+	v12 := v8 = '.';
+	v16 := _is_alnum(v8);
+
+	if v12 or v16 then
+		(* Advance the source code position and token length. *)
+		v4 := v4 + 1;
+		v0 := v0 + 1;
+
+		goto .read_token_loop;
+	end;
+
+	return v4
+end;
+
+(* Compares two memory regions byte by byte. *)
+
+(* Parameters: *)
+(* a0 - First pointer. *)
+(* a1 - Second pointer. *)
+(* a2 - The length to compare. *)
+
+(* Returns 0 in a0 if memory regions are equal, *)
+(* otherwise the difference of the first two bytes that differ. *)
+proc _memcmp();
+begin
+	v8 := 0;
+
+	.memcmp_loop;
+	if v80 <> 0 then
+		v0 := _load_byte(v88);
+		v4 := _load_byte(v84);
+		v8 := v0 + -v4;
+
+		v88 := v88 + 1;
+		v84 := v84 + 1;
+		v80 := v80 + -1;
+
+		if v8 = 0 then
+			goto .memcmp_loop;
+		end;
+	end;
+
+	return v8
+end;
+
+(* Copies memory. *)
+
+(* Parameters: *)
+(* a0 - Destination. *)
+(* a1 - Source. *)
+(* a2 - Size. *)
+
+(* Preserves a0. *)
+proc _memcpy();
+begin
+	(* Remember the destination, v88 is advanced while copying. *)
+	v4 := v88;
+
+	.memcpy_loop;
+	if v80 <> 0 then
+		v0 := _load_byte(v84);
+		_store_byte(v0, v88);
+
+		v88 := v88 + 1;
+		v84 := v84 + 1;
+		v80 := v80 + -1;
+		goto .memcpy_loop;
+	end;
+
+	return v4
+end;
+
+(* Advances the token stream by a0 bytes.
*) +proc _advance_token(); +begin + _store_word(source_code_position + v88, @source_code_position); +end; + +(* Prints the current token. *) + +(* Parameters: *) +(* a0 - Token length. *) + +(* Returns a0 unchanged. *) +proc _write_token(); +begin + _write_s(source_code_position, v88); + return v88 +end; + +proc _compile_integer_literal(); +begin + _write_z("\tli t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + _write_z("\tli t0, \0"); + + _write_c('\''); + _advance_token(1); + + v0 := _load_byte(source_code_position); + if v0 = '\\' then + _write_c('\\'); + _advance_token(1); + end; + + v0 := _load_byte(source_code_position); + _write_c(v0); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +proc _compile_address_expression(); +begin + (* Skip the "@" sign. *) + _advance_token(1); + _compile_designator(); +end; + +proc _compile_negate_expression(); +begin + (* Skip the "-" sign. *) + _advance_token(1); + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +proc _compile_not_expression(); +begin + (* Skip the "~" sign. 
*) + _advance_token(1); + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +proc _compile_string_literal(); +begin + v0 := _string_length(source_code_position); + v4 := _add_string(source_code_position); + + _advance_token(v0 + 2); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(v4); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +proc _compile_term(); +begin + v0 := _load_byte(source_code_position); + + if v0 = '\'' then + _compile_character_literal(); + end; + if v0 = '@' then + _compile_address_expression(); + end; + if v0 = '-' then + _compile_negate_expression(); + end; + if v0 = '~' then + _compile_not_expression(); + end; + if v0 = '"' then + _compile_string_literal(); + end; + if v0 = '_' then + _compile_call(); + _write_z("\nmv t0, a0\n\0"); + end; + if _is_digit(v0) = 1 then + _compile_integer_literal(); + end; + if _is_lower(v0) = 1 then + _compile_variable_expression(); + end; +end; + +proc _compile_binary_rhs(); +begin + (* Skip the whitespace after the binary operator. *) + _advance_token(1); + _compile_term(); + + (* Load the left expression from the stack; *) + _write_z("\tlw t1, 24(sp)\n\0"); +end; + +proc _compile_expression(); +begin + _compile_term(); + v0 := _load_byte(source_code_position); + + if v0 <> ' ' then + goto .compile_expression_end; + end; + (* It is a binary expression. *) + + (* Save the value of the left expression on the stack. *) + _write_z("sw t0, 24(sp)\n\0"); + + (* Skip surrounding whitespace in front of the operator. *) + _advance_token(1); + v0 := _load_byte(source_code_position); + + if v0 = '+' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '*' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '&' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = 'o' then + _advance_token(2); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = 'x' then + _advance_token(3); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '%' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("rem t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '/' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("div t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '<' then + _advance_token(1); + v0 := _load_byte(source_code_position); + + if v0 = '>' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("slt t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '>' then + _advance_token(1); + v0 := _load_byte(source_code_position); + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. 
*)
+			_write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0");
+
+			goto .compile_expression_end;
+		end;
+		_compile_binary_rhs();
+
+		(* Execute the operation. t0 is the right operand and t1 the left one, *)
+		(* so "left > right" is "right < left". *)
+		_write_z("\tslt t0, t0, t1\n\0");
+
+		goto .compile_expression_end;
+	end;
+
+	.compile_expression_end;
+end;
+
+(* Compiles a procedure call. The arguments are evaluated from left to right, *)
+(* parked on the stack and then loaded into the registers a0, a1 and so on. *)
+proc _compile_call();
+begin
+	(* Stack variables: *)
+	(* v0 - Procedure name length. *)
+	(* v4 - Procedure name pointer. *)
+	(* v8 - Argument count. *)
+
+	v0 := _read_token();
+	v4 := source_code_position;
+	v8 := 0;
+
+	(* Skip the identifier and left paren. *)
+	_advance_token(v0 + 1);
+	v12 := _load_byte(source_code_position);
+
+	if v12 = ')' then
+		goto .compile_call_finalize
+	end;
+	.compile_call_loop;
+	_compile_expression();
+
+	(* Save the argument on the stack. *)
+	_write_z("\tsw t0, \0");
+
+	(* Calculate the stack offset: 116 - (4 * argument_counter) *)
+	v12 := v8 * 4;
+	v12 := 116 + -v12;
+	_write_i(v12);
+
+	_write_z("(sp)\n\0");
+
+	(* Add one to the argument counter. *)
+	v8 := v8 + 1;
+
+	v12 := _load_byte(source_code_position);
+
+	if v12 <> ',' then
+		goto .compile_call_finalize;
+	end;
+	(* Skip the comma and the whitespace after it. *)
+	_advance_token(2);
+	goto .compile_call_loop;
+
+	.compile_call_finalize;
+	(* Load the argument from the stack. *)
+	if v8 <> 0 then
+		(* Decrement the argument counter. *)
+		v8 := v8 + -1;
+
+		_write_z("\tlw a\0");
+		_write_i(v8);
+
+		_write_z(", \0");
+
+		(* Calculate the stack offset: 116 - (4 * argument_counter) *)
+		v12 := v8 * 4;
+		v12 := 116 + -v12;
+		_write_i(v12);
+
+		_write_z("(sp)\n\0");
+
+		goto .compile_call_finalize;
+	end;
+
+	.compile_call_end;
+	_write_z("\tcall \0");
+	_write_s(v4, v0);
+
+	(* Skip the right paren. *)
+	_advance_token(1);
+end;
+
+(* Compiles a goto statement into an unconditional jump to the label. *)
+proc _compile_goto();
+begin
+	(* Skip "goto " with the space after it. *)
+	_advance_token(5);
+
+	v0 := _read_token();
+	_write_z("\tj \0");
+
+	_write_token(v0);
+	(* Pass the length explicitly instead of relying on a0 left over from _write_token. *)
+	_advance_token(v0);
+end;
+
+proc _compile_local_designator();
+begin
+	(* Skip "v" in the local variable name.
*) + _advance_token(1); + _write_z("\t addi t0, sp, \0"); + + (* Read local variable stack offset and save it. *) + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + _write_c('\n'); +end; + +proc _compile_global_designator(); +begin + _write_z("\tla t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +proc _compile_designator(); +begin + if _load_byte(source_code_position) = 'v' then + _compile_local_designator(); + else + _compile_global_designator(); + end; +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + (* Save the assignee address on the stack. *) + _write_z("\tsw t0, 60(sp)\n\0"); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + _advance_token(4); + + (* Compile the assignment. *) + _compile_expression(); + + _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +begin + (* Skip "return" keyword and whitespace after it. *) + _advance_token(7); + _compile_expression(); + + _write_z("mv a0, t0\n\0"); +end; + +(* Writes a label, .Ln, where n is a unique number. *) + +(* Parameters: *) +(* a0 - Label counter. *) +proc _write_label(); +begin + _write_z(".L\0"); + _write_i(v88); +end; + +proc _compile_if(); +begin + (* Skip "if ". *) + _advance_token(3); + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _advance_token(6); + + (* v0 is the label after the if statement. *) + v0 := label_counter; + _store_word(label_counter + 1, @label_counter); + (* v4 is the label in front of the next elsif condition or end. 
*)
+	v4 := label_counter;
+	_store_word(label_counter + 1, @label_counter);
+
+	(* Jump over the "then" branch if the condition is false. *)
+	_write_z("\tbeqz t0, \0");
+	_write_label(v4);
+	_write_c('\n');
+
+	_compile_procedure_body();
+
+	(* At the end of the "then" branch jump behind the whole statement. *)
+	_write_z("\tj \0");
+	_write_label(v0);
+	_write_c('\n');
+
+	_write_label(v4);
+	_write_z(":\n\0");
+
+	if _memcmp(source_code_position, "end", 3) = 0 then
+		goto .compile_if_end;
+	end;
+	(* Compare all 4 characters of the keyword, like _compile_procedure_body does. *)
+	if _memcmp(source_code_position, "else", 4) = 0 then
+		goto .compile_if_else
+	end;
+	.compile_if_else;
+	(* Skip "else" and newline. *)
+	_advance_token(5);
+	_compile_procedure_body();
+
+	.compile_if_end;
+	(* Skip "end". *)
+	_advance_token(3);
+
+	_write_label(v0);
+	_write_z(":\n\0");
+end;
+
+(* Compiles a label declaration, a dot followed by the label name. *)
+(* The label is written with its dot and a colon after it. *)
+proc _compile_label_declaration();
+begin
+	(* Skip the dot. *)
+	_advance_token(1);
+	v0 := _read_token();
+	_write_c('.');
+	_write_s(source_code_position, v0);
+	_write_z(":\n\0");
+	_advance_token(v0);
+end;
+
+(* Compiles one statement. The kind of the statement is determined *)
+(* by its first character, everything else is treated as an assignment. *)
+proc _compile_statement();
+begin
+	_skip_spaces();
+	v0 := _load_byte(source_code_position);
+
+	(* This is a call if the statement starts with an underscore. *)
+	if v0 = '_' then
+		_compile_call();
+		goto .compile_statement_semicolon;
+	end;
+	if v0 = 'g' then
+		_compile_goto();
+		goto .compile_statement_semicolon;
+	end;
+	if v0 = 'i' then
+		_compile_if();
+		goto .compile_statement_semicolon;
+	end;
+	if v0 = 'r' then
+		_compile_return_statement();
+		_write_c('\n');
+
+		goto .compile_statement_end;
+	end;
+	if v0 = '.'
then + _compile_label_declaration(); + + goto .compile_statement_semicolon; + end; + _compile_assignment(); + goto .compile_statement_semicolon; + + .compile_statement_semicolon; + _advance_token(2); + _write_c('\n'); + + .compile_statement_end; +end; + +proc _compile_procedure_body(); +begin + .compile_procedure_body_loop; + _skip_empty_lines(); + _skip_spaces(); + + v0 := _memcmp(source_code_position, "end", 3) = 0; + v4 := _memcmp(source_code_position, "else", 4) = 0; + v4 := v0 or v4; + + if v4 = 0 then + _compile_statement(); + goto .compile_procedure_body_loop; + end; +end; + +(* Writes a regster name to the standard output. *) + +(* Parameters: *) +(* a0 - Register character. *) +(* a1 - Register number. *) +proc _write_register(); +begin + _write_c(v88); + v84 := v84 + '0'; + _write_c(v84); +end; + +proc _compile_procedure_prologue(); +begin + (* Skip open paren. *) + _advance_token(1); + v0 := 0; + + .compile_procedure_prologue_skip; + if _load_byte(source_code_position) <> ')' then + _advance_token(1); + goto .compile_procedure_prologue_skip; + end; + + .compile_procedure_prologue_loop; + _write_z("\tsw a\0"); + _write_i(v0); + _write_z(", \0"); + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); + + _write_z("(sp)\n\0"); + + v0 := v0 + 1; + if v0 <> 8 then + goto .compile_procedure_prologue_loop; + end; + (* Skip close paren. *) + _advance_token(1); +end; + +proc _compile_procedure(); +begin + (* Skip "proc ". *) + _advance_token(5); + + (* Save the procedure name length. *) + v0 := _read_token(); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_token(v0); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_token(v0); + _write_z(":\n\0"); + + (* Skip procedure name. 
*) + _advance_token(v0); + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + _compile_procedure_prologue(); + + (* Skip semicolon, "begin" and newline. *) + _advance_token(8); + + _compile_procedure_body(); + + (* Write the epilogue. *) + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + (* Skip the "end" keyword, semicolon and newline. *) + _advance_token(5); +end; + +proc _skip_spaces(); +begin + v0 := _load_byte(source_code_position); + if v0 = '\t' then + _advance_token(1); + _skip_spaces(); + end; +end; + +(* Prints and skips a line. *) +proc _skip_comment(); +begin + .skip_comment_loop; + v0 := _load_byte(source_code_position); + + (* Check for newline character. *) + if v0 <> '\n' then + (* Advance the input string by one byte. *) + _advance_token(1); + + goto .skip_comment_loop; + end; + (* Skip the newline. *) + _advance_token(1); +end; + +(* Skip newlines and comments. *) +proc _skip_empty_lines(); +begin + .skip_empty_lines_rerun; + v0 := source_code_position; + + .skip_empty_lines_loop; + v4 := _load_byte(v0); + + if v4 = '\n' then + goto .skip_empty_lines_newline; + end; + if v4 = '\t' then + goto .skip_empty_lines_tab; + end; + if v4 <> '(' then + goto .skip_empty_lines_end; + end; + v4 := v0 + 1; + + if _load_byte(v4) = '*' then + goto .skip_empty_lines_comment + end; + goto .skip_empty_lines_end; + + .skip_empty_lines_comment; + _store_word(v0, @source_code_position); + _skip_comment(); + goto .skip_empty_lines_rerun; + + .skip_empty_lines_newline; + _store_word(v0 + 1, @source_code_position); + goto .skip_empty_lines_rerun; + + .skip_empty_lines_tab; + v0 := v0 + 1; + goto .skip_empty_lines_loop + + .skip_empty_lines_end; +end; + +proc _compile_global_initializer(); +begin + v0 := _load_byte(source_code_position); + + if v0 = '"' then + _write_z("\n\t.word strings + \0"); + v4 := _string_length(source_code_position); + + _add_string(source_code_position); + _write_i(); + 
+		(* Skip the quoted string. *)
+		_advance_token(v4 + 2);
+
+		goto .compile_global_initializer_end;
+	end;
+	if v0 = 'S' then
+		(* Skip "S(". *)
+		_advance_token(2);
+
+		if _load_byte(source_code_position) = ')' then
+			goto .compile_global_initializer_closing;
+		end;
+		goto .compile_global_initializer_loop;
+	end;
+	if v0 = '@' then
+		(* Skip @. *)
+		_advance_token(1);
+		_write_z("\n\t.word \0");
+		v0 := _read_token();
+		_write_token(v0);
+		_advance_token(v0);
+
+		goto .compile_global_initializer_end;
+	end;
+	if _is_digit(v0) = 1 then
+		_write_z("\n\t.word \0");
+		v0 := _read_token();
+		_write_token(v0);
+		(* Skip the whole number, it can be longer than one digit. *)
+		_advance_token(v0);
+
+		goto .compile_global_initializer_end;
+	end;
+
+	(* Members of a structure initializer are compiled recursively one by one. *)
+	.compile_global_initializer_loop;
+	_compile_global_initializer();
+
+	if _load_byte(source_code_position) <> ')' then
+		(* Skip comma and whitespace after it. *)
+		_advance_token(2);
+
+		goto .compile_global_initializer_loop;
+	end;
+
+	.compile_global_initializer_closing;
+	(* Skip ")" *)
+	_advance_token(1);
+
+	goto .compile_global_initializer_end;
+
+	.compile_global_initializer_end;
+end;
+
+(* Compiles one constant declaration into an object with the constant name. *)
+proc _compile_constant_declaration();
+begin
+	v0 := _read_token();
+
+	_write_z(".type \0");
+	_write_token(v0);
+	_write_z(", @object\n\0");
+
+	_write_token(v0);
+	_write_c(':');
+
+	(* Skip the constant name with assignment sign and surrounding whitespaces. *)
+	_advance_token(v0 + 4);
+	_compile_global_initializer();
+	(* Skip semicolon and newline. *)
+	_advance_token(2);
+	_write_c('\n');
+end;
+
+(* Compiles the const section if the source code position points to one. *)
+proc _compile_const_part();
+begin
+	_skip_empty_lines();
+
+	if _memcmp(source_code_position, "const\0", 5) <> 0 then
+		goto .compile_const_part_end;
+	end;
+	(* Skip "const" with the newline after it. *)
+	_advance_token(6);
+	_write_z(".section .rodata # Compiled from const section.\n\n\0");
+
+	.compile_const_part_loop;
+	_skip_empty_lines();
+
+	(* If the character at the line beginning is not indentation, *)
+	(* it is probably the next code section.
*) + if _load_byte(source_code_position) = '\t' then + _advance_token(1); + + _compile_constant_declaration(); + goto .compile_const_part_loop; + end; + + .compile_const_part_end; +end; + +proc _compile_variable_declaration(); +begin + v0 := _read_token(); + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the variable name and colon with space before the type. *) + _advance_token(v0 + 2); + + (* Skip the type name. *) + v4 := _read_token(); + _advance_token(v4); + + if _load_byte(source_code_position) <> ' ' then + (* Else we assume this is a zeroed 81920 bytes big array. *) + _write_z(" .zero 81920\0"); + else + (* Skip the assignment sign with surrounding whitespaces. *) + _advance_token(4); + _compile_global_initializer(); + end; + + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_var_part(); +begin + if _memcmp(source_code_position, "var\0", 3) <> 0 then + goto .compile_var_part_end; + end; + (* Skip "var" and newline. *) + _advance_token(4); + _write_z(".section .data\n\0"); + + .compile_var_part_loop; + _skip_empty_lines(); + v0 := _load_byte(source_code_position); + + if v0 = '\t' then + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + end; + + .compile_var_part_end; +end; + +(* Process the source code and print the generated code. 
*)
+proc _compile_module();
+begin
+	_compile_const_part();
+	_skip_empty_lines();
+	_compile_var_part();
+
+	(* Write the built-in procedures that cannot be expressed in the language itself. *)
+	_write_z(".section .text\n\n\0");
+	_write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0");
+	_write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0");
+	_write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0");
+	_write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0");
+	_write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0");
+
+	.compile_module_loop;
+	_skip_empty_lines();
+
+	if _load_byte(source_code_position) <> 0 then
+		(* 5 is "proc " length. Space is needed to distinguish from "procedure". *)
+		if _memcmp(source_code_position, "proc ", 5) = 0 then
+			_compile_procedure();
+			goto .compile_module_loop;
+		end;
+	end;
+	.compile_module_end;
+end;
+
+(* Compiles the module and writes the collected string literals after it. *)
+proc _compile();
+begin
+	_write_z(".globl _start\n\n\0");
+	_compile_module();
+
+	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
+	_write_c('"');
+
+	(* Write everything between the storage start and the current storage position. *)
+	v0 := @compiler_strings;
+	v4 := compiler_strings_position;
+
+	.compile_loop;
+	if v0 < v4 then
+		v8 := _load_byte(v0);
+		v0 := v0 + 1;
+		_write_c(v8);
+
+		goto .compile_loop;
+	end;
+	_write_c('"');
+	_write_c('\n');
+end;
+
+(* Terminates the program. a0 contains the return code. *)
+
+(* Parameters: *)
+(* a0 - Status code. *)
+proc _exit();
+begin
+	(* Forward the status code instead of always exiting with 0. *)
+	_syscall(v88, 0, 0, 0, 0, 0, 93);
+end;
+
+(* Inserts a symbol into the table. *)
+
+(* Parameters: *)
+(* a0 - Symbol pointer. *)
+(* a1 - Symbol name length. *)
+(* a2 - Symbol name pointer. *)
+(* a3 - Symbol table. *)
+proc _symbol_table_enter();
+begin
+	(* The first word in the symbol table is its length, get it. *)
+	v0 := _load_word(v76);
+
+	(* Calculate the offset for the new symbol. *)
+	(* Each entry is 12 bytes long (3 words), the entries follow the length word. *)
+	v4 := v0 * 12;
+	v4 := v4 + 4;
+	v4 := v76 + v4;
+
+	(* The parameters a2, a1 and a0 lie next to each other beginning at v80, *)
+	(* so the entry consists of the name pointer, name length and symbol pointer. *)
+	_memcpy(v4, @v80, 12);
+
+	(* Increment the symbol table length.
*) + v0 := v0 + 1; + _store_word(v0, v76); +end; + +proc _symbol_table_build(); +begin + _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); +end; + + +(* Classification table assigns each possible character to a group (class). All *) +(* characters of the same group a handled equivalently. *) + +(* Classification: *) + +(* TransitionClass = ( *) +(* transitionClassInvalid = 1, *) +(* transitionClassDigit = 2, *) +(* transitionClassAlpha = 3, *) +(* transitionClassSpace = 4, *) +(* transitionClassColon = 5, *) +(* transitionClassEquals = 6, *) +(* transitionClassLeftParen = 7, *) +(* transitionClassRightParen = 8, *) +(* transitionClassAsterisk = 9, *) +(* transitionClassUnderscore = 10, *) +(* transitionClassSingle = 11, *) +(* transitionClassHex = 12, *) +(* transitionClassZero = 13, *) +(* transitionClassX = 14, *) +(* transitionClassEof = 15, *) +(* transitionClassDot = 16, *) +(* transitionClassMinus = 17, *) +(* transitionClassSingleQuote = 18, *) +(* transitionClassDoubleQuote = 19, *) +(* transitionClassGreater = 20, *) +(* transitionClassLess = 21, *) +(* transitionClassOther = 22 *) +(* ); *) +(* TransitionState = ( *) +(* transitionStateStart = 1, *) +(* transitionStateColon = 2, *) +(* transitionStateIdentifier = 3, *) +(* transitionStateDecimal = 4, *) +(* transitionStateGreater = 5, *) +(* transitionStateMinus = 6, *) +(* transitionStateLeftParen = 7, *) +(* transitionStateLess = 8, *) +(* transitionStateDot = 9, *) +(* transitionStateComment = 10, *) +(* transitionStateClosingComment = 11, *) +(* 
transitionStateCharacter = 12, *) +(* transitionStateString = 13, *) +(* transitionStateLeadingZero = 14, *) +(* transitionStateDecimalSuffix = 15, *) +(* transitionStateEnd = 16 *) +(* ); *) +(* Transition = record *) +(* action: TransitionAction; *) +(* next_state: TransitionState *) +(* end; *) +(* TransitionAction = ( *) +(* none = 1, *) +(* accumulate = 2, *) +(* skip = 3, *) +(* single = 4, *) +(* eof = 5, *) +(* finalize = 6, *) +(* composite = 7, *) +(* key_id = 8, *) +(* integer = 9, *) +(* delimited = 10 *) +(* ); *) + +(* Assigns some value to at array index. *) + +(* Parameters: *) +(* a0 - Array pointer. *) +(* a1 - Index (word offset into the array). *) +(* a2 - Data to assign. *) +proc _assign_at(); +begin + v0 := v84 + -1; + v0 := v0 * 4; + v0 := v88 + v0; + + _store_word(v80, v0); +end; + +proc _create_classification(); +begin + _assign_at(@classification, 1, 15); + _assign_at(@classification, 2, 1); + _assign_at(@classification, 3, 1); + _assign_at(@classification, 4, 1); + _assign_at(@classification, 5, 1); + _assign_at(@classification, 6, 1); + _assign_at(@classification, 7, 1); + _assign_at(@classification, 8, 1); + _assign_at(@classification, 9, 1); + _assign_at(@classification, 10, 4); + _assign_at(@classification, 11, 4); + _assign_at(@classification, 12, 1); + _assign_at(@classification, 13, 1); + _assign_at(@classification, 14, 4); + _assign_at(@classification, 15, 1); + _assign_at(@classification, 16, 1); + _assign_at(@classification, 17, 1); + _assign_at(@classification, 18, 1); + _assign_at(@classification, 19, 1); + _assign_at(@classification, 20, 1); + _assign_at(@classification, 21, 1); + _assign_at(@classification, 22, 1); + _assign_at(@classification, 23, 1); + _assign_at(@classification, 24, 1); + _assign_at(@classification, 25, 1); + _assign_at(@classification, 26, 1); + _assign_at(@classification, 27, 1); + _assign_at(@classification, 28, 1); + _assign_at(@classification, 29, 1); + _assign_at(@classification, 30, 1); + 
_assign_at(@classification, 31, 1); + _assign_at(@classification, 32, 1); + _assign_at(@classification, 33, 4); + _assign_at(@classification, 34, 11); + _assign_at(@classification, 35, 19); + _assign_at(@classification, 36, 22); + _assign_at(@classification, 37, 22); + _assign_at(@classification, 38, 11); + _assign_at(@classification, 39, 11); + _assign_at(@classification, 40, 18); + _assign_at(@classification, 41, 7); + _assign_at(@classification, 42, 8); + _assign_at(@classification, 43, 9); + _assign_at(@classification, 44, 11); + _assign_at(@classification, 45, 11); + _assign_at(@classification, 46, 17); + _assign_at(@classification, 47, 16); + _assign_at(@classification, 48, 11); + _assign_at(@classification, 49, 13); + _assign_at(@classification, 50, 2); + _assign_at(@classification, 51, 2); + _assign_at(@classification, 52, 2); + _assign_at(@classification, 53, 2); + _assign_at(@classification, 54, 2); + _assign_at(@classification, 55, 2); + _assign_at(@classification, 56, 2); + _assign_at(@classification, 57, 2); + _assign_at(@classification, 58, 2); + _assign_at(@classification, 59, 5); + _assign_at(@classification, 60, 11); + _assign_at(@classification, 61, 21); + _assign_at(@classification, 62, 6); + _assign_at(@classification, 63, 20); + _assign_at(@classification, 64, 22); + _assign_at(@classification, 65, 11); + _assign_at(@classification, 66, 3); + _assign_at(@classification, 67, 3); + _assign_at(@classification, 68, 3); + _assign_at(@classification, 69, 3); + _assign_at(@classification, 70, 3); + _assign_at(@classification, 71, 3); + _assign_at(@classification, 72, 3); + _assign_at(@classification, 73, 3); + _assign_at(@classification, 74, 3); + _assign_at(@classification, 75, 3); + _assign_at(@classification, 76, 3); + _assign_at(@classification, 77, 3); + _assign_at(@classification, 78, 3); + _assign_at(@classification, 79, 3); + _assign_at(@classification, 80, 3); + _assign_at(@classification, 81, 3); + _assign_at(@classification, 82, 3); + 
_assign_at(@classification, 83, 3); + _assign_at(@classification, 84, 3); + _assign_at(@classification, 85, 3); + _assign_at(@classification, 86, 3); + _assign_at(@classification, 87, 3); + _assign_at(@classification, 88, 3); + _assign_at(@classification, 89, 3); + _assign_at(@classification, 90, 3); + _assign_at(@classification, 91, 3); + _assign_at(@classification, 92, 11); + _assign_at(@classification, 93, 22); + _assign_at(@classification, 94, 11); + _assign_at(@classification, 95, 11); + _assign_at(@classification, 96, 10); + _assign_at(@classification, 97, 22); + _assign_at(@classification, 98, 12); + _assign_at(@classification, 99, 12); + _assign_at(@classification, 100, 12); + _assign_at(@classification, 101, 12); + _assign_at(@classification, 102, 12); + _assign_at(@classification, 103, 12); + _assign_at(@classification, 104, 3); + _assign_at(@classification, 105, 3); + _assign_at(@classification, 106, 3); + _assign_at(@classification, 107, 3); + _assign_at(@classification, 108, 3); + _assign_at(@classification, 109, 3); + _assign_at(@classification, 110, 3); + _assign_at(@classification, 111, 3); + _assign_at(@classification, 112, 3); + _assign_at(@classification, 113, 3); + _assign_at(@classification, 114, 3); + _assign_at(@classification, 115, 3); + _assign_at(@classification, 116, 3); + _assign_at(@classification, 117, 3); + _assign_at(@classification, 118, 3); + _assign_at(@classification, 119, 3); + _assign_at(@classification, 120, 3); + _assign_at(@classification, 121, 14); + _assign_at(@classification, 122, 3); + _assign_at(@classification, 123, 3); + _assign_at(@classification, 124, 22); + _assign_at(@classification, 125, 11); + _assign_at(@classification, 126, 22); + _assign_at(@classification, 127, 11); + _assign_at(@classification, 128, 1); + + v0 := 129; + +(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*)
+	.create_classification_loop;
+	_assign_at(@classification, v0, 22);
+	v0 := v0 + 1;
+
+	if v0 < 257 then
+		goto .create_classification_loop;
+	end;
+end;
+
+(* Sets one cell of the transition table. *)
+
+(* Parameters: *)
+(* a0 - Current state (first index into transitions table). *)
+(* a1 - Transition (second index into transitions table). *)
+(* a2 - Action to assign. *)
+(* a3 - Next state to assign. *)
+proc _set_transition();
+begin
+	(* Transitions follow the classification table in the classification array. *)
+	(* _assign_at stores a word for each of the 256 indices, so the classification *)
+	(* table is 256 * 4 = 1024 bytes long. Save the transitions start in v0. *)
+	v0 := @classification + 1024;
+
+	(* Each cell is 8 bytes long (2 words: action and next state). *)
+	(* There are 22 transition classes, so a row for one state is 8 * 22 = 176 bytes long. *)
+
+	v4 := v88 + -1;
+	v4 := v4 * 176;
+
+	v8 := v84 + -1;
+	v8 := v8 * 8;
+
+	v12 := v0 + v4;
+	v12 := v12 + v8;
+
+	(* The action comes first, the next state is saved in the following word. *)
+	_store_word(v80, v12);
+	v12 := v12 + 4;
+	_store_word(v76, v12);
+end;
+
+(* Parameters: *)
+(* a0 - Current state (Transition state enumeration). *)
+(* a1 - Default action (Callback). *)
+(* a2 - Next state (Transition state enumeration).
*) +proc _set_default_transition(); +begin + _set_transition(v88, 1, v84, v80); + _set_transition(v88, 2, v84, v80); + _set_transition(v88, 3, v84, v80); + _set_transition(v88, 4, v84, v80); + _set_transition(v88, 5, v84, v80); + _set_transition(v88, 6, v84, v80); + _set_transition(v88, 7, v84, v80); + _set_transition(v88, 8, v84, v80); + _set_transition(v88, 9, v84, v80); + _set_transition(v88, 10, v84, v80); + _set_transition(v88, 11, v84, v80); + _set_transition(v88, 12, v84, v80); + _set_transition(v88, 13, v84, v80); + _set_transition(v88, 14, v84, v80); + _set_transition(v88, 15, v84, v80); + _set_transition(v88, 16, v84, v80); + _set_transition(v88, 17, v84, v80); + _set_transition(v88, 18, v84, v80); + _set_transition(v88, 19, v84, v80); + _set_transition(v88, 20, v84, v80); + _set_transition(v88, 21, v84, v80); + _set_transition(v88, 22, v84, v80); +end; + + +(* The transition table describes transitions from one state to another, given *) +(* a symbol (character class). *) + +(* The table has m rows and n columns, where m is the amount of states and n is *) +(* the amount of classes. So given the current state and a classified character *) +(* the table can be used to look up the next state. *) + +(* Each cell is a word long. *) +(* - The least significant byte of the word is a row number (beginning with 0). *) +(* It specifies the target state. "ff" means that this is an end state and no *) +(* transition is possible. *) +(* - The next byte is the action that should be performed when transitioning. *) +(* For the meaning of actions see labels in the lex_next function, which *) +(* handles each action. *) +proc _create_transitions(); +begin + (* Start state. 
*)
+	_set_transition(1, 1, 1, 16);
+	_set_transition(1, 2, 2, 4);
+	_set_transition(1, 3, 2, 3);
+	_set_transition(1, 4, 3, 1);
+	(* A colon leads to the colon state (2), not to the greater state (5). *)
+	_set_transition(1, 5, 2, 2);
+	_set_transition(1, 6, 4, 16);
+	_set_transition(1, 7, 2, 7);
+	_set_transition(1, 8, 4, 16);
+	_set_transition(1, 9, 4, 16);
+	_set_transition(1, 10, 2, 3);
+	_set_transition(1, 11, 4, 16);
+	_set_transition(1, 12, 2, 3);
+	_set_transition(1, 13, 2, 14);
+	_set_transition(1, 14, 2, 3);
+	_set_transition(1, 15, 5, 16);
+	_set_transition(1, 16, 2, 9);
+	_set_transition(1, 17, 2, 6);
+	_set_transition(1, 18, 2, 12);
+	_set_transition(1, 19, 2, 13);
+	_set_transition(1, 20, 2, 5);
+	_set_transition(1, 21, 2, 8);
+	_set_transition(1, 22, 1, 16);
+
+	(* Colon state. *)
+	_set_default_transition(2, 6, 16);
+	_set_transition(2, 6, 7, 16);
+
+	(* Identifier state. *)
+	_set_default_transition(3, 8, 16);
+	_set_transition(3, 2, 2, 3);
+	_set_transition(3, 3, 2, 3);
+	_set_transition(3, 10, 2, 3);
+	_set_transition(3, 12, 2, 3);
+	_set_transition(3, 13, 2, 3);
+	_set_transition(3, 14, 2, 3);
+
+	(* Decimal state. *)
+	_set_default_transition(4, 9, 16);
+	_set_transition(4, 2, 2, 4);
+	_set_transition(4, 3, 2, 15);
+	_set_transition(4, 10, 1, 16);
+	_set_transition(4, 12, 2, 15);
+	_set_transition(4, 13, 2, 4);
+	_set_transition(4, 14, 2, 15);
+
+	(* Greater state. *)
+	_set_default_transition(5, 6, 16);
+	_set_transition(5, 6, 7, 16);
+
+	(* Minus state. *)
+	_set_default_transition(6, 6, 16);
+	_set_transition(6, 20, 7, 16);
+
+	(* Left paren state. *)
+	_set_default_transition(7, 6, 16);
+	_set_transition(7, 9, 2, 10);
+
+	(* Less state. *)
+	_set_default_transition(8, 6, 16);
+	_set_transition(8, 6, 7, 16);
+	_set_transition(8, 20, 7, 16);
+
+	(* Dot state. A second dot makes a composite token. *)
+	_set_default_transition(9, 6, 16);
+	_set_transition(9, 16, 7, 16);
+
+	(* Comment. *)
+	_set_default_transition(10, 2, 10);
+	_set_transition(10, 9, 2, 11);
+	_set_transition(10, 15, 1, 16);
+
+	(* Closing comment.
*) + _set_default_transition(11, 2, 10); + _set_transition(11, 1, 1, 16); + _set_transition(11, 8, 10, 16); + _set_transition(11, 9, 2, 11); + _set_transition(11, 15, 1, 16); + + (* Character. *) + _set_default_transition(12, 2, 12); + _set_transition(12, 1, 1, 16); + _set_transition(12, 15, 1, 16); + _set_transition(12, 18, 10, 16); + + (* String. *) + _set_default_transition(13, 2, 13); + _set_transition(13, 1, 1, 16); + _set_transition(13, 15, 1, 16); + _set_transition(13, 19, 10, 16); + + (* Leading zero. *) + _set_default_transition(14, 9, 16); + _set_transition(14, 2, 1, 16); + _set_transition(14, 3, 1, 16); + _set_transition(14, 10, 1, 16); + _set_transition(14, 12, 1, 16); + _set_transition(14, 13, 1, 16); + _set_transition(14, 14, 1, 16); + + (* Digit with a character suffix. *) + _set_default_transition(15, 9, 16); + _set_transition(15, 3, 1, 16); + _set_transition(15, 2, 1, 16); + _set_transition(15, 12, 1, 16); + _set_transition(15, 13, 1, 16); + _set_transition(15, 14, 1, 16); +end; + +proc _lexer_get_state(); +begin + (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) + v0 := @classification; + v4 := 16 * 22; + v0 := v0 + 256; + + return v0 + v4 +end; + +(* Gets pointer to the current source text. *) +proc _lexer_get_current(); +begin + return _lexer_get_state() + 4 +end; + +(* Resets the lexer state for reading the next token. *) +proc _lexer_reset(); +begin + (* Transition start state is 1. *) + v0 := _lexer_get_state(); + _store_word(1, v4); + + (* Text pointer to the beginning of the currently read token. *) + v4 := _lexer_get_current(); + _store_word(source_code_position, v4); + + (* Initial length of the token is 0. *) + _store_word(0, source_code_position + 4); +end; + +(* One time lexer initialization. *) +proc _lexer_initialize(); +begin + _create_classification(); + _create_transitions(); +end; + +(* Entry point. 
*) +proc _start(); +begin + _lexer_initialize(); + _symbol_table_build(); + + (* Read the source from the standard input. *) + (* Second argument is buffer size. Modifying update the source_code definition. *) + _read_file(@source_code, 81920); + _compile(); + + _exit(0); +end; diff --git a/boot/stage12.elna b/boot/stage12.elna deleted file mode 100644 index 1cf4969..0000000 --- a/boot/stage12.elna +++ /dev/null @@ -1,1944 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) - -(* Stage 12 compiler. *) - -(* - Local variables and parameters are saved in a local symbol table. *) -(* - Local variables and parameters can be referenced by their name in the symbol table. *) - -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - (* Every type info starts with a word describing what type it is. *) - - (* PRIMITIVE_TYPE = 1 *) - - (* Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - (* Info objects start with a word describing its type. *) - - (* INFO_TYPE = 1 *) - (* INFO_PARAMETER = 2 *) - (* INFO_TEMPORARY = 3 *) - - (* Type info has the type it belongs to. 
*) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - memory: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - source_code_position: Pointer := @source_code; - memory_free_pointer: Word := @memory; - -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. *) - -(* Parameters: *) -(* a0 - String token pointer. *) - -(* Returns the length in a0. *) -proc _string_length(v88: Word); -begin - (* Reset the counter. *) - v0 := 0; - - .string_length_loop; - v88 := v88 + 1; - - if _load_byte(v88) <> '"' then - v0 := v0 + 1; - goto .string_length_loop; - end; - - return v0 -end; - -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* a0 - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. *) -proc _add_string(v88: Word); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - - .add_string_loop; - if _load_byte(v0) <> '"' then - v8 := _load_byte(v0); - _store_byte(v8, compiler_strings_position); - compiler_strings_position := compiler_strings_position + 1; - v0 := v0 + 1; - - if v8 <> '\\' then - compiler_strings_length := compiler_strings_length + 1; - end; - goto .add_string_loop; - end; - - return v4 -end; - -(* Reads standard input into a buffer. *) -(* a0 - Buffer pointer. *) -(* a1 - Buffer size. *) - -(* Returns the amount of bytes written in a0. 
*) -proc _read_file(v88: Word, v84: Word); -begin - return _syscall(0, v88, v84, 0, 0, 0, 63) -end; - -(* Writes to the standard output. *) - -(* Parameters: *) -(* a0 - Buffer. *) -(* a1 - Buffer length. *) -proc _write_s(v88: Word, v84: Word); -begin - _syscall(1, v88, v84, 0, 0, 0, 64); -end; - -(* Writes a number to a string buffer. *) - -(* t0 - Local buffer. *) -(* t1 - Constant 10. *) -(* t2 - Current character. *) -(* t3 - Whether the number is negative. *) - -(* Parameters: *) -(* a0 - Whole number. *) -(* a1 - Buffer pointer. *) - -(* Sets a0 to the length of the written number. *) -proc _print_i(v88: Word, v84: Word); -begin - v0 := @v23; - - if v88 >= 0 then - v4 := 0; - else - v88 = -v88; - v4 := 1; - end; - - .print_i_digit10; - v8 := v88 % 10; - _store_byte(v8 + '0', v0); - - v88 := v88 / 10; - v0 := v0 + -1; - - if v88 <> 0 then - goto .print_i_digit10; - end; - if v4 = 1 then - _store_byte('-', v0); - v0 := v0 + -1; - end; - v4 := @v23 + -v0; - _memcpy(v84, v0 + 1, v4); - - return v4 -end; - -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* a0 - Whole number. *) -proc _write_i(v88: Word); -begin - v4 := _print_i(v88, @v0); - _write_s(@v0, v4); -end; - -(* Writes a character from a0 into the standard output. *) -proc _write_c(v88: Word); -begin - _write_s(@v88, 1); -end; - -(* Write null terminated string. *) - -(* Parameters: *) -(* a0 - String. *) -proc _write_z(v88: Word); -begin - (* Check for 0 character. *) - v0 := _load_byte(v88); - - if v0 <> 0 then - (* Print a character. *) - _write_c(v0); - - (* Advance the input string by one byte. *) - _write_z(v88 + 1); - end; -end; - -(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_upper(v88: Word); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -(* Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
*) -proc _is_lower(v88: Word); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Paramters: *) -(* a0 - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) -proc _is_alpha(v88: Word); -begin - v0 := _is_upper(v88); - v4 := _is_lower(v88); - v8 := v88 = '_'; - - v12 := v0 or v4; - return v12 or v8 -end; - -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* a0 - Exemined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) -proc _is_digit(v88: Word); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(v88: Word); -begin - v0 := _is_alpha(v88); - v4 := _is_digit(v88); - - return v0 or v4 -end; - -(* Reads the next token. *) - -(* Returns token length in a0. *) -proc _read_token(); -begin - (* Current token position. *) - v0 := source_code_position; - (* Token length. *) - v4 := 0; - - .read_token_loop; - (* Current character. *) - v8 := _load_byte(v0); - - (* First we try to read a derictive. *) - (* A derictive can contain a dot and characters. *) - v12 := v8 = '.'; - v16 := _is_alnum(v8); - - if v12 or v16 then - (* Advance the source code position and token length. *) - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - end; - - return v4 -end; - -(* a0 - First pointer. *) -(* a1 - Second pointer. *) -(* a2 - The length to compare. *) - -(* Returns 0 in a0 if memory regions are equal. *) -proc _memcmp(v88: Word, v84: Word, v80: Word); -begin - v8 := 0; - - .memcmp_loop; - if v80 <> 0 then - v0 := _load_byte(v88); - v4 := _load_byte(v84); - v8 := v0 + -v4; - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - - if v8 = 0 then - goto .memcmp_loop; - end; - end; - - return v8 -end; - -(* Copies memory. *) - -(* Parameters: *) -(* a0 - Destination. 
*) -(* a1 - Source. *) -(* a2 - Size. *) - -(* Preserves a0. *) -proc _memcpy(v88: Word, v84: Word, v80: Word); -begin - .memcpy_loop; - if v80 <> 0 then - v0 := _load_byte(v84); - _store_byte(v0, v88); - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - goto .memcpy_loop; - end; - - return v88 -end; - -(* Advances the token stream by a0 bytes. *) -proc _advance_token(v88: Word); -begin - source_code_position := source_code_position + v88; -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* a0 - Token length. *) - -(* Returns a0 unchanged. *) -proc _write_token(v88: Word); -begin - _write_s(source_code_position, v88); - return v88 -end; - -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - _write_c('\''); - _advance_token(1); - - v0 := _load_byte(source_code_position); - if v0 = '\\' then - _write_c('\\'); - _advance_token(1); - end; - - v0 := _load_byte(source_code_position); - _write_c(v0); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - (* Skip the "@" sign. *) - _advance_token(1); - _compile_designator(); -end; - -proc _compile_negate_expression(); -begin - (* Skip the "-" sign. *) - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - (* Skip the "~" sign. 
*) - _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -begin - v0 := _string_length(source_code_position); - v4 := _add_string(source_code_position); - - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '\'' then - _compile_character_literal(); - end; - if v0 = '@' then - _compile_address_expression(); - end; - if v0 = '-' then - _compile_negate_expression(); - end; - if v0 = '~' then - _compile_not_expression(); - end; - if v0 = '"' then - _compile_string_literal(); - end; - if v0 = '_' then - _compile_call(); - _write_z("\nmv t0, a0\n\0"); - end; - if _is_digit(v0) = 1 then - _compile_integer_literal(); - end; - if _is_lower(v0) = 1 then - _compile_variable_expression(); - end; -end; - -proc _compile_binary_rhs(); -begin - (* Skip the whitespace after the binary operator. *) - _advance_token(1); - _compile_term(); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -proc _compile_expression(); -begin - _compile_term(); - v0 := _load_byte(source_code_position); - - if v0 <> ' ' then - goto .compile_expression_end; - end; - (* It is a binary expression. *) - - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 24(sp)\n\0"); - - (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '+' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '*' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '&' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'o' then - _advance_token(2); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'x' then - _advance_token(3); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '%' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("rem t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '/' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("div t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '<' then - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '>' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '>' then - _advance_token(1); - v0 := _load_byte(source_code_position); - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - - .compile_expression_end; -end; - -proc _compile_call(); -begin - (* Stack variables: *) - (* v0 - Procedure name length. *) - (* v4 - Procedure name pointer. *) - (* v8 - Argument count. *) - - v0 := _read_token(); - v4 := source_code_position; - v8 := 0; - - (* Skip the identifier and left paren. *) - _advance_token(v0 + 1); - v12 := _load_byte(source_code_position); - - if v12 = ')' then - goto .compile_call_finalize - end; - .compile_call_loop; - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - v8 := v8 + 1; - - v12 := _load_byte(source_code_position); - - if v12 <> ',' then - goto .compile_call_finalize; - end; - _advance_token(2); - goto .compile_call_loop; - - .compile_call_finalize; - (* Load the argument from the stack. *) - if v8 <> 0 then - (* Decrement the argument counter. *) - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - end; - - .compile_call_end; - _write_z("\tcall \0"); - _write_s(v4, v0); - - (* Skip the right paren. *) - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - v0 := _read_token(); - _write_z("\tj \0"); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - (* Skip "v" in the local variable name. 
*) - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - (* Read local variable stack offset and save it. *) - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - _write_c('\n'); -end; - -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - v0 := _read_token(); - v4 := _symbol_table_lookup(@symbol_table_local, source_code_position, v0); - - if v4 <> 0 then - _write_z("\taddi t0, sp, \0"); - v8 := _parameter_info_get_offset(v4); - _write_i(v8); - _write_c('\n'); - _advance_token(v0); - - goto .compile_designator_end; - end; - if _load_byte(source_code_position) = 'v' then - _compile_local_designator(); - goto .compile_designator_end; - end; - _compile_global_designator(); - -.compile_designator_end: -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 60(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); - - (* Compile the assignment. *) - _compile_expression(); - - _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -begin - (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* a0 - Label counter. *) -proc _write_label(v88: Word); -begin - _write_z(".L\0"); - _write_i(v88); -end; - -proc _compile_if(); -begin - (* Skip "if ". *) - _advance_token(3); - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _advance_token(6); - - (* v0 is the label after the if statement. *) - v0 := label_counter; - label_counter := label_counter + 1; - - (* v4 is the label in front of the next elsif condition or end. 
*) - v4 := label_counter; - label_counter := label_counter + 1; - - _write_z("\tbeqz t0, \0"); - _write_label(v4); - _write_c('\n'); - - _compile_procedure_body(); - - _write_z("\tj \0"); - _write_label(v0); - _write_c('\n'); - - _write_label(v4); - _write_z(":\n\0"); - - if _memcmp(source_code_position, "end", 3) = 0 then - goto .compile_if_end; - end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else - end; - .compile_if_else; - (* Skip "else" and newline. *) - _advance_token(5); - _compile_procedure_body(); - - .compile_if_end; - (* Skip "end". *) - _advance_token(3); - - _write_label(v0); - _write_z(":\n\0"); -end; - -proc _compile_label_declaration(); -begin - (* Skip the dot. *) - _advance_token(1); - v0 := _read_token(); - _write_c('.'); - _write_s(source_code_position, v0); - _write_z(":\n\0"); - _advance_token(v0); -end; - -proc _compile_statement(); -begin - _skip_spaces(); - v0 := _load_byte(source_code_position); - - (* This is a call if the statement starts with an underscore. *) - if v0 = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "goto ", 5) = 0 then - _compile_goto(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "if ", 3) = 0 then - _compile_if(); - goto .compile_statement_semicolon; - end; - if _memcmp(source_code_position, "return ", 7) = 0 then - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - end; - if v0 = '.' 
then - _compile_label_declaration(); - - goto .compile_statement_semicolon; - end; - _compile_assignment(); - goto .compile_statement_semicolon; - - .compile_statement_semicolon; - _advance_token(2); - _write_c('\n'); - - .compile_statement_end; -end; - -proc _compile_procedure_body(); -begin - .compile_procedure_body_loop; - _skip_empty_lines(); - _skip_spaces(); - - v0 := _memcmp(source_code_position, "end", 3) = 0; - v4 := _memcmp(source_code_position, "else", 4) = 0; - v4 := v0 or v4; - - if v4 = 0 then - _compile_statement(); - goto .compile_procedure_body_loop; - end; -end; - -(* Writes a regster name to the standard output. *) - -(* Parameters: *) -(* a0 - Register character. *) -(* a1 - Register number. *) -proc _write_register(v88: Word, v84: Word); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _skip_spaces(); -begin - v0 := _load_byte(source_code_position); - if v0 = '\t' then - _advance_token(1); - _skip_spaces(); - end; -end; - -proc _read_type_expression(); -begin - v0 := _read_token(); - _advance_token(v0); -end; - -(* Parameters: *) - -(* a0 - Parameter index. *) -proc _parameter_info_create(v88: Word); -begin - v8 := memory_free_pointer; - v4 := v8; - (* 2 is INFO_PARAMETER *) - _store_word(2, v4); - - v4 := v4 + 4; - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - v0 := v88 * 4; - v0 := 88 + -v0; - _store_word(v0, v4); - - v4 := v4 + 4; - _store_word(v4, @memory_free_pointer); - - return v8 -end; - -proc _parameter_info_get_offset(v88: Word); -begin - v88 := v88 + 4; - return _load_word(v88) -end; - -(* Parameters: *) - -(* a0 - Parameter index. *) -proc _temporary_info_create(v88: Word); -begin - v8 := memory_free_pointer; - v4 := v8; - (* 3 is INFO_TEMPORARY *) - _store_word(3, v4); - - v4 := v4 + 4; - - (* Calculate the stack offset: 4 * variable_counter. 
*) - v0 := v88 * 4; - _store_word(v0, v4); - - v4 := v4 + 4; - _store_word(v4, @memory_free_pointer); - - return v8 -end; - -proc _temporary_info_get_offset(v88: Word); -begin - v88 := v88 + 4; - return _load_word(v88) -end; - -(* Parameters: *) - -(* a0 - Parameter index. *) -proc _read_procedure_parameter(v88: Word); -begin - (* Read the parameter name. *) - v8 := source_code_position; - v0 := _read_token(); - _advance_token(v0); - - (* Skip colon and space in front of the type expression. *) - _advance_token(2); - - _read_type_expression(); - - _write_z("\tsw a\0"); - _write_i(v88); - _write_z(", \0"); - - v4 := _parameter_info_create(v88); - _symbol_table_enter(@symbol_table_local, v8, v0, v4); - - v4 := _parameter_info_get_offset(v4); - _write_i(v4); - - _write_z("(sp)\n\0"); -end; - -proc _read_procedure_parameters(); -begin - (* Skip open paren. *) - _advance_token(1); - v0 := 0; - - .compile_procedure_prologue_skip; - if _load_byte(source_code_position) <> ')' then - _read_procedure_parameter(v0); - v0 := v0 + 1; - - if _load_byte(source_code_position) = ',' then - _advance_token(2); - goto .compile_procedure_prologue_skip; - end; - end; - (* Skip close paren. *) - _advance_token(1); -end; - -(* Parameters: *) -(* a0 - Variable index. 
*) -proc _read_procedure_temporary(v88: Word); -begin - _skip_spaces(); - v8 := source_code_position; - - (* Read and skip variable name, colon and the space *) - v0 := _read_token(); - _advance_token(v0 + 2); - - _read_type_expression(); - - v4 := _temporary_info_create(v88); - _symbol_table_enter(@symbol_table_local, v8, v0, v4); - - (* Skip semicolon and newline after the variable declaration *) - _advance_token(2); -end; - -proc _read_procedure_temporaries(); -begin - if _memcmp(source_code_position, "var", 3) <> 0 then - goto .read_local_variables_end; - end; - _advance_token(4); - v0 := 0; - -.read_local_variables_loop: - if _memcmp(source_code_position, "begin", 5) = 0 then - goto .read_local_variables_end; - end; - _read_procedure_temporary(v0); - - v0 := v0 + 1; - goto .read_local_variables_loop; - -.read_local_variables_end: -end; - -proc _compile_procedure(); -begin - (* Skip "proc ". *) - _advance_token(5); - (* Clear local symbol table. *) - _store_word(0, @symbol_table_local); - - (* Save the procedure name length. *) - v0 := _read_token(); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_token(v0); - _write_z(":\n\0"); - - (* Skip procedure name. *) - _advance_token(v0); - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - _read_procedure_parameters(); - - (* Skip semicolon and newline. *) - _advance_token(2); - _read_procedure_temporaries(); - - (* Skip semicolon, "begin" and newline. *) - _advance_token(6); - - _compile_procedure_body(); - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); -end; - -(* Prints and skips a line. 
*) -proc _skip_comment(); -begin - .skip_comment_loop; - v0 := _load_byte(source_code_position); - - (* Check for newline character. *) - if v0 <> '\n' then - (* Advance the input string by one byte. *) - _advance_token(1); - - goto .skip_comment_loop; - end; - (* Skip the newline. *) - _advance_token(1); -end; - -(* Skip newlines and comments. *) -proc _skip_empty_lines(); -begin - .skip_empty_lines_rerun; - v0 := source_code_position; - - .skip_empty_lines_loop; - v4 := _load_byte(v0); - - if v4 = '\n' then - goto .skip_empty_lines_newline; - end; - if v4 = '\t' then - goto .skip_empty_lines_tab; - end; - if v4 <> '(' then - goto .skip_empty_lines_end; - end; - v4 := v0 + 1; - - if _load_byte(v4) = '*' then - goto .skip_empty_lines_comment - end; - goto .skip_empty_lines_end; - - .skip_empty_lines_comment; - source_code_position := v0; - _skip_comment(); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_newline; - source_code_position := v0 + 1; - goto .skip_empty_lines_rerun; - - .skip_empty_lines_tab; - v0 := v0 + 1; - goto .skip_empty_lines_loop - - .skip_empty_lines_end; -end; - -proc _compile_global_initializer(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '"' then - _write_z("\n\t.word strings + \0"); - v4 := _string_length(source_code_position); - - _add_string(source_code_position); - _write_i(); - - (* Skip the quoted string. *) - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - end; - if v0 = 'S' then - (* Skip "S(". *) - _advance_token(2); - - if _load_byte(source_code_position) = ')' then - goto .compile_global_initializer_closing; - end; - goto .compile_global_initializer_loop; - end; - if v0 = '@' then - (* Skip @. 
*) - _advance_token(1); - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - goto .compile_global_initializer_end; - end; - if _is_digit(v0) = 1 then - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(1); - - goto .compile_global_initializer_end; - end; - - .compile_global_initializer_loop; - _compile_global_initializer(); - - if _load_byte(source_code_position) <> ')' then - (* Skip comma and whitespace after it. *) - _advance_token(2); - - goto .compile_global_initializer_loop; - end; - - .compile_global_initializer_closing; - (* Skip ")" *) - _advance_token(1); - - goto .compile_global_initializer_end; - - .compile_global_initializer_end; -end; - -proc _compile_constant_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(v0 + 4); - _compile_global_initializer(); - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_const_part(); -begin - _skip_empty_lines(); - - if _memcmp(source_code_position, "const\0", 5) <> 0 then - goto .compile_const_part_end; - end; - (* Skip "const" with the newline after it. *) - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - - .compile_const_part_loop; - _skip_empty_lines(); - - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section. 
*) - if _load_byte(source_code_position) = '\t' then - _advance_token(1); - - _compile_constant_declaration(); - goto .compile_const_part_loop; - end; - - .compile_const_part_end; -end; - -proc _compile_variable_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _advance_token(v0 + 2); - - (* Skip the type name. *) - v4 := _read_token(); - _advance_token(v4); - - if _load_byte(source_code_position) <> ' ' then - (* Else we assume this is a zeroed 81920 bytes big array. *) - _write_z(" .zero 81920\0"); - else - (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); - _compile_global_initializer(); - end; - - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_var_part(); -begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then - goto .compile_var_part_end; - end; - (* Skip "var" and newline. *) - _advance_token(4); - _write_z(".section .data\n\0"); - - .compile_var_part_loop; - _skip_empty_lines(); - v0 := _load_byte(source_code_position); - - if v0 = '\t' then - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - end; - - .compile_var_part_end; -end; - -(* Process the source code and print the generated code. 
*) -proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - - .compile_module_loop; - _skip_empty_lines(); - - if _load_byte(source_code_position) <> 0 then - (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) - if _memcmp(source_code_position, "proc ", 5) = 0 then - _compile_procedure(); - goto .compile_module_loop; - end; - end; - .compile_module_end; -end; - -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - v0 := @compiler_strings; - v4 := compiler_strings_position; - - .compile_loop; - if v0 < v4 then - v8 := _load_byte(v0); - v0 := v0 + 1; - _write_c(v8); - - goto .compile_loop; - end; - _write_c('"'); - _write_c('\n'); -end; - -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) -proc _exit(); -begin - _syscall(0, 0, 0, 0, 0, 0, 93); -end; - -(* Looks for a symbol in the given symbol table. *) - -(* Parameters: *) -(* a0 - Symbol table. *) -(* a1 - Symbol name pointer. *) -(* a2 - Symbol name length. *) - -(* Returns the symbol pointer or 0 in a0. *) -proc _symbol_table_lookup(v88: Word, v84: Word, v80: Word); -begin - v0 := 0; - - (* The first word in the symbol table is its length, get it. *) - v4 := _load_word(v88); - - (* Go to the first symbol position. 
*) - v88 := v88 + 4; - - .symbol_table_lookup_loop; - if v4 = 0 then - goto .symbol_table_lookup_end; - end; - - (* Symbol name pointer and length. *) - v8 := _load_word(v88); - v12 := _load_word(v88 + 4); - - (* If lengths don't match, exit and return nil. *) - if v80 <> v12 then - goto .symbol_table_lookup_repeat; - end; - (* If names don't match, exit and return nil. *) - if _memcmp(v84, v8, v80) <> 0 then - goto .symbol_table_lookup_repeat; - end; - (* Otherwise, the symbol is found. *) - v0 := _load_word(v88 + 8); - goto .symbol_table_lookup_end; - - .symbol_table_lookup_repeat; - v88 := v88 + 12; - v4 := v4 + -1; - goto .symbol_table_lookup_loop; - - .symbol_table_lookup_end; - return v0 -end; - -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* a0 - Symbol table. *) -(* a1 - Symbol name pointer. *) -(* a2 - Symbol name length. *) -(* a3 - Symbol pointer. *) -proc _symbol_table_enter(v88: Word, v84: Word, v80: Word, v76: Word); -begin - (* The first word in the symbol table is its length, get it. *) - v0 := _load_word(v88); - - (* Calculate the offset for the new symbol. *) - v4 := v0 * 12; - v4 := v4 + 4; - v4 := v88 + v4; - - _store_word(v84, v4); - v4 := v4 + 4; - _store_word(v80, v4); - v4 := v4 + 4; - _store_word(v76, v4); - - (* Increment the symbol table length. *) - v0 := v0 + 1; - _store_word(v0, v88); -end; - -proc _symbol_table_build(); -begin - (* Set the table length to 0. *) - _store_word(0, @symbol_table_global); - - (* Enter built-in symbols. 
*) - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_bool, 4, @symbol_type_info_bool); -end; - - -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group a handled equivalently. *) - -(* Classification: *) - -(* TransitionClass = ( *) -(* transitionClassInvalid = 1, *) -(* transitionClassDigit = 2, *) -(* transitionClassAlpha = 3, *) -(* transitionClassSpace = 4, *) -(* transitionClassColon = 5, *) -(* transitionClassEquals = 6, *) -(* transitionClassLeftParen = 7, *) -(* transitionClassRightParen = 8, *) -(* transitionClassAsterisk = 9, *) -(* transitionClassUnderscore = 10, *) -(* transitionClassSingle = 11, *) -(* transitionClassHex = 12, *) -(* transitionClassZero = 13, *) -(* transitionClassX = 14, *) -(* transitionClassEof = 15, *) -(* transitionClassDot = 16, *) -(* transitionClassMinus = 17, *) -(* transitionClassSingleQuote = 18, *) -(* transitionClassDoubleQuote = 19, *) -(* transitionClassGreater = 20, *) -(* transitionClassLess = 21, *) -(* transitionClassOther = 22 *) -(* ); *) -(* TransitionState = ( *) -(* transitionStateStart = 1, *) -(* transitionStateColon = 2, *) -(* transitionStateIdentifier = 3, *) -(* transitionStateDecimal = 4, *) -(* transitionStateGreater = 5, *) -(* transitionStateMinus = 6, *) -(* transitionStateLeftParen = 7, *) -(* transitionStateLess = 8, *) -(* transitionStateDot = 9, *) -(* transitionStateComment = 10, *) -(* transitionStateClosingComment = 11, *) -(* transitionStateCharacter = 12, *) -(* transitionStateString = 13, *) -(* transitionStateLeadingZero = 14, 
*) -(* transitionStateDecimalSuffix = 15, *) -(* transitionStateEnd = 16 *) -(* ); *) -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) -(* TransitionAction = ( *) -(* none = 1, *) -(* accumulate = 2, *) -(* skip = 3, *) -(* single = 4, *) -(* eof = 5, *) -(* finalize = 6, *) -(* composite = 7, *) -(* key_id = 8, *) -(* integer = 9, *) -(* delimited = 10 *) -(* ); *) - -(* Assigns some value to at array index. *) - -(* Parameters: *) -(* a0 - Array pointer. *) -(* a1 - Index (word offset into the array). *) -(* a2 - Data to assign. *) -proc _assign_at(v88: Word, v84: Word, v80: Word); -begin - v0 := v84 + -1; - v0 := v0 * 4; - v0 := v88 + v0; - - _store_word(v80, v0); -end; - -proc _create_classification(); -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - _assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - 
_assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - _assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - 
_assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); - - v0 := 129; - -(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) - .create_classification_loop; - _assign_at(@classification, v0, 22); - v0 := v0 + 1; - - if v0 < 257 then - goto .create_classification_loop; - end; -end; - -(* Parameters: *) -(* a0 - Current state (first index into transitions table). *) -(* a1 - Transition (second index into transitions table).. *) -(* a2 - Action to assign. *) -(* a3 - Next state to assign. *) -proc _set_transition(v88: Word, v84: Word, v80: Word, v76: Word); -begin - (* Transitions start at offset in classification array. Save the transitions start in v0. *) - v0 := @classification + 256 - - (* Each state is 8 bytes long (2 words: action and next state). *) - (* There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. *) - - v4 := v88 + -1; - v4 := v4 * 128; - - v8 := v84 + -1; - v8 := v8 * 8; - - v12 := v0 + v4; - v12 := v12 + v8; - - _store_word(v80, v12); - v12 := v12 + 4; - _store_word(v76, v12); -end; - -(* Parameters: *) -(* a0 - Current state (Transition state enumeration). *) -(* a1 - Default action (Callback). *) -(* a2 - Next state (Transition state enumeration). 
*) -proc _set_default_transition(v88: Word, v84: Word, v80: Word); -begin - _set_transition(v88, 1, v84, v80); - _set_transition(v88, 2, v84, v80); - _set_transition(v88, 3, v84, v80); - _set_transition(v88, 4, v84, v80); - _set_transition(v88, 5, v84, v80); - _set_transition(v88, 6, v84, v80); - _set_transition(v88, 7, v84, v80); - _set_transition(v88, 8, v84, v80); - _set_transition(v88, 9, v84, v80); - _set_transition(v88, 10, v84, v80); - _set_transition(v88, 11, v84, v80); - _set_transition(v88, 12, v84, v80); - _set_transition(v88, 13, v84, v80); - _set_transition(v88, 14, v84, v80); - _set_transition(v88, 15, v84, v80); - _set_transition(v88, 16, v84, v80); - _set_transition(v88, 17, v84, v80); - _set_transition(v88, 18, v84, v80); - _set_transition(v88, 19, v84, v80); - _set_transition(v88, 20, v84, v80); - _set_transition(v88, 21, v84, v80); - _set_transition(v88, 22, v84, v80); -end; - - -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) - -(* Each cell is a word long. *) -(* - The least significant byte of the word is a row number (beginning with 0). *) -(* It specifies the target state. "ff" means that this is an end state and no *) -(* transition is possible. *) -(* - The next byte is the action that should be performed when transitioning. *) -(* For the meaning of actions see labels in the lex_next function, which *) -(* handles each action. *) -proc _create_transitions(); -begin - (* Start state. 
*) - _set_transition(1, 1, 1, 16); - _set_transition(1, 2, 2, 4); - _set_transition(1, 3, 2, 3); - _set_transition(1, 4, 3, 1); - _set_transition(1, 5, 2, 5); - _set_transition(1, 6, 4, 16); - _set_transition(1, 7, 2, 7); - _set_transition(1, 8, 4, 16); - _set_transition(1, 9, 4, 16); - _set_transition(1, 10, 2, 3); - _set_transition(1, 11, 4, 16); - _set_transition(1, 12, 2, 3); - _set_transition(1, 13, 2, 14); - _set_transition(1, 14, 2, 3); - _set_transition(1, 15, 5, 16); - _set_transition(1, 16, 2, 9); - _set_transition(1, 17, 2, 6); - _set_transition(1, 18, 2, 12); - _set_transition(1, 19, 2, 13); - _set_transition(1, 20, 2, 5); - _set_transition(1, 21, 2, 8); - _set_transition(1, 22, 1, 16); - - (* Colon state. *) - _set_default_transition(2, 6, 16); - _set_transition(2, 6, 7, 16); - - (* Identifier state. *) - _set_default_transition(3, 8, 16); - _set_transition(3, 2, 2, 3); - _set_transition(3, 3, 2, 3); - _set_transition(3, 10, 2, 3); - _set_transition(3, 12, 2, 3); - _set_transition(3, 13, 2, 3); - _set_transition(3, 14, 2, 3); - - (* Decimal state. *) - _set_default_transition(4, 9, 16); - _set_transition(4, 2, 2, 4); - _set_transition(4, 3, 2, 15); - _set_transition(4, 10, 1, 16); - _set_transition(4, 12, 2, 15); - _set_transition(4, 13, 2, 4); - _set_transition(4, 14, 2, 15); - - (* Greater state. *) - _set_default_transition(5, 6, 16); - _set_transition(5, 6, 7, 16); - - (* Minus state. *) - _set_default_transition(6, 6, 16); - _set_transition(6, 20, 7, 16); - - (* Left paren state. *) - _set_default_transition(7, 6, 16); - _set_transition(7, 9, 2, 10); - - (* Less state. *) - _set_default_transition(8, 6, 16); - _set_transition(8, 6, 7, 16); - _set_transition(8, 20, 7, 16); - - (* Hexadecimal after 0x. *) - _set_default_transition(9, 6, 16); - _set_transition(9, 16, 7, 16); - - (* Comment. *) - _set_default_transition(10, 2, 10); - _set_transition(10, 9, 2, 11); - _set_transition(10, 15, 1, 16); - - (* Closing comment. 
*) - _set_default_transition(11, 2, 10); - _set_transition(11, 1, 1, 16); - _set_transition(11, 8, 10, 16); - _set_transition(11, 9, 2, 11); - _set_transition(11, 15, 1, 16); - - (* Character. *) - _set_default_transition(12, 2, 12); - _set_transition(12, 1, 1, 16); - _set_transition(12, 15, 1, 16); - _set_transition(12, 18, 10, 16); - - (* String. *) - _set_default_transition(13, 2, 13); - _set_transition(13, 1, 1, 16); - _set_transition(13, 15, 1, 16); - _set_transition(13, 19, 10, 16); - - (* Leading zero. *) - _set_default_transition(14, 9, 16); - _set_transition(14, 2, 1, 16); - _set_transition(14, 3, 1, 16); - _set_transition(14, 10, 1, 16); - _set_transition(14, 12, 1, 16); - _set_transition(14, 13, 1, 16); - _set_transition(14, 14, 1, 16); - - (* Digit with a character suffix. *) - _set_default_transition(15, 9, 16); - _set_transition(15, 3, 1, 16); - _set_transition(15, 2, 1, 16); - _set_transition(15, 12, 1, 16); - _set_transition(15, 13, 1, 16); - _set_transition(15, 14, 1, 16); -end; - -proc _lexer_get_state(); -begin - (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) - v0 := @classification; - v4 := 16 * 22; - v0 := v0 + 256; - - return v0 + v4 -end; - -(* Gets pointer to the current source text. *) -proc _lexer_get_current(); -begin - return _lexer_get_state() + 4 -end; - -(* Resets the lexer state for reading the next token. *) -proc _lexer_reset(); -begin - (* Transition start state is 1. *) - v0 := _lexer_get_state(); - _store_word(1, v4); - - (* Text pointer to the beginning of the currently read token. *) - v4 := _lexer_get_current(); - _store_word(source_code_position, v4); - - (* Initial length of the token is 0. *) - _store_word(0, source_code_position + 4); -end; - -(* One time lexer initialization. *) -proc _lexer_initialize(); -begin - _create_classification(); - _create_transitions(); -end; - -(* Entry point. 
*) -proc _start(); -begin - _lexer_initialize(); - _symbol_table_build(); - - (* Read the source from the standard input. *) - v4 := @source_code; - - .start_read; - (* Second argument is buffer size. Modifying update the source_code definition. *) - v0 := _read_file(v4, 81920); - if v0 > 0 then - v4 := v4 + v0; - goto .start_read; - end; - _compile(); - - _exit(0); -end; diff --git a/boot/stage12/cl.elna b/boot/stage12/cl.elna new file mode 100644 index 0000000..1cf4969 --- /dev/null +++ b/boot/stage12/cl.elna @@ -0,0 +1,1944 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, *) +(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) +(* obtain one at https://mozilla.org/MPL/2.0/. *) + +(* Stage 12 compiler. *) + +(* - Local variables and parameters are saved in a local symbol table. *) +(* - Local variables and parameters can be referenced by their name in the symbol table. *) + +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + (* Every type info starts with a word describing what type it is. *) + + (* PRIMITIVE_TYPE = 1 *) + + (* Primitive types have only type size. *) + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + (* Info objects start with a word describing its type. *) + + (* INFO_TYPE = 1 *) + (* INFO_PARAMETER = 2 *) + (* INFO_TEMPORARY = 3 *) + + (* Type info has the type it belongs to. 
*) + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + memory: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + label_counter: Word := 0; + source_code_position: Pointer := @source_code; + memory_free_pointer: Word := @memory; + +(* Calculates and returns the string token length between quotes, including the *) +(* escaping slash characters. *) + +(* Parameters: *) +(* a0 - String token pointer. *) + +(* Returns the length in a0. *) +proc _string_length(v88: Word); +begin + (* Reset the counter. *) + v0 := 0; + + .string_length_loop; + v88 := v88 + 1; + + if _load_byte(v88) <> '"' then + v0 := v0 + 1; + goto .string_length_loop; + end; + + return v0 +end; + +(* Adds a string to the global, read-only string storage. *) + +(* Parameters: *) +(* a0 - String token. *) + +(* Returns the offset from the beginning of the storage to the new string in a0. *) +proc _add_string(v88: Word); +begin + v0 := v88 + 1; + v4 := compiler_strings_length; + + .add_string_loop; + if _load_byte(v0) <> '"' then + v8 := _load_byte(v0); + _store_byte(v8, compiler_strings_position); + compiler_strings_position := compiler_strings_position + 1; + v0 := v0 + 1; + + if v8 <> '\\' then + compiler_strings_length := compiler_strings_length + 1; + end; + goto .add_string_loop; + end; + + return v4 +end; + +(* Reads standard input into a buffer. *) +(* a0 - Buffer pointer. *) +(* a1 - Buffer size. *) + +(* Returns the amount of bytes written in a0. 
*) +proc _read_file(v88: Word, v84: Word); +begin + return _syscall(0, v88, v84, 0, 0, 0, 63) +end; + +(* Writes to the standard output. *) + +(* Parameters: *) +(* a0 - Buffer. *) +(* a1 - Buffer length. *) +proc _write_s(v88: Word, v84: Word); +begin + _syscall(1, v88, v84, 0, 0, 0, 64); +end; + +(* Writes a number to a string buffer. *) + +(* v0 - Write cursor in the local buffer; characters are stored backwards starting at v23. *) +(* v4 - Whether the number is negative, later the number of written characters. *) +(* v8 - Current digit. *) +(* v88 - Remaining magnitude, divided by 10 on every iteration. *) + +(* Parameters: *) +(* a0 - Whole number. *) +(* a1 - Buffer pointer. *) + +(* Sets a0 to the length of the written number. *) +proc _print_i(v88: Word, v84: Word); +begin + v0 := @v23; + + if v88 >= 0 then + v4 := 0; + else + v88 := -v88; + v4 := 1; + end; + + .print_i_digit10; + v8 := v88 % 10; + _store_byte(v8 + '0', v0); + + v88 := v88 / 10; + v0 := v0 + -1; + + if v88 <> 0 then + goto .print_i_digit10; + end; + if v4 = 1 then + _store_byte('-', v0); + v0 := v0 + -1; + end; + v4 := @v23 + -v0; + _memcpy(v84, v0 + 1, v4); + + return v4 +end; + +(* Writes a number to the standard output. The text is built at v0 and can be longer than one word, so the length is kept in v12 and not in v4, which the text may overlap. *) + +(* Parameters: *) +(* a0 - Whole number. *) +proc _write_i(v88: Word); +begin + v12 := _print_i(v88, @v0); + _write_s(@v0, v12); +end; + +(* Writes a character from a0 into the standard output. *) +proc _write_c(v88: Word); +begin + _write_s(@v88, 1); +end; + +(* Write null terminated string. *) + +(* Parameters: *) +(* a0 - String. *) +proc _write_z(v88: Word); +begin + (* Check for 0 character. *) + v0 := _load_byte(v88); + + if v0 <> 0 then + (* Print a character. *) + _write_c(v0); + + (* Advance the input string by one byte. *) + _write_z(v88 + 1); + end; +end; + +(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_upper(v88: Word); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +(* Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. 
*) +proc _is_lower(v88: Word); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +(* Detects if the passed character is a 7-bit alpha character or an underscore. *) + +(* Parameters: *) +(* a0 - Tested character. *) + +(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) +proc _is_alpha(v88: Word); +begin + v0 := _is_upper(v88); + v4 := _is_lower(v88); + v8 := v88 = '_'; + + v12 := v0 or v4; + return v12 or v8 +end; + +(* Detects whether the passed character is a digit *) +(* (a character between '0' and '9'). *) + +(* Parameters: *) +(* a0 - Examined value. *) + +(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) +proc _is_digit(v88: Word); +begin + v0 := v88 >= '0'; + v4 := v88 <= '9'; + + return v0 & v4 +end; + +proc _is_alnum(v88: Word); +begin + v0 := _is_alpha(v88); + v4 := _is_digit(v88); + + return v0 or v4 +end; + +(* Reads the next token. *) + +(* Returns token length in a0. *) +proc _read_token(); +begin + (* Current token position. *) + v0 := source_code_position; + (* Token length. *) + v4 := 0; + + .read_token_loop; + (* Current character. *) + v8 := _load_byte(v0); + + (* First we try to read a directive. *) + (* A directive can contain dots and alphanumeric characters, including underscores. *) + v12 := v8 = '.'; + v16 := _is_alnum(v8); + + if v12 or v16 then + (* Advance the local cursor and the token length; source_code_position is not changed. *) + v4 := v4 + 1; + v0 := v0 + 1; + + goto .read_token_loop; + end; + + return v4 +end; + +(* a0 - First pointer. *) +(* a1 - Second pointer. *) +(* a2 - The length to compare. *) + +(* Returns 0 in a0 if memory regions are equal. *) +proc _memcmp(v88: Word, v84: Word, v80: Word); +begin + v8 := 0; + + .memcmp_loop; + if v80 <> 0 then + v0 := _load_byte(v88); + v4 := _load_byte(v84); + v8 := v0 + -v4; + + v88 := v88 + 1; + v84 := v84 + 1; + v80 := v80 + -1; + + if v8 = 0 then + goto .memcmp_loop; + end; + end; + + return v8 +end; + +(* Copies memory. *) + +(* Parameters: *) +(* a0 - Destination. 
*) +(* a1 - Source. *) +(* a2 - Size. *) + +(* Preserves a0. *) +proc _memcpy(v88: Word, v84: Word, v80: Word); +begin + .memcpy_loop; + if v80 <> 0 then + v0 := _load_byte(v84); + _store_byte(v0, v88); + + v88 := v88 + 1; + v84 := v84 + 1; + v80 := v80 + -1; + goto .memcpy_loop; + end; + + return v88 +end; + +(* Advances the token stream by a0 bytes. *) +proc _advance_token(v88: Word); +begin + source_code_position := source_code_position + v88; +end; + +(* Prints the current token. *) + +(* Parameters: *) +(* a0 - Token length. *) + +(* Returns a0 unchanged. *) +proc _write_token(v88: Word); +begin + _write_s(source_code_position, v88); + return v88 +end; + +proc _compile_integer_literal(); +begin + _write_z("\tli t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + _write_z("\tli t0, \0"); + + _write_c('\''); + _advance_token(1); + + v0 := _load_byte(source_code_position); + if v0 = '\\' then + _write_c('\\'); + _advance_token(1); + end; + + v0 := _load_byte(source_code_position); + _write_c(v0); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +proc _compile_address_expression(); +begin + (* Skip the "@" sign. *) + _advance_token(1); + _compile_designator(); +end; + +proc _compile_negate_expression(); +begin + (* Skip the "-" sign. *) + _advance_token(1); + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +proc _compile_not_expression(); +begin + (* Skip the "~" sign. 
*) + _advance_token(1); + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +proc _compile_string_literal(); +begin + v0 := _string_length(source_code_position); + v4 := _add_string(source_code_position); + + _advance_token(v0 + 2); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(v4); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +proc _compile_term(); +begin + v0 := _load_byte(source_code_position); + + if v0 = '\'' then + _compile_character_literal(); + end; + if v0 = '@' then + _compile_address_expression(); + end; + if v0 = '-' then + _compile_negate_expression(); + end; + if v0 = '~' then + _compile_not_expression(); + end; + if v0 = '"' then + _compile_string_literal(); + end; + if v0 = '_' then + _compile_call(); + _write_z("\nmv t0, a0\n\0"); + end; + if _is_digit(v0) = 1 then + _compile_integer_literal(); + end; + if _is_lower(v0) = 1 then + _compile_variable_expression(); + end; +end; + +proc _compile_binary_rhs(); +begin + (* Skip the whitespace after the binary operator. *) + _advance_token(1); + _compile_term(); + + (* Load the left expression from the stack; *) + _write_z("\tlw t1, 24(sp)\n\0"); +end; + +proc _compile_expression(); +begin + _compile_term(); + v0 := _load_byte(source_code_position); + + if v0 <> ' ' then + goto .compile_expression_end; + end; + (* It is a binary expression. *) + + (* Save the value of the left expression on the stack. *) + _write_z("sw t0, 24(sp)\n\0"); + + (* Skip surrounding whitespace in front of the operator. *) + _advance_token(1); + v0 := _load_byte(source_code_position); + + if v0 = '+' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '*' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '&' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = 'o' then + _advance_token(2); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = 'x' then + _advance_token(3); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '%' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("rem t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '/' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("div t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '<' then + _advance_token(1); + v0 := _load_byte(source_code_position); + + if v0 = '>' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("slt t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if v0 = '>' then + _advance_token(1); + v0 := _load_byte(source_code_position); + if v0 = '=' then + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + + .compile_expression_end; +end; + +proc _compile_call(); +begin + (* Stack variables: *) + (* v0 - Procedure name length. *) + (* v4 - Procedure name pointer. *) + (* v8 - Argument count. *) + + v0 := _read_token(); + v4 := source_code_position; + v8 := 0; + + (* Skip the identifier and left paren. *) + _advance_token(v0 + 1); + v12 := _load_byte(source_code_position); + + if v12 = ')' then + goto .compile_call_finalize; + end; + .compile_call_loop; + _compile_expression(); + + (* Save the argument on the stack. *) + _write_z("\tsw t0, \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + (* Add one to the argument counter. *) + v8 := v8 + 1; + + v12 := _load_byte(source_code_position); + + if v12 <> ',' then + goto .compile_call_finalize; + end; + _advance_token(2); + goto .compile_call_loop; + + .compile_call_finalize; + (* Load the saved arguments from the stack into the a-registers, last argument first. *) + if v8 <> 0 then + (* Decrement the argument counter. *) + v8 := v8 + -1; + + _write_z("\tlw a\0"); + _write_i(v8); + + _write_z(", \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize; + end; + + .compile_call_end; + _write_z("\tcall \0"); + _write_s(v4, v0); + + (* Skip the right paren. *) + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + v0 := _read_token(); + _write_z("\tj \0"); + + _write_token(v0); + _advance_token(v0); +end; + +proc _compile_local_designator(); +begin + (* Skip "v" in the local variable name. 
*) + _advance_token(1); + _write_z("\t addi t0, sp, \0"); + + (* Read local variable stack offset and save it. *) + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + _write_c('\n'); +end; + +proc _compile_global_designator(); +begin + _write_z("\tla t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +proc _compile_designator(); +begin + v0 := _read_token(); + v4 := _symbol_table_lookup(@symbol_table_local, source_code_position, v0); + + if v4 <> 0 then + _write_z("\taddi t0, sp, \0"); + v8 := _parameter_info_get_offset(v4); + _write_i(v8); + _write_c('\n'); + _advance_token(v0); + + goto .compile_designator_end; + end; + if _load_byte(source_code_position) = 'v' then + _compile_local_designator(); + goto .compile_designator_end; + end; + _compile_global_designator(); + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + (* Save the assignee address on the stack. *) + _write_z("\tsw t0, 60(sp)\n\0"); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + _advance_token(4); + + (* Compile the assignment. *) + _compile_expression(); + + _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +begin + (* Skip "return" keyword and whitespace after it. *) + _advance_token(7); + _compile_expression(); + + _write_z("mv a0, t0\n\0"); +end; + +(* Writes a label, .Ln, where n is a unique number. *) + +(* Parameters: *) +(* a0 - Label counter. *) +proc _write_label(v88: Word); +begin + _write_z(".L\0"); + _write_i(v88); +end; + +proc _compile_if(); +begin + (* Skip "if ". *) + _advance_token(3); + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _advance_token(6); + + (* v0 is the label after the if statement. *) + v0 := label_counter; + label_counter := label_counter + 1; + + (* v4 is the label in front of the next elsif condition or end. 
*) + v4 := label_counter; + label_counter := label_counter + 1; + + _write_z("\tbeqz t0, \0"); + _write_label(v4); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(v0); + _write_c('\n'); + + _write_label(v4); + _write_z(":\n\0"); + + if _memcmp(source_code_position, "end", 3) = 0 then + goto .compile_if_end; + end; + if _memcmp(source_code_position, "else", 3) = 0 then + goto .compile_if_else + end; + .compile_if_else; + (* Skip "else" and newline. *) + _advance_token(5); + _compile_procedure_body(); + + .compile_if_end; + (* Skip "end". *) + _advance_token(3); + + _write_label(v0); + _write_z(":\n\0"); +end; + +proc _compile_label_declaration(); +begin + (* Skip the dot. *) + _advance_token(1); + v0 := _read_token(); + _write_c('.'); + _write_s(source_code_position, v0); + _write_z(":\n\0"); + _advance_token(v0); +end; + +proc _compile_statement(); +begin + _skip_spaces(); + v0 := _load_byte(source_code_position); + + (* This is a call if the statement starts with an underscore. *) + if v0 = '_' then + _compile_call(); + goto .compile_statement_semicolon; + end; + if _memcmp(source_code_position, "goto ", 5) = 0 then + _compile_goto(); + goto .compile_statement_semicolon; + end; + if _memcmp(source_code_position, "if ", 3) = 0 then + _compile_if(); + goto .compile_statement_semicolon; + end; + if _memcmp(source_code_position, "return ", 7) = 0 then + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + end; + if v0 = '.' 
then + _compile_label_declaration(); + + goto .compile_statement_semicolon; + end; + _compile_assignment(); + goto .compile_statement_semicolon; + + .compile_statement_semicolon; + _advance_token(2); + _write_c('\n'); + + .compile_statement_end; +end; + +(* Compiles statements until the source position is at "end" or "else". *) +proc _compile_procedure_body(); +begin + .compile_procedure_body_loop; + _skip_empty_lines(); + _skip_spaces(); + + v0 := _memcmp(source_code_position, "end", 3) = 0; + v4 := _memcmp(source_code_position, "else", 4) = 0; + v4 := v0 or v4; + + if v4 = 0 then + _compile_statement(); + goto .compile_procedure_body_loop; + end; +end; + +(* Writes a register name to the standard output. *) + +(* Parameters: *) +(* a0 - Register character. *) +(* a1 - Register number. *) +proc _write_register(v88: Word, v84: Word); +begin + _write_c(v88); + v84 := v84 + '0'; + _write_c(v84); +end; + +(* Skips tab characters at the current source position. *) +proc _skip_spaces(); +begin + v0 := _load_byte(source_code_position); + if v0 = '\t' then + _advance_token(1); + _skip_spaces(); + end; +end; + +(* Reads one token as the type expression and skips it. *) +proc _read_type_expression(); +begin + v0 := _read_token(); + _advance_token(v0); +end; + +(* Creates a parameter info record at memory_free_pointer and returns its address. *) + +(* Parameters: *) + +(* a0 - Parameter index. *) +proc _parameter_info_create(v88: Word); +begin + v8 := memory_free_pointer; + v4 := v8; + (* 2 is INFO_PARAMETER *) + _store_word(2, v4); + + v4 := v4 + 4; + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + v0 := v88 * 4; + v0 := 88 + -v0; + _store_word(v0, v4); + + v4 := v4 + 4; + _store_word(v4, @memory_free_pointer); + + return v8 +end; + +(* Returns the stack offset, the word stored 4 bytes into the info record in a0. *) +proc _parameter_info_get_offset(v88: Word); +begin + v88 := v88 + 4; + return _load_word(v88) +end; + +(* Parameters: *) + +(* a0 - Variable index. *) +proc _temporary_info_create(v88: Word); +begin + v8 := memory_free_pointer; + v4 := v8; + (* 3 is INFO_TEMPORARY *) + _store_word(3, v4); + + v4 := v4 + 4; + + (* Calculate the stack offset: 4 * variable_counter. 
*) + v0 := v88 * 4; + _store_word(v0, v4); + + v4 := v4 + 4; + _store_word(v4, @memory_free_pointer); + + return v8 +end; + +proc _temporary_info_get_offset(v88: Word); +begin + v88 := v88 + 4; + return _load_word(v88) +end; + +(* Parameters: *) + +(* a0 - Parameter index. *) +proc _read_procedure_parameter(v88: Word); +begin + (* Read the parameter name. *) + v8 := source_code_position; + v0 := _read_token(); + _advance_token(v0); + + (* Skip colon and space in front of the type expression. *) + _advance_token(2); + + _read_type_expression(); + + _write_z("\tsw a\0"); + _write_i(v88); + _write_z(", \0"); + + v4 := _parameter_info_create(v88); + _symbol_table_enter(@symbol_table_local, v8, v0, v4); + + v4 := _parameter_info_get_offset(v4); + _write_i(v4); + + _write_z("(sp)\n\0"); +end; + +proc _read_procedure_parameters(); +begin + (* Skip open paren. *) + _advance_token(1); + v0 := 0; + + .compile_procedure_prologue_skip; + if _load_byte(source_code_position) <> ')' then + _read_procedure_parameter(v0); + v0 := v0 + 1; + + if _load_byte(source_code_position) = ',' then + _advance_token(2); + goto .compile_procedure_prologue_skip; + end; + end; + (* Skip close paren. *) + _advance_token(1); +end; + +(* Parameters: *) +(* a0 - Variable index. 
*) +proc _read_procedure_temporary(v88: Word); +begin + _skip_spaces(); + v8 := source_code_position; + + (* Read and skip variable name, colon and the space *) + v0 := _read_token(); + _advance_token(v0 + 2); + + _read_type_expression(); + + v4 := _temporary_info_create(v88); + _symbol_table_enter(@symbol_table_local, v8, v0, v4); + + (* Skip semicolon and newline after the variable declaration *) + _advance_token(2); +end; + +proc _read_procedure_temporaries(); +begin + if _memcmp(source_code_position, "var", 3) <> 0 then + goto .read_local_variables_end; + end; + _advance_token(4); + v0 := 0; + +.read_local_variables_loop: + if _memcmp(source_code_position, "begin", 5) = 0 then + goto .read_local_variables_end; + end; + _read_procedure_temporary(v0); + + v0 := v0 + 1; + goto .read_local_variables_loop; + +.read_local_variables_end: +end; + +proc _compile_procedure(); +begin + (* Skip "proc ". *) + _advance_token(5); + (* Clear local symbol table. *) + _store_word(0, @symbol_table_local); + + (* Save the procedure name length. *) + v0 := _read_token(); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_token(v0); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_token(v0); + _write_z(":\n\0"); + + (* Skip procedure name. *) + _advance_token(v0); + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + _read_procedure_parameters(); + + (* Skip semicolon and newline. *) + _advance_token(2); + _read_procedure_temporaries(); + + (* Skip semicolon, "begin" and newline. *) + _advance_token(6); + + _compile_procedure_body(); + + (* Write the epilogue. *) + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + (* Skip the "end" keyword, semicolon and newline. *) + _advance_token(5); +end; + +(* Prints and skips a line. 
*) +proc _skip_comment(); +begin + .skip_comment_loop; + v0 := _load_byte(source_code_position); + + (* Check for newline character. *) + if v0 <> '\n' then + (* Advance the input string by one byte. *) + _advance_token(1); + + goto .skip_comment_loop; + end; + (* Skip the newline. *) + _advance_token(1); +end; + +(* Skip newlines and comments. *) +proc _skip_empty_lines(); +begin + .skip_empty_lines_rerun; + v0 := source_code_position; + + .skip_empty_lines_loop; + v4 := _load_byte(v0); + + if v4 = '\n' then + goto .skip_empty_lines_newline; + end; + if v4 = '\t' then + goto .skip_empty_lines_tab; + end; + if v4 <> '(' then + goto .skip_empty_lines_end; + end; + v4 := v0 + 1; + + if _load_byte(v4) = '*' then + goto .skip_empty_lines_comment + end; + goto .skip_empty_lines_end; + + .skip_empty_lines_comment; + source_code_position := v0; + _skip_comment(); + goto .skip_empty_lines_rerun; + + .skip_empty_lines_newline; + source_code_position := v0 + 1; + goto .skip_empty_lines_rerun; + + .skip_empty_lines_tab; + v0 := v0 + 1; + goto .skip_empty_lines_loop + + .skip_empty_lines_end; +end; + +proc _compile_global_initializer(); +begin + v0 := _load_byte(source_code_position); + + if v0 = '"' then + _write_z("\n\t.word strings + \0"); + v4 := _string_length(source_code_position); + + _add_string(source_code_position); + _write_i(); + + (* Skip the quoted string. *) + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + end; + if v0 = 'S' then + (* Skip "S(". *) + _advance_token(2); + + if _load_byte(source_code_position) = ')' then + goto .compile_global_initializer_closing; + end; + goto .compile_global_initializer_loop; + end; + if v0 = '@' then + (* Skip @. 
*) + _advance_token(1); + _write_z("\n\t.word \0"); + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + goto .compile_global_initializer_end; + end; + if _is_digit(v0) = 1 then + _write_z("\n\t.word \0"); + v0 := _read_token(); + _write_token(v0); + (* Skip the whole literal (v0 bytes), not only its first digit. *) + _advance_token(v0); + + goto .compile_global_initializer_end; + end; + + .compile_global_initializer_loop; + _compile_global_initializer(); + + if _load_byte(source_code_position) <> ')' then + (* Skip comma and whitespace after it. *) + _advance_token(2); + + goto .compile_global_initializer_loop; + end; + + .compile_global_initializer_closing; + (* Skip ")" *) + _advance_token(1); + + goto .compile_global_initializer_end; + + .compile_global_initializer_end; +end; + +(* Compiles one constant: writes the .type directive and the label, then the initializer. *) +proc _compile_constant_declaration(); +begin + v0 := _read_token(); + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the constant name with assignment sign and surrounding whitespaces. *) + _advance_token(v0 + 4); + _compile_global_initializer(); + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +(* Compiles the "const" section, if present, into the .rodata section. *) +proc _compile_const_part(); +begin + _skip_empty_lines(); + + if _memcmp(source_code_position, "const\0", 5) <> 0 then + goto .compile_const_part_end; + end; + (* Skip "const" with the newline after it. *) + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + + .compile_const_part_loop; + _skip_empty_lines(); + + (* If the character at the line beginning is not indentation, *) + (* it is probably the next code section. 
*) + if _load_byte(source_code_position) = '\t' then + _advance_token(1); + + _compile_constant_declaration(); + goto .compile_const_part_loop; + end; + + .compile_const_part_end; +end; + +proc _compile_variable_declaration(); +begin + v0 := _read_token(); + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the variable name and colon with space before the type. *) + _advance_token(v0 + 2); + + (* Skip the type name. *) + v4 := _read_token(); + _advance_token(v4); + + if _load_byte(source_code_position) <> ' ' then + (* Else we assume this is a zeroed 81920 bytes big array. *) + _write_z(" .zero 81920\0"); + else + (* Skip the assignment sign with surrounding whitespaces. *) + _advance_token(4); + _compile_global_initializer(); + end; + + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_var_part(); +begin + if _memcmp(source_code_position, "var\0", 3) <> 0 then + goto .compile_var_part_end; + end; + (* Skip "var" and newline. *) + _advance_token(4); + _write_z(".section .data\n\0"); + + .compile_var_part_loop; + _skip_empty_lines(); + v0 := _load_byte(source_code_position); + + if v0 = '\t' then + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + end; + + .compile_var_part_end; +end; + +(* Process the source code and print the generated code. 
*) +proc _compile_module(); +begin + _compile_const_part(); + _skip_empty_lines(); + _compile_var_part(); + + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); + + .compile_module_loop; + _skip_empty_lines(); + + if _load_byte(source_code_position) <> 0 then + (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) + if _memcmp(source_code_position, "proc ", 5) = 0 then + _compile_procedure(); + goto .compile_module_loop; + end; + end; + .compile_module_end; +end; + +proc _compile(); +begin + _write_z(".globl _start\n\n\0"); + _compile_module(); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + v0 := @compiler_strings; + v4 := compiler_strings_position; + + .compile_loop; + if v0 < v4 then + v8 := _load_byte(v0); + v0 := v0 + 1; + _write_c(v8); + + goto .compile_loop; + end; + _write_c('"'); + _write_c('\n'); +end; + +(* Terminates the program. a0 contains the return code. *) + +(* Parameters: *) +(* a0 - Status code. *) +proc _exit(); +begin + _syscall(0, 0, 0, 0, 0, 0, 93); +end; + +(* Looks for a symbol in the given symbol table. *) + +(* Parameters: *) +(* a0 - Symbol table. *) +(* a1 - Symbol name pointer. *) +(* a2 - Symbol name length. *) + +(* Returns the symbol pointer or 0 in a0. *) +proc _symbol_table_lookup(v88: Word, v84: Word, v80: Word); +begin + v0 := 0; + + (* The first word in the symbol table is its length, get it. *) + v4 := _load_word(v88); + + (* Go to the first symbol position. 
*) + v88 := v88 + 4; + + .symbol_table_lookup_loop; + if v4 = 0 then + goto .symbol_table_lookup_end; + end; + + (* Symbol name pointer and length. *) + v8 := _load_word(v88); + v12 := _load_word(v88 + 4); + + (* If lengths don't match, exit and return nil. *) + if v80 <> v12 then + goto .symbol_table_lookup_repeat; + end; + (* If names don't match, exit and return nil. *) + if _memcmp(v84, v8, v80) <> 0 then + goto .symbol_table_lookup_repeat; + end; + (* Otherwise, the symbol is found. *) + v0 := _load_word(v88 + 8); + goto .symbol_table_lookup_end; + + .symbol_table_lookup_repeat; + v88 := v88 + 12; + v4 := v4 + -1; + goto .symbol_table_lookup_loop; + + .symbol_table_lookup_end; + return v0 +end; + +(* Inserts a symbol into the table. *) + +(* Parameters: *) +(* a0 - Symbol table. *) +(* a1 - Symbol name pointer. *) +(* a2 - Symbol name length. *) +(* a3 - Symbol pointer. *) +proc _symbol_table_enter(v88: Word, v84: Word, v80: Word, v76: Word); +begin + (* The first word in the symbol table is its length, get it. *) + v0 := _load_word(v88); + + (* Calculate the offset for the new symbol. *) + v4 := v0 * 12; + v4 := v4 + 4; + v4 := v88 + v4; + + _store_word(v84, v4); + v4 := v4 + 4; + _store_word(v80, v4); + v4 := v4 + 4; + _store_word(v76, v4); + + (* Increment the symbol table length. *) + v0 := v0 + 1; + _store_word(v0, v88); +end; + +proc _symbol_table_build(); +begin + (* Set the table length to 0. *) + _store_word(0, @symbol_table_global); + + (* Enter built-in symbols. 
*) + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_bool, 4, @symbol_type_info_bool); +end; + + +(* Classification table assigns each possible character to a group (class). All *) +(* characters of the same group a handled equivalently. *) + +(* Classification: *) + +(* TransitionClass = ( *) +(* transitionClassInvalid = 1, *) +(* transitionClassDigit = 2, *) +(* transitionClassAlpha = 3, *) +(* transitionClassSpace = 4, *) +(* transitionClassColon = 5, *) +(* transitionClassEquals = 6, *) +(* transitionClassLeftParen = 7, *) +(* transitionClassRightParen = 8, *) +(* transitionClassAsterisk = 9, *) +(* transitionClassUnderscore = 10, *) +(* transitionClassSingle = 11, *) +(* transitionClassHex = 12, *) +(* transitionClassZero = 13, *) +(* transitionClassX = 14, *) +(* transitionClassEof = 15, *) +(* transitionClassDot = 16, *) +(* transitionClassMinus = 17, *) +(* transitionClassSingleQuote = 18, *) +(* transitionClassDoubleQuote = 19, *) +(* transitionClassGreater = 20, *) +(* transitionClassLess = 21, *) +(* transitionClassOther = 22 *) +(* ); *) +(* TransitionState = ( *) +(* transitionStateStart = 1, *) +(* transitionStateColon = 2, *) +(* transitionStateIdentifier = 3, *) +(* transitionStateDecimal = 4, *) +(* transitionStateGreater = 5, *) +(* transitionStateMinus = 6, *) +(* transitionStateLeftParen = 7, *) +(* transitionStateLess = 8, *) +(* transitionStateDot = 9, *) +(* transitionStateComment = 10, *) +(* transitionStateClosingComment = 11, *) +(* transitionStateCharacter = 12, *) +(* transitionStateString = 13, *) +(* transitionStateLeadingZero = 14, 
*) +(* transitionStateDecimalSuffix = 15, *) +(* transitionStateEnd = 16 *) +(* ); *) +(* Transition = record *) +(* action: TransitionAction; *) +(* next_state: TransitionState *) +(* end; *) +(* TransitionAction = ( *) +(* none = 1, *) +(* accumulate = 2, *) +(* skip = 3, *) +(* single = 4, *) +(* eof = 5, *) +(* finalize = 6, *) +(* composite = 7, *) +(* key_id = 8, *) +(* integer = 9, *) +(* delimited = 10 *) +(* ); *) + +(* Assigns some value to at array index. *) + +(* Parameters: *) +(* a0 - Array pointer. *) +(* a1 - Index (word offset into the array). *) +(* a2 - Data to assign. *) +proc _assign_at(v88: Word, v84: Word, v80: Word); +begin + v0 := v84 + -1; + v0 := v0 * 4; + v0 := v88 + v0; + + _store_word(v80, v0); +end; + +proc _create_classification(); +begin + _assign_at(@classification, 1, 15); + _assign_at(@classification, 2, 1); + _assign_at(@classification, 3, 1); + _assign_at(@classification, 4, 1); + _assign_at(@classification, 5, 1); + _assign_at(@classification, 6, 1); + _assign_at(@classification, 7, 1); + _assign_at(@classification, 8, 1); + _assign_at(@classification, 9, 1); + _assign_at(@classification, 10, 4); + _assign_at(@classification, 11, 4); + _assign_at(@classification, 12, 1); + _assign_at(@classification, 13, 1); + _assign_at(@classification, 14, 4); + _assign_at(@classification, 15, 1); + _assign_at(@classification, 16, 1); + _assign_at(@classification, 17, 1); + _assign_at(@classification, 18, 1); + _assign_at(@classification, 19, 1); + _assign_at(@classification, 20, 1); + _assign_at(@classification, 21, 1); + _assign_at(@classification, 22, 1); + _assign_at(@classification, 23, 1); + _assign_at(@classification, 24, 1); + _assign_at(@classification, 25, 1); + _assign_at(@classification, 26, 1); + _assign_at(@classification, 27, 1); + _assign_at(@classification, 28, 1); + _assign_at(@classification, 29, 1); + _assign_at(@classification, 30, 1); + _assign_at(@classification, 31, 1); + _assign_at(@classification, 32, 1); + 
_assign_at(@classification, 33, 4); + _assign_at(@classification, 34, 11); + _assign_at(@classification, 35, 19); + _assign_at(@classification, 36, 22); + _assign_at(@classification, 37, 22); + _assign_at(@classification, 38, 11); + _assign_at(@classification, 39, 11); + _assign_at(@classification, 40, 18); + _assign_at(@classification, 41, 7); + _assign_at(@classification, 42, 8); + _assign_at(@classification, 43, 9); + _assign_at(@classification, 44, 11); + _assign_at(@classification, 45, 11); + _assign_at(@classification, 46, 17); + _assign_at(@classification, 47, 16); + _assign_at(@classification, 48, 11); + _assign_at(@classification, 49, 13); + _assign_at(@classification, 50, 2); + _assign_at(@classification, 51, 2); + _assign_at(@classification, 52, 2); + _assign_at(@classification, 53, 2); + _assign_at(@classification, 54, 2); + _assign_at(@classification, 55, 2); + _assign_at(@classification, 56, 2); + _assign_at(@classification, 57, 2); + _assign_at(@classification, 58, 2); + _assign_at(@classification, 59, 5); + _assign_at(@classification, 60, 11); + _assign_at(@classification, 61, 21); + _assign_at(@classification, 62, 6); + _assign_at(@classification, 63, 20); + _assign_at(@classification, 64, 22); + _assign_at(@classification, 65, 11); + _assign_at(@classification, 66, 3); + _assign_at(@classification, 67, 3); + _assign_at(@classification, 68, 3); + _assign_at(@classification, 69, 3); + _assign_at(@classification, 70, 3); + _assign_at(@classification, 71, 3); + _assign_at(@classification, 72, 3); + _assign_at(@classification, 73, 3); + _assign_at(@classification, 74, 3); + _assign_at(@classification, 75, 3); + _assign_at(@classification, 76, 3); + _assign_at(@classification, 77, 3); + _assign_at(@classification, 78, 3); + _assign_at(@classification, 79, 3); + _assign_at(@classification, 80, 3); + _assign_at(@classification, 81, 3); + _assign_at(@classification, 82, 3); + _assign_at(@classification, 83, 3); + _assign_at(@classification, 84, 3); + 
_assign_at(@classification, 85, 3); + _assign_at(@classification, 86, 3); + _assign_at(@classification, 87, 3); + _assign_at(@classification, 88, 3); + _assign_at(@classification, 89, 3); + _assign_at(@classification, 90, 3); + _assign_at(@classification, 91, 3); + _assign_at(@classification, 92, 11); + _assign_at(@classification, 93, 22); + _assign_at(@classification, 94, 11); + _assign_at(@classification, 95, 11); + _assign_at(@classification, 96, 10); + _assign_at(@classification, 97, 22); + _assign_at(@classification, 98, 12); + _assign_at(@classification, 99, 12); + _assign_at(@classification, 100, 12); + _assign_at(@classification, 101, 12); + _assign_at(@classification, 102, 12); + _assign_at(@classification, 103, 12); + _assign_at(@classification, 104, 3); + _assign_at(@classification, 105, 3); + _assign_at(@classification, 106, 3); + _assign_at(@classification, 107, 3); + _assign_at(@classification, 108, 3); + _assign_at(@classification, 109, 3); + _assign_at(@classification, 110, 3); + _assign_at(@classification, 111, 3); + _assign_at(@classification, 112, 3); + _assign_at(@classification, 113, 3); + _assign_at(@classification, 114, 3); + _assign_at(@classification, 115, 3); + _assign_at(@classification, 116, 3); + _assign_at(@classification, 117, 3); + _assign_at(@classification, 118, 3); + _assign_at(@classification, 119, 3); + _assign_at(@classification, 120, 3); + _assign_at(@classification, 121, 14); + _assign_at(@classification, 122, 3); + _assign_at(@classification, 123, 3); + _assign_at(@classification, 124, 22); + _assign_at(@classification, 125, 11); + _assign_at(@classification, 126, 22); + _assign_at(@classification, 127, 11); + _assign_at(@classification, 128, 1); + + v0 := 129; + +(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) + .create_classification_loop; + _assign_at(@classification, v0, 22); + v0 := v0 + 1; + + if v0 < 257 then + goto .create_classification_loop; + end; +end; + +(* Stores one transition (action and next state) into the transition table. *) + +(* Parameters: *) +(* a0 - Current state (first index into transitions table). *) +(* a1 - Transition class (second index into transitions table). *) +(* a2 - Action to assign. *) +(* a3 - Next state to assign. *) +proc _set_transition(v88: Word, v84: Word, v80: Word, v76: Word); +begin + (* The transitions start 256 bytes into the classification array. Save the start in v0. *) + v0 := @classification + 256; + + (* Each transition is 8 bytes long (2 words: action and next state). *) + (* A state row is 128 bytes long, which holds 16 transitions. *) + (* NOTE(review): callers pass classes up to 22 and _assign_at fills 256 words, *) + (* so rows and the classification data appear to overlap - confirm the layout. *) + + v4 := v88 + -1; + v4 := v4 * 128; + + v8 := v84 + -1; + v8 := v8 * 8; + + v12 := v0 + v4; + v12 := v12 + v8; + + _store_word(v80, v12); + v12 := v12 + 4; + _store_word(v76, v12); +end; + +(* Parameters: *) +(* a0 - Current state (Transition state enumeration). *) +(* a1 - Default action (Callback). *) +(* a2 - Next state (Transition state enumeration). 
*) +proc _set_default_transition(v88: Word, v84: Word, v80: Word); +begin + _set_transition(v88, 1, v84, v80); + _set_transition(v88, 2, v84, v80); + _set_transition(v88, 3, v84, v80); + _set_transition(v88, 4, v84, v80); + _set_transition(v88, 5, v84, v80); + _set_transition(v88, 6, v84, v80); + _set_transition(v88, 7, v84, v80); + _set_transition(v88, 8, v84, v80); + _set_transition(v88, 9, v84, v80); + _set_transition(v88, 10, v84, v80); + _set_transition(v88, 11, v84, v80); + _set_transition(v88, 12, v84, v80); + _set_transition(v88, 13, v84, v80); + _set_transition(v88, 14, v84, v80); + _set_transition(v88, 15, v84, v80); + _set_transition(v88, 16, v84, v80); + _set_transition(v88, 17, v84, v80); + _set_transition(v88, 18, v84, v80); + _set_transition(v88, 19, v84, v80); + _set_transition(v88, 20, v84, v80); + _set_transition(v88, 21, v84, v80); + _set_transition(v88, 22, v84, v80); +end; + + +(* The transition table describes transitions from one state to another, given *) +(* a symbol (character class). *) + +(* The table has m rows and n columns, where m is the amount of states and n is *) +(* the amount of classes. So given the current state and a classified character *) +(* the table can be used to look up the next state. *) + +(* Each cell is a word long. *) +(* - The least significant byte of the word is a row number (beginning with 0). *) +(* It specifies the target state. "ff" means that this is an end state and no *) +(* transition is possible. *) +(* - The next byte is the action that should be performed when transitioning. *) +(* For the meaning of actions see labels in the lex_next function, which *) +(* handles each action. *) +proc _create_transitions(); +begin + (* Start state. 
*) + _set_transition(1, 1, 1, 16); + _set_transition(1, 2, 2, 4); + _set_transition(1, 3, 2, 3); + _set_transition(1, 4, 3, 1); + _set_transition(1, 5, 2, 5); + _set_transition(1, 6, 4, 16); + _set_transition(1, 7, 2, 7); + _set_transition(1, 8, 4, 16); + _set_transition(1, 9, 4, 16); + _set_transition(1, 10, 2, 3); + _set_transition(1, 11, 4, 16); + _set_transition(1, 12, 2, 3); + _set_transition(1, 13, 2, 14); + _set_transition(1, 14, 2, 3); + _set_transition(1, 15, 5, 16); + _set_transition(1, 16, 2, 9); + _set_transition(1, 17, 2, 6); + _set_transition(1, 18, 2, 12); + _set_transition(1, 19, 2, 13); + _set_transition(1, 20, 2, 5); + _set_transition(1, 21, 2, 8); + _set_transition(1, 22, 1, 16); + + (* Colon state. *) + _set_default_transition(2, 6, 16); + _set_transition(2, 6, 7, 16); + + (* Identifier state. *) + _set_default_transition(3, 8, 16); + _set_transition(3, 2, 2, 3); + _set_transition(3, 3, 2, 3); + _set_transition(3, 10, 2, 3); + _set_transition(3, 12, 2, 3); + _set_transition(3, 13, 2, 3); + _set_transition(3, 14, 2, 3); + + (* Decimal state. *) + _set_default_transition(4, 9, 16); + _set_transition(4, 2, 2, 4); + _set_transition(4, 3, 2, 15); + _set_transition(4, 10, 1, 16); + _set_transition(4, 12, 2, 15); + _set_transition(4, 13, 2, 4); + _set_transition(4, 14, 2, 15); + + (* Greater state. *) + _set_default_transition(5, 6, 16); + _set_transition(5, 6, 7, 16); + + (* Minus state. *) + _set_default_transition(6, 6, 16); + _set_transition(6, 20, 7, 16); + + (* Left paren state. *) + _set_default_transition(7, 6, 16); + _set_transition(7, 9, 2, 10); + + (* Less state. *) + _set_default_transition(8, 6, 16); + _set_transition(8, 6, 7, 16); + _set_transition(8, 20, 7, 16); + + (* Hexadecimal after 0x. *) + _set_default_transition(9, 6, 16); + _set_transition(9, 16, 7, 16); + + (* Comment. *) + _set_default_transition(10, 2, 10); + _set_transition(10, 9, 2, 11); + _set_transition(10, 15, 1, 16); + + (* Closing comment. 
*) + _set_default_transition(11, 2, 10); + _set_transition(11, 1, 1, 16); + _set_transition(11, 8, 10, 16); + _set_transition(11, 9, 2, 11); + _set_transition(11, 15, 1, 16); + + (* Character. *) + _set_default_transition(12, 2, 12); + _set_transition(12, 1, 1, 16); + _set_transition(12, 15, 1, 16); + _set_transition(12, 18, 10, 16); + + (* String. *) + _set_default_transition(13, 2, 13); + _set_transition(13, 1, 1, 16); + _set_transition(13, 15, 1, 16); + _set_transition(13, 19, 10, 16); + + (* Leading zero. *) + _set_default_transition(14, 9, 16); + _set_transition(14, 2, 1, 16); + _set_transition(14, 3, 1, 16); + _set_transition(14, 10, 1, 16); + _set_transition(14, 12, 1, 16); + _set_transition(14, 13, 1, 16); + _set_transition(14, 14, 1, 16); + + (* Digit with a character suffix. *) + _set_default_transition(15, 9, 16); + _set_transition(15, 3, 1, 16); + _set_transition(15, 2, 1, 16); + _set_transition(15, 12, 1, 16); + _set_transition(15, 13, 1, 16); + _set_transition(15, 14, 1, 16); +end; + +(* Returns the address of the lexer state inside the classification array. *) +proc _lexer_get_state(); +begin + (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) + (* NOTE(review): _set_transition(3, 13, ...) appears to write to this same offset - confirm. *) + v0 := @classification; + v4 := 16 * 22; + v0 := v0 + 256; + + return v0 + v4 +end; + +(* Gets pointer to the current source text. *) +proc _lexer_get_current(); +begin + return _lexer_get_state() + 4 +end; + +(* Resets the lexer state for reading the next token. *) +proc _lexer_reset(); +begin + (* Transition start state is 1. Store it through the state pointer held in v0. *) + v0 := _lexer_get_state(); + _store_word(1, v0); + + (* Text pointer to the beginning of the currently read token. *) + v4 := _lexer_get_current(); + _store_word(source_code_position, v4); + + (* Initial length of the token is 0. Keep it in the word after the text pointer, *) + (* not in the source text. NOTE(review): confirm the slot with the lexer state layout. *) + _store_word(0, v4 + 4); +end; + +(* One time lexer initialization. *) +proc _lexer_initialize(); +begin + _create_classification(); + _create_transitions(); +end; + +(* Entry point. 
*) +proc _start(); +begin + _lexer_initialize(); + _symbol_table_build(); + + (* Read the source from the standard input. *) + v4 := @source_code; + + .start_read; + (* Second argument is buffer size. Modifying update the source_code definition. *) + v0 := _read_file(v4, 81920); + if v0 > 0 then + v4 := v4 + v0; + goto .start_read; + end; + _compile(); + + _exit(0); +end; diff --git a/boot/stage13.elna b/boot/stage13.elna deleted file mode 100644 index f4fa817..0000000 --- a/boot/stage13.elna +++ /dev/null @@ -1,2870 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) - -(* Stage 13 compiler. *) - -(* - Multiline comments. *) -(* - elsif conditions. *) -(* - Optional "begin" if the procedure body is a single return statement. *) - -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - (* Every type info starts with a word describing what type it is. *) - - (* PRIMITIVE_TYPE = 1 *) - - (* Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - (* Info objects start with a word describing its type. *) - - (* INFO_TYPE = 1 *) - (* INFO_PARAMETER = 2 *) - (* INFO_TEMPORARY = 3 *) - - (* Type info has the type it belongs to. 
*) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - memory: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - source_code_position: Pointer := @source_code; - memory_free_pointer: Word := @memory; - -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. *) - -(* Parameters: *) -(* string - String token pointer. *) - -(* Returns the length in a0. *) -proc _string_length(string: Word); -var - counter: Word; -begin - (* Reset the counter. *) - counter := 0; - - .string_length_loop; - string := string + 1; - - if _load_byte(string) <> '"' then - counter := counter + 1; - goto .string_length_loop - end; - - return counter -end; - -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* string - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. *) -proc _add_string(string: Word); -var - contents: Word; - result: Word; - current_byte: Word; -begin - contents := string + 1; - result := compiler_strings_length; - - .add_string_loop; - if _load_byte(contents) <> '"' then - current_byte := _load_byte(contents); - _store_byte(current_byte, compiler_strings_position); - compiler_strings_position := compiler_strings_position + 1; - contents := contents + 1; - - if current_byte <> '\\' then - compiler_strings_length := compiler_strings_length + 1 - end; - goto .add_string_loop - end; - - return result -end; - -(* Reads standard input into a buffer. 
*) -(* buffer - Buffer pointer. *) -(* size - Buffer size. *) - -(* Returns the amount of bytes written in a0. *) -proc _read_file(buffer: Word, size: Word); -begin - return _syscall(0, buffer, size, 0, 0, 0, 63) -end; - -(* Writes to the standard output. *) - -(* Parameters: *) -(* buffer - Buffer. *) -(* size - Buffer length. *) -proc _write_s(buffer: Word, size: Word); -begin - _syscall(1, buffer, size, 0, 0, 0, 64); -end; - -(* Writes a number to a string buffer. *) - -(* Parameters: *) -(* number - Whole number. *) -(* output_buffer - Buffer pointer. *) - -(* Sets a0 to the length of the written number. *) -proc _print_i(number: Word, output_buffer: Word); -var - local_buffer: Word; - is_negative: Word; - current_character: Word; - result: Word; -begin - local_buffer := @result + 11; - - if number >= 0 then - is_negative := 0 - else - number = -number; - is_negative := 1 - end; - - .print_i_digit10; - current_character := number % 10; - _store_byte(current_character + '0', local_buffer); - - number := number / 10; - local_buffer := local_buffer + -1; - - if number <> 0 then - goto .print_i_digit10 - end; - if is_negative = 1 then - _store_byte('-', local_buffer); - local_buffer := local_buffer + -1 - end; - result := @result + 11; - result := result + -local_buffer; - _memcpy(output_buffer, local_buffer + 1, result); - - return result -end; - -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* number - Whole number. *) -proc _write_i(number: Word); -var - local_buffer: Word; - length: Word; -begin - length := _print_i(number, @local_buffer); - _write_s(@local_buffer, length); -end; - -(* Writes a character from a0 into the standard output. *) - -(* Parameters: *) -(* character - Character to write. *) -proc _write_c(character: Word); -begin - _write_s(@character, 1); -end; - -(* Write null terminated string. *) - -(* Parameters: *) -(* string - String. 
*) -proc _write_z(string: Word); -var - next_byte: Word; -begin - (* Check for 0 character. *) - next_byte := _load_byte(string); - - if next_byte <> 0 then - (* Print a character. *) - _write_c(next_byte); - - (* Advance the input string by one byte. *) - _write_z(string + 1) - end; -end; - -(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_upper(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'A'; - rhs := character <= 'Z'; - - return lhs & rhs - -end; - -(* Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_lower(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'a'; - rhs := character <= 'z'; - - return lhs & rhs -end; - -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Paramters: *) -(* character - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) -proc _is_alpha(character: Word); -var - is_upper_result: Word; - is_lower_result: Word; - is_alpha_result: Word; - is_underscore: Word; -begin - is_upper_result := _is_upper(character); - is_lower_result := _is_lower(character); - is_underscore := character = '_'; - - is_alpha_result := is_lower_result or is_upper_result; - return is_alpha_result or is_underscore -end; - -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* character - Exemined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) -proc _is_digit(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= '0'; - rhs := character <= '9'; - - return lhs & rhs -end; - -proc _is_alnum(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := _is_alpha(character); - rhs := _is_digit(character); - - return lhs or rhs -end; - -(* Parameters: *) -(* lhs - First pointer. *) -(* rhs - Second pointer. 
*) -(* count - The length to compare. *) - -(* Returns 0 if memory regions are equal. *) -proc _memcmp(lhs: Word, rhs: Word, count: Word); -var - lhs_byte: Word; - rhs_byte: Word; - result: Word; -begin - result := 0; - - .memcmp_loop; - if count <> 0 then - lhs_byte := _load_byte(lhs); - rhs_byte := _load_byte(rhs); - result := lhs_byte + -rhs_byte; - - lhs := lhs + 1; - rhs := rhs + 1; - count := count + -1; - - if result = 0 then - goto .memcmp_loop - end - end; - - return result -end; - -(* Copies memory. *) - -(* Parameters: *) -(* destination - Destination. *) -(* source - Source. *) -(* count - Size. *) - -(* Returns the destination. *) -proc _memcpy(destination: Word, source: Word, count: Word); -var - current_byte: Word; -begin - .memcpy_loop; - if count <> 0 then - current_byte := _load_byte(source); - _store_byte(current_byte, destination); - - destination := destination + 1; - source := source + 1; - count := count + -1; - goto .memcpy_loop - end; - - return destination -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* length - Token length. *) - -(* Returns a0 unchanged. 
*) -proc _write_token(length: Word); -begin - _write_s(source_code_position, length); - return length -end; - -proc _compile_integer_literal(); -var - integer_token: Word; - token_kind: Word; -begin - _write_z("\tli t0, \0"); - - integer_token := _lexer_read_token(@token_kind); - _write_token(integer_token); - _lexer_skip_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -var - character: Word; -begin - _write_z("\tli t0, '\0"); - source_code_position := source_code_position + 1; - - character := _load_byte(source_code_position); - if character = '\\' then - _write_c('\\'); - source_code_position := source_code_position + 1 - end; - _write_s(source_code_position, 1); - _write_s("'\n", 2); - source_code_position := source_code_position + 2; -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - (* Skip the "@" sign. *) - source_code_position := source_code_position + 1; - _compile_designator(); -end; - -proc _compile_negate_expression(); -begin - (* Skip the "-" sign. *) - source_code_position := source_code_position + 1; - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - (* Skip the "~" sign. 
*) - source_code_position := source_code_position + 1; - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -var - length: Word; - offset: Word; -begin - length := _string_length(source_code_position); - offset := _add_string(source_code_position); - - source_code_position := source_code_position + length; - source_code_position := source_code_position + 2; - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(offset); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -var - current_character: Word; -begin - current_character := _load_byte(source_code_position); - - if current_character = '\'' then - _compile_character_literal(); - end; - if current_character = '@' then - _compile_address_expression(); - end; - if current_character = '-' then - _compile_negate_expression(); - end; - if current_character = '~' then - _compile_not_expression(); - end; - if current_character = '"' then - _compile_string_literal(); - end; - if current_character = '_' then - _compile_call(); - _write_z("\nmv t0, a0\n\0"); - end; - if _is_digit(current_character) = 1 then - _compile_integer_literal(); - end; - if _is_lower(current_character) = 1 then - _compile_variable_expression(); - end; -end; - -proc _compile_binary_rhs(); -begin - (* Skip the whitespace after the binary operator. *) - source_code_position := source_code_position + 1; - _compile_term(); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 64(sp)\n\0"); -end; - -proc _compile_expression(); -var - current_character: Word; -begin - _compile_term(); - current_character := _load_byte(source_code_position); - - if current_character <> ' ' then - goto .compile_expression_end; - end; - (* It is a binary expression. *) - - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 64(sp)\n\0"); - - (* Skip surrounding whitespace in front of the operator. 
*) - source_code_position := source_code_position + 1; - current_character := _load_byte(source_code_position); - - if current_character = '+' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = '*' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = '&' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = 'o' then - source_code_position := source_code_position + 2; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = 'x' then - source_code_position := source_code_position + 3; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if current_character = '=' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '%' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("rem t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '/' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("div t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '<' then - source_code_position := source_code_position + 1; - current_character := _load_byte(source_code_position); - - if current_character = '>' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '=' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if current_character = '>' then - source_code_position := source_code_position + 1; - current_character := _load_byte(source_code_position); - if current_character = '=' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - - .compile_expression_end; -end; - -proc _compile_call(); -var - name_length: Word; - name: Word; - argument_count: Word; - stack_offset: Word; - token_kind: Word; -begin - name_length := _lexer_read_token(@token_kind); - name := _lexer_global_start(); - name := _load_word(name); - name_length := _lexer_global_end(); - name_length := _load_word(name_length) + -name; - argument_count := 0; - - (* Skip the identifier and left paren. 
*) - _lexer_skip_token(); - source_code_position := source_code_position + 1; - - if _load_byte(source_code_position) = ')' then - goto .compile_call_finalize; - end; - .compile_call_loop; - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - stack_offset := argument_count * 4; - _write_i(116 + -stack_offset); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - argument_count := argument_count + 1; - - if _load_byte(source_code_position) <> ',' then - goto .compile_call_finalize; - end; - source_code_position := source_code_position + 2; - goto .compile_call_loop; - - .compile_call_finalize; - (* Load the argument from the stack. *) - if argument_count <> 0 then - (* Decrement the argument counter. *) - argument_count := argument_count + -1; - - _write_z("\tlw a\0"); - _write_i(argument_count); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - stack_offset := argument_count * 4; - _write_i(116 + -stack_offset); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - end; - - .compile_call_end; - _write_z("\tcall \0"); - _write_s(name, name_length); - - (* Skip the right paren. 
*) - source_code_position := source_code_position + 1; -end; - -proc _compile_goto(); -var - next_token: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - source_code_position := source_code_position + 2; - - next_token := _lexer_read_token(@token_kind); - _write_z("\tj .\0"); - - _write_token(next_token); - _lexer_skip_token(); -end; - -proc _compile_local_designator(symbol: Word); -var - variable_offset: Word; -begin - _write_z("\taddi t0, sp, \0"); - variable_offset := _parameter_info_get_offset(symbol); - _write_i(variable_offset); - _write_c('\n'); - _lexer_skip_token(); -end; - -proc _compile_global_designator(); -var - name: Word; - token_kind: Word; -begin - _write_z("\tla t0, \0"); - - name := _lexer_read_token(@token_kind); - _write_token(name); - _lexer_skip_token(); - - _write_c('\n'); -end; - -proc _compile_designator(); -var - name_token: Word; - lookup_result: Word; - token_kind: Word; - name: Word; -begin - name_token := _lexer_read_token(@token_kind); - name := _lexer_global_start(); - name := _load_word(name); - name_token := _lexer_global_end(); - name_token := _load_word(name_token) + -name; - lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); - - if lookup_result <> 0 then - _compile_local_designator(lookup_result); - goto .compile_designator_end; - end; - _compile_global_designator(); - - .compile_designator_end; -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 60(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - source_code_position := source_code_position + 4; - - (* Compile the assignment. *) - _compile_expression(); - - _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -var - token_kind: Word; -begin - (* Skip "return" keyword and whitespace after it. 
*) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - source_code_position := source_code_position + 1; - _compile_expression(); - - _write_z("\tmv a0, t0\n\0"); -end; - -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* counter - Label counter. *) -proc _write_label(counter: Word); -begin - _write_z(".L\0"); - _write_i(counter); -end; - -proc _compile_if(); -var - after_end_label: Word; - condition_label: Word; - token_kind: Word; -begin - (* Skip "if ". *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - source_code_position := source_code_position + 1; - - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - after_end_label := label_counter; - label_counter := label_counter + 1; - - (* condition_label is the label in front of the next elsif condition or end. *) - condition_label := label_counter; - label_counter := label_counter + 1; - - _write_z("\tbeqz t0, \0"); - _write_label(condition_label); - _write_c('\n'); - - _compile_procedure_body(); - - _write_z("\tj \0"); - _write_label(after_end_label); - _write_c('\n'); - - _write_label(condition_label); - _write_z(":\n\0"); - - .compile_if_loop; - - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_end() then - goto .compile_if_end; - end; - if token_kind = _lexer_token_kind_else() then - goto .compile_if_else; - end; - if token_kind = _lexer_token_kind_elsif() then - goto .compile_if_elsif; - end; - .compile_if_elsif; - _lexer_skip_token(); - source_code_position := source_code_position + 1; - - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - (* condition_label is the label in front of the next elsif condition or end. 
*) - condition_label := label_counter; - label_counter := label_counter + 1; - - _write_z("\tbeqz t0, \0"); - _write_label(condition_label); - _write_c('\n'); - - _compile_procedure_body(); - - _write_z("\tj \0"); - _write_label(after_end_label); - _write_c('\n'); - - _write_label(condition_label); - _write_z(":\n\0"); - - goto .compile_if_loop; - - .compile_if_else; - _lexer_skip_token(); - _compile_procedure_body(); - - .compile_if_end; - _lexer_skip_token(); - - _write_label(after_end_label); - _write_z(":\n\0"); -end; - -proc _compile_label_declaration(); -var - label_token: Word; - token_kind: Word; - name: Word; -begin - (* Skip the dot. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - label_token := _lexer_read_token(@token_kind); - name := _lexer_global_start(); - name := _load_word(name); - _write_c('.'); - _write_s(name, label_token); - _write_z(":\n\0"); - _lexer_skip_token(); -end; - -proc _compile_statement(); -var - current_byte: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_goto() then - _compile_goto(); - goto .compile_statement_semicolon; - end; - if token_kind = _lexer_token_kind_if() then - _compile_if(); - goto .compile_statement_semicolon; - end; - if token_kind = _lexer_token_kind_return() then - _compile_return_statement(); - goto .compile_statement_semicolon; - end; - if token_kind = _lexer_token_kind_dot() then - _compile_label_declaration(); - goto .compile_statement_semicolon; - end; - if token_kind = _lexer_token_kind_identifier() then - current_byte := _lexer_global_start(); - current_byte := _load_word(current_byte); - current_byte := _load_byte(current_byte); - - (* This is a call if the statement starts with an underscore. 
*) - if current_byte = '_' then - _compile_call(); - else - _compile_assignment(); - end; - goto .compile_statement_semicolon; - end; - - .compile_statement_semicolon; - _write_c('\n'); -end; - -proc _compile_procedure_body(); -var - token_kind: Word; -begin - .compile_procedure_body_loop; - - _skip_empty_lines(); - _compile_statement(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_semicolon() then - _lexer_skip_token(); - goto .compile_procedure_body_loop; - end; - _skip_empty_lines(); -end; - -(* Writes a regster name to the standard output. *) - -(* Parameters: *) -(* register_character - Register character. *) -(* register_number - Register number. *) -proc _write_register(register_character: Word, register_number: Word); -begin - _write_c(register_character); - register_number := register_number + '0'; - _write_c(register_number); -end; - -proc _skip_spaces(); -var - current_byte: Word; - lhs: Word; - rhs: Word; -begin - current_byte := _load_byte(source_code_position); - lhs := current_byte = '\t'; - rhs := current_byte = ' '; - - if lhs or rhs then - source_code_position := source_code_position + 1; - _skip_spaces(); - end; -end; - -proc _read_type_expression(); -var - type_name: Word; - token_kind: Word; -begin - type_name := _lexer_read_token(@token_kind); - _lexer_skip_token(); -end; - -(* Parameters: *) - -(* parameter_index - Parameter index. 
*) -proc _parameter_info_create(parameter_index: Word); -var - offset: Word; - current_word: Word; - result: Word; -begin - result := memory_free_pointer; - current_word := result; - (* 2 is INFO_PARAMETER *) - _store_word(2, current_word); - - current_word := current_word + 4; - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - offset := parameter_index * 4; - _store_word(88 + -offset, current_word); - - memory_free_pointer := current_word + 4; - - return result -end; - -proc _parameter_info_get_offset(info: Word); -begin - info := info + 4; - return _load_word(info) -end; - -(* Parameters: *) - -(* temporary_index - Parameter index. *) -proc _temporary_info_create(temporary_index: Word); -var - offset: Word; - current_word: Word; - result: Word; -begin - result := memory_free_pointer; - current_word := result; - (* 3 is INFO_TEMPORARY *) - _store_word(3, current_word); - - current_word := current_word + 4; - - (* Calculate the stack offset: 4 * variable_counter. *) - _store_word(temporary_index * 4, current_word); - - memory_free_pointer := current_word + 4; - - return result -end; - -proc _temporary_info_get_offset(info: Word); -begin - info := info + 4; - return _load_word(info) -end; - -(* Parameters: *) - -(* parameter_index - Parameter index. *) -proc _read_procedure_parameter(parameter_index: Word); -var - name_length: Word; - info: Word; - name_position: Word; - token_kind: Word; -begin - (* Read the parameter name. *) - name_position := source_code_position; - name_length := _lexer_read_token(@token_kind); - _lexer_skip_token(); - - (* Skip colon and space in front of the type expression. 
*) - source_code_position := source_code_position + 2; - - _read_type_expression(); - - _write_z("\tsw a\0"); - _write_i(parameter_index); - _write_z(", \0"); - - info := _parameter_info_create(parameter_index); - _symbol_table_enter(@symbol_table_local, name_position, name_length, info); - - info := _parameter_info_get_offset(info); - _write_i(info); - - _write_z("(sp)\n\0"); -end; - -proc _read_procedure_parameters(); -var - parameter_counter: Word; -begin - (* Skip open paren. *) - source_code_position := source_code_position + 1; - parameter_counter := 0; - - .compile_procedure_prologue_skip; - if _load_byte(source_code_position) <> ')' then - _read_procedure_parameter(parameter_counter); - parameter_counter := parameter_counter + 1; - - if _load_byte(source_code_position) = ',' then - source_code_position := source_code_position + 2; - goto .compile_procedure_prologue_skip; - end; - end; - (* Skip close paren. *) - source_code_position := source_code_position + 1; -end; - -(* Parameters: *) -(* variable_index - Variable index. 
*) -proc _read_procedure_temporary(variable_index: Word); -var - name_length: Word; - info: Word; - name_position: Word; - token_kind: Word; -begin - _skip_spaces(); - name_position := source_code_position; - - (* Read and skip variable name, colon and the space *) - name_length := _lexer_read_token(@token_kind); - _lexer_skip_token(name_length); - source_code_position := source_code_position + 2; - - _read_type_expression(); - - info := _temporary_info_create(variable_index); - _symbol_table_enter(@symbol_table_local, name_position, name_length, info); - - (* Skip semicolon and newline after the variable declaration *) - source_code_position := source_code_position + 2; -end; - -proc _read_procedure_temporaries(); -var - temporary_counter: Word; -begin - if _memcmp(source_code_position, "var", 3) <> 0 then - goto .read_local_variables_end; - end; - source_code_position := source_code_position + 4; - temporary_counter := 0; - - .read_local_variables_loop; - if _memcmp(source_code_position, "begin", 5) = 0 then - goto .read_local_variables_end; - end; - _read_procedure_temporary(temporary_counter); - - temporary_counter := temporary_counter + 1; - goto .read_local_variables_loop; - - .read_local_variables_end; -end; - -proc _compile_procedure(); -var - name_length: Word; - token_kind: Word; -begin - (* Skip "proc ". *) - source_code_position := source_code_position + 5; - (* Clear local symbol table. *) - _store_word(0, @symbol_table_local); - - name_length := _lexer_read_token(@token_kind); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_token(name_length); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_token(name_length); - _write_z(":\n\0"); - - (* Skip procedure name. *) - _lexer_skip_token(); - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - _read_procedure_parameters(); - - (* Skip semicolon and newline. 
*) - source_code_position := source_code_position + 2; - _read_procedure_temporaries(); - - (* Skip semicolon, "begin" and newline. *) - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_begin() then - _lexer_skip_token(); - _compile_procedure_body(); - end; - if token_kind = _lexer_token_kind_return() then - _compile_return_statement(); - end; - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. *) - source_code_position := source_code_position + 5; -end; - -(* Prints and skips a line. *) -proc _skip_comment(); -var - token_kind: Word; -begin - _lexer_read_token(@token_kind); - _lexer_skip_token(); - source_code_position := source_code_position + 1; -end; - -(* Skip newlines and comments. *) -proc _skip_empty_lines(); -var - current_position: Word; - current_byte: Word; -begin - .skip_empty_lines_rerun; - current_position := source_code_position; - - .skip_empty_lines_loop; - current_byte := _load_byte(current_position); - - if current_byte = '\n' then - goto .skip_empty_lines_newline; - end; - if current_byte = '\t' then - goto .skip_empty_lines_tab; - end; - if current_byte <> '(' then - goto .skip_empty_lines_end; - end; - current_byte := _load_byte(current_position + 1); - - if current_byte = '*' then - goto .skip_empty_lines_comment; - end; - goto .skip_empty_lines_end; - - .skip_empty_lines_comment; - source_code_position := current_position; - _skip_comment(); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_newline; - source_code_position := current_position + 1; - goto .skip_empty_lines_rerun; - - .skip_empty_lines_tab; - current_position := current_position + 1; - goto .skip_empty_lines_loop; - - .skip_empty_lines_end; -end; - -proc _compile_global_initializer(); -var - current_byte: Word; - length: Word; - token_kind: Word; -begin - current_byte := _load_byte(source_code_position); - - if current_byte = '"' then - 
_write_z("\n\t.word strings + \0"); - length := _string_length(source_code_position); - - _add_string(source_code_position); - _write_i(); - - (* Skip the quoted string. *) - source_code_position := source_code_position + length; - source_code_position := source_code_position + 2; - - goto .compile_global_initializer_end; - end; - if current_byte = 'S' then - (* Skip "S(". *) - source_code_position := source_code_position + 2; - - if _load_byte(source_code_position) = ')' then - goto .compile_global_initializer_closing; - end; - goto .compile_global_initializer_loop; - end; - if current_byte = '@' then - (* Skip @. *) - source_code_position := source_code_position + 1; - _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(@token_kind); - _write_token(current_byte); - _lexer_skip_token(); - - goto .compile_global_initializer_end; - end; - if _is_digit(current_byte) = 1 then - _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(@token_kind); - _write_token(current_byte); - source_code_position := source_code_position + 1; - - goto .compile_global_initializer_end; - end; - - .compile_global_initializer_loop; - _compile_global_initializer(); - - if _load_byte(source_code_position) <> ')' then - (* Skip comma and whitespace after it. *) - source_code_position := source_code_position + 2; - - goto .compile_global_initializer_loop; - end; - - .compile_global_initializer_closing; - (* Skip ")" *) - source_code_position := source_code_position + 1; - - goto .compile_global_initializer_end; - - .compile_global_initializer_end; -end; - -proc _compile_constant_declaration(); -var - name_length: Word; - token_kind: Word; -begin - name_length := _lexer_read_token(@token_kind); - - _write_z(".type \0"); - _write_token(name_length); - _write_z(", @object\n\0"); - - _write_token(name_length); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. 
*) - _lexer_skip_token(); - source_code_position := source_code_position + 4; - _compile_global_initializer(); - (* Skip semicolon and newline. *) - source_code_position := source_code_position + 2; - _write_c('\n'); -end; - -proc _compile_const_part(); -var - token_kind: Word; -begin - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_const() then - goto .compile_const_part_end; - end; - (* Skip "const" with the newline after it. *) - _lexer_skip_token(); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - - .compile_const_part_loop; - _skip_empty_lines(); - - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section. *) - if _load_byte(source_code_position) = '\t' then - source_code_position := source_code_position + 1; - _compile_constant_declaration(); - goto .compile_const_part_loop; - end; - - .compile_const_part_end; -end; - -proc _compile_variable_declaration(); -var - name_length: Word; - token_kind: Word; -begin - name_length := _lexer_read_token(@token_kind); - - _write_z(".type \0"); - _write_token(name_length); - _write_z(", @object\n\0"); - - _write_token(name_length); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _read_type_expression(); - - if _load_byte(source_code_position) <> ' ' then - (* Else we assume this is a zeroed 81920 bytes big array. *) - _write_z(" .zero 81920\0"); - else - (* Skip the assignment sign with surrounding whitespaces. *) - source_code_position := source_code_position + 4; - _compile_global_initializer(); - end; - - (* Skip semicolon and newline. 
*) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _write_c('\n'); -end; - -proc _compile_var_part(); -var - token_kind: Word; -begin - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_var() then - goto .compile_var_part_end; - end; - (* Skip "var" and newline. *) - _lexer_skip_token(); - _write_z(".section .data\n\0"); - - .compile_var_part_loop; - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_identifier() then - _compile_variable_declaration(); - goto .compile_var_part_loop; - end; - - .compile_var_part_end; -end; - -(* Process the source code and print the generated code. *) -proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - - .compile_module_loop; - _skip_empty_lines(); - - if _load_byte(source_code_position) <> 0 then - (* 5 is "proc " length. Space is needed to distinguish from "procedure". 
*) - if _memcmp(source_code_position, "proc ", 5) = 0 then - _compile_procedure(); - goto .compile_module_loop; - end; - end; - .compile_module_end; -end; - -proc _compile(); -var - compiler_strings_copy: Word; - compiler_strings_end: Word; - current_byte: Word; -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - compiler_strings_copy := @compiler_strings; - compiler_strings_end := compiler_strings_position; - - .compile_loop; - if compiler_strings_copy < compiler_strings_end then - current_byte := _load_byte(compiler_strings_copy); - compiler_strings_copy := compiler_strings_copy + 1; - _write_c(current_byte); - - goto .compile_loop; - end; - _write_c('"'); - _write_c('\n'); -end; - -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) -proc _exit(); -begin - _syscall(0, 0, 0, 0, 0, 0, 93); -end; - -(* Looks for a symbol in the given symbol table. *) - -(* Parameters: *) -(* symbol_table - Symbol table. *) -(* symbol_name - Symbol name pointer. *) -(* name_length - Symbol name length. *) - -(* Returns the symbol pointer or 0 in a0. *) -proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); -var - result: Word; - symbol_table_length: Word; - current_name: Word; - current_length: Word; -begin - result := 0; - - (* The first word in the symbol table is its length, get it. *) - symbol_table_length := _load_word(symbol_table); - - (* Go to the first symbol position. *) - symbol_table := symbol_table + 4; - - .symbol_table_lookup_loop; - if symbol_table_length = 0 then - goto .symbol_table_lookup_end; - end; - - (* Symbol name pointer and length. *) - current_name := _load_word(symbol_table); - current_length := _load_word(symbol_table + 4); - - (* If lengths don't match, exit and return nil. 
*) - if name_length <> current_length then - goto .symbol_table_lookup_repeat; - end; - (* If names don't match, exit and return nil. *) - if _memcmp(symbol_name, current_name, name_length) <> 0 then - goto .symbol_table_lookup_repeat; - end; - (* Otherwise, the symbol is found. *) - result := _load_word(symbol_table + 8); - goto .symbol_table_lookup_end; - - .symbol_table_lookup_repeat; - symbol_table := symbol_table + 12; - symbol_table_length := symbol_table_length + -1; - goto .symbol_table_lookup_loop; - - .symbol_table_lookup_end; - return result -end; - -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* symbol_table - Symbol table. *) -(* symbol_name - Symbol name pointer. *) -(* name_length - Symbol name length. *) -(* symbol - Symbol pointer. *) -proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); -var - table_length: Word; - symbol_pointer: Word; -begin - (* The first word in the symbol table is its length, get it. *) - table_length := _load_word(symbol_table); - - (* Calculate the offset for the new symbol. *) - symbol_pointer := table_length * 12; - symbol_pointer := symbol_pointer + 4; - symbol_pointer := symbol_table + symbol_pointer; - - _store_word(symbol_name, symbol_pointer); - symbol_pointer := symbol_pointer + 4; - _store_word(name_length, symbol_pointer); - symbol_pointer := symbol_pointer + 4; - _store_word(symbol, symbol_pointer); - - (* Increment the symbol table length. *) - table_length := table_length + 1; - _store_word(table_length, symbol_table); -end; - -proc _symbol_table_build(); -begin - (* Set the table length to 0. *) - _store_word(0, @symbol_table_global); - - (* Enter built-in symbols. 
*) - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_bool, 4, @symbol_type_info_bool); -end; - - -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group a handled equivalently. *) - -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) - -proc _lexer_class_invalid(); -begin - return 1 -end; - -proc _lexer_class_digit(); -begin - return 2 -end; - -proc _lexer_class_alpha(); -begin - return 3 -end; - -proc _lexer_class_space(); -begin - return 4 -end; - -proc _lexer_class_colon(); -begin - return 5 -end; - -proc _lexer_class_equals(); -begin - return 6 -end; - -proc _lexer_class_left_paren(); -begin - return 7 -end; - -proc _lexer_class_right_paren(); -begin - return 8 -end; - -proc _lexer_class_asterisk(); -begin - return 9 -end; - -proc _lexer_class_underscore(); -begin - return 10 -end; - -proc _lexer_class_single(); -begin - return 11 -end; - -proc _lexer_class_hex(); -begin - return 12 -end; - -proc _lexer_class_zero(); -begin - return 13 -end; - -proc _lexer_class_x(); -begin - return 14 -end; - -proc _lexer_class_eof(); -begin - return 15 -end; - -proc _lexer_class_dot(); -begin - return 16 -end; - -proc _lexer_class_minus(); -begin - return 17 -end; - -proc _lexer_class_single_quote(); -begin - return 18 -end; - -proc _lexer_class_double_quote(); -begin - return 19 -end; - -proc _lexer_class_greater(); -begin - return 20 -end; - -proc _lexer_class_less(); -begin - return 21 -end; - -proc _lexer_class_other(); -begin - return 22 -end; - -proc 
_lexer_state_start(); -begin - return 1 -end; - -proc _lexer_state_colon(); -begin - return 2 -end; - -proc _lexer_state_identifier(); -begin - return 3 -end; - -proc _lexer_state_decimal(); -begin - return 4 -end; - -proc _lexer_state_greater(); -begin - return 5 -end; - -proc _lexer_state_minus(); -begin - return 6 -end; - -proc _lexer_state_left_paren(); -begin - return 7 -end; - -proc _lexer_state_less(); -begin - return 8 -end; - -proc _lexer_state_dot(); -begin - return 9 -end; - -proc _lexer_state_comment(); -begin - return 10 -end; - -proc _lexer_state_closing_comment(); -begin - return 11 -end; - -proc _lexer_state_character(); -begin - return 12 -end; - -proc _lexer_state_string(); -begin - return 13 -end; - -proc _lexer_state_leading_zero(); -begin - return 14 -end; - -proc _lexer_state_decimal_suffix(); -begin - return 15 -end; - -proc _lexer_state_end(); -begin - return 16 -end; - -proc _lexer_action_none(); -begin - return 1 -end; - -proc _lexer_action_accumulate(); -begin - return 2 -end; - -proc _lexer_action_skip(); -begin - return 3 -end; - -proc _lexer_action_single(); -begin - return 4 -end; - -proc _lexer_action_eof(); -begin - return 5 -end; - -proc _lexer_action_finalize(); -begin - return 6 -end; - -proc _lexer_action_composite(); -begin - return 7 -end; - -proc _lexer_action_key_id(); -begin - return 8 -end; - -proc _lexer_action_integer(); -begin - return 9 -end; - -proc _lexer_action_delimited(); -begin - return 10 -end; - -(* Assigns some value to at array index. *) - -(* Parameters: *) -(* array - Array pointer. *) -(* index - Index (word offset into the array). *) -(* data - Data to assign. 
*) -proc _assign_at(array: Word, index: Word, data: Word); -var - target: Word; -begin - target := index + -1; - target := target * 4; - target := array + target; - - _store_word(data, target); -end; - -proc _get_at(array: Word, index: Word); -var - target: Word; -begin - target := index + -1; - target := target * 4; - target := array + target; - - return _load_word(target) -end; - -(* Initializes the array with character classes. *) -proc _lexer_classifications(); -var - code: Word; -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - _assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - 
_assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - _assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - 
_assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); - - code := 129; - - (* Set the remaining 129 - 256 bytes to transitionClassOther. *) - .create_classification_loop; - _assign_at(@classification, code, 22); - code := code + 1; - - if code < 257 then - goto .create_classification_loop; - end; -end; - -proc _lexer_get_transition(current_state: Word, character_class: Word); -var - transition_table: Word; - row_position: Word; - column_position: Word; - target: Word; -begin - (* Each state is 8 bytes long (2 words: action and next state). 
*) - (* There are 22 character classes, so a transition row 8 * 22 = 176 bytes long. *) - row_position := current_state + -1; - row_position := row_position * 176; - - column_position := character_class + -1; - column_position := column_position * 8; - - target := _lexer_get_transition_table() + row_position; - - return target + column_position -end; - -(* Parameters: *) -(* current_state - First index into transitions table. *) -(* character_class - Second index into transitions table. *) -(* action - Action to assign. *) -(* next_state - Next state to assign. *) -proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); -var - transition: Word; -begin - transition := _lexer_get_transition(current_state, character_class); - - _lexer_transition_set_action(transition, action); - _lexer_transition_set_state(transition, next_state); -end; - -(* Sets same action and state transition for all character classes in one transition row. *) - -(* Parameters: *) -(* current_state - Current state (Transition state enumeration). *) -(* default_action - Default action (Callback). *) -(* next_state - Next state (Transition state enumeration). 
*) -proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); -begin - _lexer_set_transition(current_state, _lexer_class_invalid(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_digit(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_alpha(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_space(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_colon(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_equals(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_left_paren(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_right_paren(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_asterisk(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_underscore(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_single(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_hex(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_zero(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_x(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_eof(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_dot(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_minus(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_single_quote(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_double_quote(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_greater(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_less(), 
default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_other(), default_action, next_state); -end; - - -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) -proc _lexer_transitions(); -begin - (* Start state. *) - _lexer_set_transition(_lexer_state_start(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_greater()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); - 
_lexer_set_transition(_lexer_state_start(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_accumulate(), _lexer_state_dot()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_greater(), _lexer_action_accumulate(), _lexer_state_greater()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_less(), _lexer_action_accumulate(), _lexer_state_less()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_other(), _lexer_action_none(), _lexer_state_end()); - - (* Colon state. *) - _lexer_default_transition(_lexer_state_colon(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_colon(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); - - (* Identifier state. 
*) - _lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); - - (* Decimal state. *) - _lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); - - (* Greater state. 
*) - _lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_greater(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); - - (* Minus state. *) - _lexer_default_transition(_lexer_state_minus(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_minus(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); - - (* Left paren state. *) - _lexer_default_transition(_lexer_state_left_paren(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_left_paren(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_comment()); - - (* Less state. *) - _lexer_default_transition(_lexer_state_less(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_less(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_less(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); - - (* Hexadecimal after 0x. *) - _lexer_default_transition(_lexer_state_dot(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_dot(), _lexer_class_dot(), _lexer_action_composite(), _lexer_state_end()); - - (* Comment. *) - _lexer_default_transition(_lexer_state_comment(), _lexer_action_accumulate(), _lexer_state_comment()); - _lexer_set_transition(_lexer_state_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); - _lexer_set_transition(_lexer_state_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - - (* Closing comment. 
*) - _lexer_default_transition(_lexer_state_closing_comment(), _lexer_action_accumulate(), _lexer_state_comment()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_right_paren(), _lexer_action_delimited(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - - (* Character. *) - _lexer_default_transition(_lexer_state_character(), _lexer_action_accumulate(), _lexer_state_character()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end()); - - (* String. *) - _lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end()); - - (* Leading zero. 
*) - _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); - - (* Digit with a character suffix. *) - _lexer_default_transition(_lexer_state_decimal_suffix(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); -end; - -(* Transition table is saved after character classification table. *) -(* Each character entry is 1 word long and there are 256 characters. *) -(* 1024 = 256 * 4 *) -proc _lexer_get_transition_table(); -begin - return @classification + 1024 -end; - -(* Lexer state is saved after the transition tables. *) -(* Each transition table entry is 8 bytes long. 
The table has 16 rows (transition states) *) -(* and 22 columns (character classes), so 2816 = 8 * 16 * 22. *) -proc _lexer_global_state(); -begin - return _lexer_get_transition_table() + 2816 -end; - -(* Gets pointer to the token start. *) -proc _lexer_global_start(); -begin - return _lexer_global_state() + 4 -end; - -(* Gets pointer to the token end. *) -proc _lexer_global_end(); -begin - return _lexer_global_start() + 4 -end; - -proc _lexer_transition_get_action(transition: Word); -begin - return _load_word(transition) -end; - -proc _lexer_transition_set_action(transition: Word, action: Word); -begin - _store_word(action, transition); -end; - -proc _lexer_transition_get_state(transition: Word); -begin - return _load_word(transition + 4) -end; - -proc _lexer_transition_set_state(transition: Word, state: Word); -begin - _store_word(state, transition + 4); -end; - -(* Resets the lexer state for reading the next token. *) -proc _lexer_reset(); -var - state: Word; - current: Word; -begin - (* Transition start state is 1. *) - state := _lexer_global_state(); - _store_word(_lexer_state_start(), state); - - current := _lexer_global_start(); - _store_word(source_code_position, current); - - current := _lexer_global_end(); - _store_word(source_code_position, current); -end; - -(* One time lexer initialization. 
*) -proc _lexer_initialize(); -begin - _lexer_classifications(); - _lexer_transitions(); -end; - -proc _lexer_next_transition(); -var - current_character: Word; - character_class: Word; - current_state: Word; -begin - current_character := _lexer_global_end(); - current_character := _load_word(current_character); - current_character := _load_byte(current_character); - - character_class := _get_at(@classification, current_character + 1); - - current_state := _lexer_global_state(); - current_state := _load_word(current_state); - - return _lexer_get_transition(current_state, character_class) -end; - -proc _lexer_token_kind_identifier(); -begin - return 1 -end; - -proc _lexer_token_kind_const(); -begin - return 2 -end; - -proc _lexer_token_kind_var(); -begin - return 3 -end; - -proc _lexer_token_kind_proc(); -begin - return 4 -end; - -proc _lexer_token_kind_type(); -begin - return 5 -end; - -proc _lexer_token_kind_begin(); -begin - return 6 -end; - -proc _lexer_token_kind_end(); -begin - return 7 -end; - -proc _lexer_token_kind_if(); -begin - return 8 -end; - -proc _lexer_token_kind_then(); -begin - return 9 -end; - -proc _lexer_token_kind_else(); -begin - return 10 -end; - -proc _lexer_token_kind_elsif(); -begin - return 11 -end; - -proc _lexer_token_kind_while(); -begin - return 12 -end; - -proc _lexer_token_kind_do(); -begin - return 13 -end; - -proc _lexer_token_kind_extern(); -begin - return 14 -end; - -proc _lexer_token_kind_record(); -begin - return 15 -end; - -proc _lexer_token_kind_union(); -begin - return 16 -end; - -proc _lexer_token_kind_true(); -begin - return 17 -end; - -proc _lexer_token_kind_false(); -begin - return 18 -end; - -proc _lexer_token_kind_nil(); -begin - return 19 -end; - -proc _lexer_token_kind_and(); -begin - return 20 -end; - -proc _lexer_token_kind_or(); -begin - return 21 -end; - -proc _lexer_token_kind_xor(); -begin - return 22 -end; - -proc _lexer_token_kind_pipe(); -begin - return 23 -end; - -proc _lexer_token_kind_not(); -begin - 
return 24 -end; - -proc _lexer_token_kind_return(); -begin - return 24 -end; - -proc _lexer_token_kind_module(); -begin - return 25 -end; - -proc _lexer_token_kind_program(); -begin - return 26 -end; - -proc _lexer_token_kind_import(); -begin - return 27 -end; - -proc _lexer_token_kind_cast(); -begin - return 28 -end; - -proc _lexer_token_kind_defer(); -begin - return 29 -end; - -proc _lexer_token_kind_case(); -begin - return 30 -end; - -proc _lexer_token_kind_of(); -begin - return 31 -end; - -proc _lexer_token_kind_trait(); -begin - return 32 -end; - -proc _lexer_token_kind_left_paren(); -begin - return 33 -end; - -proc _lexer_token_kind_right_paren(); -begin - return 34 -end; - -proc _lexer_token_kind_left_square(); -begin - return 35 -end; - -proc _lexer_token_kind_right_square(); -begin - return 36 -end; - -proc _lexer_token_kind_shift_left(); -begin - return 37 -end; - -proc _lexer_token_kind_shift_right(); -begin - return 38 -end; - -proc _lexer_token_kind_greater_equal(); -begin - return 39 -end; - -proc _lexer_token_kind_less_equal(); -begin - return 40 -end; - -proc _lexer_token_kind_greater_than(); -begin - return 41 -end; - -proc _lexer_token_kind_less_than(); -begin - return 42 -end; - -proc _lexer_token_kind_not_equal(); -begin - return 43 -end; - -proc _lexer_token_kind_equals(); -begin - return 44 -end; - -proc _lexer_token_kind_semicolon(); -begin - return 45 -end; - -proc _lexer_token_kind_dot(); -begin - return 46 -end; - -proc _lexer_token_kind_comma(); -begin - return 47 -end; - -proc _lexer_token_kind_plus(); -begin - return 48 -end; - -proc _lexer_token_kind_arrow(); -begin - return 49 -end; - -proc _lexer_token_kind_minus(); -begin - return 50 -end; - -proc _lexer_token_kind_multiplication(); -begin - return 51 -end; - -proc _lexer_token_kind_division(); -begin - return 52 -end; - -proc _lexer_token_kind_remainder(); -begin - return 53 -end; - -proc _lexer_token_kind_assignment(); -begin - return 54 -end; - -proc _lexer_token_kind_colon(); 
-begin - return 55 -end; - -proc _lexer_token_kind_hat(); -begin - return 56 -end; - -proc _lexer_token_kind_at(); -begin - return 57 -end; - -proc _lexer_token_kind_exclamation(); -begin - return 58 -end; - -proc _lexer_token_kind_string(); -begin - return 59 -end; - -proc _lexer_token_kind_character(); -begin - return 60 -end; - -proc _lexer_token_kind_integer(); -begin - return 61 -end; - -proc _lexer_token_kind_word(); -begin - return 62 -end; - -proc _lexer_token_kind_goto(); -begin - return 63 -end; - -proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); -var - result: Word; -begin - result := 0; - - if lhs_length = rhs_length then - result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0; - end; - return result -end; - -proc _lexer_classify_keyword(position_start: Word, position_end: Word); -var - result: Word; - token_length: Word; -begin - result := _lexer_token_kind_identifier(); - token_length := position_end + -position_start; - - if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then - result := _lexer_token_kind_const(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then - result := _lexer_token_kind_var(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then - result := _lexer_token_kind_proc(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then - result := _lexer_token_kind_type(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then - result := _lexer_token_kind_begin(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then - result := _lexer_token_kind_end(); - goto .lexer_classify_keyword_end; - end; - if 
_lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then - result := _lexer_token_kind_return(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then - result := _lexer_token_kind_goto(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then - result := _lexer_token_kind_if(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then - result := _lexer_token_kind_while(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then - result := _lexer_token_kind_then(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then - result := _lexer_token_kind_else(); - goto .lexer_classify_keyword_end; - end; - if _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then - result := _lexer_token_kind_elsif(); - goto .lexer_classify_keyword_end; - end; - .lexer_classify_keyword_end; - return result -end; - -proc _lexer_classify_finalize(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ':' then - result := _lexer_token_kind_colon(); - goto .lexer_classify_finalize_result; - end; - if character = '.' 
then - result := _lexer_token_kind_dot(); - goto .lexer_classify_finalize_result; - end; - .lexer_classify_finalize_result; - return result -end; - -proc _lexer_classify_single(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ';' then - result := _lexer_token_kind_semicolon(); - end; - return result -end; - -proc _lexer_execute_action(action_to_perform: Word, kind: Word); -var - pointer_start: Word; - pointer_end: Word; - position_start: Word; - position_end: Word; - intermediate: Word; -begin - pointer_start := _lexer_global_start(); - position_start := _load_word(pointer_start); - pointer_end := _lexer_global_end(); - position_end := _load_word(pointer_end); - - if action_to_perform = _lexer_action_none() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_accumulate() then - _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_skip() then - _store_word(position_start + 1, pointer_start); - _store_word(position_end + 1, pointer_end); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_single() then - _store_word(position_end + 1, pointer_end); - - intermediate := _lexer_classify_single(position_start); - _store_word(intermediate, kind); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_eof() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_finalize() then - intermediate := _lexer_classify_finalize(position_start); - _store_word(intermediate, kind); - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_composite() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_key_id() then - intermediate := _lexer_classify_keyword(position_start, position_end); - _store_word(intermediate, kind); - goto .action_to_perform_end; - end; - if 
action_to_perform = _lexer_action_integer() then - goto .action_to_perform_end; - end; - if action_to_perform = _lexer_action_delimited() then - (* _store_word(position_end + 1, pointer_end); *) - goto .action_to_perform_end; - end; - - .action_to_perform_end; -end; - -proc _lexer_execute_transition(kind: Word); -var - next_transition: Word; - next_state: Word; - global_state: Word; - action_to_perform: Word; -begin - next_transition := _lexer_next_transition(); - next_state := _lexer_transition_get_state(next_transition); - action_to_perform := _lexer_transition_get_action(next_transition); - - global_state := _lexer_global_state(); - - _store_word(next_state, global_state); - _lexer_execute_action(action_to_perform, kind); - - return next_state -end; - -proc _lexer_advance_token(kind: Word); -begin - if _lexer_execute_transition(kind) <> _lexer_state_end() then - _lexer_advance_token(kind); - end; -end; - -(* Reads the next token. *) - -(* Returns token length in a0. *) -proc _lexer_read_token(kind: Word); -var - new_position: Word; -begin - _lexer_reset(); - _lexer_advance_token(kind); - - new_position := _lexer_global_end(); - return _load_word(new_position) + -source_code_position -end; - -(* Advances the token stream past the last read token. *) -proc _lexer_skip_token(); -var - new_position: Word; -begin - new_position := _lexer_global_end(); - source_code_position := _load_word(new_position); -end; - -(* Entry point. *) -proc _start(); -var - last_read: Word; - offset: Wort; -begin - _lexer_initialize(); - _symbol_table_build(); - - (* Read the source from the standard input. *) - offset := @source_code; - - .start_read; - (* Second argument is buffer size. Modifying update the source_code definition. 
*) - last_read := _read_file(offset, 81920); - if last_read > 0 then - offset := offset + last_read; - goto .start_read; - end; - _compile(); - - _exit(0); -end; diff --git a/boot/stage13/cl.elna b/boot/stage13/cl.elna new file mode 100644 index 0000000..f4fa817 --- /dev/null +++ b/boot/stage13/cl.elna @@ -0,0 +1,2870 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, *) +(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) +(* obtain one at https://mozilla.org/MPL/2.0/. *) + +(* Stage 13 compiler. *) + +(* - Multiline comments. *) +(* - elsif conditions. *) +(* - Optional "begin" if the procedure body is a single return statement. *) + +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + (* Every type info starts with a word describing what type it is. *) + + (* PRIMITIVE_TYPE = 1 *) + + (* Primitive types have only type size. *) + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + (* Info objects start with a word describing its type. *) + + (* INFO_TYPE = 1 *) + (* INFO_PARAMETER = 2 *) + (* INFO_TEMPORARY = 3 *) + + (* Type info has the type it belongs to. 
*) + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + memory: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + label_counter: Word := 0; + source_code_position: Pointer := @source_code; + memory_free_pointer: Word := @memory; + +(* Calculates and returns the string token length between quotes, including the *) +(* escaping slash characters. *) + +(* Parameters: *) +(* string - String token pointer. *) + +(* Returns the length in a0. *) +proc _string_length(string: Word); +var + counter: Word; +begin + (* Reset the counter. *) + counter := 0; + + .string_length_loop; + string := string + 1; + + if _load_byte(string) <> '"' then + counter := counter + 1; + goto .string_length_loop + end; + + return counter +end; + +(* Adds a string to the global, read-only string storage. *) + +(* Parameters: *) +(* string - String token. *) + +(* Returns the offset from the beginning of the storage to the new string in a0. *) +proc _add_string(string: Word); +var + contents: Word; + result: Word; + current_byte: Word; +begin + contents := string + 1; + result := compiler_strings_length; + + .add_string_loop; + if _load_byte(contents) <> '"' then + current_byte := _load_byte(contents); + _store_byte(current_byte, compiler_strings_position); + compiler_strings_position := compiler_strings_position + 1; + contents := contents + 1; + + if current_byte <> '\\' then + compiler_strings_length := compiler_strings_length + 1 + end; + goto .add_string_loop + end; + + return result +end; + +(* Reads standard input into a buffer. 
*) +(* buffer - Buffer pointer. *) +(* size - Buffer size. *) + +(* Returns the number of bytes read in a0. *) +proc _read_file(buffer: Word, size: Word); +begin + return _syscall(0, buffer, size, 0, 0, 0, 63) +end; + +(* Writes to the standard output. *) + +(* Parameters: *) +(* buffer - Buffer. *) +(* size - Buffer length. *) +proc _write_s(buffer: Word, size: Word); +begin + _syscall(1, buffer, size, 0, 0, 0, 64); +end; + +(* Writes a number to a string buffer. *) + +(* Parameters: *) +(* number - Whole number. *) +(* output_buffer - Buffer pointer. *) + +(* Sets a0 to the length of the written number. *) +proc _print_i(number: Word, output_buffer: Word); +var + local_buffer: Word; + is_negative: Word; + current_character: Word; + result: Word; +begin + (* Digits are produced least significant first and stored backwards from this address. *) + local_buffer := @result + 11; + + if number >= 0 then + is_negative := 0 + else + (* Continue with the absolute value, the minus sign is written after the digits. *) + number := -number; + is_negative := 1 + end; + + .print_i_digit10; + current_character := number % 10; + _store_byte(current_character + '0', local_buffer); + + number := number / 10; + local_buffer := local_buffer + -1; + + if number <> 0 then + goto .print_i_digit10 + end; + if is_negative = 1 then + _store_byte('-', local_buffer); + local_buffer := local_buffer + -1 + end; + (* The length is the distance between the start address and the current write position. *) + result := @result + 11; + result := result + -local_buffer; + _memcpy(output_buffer, local_buffer + 1, result); + + return result +end; + +(* Writes a number to the standard output. *) + +(* Parameters: *) +(* number - Whole number. *) + +(* NOTE(review): local_buffer is a single Word, so output longer than 4 bytes presumably overlaps length on the stack. Confirm. *) +proc _write_i(number: Word); +var + local_buffer: Word; + length: Word; +begin + length := _print_i(number, @local_buffer); + _write_s(@local_buffer, length); +end; + +(* Writes a character from a0 into the standard output. *) + +(* Parameters: *) +(* character - Character to write. *) +proc _write_c(character: Word); +begin + _write_s(@character, 1); +end; + +(* Write null terminated string. *) + +(* Parameters: *) +(* string - String. 
*) +proc _write_z(string: Word); +var + next_byte: Word; +begin + (* Check for 0 character. *) + next_byte := _load_byte(string); + + if next_byte <> 0 then + (* Print a character. *) + _write_c(next_byte); + + (* Advance the input string by one byte. *) + _write_z(string + 1) + end; +end; + +(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_upper(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= 'A'; + rhs := character <= 'Z'; + + return lhs & rhs + +end; + +(* Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_lower(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= 'a'; + rhs := character <= 'z'; + + return lhs & rhs +end; + +(* Detects if the passed character is a 7-bit alpha character or an underscore. *) + +(* Parameters: *) +(* character - Tested character. *) + +(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) +proc _is_alpha(character: Word); +var + is_upper_result: Word; + is_lower_result: Word; + is_alpha_result: Word; + is_underscore: Word; +begin + is_upper_result := _is_upper(character); + is_lower_result := _is_lower(character); + is_underscore := character = '_'; + + is_alpha_result := is_lower_result or is_upper_result; + return is_alpha_result or is_underscore +end; + +(* Detects whether the passed character is a digit *) +(* (a value between 0 and 9). *) + +(* Parameters: *) +(* character - Examined value. *) + +(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) +proc _is_digit(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= '0'; + rhs := character <= '9'; + + return lhs & rhs +end; + +(* Detects whether the passed character is an alpha character, an underscore or a digit. *) + +(* Parameters: *) +(* character - Tested character. *) + +(* Sets a0 to 1 if so, to 0 otherwise. *) +proc _is_alnum(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := _is_alpha(character); + rhs := _is_digit(character); + + return lhs or rhs +end; + +(* Parameters: *) +(* lhs - First pointer. *) +(* rhs - Second pointer. 
*) +(* count - The length to compare. *) + +(* Returns 0 if memory regions are equal. *) +proc _memcmp(lhs: Word, rhs: Word, count: Word); +var + lhs_byte: Word; + rhs_byte: Word; + result: Word; +begin + result := 0; + + .memcmp_loop; + if count <> 0 then + lhs_byte := _load_byte(lhs); + rhs_byte := _load_byte(rhs); + result := lhs_byte + -rhs_byte; + + lhs := lhs + 1; + rhs := rhs + 1; + count := count + -1; + + if result = 0 then + goto .memcmp_loop + end + end; + + return result +end; + +(* Copies memory. *) + +(* Parameters: *) +(* destination - Destination. *) +(* source - Source. *) +(* count - Size. *) + +(* Returns the destination. *) +proc _memcpy(destination: Word, source: Word, count: Word); +var + current_byte: Word; +begin + .memcpy_loop; + if count <> 0 then + current_byte := _load_byte(source); + _store_byte(current_byte, destination); + + destination := destination + 1; + source := source + 1; + count := count + -1; + goto .memcpy_loop + end; + + return destination +end; + +(* Prints the current token. *) + +(* Parameters: *) +(* length - Token length. *) + +(* Returns a0 unchanged. 
*) +proc _write_token(length: Word); +begin + _write_s(source_code_position, length); + return length +end; + +proc _compile_integer_literal(); +var + integer_token: Word; + token_kind: Word; +begin + _write_z("\tli t0, \0"); + + integer_token := _lexer_read_token(@token_kind); + _write_token(integer_token); + _lexer_skip_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +var + character: Word; +begin + _write_z("\tli t0, '\0"); + source_code_position := source_code_position + 1; + + character := _load_byte(source_code_position); + if character = '\\' then + _write_c('\\'); + source_code_position := source_code_position + 1 + end; + _write_s(source_code_position, 1); + _write_s("'\n", 2); + source_code_position := source_code_position + 2; +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +proc _compile_address_expression(); +begin + (* Skip the "@" sign. *) + source_code_position := source_code_position + 1; + _compile_designator(); +end; + +proc _compile_negate_expression(); +begin + (* Skip the "-" sign. *) + source_code_position := source_code_position + 1; + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +proc _compile_not_expression(); +begin + (* Skip the "~" sign. 
*) + source_code_position := source_code_position + 1; + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +proc _compile_string_literal(); +var + length: Word; + offset: Word; +begin + length := _string_length(source_code_position); + offset := _add_string(source_code_position); + + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(offset); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +proc _compile_term(); +var + current_character: Word; +begin + current_character := _load_byte(source_code_position); + + if current_character = '\'' then + _compile_character_literal(); + end; + if current_character = '@' then + _compile_address_expression(); + end; + if current_character = '-' then + _compile_negate_expression(); + end; + if current_character = '~' then + _compile_not_expression(); + end; + if current_character = '"' then + _compile_string_literal(); + end; + if current_character = '_' then + _compile_call(); + _write_z("\nmv t0, a0\n\0"); + end; + if _is_digit(current_character) = 1 then + _compile_integer_literal(); + end; + if _is_lower(current_character) = 1 then + _compile_variable_expression(); + end; +end; + +proc _compile_binary_rhs(); +begin + (* Skip the whitespace after the binary operator. *) + source_code_position := source_code_position + 1; + _compile_term(); + + (* Load the left expression from the stack; *) + _write_z("\tlw t1, 64(sp)\n\0"); +end; + +proc _compile_expression(); +var + current_character: Word; +begin + _compile_term(); + current_character := _load_byte(source_code_position); + + if current_character <> ' ' then + goto .compile_expression_end; + end; + (* It is a binary expression. *) + + (* Save the value of the left expression on the stack. *) + _write_z("sw t0, 64(sp)\n\0"); + + (* Skip surrounding whitespace in front of the operator. 
*) + source_code_position := source_code_position + 1; + current_character := _load_byte(source_code_position); + + if current_character = '+' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if current_character = '*' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if current_character = '&' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if current_character = 'o' then + source_code_position := source_code_position + 2; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if current_character = 'x' then + source_code_position := source_code_position + 3; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + if current_character = '=' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + end; + if current_character = '%' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("rem t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if current_character = '/' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("div t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if current_character = '<' then + source_code_position := source_code_position + 1; + current_character := _load_byte(source_code_position); + + if current_character = '>' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + end; + if current_character = '=' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("slt t0, t1, t0\n\0"); + + goto .compile_expression_end; + end; + if current_character = '>' then + source_code_position := source_code_position + 1; + current_character := _load_byte(source_code_position); + if current_character = '=' then + source_code_position := source_code_position + 1; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + end; + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\n\0"); + + goto .compile_expression_end; + end; + + .compile_expression_end; +end; + +proc _compile_call(); +var + name_length: Word; + name: Word; + argument_count: Word; + stack_offset: Word; + token_kind: Word; +begin + name_length := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_length := _lexer_global_end(); + name_length := _load_word(name_length) + -name; + argument_count := 0; + + (* Skip the identifier and left paren. 
*) + _lexer_skip_token(); + source_code_position := source_code_position + 1; + + if _load_byte(source_code_position) = ')' then + goto .compile_call_finalize; + end; + .compile_call_loop; + _compile_expression(); + + (* Save the argument on the stack. *) + _write_z("\tsw t0, \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + stack_offset := argument_count * 4; + _write_i(116 + -stack_offset); + + _write_z("(sp)\n\0"); + + (* Add one to the argument counter. *) + argument_count := argument_count + 1; + + if _load_byte(source_code_position) <> ',' then + goto .compile_call_finalize; + end; + source_code_position := source_code_position + 2; + goto .compile_call_loop; + + .compile_call_finalize; + (* Load the argument from the stack. *) + if argument_count <> 0 then + (* Decrement the argument counter. *) + argument_count := argument_count + -1; + + _write_z("\tlw a\0"); + _write_i(argument_count); + + _write_z(", \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + stack_offset := argument_count * 4; + _write_i(116 + -stack_offset); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize; + end; + + .compile_call_end; + _write_z("\tcall \0"); + _write_s(name, name_length); + + (* Skip the right paren. 
*) + source_code_position := source_code_position + 1; +end; + +proc _compile_goto(); +var + next_token: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + source_code_position := source_code_position + 2; + + next_token := _lexer_read_token(@token_kind); + _write_z("\tj .\0"); + + _write_token(next_token); + _lexer_skip_token(); +end; + +proc _compile_local_designator(symbol: Word); +var + variable_offset: Word; +begin + _write_z("\taddi t0, sp, \0"); + variable_offset := _parameter_info_get_offset(symbol); + _write_i(variable_offset); + _write_c('\n'); + _lexer_skip_token(); +end; + +proc _compile_global_designator(); +var + name: Word; + token_kind: Word; +begin + _write_z("\tla t0, \0"); + + name := _lexer_read_token(@token_kind); + _write_token(name); + _lexer_skip_token(); + + _write_c('\n'); +end; + +proc _compile_designator(); +var + name_token: Word; + lookup_result: Word; + token_kind: Word; + name: Word; +begin + name_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + name_token := _lexer_global_end(); + name_token := _load_word(name_token) + -name; + lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); + + if lookup_result <> 0 then + _compile_local_designator(lookup_result); + goto .compile_designator_end; + end; + _compile_global_designator(); + + .compile_designator_end; +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + (* Save the assignee address on the stack. *) + _write_z("\tsw t0, 60(sp)\n\0"); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + source_code_position := source_code_position + 4; + + (* Compile the assignment. *) + _compile_expression(); + + _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +var + token_kind: Word; +begin + (* Skip "return" keyword and whitespace after it. 
*) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; + _compile_expression(); + + _write_z("\tmv a0, t0\n\0"); +end; + +(* Writes a label, .Ln, where n is a unique number. *) + +(* Parameters: *) +(* counter - Label counter. *) +proc _write_label(counter: Word); +begin + _write_z(".L\0"); + _write_i(counter); +end; + +proc _compile_if(); +var + after_end_label: Word; + condition_label: Word; + token_kind: Word; +begin + (* Skip "if ". *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; + + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + after_end_label := label_counter; + label_counter := label_counter + 1; + + (* condition_label is the label in front of the next elsif condition or end. *) + condition_label := label_counter; + label_counter := label_counter + 1; + + _write_z("\tbeqz t0, \0"); + _write_label(condition_label); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(after_end_label); + _write_c('\n'); + + _write_label(condition_label); + _write_z(":\n\0"); + + .compile_if_loop; + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_end() then + goto .compile_if_end; + end; + if token_kind = _lexer_token_kind_else() then + goto .compile_if_else; + end; + if token_kind = _lexer_token_kind_elsif() then + goto .compile_if_elsif; + end; + .compile_if_elsif; + _lexer_skip_token(); + source_code_position := source_code_position + 1; + + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* condition_label is the label in front of the next elsif condition or end. 
*) + condition_label := label_counter; + label_counter := label_counter + 1; + + _write_z("\tbeqz t0, \0"); + _write_label(condition_label); + _write_c('\n'); + + _compile_procedure_body(); + + _write_z("\tj \0"); + _write_label(after_end_label); + _write_c('\n'); + + _write_label(condition_label); + _write_z(":\n\0"); + + goto .compile_if_loop; + + .compile_if_else; + _lexer_skip_token(); + _compile_procedure_body(); + + .compile_if_end; + _lexer_skip_token(); + + _write_label(after_end_label); + _write_z(":\n\0"); +end; + +proc _compile_label_declaration(); +var + label_token: Word; + token_kind: Word; + name: Word; +begin + (* Skip the dot. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + label_token := _lexer_read_token(@token_kind); + name := _lexer_global_start(); + name := _load_word(name); + _write_c('.'); + _write_s(name, label_token); + _write_z(":\n\0"); + _lexer_skip_token(); +end; + +proc _compile_statement(); +var + current_byte: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_goto() then + _compile_goto(); + goto .compile_statement_semicolon; + end; + if token_kind = _lexer_token_kind_if() then + _compile_if(); + goto .compile_statement_semicolon; + end; + if token_kind = _lexer_token_kind_return() then + _compile_return_statement(); + goto .compile_statement_semicolon; + end; + if token_kind = _lexer_token_kind_dot() then + _compile_label_declaration(); + goto .compile_statement_semicolon; + end; + if token_kind = _lexer_token_kind_identifier() then + current_byte := _lexer_global_start(); + current_byte := _load_word(current_byte); + current_byte := _load_byte(current_byte); + + (* This is a call if the statement starts with an underscore. 
*) + if current_byte = '_' then + _compile_call(); + else + _compile_assignment(); + end; + goto .compile_statement_semicolon; + end; + + .compile_statement_semicolon; + _write_c('\n'); +end; + +(* Compiles statements one after another as long as each statement is followed by a semicolon. *) +proc _compile_procedure_body(); +var + token_kind: Word; +begin + .compile_procedure_body_loop; + + _skip_empty_lines(); + _compile_statement(); + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_semicolon() then + _lexer_skip_token(); + goto .compile_procedure_body_loop; + end; + _skip_empty_lines(); +end; + +(* Writes a register name to the standard output. *) + +(* Parameters: *) +(* register_character - Register character. *) +(* register_number - Register number. *) +proc _write_register(register_character: Word, register_number: Word); +begin + _write_c(register_character); + (* Convert the number into its ASCII digit. *) + register_number := register_number + '0'; + _write_c(register_number); +end; + +(* Advances source_code_position past consecutive tab and space characters. *) +proc _skip_spaces(); +var + current_byte: Word; + lhs: Word; + rhs: Word; +begin + current_byte := _load_byte(source_code_position); + lhs := current_byte = '\t'; + rhs := current_byte = ' '; + + if lhs or rhs then + source_code_position := source_code_position + 1; + _skip_spaces(); + end; +end; + +(* Reads one token as the type expression and skips it. The token itself is not used. *) +proc _read_type_expression(); +var + type_name: Word; + token_kind: Word; +begin + type_name := _lexer_read_token(@token_kind); + _lexer_skip_token(); +end; + +(* Parameters: *) + +(* parameter_index - Parameter index. 
*) +proc _parameter_info_create(parameter_index: Word); +var + offset: Word; + current_word: Word; + result: Word; +begin + (* The info object is placed at the current free memory pointer. *) + result := memory_free_pointer; + current_word := result; + (* 2 is INFO_PARAMETER *) + _store_word(2, current_word); + + current_word := current_word + 4; + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + offset := parameter_index * 4; + _store_word(88 + -offset, current_word); + + memory_free_pointer := current_word + 4; + + return result +end; + +(* Returns the stack offset stored in the second word of a parameter info. *) +proc _parameter_info_get_offset(info: Word); +begin + info := info + 4; + return _load_word(info) +end; + +(* Parameters: *) + +(* temporary_index - Temporary index. *) +proc _temporary_info_create(temporary_index: Word); +var + current_word: Word; + result: Word; +begin + (* The info object is placed at the current free memory pointer. *) + result := memory_free_pointer; + current_word := result; + (* 3 is INFO_TEMPORARY *) + _store_word(3, current_word); + + current_word := current_word + 4; + + (* Calculate the stack offset: 4 * variable_counter. *) + _store_word(temporary_index * 4, current_word); + + memory_free_pointer := current_word + 4; + + return result +end; + +(* Returns the stack offset stored in the second word of a temporary info. *) +proc _temporary_info_get_offset(info: Word); +begin + info := info + 4; + return _load_word(info) +end; + +(* Parameters: *) + +(* parameter_index - Parameter index. *) +proc _read_procedure_parameter(parameter_index: Word); +var + name_length: Word; + info: Word; + name_position: Word; + token_kind: Word; +begin + (* Read the parameter name. *) + name_position := source_code_position; + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* Skip colon and space in front of the type expression. 
*) + source_code_position := source_code_position + 2; + + _read_type_expression(); + + _write_z("\tsw a\0"); + _write_i(parameter_index); + _write_z(", \0"); + + info := _parameter_info_create(parameter_index); + _symbol_table_enter(@symbol_table_local, name_position, name_length, info); + + info := _parameter_info_get_offset(info); + _write_i(info); + + _write_z("(sp)\n\0"); +end; + +proc _read_procedure_parameters(); +var + parameter_counter: Word; +begin + (* Skip open paren. *) + source_code_position := source_code_position + 1; + parameter_counter := 0; + + .compile_procedure_prologue_skip; + if _load_byte(source_code_position) <> ')' then + _read_procedure_parameter(parameter_counter); + parameter_counter := parameter_counter + 1; + + if _load_byte(source_code_position) = ',' then + source_code_position := source_code_position + 2; + goto .compile_procedure_prologue_skip; + end; + end; + (* Skip close paren. *) + source_code_position := source_code_position + 1; +end; + +(* Parameters: *) +(* variable_index - Variable index. 
*) +proc _read_procedure_temporary(variable_index: Word); +var + name_length: Word; + info: Word; + name_position: Word; + token_kind: Word; +begin + _skip_spaces(); + name_position := source_code_position; + + (* Read and skip variable name, colon and the space *) + name_length := _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 2; + + _read_type_expression(); + + info := _temporary_info_create(variable_index); + _symbol_table_enter(@symbol_table_local, name_position, name_length, info); + + (* Skip semicolon and newline after the variable declaration *) + source_code_position := source_code_position + 2; +end; + +(* Registers the local variables of the optional var section in the local symbol table. *) +proc _read_procedure_temporaries(); +var + temporary_counter: Word; +begin + if _memcmp(source_code_position, "var", 3) <> 0 then + goto .read_local_variables_end; + end; + (* Skip "var" and the character after it. *) + source_code_position := source_code_position + 4; + temporary_counter := 0; + + .read_local_variables_loop; + (* Declarations are read until the "begin" keyword is found. *) + if _memcmp(source_code_position, "begin", 5) = 0 then + goto .read_local_variables_end; + end; + _read_procedure_temporary(temporary_counter); + + temporary_counter := temporary_counter + 1; + goto .read_local_variables_loop; + + .read_local_variables_end; +end; + +(* Compiles a procedure: type directive, label, prologue, parameters, local variables, body and epilogue. *) +proc _compile_procedure(); +var + name_length: Word; + token_kind: Word; +begin + (* Skip "proc ". *) + source_code_position := source_code_position + 5; + (* Clear local symbol table. *) + _store_word(0, @symbol_table_local); + + name_length := _lexer_read_token(@token_kind); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_token(name_length); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_token(name_length); + _write_z(":\n\0"); + + (* Skip procedure name. *) + _lexer_skip_token(); + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + _read_procedure_parameters(); + + (* Skip semicolon and newline. 
*) + source_code_position := source_code_position + 2; + _read_procedure_temporaries(); + + (* Skip semicolon, "begin" and newline. *) + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_begin() then + _lexer_skip_token(); + _compile_procedure_body(); + end; + if token_kind = _lexer_token_kind_return() then + _compile_return_statement(); + end; + + (* Write the epilogue. *) + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + (* Skip the "end" keyword, semicolon and newline. *) + source_code_position := source_code_position + 5; +end; + +(* Prints and skips a line. *) +proc _skip_comment(); +var + token_kind: Word; +begin + _lexer_read_token(@token_kind); + _lexer_skip_token(); + source_code_position := source_code_position + 1; +end; + +(* Skip newlines and comments. *) +proc _skip_empty_lines(); +var + current_position: Word; + current_byte: Word; +begin + .skip_empty_lines_rerun; + current_position := source_code_position; + + .skip_empty_lines_loop; + current_byte := _load_byte(current_position); + + if current_byte = '\n' then + goto .skip_empty_lines_newline; + end; + if current_byte = '\t' then + goto .skip_empty_lines_tab; + end; + if current_byte <> '(' then + goto .skip_empty_lines_end; + end; + current_byte := _load_byte(current_position + 1); + + if current_byte = '*' then + goto .skip_empty_lines_comment; + end; + goto .skip_empty_lines_end; + + .skip_empty_lines_comment; + source_code_position := current_position; + _skip_comment(); + goto .skip_empty_lines_rerun; + + .skip_empty_lines_newline; + source_code_position := current_position + 1; + goto .skip_empty_lines_rerun; + + .skip_empty_lines_tab; + current_position := current_position + 1; + goto .skip_empty_lines_loop; + + .skip_empty_lines_end; +end; + +proc _compile_global_initializer(); +var + current_byte: Word; + length: Word; + token_kind: Word; +begin + current_byte := _load_byte(source_code_position); + + if current_byte = '"' then + 
_write_z("\n\t.word strings + \0"); + length := _string_length(source_code_position); + + (* Pass the string offset explicitly instead of relying on a0 being left untouched between the calls. *) + current_byte := _add_string(source_code_position); + _write_i(current_byte); + + (* Skip the quoted string. *) + source_code_position := source_code_position + length; + source_code_position := source_code_position + 2; + + goto .compile_global_initializer_end; + end; + if current_byte = 'S' then + (* Skip "S(". *) + source_code_position := source_code_position + 2; + + if _load_byte(source_code_position) = ')' then + goto .compile_global_initializer_closing; + end; + goto .compile_global_initializer_loop; + end; + if current_byte = '@' then + (* Skip @. *) + source_code_position := source_code_position + 1; + _write_z("\n\t.word \0"); + current_byte := _lexer_read_token(@token_kind); + _write_token(current_byte); + _lexer_skip_token(); + + goto .compile_global_initializer_end; + end; + if _is_digit(current_byte) = 1 then + _write_z("\n\t.word \0"); + current_byte := _lexer_read_token(@token_kind); + _write_token(current_byte); + (* Skip the whole integer token, not only its first digit. *) + _lexer_skip_token(); + + goto .compile_global_initializer_end; + end; + + .compile_global_initializer_loop; + (* Each element of S(...) is compiled as a nested initializer. *) + _compile_global_initializer(); + + if _load_byte(source_code_position) <> ')' then + (* Skip comma and whitespace after it. *) + source_code_position := source_code_position + 2; + + goto .compile_global_initializer_loop; + end; + + .compile_global_initializer_closing; + (* Skip ")" *) + source_code_position := source_code_position + 1; + + goto .compile_global_initializer_end; + + .compile_global_initializer_end; +end; + +(* Writes the type directive and label of a constant and compiles its initializer. *) +proc _compile_constant_declaration(); +var + name_length: Word; + token_kind: Word; +begin + name_length := _lexer_read_token(@token_kind); + + _write_z(".type \0"); + _write_token(name_length); + _write_z(", @object\n\0"); + + _write_token(name_length); + _write_c(':'); + + (* Skip the constant name with assignment sign and surrounding whitespaces. 
*) + _lexer_skip_token(); + source_code_position := source_code_position + 4; + _compile_global_initializer(); + (* Skip semicolon and newline. *) + source_code_position := source_code_position + 2; + _write_c('\n'); +end; + +proc _compile_const_part(); +var + token_kind: Word; +begin + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_const() then + goto .compile_const_part_end; + end; + (* Skip "const" with the newline after it. *) + _lexer_skip_token(); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + + .compile_const_part_loop; + _skip_empty_lines(); + + (* If the character at the line beginning is not indentation, *) + (* it is probably the next code section. *) + if _load_byte(source_code_position) = '\t' then + source_code_position := source_code_position + 1; + _compile_constant_declaration(); + goto .compile_const_part_loop; + end; + + .compile_const_part_end; +end; + +proc _compile_variable_declaration(); +var + name_length: Word; + token_kind: Word; +begin + name_length := _lexer_read_token(@token_kind); + + _write_z(".type \0"); + _write_token(name_length); + _write_z(", @object\n\0"); + + _write_token(name_length); + _write_c(':'); + + (* Skip the variable name and colon with space before the type. *) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _read_type_expression(); + + if _load_byte(source_code_position) <> ' ' then + (* Else we assume this is a zeroed 81920 bytes big array. *) + _write_z(" .zero 81920\0"); + else + (* Skip the assignment sign with surrounding whitespaces. *) + source_code_position := source_code_position + 4; + _compile_global_initializer(); + end; + + (* Skip semicolon and newline. 
*) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _write_c('\n'); +end; + +proc _compile_var_part(); +var + token_kind: Word; +begin + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_var() then + goto .compile_var_part_end; + end; + (* Skip "var" and newline. *) + _lexer_skip_token(); + _write_z(".section .data\n\0"); + + .compile_var_part_loop; + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_identifier() then + _compile_variable_declaration(); + goto .compile_var_part_loop; + end; + + .compile_var_part_end; +end; + +(* Process the source code and print the generated code. *) +proc _compile_module(); +begin + _compile_const_part(); + _skip_empty_lines(); + _compile_var_part(); + + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); + + .compile_module_loop; + _skip_empty_lines(); + + if _load_byte(source_code_position) <> 0 then + (* 5 is "proc " length. Space is needed to distinguish from "procedure". 
*) + if _memcmp(source_code_position, "proc ", 5) = 0 then + _compile_procedure(); + goto .compile_module_loop; + end; + end; + .compile_module_end; +end; + +proc _compile(); +var + compiler_strings_copy: Word; + compiler_strings_end: Word; + current_byte: Word; +begin + _write_z(".globl _start\n\n\0"); + _compile_module(); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + compiler_strings_copy := @compiler_strings; + compiler_strings_end := compiler_strings_position; + + .compile_loop; + if compiler_strings_copy < compiler_strings_end then + current_byte := _load_byte(compiler_strings_copy); + compiler_strings_copy := compiler_strings_copy + 1; + _write_c(current_byte); + + goto .compile_loop; + end; + _write_c('"'); + _write_c('\n'); +end; + +(* Terminates the program. a0 contains the return code. *) + +(* Parameters: *) +(* a0 - Status code. *) +proc _exit(); +begin + _syscall(0, 0, 0, 0, 0, 0, 93); +end; + +(* Looks for a symbol in the given symbol table. *) + +(* Parameters: *) +(* symbol_table - Symbol table. *) +(* symbol_name - Symbol name pointer. *) +(* name_length - Symbol name length. *) + +(* Returns the symbol pointer or 0 in a0. *) +proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); +var + result: Word; + symbol_table_length: Word; + current_name: Word; + current_length: Word; +begin + result := 0; + + (* The first word in the symbol table is its length, get it. *) + symbol_table_length := _load_word(symbol_table); + + (* Go to the first symbol position. *) + symbol_table := symbol_table + 4; + + .symbol_table_lookup_loop; + if symbol_table_length = 0 then + goto .symbol_table_lookup_end; + end; + + (* Symbol name pointer and length. *) + current_name := _load_word(symbol_table); + current_length := _load_word(symbol_table + 4); + + (* If lengths don't match, continue with the next symbol. 
*) + if name_length <> current_length then + goto .symbol_table_lookup_repeat; + end; + (* If names don't match, continue with the next symbol. *) + if _memcmp(symbol_name, current_name, name_length) <> 0 then + goto .symbol_table_lookup_repeat; + end; + (* Otherwise, the symbol is found. *) + result := _load_word(symbol_table + 8); + goto .symbol_table_lookup_end; + + .symbol_table_lookup_repeat; + symbol_table := symbol_table + 12; + symbol_table_length := symbol_table_length + -1; + goto .symbol_table_lookup_loop; + + .symbol_table_lookup_end; + return result +end; + +(* Inserts a symbol into the table. *) + +(* Parameters: *) +(* symbol_table - Symbol table. *) +(* symbol_name - Symbol name pointer. *) +(* name_length - Symbol name length. *) +(* symbol - Symbol pointer. *) +proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); +var + table_length: Word; + symbol_pointer: Word; +begin + (* The first word in the symbol table is its length, get it. *) + table_length := _load_word(symbol_table); + + (* Calculate the offset for the new symbol. *) + symbol_pointer := table_length * 12; + symbol_pointer := symbol_pointer + 4; + symbol_pointer := symbol_table + symbol_pointer; + + _store_word(symbol_name, symbol_pointer); + symbol_pointer := symbol_pointer + 4; + _store_word(name_length, symbol_pointer); + symbol_pointer := symbol_pointer + 4; + _store_word(symbol, symbol_pointer); + + (* Increment the symbol table length. *) + table_length := table_length + 1; + _store_word(table_length, symbol_table); +end; + +proc _symbol_table_build(); +begin + (* Set the table length to 0. *) + _store_word(0, @symbol_table_global); + + (* Enter built-in symbols. 
*) + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_bool, 4, @symbol_type_info_bool); +end; + + +(* Classification table assigns each possible character to a group (class). All *) +(* characters of the same group are handled equivalently. *) + +(* Transition = record *) +(* action: TransitionAction; *) +(* next_state: TransitionState *) +(* end; *) + +proc _lexer_class_invalid(); +begin + return 1 +end; + +proc _lexer_class_digit(); +begin + return 2 +end; + +proc _lexer_class_alpha(); +begin + return 3 +end; + +proc _lexer_class_space(); +begin + return 4 +end; + +proc _lexer_class_colon(); +begin + return 5 +end; + +proc _lexer_class_equals(); +begin + return 6 +end; + +proc _lexer_class_left_paren(); +begin + return 7 +end; + +proc _lexer_class_right_paren(); +begin + return 8 +end; + +proc _lexer_class_asterisk(); +begin + return 9 +end; + +proc _lexer_class_underscore(); +begin + return 10 +end; + +proc _lexer_class_single(); +begin + return 11 +end; + +proc _lexer_class_hex(); +begin + return 12 +end; + +proc _lexer_class_zero(); +begin + return 13 +end; + +proc _lexer_class_x(); +begin + return 14 +end; + +proc _lexer_class_eof(); +begin + return 15 +end; + +proc _lexer_class_dot(); +begin + return 16 +end; + +proc _lexer_class_minus(); +begin + return 17 +end; + +proc _lexer_class_single_quote(); +begin + return 18 +end; + +proc _lexer_class_double_quote(); +begin + return 19 +end; + +proc _lexer_class_greater(); +begin + return 20 +end; + +proc _lexer_class_less(); +begin + return 21 +end; + +proc _lexer_class_other(); +begin + return 22 +end; + +proc 
_lexer_state_start(); +begin + return 1 +end; + +proc _lexer_state_colon(); +begin + return 2 +end; + +proc _lexer_state_identifier(); +begin + return 3 +end; + +proc _lexer_state_decimal(); +begin + return 4 +end; + +proc _lexer_state_greater(); +begin + return 5 +end; + +proc _lexer_state_minus(); +begin + return 6 +end; + +proc _lexer_state_left_paren(); +begin + return 7 +end; + +proc _lexer_state_less(); +begin + return 8 +end; + +proc _lexer_state_dot(); +begin + return 9 +end; + +proc _lexer_state_comment(); +begin + return 10 +end; + +proc _lexer_state_closing_comment(); +begin + return 11 +end; + +proc _lexer_state_character(); +begin + return 12 +end; + +proc _lexer_state_string(); +begin + return 13 +end; + +proc _lexer_state_leading_zero(); +begin + return 14 +end; + +proc _lexer_state_decimal_suffix(); +begin + return 15 +end; + +proc _lexer_state_end(); +begin + return 16 +end; + +proc _lexer_action_none(); +begin + return 1 +end; + +proc _lexer_action_accumulate(); +begin + return 2 +end; + +proc _lexer_action_skip(); +begin + return 3 +end; + +proc _lexer_action_single(); +begin + return 4 +end; + +proc _lexer_action_eof(); +begin + return 5 +end; + +proc _lexer_action_finalize(); +begin + return 6 +end; + +proc _lexer_action_composite(); +begin + return 7 +end; + +proc _lexer_action_key_id(); +begin + return 8 +end; + +proc _lexer_action_integer(); +begin + return 9 +end; + +proc _lexer_action_delimited(); +begin + return 10 +end; + +(* Assigns a value to the array element at the given index. *) + +(* Parameters: *) +(* array - Array pointer. *) +(* index - One-based index (word offset into the array). *) +(* data - Data to assign. 
*) +proc _assign_at(array: Word, index: Word, data: Word); +var + target: Word; +begin + target := index + -1; + target := target * 4; + target := array + target; + + _store_word(data, target); +end; + +proc _get_at(array: Word, index: Word); +var + target: Word; +begin + target := index + -1; + target := target * 4; + target := array + target; + + return _load_word(target) +end; + +(* Initializes the array with character classes. *) +proc _lexer_classifications(); +var + code: Word; +begin + _assign_at(@classification, 1, 15); + _assign_at(@classification, 2, 1); + _assign_at(@classification, 3, 1); + _assign_at(@classification, 4, 1); + _assign_at(@classification, 5, 1); + _assign_at(@classification, 6, 1); + _assign_at(@classification, 7, 1); + _assign_at(@classification, 8, 1); + _assign_at(@classification, 9, 1); + _assign_at(@classification, 10, 4); + _assign_at(@classification, 11, 4); + _assign_at(@classification, 12, 1); + _assign_at(@classification, 13, 1); + _assign_at(@classification, 14, 4); + _assign_at(@classification, 15, 1); + _assign_at(@classification, 16, 1); + _assign_at(@classification, 17, 1); + _assign_at(@classification, 18, 1); + _assign_at(@classification, 19, 1); + _assign_at(@classification, 20, 1); + _assign_at(@classification, 21, 1); + _assign_at(@classification, 22, 1); + _assign_at(@classification, 23, 1); + _assign_at(@classification, 24, 1); + _assign_at(@classification, 25, 1); + _assign_at(@classification, 26, 1); + _assign_at(@classification, 27, 1); + _assign_at(@classification, 28, 1); + _assign_at(@classification, 29, 1); + _assign_at(@classification, 30, 1); + _assign_at(@classification, 31, 1); + _assign_at(@classification, 32, 1); + _assign_at(@classification, 33, 4); + _assign_at(@classification, 34, 11); + _assign_at(@classification, 35, 19); + _assign_at(@classification, 36, 22); + _assign_at(@classification, 37, 22); + _assign_at(@classification, 38, 11); + _assign_at(@classification, 39, 11); + 
_assign_at(@classification, 40, 18); + _assign_at(@classification, 41, 7); + _assign_at(@classification, 42, 8); + _assign_at(@classification, 43, 9); + _assign_at(@classification, 44, 11); + _assign_at(@classification, 45, 11); + _assign_at(@classification, 46, 17); + _assign_at(@classification, 47, 16); + _assign_at(@classification, 48, 11); + _assign_at(@classification, 49, 13); + _assign_at(@classification, 50, 2); + _assign_at(@classification, 51, 2); + _assign_at(@classification, 52, 2); + _assign_at(@classification, 53, 2); + _assign_at(@classification, 54, 2); + _assign_at(@classification, 55, 2); + _assign_at(@classification, 56, 2); + _assign_at(@classification, 57, 2); + _assign_at(@classification, 58, 2); + _assign_at(@classification, 59, 5); + _assign_at(@classification, 60, 11); + _assign_at(@classification, 61, 21); + _assign_at(@classification, 62, 6); + _assign_at(@classification, 63, 20); + _assign_at(@classification, 64, 22); + _assign_at(@classification, 65, 11); + _assign_at(@classification, 66, 3); + _assign_at(@classification, 67, 3); + _assign_at(@classification, 68, 3); + _assign_at(@classification, 69, 3); + _assign_at(@classification, 70, 3); + _assign_at(@classification, 71, 3); + _assign_at(@classification, 72, 3); + _assign_at(@classification, 73, 3); + _assign_at(@classification, 74, 3); + _assign_at(@classification, 75, 3); + _assign_at(@classification, 76, 3); + _assign_at(@classification, 77, 3); + _assign_at(@classification, 78, 3); + _assign_at(@classification, 79, 3); + _assign_at(@classification, 80, 3); + _assign_at(@classification, 81, 3); + _assign_at(@classification, 82, 3); + _assign_at(@classification, 83, 3); + _assign_at(@classification, 84, 3); + _assign_at(@classification, 85, 3); + _assign_at(@classification, 86, 3); + _assign_at(@classification, 87, 3); + _assign_at(@classification, 88, 3); + _assign_at(@classification, 89, 3); + _assign_at(@classification, 90, 3); + _assign_at(@classification, 91, 3); + 
_assign_at(@classification, 92, 11); + _assign_at(@classification, 93, 22); + _assign_at(@classification, 94, 11); + _assign_at(@classification, 95, 11); + _assign_at(@classification, 96, 10); + _assign_at(@classification, 97, 22); + _assign_at(@classification, 98, 12); + _assign_at(@classification, 99, 12); + _assign_at(@classification, 100, 12); + _assign_at(@classification, 101, 12); + _assign_at(@classification, 102, 12); + _assign_at(@classification, 103, 12); + _assign_at(@classification, 104, 3); + _assign_at(@classification, 105, 3); + _assign_at(@classification, 106, 3); + _assign_at(@classification, 107, 3); + _assign_at(@classification, 108, 3); + _assign_at(@classification, 109, 3); + _assign_at(@classification, 110, 3); + _assign_at(@classification, 111, 3); + _assign_at(@classification, 112, 3); + _assign_at(@classification, 113, 3); + _assign_at(@classification, 114, 3); + _assign_at(@classification, 115, 3); + _assign_at(@classification, 116, 3); + _assign_at(@classification, 117, 3); + _assign_at(@classification, 118, 3); + _assign_at(@classification, 119, 3); + _assign_at(@classification, 120, 3); + _assign_at(@classification, 121, 14); + _assign_at(@classification, 122, 3); + _assign_at(@classification, 123, 3); + _assign_at(@classification, 124, 22); + _assign_at(@classification, 125, 11); + _assign_at(@classification, 126, 22); + _assign_at(@classification, 127, 11); + _assign_at(@classification, 128, 1); + + code := 129; + + (* Set the remaining 129 - 256 bytes to transitionClassOther. *) + .create_classification_loop; + _assign_at(@classification, code, 22); + code := code + 1; + + if code < 257 then + goto .create_classification_loop; + end; +end; + +proc _lexer_get_transition(current_state: Word, character_class: Word); +var + transition_table: Word; + row_position: Word; + column_position: Word; + target: Word; +begin + (* Each state is 8 bytes long (2 words: action and next state). 
*) + (* There are 22 character classes, so a transition row is 8 * 22 = 176 bytes long. *) + row_position := current_state + -1; + row_position := row_position * 176; + + column_position := character_class + -1; + column_position := column_position * 8; + + target := _lexer_get_transition_table() + row_position; + + return target + column_position +end; + +(* Parameters: *) +(* current_state - First index into transitions table. *) +(* character_class - Second index into transitions table. *) +(* action - Action to assign. *) +(* next_state - Next state to assign. *) +proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); +var + transition: Word; +begin + transition := _lexer_get_transition(current_state, character_class); + + _lexer_transition_set_action(transition, action); + _lexer_transition_set_state(transition, next_state); +end; + +(* Sets the same action and state transition for all character classes in one transition row. *) + +(* Parameters: *) +(* current_state - Current state (Transition state enumeration). *) +(* default_action - Default action (Callback). *) +(* next_state - Next state (Transition state enumeration). 
*) +proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); +begin + _lexer_set_transition(current_state, _lexer_class_invalid(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_digit(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_alpha(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_space(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_colon(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_equals(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_left_paren(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_right_paren(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_asterisk(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_underscore(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_single(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_hex(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_zero(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_x(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_eof(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_dot(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_minus(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_single_quote(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_double_quote(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_greater(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_less(), 
default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_other(), default_action, next_state); +end; + + +(* The transition table describes transitions from one state to another, given *) +(* a symbol (character class). *) + +(* The table has m rows and n columns, where m is the amount of states and n is *) +(* the amount of classes. So given the current state and a classified character *) +(* the table can be used to look up the next state. *) +proc _lexer_transitions(); +begin + (* Start state. *) + _lexer_set_transition(_lexer_state_start(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_greater()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); + 
_lexer_set_transition(_lexer_state_start(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_accumulate(), _lexer_state_dot()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_greater(), _lexer_action_accumulate(), _lexer_state_greater()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_less(), _lexer_action_accumulate(), _lexer_state_less()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_other(), _lexer_action_none(), _lexer_state_end()); + + (* Colon state. *) + _lexer_default_transition(_lexer_state_colon(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_colon(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); + + (* Identifier state. 
*) + _lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); + + (* Decimal state. *) + _lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); + + (* Greater state. 
*) + _lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_greater(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); + + (* Minus state. *) + _lexer_default_transition(_lexer_state_minus(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_minus(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); + + (* Left paren state. *) + _lexer_default_transition(_lexer_state_left_paren(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_left_paren(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_comment()); + + (* Less state. *) + _lexer_default_transition(_lexer_state_less(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_less(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_less(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); + + (* Dot state. *) + _lexer_default_transition(_lexer_state_dot(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_dot(), _lexer_class_dot(), _lexer_action_composite(), _lexer_state_end()); + + (* Comment. *) + _lexer_default_transition(_lexer_state_comment(), _lexer_action_accumulate(), _lexer_state_comment()); + _lexer_set_transition(_lexer_state_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); + _lexer_set_transition(_lexer_state_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + + (* Closing comment. 
*) + _lexer_default_transition(_lexer_state_closing_comment(), _lexer_action_accumulate(), _lexer_state_comment()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_right_paren(), _lexer_action_delimited(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + + (* Character. *) + _lexer_default_transition(_lexer_state_character(), _lexer_action_accumulate(), _lexer_state_character()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end()); + + (* String. *) + _lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end()); + + (* Leading zero. 
*) + _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); + + (* Digit with a character suffix. *) + _lexer_default_transition(_lexer_state_decimal_suffix(), _lexer_action_integer(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); +end; + +(* Transition table is saved after character classification table. *) +(* Each character entry is 1 word long and there are 256 characters. *) +(* 1024 = 256 * 4 *) +proc _lexer_get_transition_table(); +begin + return @classification + 1024 +end; + +(* Lexer state is saved after the transition tables. *) +(* Each transition table entry is 8 bytes long. 
The table has 16 rows (transition states) *) +(* and 22 columns (character classes), so 2816 = 8 * 16 * 22. *) +proc _lexer_global_state(); +begin + return _lexer_get_transition_table() + 2816 +end; + +(* Gets pointer to the token start. *) +proc _lexer_global_start(); +begin + return _lexer_global_state() + 4 +end; + +(* Gets pointer to the token end. *) +proc _lexer_global_end(); +begin + return _lexer_global_start() + 4 +end; + +proc _lexer_transition_get_action(transition: Word); +begin + return _load_word(transition) +end; + +proc _lexer_transition_set_action(transition: Word, action: Word); +begin + _store_word(action, transition); +end; + +proc _lexer_transition_get_state(transition: Word); +begin + return _load_word(transition + 4) +end; + +proc _lexer_transition_set_state(transition: Word, state: Word); +begin + _store_word(state, transition + 4); +end; + +(* Resets the lexer state for reading the next token. *) +proc _lexer_reset(); +var + state: Word; + current: Word; +begin + (* Transition start state is 1. *) + state := _lexer_global_state(); + _store_word(_lexer_state_start(), state); + + current := _lexer_global_start(); + _store_word(source_code_position, current); + + current := _lexer_global_end(); + _store_word(source_code_position, current); +end; + +(* One time lexer initialization. 
*) +proc _lexer_initialize(); +begin + _lexer_classifications(); + _lexer_transitions(); +end; + +proc _lexer_next_transition(); +var + current_character: Word; + character_class: Word; + current_state: Word; +begin + current_character := _lexer_global_end(); + current_character := _load_word(current_character); + current_character := _load_byte(current_character); + + character_class := _get_at(@classification, current_character + 1); + + current_state := _lexer_global_state(); + current_state := _load_word(current_state); + + return _lexer_get_transition(current_state, character_class) +end; + +proc _lexer_token_kind_identifier(); +begin + return 1 +end; + +proc _lexer_token_kind_const(); +begin + return 2 +end; + +proc _lexer_token_kind_var(); +begin + return 3 +end; + +proc _lexer_token_kind_proc(); +begin + return 4 +end; + +proc _lexer_token_kind_type(); +begin + return 5 +end; + +proc _lexer_token_kind_begin(); +begin + return 6 +end; + +proc _lexer_token_kind_end(); +begin + return 7 +end; + +proc _lexer_token_kind_if(); +begin + return 8 +end; + +proc _lexer_token_kind_then(); +begin + return 9 +end; + +proc _lexer_token_kind_else(); +begin + return 10 +end; + +proc _lexer_token_kind_elsif(); +begin + return 11 +end; + +proc _lexer_token_kind_while(); +begin + return 12 +end; + +proc _lexer_token_kind_do(); +begin + return 13 +end; + +proc _lexer_token_kind_extern(); +begin + return 14 +end; + +proc _lexer_token_kind_record(); +begin + return 15 +end; + +proc _lexer_token_kind_union(); +begin + return 16 +end; + +proc _lexer_token_kind_true(); +begin + return 17 +end; + +proc _lexer_token_kind_false(); +begin + return 18 +end; + +proc _lexer_token_kind_nil(); +begin + return 19 +end; + +proc _lexer_token_kind_and(); +begin + return 20 +end; + +proc _lexer_token_kind_or(); +begin + return 21 +end; + +proc _lexer_token_kind_xor(); +begin + return 22 +end; + +proc _lexer_token_kind_pipe(); +begin + return 23 +end; + +proc _lexer_token_kind_not(); +begin + 
return 64 +end; + +proc _lexer_token_kind_return(); +begin + return 24 +end; + +proc _lexer_token_kind_module(); +begin + return 25 +end; + +proc _lexer_token_kind_program(); +begin + return 26 +end; + +proc _lexer_token_kind_import(); +begin + return 27 +end; + +proc _lexer_token_kind_cast(); +begin + return 28 +end; + +proc _lexer_token_kind_defer(); +begin + return 29 +end; + +proc _lexer_token_kind_case(); +begin + return 30 +end; + +proc _lexer_token_kind_of(); +begin + return 31 +end; + +proc _lexer_token_kind_trait(); +begin + return 32 +end; + +proc _lexer_token_kind_left_paren(); +begin + return 33 +end; + +proc _lexer_token_kind_right_paren(); +begin + return 34 +end; + +proc _lexer_token_kind_left_square(); +begin + return 35 +end; + +proc _lexer_token_kind_right_square(); +begin + return 36 +end; + +proc _lexer_token_kind_shift_left(); +begin + return 37 +end; + +proc _lexer_token_kind_shift_right(); +begin + return 38 +end; + +proc _lexer_token_kind_greater_equal(); +begin + return 39 +end; + +proc _lexer_token_kind_less_equal(); +begin + return 40 +end; + +proc _lexer_token_kind_greater_than(); +begin + return 41 +end; + +proc _lexer_token_kind_less_than(); +begin + return 42 +end; + +proc _lexer_token_kind_not_equal(); +begin + return 43 +end; + +proc _lexer_token_kind_equals(); +begin + return 44 +end; + +proc _lexer_token_kind_semicolon(); +begin + return 45 +end; + +proc _lexer_token_kind_dot(); +begin + return 46 +end; + +proc _lexer_token_kind_comma(); +begin + return 47 +end; + +proc _lexer_token_kind_plus(); +begin + return 48 +end; + +proc _lexer_token_kind_arrow(); +begin + return 49 +end; + +proc _lexer_token_kind_minus(); +begin + return 50 +end; + +proc _lexer_token_kind_multiplication(); +begin + return 51 +end; + +proc _lexer_token_kind_division(); +begin + return 52 +end; + +proc _lexer_token_kind_remainder(); +begin + return 53 +end; + +proc _lexer_token_kind_assignment(); +begin + return 54 +end; + +proc _lexer_token_kind_colon(); 
+begin + return 55 +end; + +proc _lexer_token_kind_hat(); +begin + return 56 +end; + +proc _lexer_token_kind_at(); +begin + return 57 +end; + +proc _lexer_token_kind_exclamation(); +begin + return 58 +end; + +proc _lexer_token_kind_string(); +begin + return 59 +end; + +proc _lexer_token_kind_character(); +begin + return 60 +end; + +proc _lexer_token_kind_integer(); +begin + return 61 +end; + +proc _lexer_token_kind_word(); +begin + return 62 +end; + +proc _lexer_token_kind_goto(); +begin + return 63 +end; + +proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0; + end; + return result +end; + +proc _lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + token_length: Word; +begin + result := _lexer_token_kind_identifier(); + token_length := position_end + -position_start; + + if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then + result := _lexer_token_kind_const(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then + result := _lexer_token_kind_var(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then + result := _lexer_token_kind_proc(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then + result := _lexer_token_kind_type(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then + result := _lexer_token_kind_begin(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then + result := _lexer_token_kind_end(); + goto .lexer_classify_keyword_end; + end; + if 
_lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then + result := _lexer_token_kind_return(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then + result := _lexer_token_kind_goto(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then + result := _lexer_token_kind_if(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then + result := _lexer_token_kind_while(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then + result := _lexer_token_kind_then(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then + result := _lexer_token_kind_else(); + goto .lexer_classify_keyword_end; + end; + if _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then + result := _lexer_token_kind_elsif(); + goto .lexer_classify_keyword_end; + end; + .lexer_classify_keyword_end; + return result +end; + +proc _lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := _lexer_token_kind_colon(); + goto .lexer_classify_finalize_result; + end; + if character = '.' 
then + result := _lexer_token_kind_dot(); + goto .lexer_classify_finalize_result; + end; + .lexer_classify_finalize_result; + return result +end; + +proc _lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := _lexer_token_kind_semicolon(); + end; + return result +end; + +proc _lexer_execute_action(action_to_perform: Word, kind: Word); +var + pointer_start: Word; + pointer_end: Word; + position_start: Word; + position_end: Word; + intermediate: Word; +begin + pointer_start := _lexer_global_start(); + position_start := _load_word(pointer_start); + pointer_end := _lexer_global_end(); + position_end := _load_word(pointer_end); + + if action_to_perform = _lexer_action_none() then + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_accumulate() then + _store_word(position_end + 1, pointer_end); + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_skip() then + _store_word(position_start + 1, pointer_start); + _store_word(position_end + 1, pointer_end); + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_single() then + _store_word(position_end + 1, pointer_end); + + intermediate := _lexer_classify_single(position_start); + _store_word(intermediate, kind); + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_eof() then + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_finalize() then + intermediate := _lexer_classify_finalize(position_start); + _store_word(intermediate, kind); + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_composite() then + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_key_id() then + intermediate := _lexer_classify_keyword(position_start, position_end); + _store_word(intermediate, kind); + goto .action_to_perform_end; + end; + if 
action_to_perform = _lexer_action_integer() then + goto .action_to_perform_end; + end; + if action_to_perform = _lexer_action_delimited() then + (* _store_word(position_end + 1, pointer_end); *) + goto .action_to_perform_end; + end; + + .action_to_perform_end; +end; + +proc _lexer_execute_transition(kind: Word); +var + next_transition: Word; + next_state: Word; + global_state: Word; + action_to_perform: Word; +begin + next_transition := _lexer_next_transition(); + next_state := _lexer_transition_get_state(next_transition); + action_to_perform := _lexer_transition_get_action(next_transition); + + global_state := _lexer_global_state(); + + _store_word(next_state, global_state); + _lexer_execute_action(action_to_perform, kind); + + return next_state +end; + +proc _lexer_advance_token(kind: Word); +begin + if _lexer_execute_transition(kind) <> _lexer_state_end() then + _lexer_advance_token(kind); + end; +end; + +(* Reads the next token. *) + +(* Returns token length in a0. *) +proc _lexer_read_token(kind: Word); +var + new_position: Word; +begin + _lexer_reset(); + _lexer_advance_token(kind); + + new_position := _lexer_global_end(); + return _load_word(new_position) + -source_code_position +end; + +(* Advances the token stream past the last read token. *) +proc _lexer_skip_token(); +var + new_position: Word; +begin + new_position := _lexer_global_end(); + source_code_position := _load_word(new_position); +end; + +(* Entry point. *) +proc _start(); +var + last_read: Word; + offset: Word; +begin + _lexer_initialize(); + _symbol_table_build(); + + (* Read the source from the standard input. *) + offset := @source_code; + + .start_read; + (* Second argument is buffer size. When modifying it, update the source_code definition as well. 
*) + last_read := _read_file(offset, 81920); + if last_read > 0 then + offset := offset + last_read; + goto .start_read; + end; + _compile(); + + _exit(0); +end; diff --git a/boot/stage14.elna b/boot/stage14.elna deleted file mode 100644 index 5566518..0000000 --- a/boot/stage14.elna +++ /dev/null @@ -1,3053 +0,0 @@ -(* - * This Source Code Form is subject to the terms of the Mozilla Public License, - * v. 2.0. If a copy of the MPL was not distributed with this file, You can - * obtain one at https://mozilla.org/MPL/2.0/. - *) - -(* Stage 14 compiler. *) - -(* - Binary minus. *) -(* - Space independent parsing. *) -(* - Label names in goto statements aren't required to begin with a dot. *) -(* - Dereferencing pointers pointing to word long data. *) -(* - Enumeration type. *) - -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_array := "Array"; - - (* Every type info starts with a word describing what type it is. - - PRIMITIVE_TYPE = 1 - ENUMERATION_TYPE = 2 - - Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_array := S(1, 4); - - (* Info objects start with a word describing its type. - - TYPE_INFO = 1 - PARAMETER_INFO = 2 - TEMPORARY_INFO = 3 - - Type info has the type it belongs to. 
*) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_array := S(1, @symbol_builtin_type_array); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - - (* To reserve memory just add the value of needed bytes to the memory_free_pointer_variable. *) - memory: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - - (* Points to a segment of free memory. *) - memory_free_pointer: Word := @memory; - -(** - * Calculates and returns the string token length between quotes, including the - * escaping slash characters. - * - * Parameters: - * string - String token pointer. - * - * Returns the length in a0. - *) -proc _string_length(string: Word); -var - counter: Word; -begin - (* Reset the counter. *) - counter := 0; - - .string_length_loop; - string := string + 1; - - if _load_byte(string) <> '"' then - counter := counter + 1; - goto .string_length_loop - end; - - return counter -end; - -(** - * Adds a string to the global, read-only string storage. - * - * Parameters: - * string - String token. - * - * Returns the offset from the beginning of the storage to the new string in a0. 
- *) -proc _add_string(string: Word); -var - contents: Word; - result: Word; - current_byte: Word; -begin - contents := string + 1; - result := compiler_strings_length; - - .add_string_loop; - if _load_byte(contents) <> '"' then - current_byte := _load_byte(contents); - _store_byte(current_byte, compiler_strings_position); - compiler_strings_position := compiler_strings_position + 1; - contents := contents + 1; - - if current_byte <> '\\' then - compiler_strings_length := compiler_strings_length + 1 - end; - goto .add_string_loop - end; - - return result -end; - -(** - * Reads standard input into a buffer. - * - * Parameters: - * buffer - Buffer pointer. - * size - Buffer size. - * - * Returns the amount of bytes written in a0. - *) -proc _read_file(buffer: Word, size: Word); - return _syscall(0, buffer, size, 0, 0, 0, 63) -end; - -(** - * Writes to the standard output. - * - * Parameters: - * buffer - Buffer. - * size - Buffer length. - *) -proc _write_s(buffer: Word, size: Word); -begin - _syscall(1, buffer, size, 0, 0, 0, 64) -end; - -(** - * Writes a number to a string buffer. - * - * Parameters: - * number - Whole number. - * output_buffer - Buffer pointer. - * - * Sets a0 to the length of the written number. 
- *) -proc _print_i(number: Word, output_buffer: Word); -var - local_buffer: Word; - is_negative: Word; - current_character: Word; - result: Word; -begin - local_buffer := @result + 11; - - if number >= 0 then - is_negative := 0 - else - number = -number; - is_negative := 1 - end; - - .print_i_digit10; - current_character := number % 10; - _store_byte(current_character + '0', local_buffer); - - number := number / 10; - local_buffer := local_buffer + -1; - - if number <> 0 then - goto .print_i_digit10 - end; - if is_negative = 1 then - _store_byte('-', local_buffer); - local_buffer := local_buffer + -1 - end; - result := @result + 11; - result := result + -local_buffer; - _memcpy(output_buffer, local_buffer + 1, result); - - return result -end; - -(** - * Writes a number to the standard output. - * - * Parameters: - * number - Whole number. - *) -proc _write_i(number: Word); -var - local_buffer: Word; - length: Word; -begin - length := _print_i(number, @local_buffer); - _write_s(@local_buffer, length) -end; - -(** - * Writes a character from a0 into the standard output. - * - * Parameters: - * character - Character to write. - *) -proc _write_c(character: Word); -begin - _write_s(@character, 1) -end; - -(** - * Write null terminated string. - * - * Parameters: - * string - String. - *) -proc _write_z(string: Word); -var - next_byte: Word; -begin - (* Check for 0 character. *) - next_byte := _load_byte(string); - - if next_byte <> 0 then - (* Print a character. *) - _write_c(next_byte); - - (* Advance the input string by one byte. *) - _write_z(string + 1) - end -end; - -(** - * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. - *) -proc _is_upper(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'A'; - rhs := character <= 'Z'; - - return lhs & rhs - -end; - -(** - * Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
- *) -proc _is_lower(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'a'; - rhs := character <= 'z'; - - return lhs & rhs -end; - -(** - * Detects if the passed character is a 7-bit alpha character or an underscore. - * - * Paramters: - * character - Tested character. - * - * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. - *) -proc _is_alpha(character: Word); -var - is_upper_result: Word; - is_lower_result: Word; - is_alpha_result: Word; - is_underscore: Word; -begin - is_upper_result := _is_upper(character); - is_lower_result := _is_lower(character); - is_underscore := character = '_'; - - is_alpha_result := is_lower_result or is_upper_result; - return is_alpha_result or is_underscore -end; - -(** - * Detects whether the passed character is a digit (a value between 0 and 9). - * - * Parameters: - * character - Exemined value. - * - * Sets a0 to 1 if it is a digit, to 0 otherwise. - *) -proc _is_digit(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= '0'; - rhs := character <= '9'; - - return lhs & rhs -end; - -proc _is_alnum(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := _is_alpha(character); - rhs := _is_digit(character); - - return lhs or rhs -end; - -(** - * Parameters: - * lhs - First pointer. - * rhs - Second pointer. - * count - The length to compare. - * - * Returns 0 if memory regions are equal. - *) -proc _memcmp(lhs: Word, rhs: Word, count: Word); -var - lhs_byte: Word; - rhs_byte: Word; - result: Word; -begin - result := 0; - - .memcmp_loop; - if count <> 0 then - lhs_byte := _load_byte(lhs); - rhs_byte := _load_byte(rhs); - result := lhs_byte + -rhs_byte; - - lhs := lhs + 1; - rhs := rhs + 1; - count := count + -1; - - if result = 0 then - goto .memcmp_loop - end - end; - - return result -end; - -(** - * Copies memory. - * - * Parameters: - * destination - Destination. - * source - Source. - * count - Size. 
- * - * Returns the destination. - *) -proc _memcpy(destination: Word, source: Word, count: Word); -var - current_byte: Word; -begin - .memcpy_loop; - if count <> 0 then - current_byte := _load_byte(source); - _store_byte(current_byte, destination); - - destination := destination + 1; - source := source + 1; - count := count + -1; - goto .memcpy_loop - end; - - return destination -end; - -proc _compile_integer_literal(); -var - integer_token: Word; - integer_length: Word; - token_kind: Word; -begin - _write_z("\tli t0, \0"); - - integer_token := _lexer_global_get_start(); - integer_length := _lexer_global_get_end() + -integer_token; - - _write_s(integer_token, integer_length); - _lexer_skip_token(); - - _write_c('\n') -end; - -proc _compile_character_literal(); -var - character: Word; - token_kind: Word; - character_length: Word; -begin - character := _lexer_global_get_start(); - character_length := _lexer_global_get_end() + -character; - - _write_z("\tli t0, \0"); - _write_s(character, character_length); - _write_c('\n'); - _lexer_skip_token() -end; - -proc _compile_variable_expression(); -var - name: Word; - lookup_result: Word; - name_token: Word; -begin - name := _lexer_global_get_start(); - name_token := _lexer_global_get_end() + -name; - lookup_result := _symbol_table_lookup(@symbol_table_global, name, name_token); - - if lookup_result <> 0 then - _compile_enumeration_value(lookup_result) - else - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0") - end -end; - -(** - * Compiled take address expression, starting with an "@" sign. - *) -proc _compile_address_expression(); -begin - _lexer_skip_token(); - _compile_designator() -end; - -(** - * Compile unary negation, "-" sign. - *) -proc _compile_negate_expression(); -begin - _lexer_skip_token(); - _compile_term(); - _write_z("\tneg t0, t0\n\0") -end; - -(* Compile unary negation, "~" sign. 
*) -proc _compile_not_expression(); -var - token_kind: Word; -begin - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _compile_term(); - _write_z("\tnot t0, t0\n\0") -end; - -proc _compile_string_literal(); -var - token_kind: Word; - token_start: Word; - length: Word; - offset: Word; -begin - _lexer_read_token(@token_kind); - token_start := _lexer_global_get_start(); - length := _string_length(token_start); - offset := _add_string(token_start); - - _lexer_skip_token(); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(offset); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0") -end; - -proc _compile_term(); -var - current_character: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_character() then - _compile_character_literal() - elsif token_kind = _lexer_token_kind_string() then - _compile_string_literal() - elsif token_kind = _lexer_token_kind_integer() then - _compile_integer_literal() - elsif token_kind = _lexer_token_kind_at() then - _compile_address_expression() - elsif token_kind = _lexer_token_kind_minus() then - _compile_negate_expression() - elsif token_kind = _lexer_token_kind_not() then - _compile_not_expression() - elsif token_kind = _lexer_token_kind_identifier() then - current_character := _lexer_global_get_start(); - current_character := _load_byte(current_character); - - (* This is a call if the statement starts with an underscore. *) - if current_character = '_' then - _compile_call(); - _write_z("\tmv t0, a0\n\0") - else - _compile_variable_expression() - end - end -end; - -proc _compile_binary_rhs(); -begin - (* Save the value of the left expression on the stack. 
*) - _write_z("\tsw t0, 64(sp)\n\0"); - _compile_term(); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 64(sp)\n\0") -end; - -proc _compile_expression(); -var - token_kind: Word; -begin - _compile_term(); - - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_plus() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tadd t0, t0, t1\n\0") - elsif token_kind = _lexer_token_kind_minus() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tsub t0, t1, t0\n\0"); - elsif token_kind = _lexer_token_kind_multiplication() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tmul t0, t0, t1\n\0") - elsif token_kind = _lexer_token_kind_and() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0") - elsif token_kind = _lexer_token_kind_or() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tor t0, t0, t1\n\0") - elsif token_kind = _lexer_token_kind_xor() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\n\0") - elsif token_kind = _lexer_token_kind_equals() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0") - elsif token_kind = _lexer_token_kind_remainder() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\trem t0, t1, t0\n\0") - elsif token_kind = _lexer_token_kind_division() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tdiv t0, t1, t0\n\0") - elsif token_kind = _lexer_token_kind_less_than() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tslt t0, t1, t0\n\0") - elsif token_kind = _lexer_token_kind_greater_than() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\0") - elsif token_kind = _lexer_token_kind_less_equal() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0") - elsif token_kind = _lexer_token_kind_not_equal() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0") - elsif token_kind = _lexer_token_kind_greater_equal() then - _lexer_skip_token(); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0") - end; - - .compile_expression_end; -end; - -proc _compile_call(); -var - name_length: Word; - name: Word; - argument_count: Word; - stack_offset: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name; - argument_count := 0; - - (* Skip the identifier and left paren. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_right_paren() then - goto .compile_call_finalize - end; - .compile_call_loop; - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - stack_offset := argument_count * 4; - _write_i(116 + -stack_offset); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - argument_count := argument_count + 1; - - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_comma() then - goto .compile_call_finalize - end; - _lexer_skip_token(); - goto .compile_call_loop; - - .compile_call_finalize; - (* Load the argument from the stack. 
*) - if argument_count <> 0 then - (* Decrement the argument counter. *) - argument_count := argument_count + -1; - - _write_z("\tlw a\0"); - _write_i(argument_count); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - stack_offset := argument_count * 4; - _write_i(116 + -stack_offset); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize - end; - - .compile_call_end; - _write_z("\tcall \0"); - _write_s(name, name_length); - _write_c('\n'); - - (* Skip the right paren. *) - _lexer_read_token(@token_kind); - _lexer_skip_token() -end; - -proc _compile_goto(); -var - next_token: Word; - next_length: Word; - token_kind: Word; -begin - _lexer_skip_token(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_dot() then - _lexer_skip_token(); - _lexer_read_token(@token_kind) - end; - next_token := _lexer_global_get_start(); - next_length := _lexer_global_get_end() + -next_token; - - _write_z("\tj .\0"); - - _write_s(next_token, next_length); - _lexer_skip_token() -end; - -proc _compile_local_designator(symbol: Word); -var - variable_offset: Word; -begin - _write_z("\taddi t0, sp, \0"); - variable_offset := _parameter_info_get_offset(symbol); - _write_i(variable_offset); - _write_c('\n'); - _lexer_skip_token() -end; - -proc _compile_global_designator(); -var - name: Word; - token_kind: Word; - token_length: Word; -begin - _write_z("\tla t0, \0"); - - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - token_length := _lexer_global_get_end() + -name; - _write_s(name, token_length); - _lexer_skip_token(); - - _write_c('\n') -end; - -proc _compile_enumeration_value(symbol: Word); -var - enumeration_type: Word; - members: Word; - members_length: Word; - token_type: Word; - value_name: Word; - name_length: Word; - member_name: Word; - member_length: Word; - counter: Word; -begin - enumeration_type := _type_info_get_type(symbol); - members := _enumeration_type_get_members(enumeration_type); - 
members_length := _enumeration_type_get_length(enumeration_type); - - (* Skip enumeration type name and dot. Read the enumeration value. *) - _lexer_skip_token(); - _lexer_read_token(@token_type); - _lexer_skip_token(); - _lexer_read_token(@token_type); - - value_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -value_name; - _lexer_skip_token(); - counter := 1; - - .compile_enumeration_value_members; - if members_length > 0 then - member_name := _load_word(members); - member_length := _load_word(members + 4); - - if _lexer_compare_keyword(value_name, name_length, member_name, member_length) = 0 then - members_length := members_length + -1; - members := members + 8; - counter := counter + 1; - goto .compile_enumeration_value_members - end; - _write_z("\tli t0, \0"); - _write_i(counter); - _write_c('\n') - end -end; - -proc _compile_designator(); -var - name_token: Word; - lookup_result: Word; - token_kind: Word; - name: Word; -begin - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - name_token := _lexer_global_get_end() + -name; - lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); - - if lookup_result <> 0 then - _compile_local_designator(lookup_result) - else - _compile_global_designator() - end; - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_hat() then - _lexer_skip_token(); - _write_z("\tlw t0, (t0)\n\0") - end -end; - -proc _compile_assignment(); -var - token_kind: Word; -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 60(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - (* Compile the assignment. *) - _compile_expression(); - - _write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0") -end; - -proc _compile_return_statement(); -var - token_kind: Word; -begin - (* Skip "return" keyword and whitespace after it. 
*) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _compile_expression(); - _write_z("\tmv a0, t0\n\0") -end; - -(** - * Writes a label, .Ln, where n is a unique number. - * - * Parameters: - * counter - Label counter. - *) -proc _write_label(counter: Word); -begin - _write_z(".L\0"); - _write_i(counter) -end; - -proc _compile_condition(after_end_label: Word); -var - condition_label: Word; - token_kind: Word; -begin - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - (* condition_label is the label in front of the next elsif condition or end. *) - condition_label := label_counter; - label_counter := label_counter + 1; - - _write_z("\tbeqz t0, \0"); - _write_label(condition_label); - _write_c('\n'); - - _compile_statement_list(); - - _write_z("\tj \0"); - _write_label(after_end_label); - _write_c('\n'); - - _write_label(condition_label); - _write_z(":\n\0") -end; - -proc _compile_if(); -var - after_end_label: Word; - condition_label: Word; - token_kind: Word; -begin - (* Skip "if ". *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - after_end_label := label_counter; - label_counter := label_counter + 1; - - _compile_condition(after_end_label); - .compile_if_loop; - - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_else() then - _lexer_skip_token(); - _compile_statement_list() - elsif token_kind = _lexer_token_kind_elsif() then - _lexer_skip_token(); - _compile_condition(after_end_label); - - goto .compile_if_loop - end; - _lexer_skip_token(); - - _write_label(after_end_label); - _write_z(":\n\0") -end; - -proc _compile_label_declaration(); -var - label_token: Word; - token_kind: Word; - name: Word; -begin - (* Skip the dot. 
*) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - label_token := _lexer_global_get_end() + -name; - _write_c('.'); - _write_s(name, label_token); - _write_z(":\n\0"); - _lexer_skip_token() -end; - -proc _compile_statement(); -var - current_byte: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_goto() then - _compile_goto() - elsif token_kind = _lexer_token_kind_if() then - _compile_if() - elsif token_kind = _lexer_token_kind_return() then - _compile_return_statement() - elsif token_kind = _lexer_token_kind_dot() then - _compile_label_declaration() - elsif token_kind = _lexer_token_kind_identifier() then - current_byte := _lexer_global_get_start(); - current_byte := _load_byte(current_byte); - - (* This is a call if the statement starts with an underscore. *) - if current_byte = '_' then - _compile_call() - else - _compile_assignment() - end - end; - _write_c('\n') -end; - -proc _compile_statement_list(); -var - token_kind: Word; -begin - _skip_empty_lines(); - _compile_statement(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_semicolon() then - _lexer_skip_token(); - _compile_statement_list() - end; - _skip_empty_lines() -end; - -(** - * Writes a regster name to the standard output. - * - * Parameters: - * register_character - Register character. - * register_number - Register number. 
- *) -proc _write_register(register_character: Word, register_number: Word); -begin - _write_c(register_character); - _write_c(register_number + '0') (* One character is written, so only the numbers 0 to 9 produce a digit. *) -end; -(* Accessors of a type description: the kind is the word at offset 0, the size is the word at offset 4. *) -proc _type_get_kind(this: Word); - return _load_word(this) -end; - -proc _type_set_kind(this: Word, value: Word); -begin - _store_word(value, this) -end; - -proc _type_get_size(this: Word); - return _load_word(this + 4) -end; - -proc _type_set_size(this: Word, value: Word); -begin - _store_word(value, this + 4) -end; -(* Accessors of an enumeration type description: the members pointer is the word at offset 8, the member count is the word at offset 12. *) -proc _enumeration_type_get_members(this: Word); - return _load_word(this + 8) -end; - -proc _enumeration_type_set_members(this: Word, value: Word); -begin - _store_word(value, this + 8) -end; - -proc _enumeration_type_get_length(this: Word); - return _load_word(this + 12) -end; - -proc _enumeration_type_set_length(this: Word, value: Word); -begin - _store_word(value, this + 12) -end; - -(** - * Reads and creates enumeration type representation. - * - * record - * type_kind: Word; - * size: Word; - * members: StringArray; - * length: Word - * end; - * - * Every member is stored as two words (name pointer, name length). The - * members and the record itself are allocated at memory_free_pointer, - * the members first. - * - * Returns a type info created with _type_info_create for the enumeration - * type description. - *) -proc _read_type_enumeration(); -var - token_kind: Word; - enumeration_name: Word; - name_length: Word; - memory_start: Word; - member_count: Word; - result: Word; -begin - _lexer_skip_token(); (* Skip the opening parenthesis; _read_type_expression has only read it, not skipped it. *) - memory_start := memory_free_pointer; - member_count := 0; - - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_right_paren() then - goto .read_type_enumeration_end - end; - .read_type_enumeration_loop; - member_count := member_count + 1; - - enumeration_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -enumeration_name; - - _store_word(enumeration_name, memory_free_pointer); - memory_free_pointer := memory_free_pointer + 4; - - _store_word(name_length, memory_free_pointer); - memory_free_pointer := memory_free_pointer + 4; - - (* Skip the identifier.
*) - _lexer_skip_token(); - - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_comma() then - _lexer_skip_token(); - _lexer_read_token(@token_kind); - goto .read_type_enumeration_loop - end; - - .read_type_enumeration_end; - _lexer_skip_token(); (* Skip the token that ended the member list, normally the closing parenthesis. *) - - (* The resulting structure is 16 bytes long. *) - result := memory_free_pointer; - memory_free_pointer := memory_free_pointer + 16; - - (* ENUMERATION_TYPE is 2; the type size is set to 4. *) - _type_set_kind(result, 2); - _type_set_size(result, 4); - _enumeration_type_set_members(result, memory_start); - _enumeration_type_set_length(result, member_count); - - return _type_info_create(result) -end; -(* Reads a type expression: either a type name that is looked up in the global symbol table, or an enumeration in parentheses. Returns 0 for any other token. *) -proc _read_type_expression(); -var - token_kind: Word; - type_name: Word; - name_length: Word; - result: Word; -begin - result := 0; - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_identifier() then - (* Named type. NOTE(review): the lookup result is passed to _type_info_get_type without a check for 0 (name not found). Also, this branch returns the word stored in the type info, while the enumeration branch returns a type info itself - confirm that callers expect both. *) - type_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -type_name; - result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); - result := _type_info_get_type(result); - - _lexer_skip_token() - elsif token_kind = _lexer_token_kind_left_paren() then - result := _read_type_enumeration() - end; - - return result -end; -(* Returns the word at offset 4 of a type info, which is where _type_info_create stores the type representation. *) -proc _type_info_get_type(this: Word); - return _load_word(this + 4) -end; - -(** - * Parameters: - * parameter_index - Parameter index.
- *) -proc _parameter_info_create(parameter_index: Word); -var - offset: Word; - current_word: Word; - result: Word; -begin - result := memory_free_pointer; - current_word := result; - (* 2 is INFO_PARAMETER *) - _store_word(2, current_word); - - current_word := current_word + 4; - - (* Calculate the stack offset: 88 - (4 * parameter_index) *) - offset := parameter_index * 4; - _store_word(88 + -offset, current_word); - - memory_free_pointer := current_word + 4; - - return result -end; -(* Returns the stack offset stored in the second word of a parameter info. *) -proc _parameter_info_get_offset(this: Word); -begin - this := this + 4; - return _load_word(this) -end; -(* Allocates a two word info record at memory_free_pointer: the info kind (1) followed by the given type representation. Returns the record address. *) -proc _type_info_create(type_representation: Word); -var - result: Word; - current_word: Word; -begin - result := memory_free_pointer; - current_word := result; - (* 1 is INFO_TYPE *) - _store_word(1, current_word); - - current_word := current_word + 4; - _store_word(type_representation, current_word); - - memory_free_pointer := current_word + 4; - - return result -end; - -(** - * Parameters: - * temporary_index - Temporary variable index. - * - * NOTE(review): the local "offset" is declared but not used here. - *) -proc _temporary_info_create(temporary_index: Word); -var - offset: Word; - current_word: Word; - result: Word; -begin - result := memory_free_pointer; - current_word := result; - (* 3 is INFO_TEMPORARY *) - _store_word(3, current_word); - - current_word := current_word + 4; - - (* Calculate the stack offset: 4 * temporary_index. *) - _store_word(temporary_index * 4, current_word); - - memory_free_pointer := current_word + 4; - - return result -end; -(* Returns the stack offset stored in the second word of a temporary info. *) -proc _temporary_info_get_offset(this: Word); -begin - this := this + 4; - return _load_word(this) -end; - -(** - * Reads one "name: type" parameter, enters it into the local symbol table - * and writes the instruction that stores its argument register on the stack. - * - * Parameters: - * parameter_index - Parameter index. - *) -proc _read_procedure_parameter(parameter_index: Word); -var - name_length: Word; - info: Word; - name_position: Word; - token_kind: Word; -begin - (* Read the parameter name.
*) - _lexer_read_token(@token_kind); - name_position := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name_position; - _lexer_skip_token(); - - (* Skip colon and space in front of the type expression. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _read_type_expression(); (* The type is parsed, but its result is not used here. *) - (* Write: sw a<parameter_index>, <offset>(sp) *) - _write_z("\tsw a\0"); - _write_i(parameter_index); - _write_z(", \0"); - - info := _parameter_info_create(parameter_index); - _symbol_table_enter(@symbol_table_local, name_position, name_length, info); - - (* From here on info holds the stack offset, not the info record. *) - info := _parameter_info_get_offset(info); - _write_i(info); - - _write_z("(sp)\n\0") -end; -(* Reads the parenthesized, comma separated parameter list and numbers the parameters starting from 0. *) -proc _read_procedure_parameters(); -var - parameter_counter: Word; - token_kind: Word; -begin - (* Skip open paren. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - parameter_counter := 0; - - .compile_procedure_prologue_skip; - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_right_paren() then - _read_procedure_parameter(parameter_counter); - parameter_counter := parameter_counter + 1; - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_comma() then - _lexer_skip_token(); - goto .compile_procedure_prologue_skip - end - end; - (* Skip close paren. *) - _lexer_skip_token() -end; - -(** - * Parameters: - * variable_index - Variable index.
- *) -proc _read_procedure_temporary(variable_index: Word); -var - name_length: Word; - info: Word; - name_position: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - name_position := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name_position; - _lexer_skip_token(); - - (* The variable name was skipped above; skip the token between the name and the type (the colon). *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _read_type_expression(); - - info := _temporary_info_create(variable_index); - _symbol_table_enter(@symbol_table_local, name_position, name_length, info); - - (* Skip semicolon and newline after the variable declaration *) - _lexer_read_token(@token_kind); - _lexer_skip_token() -end; -(* Reads the optional "var" part of a procedure: every declaration that starts with an identifier becomes a temporary, numbered from 0. *) -proc _read_procedure_temporaries(); -var - temporary_counter: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_var() then - _lexer_skip_token(); - temporary_counter := 0; - - .read_local_variables_loop; - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_identifier() then - _read_procedure_temporary(temporary_counter); - - temporary_counter := temporary_counter + 1; - goto .read_local_variables_loop - end - end -end; -(* Compiles one procedure: clears the local symbol table, writes the .type directive and the label, a prologue that reserves a 128 byte frame and saves ra and s0, then reads parameters and temporaries, compiles the body and writes the epilogue. *) -proc _compile_procedure(); -var - name_pointer: Word; - name_length: Word; - token_kind: Word; -begin - (* Skip "proc ". *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - (* Clear local symbol table. *) - _store_word(0, @symbol_table_local); - - _lexer_read_token(@token_kind); - name_pointer := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name_pointer; - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_s(name_pointer, name_length); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_s(name_pointer, name_length); - _write_z(":\n\0"); - - (* Skip procedure name.
*) - _lexer_skip_token(); - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - _read_procedure_parameters(); - - (* Skip semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _read_procedure_temporaries(); - - (* The body is either a "begin" block with a statement list or a bare return statement. *) - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_begin() then - _lexer_skip_token(); - _compile_statement_list() - elsif token_kind = _lexer_token_kind_return() then - _compile_return_statement() - end; - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token() -end; - -(** - * Skips consecutive comment tokens. - *) -proc _skip_empty_lines(); -var - token_kind: Word; -begin - .skip_empty_lines_rerun; - - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_comment() then - _lexer_skip_token(); - goto .skip_empty_lines_rerun - end -end; - -(** - * Compile global variable initializer. - * - * Handles a string literal (written as an offset into "strings"), an S(...) - * list whose elements are compiled recursively, @name and an integer literal. - *) -proc _compile_global_initializer(); -var - current_byte: Word; - length: Word; - token_kind: Word; - token_start: Word; -begin - _lexer_read_token(@token_kind); - token_start := _lexer_global_get_start(); - current_byte := _load_byte(token_start); - - if token_kind = _lexer_token_kind_string() then - _write_z("\n\t.word strings + \0"); - length := _string_length(token_start); - (* NOTE(review): length is not read again in this procedure, and _write_i is called without an argument - presumably it prints the value _add_string left in a0; confirm. *) - _add_string(token_start); - _write_i(); - - (* Skip the quoted string. *) - _lexer_skip_token(); - - goto .compile_global_initializer_end - elsif current_byte = 'S' then - (* Any token whose first byte is S is taken as the start of a list. Skip "S(".
*) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_right_paren() then - goto .compile_global_initializer_closing - end; - goto .compile_global_initializer_loop - elsif token_kind = _lexer_token_kind_at() then - (* Skip @. *) - _lexer_skip_token(); - _write_z("\n\t.word \0"); - _lexer_read_token(@token_kind); - token_start := _lexer_global_get_start(); - _write_s(token_start, _lexer_global_get_end() + -token_start); - _lexer_skip_token(); - - goto .compile_global_initializer_end - elsif token_kind = _lexer_token_kind_integer() then - _write_z("\n\t.word \0"); - _write_s(token_start, _lexer_global_get_end() + -token_start); - _lexer_skip_token(); - - goto .compile_global_initializer_end - end; - (* Reached from the S( branch. NOTE(review): a token that matches none of the branches above also falls through to this label and recurses without skipping anything - confirm such input cannot occur. *) - .compile_global_initializer_loop; - _compile_global_initializer(); - - _lexer_read_token(@token_kind); - if token_kind <> _lexer_token_kind_right_paren() then - (* Skip comma and whitespace after it. *) - _lexer_skip_token(); - - goto .compile_global_initializer_loop - end; - - .compile_global_initializer_closing; - (* Skip ")" *) - _lexer_skip_token(); - - .compile_global_initializer_end -end; -(* Writes the .type directive and the label of a constant followed by its initializer. The constant name is taken from the current lexer token, which the caller has already read. *) -proc _compile_constant_declaration(); -var - name: Word; - name_length: Word; - token_kind: Word; -begin - name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name; - - _write_z(".type \0"); - _write_s(name, name_length); - _write_z(", @object\n\0"); - - _write_s(name, name_length); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _compile_global_initializer(); - - (* Skip semicolon and newline.
*) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _write_c('\n') -end; -(* Reads one type declaration and enters the result of _read_type_expression into the global symbol table under the declared name. The name is taken from the current lexer token. *) -proc _compile_type_declaration(); -var - token_kind: Word; - type_name: Word; - name_length: Word; - type_info: Word; -begin - type_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -type_name; - - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - type_info := _read_type_expression(); - - _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info); - - _lexer_read_token(@token_kind); - _lexer_skip_token() -end; -(* Compiles the optional "type" part: declarations are read for as long as the next token is an identifier. *) -proc _compile_type_part(); -var - token_kind: Word; -begin - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_type() then - goto .compile_type_part_end - end; - _lexer_skip_token(); - - .compile_type_part_loop; - _skip_empty_lines(); - - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_identifier() then - _compile_type_declaration(); - goto .compile_type_part_loop - end; - - .compile_type_part_end -end; -(* Compiles the optional "const" part into the .rodata section. *) -proc _compile_const_part(); -var - token_kind: Word; -begin - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_const() then - goto .compile_const_part_end - end; - (* Skip "const" with the newline after it. *) - _lexer_skip_token(); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - - .compile_const_part_loop; - _skip_empty_lines(); - - (* A constant declaration starts with an identifier; - any other token ends the const part.
*) - _lexer_read_token(@token_kind); - if token_kind = _lexer_token_kind_identifier() then - _compile_constant_declaration(); - goto .compile_const_part_loop - end; - - .compile_const_part_end -end; -(* Writes the .type directive, the label and the data of one global variable: a zeroed block for an Array, the initializer if one is given, otherwise a zero word. *) -proc _compile_variable_declaration(); -var - name: Word; - name_length: Word; - token_kind: Word; -begin - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name; - - _write_z(".type \0"); - _write_s(name, name_length); - _write_z(", @object\n\0"); - - _write_s(name, name_length); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - (* Read the type name; name and name_length are reused for it. *) - _lexer_read_token(@token_kind); - name := _lexer_global_get_start(); - name_length := _lexer_global_get_end() + -name; - _lexer_skip_token(); - _lexer_read_token(@token_kind); - - if _lexer_compare_keyword("Array", 5, name, name_length) = 1 then - (* A variable of the type Array is written as a zeroed block of 819200 bytes. *) - _write_z(" .zero 819200\0") - elsif token_kind = _lexer_token_kind_assignment() then - (* Skip the assignment sign with surrounding whitespaces. *) - _lexer_skip_token(); - _compile_global_initializer() - else - _write_z(" .word 0\n\0") - end; - - (* Skip semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _write_c('\n') -end; -(* Compiles the optional "var" part into the .data section. *) -proc _compile_var_part(); -var - token_kind: Word; -begin - _lexer_read_token(@token_kind); - - if token_kind <> _lexer_token_kind_var() then - goto .compile_var_part_end - end; - (* Skip "var" and newline. *) - _lexer_skip_token(); - _write_z(".section .data\n\0"); - - .compile_var_part_loop; - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_identifier() then - _compile_variable_declaration(); - goto .compile_var_part_loop - end; - - .compile_var_part_end -end; - -(** - * Process the source code and print the generated code.
- *) -proc _compile_module(); -var - token_kind: Word; -begin - _compile_type_part(); - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - (* Write the text section header and the assembly of the built-in routines _syscall, _load_byte, _load_word, _store_byte and _store_word. *) - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - (* Compile procedures until the next token is not "proc". *) - .compile_module_loop; - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind = _lexer_token_kind_proc() then - _compile_procedure(); - goto .compile_module_loop - end -end; -(* Writes the complete output: the .globl directive, the compiled module and finally the collected strings as one .ascii literal labelled "strings". *) -proc _compile(); -var - compiler_strings_copy: Word; - compiler_strings_end: Word; - current_byte: Word; -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - (* Copy the bytes from compiler_strings up to compiler_strings_position to the output unchanged. *) - compiler_strings_copy := @compiler_strings; - compiler_strings_end := compiler_strings_position; - - .compile_loop; - if compiler_strings_copy < compiler_strings_end then - current_byte := _load_byte(compiler_strings_copy); - compiler_strings_copy := compiler_strings_copy + 1; - _write_c(current_byte); - - goto .compile_loop - end; - _write_c('"'); - _write_c('\n') -end; - -(** - * Terminates the program. a0 contains the return code. - * - * Parameters: - * a0 - Status code. - * - * The last argument of the _syscall call, 93, is the value the _syscall - * routine moves from a6 into a7 before ecall. - * NOTE(review): the first argument is the literal 0 - confirm how the - * documented status code is supposed to reach a0. - *) -proc _exit(); -begin - _syscall(0, 0, 0, 0, 0, 0, 93) -end; - -(** - * Looks for a symbol in the given symbol table. - * - * Parameters: - * symbol_table - Symbol table. - * symbol_name - Symbol name pointer. - * name_length - Symbol name length. - * - * Returns the symbol pointer or 0 in a0.
- *) -proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); -var - result: Word; - symbol_table_length: Word; - current_name: Word; - current_length: Word; -begin - result := 0; - - (* The first word in the symbol table is its length, get it. *) - symbol_table_length := _load_word(symbol_table); - - (* Go to the first symbol position. *) - symbol_table := symbol_table + 4; - - .symbol_table_lookup_loop; - if symbol_table_length = 0 then - goto .symbol_table_lookup_end - end; - - (* Symbol name pointer and length. *) - current_name := _load_word(symbol_table); - current_length := _load_word(symbol_table + 4); - - (* If lengths don't match, continue with the next entry. *) - if name_length <> current_length then - goto .symbol_table_lookup_repeat - end; - (* If names don't match, continue with the next entry. *) - if _memcmp(symbol_name, current_name, name_length) <> 0 then - goto .symbol_table_lookup_repeat - end; - (* Otherwise, the symbol is found. *) - result := _load_word(symbol_table + 8); - goto .symbol_table_lookup_end; - (* Advance to the next entry; an entry is 12 bytes long (name pointer, name length, symbol pointer). *) - .symbol_table_lookup_repeat; - symbol_table := symbol_table + 12; - symbol_table_length := symbol_table_length + -1; - goto .symbol_table_lookup_loop; - - .symbol_table_lookup_end; - return result -end; - -(** - * Inserts a symbol into the table. - * - * The symbol is appended after the existing entries; the table is not - * searched for an entry with the same name. - * - * Parameters: - * symbol_table - Symbol table. - * symbol_name - Symbol name pointer. - * name_length - Symbol name length. - * symbol - Symbol pointer. - *) -proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); -var - table_length: Word; - symbol_pointer: Word; -begin - (* The first word in the symbol table is its length, get it. *) - table_length := _load_word(symbol_table); - - (* Calculate the offset for the new symbol.
*) - symbol_pointer := table_length * 12; - symbol_pointer := symbol_pointer + 4; - symbol_pointer := symbol_table + symbol_pointer; - - _store_word(symbol_name, symbol_pointer); - symbol_pointer := symbol_pointer + 4; - _store_word(name_length, symbol_pointer); - symbol_pointer := symbol_pointer + 4; - _store_word(symbol, symbol_pointer); - - (* Increment the symbol table length. *) - table_length := table_length + 1; - _store_word(table_length, symbol_table) -end; -(* Resets the global symbol table and enters the built-in type names. *) -proc _symbol_table_build(); -begin - (* Set the table length to 0. *) - _store_word(0, @symbol_table_global); - - (* Enter built-in symbols. The numeric argument is the length of the name. NOTE(review): symbol_builtin_name_bool is declared among the constants but is not entered here - confirm that this is intended. *) - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_array, 5, @symbol_type_info_array) -end; - - -(** - * Classification table assigns each possible character to a group (class). All - * characters of the same group are handled equivalently.
- * - * Transition = record - * action: TransitionAction; - * next_state: TransitionState - * end; - *) -proc _lexer_class_invalid(); - return 1 -end; - -proc _lexer_class_digit(); - return 2 -end; - -proc _lexer_class_alpha(); - return 3 -end; - -proc _lexer_class_space(); - return 4 -end; - -proc _lexer_class_colon(); - return 5 -end; - -proc _lexer_class_equals(); - return 6 -end; - -proc _lexer_class_left_paren(); - return 7 -end; - -proc _lexer_class_right_paren(); - return 8 -end; - -proc _lexer_class_asterisk(); - return 9 -end; - -proc _lexer_class_backslash(); - return 10 -end; - -proc _lexer_class_single(); - return 11 -end; - -proc _lexer_class_hex(); - return 12 -end; - -proc _lexer_class_zero(); - return 13 -end; - -proc _lexer_class_x(); - return 14 -end; - -proc _lexer_class_eof(); - return 15 -end; - -proc _lexer_class_dot(); - return 16 -end; - -proc _lexer_class_minus(); - return 17 -end; - -proc _lexer_class_single_quote(); - return 18 -end; - -proc _lexer_class_double_quote(); - return 19 -end; - -proc _lexer_class_greater(); - return 20 -end; - -proc _lexer_class_less(); - return 21 -end; - -proc _lexer_class_other(); - return 22 -end; -(* Lexer states, numbered from 1. The state selects the row of the transition table. *) -proc _lexer_state_start(); - return 1 -end; - -proc _lexer_state_colon(); - return 2 -end; - -proc _lexer_state_identifier(); - return 3 -end; - -proc _lexer_state_decimal(); - return 4 -end; - -proc _lexer_state_leading_zero(); - return 5 -end; - -proc _lexer_state_greater(); - return 6 -end; - -proc _lexer_state_minus(); - return 7 -end; - -proc _lexer_state_left_paren(); - return 8 -end; - -proc _lexer_state_less(); - return 9 -end; - -proc _lexer_state_dot(); - return 10 -end; - -proc _lexer_state_comment(); - return 11 -end; - -proc _lexer_state_closing_comment(); - return 12 -end; - -proc _lexer_state_character(); - return 13 -end; - -proc _lexer_state_character_escape(); - return 14 -end; - -proc _lexer_state_string(); - return 15 -end; - -proc _lexer_state_string_escape(); - return 16 -end; - -proc
_lexer_state_end(); - return 17 -end; -(* Transition actions, numbered from 1. *) -proc _lexer_action_none(); - return 1 -end; - -proc _lexer_action_accumulate(); - return 2 -end; - -proc _lexer_action_skip(); - return 3 -end; - -proc _lexer_action_single(); - return 4 -end; - -proc _lexer_action_eof(); - return 5 -end; - -proc _lexer_action_finalize(); - return 6 -end; - -proc _lexer_action_composite(); - return 7 -end; - -proc _lexer_action_key_id(); - return 8 -end; - -proc _lexer_action_integer(); - return 9 -end; - -proc _lexer_action_delimited(); - return 10 -end; - -(** - * Assigns a value to the array element at the given index. - * - * Parameters: - * array - Array pointer. - * index - Element index, starting at 1; elements are 4 bytes long. - * data - Data to assign. - *) -proc _assign_at(array: Word, index: Word, data: Word); -var - target: Word; -begin - target := index + -1; - target := target * 4; - target := array + target; - - _store_word(data, target) -end; -(* Returns the array element at the given index. The index starts at 1 and elements are 4 bytes long, as in _assign_at. *) -proc _get_at(array: Word, index: Word); -var - target: Word; -begin - target := index + -1; - target := target * 4; - target := array + target; - - return _load_word(target) -end; - -(** - * Initializes the array with character classes.
- *) -proc _lexer_classifications(); -var - code: Word; -begin (* _assign_at counts from 1, so index n describes the character with the code n - 1; for example index 49 is the character 0 and index 121 is the character x. *) - _assign_at(@classification, 1, _lexer_class_eof()); - _assign_at(@classification, 2, _lexer_class_invalid()); - _assign_at(@classification, 3, _lexer_class_invalid()); - _assign_at(@classification, 4, _lexer_class_invalid()); - _assign_at(@classification, 5, _lexer_class_invalid()); - _assign_at(@classification, 6, _lexer_class_invalid()); - _assign_at(@classification, 7, _lexer_class_invalid()); - _assign_at(@classification, 8, _lexer_class_invalid()); - _assign_at(@classification, 9, _lexer_class_invalid()); - _assign_at(@classification, 10, _lexer_class_space()); - _assign_at(@classification, 11, _lexer_class_space()); - _assign_at(@classification, 12, _lexer_class_invalid()); - _assign_at(@classification, 13, _lexer_class_invalid()); - _assign_at(@classification, 14, _lexer_class_space()); - _assign_at(@classification, 15, _lexer_class_invalid()); - _assign_at(@classification, 16, _lexer_class_invalid()); - _assign_at(@classification, 17, _lexer_class_invalid()); - _assign_at(@classification, 18, _lexer_class_invalid()); - _assign_at(@classification, 19, _lexer_class_invalid()); - _assign_at(@classification, 20, _lexer_class_invalid()); - _assign_at(@classification, 21, _lexer_class_invalid()); - _assign_at(@classification, 22, _lexer_class_invalid()); - _assign_at(@classification, 23, _lexer_class_invalid()); - _assign_at(@classification, 24, _lexer_class_invalid()); - _assign_at(@classification, 25, _lexer_class_invalid()); - _assign_at(@classification, 26, _lexer_class_invalid()); - _assign_at(@classification, 27, _lexer_class_invalid()); - _assign_at(@classification, 28, _lexer_class_invalid()); - _assign_at(@classification, 29, _lexer_class_invalid()); - _assign_at(@classification, 30, _lexer_class_invalid()); - _assign_at(@classification, 31, _lexer_class_invalid()); - _assign_at(@classification, 32, _lexer_class_invalid()); - _assign_at(@classification, 33, _lexer_class_space()); -
_assign_at(@classification, 34, _lexer_class_single()); - _assign_at(@classification, 35, _lexer_class_double_quote()); - _assign_at(@classification, 36, _lexer_class_other()); - _assign_at(@classification, 37, _lexer_class_other()); - _assign_at(@classification, 38, _lexer_class_single()); - _assign_at(@classification, 39, _lexer_class_single()); - _assign_at(@classification, 40, _lexer_class_single_quote()); - _assign_at(@classification, 41, _lexer_class_left_paren()); - _assign_at(@classification, 42, _lexer_class_right_paren()); - _assign_at(@classification, 43, _lexer_class_asterisk()); - _assign_at(@classification, 44, _lexer_class_single()); - _assign_at(@classification, 45, _lexer_class_single()); - _assign_at(@classification, 46, _lexer_class_minus()); - _assign_at(@classification, 47, _lexer_class_dot()); - _assign_at(@classification, 48, _lexer_class_single()); - _assign_at(@classification, 49, _lexer_class_zero()); - _assign_at(@classification, 50, _lexer_class_digit()); - _assign_at(@classification, 51, _lexer_class_digit()); - _assign_at(@classification, 52, _lexer_class_digit()); - _assign_at(@classification, 53, _lexer_class_digit()); - _assign_at(@classification, 54, _lexer_class_digit()); - _assign_at(@classification, 55, _lexer_class_digit()); - _assign_at(@classification, 56, _lexer_class_digit()); - _assign_at(@classification, 57, _lexer_class_digit()); - _assign_at(@classification, 58, _lexer_class_digit()); - _assign_at(@classification, 59, _lexer_class_colon()); - _assign_at(@classification, 60, _lexer_class_single()); - _assign_at(@classification, 61, _lexer_class_less()); - _assign_at(@classification, 62, _lexer_class_equals()); - _assign_at(@classification, 63, _lexer_class_greater()); - _assign_at(@classification, 64, _lexer_class_other()); - _assign_at(@classification, 65, _lexer_class_single()); - _assign_at(@classification, 66, _lexer_class_alpha()); - _assign_at(@classification, 67, _lexer_class_alpha()); -
_assign_at(@classification, 68, _lexer_class_alpha()); - _assign_at(@classification, 69, _lexer_class_alpha()); - _assign_at(@classification, 70, _lexer_class_alpha()); - _assign_at(@classification, 71, _lexer_class_alpha()); - _assign_at(@classification, 72, _lexer_class_alpha()); - _assign_at(@classification, 73, _lexer_class_alpha()); - _assign_at(@classification, 74, _lexer_class_alpha()); - _assign_at(@classification, 75, _lexer_class_alpha()); - _assign_at(@classification, 76, _lexer_class_alpha()); - _assign_at(@classification, 77, _lexer_class_alpha()); - _assign_at(@classification, 78, _lexer_class_alpha()); - _assign_at(@classification, 79, _lexer_class_alpha()); - _assign_at(@classification, 80, _lexer_class_alpha()); - _assign_at(@classification, 81, _lexer_class_alpha()); - _assign_at(@classification, 82, _lexer_class_alpha()); - _assign_at(@classification, 83, _lexer_class_alpha()); - _assign_at(@classification, 84, _lexer_class_alpha()); - _assign_at(@classification, 85, _lexer_class_alpha()); - _assign_at(@classification, 86, _lexer_class_alpha()); - _assign_at(@classification, 87, _lexer_class_alpha()); - _assign_at(@classification, 88, _lexer_class_alpha()); - _assign_at(@classification, 89, _lexer_class_alpha()); - _assign_at(@classification, 90, _lexer_class_alpha()); - _assign_at(@classification, 91, _lexer_class_alpha()); - _assign_at(@classification, 92, _lexer_class_single()); - _assign_at(@classification, 93, _lexer_class_backslash()); - _assign_at(@classification, 94, _lexer_class_single()); - _assign_at(@classification, 95, _lexer_class_single()); - _assign_at(@classification, 96, _lexer_class_alpha()); - _assign_at(@classification, 97, _lexer_class_other()); - _assign_at(@classification, 98, _lexer_class_hex()); - _assign_at(@classification, 99, _lexer_class_hex()); - _assign_at(@classification, 100, _lexer_class_hex()); - _assign_at(@classification, 101, _lexer_class_hex()); - _assign_at(@classification, 102, _lexer_class_hex()); -
_assign_at(@classification, 103, _lexer_class_hex()); - _assign_at(@classification, 104, _lexer_class_alpha()); - _assign_at(@classification, 105, _lexer_class_alpha()); - _assign_at(@classification, 106, _lexer_class_alpha()); - _assign_at(@classification, 107, _lexer_class_alpha()); - _assign_at(@classification, 108, _lexer_class_alpha()); - _assign_at(@classification, 109, _lexer_class_alpha()); - _assign_at(@classification, 110, _lexer_class_alpha()); - _assign_at(@classification, 111, _lexer_class_alpha()); - _assign_at(@classification, 112, _lexer_class_alpha()); - _assign_at(@classification, 113, _lexer_class_alpha()); - _assign_at(@classification, 114, _lexer_class_alpha()); - _assign_at(@classification, 115, _lexer_class_alpha()); - _assign_at(@classification, 116, _lexer_class_alpha()); - _assign_at(@classification, 117, _lexer_class_alpha()); - _assign_at(@classification, 118, _lexer_class_alpha()); - _assign_at(@classification, 119, _lexer_class_alpha()); - _assign_at(@classification, 120, _lexer_class_alpha()); - _assign_at(@classification, 121, _lexer_class_x()); - _assign_at(@classification, 122, _lexer_class_alpha()); - _assign_at(@classification, 123, _lexer_class_alpha()); - _assign_at(@classification, 124, _lexer_class_other()); - _assign_at(@classification, 125, _lexer_class_single()); - _assign_at(@classification, 126, _lexer_class_other()); - _assign_at(@classification, 127, _lexer_class_single()); - _assign_at(@classification, 128, _lexer_class_invalid()); - - code := 129; - - (* Set the remaining entries, 129 - 256, to the class "other".
*) - .create_classification_loop; - _assign_at(@classification, code, _lexer_class_other()); - code := code + 1; - - if code < 257 then - goto .create_classification_loop - end -end; -(* Returns the address of the transition for the given state and character class; both are counted from 1. NOTE(review): the local transition_table is declared but not used. *) -proc _lexer_get_transition(current_state: Word, character_class: Word); -var - transition_table: Word; - row_position: Word; - column_position: Word; - target: Word; -begin - (* Each transition is 8 bytes long (2 words: action and next state). - There are 22 character classes, so a transition row is 8 * 22 = 176 bytes long. *) - row_position := current_state + -1; - row_position := row_position * 176; - - column_position := character_class + -1; - column_position := column_position * 8; - - target := _lexer_get_transition_table() + row_position; - - return target + column_position -end; - -(** - * Sets the action and the next state of a single transition. - * - * Parameters: - * current_state - First index into transitions table. - * character_class - Second index into transitions table. - * action - Action to assign. - * next_state - Next state to assign. - *) -proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); -var - transition: Word; -begin - transition := _lexer_get_transition(current_state, character_class); - - _lexer_transition_set_action(transition, action); - _lexer_transition_set_state(transition, next_state) -end; - -(* Sets same action and state transition for all character classes in one transition row. *) - -(** - * Parameters: - * current_state - Current state (Transition state enumeration). - * default_action - Default action (Callback). - * next_state - Next state (Transition state enumeration).
- *) -proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); -begin - _lexer_set_transition(current_state, _lexer_class_invalid(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_digit(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_alpha(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_space(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_colon(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_equals(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_left_paren(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_right_paren(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_asterisk(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_backslash(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_single(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_hex(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_zero(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_x(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_eof(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_dot(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_minus(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_single_quote(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_double_quote(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_greater(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_less(), 
default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_other(), default_action, next_state) -end; - -(** - * The transition table describes transitions from one state to another, given - * a symbol (character class). - * - * The table has m rows and n columns, where m is the amount of states and n is - * the amount of classes. So given the current state and a classified character - * the table can be used to look up the next state. - *) -proc _lexer_transitions(); -begin - (* Start state. *) - _lexer_set_transition(_lexer_state_start(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_colon()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_backslash(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_start(), 
_lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_greater(), _lexer_action_accumulate(), _lexer_state_greater()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_less(), _lexer_action_accumulate(), _lexer_state_less()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_other(), _lexer_action_none(), _lexer_state_end()); - - (* Colon state. *) - _lexer_default_transition(_lexer_state_colon(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_colon(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); - - (* Identifier state. 
*) - _lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); - - (* Decimal state. *) - _lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); - - (* Leading zero. 
*) - _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_dot()); (* NOTE(review): class x leads to the dot state without an action - confirm this is intended. *) - - (* Greater state. *) - _lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_greater(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); - - (* Minus state. *) - _lexer_default_transition(_lexer_state_minus(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_minus(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); - - (* Left paren state. *) - _lexer_default_transition(_lexer_state_left_paren(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_left_paren(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_comment()); - - (* Less state. *) - _lexer_default_transition(_lexer_state_less(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_less(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_less(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); - - (* Dot state. 
*) - _lexer_default_transition(_lexer_state_dot(), _lexer_action_finalize(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_dot(), _lexer_class_dot(), _lexer_action_composite(), _lexer_state_end()); - - (* Comment. *) - _lexer_default_transition(_lexer_state_comment(), _lexer_action_accumulate(), _lexer_state_comment()); - _lexer_set_transition(_lexer_state_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); - _lexer_set_transition(_lexer_state_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - - (* Closing comment. *) - _lexer_default_transition(_lexer_state_closing_comment(), _lexer_action_accumulate(), _lexer_state_comment()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_right_paren(), _lexer_action_delimited(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); - _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - - (* Character. *) - _lexer_default_transition(_lexer_state_character(), _lexer_action_accumulate(), _lexer_state_character()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_character(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_character_escape()); - - (* Escape sequence in a character. 
*) - _lexer_default_transition(_lexer_state_character_escape(), _lexer_action_accumulate(), _lexer_state_character()); - _lexer_set_transition(_lexer_state_character_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_character_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - - (* String. *) - _lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_string(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_string_escape()); - - (* Escape sequence in a string. *) - _lexer_default_transition(_lexer_state_string_escape(), _lexer_action_accumulate(), _lexer_state_string()); - _lexer_set_transition(_lexer_state_string_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_string_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()) -end; - -(** - * Transition table is saved after character classification table. - * Each character entry is 1 word long and there are 256 characters. - * 1024 = 256 * 4 - *) -proc _lexer_get_transition_table(); - return @classification + 1024 -end; - -(** - * Lexer state is saved after the transition tables. - * Each transition table entry is 8 bytes long. The table has 17 rows (transition states) - * and 22 columns (character classes), so 2992 = 8 * 17 * 22. - *) -proc _lexer_global_state(); - return _lexer_get_transition_table() + 2992 -end; - -(** - * Gets pointer to the token start. 
- *) -proc _lexer_global_get_start(); -var - target: Word; -begin - target := _lexer_global_state() + 4; - return _load_word(target) -end; - -(** - * Sets pointer to the token start. - *) -proc _lexer_global_set_start(new_start: Word); -var - target: Word; -begin - target := _lexer_global_state() + 4; - _store_word(new_start, target) -end; - -(** - * Gets pointer to the token end. - *) -proc _lexer_global_get_end(); -var - target: Word; -begin - target := _lexer_global_state() + 8; - return _load_word(target) -end; - -(** - * Sets pointer to the token end. - * - * Parameters: - * new_start - New token end pointer, despite its name. - *) -proc _lexer_global_set_end(new_start: Word); -var - target: Word; -begin - target := _lexer_global_state() + 8; - _store_word(new_start, target) -end; - -(* Reads the action, the first word of a transition entry. *) -proc _lexer_transition_get_action(transition: Word); - return _load_word(transition) -end; - -(* Writes the action, the first word of a transition entry. *) -proc _lexer_transition_set_action(transition: Word, action: Word); -begin - _store_word(action, transition) -end; - -(* Reads the next state, the second word of a transition entry. *) -proc _lexer_transition_get_state(transition: Word); - return _load_word(transition + 4) -end; - -(* Writes the next state, the second word of a transition entry. *) -proc _lexer_transition_set_state(transition: Word, state: Word); -begin - _store_word(state, transition + 4) -end; - -(** - * Resets the lexer state for reading the next token. - *) -proc _lexer_reset(); -var - state: Word; -begin - (* Transition start state is 1. *) - state := _lexer_global_state(); - _store_word(_lexer_state_start(), state); - - (* The next token is scanned from the current token start. *) - state := _lexer_global_get_start(); - _lexer_global_set_end(state) -end; - -(** - * One time lexer initialization. 
- *) -proc _lexer_initialize(); -begin - _lexer_classifications(); - _lexer_transitions(); - - _lexer_global_set_start(@source_code); - _lexer_global_set_end(@source_code) -end; - -(* Returns the transition entry for the current state and the class of the character at the token end. *) -proc _lexer_next_transition(); -var - current_character: Word; - character_class: Word; - current_state: Word; -begin - current_character := _lexer_global_get_end(); - current_character := _load_byte(current_character); - - character_class := _get_at(@classification, current_character + 1); - - current_state := _lexer_global_state(); - current_state := _load_word(current_state); - - return _lexer_get_transition(current_state, character_class) -end; - -proc _lexer_token_kind_identifier(); - return 1 -end; - -proc _lexer_token_kind_const(); - return 2 -end; - -proc _lexer_token_kind_var(); - return 3 -end; - -proc _lexer_token_kind_proc(); - return 4 -end; - -proc _lexer_token_kind_type(); - return 5 -end; - -proc _lexer_token_kind_begin(); - return 6 -end; - -proc _lexer_token_kind_end(); - return 7 -end; - -proc _lexer_token_kind_if(); - return 8 -end; - -proc _lexer_token_kind_then(); - return 9 -end; - -proc _lexer_token_kind_else(); - return 10 -end; - -proc _lexer_token_kind_elsif(); - return 11 -end; - -proc _lexer_token_kind_while(); - return 12 -end; - -proc _lexer_token_kind_do(); - return 13 -end; - -proc _lexer_token_kind_extern(); - return 14 -end; - -proc _lexer_token_kind_record(); - return 15 -end; - -proc _lexer_token_kind_union(); - return 16 -end; - -proc _lexer_token_kind_true(); - return 17 -end; - -proc _lexer_token_kind_false(); - return 18 -end; - -proc _lexer_token_kind_nil(); - return 19 -end; - -proc _lexer_token_kind_and(); - return 20 -end; - -proc _lexer_token_kind_or(); - return 21 -end; - -proc _lexer_token_kind_xor(); - return 22 -end; - -proc _lexer_token_kind_pipe(); - return 23 -end; - -proc _lexer_token_kind_not(); - return 24 -end; - -(* Every token kind needs its own number; 24 is already taken by _lexer_token_kind_not(), so the first free value after _lexer_token_kind_eof() is used. *) -proc _lexer_token_kind_return(); - return 65 -end; - -proc _lexer_token_kind_module(); - return 25 -end; - -proc 
_lexer_token_kind_program(); - return 26 -end; - -proc _lexer_token_kind_import(); - return 27 -end; - -proc _lexer_token_kind_cast(); - return 28 -end; - -proc _lexer_token_kind_defer(); - return 29 -end; - -proc _lexer_token_kind_case(); - return 30 -end; - -proc _lexer_token_kind_of(); - return 31 -end; - -proc _lexer_token_kind_trait(); - return 32 -end; - -proc _lexer_token_kind_left_paren(); - return 33 -end; - -proc _lexer_token_kind_right_paren(); - return 34 -end; - -proc _lexer_token_kind_left_square(); - return 35 -end; - -proc _lexer_token_kind_right_square(); - return 36 -end; - -proc _lexer_token_kind_shift_left(); - return 37 -end; - -proc _lexer_token_kind_shift_right(); - return 38 -end; - -proc _lexer_token_kind_greater_equal(); - return 39 -end; - -proc _lexer_token_kind_less_equal(); - return 40 -end; - -proc _lexer_token_kind_greater_than(); - return 41 -end; - -proc _lexer_token_kind_less_than(); - return 42 -end; - -proc _lexer_token_kind_not_equal(); - return 43 -end; - -proc _lexer_token_kind_equals(); - return 44 -end; - -proc _lexer_token_kind_semicolon(); - return 45 -end; - -proc _lexer_token_kind_dot(); - return 46 -end; - -proc _lexer_token_kind_comma(); - return 47 -end; - -proc _lexer_token_kind_plus(); - return 48 -end; - -proc _lexer_token_kind_arrow(); - return 49 -end; - -proc _lexer_token_kind_minus(); - return 50 -end; - -proc _lexer_token_kind_multiplication(); - return 51 -end; - -proc _lexer_token_kind_division(); - return 52 -end; - -proc _lexer_token_kind_remainder(); - return 53 -end; - -proc _lexer_token_kind_assignment(); - return 54 -end; - -proc _lexer_token_kind_colon(); - return 55 -end; - -proc _lexer_token_kind_hat(); - return 56 -end; - -proc _lexer_token_kind_at(); - return 57 -end; - -proc _lexer_token_kind_comment(); - return 58 -end; - -proc _lexer_token_kind_string(); - return 59 -end; - -proc _lexer_token_kind_character(); - return 60 -end; - -proc _lexer_token_kind_integer(); - return 61 -end; - -proc 
_lexer_token_kind_word(); - return 62 -end; - -proc _lexer_token_kind_goto(); - return 63 -end; - -proc _lexer_token_kind_eof(); - return 64 -end; - -(* Returns 1 if both byte ranges have the same length and the same contents, 0 otherwise. *) -proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); -var - result: Word; -begin - result := 0; - - if lhs_length = rhs_length then - result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0 - end; - return result -end; - -(* Returns the keyword token kind for the text between the two positions or the identifier kind if it is not a known keyword. *) -proc _lexer_classify_keyword(position_start: Word, position_end: Word); -var - result: Word; - token_length: Word; -begin - result := _lexer_token_kind_identifier(); - token_length := position_end + -position_start; - - if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then - result := _lexer_token_kind_const() - elsif _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then - result := _lexer_token_kind_var() - elsif _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then - result := _lexer_token_kind_proc() - elsif _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then - result := _lexer_token_kind_type() - elsif _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then - result := _lexer_token_kind_begin() - elsif _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then - result := _lexer_token_kind_end() - elsif _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then - result := _lexer_token_kind_return() - elsif _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then - result := _lexer_token_kind_goto() - elsif _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then - result := _lexer_token_kind_if() - elsif _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then - result := _lexer_token_kind_while() - elsif _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then - result := _lexer_token_kind_then() - elsif _lexer_compare_keyword(position_start, 
token_length, "else", 4) = 1 then - result := _lexer_token_kind_else() - elsif _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then - result := _lexer_token_kind_elsif() - elsif _lexer_compare_keyword(position_start, token_length, "or", 2) = 1 then - result := _lexer_token_kind_or() - (* The length has to match the keyword, "xor" is 3 characters long. *) - elsif _lexer_compare_keyword(position_start, token_length, "xor", 3) = 1 then - result := _lexer_token_kind_xor() - end; - return result -end; - -proc _lexer_classify_finalize(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ':' then - result := _lexer_token_kind_colon() - elsif character = '.' then - result := _lexer_token_kind_dot() - elsif character = '(' then - result := _lexer_token_kind_left_paren() - elsif character = '-' then - result := _lexer_token_kind_minus() - elsif character = '<' then - result := _lexer_token_kind_less_than() - elsif character = '>' then - result := _lexer_token_kind_greater_than() - end; - return result -end; - -proc _lexer_classify_single(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ';' then - result := _lexer_token_kind_semicolon() - elsif character = ',' then - result := _lexer_token_kind_comma() - elsif character = ')' then - result := _lexer_token_kind_right_paren() - elsif character = '@' then - result := _lexer_token_kind_at() - elsif character = '~' then - result := _lexer_token_kind_not() - elsif character = '&' then - result := _lexer_token_kind_and() - elsif character = '+' then - result := _lexer_token_kind_plus() - elsif character = '*' then - result := _lexer_token_kind_multiplication() - elsif character = '=' then - result := _lexer_token_kind_equals() - elsif character = '%' then - result := _lexer_token_kind_remainder() - elsif character = '/' then - result := _lexer_token_kind_division() - elsif character = '.' 
then - result := _lexer_token_kind_dot() - elsif character = '^' then - result := _lexer_token_kind_hat() - end; - return result -end; - -(* Classifies a two character token by its first and last character. Returns 0 for an unknown combination. *) -proc _lexer_classify_composite(start_position: Word, one_before_last: Word); -var - first_character: Word; - last_character: Word; - result: Word; -begin - (* Like the other classifiers start with 0, so an unhandled pair does not return an uninitialized value. *) - result := 0; - first_character := _load_byte(start_position); - last_character := _load_byte(one_before_last); - - if first_character = ':' then - result := _lexer_token_kind_assignment() - elsif first_character = '<' then - if last_character = '=' then - result := _lexer_token_kind_less_equal() - elsif last_character = '>' then - result := _lexer_token_kind_not_equal() - end - elsif first_character = '>' then - if last_character = '=' then - result := _lexer_token_kind_greater_equal() - end - elsif first_character = '-' then - (* The minus state only becomes a composite token on ">". *) - if last_character = '>' then - result := _lexer_token_kind_arrow() - end - end; - - return result -end; - -(* Classifies a comment, character or string token by its opening delimiter. Returns 0 for an unknown delimiter. 
*) -proc _lexer_classify_delimited(start_position: Word, end_position: Word); -var - token_length: Word; - delimiter: Word; - result: Word; -begin - result := 0; - token_length := end_position + -start_position; - delimiter := _load_byte(start_position); - - if delimiter = '(' then - result := _lexer_token_kind_comment() - elsif delimiter = '\'' then - result := _lexer_token_kind_character() - elsif delimiter = '"' then - result := _lexer_token_kind_string() - end; - return result -end; - -proc _lexer_classify_integer(start_position: Word, end_position: Word); -begin - return _lexer_token_kind_integer() -end; - -(* Performs a transition action: moves the token boundaries and, for finishing actions, stores the token kind at the address in kind. *) -proc _lexer_execute_action(action_to_perform: Word, kind: Word); -var - position_start: Word; - position_end: Word; - intermediate: Word; -begin - position_start := _lexer_global_get_start(); - position_end := _lexer_global_get_end(); - - if action_to_perform = _lexer_action_none() then - elsif action_to_perform = _lexer_action_accumulate() then - _lexer_global_set_end(position_end + 1) - elsif action_to_perform = _lexer_action_skip() then - _lexer_global_set_start(position_start + 1); - _lexer_global_set_end(position_end + 1) - elsif action_to_perform = _lexer_action_single() then 
- _lexer_global_set_end(position_end + 1); - - intermediate := _lexer_classify_single(position_start); - _store_word(intermediate, kind) - elsif action_to_perform = _lexer_action_eof() then - intermediate := _lexer_token_kind_eof(); - _store_word(intermediate, kind) - elsif action_to_perform = _lexer_action_finalize() then - intermediate := _lexer_classify_finalize(position_start); - _store_word(intermediate, kind) - elsif action_to_perform = _lexer_action_composite() then - _lexer_global_set_end(position_end + 1); - - intermediate := _lexer_classify_composite(position_start, position_end); - _store_word(intermediate, kind) - elsif action_to_perform = _lexer_action_key_id() then - intermediate := _lexer_classify_keyword(position_start, position_end); - _store_word(intermediate, kind) - elsif action_to_perform = _lexer_action_integer() then - intermediate := _lexer_classify_integer(position_start, position_end); - _store_word(intermediate, kind) - elsif action_to_perform = _lexer_action_delimited() then - _lexer_global_set_end(position_end + 1); - - intermediate := _lexer_classify_delimited(position_start, position_end + 1); - _store_word(intermediate, kind) - end; -end; - -(* Looks up the next transition, saves its target state and performs its action. Returns the new state. *) -proc _lexer_execute_transition(kind: Word); -var - next_transition: Word; - next_state: Word; - global_state: Word; - action_to_perform: Word; -begin - next_transition := _lexer_next_transition(); - next_state := _lexer_transition_get_state(next_transition); - action_to_perform := _lexer_transition_get_action(next_transition); - - global_state := _lexer_global_state(); - - _store_word(next_state, global_state); - _lexer_execute_action(action_to_perform, kind); - - return next_state -end; - -(* Executes transitions until the end state is reached. *) -proc _lexer_advance_token(kind: Word); -begin - if _lexer_execute_transition(kind) <> _lexer_state_end() then - _lexer_advance_token(kind) - end -end; - -(** - * Reads the next token and writes its type into the address in the kind parameter. 
- *) -proc _lexer_read_token(kind: Word); -begin - _lexer_reset(); - _lexer_advance_token(kind) -end; - -(** - * Advances the token stream past the last read token. - *) -proc _lexer_skip_token(); -var - old_end: Word; -begin - old_end := _lexer_global_get_end(); - _lexer_global_set_start(old_end) -end; - -(* - * Entry point. - *) -proc _start(); -var - last_read: Word; - offset: Word; -begin - _lexer_initialize(); - _symbol_table_build(); - - (* Read the source from the standard input. *) - offset := @source_code; - - .start_read; - (* Second argument is the buffer size. When modifying it, update the source_code definition as well. *) - last_read := _read_file(offset, 819200); - if last_read > 0 then - offset := offset + last_read; - goto .start_read - end; - _compile(); - - _exit(0) -end; diff --git a/boot/stage14/cl.elna b/boot/stage14/cl.elna new file mode 100644 index 0000000..5566518 --- /dev/null +++ b/boot/stage14/cl.elna @@ -0,0 +1,3053 @@ +(* + * This Source Code Form is subject to the terms of the Mozilla Public License, + * v. 2.0. If a copy of the MPL was not distributed with this file, You can + * obtain one at https://mozilla.org/MPL/2.0/. + *) + +(* Stage 14 compiler. *) + +(* - Binary minus. *) +(* - Space independent parsing. *) +(* - Label names in goto statements aren't required to begin with a dot. *) +(* - Dereferencing pointers pointing to word long data. *) +(* - Enumeration type. *) + +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_array := "Array"; + + (* Every type info starts with a word describing what type it is. + + PRIMITIVE_TYPE = 1 + ENUMERATION_TYPE = 2 + + Primitive types have only type size. 
*) + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_array := S(1, 4); + + (* Info objects start with a word describing their type. + + TYPE_INFO = 1 + PARAMETER_INFO = 2 + TEMPORARY_INFO = 3 + + Type info has the type it belongs to. *) + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_array := S(1, @symbol_builtin_type_array); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + + (* To reserve memory just add the number of needed bytes to the memory_free_pointer variable. *) + memory: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + label_counter: Word := 0; + + (* Points to a segment of free memory. *) + memory_free_pointer: Word := @memory; + +(** + * Calculates and returns the string token length between quotes, including the + * escaping backslash characters. + * + * Parameters: + * string - String token pointer. + * + * Returns the length in a0. + *) +proc _string_length(string: Word); +var + counter: Word; +begin + (* Reset the counter. *) + counter := 0; + + .string_length_loop; + string := string + 1; + + if _load_byte(string) <> '"' then + counter := counter + 1; + goto .string_length_loop + end; + + return counter +end; + +(** + * Adds a string to the global, read-only string storage. + * + * Parameters: + * string - String token. + * + * Returns the offset from the beginning of the storage to the new string in a0. 
+ *) +proc _add_string(string: Word); +var + contents: Word; + result: Word; + current_byte: Word; +begin + contents := string + 1; (* Start after the first byte of the token. *) + result := compiler_strings_length; + + .add_string_loop; + if _load_byte(contents) <> '"' then + current_byte := _load_byte(contents); + _store_byte(current_byte, compiler_strings_position); + compiler_strings_position := compiler_strings_position + 1; + contents := contents + 1; + + if current_byte <> '\\' then (* Backslashes are stored, but not counted in the length. *) + compiler_strings_length := compiler_strings_length + 1 + end; + goto .add_string_loop + end; + + return result +end; + +(** + * Reads standard input into a buffer. + * + * Parameters: + * buffer - Buffer pointer. + * size - Buffer size. + * + * Returns the amount of bytes read into the buffer in a0. + *) +proc _read_file(buffer: Word, size: Word); + return _syscall(0, buffer, size, 0, 0, 0, 63) +end; + +(** + * Writes to the standard output. + * + * Parameters: + * buffer - Buffer. + * size - Buffer length. + *) +proc _write_s(buffer: Word, size: Word); +begin + _syscall(1, buffer, size, 0, 0, 0, 64) +end; + +(** + * Writes a number to a string buffer. + * + * Parameters: + * number - Whole number. + * output_buffer - Buffer pointer. + * + * Sets a0 to the length of the written number. 
+ *) +proc _print_i(number: Word, output_buffer: Word); +var + local_buffer: Word; + is_negative: Word; + current_character: Word; + result: Word; +begin + local_buffer := @result + 11; (* NOTE(review): digits are written backwards starting 11 bytes after result - confirm the frame leaves room for them. *) + + if number >= 0 then + is_negative := 0 + else + number := -number; (* Continue with the absolute value, the sign is written last. *) + is_negative := 1 + end; + + .print_i_digit10; + current_character := number % 10; + _store_byte(current_character + '0', local_buffer); + + number := number / 10; + local_buffer := local_buffer + -1; + + if number <> 0 then + goto .print_i_digit10 + end; + if is_negative = 1 then + _store_byte('-', local_buffer); + local_buffer := local_buffer + -1 + end; + result := @result + 11; + result := result + -local_buffer; (* Amount of characters written. *) + _memcpy(output_buffer, local_buffer + 1, result); + + return result +end; + +(** + * Writes a number to the standard output. + * + * Parameters: + * number - Whole number. + *) +proc _write_i(number: Word); +var + local_buffer: Word; + length: Word; +begin + length := _print_i(number, @local_buffer); + _write_s(@local_buffer, length) +end; + +(** + * Writes a character from a0 into the standard output. + * + * Parameters: + * character - Character to write. + *) +proc _write_c(character: Word); +begin + _write_s(@character, 1) +end; + +(** + * Write null terminated string. + * + * Parameters: + * string - String. + *) +proc _write_z(string: Word); +var + next_byte: Word; +begin + (* Check for 0 character. *) + next_byte := _load_byte(string); + + if next_byte <> 0 then + (* Print a character. *) + _write_c(next_byte); + + (* Advance the input string by one byte. *) + _write_z(string + 1) + end +end; + +(** + * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. + *) +proc _is_upper(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= 'A'; + rhs := character <= 'Z'; + + return lhs & rhs + +end; + +(** + * Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. 
+ *) +proc _is_lower(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= 'a'; + rhs := character <= 'z'; + + return lhs & rhs +end; + +(** + * Detects if the passed character is a 7-bit alpha character or an underscore. + * + * Parameters: + * character - Tested character. + * + * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. + *) +proc _is_alpha(character: Word); +var + is_upper_result: Word; + is_lower_result: Word; + is_alpha_result: Word; + is_underscore: Word; +begin + is_upper_result := _is_upper(character); + is_lower_result := _is_lower(character); + is_underscore := character = '_'; + + is_alpha_result := is_lower_result or is_upper_result; + return is_alpha_result or is_underscore +end; + +(** + * Detects whether the passed character is a digit (a value between 0 and 9). + * + * Parameters: + * character - Examined value. + * + * Sets a0 to 1 if it is a digit, to 0 otherwise. + *) +proc _is_digit(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= '0'; + rhs := character <= '9'; + + return lhs & rhs +end; + +proc _is_alnum(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := _is_alpha(character); + rhs := _is_digit(character); + + return lhs or rhs +end; + +(** + * Parameters: + * lhs - First pointer. + * rhs - Second pointer. + * count - The length to compare. + * + * Returns 0 if memory regions are equal, otherwise the difference between the first differing bytes. + *) +proc _memcmp(lhs: Word, rhs: Word, count: Word); +var + lhs_byte: Word; + rhs_byte: Word; + result: Word; +begin + result := 0; + + .memcmp_loop; + if count <> 0 then + lhs_byte := _load_byte(lhs); + rhs_byte := _load_byte(rhs); + result := lhs_byte + -rhs_byte; + + lhs := lhs + 1; + rhs := rhs + 1; + count := count + -1; + + if result = 0 then + goto .memcmp_loop + end + end; + + return result +end; + +(** + * Copies memory. + * + * Parameters: + * destination - Destination. + * source - Source. + * count - Size. 
+ * + * Returns the destination pointer advanced past the last copied byte. + *) +proc _memcpy(destination: Word, source: Word, count: Word); +var + current_byte: Word; +begin + .memcpy_loop; + if count <> 0 then + current_byte := _load_byte(source); + _store_byte(current_byte, destination); + + destination := destination + 1; + source := source + 1; + count := count + -1; + goto .memcpy_loop + end; + + return destination +end; + (* Emits an li instruction that loads the integer token under the lexer into t0 and skips the token. *) +proc _compile_integer_literal(); +var + integer_token: Word; + integer_length: Word; + token_kind: Word; +begin + _write_z("\tli t0, \0"); + + integer_token := _lexer_global_get_start(); + integer_length := _lexer_global_get_end() + -integer_token; + + _write_s(integer_token, integer_length); + _lexer_skip_token(); + + _write_c('\n') +end; + (* Emits an li instruction whose operand is the character token text copied as is, then skips the token. *) +proc _compile_character_literal(); +var + character: Word; + token_kind: Word; + character_length: Word; +begin + character := _lexer_global_get_start(); + character_length := _lexer_global_get_end() + -character; + + _write_z("\tli t0, \0"); + _write_s(character, character_length); + _write_c('\n'); + _lexer_skip_token() +end; + (* Compiles an identifier used as a value. A name found in the global symbol table is compiled as an enumeration value, anything else as a load through its designator address. *) +proc _compile_variable_expression(); +var + name: Word; + lookup_result: Word; + name_token: Word; +begin + name := _lexer_global_get_start(); + name_token := _lexer_global_get_end() + -name; + lookup_result := _symbol_table_lookup(@symbol_table_global, name, name_token); + + if lookup_result <> 0 then + _compile_enumeration_value(lookup_result) + else + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0") + end +end; + +(** + * Compiles a take address expression, starting with an "@" sign. + * The designator code already leaves the address in t0. + *) +proc _compile_address_expression(); +begin + _lexer_skip_token(); + _compile_designator() +end; + +(** + * Compile unary negation, "-" sign. + *) +proc _compile_negate_expression(); +begin + _lexer_skip_token(); + _compile_term(); + _write_z("\tneg t0, t0\n\0") +end; + +(* Compile the unary "~" expression; emits the not instruction. 
*) +proc _compile_not_expression(); +var + token_kind: Word; +begin + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _compile_term(); + _write_z("\tnot t0, t0\n\0") +end; + (* Registers the string token with _add_string and emits code that adds the returned offset to the address of the strings symbol, leaving the sum in t0. *) +proc _compile_string_literal(); +var + token_kind: Word; + token_start: Word; + length: Word; + offset: Word; +begin + _lexer_read_token(@token_kind); + token_start := _lexer_global_get_start(); + length := _string_length(token_start); + offset := _add_string(token_start); + + _lexer_skip_token(); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(offset); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0") +end; + (* Compiles a single operand, chosen by the kind of the current token. The generated code leaves the value in t0. Other token kinds emit nothing. *) +proc _compile_term(); +var + current_character: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_character() then + _compile_character_literal() + elsif token_kind = _lexer_token_kind_string() then + _compile_string_literal() + elsif token_kind = _lexer_token_kind_integer() then + _compile_integer_literal() + elsif token_kind = _lexer_token_kind_at() then + _compile_address_expression() + elsif token_kind = _lexer_token_kind_minus() then + _compile_negate_expression() + elsif token_kind = _lexer_token_kind_not() then + _compile_not_expression() + elsif token_kind = _lexer_token_kind_identifier() then + current_character := _lexer_global_get_start(); + current_character := _load_byte(current_character); + + (* This is a call if the identifier starts with an underscore. The call result is moved from a0 to t0. *) + if current_character = '_' then + _compile_call(); + _write_z("\tmv t0, a0\n\0") + else + _compile_variable_expression() + end + end +end; + (* Compiles the right operand of a binary operator. Afterwards t0 holds the right value and t1 the left one. *) +proc _compile_binary_rhs(); +begin + (* Save the value of the left expression on the stack. 
*) + _write_z("\tsw t0, 64(sp)\n\0"); + _compile_term(); + + (* Load the left expression from the stack into t1. *) + _write_z("\tlw t1, 64(sp)\n\0") +end; + +(** + * Compiles a term optionally followed by one binary operator and a second term. + * In the generated code t1 holds the left operand, t0 the right operand and afterwards the result. + *) +proc _compile_expression(); +var + token_kind: Word; +begin + _compile_term(); + + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_plus() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tadd t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_minus() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: left (t1) minus right (t0). *) + _write_z("\tsub t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_multiplication() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tmul t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_and() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tand t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_or() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tor t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_xor() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_equals() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: the xor is zero only for equal operands. *) + _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0") + elsif token_kind = _lexer_token_kind_remainder() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: left (t1) modulo right (t0). *) + _write_z("\trem t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_division() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: left (t1) divided by right (t0). *) + _write_z("\tdiv t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_less_than() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("\tslt t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_greater_than() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: right (t0) less than left (t1). *) + _write_z("\tslt t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_less_equal() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: the negation of right (t0) less than left (t1). *) + _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0") + elsif token_kind = _lexer_token_kind_not_equal() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: the xor is non-zero only for different operands. *) + _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0") + elsif token_kind = _lexer_token_kind_greater_equal() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation: the negation of left (t1) less than right (t0). *) + _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0") + end; + + .compile_expression_end; +end; + (* Compiles a procedure call. Each argument is evaluated and saved on the stack at 116 - 4 * index, then all saved arguments are loaded into the registers a0, a1 and so on right before the call instruction. *) +proc _compile_call(); +var + name_length: Word; + name: Word; + argument_count: Word; + stack_offset: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; + argument_count := 0; + + (* Skip the identifier and left paren. *) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + (* An immediately following right paren means there are no arguments. *) + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_right_paren() then + goto .compile_call_finalize + end; + .compile_call_loop; + _compile_expression(); + + (* Save the argument on the stack. *) + _write_z("\tsw t0, \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + stack_offset := argument_count * 4; + _write_i(116 + -stack_offset); + + _write_z("(sp)\n\0"); + + (* Add one to the argument counter. *) + argument_count := argument_count + 1; + + _lexer_read_token(@token_kind); + (* Anything but a comma ends the argument list. *) + if token_kind <> _lexer_token_kind_comma() then + goto .compile_call_finalize + end; + _lexer_skip_token(); + goto .compile_call_loop; + + .compile_call_finalize; + (* Load the argument from the stack. 
*) + if argument_count <> 0 then + (* Decrement the argument counter. *) + argument_count := argument_count + -1; + + _write_z("\tlw a\0"); + _write_i(argument_count); + + _write_z(", \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + stack_offset := argument_count * 4; + _write_i(116 + -stack_offset); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize + end; + + .compile_call_end; + _write_z("\tcall \0"); + _write_s(name, name_length); + _write_c('\n'); + + (* Skip the right paren. *) + _lexer_read_token(@token_kind); + _lexer_skip_token() +end; + (* Compiles a goto statement into a jump to the named label. A dot token in front of the label name is skipped if present. *) +proc _compile_goto(); +var + next_token: Word; + next_length: Word; + token_kind: Word; +begin + _lexer_skip_token(); + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_dot() then + _lexer_skip_token(); + _lexer_read_token(@token_kind) + end; + next_token := _lexer_global_get_start(); + next_length := _lexer_global_get_end() + -next_token; + + _write_z("\tj .\0"); + + _write_s(next_token, next_length); + _lexer_skip_token() +end; + (* Emits code that places the address of a local symbol, sp plus the offset stored in its info, into t0, and skips the name token. *) +proc _compile_local_designator(symbol: Word); +var + variable_offset: Word; +begin + _write_z("\taddi t0, sp, \0"); + variable_offset := _parameter_info_get_offset(symbol); + _write_i(variable_offset); + _write_c('\n'); + _lexer_skip_token() +end; + (* Emits code that loads the address of the global named by the current token into t0, and skips the token. *) +proc _compile_global_designator(); +var + name: Word; + token_kind: Word; + token_length: Word; +begin + _write_z("\tla t0, \0"); + + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + token_length := _lexer_global_get_end() + -name; + _write_s(name, token_length); + _lexer_skip_token(); + + _write_c('\n') +end; + (* Compiles an enumeration value, written as type name, one token and member name, into an li of the 1-based position of the member. Nothing is emitted if the member is not found. *) +proc _compile_enumeration_value(symbol: Word); +var + enumeration_type: Word; + members: Word; + members_length: Word; + token_type: Word; + value_name: Word; + name_length: Word; + member_name: Word; + member_length: Word; + counter: Word; +begin + enumeration_type := _type_info_get_type(symbol); + members := _enumeration_type_get_members(enumeration_type); 
members_length := _enumeration_type_get_length(enumeration_type); + + (* Skip enumeration type name and dot. Read the enumeration value. *) + _lexer_skip_token(); + _lexer_read_token(@token_type); + _lexer_skip_token(); + _lexer_read_token(@token_type); + + value_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -value_name; + _lexer_skip_token(); + counter := 1; + + .compile_enumeration_value_members; + if members_length > 0 then + (* Each member entry is a name pointer followed by the name length, 8 bytes in total. *) + member_name := _load_word(members); + member_length := _load_word(members + 4); + + if _lexer_compare_keyword(value_name, name_length, member_name, member_length) = 0 then + (* The names differ, try the next member. *) + members_length := members_length + -1; + members := members + 8; + counter := counter + 1; + goto .compile_enumeration_value_members + end; + _write_z("\tli t0, \0"); + _write_i(counter); + _write_c('\n') + end +end; + (* Emits code that computes the address of the named variable into t0, local symbols first, otherwise a global. A following hat token emits a load that replaces the address with the word stored there. *) +proc _compile_designator(); +var + name_token: Word; + lookup_result: Word; + token_kind: Word; + name: Word; +begin + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_token := _lexer_global_get_end() + -name; + lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); + + if lookup_result <> 0 then + _compile_local_designator(lookup_result) + else + _compile_global_designator() + end; + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_hat() then + _lexer_skip_token(); + _write_z("\tlw t0, (t0)\n\0") + end +end; + (* Compiles an assignment: the target address is kept at 60(sp) while the value expression is evaluated, then the value is stored through it. *) +proc _compile_assignment(); +var + token_kind: Word; +begin + _compile_designator(); + + (* Save the assignee address on the stack. *) + _write_z("\tsw t0, 60(sp)\n\0"); + + (* Skip the assignment sign (:=) with surrounding whitespaces. The token is skipped without being checked. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* Compile the assignment. *) + _compile_expression(); + + _write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0") +end; + (* Compiles a return statement by moving the value of its expression into a0. *) +proc _compile_return_statement(); +var + token_kind: Word; +begin + (* Skip "return" keyword and whitespace after it. 
*) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + (* Only the move into a0 is emitted here, no jump to the procedure epilogue. *) + _compile_expression(); + _write_z("\tmv a0, t0\n\0") +end; + +(** + * Writes a label, .Ln, where n is a unique number. + * + * Parameters: + * counter - Label counter. + *) +proc _write_label(counter: Word); +begin + _write_z(".L\0"); + _write_i(counter) +end; + +(** + * Compiles one condition of an if statement together with its statement list. + * + * Parameters: + * after_end_label - Number of the label placed after the whole if statement. + *) +proc _compile_condition(after_end_label: Word); +var + condition_label: Word; + token_kind: Word; +begin + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* condition_label is the label in front of the next elsif condition or end. *) + condition_label := label_counter; + label_counter := label_counter + 1; + (* Branch over the statement list if the condition is zero. *) + _write_z("\tbeqz t0, \0"); + _write_label(condition_label); + _write_c('\n'); + + _compile_statement_list(); + (* After the statement list jump behind the whole if statement. *) + _write_z("\tj \0"); + _write_label(after_end_label); + _write_c('\n'); + + _write_label(condition_label); + _write_z(":\n\0") +end; + (* Compiles an if statement with any number of elsif branches and an optional else branch. All branches share one label placed after the statement. *) +proc _compile_if(); +var + after_end_label: Word; + condition_label: Word; + token_kind: Word; +begin + (* Skip "if ". *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + after_end_label := label_counter; + label_counter := label_counter + 1; + + _compile_condition(after_end_label); + .compile_if_loop; + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_else() then + _lexer_skip_token(); + _compile_statement_list() + elsif token_kind = _lexer_token_kind_elsif() then + _lexer_skip_token(); + _compile_condition(after_end_label); + + goto .compile_if_loop + end; + (* Skip the token that closes the statement, presumably "end". *) + _lexer_skip_token(); + + _write_label(after_end_label); + _write_z(":\n\0") +end; + (* Compiles a label declaration into an assembly label with the same name, prefixed with a dot. *) +proc _compile_label_declaration(); +var + label_token: Word; + token_kind: Word; + name: Word; +begin + (* Skip the dot. 
*) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + label_token := _lexer_global_get_end() + -name; + _write_c('.'); + _write_s(name, label_token); + _write_z(":\n\0"); + _lexer_skip_token() +end; + (* Compiles one statement, chosen by the kind of its first token, and writes a newline after it. For any other token only the newline is written. *) +proc _compile_statement(); +var + current_byte: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_goto() then + _compile_goto() + elsif token_kind = _lexer_token_kind_if() then + _compile_if() + elsif token_kind = _lexer_token_kind_return() then + _compile_return_statement() + elsif token_kind = _lexer_token_kind_dot() then + _compile_label_declaration() + elsif token_kind = _lexer_token_kind_identifier() then + current_byte := _lexer_global_get_start(); + current_byte := _load_byte(current_byte); + + (* This is a call if the statement starts with an underscore, otherwise it is an assignment. *) + if current_byte = '_' then + _compile_call() + else + _compile_assignment() + end + end; + _write_c('\n') +end; + (* Compiles statements separated by semicolons, recursing once per statement. Comments before and after the statements are skipped. *) +proc _compile_statement_list(); +var + token_kind: Word; +begin + _skip_empty_lines(); + _compile_statement(); + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_semicolon() then + _lexer_skip_token(); + _compile_statement_list() + end; + _skip_empty_lines() +end; + +(** + * Writes a register name to the standard output. + * + * Parameters: + * register_character - Register character. + * register_number - Register number. 
+ *) +proc _write_register(register_character: Word, register_number: Word); +begin + _write_c(register_character); + _write_c(register_number + '0') +end; + (* Accessors for the words of a type description: kind at offset 0 and size at offset 4. Enumeration types add members at offset 8 and length at offset 12. *) +proc _type_get_kind(this: Word); + return _load_word(this) +end; + +proc _type_set_kind(this: Word, value: Word); +begin + _store_word(value, this) +end; + +proc _type_get_size(this: Word); + return _load_word(this + 4) +end; + +proc _type_set_size(this: Word, value: Word); +begin + _store_word(value, this + 4) +end; + +proc _enumeration_type_get_members(this: Word); + return _load_word(this + 8) +end; + +proc _enumeration_type_set_members(this: Word, value: Word); +begin + _store_word(value, this + 8) +end; + +proc _enumeration_type_get_length(this: Word); + return _load_word(this + 12) +end; + +proc _enumeration_type_set_length(this: Word, value: Word); +begin + _store_word(value, this + 12) +end; + +(** + * Reads and creates enumeration type representation. + * + * record + * type_kind: Word; + * size: Word; + * members: StringArray; + * length: Word + * end; + * + * The member list is written to memory_free_pointer as pairs of + * name pointer and name length, 8 bytes per member. + * + * Returns enumeration type description. + *) +proc _read_type_enumeration(); +var + token_kind: Word; + enumeration_name: Word; + name_length: Word; + memory_start: Word; + member_count: Word; + result: Word; +begin + _lexer_skip_token(); + memory_start := memory_free_pointer; + member_count := 0; + (* A right paren straight away means an enumeration without members. *) + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_right_paren() then + goto .read_type_enumeration_end + end; + .read_type_enumeration_loop; + member_count := member_count + 1; + + enumeration_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -enumeration_name; + + _store_word(enumeration_name, memory_free_pointer); + memory_free_pointer := memory_free_pointer + 4; + + _store_word(name_length, memory_free_pointer); + memory_free_pointer := memory_free_pointer + 4; + + (* Skip the identifier. 
*) + _lexer_skip_token(); + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_comma() then + _lexer_skip_token(); + _lexer_read_token(@token_kind); + goto .read_type_enumeration_loop + end; + + .read_type_enumeration_end; + _lexer_skip_token(); + + (* The resulting structure is 16 bytes long. *) + result := memory_free_pointer; + memory_free_pointer := memory_free_pointer + 16; + + (* ENUMERATION_TYPE is 2. The size of an enumeration value is set to 4. *) + _type_set_kind(result, 2); + _type_set_size(result, 4); + _enumeration_type_set_members(result, memory_start); + _enumeration_type_set_length(result, member_count); + + return _type_info_create(result) +end; + (* Reads a type expression. An identifier is looked up in the global symbol table, a left paren starts an enumeration. Returns 0 for any other token. NOTE(review): the lookup result is passed to _type_info_get_type without a check for 0 - confirm unknown type names cannot occur here. *) +proc _read_type_expression(); +var + token_kind: Word; + type_name: Word; + name_length: Word; + result: Word; +begin + result := 0; + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_identifier() then + (* Named type. *) + type_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -type_name; + result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); + result := _type_info_get_type(result); + + _lexer_skip_token() + elsif token_kind = _lexer_token_kind_left_paren() then + result := _read_type_enumeration() + end; + + return result +end; + (* Returns the word at offset 4 of a type info, which _type_info_create fills with the type representation. *) +proc _type_info_get_type(this: Word); + return _load_word(this + 4) +end; + +(** + * Parameters: + * parameter_index - Parameter index. 
+ * + * Allocates a two word parameter info at memory_free_pointer: the tag 2 + * followed by the stack offset of the parameter. Returns its address. + *) +proc _parameter_info_create(parameter_index: Word); +var + offset: Word; + current_word: Word; + result: Word; +begin + result := memory_free_pointer; + current_word := result; + (* 2 is INFO_PARAMETER *) + _store_word(2, current_word); + + current_word := current_word + 4; + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + offset := parameter_index * 4; + _store_word(88 + -offset, current_word); + + memory_free_pointer := current_word + 4; + + return result +end; + (* Returns the stack offset stored in the second word of a parameter info. *) +proc _parameter_info_get_offset(this: Word); +begin + this := this + 4; + return _load_word(this) +end; + (* Allocates a two word type info at memory_free_pointer: the tag 1 followed by the type representation. Returns its address. *) +proc _type_info_create(type_representation: Word); +var + result: Word; + current_word: Word; +begin + result := memory_free_pointer; + current_word := result; + (* 1 is INFO_TYPE *) + _store_word(1, current_word); + + current_word := current_word + 4; + _store_word(type_representation, current_word); + + memory_free_pointer := current_word + 4; + + return result +end; + +(** + * Parameters: + * temporary_index - Index of the temporary (local variable). + * + * Allocates a two word temporary info at memory_free_pointer: the tag 3 + * followed by the stack offset of the variable. Returns its address. + *) +proc _temporary_info_create(temporary_index: Word); +var + offset: Word; + current_word: Word; + result: Word; +begin + result := memory_free_pointer; + current_word := result; + (* 3 is INFO_TEMPORARY *) + _store_word(3, current_word); + + current_word := current_word + 4; + + (* Calculate the stack offset: 4 * variable_counter. *) + _store_word(temporary_index * 4, current_word); + + memory_free_pointer := current_word + 4; + + return result +end; + (* Returns the stack offset stored in the second word of a temporary info. *) +proc _temporary_info_get_offset(this: Word); +begin + this := this + 4; + return _load_word(this) +end; + +(** + * Parameters: + * parameter_index - Parameter index. + *) +proc _read_procedure_parameter(parameter_index: Word); +var + name_length: Word; + info: Word; + name_position: Word; + token_kind: Word; +begin + (* Read the parameter name. 
*) + _lexer_read_token(@token_kind); + name_position := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name_position; + _lexer_skip_token(); + + (* Skip colon and space in front of the type expression. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + (* The type is read but its result is not used. *) + _read_type_expression(); + (* Emit the store of the argument register with this index into the stack slot of the parameter. *) + _write_z("\tsw a\0"); + _write_i(parameter_index); + _write_z(", \0"); + + info := _parameter_info_create(parameter_index); + _symbol_table_enter(@symbol_table_local, name_position, name_length, info); + (* From here on info holds the stack offset instead of the info pointer. *) + info := _parameter_info_get_offset(info); + _write_i(info); + + _write_z("(sp)\n\0") +end; + (* Reads the parameter list in parentheses, numbering the parameters from 0. *) +proc _read_procedure_parameters(); +var + parameter_counter: Word; + token_kind: Word; +begin + (* Skip open paren. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + parameter_counter := 0; + + .compile_procedure_prologue_skip; + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_right_paren() then + _read_procedure_parameter(parameter_counter); + parameter_counter := parameter_counter + 1; + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_comma() then + _lexer_skip_token(); + goto .compile_procedure_prologue_skip + end + end; + (* Skip close paren. *) + _lexer_skip_token() +end; + +(** + * Parameters: + * variable_index - Variable index. 
+ * + * Reads one local variable declaration and enters the variable + * into the local symbol table. + *) +proc _read_procedure_temporary(variable_index: Word); +var + name_length: Word; + info: Word; + name_position: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + name_position := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name_position; + _lexer_skip_token(); + + (* Skip the token after the variable name, presumably the colon. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + (* The type is read but its result is not used. *) + _read_type_expression(); + + info := _temporary_info_create(variable_index); + _symbol_table_enter(@symbol_table_local, name_position, name_length, info); + + (* Skip semicolon and newline after the variable declaration *) + _lexer_read_token(@token_kind); + _lexer_skip_token() +end; + (* Reads an optional var section, registering every declared variable with a running index that starts at 0. The section ends at the first token that is not an identifier. *) +proc _read_procedure_temporaries(); +var + temporary_counter: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_var() then + _lexer_skip_token(); + temporary_counter := 0; + + .read_local_variables_loop; + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_identifier() then + _read_procedure_temporary(temporary_counter); + + temporary_counter := temporary_counter + 1; + goto .read_local_variables_loop + end + end +end; + (* Compiles one procedure: type directive and label, prologue, parameters, local variables, body and epilogue. *) +proc _compile_procedure(); +var + name_pointer: Word; + name_length: Word; + token_kind: Word; +begin + (* Skip "proc ". *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + (* Clear local symbol table. *) + _store_word(0, @symbol_table_local); + + _lexer_read_token(@token_kind); + name_pointer := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name_pointer; + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_s(name_pointer, name_length); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_s(name_pointer, name_length); + _write_z(":\n\0"); + + (* Skip procedure name. 
*) + _lexer_skip_token(); + (* Prologue: reserve a 128 byte frame and save ra and s0 at its top. *) + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + _read_procedure_parameters(); + + (* Skip semicolon and newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _read_procedure_temporaries(); + + (* A body is either a statement list after "begin" or a single return statement. *) + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_begin() then + _lexer_skip_token(); + _compile_statement_list() + elsif token_kind = _lexer_token_kind_return() then + _compile_return_statement() + end; + + (* Write the epilogue. *) + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + (* Skip the "end" keyword, semicolon and newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token() +end; + +(** + * Skips comments. + *) +proc _skip_empty_lines(); +var + token_kind: Word; +begin + .skip_empty_lines_rerun; + + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_comment() then + _lexer_skip_token(); + goto .skip_empty_lines_rerun + end +end; + +(** + * Compile global variable initializer. + * + * Handles a string, a list starting with "S(", an address after "@" + * and an integer. List elements are compiled recursively. + *) +proc _compile_global_initializer(); +var + current_byte: Word; + length: Word; + token_kind: Word; + token_start: Word; + offset: Word; +begin + _lexer_read_token(@token_kind); + token_start := _lexer_global_get_start(); + current_byte := _load_byte(token_start); + + if token_kind = _lexer_token_kind_string() then + _write_z("\n\t.word strings + \0"); + length := _string_length(token_start); + + (* Pass the string offset explicitly instead of relying on a0 surviving between the two calls. *) + offset := _add_string(token_start); + _write_i(offset); + + (* Skip the quoted string. *) + _lexer_skip_token(); + + goto .compile_global_initializer_end + elsif current_byte = 'S' then + (* Skip "S(". 
*) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _lexer_read_token(@token_kind); + (* A right paren straight away means an empty list. *) + if token_kind = _lexer_token_kind_right_paren() then + goto .compile_global_initializer_closing + end; + goto .compile_global_initializer_loop + elsif token_kind = _lexer_token_kind_at() then + (* Skip @. The following token is written as the word value. *) + _lexer_skip_token(); + _write_z("\n\t.word \0"); + _lexer_read_token(@token_kind); + token_start := _lexer_global_get_start(); + _write_s(token_start, _lexer_global_get_end() + -token_start); + _lexer_skip_token(); + + goto .compile_global_initializer_end + elsif token_kind = _lexer_token_kind_integer() then + _write_z("\n\t.word \0"); + _write_s(token_start, _lexer_global_get_end() + -token_start); + _lexer_skip_token(); + + goto .compile_global_initializer_end + end; + (* Compile the list elements recursively until the closing paren is reached. *) + .compile_global_initializer_loop; + _compile_global_initializer(); + + _lexer_read_token(@token_kind); + if token_kind <> _lexer_token_kind_right_paren() then + (* Skip comma and whitespace after it. *) + _lexer_skip_token(); + + goto .compile_global_initializer_loop + end; + + .compile_global_initializer_closing; + (* Skip ")" *) + _lexer_skip_token(); + + .compile_global_initializer_end +end; + (* Emits the type directive and label for a constant and compiles its initializer. The current token is expected to be the constant name. *) +proc _compile_constant_declaration(); +var + name: Word; + name_length: Word; + token_kind: Word; +begin + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; + + _write_z(".type \0"); + _write_s(name, name_length); + _write_z(", @object\n\0"); + + _write_s(name, name_length); + _write_c(':'); + + (* Skip the constant name with assignment sign and surrounding whitespaces. *) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _compile_global_initializer(); + + (* Skip semicolon and newline. 
*) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _write_c('\n') +end; + (* Reads one type declaration: the name, one separator token, a type expression and one terminating token. The result of the type expression is entered into the global symbol table under the name. *) +proc _compile_type_declaration(); +var + token_kind: Word; + type_name: Word; + name_length: Word; + type_info: Word; +begin + type_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -type_name; + + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + type_info := _read_type_expression(); + + _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info); + + _lexer_read_token(@token_kind); + _lexer_skip_token() +end; + (* Compiles the optional type section: every identifier after the "type" keyword starts a type declaration. *) +proc _compile_type_part(); +var + token_kind: Word; +begin + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_type() then + goto .compile_type_part_end + end; + _lexer_skip_token(); + + .compile_type_part_loop; + _skip_empty_lines(); + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_identifier() then + _compile_type_declaration(); + goto .compile_type_part_loop + end; + + .compile_type_part_end +end; + (* Compiles the optional const section into the .rodata section: every identifier after the "const" keyword starts a constant declaration. *) +proc _compile_const_part(); +var + token_kind: Word; +begin + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_const() then + goto .compile_const_part_end + end; + (* Skip "const" with the newline after it. *) + _lexer_skip_token(); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + + .compile_const_part_loop; + _skip_empty_lines(); + + (* If the character at the line beginning is not indentation, + it is probably the next code section. 
*) + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_identifier() then + _compile_constant_declaration(); + goto .compile_const_part_loop + end; + + .compile_const_part_end +end; + (* Emits the type directive, label and storage for one global variable. *) +proc _compile_variable_declaration(); +var + name: Word; + name_length: Word; + token_kind: Word; +begin + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; + + _write_z(".type \0"); + _write_s(name, name_length); + _write_z(", @object\n\0"); + + _write_s(name, name_length); + _write_c(':'); + + (* Skip the variable name and colon with space before the type. *) + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + (* From here on name and name_length describe the type name; token_kind ends up describing the token after it. *) + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; + _lexer_skip_token(); + _lexer_read_token(@token_kind); + + if _lexer_compare_keyword("Array", 5, name, name_length) = 1 then + (* A variable of the type Array is emitted as a zeroed block of 819200 bytes. *) + _write_z(" .zero 819200\0") + elsif token_kind = _lexer_token_kind_assignment() then + (* Skip the assignment sign with surrounding whitespaces. *) + _lexer_skip_token(); + _compile_global_initializer() + else + (* Without an initializer the variable is a single zero word. *) + _write_z(" .word 0\n\0") + end; + + (* Skip semicolon and newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _write_c('\n') +end; + (* Compiles the optional var section into the .data section: every identifier after the "var" keyword starts a variable declaration. *) +proc _compile_var_part(); +var + token_kind: Word; +begin + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_var() then + goto .compile_var_part_end + end; + (* Skip "var" and newline. *) + _lexer_skip_token(); + _write_z(".section .data\n\0"); + + .compile_var_part_loop; + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_identifier() then + _compile_variable_declaration(); + goto .compile_var_part_loop + end; + + .compile_var_part_end +end; + +(** + * Process the source code and print the generated code. 
+ * + * The type, const and var sections are compiled first, then the + * built-in routines are written, then every procedure of the module. + *) +proc _compile_module(); +var + token_kind: Word; +begin + _compile_type_part(); + _compile_const_part(); + _skip_empty_lines(); + _compile_var_part(); + (* Built-in routines written as fixed assembly: _syscall, _load_byte, _load_word, _store_byte and _store_word. *) + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); + + .compile_module_loop; + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_proc() then + _compile_procedure(); + goto .compile_module_loop + end +end; + (* Compiles the whole input: the module first, then the collected string data as one .ascii directive under the strings symbol. *) +proc _compile(); +var + compiler_strings_copy: Word; + compiler_strings_end: Word; + current_byte: Word; +begin + _write_z(".globl _start\n\n\0"); + _compile_module(); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + (* Copy the bytes between compiler_strings and compiler_strings_position to the output unchanged. *) + compiler_strings_copy := @compiler_strings; + compiler_strings_end := compiler_strings_position; + + .compile_loop; + if compiler_strings_copy < compiler_strings_end then + current_byte := _load_byte(compiler_strings_copy); + compiler_strings_copy := compiler_strings_copy + 1; + _write_c(current_byte); + + goto .compile_loop + end; + _write_c('"'); + _write_c('\n') +end; + +(** + * Terminates the program. a0 contains the return code. + * + * Parameters: + * a0 - Status code. + * + * NOTE(review): the procedure declares no parameter and passes 0 as the + * first system call argument, so the status code appears to be always 0. + * Confirm whether that is intended. 93 is passed as the system call number. + *) +proc _exit(); +begin + _syscall(0, 0, 0, 0, 0, 0, 93) +end; + +(** + * Looks for a symbol in the given symbol table. + * + * Parameters: + * symbol_table - Symbol table. + * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * + * Returns the symbol pointer or 0 in a0. 
+ * + * The table is a length word followed by entries of 12 bytes each: + * name pointer, name length and symbol pointer. + *) +proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); +var + result: Word; + symbol_table_length: Word; + current_name: Word; + current_length: Word; +begin + result := 0; + + (* The first word in the symbol table is its length, get it. *) + symbol_table_length := _load_word(symbol_table); + + (* Go to the first symbol position. *) + symbol_table := symbol_table + 4; + + .symbol_table_lookup_loop; + if symbol_table_length = 0 then + goto .symbol_table_lookup_end + end; + + (* Symbol name pointer and length. *) + current_name := _load_word(symbol_table); + current_length := _load_word(symbol_table + 4); + + (* If lengths don't match, continue with the next entry. *) + if name_length <> current_length then + goto .symbol_table_lookup_repeat + end; + (* If names don't match, continue with the next entry. *) + if _memcmp(symbol_name, current_name, name_length) <> 0 then + goto .symbol_table_lookup_repeat + end; + (* Otherwise, the symbol is found. *) + result := _load_word(symbol_table + 8); + goto .symbol_table_lookup_end; + + .symbol_table_lookup_repeat; + symbol_table := symbol_table + 12; + symbol_table_length := symbol_table_length + -1; + goto .symbol_table_lookup_loop; + + .symbol_table_lookup_end; + return result +end; + +(** + * Inserts a symbol into the table. + * + * The entry is appended after the existing ones; the name is not + * checked against entries already in the table. + * + * Parameters: + * symbol_table - Symbol table. + * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * symbol - Symbol pointer. + *) +proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); +var + table_length: Word; + symbol_pointer: Word; +begin + (* The first word in the symbol table is its length, get it. *) + table_length := _load_word(symbol_table); + + (* Calculate the offset for the new symbol. 
*) + symbol_pointer := table_length * 12; + symbol_pointer := symbol_pointer + 4; + symbol_pointer := symbol_table + symbol_pointer; + + _store_word(symbol_name, symbol_pointer); + symbol_pointer := symbol_pointer + 4; + _store_word(name_length, symbol_pointer); + symbol_pointer := symbol_pointer + 4; + _store_word(symbol, symbol_pointer); + + (* Increment the symbol table length. *) + table_length := table_length + 1; + _store_word(table_length, symbol_table) +end; + +(** + * Resets the global symbol table and enters the built-in symbols. + * The third argument of each _symbol_table_enter call is the name length. + *) +proc _symbol_table_build(); +begin + (* Set the table length to 0. *) + _store_word(0, @symbol_table_global); + + (* Enter built-in symbols. *) + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_array, 5, @symbol_type_info_array) +end; + + +(** + * Classification table assigns each possible character to a group (class). All + * characters of the same group are handled equivalently.
+ * + * Transition = record + * action: TransitionAction; + * next_state: TransitionState + * end; + *) +proc _lexer_class_invalid(); + return 1 +end; + +proc _lexer_class_digit(); + return 2 +end; + +proc _lexer_class_alpha(); + return 3 +end; + +proc _lexer_class_space(); + return 4 +end; + +proc _lexer_class_colon(); + return 5 +end; + +proc _lexer_class_equals(); + return 6 +end; + +proc _lexer_class_left_paren(); + return 7 +end; + +proc _lexer_class_right_paren(); + return 8 +end; + +proc _lexer_class_asterisk(); + return 9 +end; + +proc _lexer_class_backslash(); + return 10 +end; + +proc _lexer_class_single(); + return 11 +end; + +proc _lexer_class_hex(); + return 12 +end; + +proc _lexer_class_zero(); + return 13 +end; + +proc _lexer_class_x(); + return 14 +end; + +proc _lexer_class_eof(); + return 15 +end; + +proc _lexer_class_dot(); + return 16 +end; + +proc _lexer_class_minus(); + return 17 +end; + +proc _lexer_class_single_quote(); + return 18 +end; + +proc _lexer_class_double_quote(); + return 19 +end; + +proc _lexer_class_greater(); + return 20 +end; + +proc _lexer_class_less(); + return 21 +end; + +proc _lexer_class_other(); + return 22 +end; + +proc _lexer_state_start(); + return 1 +end; + +proc _lexer_state_colon(); + return 2 +end; + +proc _lexer_state_identifier(); + return 3 +end; + +proc _lexer_state_decimal(); + return 4 +end; + +proc _lexer_state_leading_zero(); + return 5 +end; + +proc _lexer_state_greater(); + return 6 +end; + +proc _lexer_state_minus(); + return 7 +end; + +proc _lexer_state_left_paren(); + return 8 +end; + +proc _lexer_state_less(); + return 9 +end; + +proc _lexer_state_dot(); + return 10 +end; + +proc _lexer_state_comment(); + return 11 +end; + +proc _lexer_state_closing_comment(); + return 12 +end; + +proc _lexer_state_character(); + return 13 +end; + +proc _lexer_state_character_escape(); + return 14 +end; + +proc _lexer_state_string(); + return 15 +end; + +proc _lexer_state_string_escape(); + return 16 +end; + +proc 
_lexer_state_end(); + return 17 +end; + +proc _lexer_action_none(); + return 1 +end; + +proc _lexer_action_accumulate(); + return 2 +end; + +proc _lexer_action_skip(); + return 3 +end; + +proc _lexer_action_single(); + return 4 +end; + +proc _lexer_action_eof(); + return 5 +end; + +proc _lexer_action_finalize(); + return 6 +end; + +proc _lexer_action_composite(); + return 7 +end; + +proc _lexer_action_key_id(); + return 8 +end; + +proc _lexer_action_integer(); + return 9 +end; + +proc _lexer_action_delimited(); + return 10 +end; + +(** + * Assigns a value to the array element at the given index. + * + * Parameters: + * array - Array pointer. + * index - 1-based index of the word in the array. + * data - Data to assign. + *) +proc _assign_at(array: Word, index: Word, data: Word); +var + target: Word; +begin + (* Convert the 1-based index into a byte offset, 4 bytes per element. *) + target := index + -1; + target := target * 4; + target := array + target; + + _store_word(data, target) +end; + +(** + * Reads the array element at the given index. + * + * Parameters: + * array - Array pointer. + * index - 1-based index of the word in the array. + * + * Returns the loaded word. + *) +proc _get_at(array: Word, index: Word); +var + target: Word; +begin + (* Convert the 1-based index into a byte offset, 4 bytes per element. *) + target := index + -1; + target := target * 4; + target := array + target; + + return _load_word(target) +end; + +(** + * Initializes the array with character classes.
+ *) +proc _lexer_classifications(); +var + code: Word; +begin + _assign_at(@classification, 1, _lexer_class_eof()); + _assign_at(@classification, 2, _lexer_class_invalid()); + _assign_at(@classification, 3, _lexer_class_invalid()); + _assign_at(@classification, 4, _lexer_class_invalid()); + _assign_at(@classification, 5, _lexer_class_invalid()); + _assign_at(@classification, 6, _lexer_class_invalid()); + _assign_at(@classification, 7, _lexer_class_invalid()); + _assign_at(@classification, 8, _lexer_class_invalid()); + _assign_at(@classification, 9, _lexer_class_invalid()); + _assign_at(@classification, 10, _lexer_class_space()); + _assign_at(@classification, 11, _lexer_class_space()); + _assign_at(@classification, 12, _lexer_class_invalid()); + _assign_at(@classification, 13, _lexer_class_invalid()); + _assign_at(@classification, 14, _lexer_class_space()); + _assign_at(@classification, 15, _lexer_class_invalid()); + _assign_at(@classification, 16, _lexer_class_invalid()); + _assign_at(@classification, 17, _lexer_class_invalid()); + _assign_at(@classification, 18, _lexer_class_invalid()); + _assign_at(@classification, 19, _lexer_class_invalid()); + _assign_at(@classification, 20, _lexer_class_invalid()); + _assign_at(@classification, 21, _lexer_class_invalid()); + _assign_at(@classification, 22, _lexer_class_invalid()); + _assign_at(@classification, 23, _lexer_class_invalid()); + _assign_at(@classification, 24, _lexer_class_invalid()); + _assign_at(@classification, 25, _lexer_class_invalid()); + _assign_at(@classification, 26, _lexer_class_invalid()); + _assign_at(@classification, 27, _lexer_class_invalid()); + _assign_at(@classification, 28, _lexer_class_invalid()); + _assign_at(@classification, 29, _lexer_class_invalid()); + _assign_at(@classification, 30, _lexer_class_invalid()); + _assign_at(@classification, 31, _lexer_class_invalid()); + _assign_at(@classification, 32, _lexer_class_invalid()); + _assign_at(@classification, 33, _lexer_class_space()); + 
_assign_at(@classification, 34, _lexer_class_single()); + _assign_at(@classification, 35, _lexer_class_double_quote()); + _assign_at(@classification, 36, _lexer_class_other()); + _assign_at(@classification, 37, _lexer_class_other()); + _assign_at(@classification, 38, _lexer_class_single()); + _assign_at(@classification, 39, _lexer_class_single()); + _assign_at(@classification, 40, _lexer_class_single_quote()); + _assign_at(@classification, 41, _lexer_class_left_paren()); + _assign_at(@classification, 42, _lexer_class_right_paren()); + _assign_at(@classification, 43, _lexer_class_asterisk()); + _assign_at(@classification, 44, _lexer_class_single()); + _assign_at(@classification, 45, _lexer_class_single()); + _assign_at(@classification, 46, _lexer_class_minus()); + _assign_at(@classification, 47, _lexer_class_dot()); + _assign_at(@classification, 48, _lexer_class_single()); + _assign_at(@classification, 49, _lexer_class_zero()); + _assign_at(@classification, 50, _lexer_class_digit()); + _assign_at(@classification, 51, _lexer_class_digit()); + _assign_at(@classification, 52, _lexer_class_digit()); + _assign_at(@classification, 53, _lexer_class_digit()); + _assign_at(@classification, 54, _lexer_class_digit()); + _assign_at(@classification, 55, _lexer_class_digit()); + _assign_at(@classification, 56, _lexer_class_digit()); + _assign_at(@classification, 57, _lexer_class_digit()); + _assign_at(@classification, 58, _lexer_class_digit()); + _assign_at(@classification, 59, _lexer_class_colon()); + _assign_at(@classification, 60, _lexer_class_single()); + _assign_at(@classification, 61, _lexer_class_less()); + _assign_at(@classification, 62, _lexer_class_equals()); + _assign_at(@classification, 63, _lexer_class_greater()); + _assign_at(@classification, 64, _lexer_class_other()); + _assign_at(@classification, 65, _lexer_class_single()); + _assign_at(@classification, 66, _lexer_class_alpha()); + _assign_at(@classification, 67, _lexer_class_alpha()); + 
_assign_at(@classification, 68, _lexer_class_alpha()); + _assign_at(@classification, 69, _lexer_class_alpha()); + _assign_at(@classification, 70, _lexer_class_alpha()); + _assign_at(@classification, 71, _lexer_class_alpha()); + _assign_at(@classification, 72, _lexer_class_alpha()); + _assign_at(@classification, 73, _lexer_class_alpha()); + _assign_at(@classification, 74, _lexer_class_alpha()); + _assign_at(@classification, 75, _lexer_class_alpha()); + _assign_at(@classification, 76, _lexer_class_alpha()); + _assign_at(@classification, 77, _lexer_class_alpha()); + _assign_at(@classification, 78, _lexer_class_alpha()); + _assign_at(@classification, 79, _lexer_class_alpha()); + _assign_at(@classification, 80, _lexer_class_alpha()); + _assign_at(@classification, 81, _lexer_class_alpha()); + _assign_at(@classification, 82, _lexer_class_alpha()); + _assign_at(@classification, 83, _lexer_class_alpha()); + _assign_at(@classification, 84, _lexer_class_alpha()); + _assign_at(@classification, 85, _lexer_class_alpha()); + _assign_at(@classification, 86, _lexer_class_alpha()); + _assign_at(@classification, 87, _lexer_class_alpha()); + _assign_at(@classification, 88, _lexer_class_alpha()); + _assign_at(@classification, 89, _lexer_class_alpha()); + _assign_at(@classification, 90, _lexer_class_alpha()); + _assign_at(@classification, 91, _lexer_class_alpha()); + _assign_at(@classification, 92, _lexer_class_single()); + _assign_at(@classification, 93, _lexer_class_backslash()); + _assign_at(@classification, 94, _lexer_class_single()); + _assign_at(@classification, 95, _lexer_class_single()); + _assign_at(@classification, 96, _lexer_class_alpha()); + _assign_at(@classification, 97, _lexer_class_other()); + _assign_at(@classification, 98, _lexer_class_hex()); + _assign_at(@classification, 99, _lexer_class_hex()); + _assign_at(@classification, 100, _lexer_class_hex()); + _assign_at(@classification, 101, _lexer_class_hex()); + _assign_at(@classification, 102, _lexer_class_hex()); + 
_assign_at(@classification, 103, _lexer_class_hex()); + _assign_at(@classification, 104, _lexer_class_alpha()); + _assign_at(@classification, 105, _lexer_class_alpha()); + _assign_at(@classification, 106, _lexer_class_alpha()); + _assign_at(@classification, 107, _lexer_class_alpha()); + _assign_at(@classification, 108, _lexer_class_alpha()); + _assign_at(@classification, 109, _lexer_class_alpha()); + _assign_at(@classification, 110, _lexer_class_alpha()); + _assign_at(@classification, 111, _lexer_class_alpha()); + _assign_at(@classification, 112, _lexer_class_alpha()); + _assign_at(@classification, 113, _lexer_class_alpha()); + _assign_at(@classification, 114, _lexer_class_alpha()); + _assign_at(@classification, 115, _lexer_class_alpha()); + _assign_at(@classification, 116, _lexer_class_alpha()); + _assign_at(@classification, 117, _lexer_class_alpha()); + _assign_at(@classification, 118, _lexer_class_alpha()); + _assign_at(@classification, 119, _lexer_class_alpha()); + _assign_at(@classification, 120, _lexer_class_alpha()); + _assign_at(@classification, 121, _lexer_class_x()); + _assign_at(@classification, 122, _lexer_class_alpha()); + _assign_at(@classification, 123, _lexer_class_alpha()); + _assign_at(@classification, 124, _lexer_class_other()); + _assign_at(@classification, 125, _lexer_class_single()); + _assign_at(@classification, 126, _lexer_class_other()); + _assign_at(@classification, 127, _lexer_class_single()); + _assign_at(@classification, 128, _lexer_class_invalid()); + + code := 129; + + (* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) + .create_classification_loop; + _assign_at(@classification, code, _lexer_class_other()); + code := code + 1; + + if code < 257 then + goto .create_classification_loop + end +end; + +(** + * Calculates the address of a transition table entry. + * + * Parameters: + * current_state - 1-based row index (transition state). + * character_class - 1-based column index (character class). + * + * Returns the pointer to the transition entry. + *) +proc _lexer_get_transition(current_state: Word, character_class: Word); +var + transition_table: Word; + row_position: Word; + column_position: Word; + target: Word; +begin + (* Each transition is 8 bytes long (2 words: action and next state). + There are 22 character classes, so a transition row is 8 * 22 = 176 bytes long. *) + row_position := current_state + -1; + row_position := row_position * 176; + + column_position := character_class + -1; + column_position := column_position * 8; + + target := _lexer_get_transition_table() + row_position; + + return target + column_position +end; + +(** + * Stores an action and a next state in one transition table entry. + * + * Parameters: + * current_state - First index into transitions table. + * character_class - Second index into transitions table. + * action - Action to assign. + * next_state - Next state to assign. + *) +proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); +var + transition: Word; +begin + transition := _lexer_get_transition(current_state, character_class); + + _lexer_transition_set_action(transition, action); + _lexer_transition_set_state(transition, next_state) +end; + +(* Sets same action and state transition for all character classes in one transition row. *) + +(** + * Parameters: + * current_state - Current state (Transition state enumeration). + * default_action - Default action (Callback). + * next_state - Next state (Transition state enumeration).
+ *) +proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); +begin + _lexer_set_transition(current_state, _lexer_class_invalid(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_digit(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_alpha(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_space(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_colon(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_equals(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_left_paren(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_right_paren(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_asterisk(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_backslash(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_single(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_hex(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_zero(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_x(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_eof(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_dot(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_minus(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_single_quote(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_double_quote(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_greater(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_less(), 
default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_other(), default_action, next_state) +end; + +(** + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + *) +proc _lexer_transitions(); +begin + (* Start state. *) + _lexer_set_transition(_lexer_state_start(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_colon()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_backslash(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), 
_lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_single(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_greater(), _lexer_action_accumulate(), _lexer_state_greater()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_less(), _lexer_action_accumulate(), _lexer_state_less()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_other(), _lexer_action_none(), _lexer_state_end()); + + (* Colon state. *) + _lexer_default_transition(_lexer_state_colon(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_colon(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); + + (* Identifier state. 
*) + _lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); + + (* Decimal state. *) + _lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); + + (* Leading zero. 
*) + _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_dot()); + + (* Greater state. *) + _lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_greater(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); + + (* Minus state. *) + _lexer_default_transition(_lexer_state_minus(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_minus(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); + + (* Left paren state. *) + _lexer_default_transition(_lexer_state_left_paren(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_left_paren(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_comment()); + + (* Less state. *) + _lexer_default_transition(_lexer_state_less(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_less(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_less(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end()); + + (* Hexadecimal after 0x. 
*) + _lexer_default_transition(_lexer_state_dot(), _lexer_action_finalize(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_dot(), _lexer_class_dot(), _lexer_action_composite(), _lexer_state_end()); + + (* Comment. *) + _lexer_default_transition(_lexer_state_comment(), _lexer_action_accumulate(), _lexer_state_comment()); + _lexer_set_transition(_lexer_state_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); + _lexer_set_transition(_lexer_state_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + + (* Closing comment. *) + _lexer_default_transition(_lexer_state_closing_comment(), _lexer_action_accumulate(), _lexer_state_comment()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_right_paren(), _lexer_action_delimited(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment()); + _lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + + (* Character. *) + _lexer_default_transition(_lexer_state_character(), _lexer_action_accumulate(), _lexer_state_character()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_character_escape()); + + (* Escape sequence in a character. 
*) + _lexer_default_transition(_lexer_state_character_escape(), _lexer_action_accumulate(), _lexer_state_character()); + _lexer_set_transition(_lexer_state_character_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + + (* String. *) + _lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_string_escape()); + + (* Escape sequence in a string. *) + _lexer_default_transition(_lexer_state_string_escape(), _lexer_action_accumulate(), _lexer_state_string()); + _lexer_set_transition(_lexer_state_string_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()) +end; + +(** + * Transition table is saved after character classification table. + * Each character entry is 1 word long and there are 256 characters. + * 1024 = 256 * 4 + *) +proc _lexer_get_transition_table(); + return @classification + 1024 +end; + +(** + * Lexer state is saved after the transition tables. + * Each transition table entry is 8 bytes long. The table has 17 rows (transition states) + * and 22 columns (character classes), so 2992 = 8 * 17 * 22. + *) +proc _lexer_global_state(); + return _lexer_get_transition_table() + 2992 +end; + +(** + * Gets pointer to the token start.
+ *) +proc _lexer_global_get_start(); +var + target: Word; +begin + (* The token start pointer is the second word of the lexer state. *) + target := _lexer_global_state() + 4; + return _load_word(target) +end; + +(** + * Sets pointer to the token start. + *) +proc _lexer_global_set_start(new_start: Word); +var + target: Word; +begin + target := _lexer_global_state() + 4; + _store_word(new_start, target) +end; + +(** + * Gets pointer to the token end. + *) +proc _lexer_global_get_end(); +var + target: Word; +begin + (* The token end pointer is the third word of the lexer state. *) + target := _lexer_global_state() + 8; + return _load_word(target) +end; + +(** + * Sets pointer to the token end. + * + * Parameters: + * new_start - New token end pointer (despite the parameter name). + *) +proc _lexer_global_set_end(new_start: Word); +var + target: Word; +begin + target := _lexer_global_state() + 8; + _store_word(new_start, target) +end; + +(** + * Reads the action, the first word of a transition entry. + *) +proc _lexer_transition_get_action(transition: Word); + return _load_word(transition) +end; + +(** + * Writes the action, the first word of a transition entry. + *) +proc _lexer_transition_set_action(transition: Word, action: Word); +begin + _store_word(action, transition) +end; + +(** + * Reads the next state, the second word of a transition entry. + *) +proc _lexer_transition_get_state(transition: Word); + return _load_word(transition + 4) +end; + +(** + * Writes the next state, the second word of a transition entry. + *) +proc _lexer_transition_set_state(transition: Word, state: Word); +begin + _store_word(state, transition + 4) +end; + +(** + * Resets the lexer state for reading the next token. + *) +proc _lexer_reset(); +var + state: Word; +begin + (* Transition start state is 1. It is stored in the first word of the lexer state. *) + state := _lexer_global_state(); + _store_word(_lexer_state_start(), state); + + (* Move the token end back to the token start. *) + state := _lexer_global_get_start(); + _lexer_global_set_end(state) +end; + +(** + * One time lexer initialization.
+ *) +proc _lexer_initialize(); +begin + _lexer_classifications(); + _lexer_transitions(); + + _lexer_global_set_start(@source_code); + _lexer_global_set_end(@source_code) +end; + +(** + * Classifies the character at the current token end and returns the pointer + * to the transition for the current lexer state and that character class. + *) +proc _lexer_next_transition(); +var + current_character: Word; + character_class: Word; + current_state: Word; +begin + current_character := _lexer_global_get_end(); + current_character := _load_byte(current_character); + + (* The classification array is 1-based, so character code 0 is at index 1. *) + character_class := _get_at(@classification, current_character + 1); + + current_state := _lexer_global_state(); + current_state := _load_word(current_state); + + return _lexer_get_transition(current_state, character_class) +end; + +proc _lexer_token_kind_identifier(); + return 1 +end; + +proc _lexer_token_kind_const(); + return 2 +end; + +proc _lexer_token_kind_var(); + return 3 +end; + +proc _lexer_token_kind_proc(); + return 4 +end; + +proc _lexer_token_kind_type(); + return 5 +end; + +proc _lexer_token_kind_begin(); + return 6 +end; + +proc _lexer_token_kind_end(); + return 7 +end; + +proc _lexer_token_kind_if(); + return 8 +end; + +proc _lexer_token_kind_then(); + return 9 +end; + +proc _lexer_token_kind_else(); + return 10 +end; + +proc _lexer_token_kind_elsif(); + return 11 +end; + +proc _lexer_token_kind_while(); + return 12 +end; + +proc _lexer_token_kind_do(); + return 13 +end; + +proc _lexer_token_kind_extern(); + return 14 +end; + +proc _lexer_token_kind_record(); + return 15 +end; + +proc _lexer_token_kind_union(); + return 16 +end; + +proc _lexer_token_kind_true(); + return 17 +end; + +proc _lexer_token_kind_false(); + return 18 +end; + +proc _lexer_token_kind_nil(); + return 19 +end; + +proc _lexer_token_kind_and(); + return 20 +end; + +proc _lexer_token_kind_or(); + return 21 +end; + +proc _lexer_token_kind_xor(); + return 22 +end; + +proc _lexer_token_kind_pipe(); + return 23 +end; + +proc _lexer_token_kind_not(); + return 24 +end; + +(* Every token kind must have its own value. The previous value 24 collided + with _lexer_token_kind_not, so the first unused value is taken instead. *) +proc _lexer_token_kind_return(); + return 65 +end; + +proc _lexer_token_kind_module(); + return 25 +end; + +proc
_lexer_token_kind_program(); + return 26 +end; + +proc _lexer_token_kind_import(); + return 27 +end; + +proc _lexer_token_kind_cast(); + return 28 +end; + +proc _lexer_token_kind_defer(); + return 29 +end; + +proc _lexer_token_kind_case(); + return 30 +end; + +proc _lexer_token_kind_of(); + return 31 +end; + +proc _lexer_token_kind_trait(); + return 32 +end; + +proc _lexer_token_kind_left_paren(); + return 33 +end; + +proc _lexer_token_kind_right_paren(); + return 34 +end; + +proc _lexer_token_kind_left_square(); + return 35 +end; + +proc _lexer_token_kind_right_square(); + return 36 +end; + +proc _lexer_token_kind_shift_left(); + return 37 +end; + +proc _lexer_token_kind_shift_right(); + return 38 +end; + +proc _lexer_token_kind_greater_equal(); + return 39 +end; + +proc _lexer_token_kind_less_equal(); + return 40 +end; + +proc _lexer_token_kind_greater_than(); + return 41 +end; + +proc _lexer_token_kind_less_than(); + return 42 +end; + +proc _lexer_token_kind_not_equal(); + return 43 +end; + +proc _lexer_token_kind_equals(); + return 44 +end; + +proc _lexer_token_kind_semicolon(); + return 45 +end; + +proc _lexer_token_kind_dot(); + return 46 +end; + +proc _lexer_token_kind_comma(); + return 47 +end; + +proc _lexer_token_kind_plus(); + return 48 +end; + +proc _lexer_token_kind_arrow(); + return 49 +end; + +proc _lexer_token_kind_minus(); + return 50 +end; + +proc _lexer_token_kind_multiplication(); + return 51 +end; + +proc _lexer_token_kind_division(); + return 52 +end; + +proc _lexer_token_kind_remainder(); + return 53 +end; + +proc _lexer_token_kind_assignment(); + return 54 +end; + +proc _lexer_token_kind_colon(); + return 55 +end; + +proc _lexer_token_kind_hat(); + return 56 +end; + +proc _lexer_token_kind_at(); + return 57 +end; + +proc _lexer_token_kind_comment(); + return 58 +end; + +proc _lexer_token_kind_string(); + return 59 +end; + +proc _lexer_token_kind_character(); + return 60 +end; + +proc _lexer_token_kind_integer(); + return 61 +end; + +proc 
_lexer_token_kind_word(); + return 62 +end; + +proc _lexer_token_kind_goto(); + return 63 +end; + +proc _lexer_token_kind_eof(); + return 64 +end; + +proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0 + end; + return result +end; + +proc _lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + token_length: Word; +begin + result := _lexer_token_kind_identifier(); + token_length := position_end + -position_start; + + if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then + result := _lexer_token_kind_const() + elsif _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then + result := _lexer_token_kind_var() + elsif _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then + result := _lexer_token_kind_proc() + elsif _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then + result := _lexer_token_kind_type() + elsif _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then + result := _lexer_token_kind_begin() + elsif _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then + result := _lexer_token_kind_end() + elsif _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then + result := _lexer_token_kind_return() + elsif _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then + result := _lexer_token_kind_goto() + elsif _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then + result := _lexer_token_kind_if() + elsif _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then + result := _lexer_token_kind_while() + elsif _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then + result := _lexer_token_kind_then() + elsif _lexer_compare_keyword(position_start, 
token_length, "else", 4) = 1 then + result := _lexer_token_kind_else() + elsif _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then + result := _lexer_token_kind_elsif() + elsif _lexer_compare_keyword(position_start, token_length, "or", 2) = 1 then + result := _lexer_token_kind_or() + elsif _lexer_compare_keyword(position_start, token_length, "xor", 2) = 1 then + result := _lexer_token_kind_xor() + end; + return result +end; + +proc _lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := _lexer_token_kind_colon() + elsif character = '.' then + result := _lexer_token_kind_dot() + elsif character = '(' then + result := _lexer_token_kind_left_paren() + elsif character = '-' then + result := _lexer_token_kind_minus() + elsif character = '<' then + result := _lexer_token_kind_less_than() + elsif character = '>' then + result := _lexer_token_kind_greater_than() + end; + return result +end; + +proc _lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := _lexer_token_kind_semicolon() + elsif character = ',' then + result := _lexer_token_kind_comma() + elsif character = ')' then + result := _lexer_token_kind_right_paren() + elsif character = '@' then + result := _lexer_token_kind_at() + elsif character = '~' then + result := _lexer_token_kind_not() + elsif character = '&' then + result := _lexer_token_kind_and() + elsif character = '+' then + result := _lexer_token_kind_plus() + elsif character = '*' then + result := _lexer_token_kind_multiplication() + elsif character = '=' then + result := _lexer_token_kind_equals() + elsif character = '%' then + result := _lexer_token_kind_remainder() + elsif character = '/' then + result := _lexer_token_kind_division() + elsif character = '.' 
then + result := _lexer_token_kind_dot() + elsif character = '^' then + result := _lexer_token_kind_hat() + end; + return result +end; + +proc _lexer_classify_composite(start_position: Word, one_before_last: Word); +var + first_character: Word; + last_character: Word; + result: Word; +begin + first_character := _load_byte(start_position); + last_character := _load_byte(one_before_last); + + if first_character = ':' then + result := _lexer_token_kind_assignment() + elsif first_character = '<' then + if last_character = '=' then + result := _lexer_token_kind_less_equal() + elsif last_character = '>' then + result := _lexer_token_kind_not_equal() + end + elsif first_character = '>' then + if last_character = '=' then + result := _lexer_token_kind_greater_equal() + end + end; + + return result +end; + +proc _lexer_classify_delimited(start_position: Word, end_position: Word); +var + token_length: Word; + delimiter: Word; + result: Word; +begin + token_length := end_position + -start_position; + delimiter := _load_byte(start_position); + + if delimiter = '(' then + result := _lexer_token_kind_comment() + elsif delimiter = '\'' then + result := _lexer_token_kind_character() + elsif delimiter = '"' then + result := _lexer_token_kind_string() + end; + return result +end; + +proc _lexer_classify_integer(start_position: Word, end_position: Word); +begin + return _lexer_token_kind_integer() +end; + +proc _lexer_execute_action(action_to_perform: Word, kind: Word); +var + position_start: Word; + position_end: Word; + intermediate: Word; +begin + position_start := _lexer_global_get_start(); + position_end := _lexer_global_get_end(); + + if action_to_perform = _lexer_action_none() then + elsif action_to_perform = _lexer_action_accumulate() then + _lexer_global_set_end(position_end + 1) + elsif action_to_perform = _lexer_action_skip() then + _lexer_global_set_start(position_start + 1); + _lexer_global_set_end(position_end + 1) + elsif action_to_perform = _lexer_action_single() then 
+ _lexer_global_set_end(position_end + 1); + + intermediate := _lexer_classify_single(position_start); + _store_word(intermediate, kind) + elsif action_to_perform = _lexer_action_eof() then + intermediate := _lexer_token_kind_eof(); + _store_word(intermediate, kind) + elsif action_to_perform = _lexer_action_finalize() then + intermediate := _lexer_classify_finalize(position_start); + _store_word(intermediate, kind) + elsif action_to_perform = _lexer_action_composite() then + _lexer_global_set_end(position_end + 1); + + intermediate := _lexer_classify_composite(position_start, position_end); + _store_word(intermediate, kind) + elsif action_to_perform = _lexer_action_key_id() then + intermediate := _lexer_classify_keyword(position_start, position_end); + _store_word(intermediate, kind) + elsif action_to_perform = _lexer_action_integer() then + intermediate := _lexer_classify_integer(position_start, position_end); + _store_word(intermediate, kind) + elsif action_to_perform = _lexer_action_delimited() then + _lexer_global_set_end(position_end + 1); + + intermediate := _lexer_classify_delimited(position_start, position_end + 1); + _store_word(intermediate, kind) + end; +end; + +proc _lexer_execute_transition(kind: Word); +var + next_transition: Word; + next_state: Word; + global_state: Word; + action_to_perform: Word; +begin + next_transition := _lexer_next_transition(); + next_state := _lexer_transition_get_state(next_transition); + action_to_perform := _lexer_transition_get_action(next_transition); + + global_state := _lexer_global_state(); + + _store_word(next_state, global_state); + _lexer_execute_action(action_to_perform, kind); + + return next_state +end; + +proc _lexer_advance_token(kind: Word); +begin + if _lexer_execute_transition(kind) <> _lexer_state_end() then + _lexer_advance_token(kind) + end +end; + +(** + * Reads the next token and writes its type into the address in the kind parameter. 
+ *) +proc _lexer_read_token(kind: Word); +begin + _lexer_reset(); + _lexer_advance_token(kind) +end; + +(** + * Advances the token stream past the last read token. + *) +proc _lexer_skip_token(); +var + old_end: Word; +begin + old_end := _lexer_global_get_end(); + _lexer_global_set_start(old_end) +end; + +(* + * Entry point. + *) +proc _start(); +var + last_read: Word; + offset: Word; +begin + _lexer_initialize(); + _symbol_table_build(); + + (* Read the source from the standard input. *) + offset := @source_code; + + .start_read; + (* Second argument is buffer size. Modifying update the source_code definition. *) + last_read := _read_file(offset, 819200); + if last_read > 0 then + offset := offset + last_read; + goto .start_read + end; + _compile(); + + _exit(0) +end; diff --git a/boot/stage15.elna b/boot/stage15.elna deleted file mode 100644 index 987d655..0000000 --- a/boot/stage15.elna +++ /dev/null @@ -1,5403 +0,0 @@ -(* - * This Source Code Form is subject to the terms of the Mozilla Public License, - * v. 2.0. If a copy of the MPL was not distributed with this file, You can - * obtain one at https://mozilla.org/MPL/2.0/. - *) - -(* Stage 15 compiler. *) - -type - ElnaLexerAction = (none, accumulate, skip, single, eof, finalize, composite, key_id, integer, delimited); - - (** - * Classification table assigns each possible character to a group (class). All - * characters of the same group a handled equivalently. 
- * - * Transition = record - * action: TransitionAction; - * next_state: TransitionState - * end; - *) - ElnaLexerClass = ( - invalid, - digit, - alpha, - space, - colon, - equals, - left_paren, - right_paren, - asterisk, - backslash, - single, - hex, - zero, - x, - eof, - dot, - minus, - single_quote, - double_quote, - greater, - less, - other - ); - ElnaLexerState = ( - start, - colon, - identifier, - decimal, - leading_zero, - greater, - minus, - left_paren, - less, - dot, - comment, - closing_comment, - character, - character_escape, - string, - string_escape, - finish - ); - ElnaLexerKind = ( - identifier, - _const, - _var, - _proc, - _type, - _begin, - _end, - _if, - _then, - _else, - _elsif, - _while, - _do, - _extern, - _record, - _true, - _false, - null, - and, - _or, - _xor, - pipe, - not, - _return, - _module, - _program, - _import, - _cast, - _defer, - _case, - _of, - trait, - left_paren, - right_paren, - left_square, - right_square, - shift_left, - shift_right, - greater_equal, - less_equal, - greater_than, - less_than, - not_equal, - equals, - semicolon, - dot, - comma, - plus, - arrow, - minus, - multiplication, - division, - remainder, - assignment, - colon, - hat, - at, - comment, - string, - character, - integer, - word, - _goto, - eof - ); - NodeKind = ( - integer_literal, - string_literal, - character_literal, - variable_expression, - field_access_expression, - dereference_expression, - unary_expression, - binary_expression, - call, - goto_statement, - label_declaration, - return_statement, - assign_statement, - if_statement, - procedure_declaration, - variable_declaration, - enumeration_type_expression, - named_type_expression, - type_declaration, - module_declaration, - record_type_expression - ); - InfoKind = (type_info, parameter_info, temporary_info, procedure_info); - TypeKind = (primitive, enumeration, _record); - ElnaTacOperator = ( - load_immediate, - load_address, - add, - add_immediate, - load_word, - store_word, - jal, - move, - 
sub, - div, - rem, - mul, - _xor, - _or, - and, - seqz, - snez, - slt, - xor_immediate, - neg, - not, - jump, - beqz, - label, - start, - ret - ); - ElnaTacOperand = (register, immediate, symbol, offset); - ElnaTacRegister = ( - zero, - ra, - sp, - gp, - tp, - t0, - t1, - t2, - s0, - s1, - a0, - a1, - a2, - a3, - a4, - a5, - a6, - a7, - s2, - s3, - s4, - s5, - s6, - s7, - s8, - s9, - s10, - s11, - t3, - t4, - t5, - t6 - ); - -var - symbol_table_global: Array; - compiler_strings: Array; - classification: Array; - - source_code: Word; - compiler_strings_position: Word; - compiler_strings_length: Word; - label_counter: Word; - symbol_table_store: Word; - - (* Points to a segment of free memory. *) - memory_free_pointer: Word; - -(** - * Calculates and returns the string token length between quotes, including the - * escaping slash characters. - * - * Parameters: - * string - String token pointer. - * - * Returns the length in a0. - *) -proc _string_length(string: Word); -var - counter: Word; - current_byte: Word; -begin - (* Reset the counter. *) - counter := 0; - - .string_length_loop; - string := string + 1; - - current_byte := _load_byte(string); - if current_byte <> '"' then - counter := counter + 1; - goto string_length_loop - end; - - return counter -end; - -(** - * Adds a string to the global, read-only string storage. - * - * Parameters: - * string - String token. - * - * Returns the offset from the beginning of the storage to the new string in a0. 
- *) -proc _add_string(string: Word); -var - contents: Word; - result: Word; - current_byte: Word; -begin - contents := string + 1; - result := compiler_strings_length; - - .add_string_loop; - current_byte := _load_byte(contents); - if current_byte <> '"' then - _store_byte(current_byte, compiler_strings_position); - compiler_strings_position := compiler_strings_position + 1; - contents := contents + 1; - - if current_byte <> '\\' then - compiler_strings_length := compiler_strings_length + 1 - end; - goto add_string_loop - end; - - return result -end; - -(** - * Reads standard input into a buffer. - * - * Parameters: - * buffer - Buffer pointer. - * size - Buffer size. - * - * Returns the amount of bytes written in a0. - *) -proc _read_file(buffer: Word, size: Word); - return _syscall(0, buffer, size, 0, 0, 0, 63) -end; - -(** - * MAP_ANONYMOUS is 32. - * PROT_READ | PORT_WRITE is (1 | 2). - * MAP_ANONYMOUS | MAP_PRIVATE is (32 | 2) - *) -proc _mmap(length: Word); - return _syscall(0, length, 1 or 2, 32 or 2, -1, 0, 222) -end; - -(** - * Writes to the standard output. - * - * Parameters: - * buffer - Buffer. - * size - Buffer length. - *) -proc _write_s(buffer: Word, size: Word); -begin - _syscall(1, buffer, size, 0, 0, 0, 64) -end; - -(** - * Writes a number to a string buffer. - * - * Parameters: - * number - Whole number. - * output_buffer - Buffer pointer. - * - * Sets a0 to the length of the written number. 
- *) -proc _print_i(number: Word, output_buffer: Word); -var - local_buffer: Word; - is_negative: Word; - current_character: Word; - result: Word; -begin - local_buffer := @result + 11; - - if number >= 0 then - is_negative := 0 - else - number = -number; - is_negative := 1 - end; - - .print_i_digit10; - current_character := number % 10; - _store_byte(current_character + '0', local_buffer); - - number := number / 10; - local_buffer := local_buffer - 1; - - if number <> 0 then - goto print_i_digit10 - end; - if is_negative = 1 then - _store_byte('-', local_buffer); - local_buffer := local_buffer - 1 - end; - result := @result + 11; - result := result - local_buffer; - _memcpy(output_buffer, local_buffer + 1, result); - - return result -end; - -(** - * Writes a number to the standard output. - * - * Parameters: - * number - Whole number. - *) -proc _write_i(number: Word); -var - local_buffer: Word; - length: Word; -begin - length := _print_i(number, @local_buffer); - _write_s(@local_buffer, length) -end; - -(** - * Writes a character from a0 into the standard output. - * - * Parameters: - * character - Character to write. - *) -proc _write_c(character: Word); -begin - _write_s(@character, 1) -end; - -(** - * Write null terminated string. - * - * Parameters: - * string - String. - *) -proc _write_z(string: Word); -var - next_byte: Word; -begin - (* Check for 0 character. *) - next_byte := _load_byte(string); - - if next_byte <> 0 then - (* Print a character. *) - _write_c(next_byte); - - (* Advance the input string by one byte. *) - _write_z(string + 1) - end -end; - -(** - * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. - *) -proc _is_upper(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'A'; - rhs := character <= 'Z'; - - return lhs & rhs -end; - -(** - * Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
- *) -proc _is_lower(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'a'; - rhs := character <= 'z'; - - return lhs & rhs -end; - -(** - * Detects if the passed character is a 7-bit alpha character or an underscore. - * - * Paramters: - * character - Tested character. - * - * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. - *) -proc _is_alpha(character: Word); -var - is_upper_result: Word; - is_lower_result: Word; - is_alpha_result: Word; - is_underscore: Word; -begin - is_upper_result := _is_upper(character); - is_lower_result := _is_lower(character); - is_underscore := character = '_'; - - is_alpha_result := is_lower_result or is_upper_result; - return is_alpha_result or is_underscore -end; - -(** - * Detects whether the passed character is a digit (a value between 0 and 9). - * - * Parameters: - * character - Exemined value. - * - * Sets a0 to 1 if it is a digit, to 0 otherwise. - *) -proc _is_digit(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= '0'; - rhs := character <= '9'; - - return lhs & rhs -end; - -proc _is_alnum(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := _is_alpha(character); - rhs := _is_digit(character); - - return lhs or rhs -end; - -(** - * Parameters: - * lhs - First pointer. - * rhs - Second pointer. - * count - The length to compare. - * - * Returns 0 if memory regions are equal. - *) -proc _memcmp(lhs: Word, rhs: Word, count: Word); -var - lhs_byte: Word; - rhs_byte: Word; - result: Word; -begin - result := 0; - - .memcmp_loop; - if count <> 0 then - lhs_byte := _load_byte(lhs); - rhs_byte := _load_byte(rhs); - result := lhs_byte - rhs_byte; - - lhs := lhs + 1; - rhs := rhs + 1; - count := count - 1; - - if result = 0 then - goto memcmp_loop - end - end; - - return result -end; - -(** - * Copies memory. - * - * Parameters: - * destination - Destination. - * source - Source. - * count - Size. 
- * - * Returns the destination. - *) -proc _memcpy(destination: Word, source: Word, count: Word); -var - current_byte: Word; -begin - .memcpy_loop; - if count <> 0 then - current_byte := _load_byte(source); - _store_byte(current_byte, destination); - - destination := destination + 1; - source := source + 1; - count := count - 1; - goto memcpy_loop - end; - - return destination -end; - -proc _node_get_kind(this: Word); - return this^ -end; - -proc _elna_tac_instruction_size(); - return 44 -end; - -proc _elna_tac_instruction_get_kind(this: Word); - return this^ -end; - -proc _elna_tac_instruction_set_kind(this: Word, value: Word); -begin - this^ := value -end; - -proc _elna_tac_instruction_get_next(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_tac_instruction_set_next(this: Word, value: Word); -begin - .elna_tac_instruction_set_next_loop; - this := this + 4; - if value <> 0 then - if this^ <> 0 then - this := this^; - goto elna_tac_instruction_set_next_loop - end - end; - this^ := value -end; - -proc _elna_tac_instruction_get_operand_type(this: Word, n: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - return this^ -end; - -proc _elna_tac_instruction_get_operand_value(this: Word, n: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - this := this + 4; - return this^ -end; - -proc _elna_tac_instruction_get_operand_length(this: Word, n: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - this := this + 8; - return this^ -end; - -proc _elna_tac_instruction_set_operand(this: Word, n: Word, operand_type: Word, operand_value: Word, operand_length: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - this^ := operand_type; - this := this + 4; - this^ := operand_value; - this := this + 4; - this^ := operand_length -end; - -proc _elna_tac_instruction_create(kind: Word); -var - result: Word; - 
instruction_size: Word; -begin - instruction_size := _elna_tac_instruction_size(); - result := _allocate(instruction_size); - - _elna_tac_instruction_set_kind(result, kind); - _elna_tac_instruction_set_next(result, 0); - - return result -end; - -proc _elna_tac_module_create(data: Word, code: Word); -var - result: Word; - current_word: Word; -begin - result := _allocate(8); - - current_word := result; - current_word^ := data; - current_word := current_word + 4; - current_word^ := code; - - return result -end; - -proc _elna_tac_module_get_data(this: Word); - return this^ -end; - -proc _elna_tac_module_get_code(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_tac_declaration_size(); - return 16 -end; - -proc _elna_tac_declaration_get_next(this: Word); - return this^ -end; - -proc _elna_tac_declaration_set_next(this: Word, value: Word); -begin - this^ := value -end; - -proc _elna_tac_declaration_get_name(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_tac_declaration_set_name(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _elna_tac_declaration_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _elna_tac_declaration_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_tac_declaration_get_body(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _elna_tac_declaration_set_body(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_tac_load_immediate(target_register: Word, source_immediate: Word, immediate_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.load_immediate); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.immediate, source_immediate, immediate_length); - - return result -end; - -proc 
_elna_tac_load_address(target_register: Word, source_symbol: Word, symbol_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.load_address); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); - - return result -end; - -proc _elna_tac_beqz(target_register: Word, source_symbol: Word, symbol_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.beqz); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); - - return result -end; - -proc _elna_tac_jump(source_symbol: Word, symbol_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.jump); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, source_symbol, symbol_length); - - return result -end; - -proc _elna_tac_add(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.add); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_mul(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.mul); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_sub(destination: Word, 
lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.sub); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_div(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.div); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_rem(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.rem); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_xor(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator._xor); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_xor_immediate(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator._xor); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, 
ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); - - return result -end; - -proc _elna_tac_or(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator._or); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_and(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.and); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_add_immediate(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.add_immediate); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); - - return result -end; - -proc _elna_tac_slt(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.slt); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_jal(symbol: Word, length: Word); -var - result: Word; -begin - result := 
_elna_tac_instruction_create(ElnaTacOperator.jal); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, symbol, length); - - return result -end; - -proc _elna_tac_load_word(target: Word, register: Word, offset: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.load_word); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); - - return result -end; - -proc _elna_tac_store_word(target: Word, register: Word, offset: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.store_word); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); - - return result -end; - -proc _elna_tac_move(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.move); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -proc _elna_tac_seqz(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.seqz); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -proc _elna_tac_snez(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.snez); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -proc _elna_tac_neg(destination: Word, source: 
Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.neg); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -proc _elna_tac_not(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.not); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -proc _elna_tac_label(counter: Word, length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.label); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, counter, length); - - return result -end; - -proc _elna_writer_instruction_name(instruction_kind: Word); -var - argument_count: Word; -begin - if instruction_kind = ElnaTacOperator.load_immediate then - argument_count := 2; - _write_s("\tli", 3) - elsif instruction_kind = ElnaTacOperator.load_address then - argument_count := 2; - _write_s("\tla", 3) - elsif instruction_kind = ElnaTacOperator.add then - argument_count := 3; - _write_s("\tadd", 4) - elsif instruction_kind = ElnaTacOperator.add_immediate then - argument_count := 3; - _write_s("\taddi", 5) - elsif instruction_kind = ElnaTacOperator.load_word then - argument_count := 2; - _write_s("\tlw", 3) - elsif instruction_kind = ElnaTacOperator.store_word then - argument_count := 2; - _write_s("\tsw", 3) - elsif instruction_kind = ElnaTacOperator.jal then - argument_count := 1; - _write_s("\tcall", 5) - elsif instruction_kind = ElnaTacOperator.move then - argument_count := 2; - _write_s("\tmv", 3) - elsif instruction_kind = ElnaTacOperator.sub then - argument_count := 3; - _write_s("\tsub", 4) - elsif instruction_kind = ElnaTacOperator.mul then - argument_count := 3; - 
_write_s("\tmul", 4) - elsif instruction_kind = ElnaTacOperator.div then - argument_count := 3; - _write_s("\tdiv", 4) - elsif instruction_kind = ElnaTacOperator.rem then - argument_count := 3; - _write_s("\trem", 4) - elsif instruction_kind = ElnaTacOperator._xor then - argument_count := 3; - _write_s("\txor", 4) - elsif instruction_kind = ElnaTacOperator.xor_immediate then - argument_count := 3; - _write_s("\txori", 5) - elsif instruction_kind = ElnaTacOperator._or then - argument_count := 3; - _write_s("\tor", 3) - elsif instruction_kind = ElnaTacOperator.and then - argument_count := 3; - _write_s("\tand", 4) - elsif instruction_kind = ElnaTacOperator.seqz then - argument_count := 2; - _write_s("\tseqz", 5) - elsif instruction_kind = ElnaTacOperator.snez then - argument_count := 2; - _write_s("\tsnez", 5) - elsif instruction_kind = ElnaTacOperator.slt then - argument_count := 3; - _write_s("\tslt", 4) - elsif instruction_kind = ElnaTacOperator.neg then - argument_count := 2; - _write_s("\tneg", 4) - elsif instruction_kind = ElnaTacOperator.not then - argument_count := 2; - _write_s("\tnot", 4) - elsif instruction_kind = ElnaTacOperator.jump then - argument_count := 1; - _write_s("\tj", 2) - elsif instruction_kind = ElnaTacOperator.beqz then - argument_count := 2; - _write_s("\tbeqz", 5) - elsif instruction_kind = ElnaTacOperator.start then - argument_count := 0; - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\0") - elsif instruction_kind = ElnaTacOperator.ret then - argument_count := 0; - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\0") - end; - return argument_count -end; - -proc _elna_writer_register(register: Word); -begin - _write_c('x'); - _write_i(register - 1) -end; - -proc _elna_writer_operand(instruction: Word, n: Word); -var - operand_value: Word; - operand_length: Word; - operand_type: Word; -begin - operand_type := _elna_tac_instruction_get_operand_type(instruction, n); - operand_value := 
_elna_tac_instruction_get_operand_value(instruction, n); - operand_length := _elna_tac_instruction_get_operand_length(instruction, n); - - _write_c(' '); - if operand_type = ElnaTacOperand.register then - _elna_writer_register(operand_value) - elsif operand_type = ElnaTacOperand.offset then - _write_i(operand_length); - _write_c('('); - _elna_writer_register(operand_value); - _write_c(')') - elsif operand_type = ElnaTacOperand.symbol then - if operand_length = 0 then - _write_label(operand_value, 0) - else - _write_s(operand_value, operand_length) - end - elsif operand_length = 0 then (* ElnaTacOperand.immediate *) - _write_i(operand_value) - else - _write_s(operand_value, operand_length) - end -end; - -proc _elna_writer_instruction(instruction: Word); -var - instruction_kind: Word; - argument_count: Word; - current_argument: Word; - operand_value: Word; - operand_length: Word; -begin - instruction_kind := _elna_tac_instruction_get_kind(instruction); - - if instruction_kind = ElnaTacOperator.label then - argument_count := 0; - operand_value := _elna_tac_instruction_get_operand_value(instruction, 1); - operand_length := _elna_tac_instruction_get_operand_length(instruction, 1); - _write_label(operand_value, operand_length); - _write_c(':') - else - argument_count := _elna_writer_instruction_name(instruction_kind) - end; - current_argument := 1; - - .elna_writer_instruction_loop; - if current_argument <= argument_count then - _elna_writer_operand(instruction, current_argument); - current_argument := current_argument + 1 - end; - if current_argument <= argument_count then - _write_c(','); - goto elna_writer_instruction_loop - end; - - _write_c('\n') -end; - -proc _elna_writer_instructions(instruction: Word); -begin - if instruction <> 0 then - _elna_writer_instruction(instruction); - instruction := _elna_tac_instruction_get_next(instruction); - _elna_writer_instructions(instruction) - end -end; - -proc _elna_writer_procedure(procedure: Word); -var - name_pointer: Word; 
- name_length: Word; - body_statements: Word; -begin - .elna_writer_procedure_loop; - name_pointer := _elna_tac_declaration_get_name(procedure); - name_length := _elna_tac_declaration_get_length(procedure); - body_statements := _elna_tac_declaration_get_body(procedure); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_s(name_pointer, name_length); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_s(name_pointer, name_length); - _write_z(":\n\0"); - - _elna_writer_instructions(body_statements); - _write_z("\tret\n\0"); - - procedure := _elna_tac_declaration_get_next(procedure); - if procedure <> 0 then - goto elna_writer_procedure_loop - end -end; - -proc _elna_writer_variable(variable: Word); -var - name: Word; - name_length: Word; - size: Word; -begin - .elna_writer_variable_loop; - if variable <> 0 then - name := _elna_tac_declaration_get_name(variable); - name_length := _elna_tac_declaration_get_length(variable); - size := _elna_tac_declaration_get_body(variable); - - _write_z(".type \0"); - _write_s(name, name_length); - _write_z(", @object\n\0"); - - _write_s(name, name_length); - _write_c(':'); - - _write_z(" .zero \0"); - _write_i(size); - - _write_c('\n'); - variable := _elna_tac_declaration_get_next(variable); - - goto elna_writer_variable_loop - end -end; - -proc _elna_writer_module(pair: Word); -var - compiler_strings_copy: Word; - compiler_strings_end: Word; - current_byte: Word; - current_part: Word; -begin - _write_z(".globl _start\n\n\0"); - _write_z(".section .data\n\0"); - - current_part := _elna_tac_module_get_data(pair); - _elna_writer_variable(current_part); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - - current_part := 
_elna_tac_module_get_code(pair); - _elna_writer_procedure(current_part); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - compiler_strings_copy := @compiler_strings; - compiler_strings_end := compiler_strings_position; - - .elna_writer_module_loop; - if compiler_strings_copy < compiler_strings_end then - current_byte := _load_byte(compiler_strings_copy); - compiler_strings_copy := compiler_strings_copy + 1; - _write_c(current_byte); - - goto elna_writer_module_loop - end; - _write_c('"'); - _write_c('\n'); -end; - -proc _node_set_kind(this: Word, kind: Word); -begin - this^ := kind -end; - -proc _integer_literal_node_size(); - return 12 -end; - -proc _integer_literal_node_get_value(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _integer_literal_node_set_value(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _integer_literal_node_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _integer_literal_node_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_parser_integer_literal(); -var - integer_token: Word; - integer_length: Word; - result: Word; - literal_size: Word; -begin - literal_size := _integer_literal_node_size(); - result := _allocate(literal_size); - - integer_token := _elna_lexer_global_get_start(); - integer_length := _elna_lexer_global_get_end(); - integer_length := integer_length - integer_token; - _elna_lexer_skip_token(); - - _node_set_kind(result, NodeKind.integer_literal); - _integer_literal_node_set_value(result, integer_token); - _integer_literal_node_set_length(result, integer_length); - - return result -end; - -proc _elna_tac_integer_literal(integer_literal_node: Word); -var - integer_token: Word; - integer_length: Word; - token_kind: Word; -begin - integer_token := _integer_literal_node_get_value(integer_literal_node); - integer_length := 
_integer_literal_node_get_length(integer_literal_node); - - return _elna_tac_load_immediate(ElnaTacRegister.t0, integer_token, integer_length) -end; - -proc _character_literal_node_size(); - return 12 -end; - -proc _character_literal_node_get_value(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _character_literal_node_set_value(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _character_literal_node_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _character_literal_node_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_parser_character_literal(); -var - character: Word; - character_length: Word; - result: Word; - literal_size: Word; -begin - literal_size := _character_literal_node_size(); - result := _allocate(literal_size); - - character := _elna_lexer_global_get_start(); - character_length := _elna_lexer_global_get_end(); - character_length := character_length - character; - _elna_lexer_skip_token(); - - _node_set_kind(result, NodeKind.character_literal); - _integer_literal_node_set_value(result, character); - _integer_literal_node_set_length(result, character_length); - - return result -end; - -proc _elna_tac_character_literal(character_literal_node: Word); -var - character: Word; - character_length: Word; -begin - character := _character_literal_node_get_value(character_literal_node); - character_length := _character_literal_node_get_length(character_literal_node); - - return _elna_tac_load_immediate(ElnaTacRegister.t0, character, character_length) -end; - -proc _variable_expression_size(); - return 12 -end; - -proc _variable_expression_get_name(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _variable_expression_set_name(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _variable_expression_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc 
_variable_expression_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _allocate(size: Word); -var - result: Word; -begin - result := memory_free_pointer; - memory_free_pointer := memory_free_pointer + size; - return result -end; - -proc _elna_parser_variable_expression(); -var - name: Word; - name_token: Word; - result: Word; - memory_size: Word; -begin - name := _elna_lexer_global_get_start(); - name_token := _elna_lexer_global_get_end(); - name_token := name_token - name; - _elna_lexer_skip_token(); - - memory_size := _variable_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.variable_expression); - _variable_expression_set_name(result, name); - _variable_expression_set_length(result, name_token); - - return result -end; - -proc _elna_tac_variable_expression(variable_expression: Word, symbol_table: Word); -var - name: Word; - name_token: Word; - lookup_result: Word; - instruction: Word; -begin - name := _variable_expression_get_name(variable_expression); - name_token := _variable_expression_get_length(variable_expression); - - lookup_result := _symbol_table_lookup(symbol_table, name, name_token); - if lookup_result <> 0 then - instruction := _elna_tac_local_designator(lookup_result) - else - instruction := _elna_tac_global_designator(variable_expression) - end; - return instruction -end; - -proc _string_literal_node_size(); - return 12 -end; - -proc _string_literal_node_get_value(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _string_literal_node_set_value(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _string_literal_node_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _string_literal_node_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_parser_string_literal(); -var - length: Word; - token_start: Word; - result: Word; - memory_size: 
Word; -begin - memory_size := _string_literal_node_size(); - result := _allocate(memory_size); - - token_start := _elna_lexer_global_get_start(); - length := _string_length(token_start); - _elna_lexer_skip_token(); - - _node_set_kind(result, NodeKind.string_literal); - _string_literal_node_set_value(result, token_start); - _string_literal_node_set_length(result, length); - - return result -end; - -proc _elna_tac_string_literal(string_literal_node: Word); -var - token_start: Word; - length: Word; - offset: Word; - instruction: Word; - first_instruction: Word; - next_instruction: Word; -begin - token_start := _string_literal_node_get_value(string_literal_node); - length := _string_literal_node_get_length(string_literal_node); - offset := _add_string(token_start); - - first_instruction := _elna_tac_load_address(ElnaTacRegister.t0, "strings", 7); - instruction := _elna_tac_load_immediate(ElnaTacRegister.t1, offset, 0); - _elna_tac_instruction_set_next(first_instruction, instruction); - next_instruction := _elna_tac_add(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(instruction, next_instruction); - - return first_instruction -end; - -proc _elna_parser_simple_expression(); -var - current_character: Word; - parser_node: Word; - token_kind: Word; -begin - parser_node := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.character then - parser_node := _elna_parser_character_literal() - elsif token_kind = ElnaLexerKind.integer then - parser_node := _elna_parser_integer_literal() - elsif token_kind = ElnaLexerKind.string then - parser_node := _elna_parser_string_literal() - elsif token_kind = ElnaLexerKind.identifier then - parser_node := _elna_parser_variable_expression() - end; - return parser_node -end; - -proc _dereference_expression_size(); - return 8 -end; - -proc _dereference_expression_get_pointer(this: Word); -begin - this := this + 4; - return this^ -end; - -proc 
_dereference_expression_set_pointer(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _elna_parser_dereference_expression(simple_expression: Word); -var - result: Word; - memory_size: Word; -begin - memory_size := _dereference_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.dereference_expression); - _dereference_expression_set_pointer(result, simple_expression); - _elna_lexer_skip_token(); - - return result -end; - -proc _elna_parser_designator(); -var - simple_expression: Word; - token_kind: Word; -begin - simple_expression := _elna_parser_simple_expression(); - - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.hat then - simple_expression := _elna_parser_dereference_expression(simple_expression) - elsif token_kind = ElnaLexerKind.dot then - simple_expression := _elna_parser_field_access_expression(simple_expression) - elsif token_kind = ElnaLexerKind.left_paren then - simple_expression := _elna_parser_call(simple_expression) - end; - return simple_expression -end; - -proc _elna_tac_simple_expression(parser_node: Word, symbol_table: Word, is_address: Word); -var - is_address: Word; - node_kind: Word; - instruction: Word; -begin - is_address^ := 0; - node_kind := _node_get_kind(parser_node); - - if node_kind = NodeKind.character_literal then - instruction := _elna_tac_character_literal(parser_node) - elsif node_kind = NodeKind.string_literal then - instruction := _elna_tac_string_literal(parser_node) - elsif node_kind = NodeKind.integer_literal then - instruction := _elna_tac_integer_literal(parser_node) - else - instruction := _elna_tac_variable_expression(parser_node, symbol_table); - is_address^ := 1 - end; - return instruction -end; - -proc _unary_expression_size(); - return 12 -end; - -proc _unary_expression_get_operand(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _unary_expression_set_operand(this: Word, value: Word); -begin - this := 
this + 4; - this^ := value -end; - -proc _unary_expression_get_operator(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _unary_expression_set_operator(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_parser_unary_expression(); -var - token_kind: Word; - result: Word; - memory_size: Word; - operand: Word; - operator: Word; -begin - _elna_lexer_read_token(@token_kind); - operator := 0; - - if token_kind = ElnaLexerKind.at then - operator := '@' - elsif token_kind = ElnaLexerKind.minus then - operator := '-' - elsif token_kind = ElnaLexerKind.not then - operator := '~' - end; - if operator <> 0 then - _elna_lexer_skip_token() - end; - result := _elna_parser_designator(); - - if operator <> 0 then - operand := result; - memory_size := _unary_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.unary_expression); - _unary_expression_set_operand(result, operand); - _unary_expression_set_operator(result, operator) - end; - - return result -end; - -proc _elna_tac_unary_expression(parser_node: Word, symbol_table: Word); -var - current_character: Word; - token_kind: Word; - expression_kind: Word; - operator: Word; - operand: Word; - is_address: Word; - first_instruction: Word; - instruction: Word; -begin - operator := 0; - operand := 0; - - expression_kind := _node_get_kind(parser_node); - - if expression_kind = NodeKind.unary_expression then - operator := _unary_expression_get_operator(parser_node); - operand := _unary_expression_get_operand(parser_node) - else - operand := parser_node - end; - - if operator = '@' then - first_instruction := _elna_tac_designator(operand, symbol_table, @is_address) - else - first_instruction := _elna_tac_designator(operand, symbol_table, @is_address); - if is_address then - instruction := _elna_tac_load_word(ElnaTacRegister.t0, ElnaTacRegister.t0, 0); - _elna_tac_instruction_set_next(first_instruction, instruction) - end - end; - if operator = 
'-' then - instruction := _elna_tac_neg(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(first_instruction, instruction) - elsif operator = '~' then - instruction := _elna_tac_not(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(first_instruction, instruction) - end; - return first_instruction -end; - -proc _binary_expression_size(); - return 16 -end; - -proc _binary_expression_get_lhs(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _binary_expression_set_lhs(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _binary_expression_get_rhs(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _binary_expression_set_rhs(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _binary_expression_get_operator(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _binary_expression_set_operator(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_parser_binary_expression(); -var - lhs_node: Word; - rhs_node: Word; - token_kind: Word; - memory_size: Word; - result: Word; -begin - lhs_node := _elna_parser_unary_expression(); - rhs_node := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.plus then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.minus then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.multiplication then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.and then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind._or then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind._xor then - _elna_lexer_skip_token(); - rhs_node := 
_elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.equals then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.remainder then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.division then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.less_than then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.greater_than then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.less_equal then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.not_equal then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.greater_equal then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - end; - if rhs_node <> 0 then - memory_size := _binary_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.binary_expression); - _binary_expression_set_lhs(result, lhs_node); - _binary_expression_set_rhs(result, rhs_node); - _binary_expression_set_operator(result, token_kind) - else - result := lhs_node - end; - return result -end; - -proc _elna_tac_binary_expression(parser_node: Word, symbol_table: Word); -var - token_kind: Word; - expression_kind: Word; - operand_node: Word; - first_instruction: Word; - instruction: Word; - current_instruction: Word; -begin - expression_kind := _node_get_kind(parser_node); - - if expression_kind <> NodeKind.binary_expression then - first_instruction := _elna_tac_unary_expression(parser_node, symbol_table) - else - token_kind := _binary_expression_get_operator(parser_node); - - operand_node := _binary_expression_get_lhs(parser_node); - first_instruction := 
_elna_tac_unary_expression(operand_node, symbol_table); - - (* Save the value of the left expression on the stack. *) - instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.sp, 64); - _elna_tac_instruction_set_next(first_instruction, instruction); - current_instruction := instruction; - - operand_node := _binary_expression_get_rhs(parser_node); - instruction := _elna_tac_unary_expression(operand_node, symbol_table); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - (* Load the left expression from the stack; *) - instruction := _elna_tac_load_word(ElnaTacRegister.t1, ElnaTacRegister.sp, 64); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - if token_kind = ElnaLexerKind.plus then - instruction := _elna_tac_add(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.minus then - instruction := _elna_tac_sub(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.multiplication then - instruction := _elna_tac_mul(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.and then - instruction := _elna_tac_and(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind._or then - instruction := _elna_tac_or(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind._xor then - instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - 
_elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.equals then - instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_seqz(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.remainder then - instruction := _elna_tac_rem(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.division then - instruction := _elna_tac_div(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.less_than then - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.greater_than then - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.less_equal then - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_xor_immediate(ElnaTacRegister.t0, ElnaTacRegister.t0, 1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.not_equal then - instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_snez(ElnaTacRegister.t0, 
ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.greater_equal then - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_xor_immediate(ElnaTacRegister.t0, ElnaTacRegister.t0, 1); - _elna_tac_instruction_set_next(current_instruction, instruction) - end - end; - return first_instruction -end; - -(* 4 bytes node kind + 4 byte pointer to variable expression + 4 * 7 for arguments. *) -proc _call_size(); - return 44 -end; - -proc _call_get_name(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _call_set_name(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _call_get_argument(this: Word, n: Word); -begin - n := n * 4; - this := this + 8; - this := this + n; - return this^ -end; - -proc _call_set_argument(this: Word, n: Word, value: Word); -begin - n := n * 4; - this := this + 8; - this := this + n; - this^ := value -end; - -proc _elna_parser_call(callee: Word); -var - parsed_expression: Word; - result: Word; - argument_number: Word; - token_kind: Word; - call_size: Word; -begin - call_size := _call_size(); - result := _allocate(call_size); - _node_set_kind(result, NodeKind.call); - _statement_set_next(result, 0); - - argument_number := 1; - _call_set_name(result, callee); - - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.right_paren then - _elna_lexer_skip_token(); - goto elna_parser_call_end - end; - - .elna_parser_call_loop; - parsed_expression := _elna_parser_binary_expression(); - _call_set_argument(result, argument_number, parsed_expression); - argument_number := argument_number + 1; - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - if token_kind = 
ElnaLexerKind.comma then - goto elna_parser_call_loop - end; - - .elna_parser_call_end; - (* Set the trailing argument to nil. *) - _call_set_argument(result, argument_number, 0); - - return result -end; - -proc _elna_tac_call(parsed_call: Word, symbol_table: Word); -var - name_length: Word; - name: Word; - argument_count: Word; - stack_offset: Word; - parsed_expression: Word; - instruction: Word; - first_instruction: Word; - current_instruction: Word; -begin - parsed_expression := _call_get_name(parsed_call); - name := _variable_expression_get_name(parsed_expression); - name_length := _variable_expression_get_length(parsed_expression); - argument_count := 0; - first_instruction := 0; - - .elna_tac_call_loop; - - parsed_expression := _call_get_argument(parsed_call, argument_count + 1); - if parsed_expression = 0 then - goto elna_tac_call_finalize - else - instruction := _elna_tac_binary_expression(parsed_expression, symbol_table); - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - current_instruction := instruction; - - (* Save the argument on the stack. *) - stack_offset := argument_count * 4; - - instruction := _elna_tac_store_word(ElnaTacRegister.t0, - ElnaTacRegister.sp, 116 - stack_offset); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - argument_count := argument_count + 1; - goto elna_tac_call_loop - end; - .elna_tac_call_finalize; - - (* Load the argument from the stack. *) - if argument_count <> 0 then - (* Decrement the argument counter. 
*) - argument_count := argument_count - 1; - stack_offset := argument_count * 4; - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - instruction := _elna_tac_load_word(ElnaTacRegister.a0 + argument_count, - ElnaTacRegister.sp, 116 - stack_offset); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - goto elna_tac_call_finalize - end; - instruction := _elna_tac_jal(name, name_length); - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - return first_instruction -end; - -(** - * All statements are chained into a list. Next contains a pointer to the next - * statement in the statement list. - *) -proc _statement_get_next(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _statement_set_next(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _goto_statement_size(); - return 16 -end; - -proc _goto_statement_get_label(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _goto_statement_set_label(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _goto_statement_get_length(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _goto_statement_set_length(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_parser_goto_statement(); -var - token_kind: Word; - label_name: Word; - label_length: Word; - statement_size: Word; - result: Word; -begin - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - label_name := _elna_lexer_global_get_start(); - label_length := _elna_lexer_global_get_end() - label_name; - _elna_lexer_skip_token(); - - statement_size := _goto_statement_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.goto_statement); - _statement_set_next(result, 0); - _goto_statement_set_label(result, 
label_name); - _goto_statement_set_length(result, label_length); - - return result -end; - -proc _elna_tac_goto_statement(parser_node: Word); -var - label_name: Word; - label_length: Word; - label_with_dot: Word; - instruction: Word; -begin - label_name := _goto_statement_get_label(parser_node); - label_length := _goto_statement_get_length(parser_node); - label_with_dot := _allocate(label_length + 1); - - _store_byte('.', label_with_dot); - _memcpy(label_with_dot + 1, label_name, label_length); - - return _elna_tac_jump(label_with_dot, label_length + 1) -end; - -proc _label_declaration_size(); - return 16 -end; - -proc _label_declaration_get_label(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _label_declaration_set_label(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _label_declaration_get_length(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _label_declaration_set_length(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_parser_label_declaration(); -var - token_kind: Word; - label_name: Word; - label_length: Word; - statement_size: Word; - result: Word; -begin - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - label_name := _elna_lexer_global_get_start(); - label_length := _elna_lexer_global_get_end() - label_name; - _elna_lexer_skip_token(); - - statement_size := _label_declaration_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.label_declaration); - _statement_set_next(result, 0); - _goto_statement_set_label(result, label_name); - _goto_statement_set_length(result, label_length); - - return result -end; - -proc _elna_tac_label_declaration(parser_node: Word); -var - label_name: Word; - label_length: Word; -begin - label_name := _label_declaration_get_label(parser_node); - label_length := _label_declaration_get_length(parser_node); - - return _elna_tac_label(label_name, label_length) -end; 
- -proc _elna_tac_local_designator(symbol: Word); -var - variable_offset: Word; -begin - variable_offset := _parameter_info_get_offset(symbol); - - return _elna_tac_add_immediate(ElnaTacRegister.t0, ElnaTacRegister.sp, variable_offset) -end; - -proc _elna_tac_global_designator(variable_expression: Word); -var - name: Word; - token_length: Word; -begin - name := _variable_expression_get_name(variable_expression); - token_length := _variable_expression_get_length(variable_expression); - - return _elna_tac_load_address(ElnaTacRegister.t0, name, token_length) -end; - -proc _field_access_expression_size(); - return 16 -end; - -proc _field_access_expression_get_aggregate(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _field_access_expression_set_aggregate(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _field_access_expression_get_field(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _field_access_expression_set_field(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _field_access_expression_get_length(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _field_access_expression_set_length(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_tac_enumeration_value(field_access_expression: Word); -var - enumeration_type: Word; - members: Word; - members_length: Word; - token_type: Word; - value_name: Word; - name_length: Word; - member_name: Word; - member_length: Word; - counter: Word; - symbol: Word; - instruction: Word; -begin - symbol := _field_access_expression_get_aggregate(field_access_expression); - value_name := _variable_expression_get_name(symbol); - name_length := _variable_expression_get_length(symbol); - - symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length); - - enumeration_type := _type_info_get_type(symbol); - members := 
_enumeration_type_get_members(enumeration_type); - members_length := _enumeration_type_get_length(enumeration_type); - - _elna_lexer_read_token(@token_type); - - value_name := _field_access_expression_get_field(field_access_expression); - name_length := _field_access_expression_get_length(field_access_expression); - counter := 1; - - instruction := 0; - .elna_tac_enumeration_value_members; - if members_length > 0 then - member_name := members^; - member_length := members + 4; - member_length := member_length^; - - if _string_compare(value_name, name_length, member_name, member_length) = 0 then - members_length := members_length - 1; - members := members + 8; - counter := counter + 1; - goto elna_tac_enumeration_value_members - end; - instruction := _elna_tac_load_immediate(ElnaTacRegister.t0, counter, 0) - end; - return instruction -end; - -proc _elna_parser_field_access_expression(aggregate: Word); -var - token_kind: Word; - name: Word; - name_token: Word; - result: Word; - memory_size: Word; -begin - (* Skip dot. Read the enumeration value. 
*) - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - name := _elna_lexer_global_get_start(); - name_token := _elna_lexer_global_get_end(); - name_token := name_token - name; - _elna_lexer_skip_token(); - memory_size := _field_access_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.field_access_expression); - _field_access_expression_set_aggregate(result, aggregate); - _field_access_expression_set_field(result, name); - _field_access_expression_set_length(result, name_token); - - return result -end; - -proc _elna_tac_designator(parser_node: Word, symbol_table: Word, is_address: Word); -var - name_token: Word; - lookup_result: Word; - token_kind: Word; - parser_node: Word; - node_kind: Word; - first_instruction: Word; - instruction: Word; -begin - node_kind := _node_get_kind(parser_node); - - if node_kind = NodeKind.dereference_expression then - parser_node := _dereference_expression_get_pointer(parser_node); - first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address); - instruction := _elna_tac_load_word(ElnaTacRegister.t0, ElnaTacRegister.t0, 0); - _elna_tac_instruction_set_next(first_instruction, instruction) - elsif node_kind = NodeKind.field_access_expression then - first_instruction := _elna_tac_enumeration_value(parser_node); - is_address^ := 0 - elsif node_kind = NodeKind.call then - first_instruction := _elna_tac_call(parser_node, symbol_table); - instruction := _elna_tac_move(ElnaTacRegister.t0, ElnaTacRegister.a0); - _elna_tac_instruction_set_next(first_instruction, instruction); - is_address^ := 0 - else - first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address) - end; - return first_instruction -end; - -proc _assign_statement_size(); - return 16 -end; - -proc _assign_statement_get_assignee(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _assign_statement_set_assignee(this: Word, value: Word); -begin - this := 
this + 8; - this^ := value -end; - -proc _assign_statement_get_assignment(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _assign_statement_set_assignment(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_parser_assign_statement(assignee: Word); -var - statement_size: Word; - result: Word; - token_kind: Word; - assignment_node: Word; -begin - statement_size := _assign_statement_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.assign_statement); - _statement_set_next(result, 0); - _assign_statement_set_assignee(result, assignee); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - assignment_node := _elna_parser_binary_expression(); - _assign_statement_set_assignment(result, assignment_node); - - return result -end; - -proc _elna_tac_assign_statement(parser_tree: Word, symbol_table: Word); -var - current_expression: Word; - is_address: Word; - first_instruction: Word; - instruction: Word; - current_instruction: Word; -begin - current_expression := _assign_statement_get_assignee(parser_tree); - first_instruction := _elna_tac_designator(current_expression, symbol_table, @is_address); - - (* Save the assignee address on the stack. *) - current_instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.sp, 60); - _elna_tac_instruction_set_next(first_instruction, current_instruction); - - (* Compile the assignment. 
*) - current_expression := _assign_statement_get_assignment(parser_tree); - instruction := _elna_tac_binary_expression(current_expression, symbol_table); - _elna_tac_instruction_set_next(current_instruction, instruction); - - current_instruction := _elna_tac_load_word(ElnaTacRegister.t1, ElnaTacRegister.sp, 60); - _elna_tac_instruction_set_next(instruction, current_instruction); - - instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.t1, 0); - _elna_tac_instruction_set_next(current_instruction, instruction); - - return first_instruction -end; - -proc _return_statement_size(); - return 12 -end; - -proc _return_statement_get_returned(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _return_statement_set_returned(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_parser_return_statement(); -var - token_kind: Word; - returned: Word; - label_length: Word; - statement_size: Word; - result: Word; -begin - (* Skip "return" keyword and whitespace after it. *) - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - returned := _elna_parser_binary_expression(); - - statement_size := _return_statement_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.return_statement); - _statement_set_next(result, 0); - _return_statement_set_returned(result, returned); - - return result -end; - -proc _elna_tac_return_statement(parser_node: Word, symbol_table: Word); -var - return_expression: Word; - first_instruction: Word; - instruction: Word; -begin - return_expression := _return_statement_get_returned(parser_node); - first_instruction := _elna_tac_binary_expression(return_expression, symbol_table); - instruction := _elna_tac_move(ElnaTacRegister.a0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(first_instruction, instruction); - return first_instruction -end; - -(** - * Writes a label, .Ln, where n is a unique number. 
- * - * Parameters: - * counter - Label counter. - *) -proc _write_label(counter: Word, length: Word); -var - first_byte: Word; -begin - if length = 0 then - _write_s(".L", 2); - _write_i(counter) - else - first_byte := _load_byte(counter); - if first_byte <> '.' then - _write_c('.') - end; - _write_s(counter, length) - end -end; - -proc _elna_parser_conditional_statements(); -var - conditional_size: Word; - token_kind: Word; - current_node: Word; - result: Word; -begin - conditional_size := _conditional_statements_size(); - result := _allocate(conditional_size); - - (* Skip "if", "while" or "elsif". *) - _elna_lexer_skip_token(); - - current_node := _elna_parser_binary_expression(); - _conditional_statements_set_condition(result, current_node); - - (* Skip "then" or "do". *) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - current_node := _elna_parser_statements(); - _conditional_statements_set_statements(result, current_node); - - _conditional_statements_set_next(result, 0); - return result -end; - -proc _elna_tac_conditional_statements(parser_node: Word, after_end_label: Word, symbol_table: Word); -var - condition_label: Word; - current_node: Word; - instruction: Word; - current_instruction: Word; - first_instruction: Word; -begin - (* Compile condition. *) - current_node := _conditional_statements_get_condition(parser_node); - first_instruction := _elna_tac_binary_expression(current_node, symbol_table); - - (* condition_label is the label in front of the next elsif condition or end. 
*) - condition_label := label_counter; - label_counter := label_counter + 1; - - current_instruction := _elna_tac_beqz(ElnaTacRegister.t0, condition_label, 0); - _elna_tac_instruction_set_next(first_instruction, current_instruction); - - current_node := _conditional_statements_get_statements(parser_node); - instruction := _elna_tac_statements(current_node, symbol_table); - if instruction <> 0 then - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction - end; - - instruction := _elna_tac_jump(after_end_label, 0); - _elna_tac_instruction_set_next(current_instruction, instruction); - - current_instruction := _elna_tac_label(condition_label, 0); - _elna_tac_instruction_set_next(instruction, current_instruction); - - return first_instruction -end; - -(** - * Conditional statements is a list of pairs: condition and statements. - * Used for example to represent if and elsif blocks with beloning statements. - *) -proc _conditional_statements_size(); - return 12 -end; - -proc _conditional_statements_get_condition(this: Word); - return this^ -end; - -proc _conditional_statements_set_condition(this: Word, value: Word); -begin - this^ := value -end; - -proc _conditional_statements_get_statements(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _conditional_statements_set_statements(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _conditional_statements_get_next(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _conditional_statements_set_next(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _if_statement_size(); - return 16 -end; - -proc _if_statement_get_conditionals(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _if_statement_set_conditionals(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _if_statement_get_else(this: Word); -begin - this := this + 12; - return 
this^ -end; - -proc _if_statement_set_else(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_parser_if_statement(); -var - current_node: Word; - result: Word; - object_size: Word; - token_kind: Word; - previous_conditional: Word; - next_conditional: Word; -begin - object_size := _if_statement_size(); - result := _allocate(object_size); - - _node_set_kind(result, NodeKind.if_statement); - _statement_set_next(result, 0); - - previous_conditional := _elna_parser_conditional_statements(); - _if_statement_set_conditionals(result, previous_conditional); - - .elna_parser_if_statement_loop; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind._elsif then - next_conditional := _elna_parser_conditional_statements(); - _conditional_statements_set_next(previous_conditional, next_conditional); - previous_conditional = next_conditional; - - goto elna_parser_if_statement_loop - elsif token_kind = ElnaLexerKind._else then - _elna_lexer_skip_token(); - - current_node := _elna_parser_statements(); - _if_statement_set_else(result, current_node) - else - _if_statement_set_else(result, 0) - end; - _elna_lexer_skip_token(); - - return result -end; - -proc _elna_parser_statement(); -var - token_kind: Word; - result : Word; -begin - result := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind._goto then - result := _elna_parser_goto_statement() - elsif token_kind = ElnaLexerKind._if then - result := _elna_parser_if_statement() - elsif token_kind = ElnaLexerKind._return then - result := _elna_parser_return_statement() - elsif token_kind = ElnaLexerKind.dot then - result := _elna_parser_label_declaration() - elsif token_kind = ElnaLexerKind.identifier then - result := _elna_parser_designator(); - - if _node_get_kind(result) <> NodeKind.call then - result := _elna_parser_assign_statement(result) - end - end; - return result -end; - -proc _elna_parser_statements(); -var - token_kind: Word; - 
previous_statement: Word; - next_statement: Word; - first_statement: Word; -begin - _skip_empty_lines(); - - first_statement := _elna_parser_statement(); - previous_statement := first_statement; - if previous_statement = 0 then - goto elna_parser_statements_end - end; - - .elna_parser_statement_loop; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.semicolon then - _elna_lexer_skip_token(); - _skip_empty_lines(); - next_statement := _elna_parser_statement(); - _statement_set_next(previous_statement, next_statement); - previous_statement := next_statement; - - if previous_statement <> 0 then - goto elna_parser_statement_loop - end - end; - .elna_parser_statements_end; - _skip_empty_lines(); - - return first_statement -end; - -proc _elna_tac_statements(parser_node: Word, symbol_table: Word); -var - current_statement: Word; - instruction: Word; - first_instruction: Word; - current_instruction: Word; -begin - current_statement := parser_node; - first_instruction := 0; - - .elna_tac_statements_loop; - if current_statement <> 0 then - instruction := _elna_tac_statement(current_statement, symbol_table); - current_statement := _statement_get_next(current_statement); - if instruction = 0 then - goto elna_tac_statements_loop - end; - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - current_instruction := instruction; - goto elna_tac_statements_loop - end; - return first_instruction -end; - -proc _elna_tac_if_statement(parser_node: Word, symbol_table: Word); -var - current_node: Word; - after_end_label: Word; - condition_label: Word; - first_instruction: Word; - instruction: Word; - current_instruction: Word; -begin - after_end_label := label_counter; - label_counter := label_counter + 1; - - current_node := _if_statement_get_conditionals(parser_node); - first_instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); - 
current_instruction := first_instruction; - - .elna_tac_if_statement_loop; - current_node := _conditional_statements_get_next(current_node); - if current_node <> 0 then - instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - goto elna_tac_if_statement_loop - end; - current_node := _if_statement_get_else(parser_node); - - if current_node <> 0 then - instruction := _elna_tac_statements(current_node, symbol_table); - if instruction <> 0 then - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction - end - end; - instruction := _elna_tac_label(after_end_label, 0); - _elna_tac_instruction_set_next(current_instruction, instruction); - - return first_instruction -end; - -proc _elna_tac_statement(parser_node: Word, symbol_table: Word); -var - statement_kind: Word; - instruction: Word; -begin - statement_kind := _node_get_kind(parser_node); - - if statement_kind = NodeKind.goto_statement then - instruction := _elna_tac_goto_statement(parser_node) - elsif statement_kind = NodeKind.if_statement then - instruction := _elna_tac_if_statement(parser_node, symbol_table) - elsif statement_kind = NodeKind.return_statement then - instruction := _elna_tac_return_statement(parser_node, symbol_table) - elsif statement_kind = NodeKind.label_declaration then - instruction := _elna_tac_label_declaration(parser_node) - elsif statement_kind = NodeKind.call then - instruction := _elna_tac_call(parser_node, symbol_table) - elsif statement_kind = NodeKind.assign_statement then - instruction := _elna_tac_assign_statement(parser_node, symbol_table) - else - instruction := 0 - end; - return instruction -end; - -(** - * Writes a regster name to the standard output. - * - * Parameters: - * register_character - Register character. - * register_number - Register number. 
- *) -proc _write_register(register_character: Word, register_number: Word); -begin - _write_c(register_character); - _write_c(register_number + '0') -end; - -proc _type_get_kind(this: Word); - return this^ -end; - -proc _type_set_kind(this: Word, value: Word); -begin - this^ := value -end; - -proc _type_get_size(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _type_set_size(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _enumeration_type_size(); - return 16 -end; - -proc _enumeration_type_get_members(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _enumeration_type_set_members(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _enumeration_type_get_length(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _enumeration_type_set_length(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _enumeration_type_expression_size(); - return 12 -end; - -proc _enumeration_type_expression_get_members(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _enumeration_type_expression_set_members(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _enumeration_type_expression_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _enumeration_type_expression_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _record_type_size(); - return 16 -end; - -proc _record_type_get_members(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _record_type_set_members(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _record_type_get_length(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _record_type_set_length(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _record_type_expression_size(); - return 12 -end; - 
-proc _record_type_expression_get_members(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _record_type_expression_set_members(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _record_type_expression_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _record_type_expression_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _named_type_expression_size(); - return 12 -end; - -proc _named_type_expression_get_name(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _named_type_expression_set_name(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _named_type_expression_get_length(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _named_type_expression_set_length(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _elna_parser_record_type_expression(); -var - entry: Word; - member_count: Word; - memory_start: Word; - field_name: Word; - field_length: Word; - field_type: Word; - token_kind: Word; - type_expression_size: Word; - result: Word; - previous_entry: Word; -begin - _elna_lexer_skip_token(); - member_count := 0; - memory_start := 0; - - _elna_lexer_read_token(@token_kind); - if token_kind = ElnaLexerKind._end then - goto elna_parser_record_type_expression_end - end; - .elna_parser_record_type_expression_loop; - entry := _allocate(16); - member_count := member_count + 1; - - field_name := _elna_lexer_global_get_start(); - field_length := _elna_lexer_global_get_end() - field_name; - - entry^ := field_name; - entry := entry + 4; - - entry^ := field_length; - entry := entry + 4; - - (* Skip the identifier. 
*) - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - field_type := _elna_parser_type_expression(); - - entry^ := field_type; - entry := entry + 4; - - entry^ := 0; - if memory_start = 0 then - memory_start := entry - 12 - else - previous_entry^ := entry - 12 - end; - previous_entry := entry; - - _elna_lexer_read_token(@token_kind); - if token_kind = ElnaLexerKind.semicolon then - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - goto elna_parser_record_type_expression_loop - end; - - .elna_parser_record_type_expression_end; - _elna_lexer_skip_token(); - - type_expression_size := _enumeration_type_expression_size(); - result := _allocate(type_expression_size); - - _node_set_kind(result, NodeKind.record_type_expression); - _record_type_expression_set_members(result, memory_start); - _record_type_expression_set_length(result, member_count); - - return result -end; - -proc _elna_parser_enumeration_type_expression(); -var - token_kind: Word; - enumeration_name: Word; - name_length: Word; - memory_start: Word; - member_count: Word; - result: Word; - type_expression_size: Word; - entry: Word; - previous_entry: Word; -begin - _elna_lexer_skip_token(); - memory_start := 0; - member_count := 0; - - _elna_lexer_read_token(@token_kind); - if token_kind = ElnaLexerKind.right_paren then - goto elna_parser_enumeration_type_expression_end - end; - .elna_parser_enumeration_type_expression_loop; - entry := _allocate(12); - member_count := member_count + 1; - - enumeration_name := _elna_lexer_global_get_start(); - name_length := _elna_lexer_global_get_end() - enumeration_name; - - entry^ := enumeration_name; - entry := entry + 4; - - entry^ := name_length; - entry := entry + 4; - - entry^ := 0; - if memory_start = 0 then - memory_start := entry - 8 - else - previous_entry^ := entry - 8 - end; - previous_entry := entry; - - (* Skip the identifier. 
*) - _elna_lexer_skip_token(); - - _elna_lexer_read_token(@token_kind); - if token_kind = ElnaLexerKind.comma then - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - goto elna_parser_enumeration_type_expression_loop - end; - - .elna_parser_enumeration_type_expression_end; - _elna_lexer_skip_token(); - - type_expression_size := _enumeration_type_expression_size(); - result := _allocate(type_expression_size); - - _node_set_kind(result, NodeKind.enumeration_type_expression); - _enumeration_type_expression_set_members(result, memory_start); - _enumeration_type_expression_set_length(result, member_count); - - return result -end; - -(** - * Reads and creates enumeration type representation. - * - * record - * type_kind: Word; - * size: Word; - * members: StringArray; - * length: Word - * end; - * - * Returns enumeration type description. - *) -proc _elna_name_type_enumeration(parser_node: Word); -var - result: Word; - memory_start: Word; - member_count: Word; - member_array_size: Word; - member_array_start: Word; - member_array_current: Word; -begin - member_array_size := _enumeration_type_size(); - result := _allocate(member_array_size); - - memory_start := _enumeration_type_expression_get_members(parser_node); - member_count := _enumeration_type_expression_get_length(parser_node); - - (* Copy the list of enumeration members into an array of strings. 
*) - member_array_size := member_count * 8; - member_array_start := _allocate(member_array_size); - member_array_current := member_array_start; - - .elna_name_type_enumeration_loop; - if member_count > 0 then - member_array_current^ := memory_start^; - member_array_current := member_array_current + 4; - memory_start := memory_start + 4; - - member_array_current^ := memory_start^; - member_array_current := member_array_current + 4; - memory_start := memory_start + 4; - - memory_start := memory_start^; - member_count := member_count - 1; - goto elna_name_type_enumeration_loop - end; - member_count := _enumeration_type_expression_get_length(parser_node); - - _type_set_kind(result, TypeKind.enumeration); - _type_set_size(result, 4); - _enumeration_type_set_members(result, member_array_start); - _enumeration_type_set_length(result, member_count); - - return _type_info_create(result) -end; - -proc _elna_name_type_record(parser_node: Word); -var - result: Word; - memory_start: Word; - member_count: Word; - member_array_size: Word; - member_array_start: Word; - member_array_current: Word; -begin - member_array_size := _record_type_size(); - result := _allocate(member_array_size); - - memory_start := _record_type_expression_get_members(parser_node); - member_count := _record_type_expression_get_length(parser_node); - - member_array_size := member_count * 12; - member_array_start := _allocate(member_array_size); - member_array_current := member_array_start; - - .elna_name_type_record_loop; - if member_count > 0 then - member_array_current^ := memory_start^; - member_array_current := member_array_current + 4; - memory_start := memory_start + 4; - - member_array_current^ := memory_start^; - member_array_current := member_array_current + 4; - memory_start := memory_start + 4; - - member_array_current^ := _elna_name_type_expression(memory_start^); - member_array_current := member_array_current + 4; - memory_start := memory_start + 4; - - memory_start := memory_start^; - 
member_count := member_count - 1; - goto elna_name_type_record_loop - end; - member_count := _record_type_expression_get_length(parser_node); - - _type_set_kind(result, TypeKind._record); - _type_set_size(result, member_count * 4); - _record_type_set_members(result, member_array_start); - _record_type_set_length(result, member_count); - - return _type_info_create(result) -end; - -proc _elna_parser_named_type_expression(); -var - type_expression_size: Word; - result: Word; - type_name: Word; - name_length: Word; -begin - type_expression_size := _named_type_expression_size(); - result := _allocate(type_expression_size); - - _node_set_kind(result, NodeKind.named_type_expression); - type_name := _elna_lexer_global_get_start(); - name_length := _elna_lexer_global_get_end() - type_name; - _named_type_expression_set_name(result, type_name); - _named_type_expression_set_length(result, name_length); - _elna_lexer_skip_token(); - - return result -end; - -proc _elna_parser_type_expression(); -var - token_kind: Word; - result: Word; -begin - result := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.identifier then - result := _elna_parser_named_type_expression() - elsif token_kind = ElnaLexerKind.left_paren then - result := _elna_parser_enumeration_type_expression() - elsif token_kind = ElnaLexerKind._record then - result := _elna_parser_record_type_expression() - end; - return result -end; - -proc _elna_name_type_expression(parser_node: Word); -var - token_kind: Word; - type_name: Word; - name_length: Word; - result: Word; -begin - token_kind := _node_get_kind(parser_node); - - if token_kind = NodeKind.named_type_expression then - type_name := _named_type_expression_get_name(parser_node); - name_length := _named_type_expression_get_length(parser_node); - - result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); - result := _type_info_get_type(result) - elsif token_kind = NodeKind.enumeration_type_expression then - result := 
_elna_name_type_enumeration(parser_node) - elsif token_kind = NodeKind.record_type_expression then - result := _elna_name_type_record(parser_node) - end; - - return result -end; - -proc _type_info_get_type(this: Word); -begin - this := this + 4; - return this^ -end; - -(** - * Parameters: - * parameter_index - Parameter index. - *) -proc _parameter_info_create(parameter_index: Word); -var - offset: Word; - current_word: Word; - result: Word; -begin - result := _allocate(8); - current_word := result; - current_word^ := InfoKind.parameter_info; - - current_word := current_word + 4; - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - offset := parameter_index * 4; - current_word^ := 88 - offset; - - return result -end; - -proc _parameter_info_get_offset(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _type_info_create(type_representation: Word); -var - result: Word; - current_word: Word; -begin - result := _allocate(8); - current_word := result; - current_word^ := InfoKind.type_info; - - current_word := current_word + 4; - current_word^ := type_representation; - - return result -end; - -(** - * Parameters: - * temporary_index - Parameter index. - *) -proc _temporary_info_create(temporary_index: Word); -var - offset: Word; - current_word: Word; - result: Word; -begin - result := _allocate(8); - current_word := result; - current_word^ := InfoKind.temporary_info; - current_word := current_word + 4; - - (* Calculate the stack offset: 4 * variable_counter. *) - current_word^ := temporary_index * 4; - - return result -end; - -(** - * Parameters: - * symbol_table - Local symbol table. 
- *) -proc _procedure_info_create(symbol_table: Word); -var - current_word: Word; - result: Word; -begin - result := _allocate(8); - current_word := result; - current_word^ := InfoKind.procedure_info; - current_word := current_word + 4; - - current_word^ := symbol_table; - - return result -end; - -proc _procedure_info_get_symbol_table(this: Word); -begin - this := this + 4; - return this^ -end; - -(** - * Parameters: - * parameter_index - Parameter index. - *) -proc _elna_name_procedure_parameter(parser_node: Word, parameter_index: Word, symbol_table: Word); -var - name_length: Word; - info: Word; - name_position: Word; -begin - name_position := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - - info := _parameter_info_create(parameter_index); - _symbol_table_enter(symbol_table, name_position, name_length, info) -end; - -(** - * Parameters: - * variable_index - Variable index. - *) -proc _elna_name_procedure_temporary(parser_node: Word, variable_index: Word, symbol_table: Word); -var - name_length: Word; - info: Word; - name_position: Word; -begin - name_position := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - - info := _temporary_info_create(variable_index); - _symbol_table_enter(symbol_table, name_position, name_length, info) -end; - -proc _elna_name_procedure_temporaries(parser_node: Word, symbol_table: Word); -var - temporary_counter: Word; -begin - temporary_counter := 0; - - .elna_name_procedure_temporaries_loop; - if parser_node <> 0 then - _elna_name_procedure_temporary(parser_node, temporary_counter, symbol_table); - - temporary_counter := temporary_counter + 1; - parser_node := _declaration_get_next(parser_node); - goto elna_name_procedure_temporaries_loop - end -end; - -proc _declaration_get_next(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _declaration_set_next(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - 
-proc _declaration_get_name(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _declaration_set_name(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _declaration_get_length(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _declaration_set_length(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -(* Kind + next declaration pointer + argument list + procedure name + statement list pointer + temporary list pointer. *) -proc _procedure_declaration_size(); - return 28 -end; - -proc _procedure_declaration_get_body(this: Word); -begin - this := this + 16; - return this^ -end; - -proc _procedure_declaration_set_body(this: Word, value: Word); -begin - this := this + 16; - this^ := value -end; - -proc _procedure_declaration_get_temporaries(this: Word); -begin - this := this + 20; - return this^ -end; - -proc _procedure_declaration_set_temporaries(this: Word, value: Word); -begin - this := this + 20; - this^ := value -end; - -proc _procedure_declaration_get_parameters(this: Word); -begin - this := this + 24; - return this^ -end; - -proc _procedure_declaration_set_parameters(this: Word, value: Word); -begin - this := this + 24; - this^ := value -end; - -proc _elna_parser_procedure_declaration(); -var - name_pointer: Word; - name_length: Word; - token_kind: Word; - result: Word; - declaration_size: Word; - parameter_head: Word; -begin - declaration_size := _procedure_declaration_size(); - result := _allocate(declaration_size); - - _node_set_kind(result, NodeKind.procedure_declaration); - _declaration_set_next(result, 0); - - (* Skip "proc ". *) - _elna_lexer_skip_token(); - - _elna_lexer_read_token(@token_kind); - name_pointer := _elna_lexer_global_get_start(); - name_length := _elna_lexer_global_get_end() - name_pointer; - - _declaration_set_name(result, name_pointer); - _declaration_set_length(result, name_length); - (* Skip procedure name. 
*) - _elna_lexer_skip_token(); - - (* Skip open paren. *) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - parameter_head := 0; - - .elna_parser_procedure_declaration_parameter; - _elna_lexer_read_token(@token_kind); - - if token_kind <> ElnaLexerKind.right_paren then - name_pointer := _elna_parser_variable_declaration(); - if parameter_head = 0 then - parameter_head := name_pointer - else - _declaration_set_next(name_length, name_pointer) - end; - name_length := name_pointer; - - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.comma then - _elna_lexer_skip_token(); - goto elna_parser_procedure_declaration_parameter - end - end; - (* Skip close paren. *) - _elna_lexer_skip_token(); - _procedure_declaration_set_parameters(result, parameter_head); - - (* Skip semicolon and newline. *) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - parameter_head := _elna_parser_var_part(); - _procedure_declaration_set_temporaries(result, parameter_head); - - (* Skip semicolon, "begin" and newline. *) - _elna_lexer_read_token(@token_kind); - if token_kind = ElnaLexerKind._begin then - _elna_lexer_skip_token(); - parameter_head := _elna_parser_statements() - elsif token_kind = ElnaLexerKind._return then - parameter_head := _elna_parser_return_statement() - end; - _procedure_declaration_set_body(result, parameter_head); - - (* Skip the "end" keyword. 
*) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - return result -end; - -proc _elna_tac_parameters(current_parameter: Word, new_symbol_table: Word); -var - name_pointer: Word; - name_length: Word; - parameter_counter: Word; - instruction: Word; - first_instruction: Word; - current_instruction: Word; - symbol_info: Word; -begin - first_instruction := 0; - parameter_counter := 0; - - .elna_tac_parameters_loop; - if current_parameter <> 0 then - name_pointer := _declaration_get_name(current_parameter); - name_length := _declaration_get_length(current_parameter); - symbol_info := _symbol_table_lookup(new_symbol_table, name_pointer, name_length); - - symbol_info := _parameter_info_get_offset(symbol_info); - - instruction := _elna_tac_store_word(ElnaTacRegister.a0 + parameter_counter, - ElnaTacRegister.sp, symbol_info); - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - current_instruction := instruction; - - parameter_counter := parameter_counter + 1; - - current_parameter := _declaration_get_next(current_parameter); - goto elna_tac_parameters_loop - end; - return first_instruction -end; - -proc _elna_tac_procedure_declaration(parser_node: Word); -var - name_pointer: Word; - name_length: Word; - current_parameter: Word; - body: Word; - new_symbol_table: Word; - symbol_info: Word; - instruction: Word; - first_instruction: Word; - result: Word; - result_size: Word; -begin - result_size := _elna_tac_declaration_size(); - result := _allocate(result_size); - - _elna_tac_declaration_set_next(result, 0); - - name_pointer := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - - _elna_tac_declaration_set_name(result, name_pointer); - _elna_tac_declaration_set_length(result, name_length); - - symbol_info := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); - new_symbol_table := 
_procedure_info_get_symbol_table(symbol_info); - - (* Write the prologue. *) - first_instruction := _elna_tac_instruction_create(ElnaTacOperator.start); - - current_parameter := _procedure_declaration_get_parameters(parser_node); - current_parameter := _elna_tac_parameters(current_parameter, new_symbol_table); - _elna_tac_instruction_set_next(first_instruction, current_parameter); - - body := _procedure_declaration_get_body(parser_node); - instruction := _elna_tac_statements(body, new_symbol_table); - _elna_tac_instruction_set_next(first_instruction, instruction); - - (* Write the epilogue. *) - instruction := _elna_tac_instruction_create(ElnaTacOperator.ret); - _elna_tac_instruction_set_next(first_instruction, instruction); - - _elna_tac_declaration_set_body(result, first_instruction); - - return result -end; - -proc _elna_parser_procedures(); -var - parser_node: Word; - result: Word; - current_declaration: Word; - token_kind: Word; -begin - result := 0; - - .elna_parser_procedures_loop; - _skip_empty_lines(); - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind._proc then - parser_node := _elna_parser_procedure_declaration(); - if result = 0 then - result := parser_node - else - _declaration_set_next(current_declaration, parser_node) - end; - current_declaration := parser_node; - - (* Skip semicolon. 
*) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - goto elna_parser_procedures_loop - end; - return result -end; - -proc _elna_tac_procedures(parser_node: Word); -var - result: Word; - current_procedure: Word; - first_procedure: Word; -begin - first_procedure := 0; - - .elna_tac_procedures_loop; - if parser_node = 0 then - goto elna_tac_procedures_end - end; - result := _elna_tac_procedure_declaration(parser_node); - if first_procedure = 0 then - first_procedure := result - else - _elna_tac_declaration_set_next(current_procedure, result) - end; - current_procedure := result; - - parser_node := _declaration_get_next(parser_node); - goto elna_tac_procedures_loop; - - .elna_tac_procedures_end; - return first_procedure -end; - -(** - * Skips comments. - *) -proc _skip_empty_lines(); -var - token_kind: Word; -begin - .skip_empty_lines_rerun; - - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.comment then - _elna_lexer_skip_token(); - goto skip_empty_lines_rerun - end -end; - - -proc _type_declaration_size(); - return 20 -end; - -proc _type_declaration_get_type(this: Word); -begin - this := this + 16; - return this^ -end; - -proc _type_declaration_set_type(this: Word, value: Word); -begin - this := this + 16; - this^ := value -end; - -proc _elna_parser_type_declaration(); -var - token_kind: Word; - type_name: Word; - name_length: Word; - parser_node: Word; - result: Word; - declaration_size: Word; -begin - _elna_lexer_read_token(@token_kind); - type_name := _elna_lexer_global_get_start(); - name_length := _elna_lexer_global_get_end() - type_name; - - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - parser_node := _elna_parser_type_expression(); - declaration_size := _type_declaration_size(); - result := _allocate(declaration_size); - - _node_set_kind(result, NodeKind.type_declaration); - _declaration_set_next(result, 0); - _declaration_set_name(result, type_name); - 
_declaration_set_length(result, name_length); - _type_declaration_set_type(result, parser_node); - - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - return result -end; - -proc _elna_name_type_declaration(parser_node: Word); -var - type_name: Word; - name_length: Word; - type_info: Word; -begin - type_name := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - - parser_node := _type_declaration_get_type(parser_node); - type_info := _elna_name_type_expression(parser_node); - - _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info) -end; - -proc _elna_type_type_declaration(parser_node: Word); -begin -end; - -proc _elna_parser_type_part(); -var - token_kind: Word; - parser_node: Word; - result: Word; - current_declaration: Word; -begin - result := 0; - _skip_empty_lines(); - _elna_lexer_read_token(@token_kind); - - if token_kind <> ElnaLexerKind._type then - goto elna_parser_type_part_end - end; - _elna_lexer_skip_token(); - - .elna_parser_type_part_loop; - _skip_empty_lines(); - - _elna_lexer_read_token(@token_kind); - if token_kind = ElnaLexerKind.identifier then - parser_node := _elna_parser_type_declaration(); - - if result = 0 then - result := parser_node - else - _declaration_set_next(current_declaration, parser_node) - end; - current_declaration := parser_node; - goto elna_parser_type_part_loop - end; - - .elna_parser_type_part_end; - return result -end; - -proc _variable_declaration_size(); - return 20 -end; - -proc _variable_declaration_get_type(this: Word); -begin - this := this + 16; - return this^ -end; - -proc _variable_declaration_set_type(this: Word, value: Word); -begin - this := this + 16; - this^ := value -end; - -proc _elna_parser_variable_declaration(); -var - token_kind: Word; - name: Word; - name_length: Word; - variable_type: Word; - result: Word; - declaration_size: Word; -begin - _elna_lexer_read_token(@token_kind); - - name := _elna_lexer_global_get_start(); - 
name_length := _elna_lexer_global_get_end() - name; - - (* Skip the variable name and colon with the type. *) - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - variable_type := _elna_parser_type_expression(); - - declaration_size := _variable_declaration_size(); - result := _allocate(declaration_size); - - _node_set_kind(result, NodeKind.variable_declaration); - _declaration_set_next(result, 0); - _declaration_set_name(result, name); - _declaration_set_length(result, name_length); - _variable_declaration_set_type(result, variable_type); - - return result -end; - -proc _elna_tac_variable_declaration(parser_tree: Word); -var - name: Word; - name_length: Word; - variable_type: Word; - result: Word; - result_size: Word; -begin - result_size := _elna_tac_declaration_size(); - result := _allocate(result_size); - - _elna_tac_declaration_set_next(result, 0); - - name := _declaration_get_name(parser_tree); - name_length := _declaration_get_length(parser_tree); - variable_type := _variable_declaration_get_type(parser_tree); - - _elna_tac_declaration_set_name(result, name); - _elna_tac_declaration_set_length(result, name_length); - - name := _named_type_expression_get_name(variable_type); - name_length := _named_type_expression_get_length(variable_type); - - if _string_compare("Array", 5, name, name_length) then - (* Else we assume this is a zeroed 4096 bytes big array. 
*) - _elna_tac_declaration_set_body(result, 4096) - else - _elna_tac_declaration_set_body(result, 4) - end; - return result -end; - -proc _elna_tac_type_field(name_pointer: Word, name_length: Word, field_pointer: Word, field_offset: Word); -var - result_size: Word; - first_result: Word; - second_result: Word; - new_name: Word; - new_length: Word; - field_length: Word; - instruction: Word; - name_target: Word; - next_instruction: Word; -begin - result_size := _elna_tac_declaration_size(); - field_length := field_pointer + 4; - field_length := field_length^; - new_length := field_length + name_length; - new_length := new_length + 5; - - first_result := _allocate(result_size); - _elna_tac_declaration_set_next(first_result, 0); - - new_name := _allocate(new_length); - - name_target := new_name; - _memcpy(name_target, name_pointer, name_length); - name_target := name_target + name_length; - _memcpy(name_target, "_get_", 5); - name_target := name_target + 5; - _memcpy(name_target, field_pointer^, field_length); - - _elna_tac_declaration_set_name(first_result, new_name); - _elna_tac_declaration_set_length(first_result, new_length); - - instruction := _elna_tac_add_immediate(ElnaTacRegister.a0, ElnaTacRegister.a0, field_offset, 0); - next_instruction := _elna_tac_load_word(ElnaTacRegister.a0, ElnaTacRegister.a0, 0); - _elna_tac_instruction_set_next(instruction, next_instruction); - _elna_tac_declaration_set_body(first_result, instruction); - - second_result := _allocate(result_size); - _elna_tac_declaration_set_next(second_result, 0); - - new_name := _allocate(new_length); - - name_target := new_name; - _memcpy(name_target, name_pointer, name_length); - name_target := name_target + name_length; - _memcpy(name_target, "_set_", 5); - name_target := name_target + 5; - _memcpy(name_target, field_pointer^, field_length); - - _elna_tac_declaration_set_name(second_result, new_name); - _elna_tac_declaration_set_length(second_result, new_length); - - instruction := 
_elna_tac_add_immediate(ElnaTacRegister.a0, ElnaTacRegister.a0, field_offset, 0); - next_instruction := _elna_tac_store_word(ElnaTacRegister.a1, ElnaTacRegister.a0, 0); - _elna_tac_instruction_set_next(instruction, next_instruction); - _elna_tac_declaration_set_body(second_result, instruction); - - _elna_tac_declaration_set_next(first_result, second_result); - - return first_result -end; - -proc _elna_tac_type_record(name_pointer: Word, name_length: Word, type_representation: Word, current_result: Word); -var - result_size: Word; - first_result: Word; - result: Word; - type_size: Word; - new_name: Word; - new_length: Word; - instruction: Word; - field_count: Word; - field_offset: Word; - field_pointer: Word; -begin - result_size := _elna_tac_declaration_size(); - first_result := _allocate(result_size); - result := 0; - - (* Debug. Error stream output. - _syscall(2, name_pointer, name_length, 0, 0, 0, 64); *) - - type_size := _type_get_size(type_representation); - new_length := name_length + 5; - new_name := _allocate(new_length); - - _memcpy(new_name, name_pointer, name_length); - _memcpy(new_name + name_length, "_size", 5); - - _elna_tac_declaration_set_name(first_result, new_name); - _elna_tac_declaration_set_length(first_result, new_length); - - instruction := _elna_tac_load_immediate(ElnaTacRegister.a0, type_size, 0); - _elna_tac_declaration_set_body(first_result, instruction); - - field_count := _record_type_get_length(type_representation); - field_pointer := _record_type_get_members(type_representation); - field_offset := 0; - current_result^ := first_result; - - .elna_tac_type_record_fields; - if field_count > 0 then - result := _elna_tac_type_field(name_pointer, name_length, field_pointer, field_offset); - - _elna_tac_declaration_set_next(current_result^, result); - current_result^ := _elna_tac_declaration_get_next(result); - - field_offset := field_offset + 4; - field_count := field_count - 1; - field_pointer := field_pointer + 12; - goto 
elna_tac_type_record_fields - end; - - return first_result -end; - -proc _elna_tac_type_part(parser_node: Word); -var - name_pointer: Word; - name_length: Word; - result: Word; - first_result: Word; - symbol: Word; - info_type: Word; - type_kind: Word; - current_result: Word; - out_result: Word; -begin - first_result := 0; - - .elna_tac_type_part_loop; - if parser_node = 0 then - goto elna_tac_type_part_end - end; - - name_pointer := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - symbol := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); - - info_type := _type_info_get_type(symbol); - type_kind := _type_get_kind(info_type); - - if type_kind = TypeKind._record then - result := _elna_tac_type_record(name_pointer, name_length, info_type, @out_result) - else - result := 0; - out_result := 0 - end; - if first_result = 0 then - first_result := result; - current_result := out_result - elsif result <> 0 then - _elna_tac_declaration_set_next(current_result, result); - current_result := out_result - end; - parser_node := _declaration_get_next(parser_node); - goto elna_tac_type_part_loop; - - .elna_tac_type_part_end; - return first_result -end; - -proc _elna_parser_var_part(); -var - result: Word; - token_kind: Word; - variable_node: Word; - current_declaration: Word; -begin - result := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind <> ElnaLexerKind._var then - goto elna_parser_var_part_end - end; - (* Skip "var". *) - _elna_lexer_skip_token(); - - .elna_parser_var_part_loop; - _skip_empty_lines(); - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.identifier then - variable_node := _elna_parser_variable_declaration(); - - (* Skip semicolon. 
*) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - if result = 0 then - result := variable_node - else - _declaration_set_next(current_declaration, variable_node) - end; - current_declaration := variable_node; - goto elna_parser_var_part_loop - end; - - .elna_parser_var_part_end; - return result -end; - -proc _elna_tac_var_part(parser_node: Word); -var - node: Word; - current_variable: Word; - first_variable: Word; -begin - first_variable := 0; - if parser_node = 0 then - goto elna_tac_var_part_end - end; - - .elna_tac_var_part_loop; - node := _elna_tac_variable_declaration(parser_node); - if first_variable = 0 then - first_variable := node - else - _elna_tac_declaration_set_next(current_variable, node) - end; - current_variable := node; - - parser_node := _declaration_get_next(parser_node); - if parser_node <> 0 then - goto elna_tac_var_part_loop - end; - - .elna_tac_var_part_end; - return first_variable -end; - -proc _module_declaration_size(); - return 16 -end; - -proc _module_declaration_get_types(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _module_declaration_set_types(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -proc _module_declaration_get_globals(this: Word); -begin - this := this + 8; - return this^ -end; - -proc _module_declaration_set_globals(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -proc _module_declaration_get_procedures(this: Word); -begin - this := this + 12; - return this^ -end; - -proc _module_declaration_set_procedures(this: Word, value: Word); -begin - this := this + 12; - this^ := value -end; - -proc _elna_parser_module_declaration(); -var - parser_node: Word; - declaration_size: Word; - result: Word; -begin - declaration_size := _module_declaration_size(); - result := _allocate(declaration_size); - - _node_set_kind(result, NodeKind.module_declaration); - - parser_node := _elna_parser_type_part(); - 
_module_declaration_set_types(result, parser_node); - - parser_node := _elna_parser_var_part(); - _module_declaration_set_globals(result, parser_node); - - parser_node := _elna_parser_procedures(); - _module_declaration_set_procedures(result, parser_node); - - return result -end; - -(** - * Process the source code and print the generated code. - *) -proc _elna_tac_module_declaration(parser_node: Word); -var - data_part: Word; - code_part: Word; - type_part: Word; - current_declaration: Word; - next_declaration: Word; -begin - type_part := _module_declaration_get_types(parser_node); - type_part := _elna_tac_type_part(type_part); - - data_part := _module_declaration_get_globals(parser_node); - data_part := _elna_tac_var_part(data_part); - - code_part := _module_declaration_get_procedures(parser_node); - code_part := _elna_tac_procedures(code_part); - - current_declaration := code_part; - - .elna_tac_module_declaration_types; - next_declaration := _elna_tac_declaration_get_next(current_declaration); - if next_declaration <> 0 then - current_declaration := next_declaration; - - goto elna_tac_module_declaration_types - end; - _elna_tac_declaration_set_next(current_declaration, type_part); - - return _elna_tac_module_create(data_part, code_part) -end; - -proc _elna_name_procedure_declaration(parser_node: Word); -var - name_pointer: Word; - name_length: Word; - new_symbol_table: Word; - parameter_counter: Word; - symbol_info: Word; - current_parameter: Word; -begin - new_symbol_table := _symbol_table_create(); - symbol_info := _procedure_info_create(new_symbol_table); - - name_pointer := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - - current_parameter := _procedure_declaration_get_parameters(parser_node); - parameter_counter := 0; - .elna_name_procedure_declaration_parameter; - if current_parameter <> 0 then - _elna_name_procedure_parameter(current_parameter, parameter_counter, new_symbol_table); - parameter_counter := 
parameter_counter + 1; - - current_parameter := _declaration_get_next(current_parameter); - goto elna_name_procedure_declaration_parameter - end; - current_parameter := _procedure_declaration_get_temporaries(parser_node); - _elna_name_procedure_temporaries(current_parameter, new_symbol_table); - - _symbol_table_enter(@symbol_table_global, name_pointer, name_length, symbol_info) -end; - -proc _elna_type_procedure_declaration(parser_node: Word); -begin -end; - -proc _elna_name_module_declaration(parser_node: Word); -var - current_part: Word; - result: Word; -begin - current_part := _module_declaration_get_types(parser_node); - .elna_name_module_declaration_type; - if current_part <> 0 then - _elna_name_type_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_name_module_declaration_type - end; - - current_part := _module_declaration_get_procedures(parser_node); - .elna_name_module_declaration_procedure; - if current_part <> 0 then - _elna_name_procedure_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_name_module_declaration_procedure - end -end; - -proc _elna_type_module_declaration(parser_node: Word); -var - current_part: Word; -begin - current_part := _module_declaration_get_types(parser_node); - .elna_type_module_declaration_type; - if current_part <> 0 then - _elna_type_type_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_type_module_declaration_type - end; - - current_part := _module_declaration_get_procedures(parser_node); - .elna_type_module_declaration_procedure; - if current_part <> 0 then - _elna_type_procedure_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_type_module_declaration_procedure - end -end; - -proc _compile(); -var - parser_node: Word; - tac: Word; -begin - parser_node := _elna_parser_module_declaration(); - _elna_name_module_declaration(parser_node); - 
_elna_type_module_declaration(parser_node); - tac := _elna_tac_module_declaration(parser_node); - _elna_writer_module(tac) -end; - -(** - * Terminates the program. a0 contains the return code. - * - * Parameters: - * a0 - Status code. - *) -proc _exit(status: Word); -begin - _syscall(status, 0, 0, 0, 0, 0, 93) -end; - -(** - * Looks for a symbol in the given symbol table. - * - * Parameters: - * symbol_table - Symbol table. - * symbol_name - Symbol name pointer. - * name_length - Symbol name length. - * - * Returns the symbol pointer or 0 in a0. - *) -proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); -var - result: Word; - symbol_table_length: Word; - current_name: Word; - current_length: Word; -begin - result := 0; - - (* The first word in the symbol table is its length, get it. *) - symbol_table_length := symbol_table^; - - (* Go to the first symbol position. *) - symbol_table := symbol_table + 4; - - .symbol_table_lookup_loop; - if symbol_table_length = 0 then - goto symbol_table_lookup_end - end; - - (* Symbol name pointer and length. *) - current_name := symbol_table^; - current_length := symbol_table + 4; - current_length := current_length^; - - (* If lengths don't match, exit and return nil. *) - if name_length <> current_length then - goto symbol_table_lookup_repeat - end; - (* If names don't match, exit and return nil. *) - if _memcmp(symbol_name, current_name, name_length) then - goto symbol_table_lookup_repeat - end; - (* Otherwise, the symbol is found. *) - result := symbol_table + 8; - result := result^; - goto symbol_table_lookup_end; - - .symbol_table_lookup_repeat; - symbol_table := symbol_table + 12; - symbol_table_length := symbol_table_length - 1; - goto symbol_table_lookup_loop; - - .symbol_table_lookup_end; - return result -end; - -(** - * Create a new local symbol table in the symbol memory region after the last - * known symbol table. 
- *) -proc _symbol_table_create(); -var - new_symbol_table: Word; - table_length: Word; - current_table: Word; -begin - new_symbol_table := symbol_table_store; - - .symbol_table_create_loop; - table_length := new_symbol_table^; - - if table_length <> 0 then - table_length := table_length * 12; - table_length := table_length + 4; - new_symbol_table := new_symbol_table + table_length; - goto symbol_table_create_loop - end; - - return new_symbol_table -end; - -(** - * Inserts a symbol into the table. - * - * Parameters: - * symbol_table - Symbol table. - * symbol_name - Symbol name pointer. - * name_length - Symbol name length. - * symbol - Symbol pointer. - *) -proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); -var - table_length: Word; - symbol_pointer: Word; -begin - (* The first word in the symbol table is its length, get it. *) - table_length := symbol_table^; - - (* Calculate the offset for the new symbol. *) - symbol_pointer := table_length * 12; - symbol_pointer := symbol_pointer + 4; - symbol_pointer := symbol_table + symbol_pointer; - - symbol_pointer^ := symbol_name; - symbol_pointer := symbol_pointer + 4; - symbol_pointer^ := name_length; - symbol_pointer := symbol_pointer + 4; - symbol_pointer^ := symbol; - - (* Increment the symbol table length. *) - table_length := table_length + 1; - symbol_table^ := table_length -end; - -proc _symbol_table_build(); -var - current_info: Word; - current_type: Word; -begin - (* Set the table length to 0. *) - symbol_table_global := 0; - - current_type := _allocate(8); - _type_set_kind(current_type, TypeKind.primitive); - _type_set_size(current_type, 4); - - (* Enter built-in symbols. 
*) - current_info := _type_info_create(current_type); - _symbol_table_enter(@symbol_table_global, "Word", 4, current_info); - - current_info := _type_info_create(current_type); - _symbol_table_enter(@symbol_table_global, "Array", 5, current_info) -end; - -(** - * Assigns some value to at array index. - * - * Parameters: - * array - Array pointer. - * index - Index (word offset into the array). - * data - Data to assign. - *) -proc _assign_at(array: Word, index: Word, data: Word); -var - target: Word; -begin - target := index - 1; - target := target * 4; - target := array + target; - - target^ := data -end; - -proc _get_at(array: Word, index: Word); -var - target: Word; -begin - target := index - 1; - target := target * 4; - target := array + target; - - return target^ -end; - -(** - * Initializes the array with character classes. - *) -proc _elna_lexer_classifications(); -var - code: Word; -begin - _assign_at(@classification, 1, ElnaLexerClass.eof); - _assign_at(@classification, 2, ElnaLexerClass.invalid); - _assign_at(@classification, 3, ElnaLexerClass.invalid); - _assign_at(@classification, 4, ElnaLexerClass.invalid); - _assign_at(@classification, 5, ElnaLexerClass.invalid); - _assign_at(@classification, 6, ElnaLexerClass.invalid); - _assign_at(@classification, 7, ElnaLexerClass.invalid); - _assign_at(@classification, 8, ElnaLexerClass.invalid); - _assign_at(@classification, 9, ElnaLexerClass.invalid); - _assign_at(@classification, 10, ElnaLexerClass.space); - _assign_at(@classification, 11, ElnaLexerClass.space); - _assign_at(@classification, 12, ElnaLexerClass.invalid); - _assign_at(@classification, 13, ElnaLexerClass.invalid); - _assign_at(@classification, 14, ElnaLexerClass.space); - _assign_at(@classification, 15, ElnaLexerClass.invalid); - _assign_at(@classification, 16, ElnaLexerClass.invalid); - _assign_at(@classification, 17, ElnaLexerClass.invalid); - _assign_at(@classification, 18, ElnaLexerClass.invalid); - _assign_at(@classification, 19, 
ElnaLexerClass.invalid); - _assign_at(@classification, 20, ElnaLexerClass.invalid); - _assign_at(@classification, 21, ElnaLexerClass.invalid); - _assign_at(@classification, 22, ElnaLexerClass.invalid); - _assign_at(@classification, 23, ElnaLexerClass.invalid); - _assign_at(@classification, 24, ElnaLexerClass.invalid); - _assign_at(@classification, 25, ElnaLexerClass.invalid); - _assign_at(@classification, 26, ElnaLexerClass.invalid); - _assign_at(@classification, 27, ElnaLexerClass.invalid); - _assign_at(@classification, 28, ElnaLexerClass.invalid); - _assign_at(@classification, 29, ElnaLexerClass.invalid); - _assign_at(@classification, 30, ElnaLexerClass.invalid); - _assign_at(@classification, 31, ElnaLexerClass.invalid); - _assign_at(@classification, 32, ElnaLexerClass.invalid); - _assign_at(@classification, 33, ElnaLexerClass.space); - _assign_at(@classification, 34, ElnaLexerClass.single); - _assign_at(@classification, 35, ElnaLexerClass.double_quote); - _assign_at(@classification, 36, ElnaLexerClass.other); - _assign_at(@classification, 37, ElnaLexerClass.other); - _assign_at(@classification, 38, ElnaLexerClass.single); - _assign_at(@classification, 39, ElnaLexerClass.single); - _assign_at(@classification, 40, ElnaLexerClass.single_quote); - _assign_at(@classification, 41, ElnaLexerClass.left_paren); - _assign_at(@classification, 42, ElnaLexerClass.right_paren); - _assign_at(@classification, 43, ElnaLexerClass.asterisk); - _assign_at(@classification, 44, ElnaLexerClass.single); - _assign_at(@classification, 45, ElnaLexerClass.single); - _assign_at(@classification, 46, ElnaLexerClass.minus); - _assign_at(@classification, 47, ElnaLexerClass.dot); - _assign_at(@classification, 48, ElnaLexerClass.single); - _assign_at(@classification, 49, ElnaLexerClass.zero); - _assign_at(@classification, 50, ElnaLexerClass.digit); - _assign_at(@classification, 51, ElnaLexerClass.digit); - _assign_at(@classification, 52, ElnaLexerClass.digit); - _assign_at(@classification, 53, 
ElnaLexerClass.digit); - _assign_at(@classification, 54, ElnaLexerClass.digit); - _assign_at(@classification, 55, ElnaLexerClass.digit); - _assign_at(@classification, 56, ElnaLexerClass.digit); - _assign_at(@classification, 57, ElnaLexerClass.digit); - _assign_at(@classification, 58, ElnaLexerClass.digit); - _assign_at(@classification, 59, ElnaLexerClass.colon); - _assign_at(@classification, 60, ElnaLexerClass.single); - _assign_at(@classification, 61, ElnaLexerClass.less); - _assign_at(@classification, 62, ElnaLexerClass.equals); - _assign_at(@classification, 63, ElnaLexerClass.greater); - _assign_at(@classification, 64, ElnaLexerClass.other); - _assign_at(@classification, 65, ElnaLexerClass.single); - _assign_at(@classification, 66, ElnaLexerClass.alpha); - _assign_at(@classification, 67, ElnaLexerClass.alpha); - _assign_at(@classification, 68, ElnaLexerClass.alpha); - _assign_at(@classification, 69, ElnaLexerClass.alpha); - _assign_at(@classification, 70, ElnaLexerClass.alpha); - _assign_at(@classification, 71, ElnaLexerClass.alpha); - _assign_at(@classification, 72, ElnaLexerClass.alpha); - _assign_at(@classification, 73, ElnaLexerClass.alpha); - _assign_at(@classification, 74, ElnaLexerClass.alpha); - _assign_at(@classification, 75, ElnaLexerClass.alpha); - _assign_at(@classification, 76, ElnaLexerClass.alpha); - _assign_at(@classification, 77, ElnaLexerClass.alpha); - _assign_at(@classification, 78, ElnaLexerClass.alpha); - _assign_at(@classification, 79, ElnaLexerClass.alpha); - _assign_at(@classification, 80, ElnaLexerClass.alpha); - _assign_at(@classification, 81, ElnaLexerClass.alpha); - _assign_at(@classification, 82, ElnaLexerClass.alpha); - _assign_at(@classification, 83, ElnaLexerClass.alpha); - _assign_at(@classification, 84, ElnaLexerClass.alpha); - _assign_at(@classification, 85, ElnaLexerClass.alpha); - _assign_at(@classification, 86, ElnaLexerClass.alpha); - _assign_at(@classification, 87, ElnaLexerClass.alpha); - _assign_at(@classification, 88, 
ElnaLexerClass.alpha); - _assign_at(@classification, 89, ElnaLexerClass.alpha); - _assign_at(@classification, 90, ElnaLexerClass.alpha); - _assign_at(@classification, 91, ElnaLexerClass.alpha); - _assign_at(@classification, 92, ElnaLexerClass.single); - _assign_at(@classification, 93, ElnaLexerClass.backslash); - _assign_at(@classification, 94, ElnaLexerClass.single); - _assign_at(@classification, 95, ElnaLexerClass.single); - _assign_at(@classification, 96, ElnaLexerClass.alpha); - _assign_at(@classification, 97, ElnaLexerClass.other); - _assign_at(@classification, 98, ElnaLexerClass.hex); - _assign_at(@classification, 99, ElnaLexerClass.hex); - _assign_at(@classification, 100, ElnaLexerClass.hex); - _assign_at(@classification, 101, ElnaLexerClass.hex); - _assign_at(@classification, 102, ElnaLexerClass.hex); - _assign_at(@classification, 103, ElnaLexerClass.hex); - _assign_at(@classification, 104, ElnaLexerClass.alpha); - _assign_at(@classification, 105, ElnaLexerClass.alpha); - _assign_at(@classification, 106, ElnaLexerClass.alpha); - _assign_at(@classification, 107, ElnaLexerClass.alpha); - _assign_at(@classification, 108, ElnaLexerClass.alpha); - _assign_at(@classification, 109, ElnaLexerClass.alpha); - _assign_at(@classification, 110, ElnaLexerClass.alpha); - _assign_at(@classification, 111, ElnaLexerClass.alpha); - _assign_at(@classification, 112, ElnaLexerClass.alpha); - _assign_at(@classification, 113, ElnaLexerClass.alpha); - _assign_at(@classification, 114, ElnaLexerClass.alpha); - _assign_at(@classification, 115, ElnaLexerClass.alpha); - _assign_at(@classification, 116, ElnaLexerClass.alpha); - _assign_at(@classification, 117, ElnaLexerClass.alpha); - _assign_at(@classification, 118, ElnaLexerClass.alpha); - _assign_at(@classification, 119, ElnaLexerClass.alpha); - _assign_at(@classification, 120, ElnaLexerClass.alpha); - _assign_at(@classification, 121, ElnaLexerClass.x); - _assign_at(@classification, 122, ElnaLexerClass.alpha); - 
_assign_at(@classification, 123, ElnaLexerClass.alpha); - _assign_at(@classification, 124, ElnaLexerClass.other); - _assign_at(@classification, 125, ElnaLexerClass.single); - _assign_at(@classification, 126, ElnaLexerClass.other); - _assign_at(@classification, 127, ElnaLexerClass.single); - _assign_at(@classification, 128, ElnaLexerClass.invalid); - - code := 129; - - (* Set the remaining 129 - 256 bytes to transitionClassOther. *) - .create_classification_loop; - _assign_at(@classification, code, ElnaLexerClass.other); - code := code + 1; - - if code < 257 then - goto create_classification_loop - end -end; - -proc _elna_lexer_get_transition(current_state: Word, character_class: Word); -var - transition_table: Word; - row_position: Word; - column_position: Word; - target: Word; -begin - (* Each state is 8 bytes long (2 words: action and next state). - There are 22 character classes, so a transition row 8 * 22 = 176 bytes long. *) - row_position := current_state - 1; - row_position := row_position * 176; - - column_position := character_class - 1; - column_position := column_position * 8; - - target := _elna_lexer_get_transition_table(); - target := target + row_position; - - return target + column_position -end; - -(** - * Parameters: - * current_state - First index into transitions table. - * character_class - Second index into transitions table. - * action - Action to assign. - * next_state - Next state to assign. - *) -proc _elna_lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); -var - transition: Word; -begin - transition := _elna_lexer_get_transition(current_state, character_class); - - _elna_lexer_transition_set_action(transition, action); - _elna_lexer_transition_set_state(transition, next_state) -end; - -(* Sets same action and state transition for all character classes in one transition row. *) - -(** - * Parameters: - * current_state - Current state (Transition state enumeration). 
- * default_action - Default action (Callback). - * next_state - Next state (Transition state enumeration). - *) -proc _elna_lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); -begin - _elna_lexer_set_transition(current_state, ElnaLexerClass.invalid, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.digit, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.alpha, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.space, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.colon, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.equals, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.left_paren, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.right_paren, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.asterisk, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.backslash, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.single, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.hex, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.zero, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.x, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.eof, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.dot, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.minus, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.single_quote, default_action, next_state); - _elna_lexer_set_transition(current_state, 
ElnaLexerClass.double_quote, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.greater, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.less, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.other, default_action, next_state) -end; - -(** - * The transition table describes transitions from one state to another, given - * a symbol (character class). - * - * The table has m rows and n columns, where m is the amount of states and n is - * the amount of classes. So given the current state and a classified character - * the table can be used to look up the next state. - *) -proc _elna_lexer_transitions(); -begin - (* Start state. *) - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.space, ElnaLexerAction.skip, ElnaLexerState.start); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.colon, ElnaLexerAction.accumulate, ElnaLexerState.colon); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.equals, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.left_paren, ElnaLexerAction.accumulate, ElnaLexerState.left_paren); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.right_paren, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.asterisk, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.backslash, ElnaLexerAction.none, 
ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.leading_zero); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.eof, ElnaLexerAction.eof, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.dot, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.minus, ElnaLexerAction.accumulate, ElnaLexerState.minus); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single_quote, ElnaLexerAction.accumulate, ElnaLexerState.character); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.double_quote, ElnaLexerAction.accumulate, ElnaLexerState.string); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.greater, ElnaLexerAction.accumulate, ElnaLexerState.greater); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.less, ElnaLexerAction.accumulate, ElnaLexerState.less); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.other, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Colon state. *) - _elna_lexer_default_transition(ElnaLexerState.colon, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.colon, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Identifier state. 
*) - _elna_lexer_default_transition(ElnaLexerState.identifier, ElnaLexerAction.key_id, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - - (* Decimal state. *) - _elna_lexer_default_transition(ElnaLexerState.decimal, ElnaLexerAction.integer, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.decimal); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Leading zero. 
*) - _elna_lexer_default_transition(ElnaLexerState.leading_zero, ElnaLexerAction.integer, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.digit, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.zero, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.dot); - - (* Greater state. *) - _elna_lexer_default_transition(ElnaLexerState.greater, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.greater, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Minus state. *) - _elna_lexer_default_transition(ElnaLexerState.minus, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.minus, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Left paren state. *) - _elna_lexer_default_transition(ElnaLexerState.left_paren, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.left_paren, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.comment); - - (* Less state. *) - _elna_lexer_default_transition(ElnaLexerState.less, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Hexadecimal after 0x. 
*) - _elna_lexer_default_transition(ElnaLexerState.dot, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.dot, ElnaLexerClass.dot, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Comment. *) - _elna_lexer_default_transition(ElnaLexerState.comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); - _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); - _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Closing comment. *) - _elna_lexer_default_transition(ElnaLexerState.closing_comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.right_paren, ElnaLexerAction.delimited, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Character. 
*) - _elna_lexer_default_transition(ElnaLexerState.character, ElnaLexerAction.accumulate, ElnaLexerState.character); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.single_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.character_escape); - - (* Escape sequence in a character. *) - _elna_lexer_default_transition(ElnaLexerState.character_escape, ElnaLexerAction.accumulate, ElnaLexerState.character); - _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - - (* String. *) - _elna_lexer_default_transition(ElnaLexerState.string, ElnaLexerAction.accumulate, ElnaLexerState.string); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.double_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.string_escape); - - (* Escape sequence in a string. 
*) - _elna_lexer_default_transition(ElnaLexerState.string_escape, ElnaLexerAction.accumulate, ElnaLexerState.string); - _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish) -end; - -(** - * Transition table is saved after character classification table. - * Each character entry is 1 word long and there are 256 characters. - * 1024 = 256 * 4 - *) -proc _elna_lexer_get_transition_table(); - return @classification + 1024 -end; - -(** - * Lexer state is saved after the transition tables. - * Each transition table entry is 8 bytes long. The table has 16 rows (transition states) - * and 22 columns (character classes), so 2992 = 8 * 17 * 22. - *) -proc _elna_lexer_global_state(); -var - result: Word; -begin - result := _elna_lexer_get_transition_table(); - return result + 2992 -end; - -(** - * Gets pointer to the token start. - *) -proc _elna_lexer_global_get_start(); -var - target: Word; -begin - target := _elna_lexer_global_state() + 4; - return target^ -end; - -(** - * Sets pointer to the token start. - *) -proc _elna_lexer_global_set_start(new_start: Word); -var - target: Word; -begin - target := _elna_lexer_global_state() + 4; - target^ := new_start -end; - -(** - * Gets pointer to the token end. - *) -proc _elna_lexer_global_get_end(); -var - target: Word; -begin - target := _elna_lexer_global_state() + 8; - return target^ -end; - -(** - * Sets pointer to the token end. 
- *) -proc _elna_lexer_global_set_end(new_start: Word); -var - target: Word; -begin - target := _elna_lexer_global_state() + 8; - target^ := new_start -end; - -proc _elna_lexer_transition_get_action(this: Word); - return this^ -end; - -proc _elna_lexer_transition_set_action(this: Word, value: Word); -begin - this^ := value -end; - -proc _elna_lexer_transition_get_state(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_lexer_transition_set_state(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -(** - * Resets the lexer state for reading the next token. - *) -proc _elna_lexer_reset(); -var - state: Word; -begin - (* Transition start state is 1. *) - state := _elna_lexer_global_state(); - state^ := ElnaLexerState.start; - - state := _elna_lexer_global_get_start(); - _elna_lexer_global_set_end(state) -end; - -(** - * One time lexer initialization. - *) -proc _elna_lexer_initialize(code_pointer: Word); -begin - _elna_lexer_classifications(); - _elna_lexer_transitions(); - - _elna_lexer_global_set_start(code_pointer); - _elna_lexer_global_set_end(code_pointer) -end; - -proc _elna_lexer_next_transition(); -var - current_character: Word; - character_class: Word; - current_state: Word; -begin - current_character := _elna_lexer_global_get_end(); - current_character := _load_byte(current_character); - - character_class := _get_at(@classification, current_character + 1); - - current_state := _elna_lexer_global_state(); - current_state := current_state^; - - return _elna_lexer_get_transition(current_state, character_class) -end; - -(* Returns a true value if both strings have the same length and content, otherwise 0. *) -proc _string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); -var - result: Word; -begin - result := 0; - - if lhs_length = rhs_length then - result := _memcmp(lhs_pointer, rhs_pointer, lhs_length); - result := result = 0 - end; - return result -end; - -(* Returns the keyword kind of the token between position_start and position_end, or ElnaLexerKind.identifier if the token is not a keyword. The length passed next to each literal must be the exact length of that literal. *) -proc _elna_lexer_classify_keyword(position_start: Word, position_end: Word); -var - result: Word; -
token_length: Word; -begin - result := ElnaLexerKind.identifier; - token_length := position_end - position_start; - - if _string_compare(position_start, token_length, "const", 5) then - result := ElnaLexerKind._const - elsif _string_compare(position_start, token_length, "var", 3) then - result := ElnaLexerKind._var - elsif _string_compare(position_start, token_length, "proc", 4) then - result := ElnaLexerKind._proc - elsif _string_compare(position_start, token_length, "type", 4) then - result := ElnaLexerKind._type - elsif _string_compare(position_start, token_length, "begin", 5) then - result := ElnaLexerKind._begin - elsif _string_compare(position_start, token_length, "end", 3) then - result := ElnaLexerKind._end - elsif _string_compare(position_start, token_length, "return", 6) then - result := ElnaLexerKind._return - elsif _string_compare(position_start, token_length, "goto", 4) then - result := ElnaLexerKind._goto - elsif _string_compare(position_start, token_length, "if", 2) then - result := ElnaLexerKind._if - elsif _string_compare(position_start, token_length, "while", 5) then - result := ElnaLexerKind._while - elsif _string_compare(position_start, token_length, "then", 4) then - result := ElnaLexerKind._then - elsif _string_compare(position_start, token_length, "else", 4) then - result := ElnaLexerKind._else - elsif _string_compare(position_start, token_length, "elsif", 5) then - result := ElnaLexerKind._elsif - elsif _string_compare(position_start, token_length, "record", 6) then - result := ElnaLexerKind._record - elsif _string_compare(position_start, token_length, "or", 2) then - result := ElnaLexerKind._or - elsif _string_compare(position_start, token_length, "xor", 3) then - result := ElnaLexerKind._xor - end; - return result -end; - -proc _elna_lexer_classify_finalize(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ':' then - result :=
ElnaLexerKind.colon - elsif character = '.' then - result := ElnaLexerKind.dot - elsif character = '(' then - result := ElnaLexerKind.left_paren - elsif character = '-' then - result := ElnaLexerKind.minus - elsif character = '<' then - result := ElnaLexerKind.less_than - elsif character = '>' then - result := ElnaLexerKind.greater_than - end; - return result -end; - -proc _elna_lexer_classify_single(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ';' then - result := ElnaLexerKind.semicolon - elsif character = ',' then - result := ElnaLexerKind.comma - elsif character = ')' then - result := ElnaLexerKind.right_paren - elsif character = '@' then - result := ElnaLexerKind.at - elsif character = '~' then - result := ElnaLexerKind.not - elsif character = '&' then - result := ElnaLexerKind.and - elsif character = '+' then - result := ElnaLexerKind.plus - elsif character = '*' then - result := ElnaLexerKind.multiplication - elsif character = '=' then - result := ElnaLexerKind.equals - elsif character = '%' then - result := ElnaLexerKind.remainder - elsif character = '/' then - result := ElnaLexerKind.division - elsif character = '.'
then - result := ElnaLexerKind.dot - elsif character = '^' then - result := ElnaLexerKind.hat - end; - return result -end; - -(* Classifies a two-character token by its first and last characters. Returns 0 if the pair is not a known token. *) -proc _elna_lexer_classify_composite(start_position: Word, one_before_last: Word); -var - first_character: Word; - last_character: Word; - result: Word; -begin - result := 0; - first_character := _load_byte(start_position); - last_character := _load_byte(one_before_last); - - if first_character = ':' then - result := ElnaLexerKind.assignment - elsif first_character = '<' then - if last_character = '=' then - result := ElnaLexerKind.less_equal - elsif last_character = '>' then - result := ElnaLexerKind.not_equal - end - elsif first_character = '>' then - if last_character = '=' then - result := ElnaLexerKind.greater_equal - end - elsif first_character = '-' then - if last_character = '>' then - result := ElnaLexerKind.arrow - end - end; - - return result -end; - -proc _elna_lexer_classify_delimited(start_position: Word, end_position: Word); -var - token_length: Word; - delimiter: Word; - result: Word; -begin - result := 0; - token_length := end_position - start_position; - delimiter := _load_byte(start_position); - - if delimiter = '(' then - result := ElnaLexerKind.comment - elsif delimiter = '\'' then - result := ElnaLexerKind.character - elsif delimiter = '"' then - result := ElnaLexerKind.string - end; - return result -end; - -proc _elna_lexer_classify_integer(start_position: Word, end_position: Word); - return ElnaLexerKind.integer -end; - -proc _elna_lexer_execute_action(action_to_perform: Word, kind: Word); -var - position_start: Word; - position_end: Word; - intermediate: Word; -begin - position_start := _elna_lexer_global_get_start(); - position_end := _elna_lexer_global_get_end(); - - if action_to_perform = ElnaLexerAction.none then - elsif action_to_perform = ElnaLexerAction.accumulate then - _elna_lexer_global_set_end(position_end + 1) - elsif action_to_perform = ElnaLexerAction.skip then - _elna_lexer_global_set_start(position_start + 1); - _elna_lexer_global_set_end(position_end + 1) - elsif action_to_perform = ElnaLexerAction.single then -
_elna_lexer_global_set_end(position_end + 1); - - intermediate := _elna_lexer_classify_single(position_start); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.eof then - intermediate := ElnaLexerKind.eof; - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.finalize then - intermediate := _elna_lexer_classify_finalize(position_start); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.composite then - _elna_lexer_global_set_end(position_end + 1); - - intermediate := _elna_lexer_classify_composite(position_start, position_end); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.key_id then - intermediate := _elna_lexer_classify_keyword(position_start, position_end); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.integer then - intermediate := _elna_lexer_classify_integer(position_start, position_end); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.delimited then - _elna_lexer_global_set_end(position_end + 1); - - intermediate := _elna_lexer_classify_delimited(position_start, position_end + 1); - kind^ := intermediate - end -end; - -proc _elna_lexer_execute_transition(kind: Word); -var - next_transition: Word; - next_state: Word; - global_state: Word; - action_to_perform: Word; -begin - next_transition := _elna_lexer_next_transition(); - next_state := _elna_lexer_transition_get_state(next_transition); - action_to_perform := _elna_lexer_transition_get_action(next_transition); - - global_state := _elna_lexer_global_state(); - - global_state^ := next_state; - _elna_lexer_execute_action(action_to_perform, kind); - - return next_state -end; - -proc _elna_lexer_advance_token(kind: Word); -var - result_state: Word; -begin - result_state := _elna_lexer_execute_transition(kind); - if result_state <> ElnaLexerState.finish then - _elna_lexer_advance_token(kind) - end -end; - -(** - * Reads the next token and writes its type into the address in the kind parameter.
- *) -proc _elna_lexer_read_token(kind: Word); -begin - _elna_lexer_reset(); - _elna_lexer_advance_token(kind) -end; - -(** - * Advances the token stream past the last read token. - *) -proc _elna_lexer_skip_token(); -var - old_end: Word; -begin - old_end := _elna_lexer_global_get_end(); - _elna_lexer_global_set_start(old_end) -end; - -proc _initialize_global_state(); -begin - compiler_strings_position := @compiler_strings; - memory_free_pointer := _mmap(4194304); - source_code := _mmap(495616); - symbol_table_store := _mmap(495616) -end; - -(* - * Entry point. - *) -proc _start(); -var - last_read: Word; - offset: Word; -begin - _initialize_global_state(); - _elna_lexer_initialize(source_code); - _symbol_table_build(); - - (* Read the source from the standard input. *) - offset := source_code; - - .start_read; - (* Second argument is the buffer size. When modifying it, update the source_code definition as well. *) - last_read := _read_file(offset, 409600); - if last_read > 0 then - offset := offset + last_read; - goto start_read - end; - _compile(); - - _exit(0) -end; diff --git a/boot/stage15/cl.elna b/boot/stage15/cl.elna new file mode 100644 index 0000000..3f2dde2 --- /dev/null +++ b/boot/stage15/cl.elna @@ -0,0 +1,5408 @@ +(* + * This Source Code Form is subject to the terms of the Mozilla Public License, + * v. 2.0. If a copy of the MPL was not distributed with this file, You can + * obtain one at https://mozilla.org/MPL/2.0/. + *) + +(* Stage 15 compiler. *) + +(* - Procedure names are not required anymore to start with an underscore. *) +(* - Record declarations are supported. Access is done with generated procedures, + record_name_get_field and record_name_set_field. Record size can be queried with + record_name_size(). *) + +type + ElnaLexerAction = (none, accumulate, skip, single, eof, finalize, composite, key_id, integer, delimited); + + (** + * Classification table assigns each possible character to a group (class).
All + * characters of the same group are handled equivalently. + * + * Transition = record + * action: TransitionAction; + * next_state: TransitionState + * end; + *) + ElnaLexerClass = ( + invalid, + digit, + alpha, + space, + colon, + equals, + left_paren, + right_paren, + asterisk, + backslash, + single, + hex, + zero, + x, + eof, + dot, + minus, + single_quote, + double_quote, + greater, + less, + other + ); + ElnaLexerState = ( + start, + colon, + identifier, + decimal, + leading_zero, + greater, + minus, + left_paren, + less, + dot, + comment, + closing_comment, + character, + character_escape, + string, + string_escape, + finish + ); + ElnaLexerKind = ( + identifier, + _const, + _var, + _proc, + _type, + _begin, + _end, + _if, + _then, + _else, + _elsif, + _while, + _do, + _extern, + _record, + _true, + _false, + null, + and, + _or, + _xor, + pipe, + not, + _return, + _module, + _program, + _import, + _cast, + _defer, + _case, + _of, + trait, + left_paren, + right_paren, + left_square, + right_square, + shift_left, + shift_right, + greater_equal, + less_equal, + greater_than, + less_than, + not_equal, + equals, + semicolon, + dot, + comma, + plus, + arrow, + minus, + multiplication, + division, + remainder, + assignment, + colon, + hat, + at, + comment, + string, + character, + integer, + word, + _goto, + eof + ); + NodeKind = ( + integer_literal, + string_literal, + character_literal, + variable_expression, + field_access_expression, + dereference_expression, + unary_expression, + binary_expression, + call, + goto_statement, + label_declaration, + return_statement, + assign_statement, + if_statement, + procedure_declaration, + variable_declaration, + enumeration_type_expression, + named_type_expression, + type_declaration, + module_declaration, + record_type_expression + ); + InfoKind = (type_info, parameter_info, temporary_info, procedure_info); + TypeKind = (primitive, enumeration, _record); + ElnaTacOperator = ( + load_immediate, + load_address, + add, +
add_immediate, + load_word, + store_word, + jal, + move, + sub, + div, + rem, + mul, + _xor, + _or, + and, + seqz, + snez, + slt, + xor_immediate, + neg, + not, + jump, + beqz, + label, + start, + ret + ); + ElnaTacOperand = (register, immediate, symbol, offset); + ElnaTacRegister = ( + zero, + ra, + sp, + gp, + tp, + t0, + t1, + t2, + s0, + s1, + a0, + a1, + a2, + a3, + a4, + a5, + a6, + a7, + s2, + s3, + s4, + s5, + s6, + s7, + s8, + s9, + s10, + s11, + t3, + t4, + t5, + t6 + ); + +var + symbol_table_global: Array; + compiler_strings: Array; + classification: Array; + + source_code: Word; + compiler_strings_position: Word; + compiler_strings_length: Word; + label_counter: Word; + symbol_table_store: Word; + + (* Points to a segment of free memory. *) + memory_free_pointer: Word; + +(** + * Calculates and returns the string token length between quotes, including the + * escaping slash characters. + * + * Parameters: + * string - String token pointer. + * + * Returns the length in a0. + *) +proc _string_length(string: Word); +var + counter: Word; + current_byte: Word; +begin + (* Reset the counter. *) + counter := 0; + + .string_length_loop; + string := string + 1; + + current_byte := _load_byte(string); + if current_byte <> '"' then + counter := counter + 1; + goto string_length_loop + end; + + return counter +end; + +(** + * Adds a string to the global, read-only string storage. + * + * Parameters: + * string - String token. + * + * Returns the offset from the beginning of the storage to the new string in a0.
+ *) +proc _add_string(string: Word); +var + contents: Word; + result: Word; + current_byte: Word; +begin + contents := string + 1; + result := compiler_strings_length; + + .add_string_loop; + current_byte := _load_byte(contents); + if current_byte <> '"' then + _store_byte(current_byte, compiler_strings_position); + compiler_strings_position := compiler_strings_position + 1; + contents := contents + 1; + + if current_byte <> '\\' then + compiler_strings_length := compiler_strings_length + 1 + end; + goto add_string_loop + end; + + return result +end; + +(** + * Reads standard input into a buffer. + * + * Parameters: + * buffer - Buffer pointer. + * size - Buffer size. + * + * Returns the amount of bytes written in a0. + *) +proc _read_file(buffer: Word, size: Word); + return _syscall(0, buffer, size, 0, 0, 0, 63) +end; + +(** + * MAP_ANONYMOUS is 32. + * PROT_READ | PROT_WRITE is (1 | 2). + * MAP_ANONYMOUS | MAP_PRIVATE is (32 | 2) + *) +proc _mmap(length: Word); + return _syscall(0, length, 1 or 2, 32 or 2, -1, 0, 222) +end; + +(** + * Writes to the standard output. + * + * Parameters: + * buffer - Buffer. + * size - Buffer length. + *) +proc _write_s(buffer: Word, size: Word); +begin + _syscall(1, buffer, size, 0, 0, 0, 64) +end; + +(** + * Writes a number to a string buffer. + * + * Parameters: + * number - Whole number. + * output_buffer - Buffer pointer. + * + * Sets a0 to the length of the written number.
+ *) +proc _print_i(number: Word, output_buffer: Word); +var + local_buffer: Word; + is_negative: Word; + current_character: Word; + result: Word; +begin + local_buffer := @result + 11; + + if number >= 0 then + is_negative := 0 + else + number := -number; + is_negative := 1 + end; + + .print_i_digit10; + current_character := number % 10; + _store_byte(current_character + '0', local_buffer); + + number := number / 10; + local_buffer := local_buffer - 1; + + if number <> 0 then + goto print_i_digit10 + end; + if is_negative = 1 then + _store_byte('-', local_buffer); + local_buffer := local_buffer - 1 + end; + result := @result + 11; + result := result - local_buffer; + _memcpy(output_buffer, local_buffer + 1, result); + + return result +end; + +(** + * Writes a number to the standard output. + * + * Parameters: + * number - Whole number. + *) +proc _write_i(number: Word); +var + local_buffer: Word; + length: Word; +begin + length := _print_i(number, @local_buffer); + _write_s(@local_buffer, length) +end; + +(** + * Writes a character from a0 into the standard output. + * + * Parameters: + * character - Character to write. + *) +proc _write_c(character: Word); +begin + _write_s(@character, 1) +end; + +(** + * Writes a null-terminated string. + * + * Parameters: + * string - String. + *) +proc _write_z(string: Word); +var + next_byte: Word; +begin + (* Check for 0 character. *) + next_byte := _load_byte(string); + + if next_byte <> 0 then + (* Print a character. *) + _write_c(next_byte); + + (* Advance the input string by one byte. *) + _write_z(string + 1) + end +end; + +(** + * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. + *) +proc _is_upper(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= 'A'; + rhs := character <= 'Z'; + + return lhs & rhs +end; + +(** + * Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
+ *) +proc _is_lower(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= 'a'; + rhs := character <= 'z'; + + return lhs & rhs +end; + +(** + * Detects if the passed character is a 7-bit alpha character or an underscore. + * + * Parameters: + * character - Tested character. + * + * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. + *) +proc _is_alpha(character: Word); +var + is_upper_result: Word; + is_lower_result: Word; + is_alpha_result: Word; + is_underscore: Word; +begin + is_upper_result := _is_upper(character); + is_lower_result := _is_lower(character); + is_underscore := character = '_'; + + is_alpha_result := is_lower_result or is_upper_result; + return is_alpha_result or is_underscore +end; + +(** + * Detects whether the passed character is a digit (a value between 0 and 9). + * + * Parameters: + * character - Examined value. + * + * Sets a0 to 1 if it is a digit, to 0 otherwise. + *) +proc _is_digit(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := character >= '0'; + rhs := character <= '9'; + + return lhs & rhs +end; + +(** + * Detects whether the passed character is accepted by _is_alpha or by _is_digit. + * + * Parameters: + * character - Tested character. + *) +proc _is_alnum(character: Word); +var + lhs: Word; + rhs: Word; +begin + lhs := _is_alpha(character); + rhs := _is_digit(character); + + return lhs or rhs +end; + +(** + * Parameters: + * lhs - First pointer. + * rhs - Second pointer. + * count - The length to compare. + * + * Returns 0 if memory regions are equal. + * Otherwise returns the lhs byte minus the rhs byte of the first differing pair. + *) +proc _memcmp(lhs: Word, rhs: Word, count: Word); +var + lhs_byte: Word; + rhs_byte: Word; + result: Word; +begin + result := 0; + + .memcmp_loop; + if count <> 0 then + lhs_byte := _load_byte(lhs); + rhs_byte := _load_byte(rhs); + result := lhs_byte - rhs_byte; + + lhs := lhs + 1; + rhs := rhs + 1; + count := count - 1; + + if result = 0 then + goto memcmp_loop + end + end; + + return result +end; + +(** + * Copies memory. + * + * Parameters: + * destination - Destination. + * source - Source. + * count - Size.
+ * + * Returns the destination pointer after it has been advanced by count bytes (not the original destination). + *) +proc _memcpy(destination: Word, source: Word, count: Word); +var + current_byte: Word; +begin + .memcpy_loop; + if count <> 0 then + current_byte := _load_byte(source); + _store_byte(current_byte, destination); + + destination := destination + 1; + source := source + 1; + count := count - 1; + goto memcpy_loop + end; + + return destination +end; + +proc _node_get_kind(this: Word); + return this^ +end; + +(* An instruction takes 44 bytes: kind at offset 0, next pointer at offset 4, then operand slots of 12 bytes each (type, value, length) starting at offset 8. *) +proc _elna_tac_instruction_size(); + return 44 +end; + +proc _elna_tac_instruction_get_kind(this: Word); + return this^ +end; + +proc _elna_tac_instruction_set_kind(this: Word, value: Word); +begin + this^ := value +end; + +proc _elna_tac_instruction_get_next(this: Word); +begin + this := this + 4; + return this^ +end; + +(* Appends value to the end of the chain that starts at this. A value of 0 skips the walk and clears the next pointer of this instruction itself. *) +proc _elna_tac_instruction_set_next(this: Word, value: Word); +begin + .elna_tac_instruction_set_next_loop; + this := this + 4; + if value <> 0 then + if this^ <> 0 then + this := this^; + goto elna_tac_instruction_set_next_loop + end + end; + this^ := value +end; + +(* Operand accessors: n is 1-based; the slot of operand n starts at offset 8 + (n - 1) * 12. *) +proc _elna_tac_instruction_get_operand_type(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + return this^ +end; + +proc _elna_tac_instruction_get_operand_value(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this := this + 4; + return this^ +end; + +proc _elna_tac_instruction_get_operand_length(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this := this + 8; + return this^ +end; + +proc _elna_tac_instruction_set_operand(this: Word, n: Word, operand_type: Word, operand_value: Word, operand_length: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this^ := operand_type; + this := this + 4; + this^ := operand_value; + this := this + 4; + this^ := operand_length +end; + +proc _elna_tac_instruction_create(kind: Word); +var + result: Word; +
instruction_size: Word; +begin + instruction_size := _elna_tac_instruction_size(); + result := _allocate(instruction_size); + + _elna_tac_instruction_set_kind(result, kind); + _elna_tac_instruction_set_next(result, 0); + + return result +end; + +proc _elna_tac_module_create(data: Word, code: Word); +var + result: Word; + current_word: Word; +begin + result := _allocate(8); + + current_word := result; + current_word^ := data; + current_word := current_word + 4; + current_word^ := code; + + return result +end; + +proc _elna_tac_module_get_data(this: Word); + return this^ +end; + +proc _elna_tac_module_get_code(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _elna_tac_declaration_size(); + return 16 +end; + +proc _elna_tac_declaration_get_next(this: Word); + return this^ +end; + +proc _elna_tac_declaration_set_next(this: Word, value: Word); +begin + this^ := value +end; + +proc _elna_tac_declaration_get_name(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _elna_tac_declaration_set_name(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _elna_tac_declaration_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _elna_tac_declaration_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _elna_tac_declaration_get_body(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _elna_tac_declaration_set_body(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _elna_tac_load_immediate(target_register: Word, source_immediate: Word, immediate_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.load_immediate); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.immediate, source_immediate, immediate_length); + + return result +end; + +proc 
_elna_tac_load_address(target_register: Word, source_symbol: Word, symbol_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.load_address); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); + + return result +end; + +proc _elna_tac_beqz(target_register: Word, source_symbol: Word, symbol_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.beqz); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); + + return result +end; + +proc _elna_tac_jump(source_symbol: Word, symbol_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.jump); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, source_symbol, symbol_length); + + return result +end; + +proc _elna_tac_add(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.add); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_mul(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.mul); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_sub(destination: Word, 
lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.sub); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_div(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.div); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_rem(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.rem); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_xor(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator._xor); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +(* Creates an xor instruction whose third operand is an immediate. It uses the xor_immediate kind, which the writer prints as xori. *) +proc _elna_tac_xor_immediate(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.xor_immediate); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2,
ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); + + return result +end; + +proc _elna_tac_or(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator._or); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_and(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.and); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_add_immediate(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.add_immediate); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); + + return result +end; + +proc _elna_tac_slt(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.slt); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_jal(symbol: Word, length: Word); +var + result: Word; +begin + result := 
_elna_tac_instruction_create(ElnaTacOperator.jal); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, symbol, length); + + return result +end; + +proc _elna_tac_load_word(target: Word, register: Word, offset: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.load_word); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); + + return result +end; + +proc _elna_tac_store_word(target: Word, register: Word, offset: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.store_word); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); + + return result +end; + +proc _elna_tac_move(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.move); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_seqz(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.seqz); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_snez(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.snez); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_neg(destination: Word, source: 
Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.neg); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_not(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.not); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_label(counter: Word, length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.label); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, counter, length); + + return result +end; + +proc _elna_writer_instruction_name(instruction_kind: Word); +var + argument_count: Word; +begin + if instruction_kind = ElnaTacOperator.load_immediate then + argument_count := 2; + _write_s("\tli", 3) + elsif instruction_kind = ElnaTacOperator.load_address then + argument_count := 2; + _write_s("\tla", 3) + elsif instruction_kind = ElnaTacOperator.add then + argument_count := 3; + _write_s("\tadd", 4) + elsif instruction_kind = ElnaTacOperator.add_immediate then + argument_count := 3; + _write_s("\taddi", 5) + elsif instruction_kind = ElnaTacOperator.load_word then + argument_count := 2; + _write_s("\tlw", 3) + elsif instruction_kind = ElnaTacOperator.store_word then + argument_count := 2; + _write_s("\tsw", 3) + elsif instruction_kind = ElnaTacOperator.jal then + argument_count := 1; + _write_s("\tcall", 5) + elsif instruction_kind = ElnaTacOperator.move then + argument_count := 2; + _write_s("\tmv", 3) + elsif instruction_kind = ElnaTacOperator.sub then + argument_count := 3; + _write_s("\tsub", 4) + elsif instruction_kind = ElnaTacOperator.mul then + argument_count := 3; + 
_write_s("\tmul", 4) + elsif instruction_kind = ElnaTacOperator.div then + argument_count := 3; + _write_s("\tdiv", 4) + elsif instruction_kind = ElnaTacOperator.rem then + argument_count := 3; + _write_s("\trem", 4) + elsif instruction_kind = ElnaTacOperator._xor then + argument_count := 3; + _write_s("\txor", 4) + elsif instruction_kind = ElnaTacOperator.xor_immediate then + argument_count := 3; + _write_s("\txori", 5) + elsif instruction_kind = ElnaTacOperator._or then + argument_count := 3; + _write_s("\tor", 3) + elsif instruction_kind = ElnaTacOperator.and then + argument_count := 3; + _write_s("\tand", 4) + elsif instruction_kind = ElnaTacOperator.seqz then + argument_count := 2; + _write_s("\tseqz", 5) + elsif instruction_kind = ElnaTacOperator.snez then + argument_count := 2; + _write_s("\tsnez", 5) + elsif instruction_kind = ElnaTacOperator.slt then + argument_count := 3; + _write_s("\tslt", 4) + elsif instruction_kind = ElnaTacOperator.neg then + argument_count := 2; + _write_s("\tneg", 4) + elsif instruction_kind = ElnaTacOperator.not then + argument_count := 2; + _write_s("\tnot", 4) + elsif instruction_kind = ElnaTacOperator.jump then + argument_count := 1; + _write_s("\tj", 2) + elsif instruction_kind = ElnaTacOperator.beqz then + argument_count := 2; + _write_s("\tbeqz", 5) + elsif instruction_kind = ElnaTacOperator.start then + argument_count := 0; + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\0") + elsif instruction_kind = ElnaTacOperator.ret then + argument_count := 0; + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\0") + end; + return argument_count +end; + +proc _elna_writer_register(register: Word); +begin + _write_c('x'); + _write_i(register - 1) +end; + +proc _elna_writer_operand(instruction: Word, n: Word); +var + operand_value: Word; + operand_length: Word; + operand_type: Word; +begin + operand_type := _elna_tac_instruction_get_operand_type(instruction, n); + operand_value := 
_elna_tac_instruction_get_operand_value(instruction, n); + operand_length := _elna_tac_instruction_get_operand_length(instruction, n); + + _write_c(' '); + if operand_type = ElnaTacOperand.register then + _elna_writer_register(operand_value) + elsif operand_type = ElnaTacOperand.offset then + _write_i(operand_length); + _write_c('('); + _elna_writer_register(operand_value); + _write_c(')') + elsif operand_type = ElnaTacOperand.symbol then + if operand_length = 0 then + _write_label(operand_value, 0) + else + _write_s(operand_value, operand_length) + end + elsif operand_length = 0 then (* ElnaTacOperand.immediate *) + _write_i(operand_value) + else + _write_s(operand_value, operand_length) + end +end; + +proc _elna_writer_instruction(instruction: Word); +var + instruction_kind: Word; + argument_count: Word; + current_argument: Word; + operand_value: Word; + operand_length: Word; +begin + instruction_kind := _elna_tac_instruction_get_kind(instruction); + + if instruction_kind = ElnaTacOperator.label then + argument_count := 0; + operand_value := _elna_tac_instruction_get_operand_value(instruction, 1); + operand_length := _elna_tac_instruction_get_operand_length(instruction, 1); + _write_label(operand_value, operand_length); + _write_c(':') + else + argument_count := _elna_writer_instruction_name(instruction_kind) + end; + current_argument := 1; + + .elna_writer_instruction_loop; + if current_argument <= argument_count then + _elna_writer_operand(instruction, current_argument); + current_argument := current_argument + 1 + end; + if current_argument <= argument_count then + _write_c(','); + goto elna_writer_instruction_loop + end; + + _write_c('\n') +end; + +proc _elna_writer_instructions(instruction: Word); +begin + if instruction <> 0 then + _elna_writer_instruction(instruction); + instruction := _elna_tac_instruction_get_next(instruction); + _elna_writer_instructions(instruction) + end +end; + +proc _elna_writer_procedure(procedure: Word); +var + name_pointer: Word; 
+ name_length: Word; + body_statements: Word; +begin + .elna_writer_procedure_loop; + (* Check the list before reading from it, like _elna_writer_variable does, so an empty list writes nothing. *) + if procedure <> 0 then + name_pointer := _elna_tac_declaration_get_name(procedure); + name_length := _elna_tac_declaration_get_length(procedure); + body_statements := _elna_tac_declaration_get_body(procedure); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_s(name_pointer, name_length); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_s(name_pointer, name_length); + _write_z(":\n\0"); + + _elna_writer_instructions(body_statements); + _write_z("\tret\n\0"); + + procedure := _elna_tac_declaration_get_next(procedure); + goto elna_writer_procedure_loop + end +end; + +proc _elna_writer_variable(variable: Word); +var + name: Word; + name_length: Word; + size: Word; +begin + .elna_writer_variable_loop; + if variable <> 0 then + name := _elna_tac_declaration_get_name(variable); + name_length := _elna_tac_declaration_get_length(variable); + size := _elna_tac_declaration_get_body(variable); + + _write_z(".type \0"); + _write_s(name, name_length); + _write_z(", @object\n\0"); + + _write_s(name, name_length); + _write_c(':'); + + _write_z(" .zero \0"); + _write_i(size); + + _write_c('\n'); + variable := _elna_tac_declaration_get_next(variable); + + goto elna_writer_variable_loop + end +end; + +proc _elna_writer_module(pair: Word); +var + compiler_strings_copy: Word; + compiler_strings_end: Word; + current_byte: Word; + current_part: Word; +begin + _write_z(".globl _start, main\n\n\0"); + _write_z(".section .data\n\0"); + + current_part := _elna_tac_module_get_data(pair); + _elna_writer_variable(current_part); + + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + + current_part :=
_elna_tac_module_get_code(pair); + _elna_writer_procedure(current_part); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + compiler_strings_copy := @compiler_strings; + compiler_strings_end := compiler_strings_position; + + .elna_writer_module_loop; + if compiler_strings_copy < compiler_strings_end then + current_byte := _load_byte(compiler_strings_copy); + compiler_strings_copy := compiler_strings_copy + 1; + _write_c(current_byte); + + goto elna_writer_module_loop + end; + _write_c('"'); + _write_c('\n'); +end; + +proc _node_set_kind(this: Word, kind: Word); +begin + this^ := kind +end; + +proc _integer_literal_node_size(); + return 12 +end; + +proc _integer_literal_node_get_value(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _integer_literal_node_set_value(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _integer_literal_node_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _integer_literal_node_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _elna_parser_integer_literal(); +var + integer_token: Word; + integer_length: Word; + result: Word; + literal_size: Word; +begin + literal_size := _integer_literal_node_size(); + result := _allocate(literal_size); + + integer_token := _elna_lexer_global_get_start(); + integer_length := _elna_lexer_global_get_end(); + integer_length := integer_length - integer_token; + _elna_lexer_skip_token(); + + _node_set_kind(result, NodeKind.integer_literal); + _integer_literal_node_set_value(result, integer_token); + _integer_literal_node_set_length(result, integer_length); + + return result +end; + +proc _elna_tac_integer_literal(integer_literal_node: Word); +var + integer_token: Word; + integer_length: Word; + token_kind: Word; +begin + integer_token := _integer_literal_node_get_value(integer_literal_node); + integer_length := 
_integer_literal_node_get_length(integer_literal_node); + + return _elna_tac_load_immediate(ElnaTacRegister.t0, integer_token, integer_length) +end; + +proc _character_literal_node_size(); + return 12 +end; + +proc _character_literal_node_get_value(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _character_literal_node_set_value(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _character_literal_node_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _character_literal_node_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +(* Creates a character literal node from the current token: the value is the token start, the length is the token end minus the token start. *) +proc _elna_parser_character_literal(); +var + character: Word; + character_length: Word; + result: Word; + literal_size: Word; +begin + literal_size := _character_literal_node_size(); + result := _allocate(literal_size); + + character := _elna_lexer_global_get_start(); + character_length := _elna_lexer_global_get_end(); + character_length := character_length - character; + _elna_lexer_skip_token(); + + _node_set_kind(result, NodeKind.character_literal); + (* Use the character literal setters, matching the getters in _elna_tac_character_literal. *) + _character_literal_node_set_value(result, character); + _character_literal_node_set_length(result, character_length); + + return result +end; + +proc _elna_tac_character_literal(character_literal_node: Word); +var + character: Word; + character_length: Word; +begin + character := _character_literal_node_get_value(character_literal_node); + character_length := _character_literal_node_get_length(character_literal_node); + + return _elna_tac_load_immediate(ElnaTacRegister.t0, character, character_length) +end; + +proc _variable_expression_size(); + return 12 +end; + +proc _variable_expression_get_name(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _variable_expression_set_name(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _variable_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc
_variable_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _allocate(size: Word); +var + result: Word; +begin + result := memory_free_pointer; + memory_free_pointer := memory_free_pointer + size; + return result +end; + +proc _elna_parser_variable_expression(); +var + name: Word; + name_token: Word; + result: Word; + memory_size: Word; +begin + name := _elna_lexer_global_get_start(); + name_token := _elna_lexer_global_get_end(); + name_token := name_token - name; + _elna_lexer_skip_token(); + + memory_size := _variable_expression_size(); + result := _allocate(memory_size); + + _node_set_kind(result, NodeKind.variable_expression); + _variable_expression_set_name(result, name); + _variable_expression_set_length(result, name_token); + + return result +end; + +proc _elna_tac_variable_expression(variable_expression: Word, symbol_table: Word); +var + name: Word; + name_token: Word; + lookup_result: Word; + instruction: Word; +begin + name := _variable_expression_get_name(variable_expression); + name_token := _variable_expression_get_length(variable_expression); + + lookup_result := _symbol_table_lookup(symbol_table, name, name_token); + if lookup_result <> 0 then + instruction := _elna_tac_local_designator(lookup_result) + else + instruction := _elna_tac_global_designator(variable_expression) + end; + return instruction +end; + +proc _string_literal_node_size(); + return 12 +end; + +proc _string_literal_node_get_value(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _string_literal_node_set_value(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _string_literal_node_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _string_literal_node_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _elna_parser_string_literal(); +var + length: Word; + token_start: Word; + result: Word; + memory_size: 
Word; +begin + memory_size := _string_literal_node_size(); + result := _allocate(memory_size); + + token_start := _elna_lexer_global_get_start(); + length := _string_length(token_start); + _elna_lexer_skip_token(); + + _node_set_kind(result, NodeKind.string_literal); + _string_literal_node_set_value(result, token_start); + _string_literal_node_set_length(result, length); + + return result +end; + +proc _elna_tac_string_literal(string_literal_node: Word); +var + token_start: Word; + length: Word; + offset: Word; + instruction: Word; + first_instruction: Word; + next_instruction: Word; +begin + token_start := _string_literal_node_get_value(string_literal_node); + length := _string_literal_node_get_length(string_literal_node); + offset := _add_string(token_start); + + first_instruction := _elna_tac_load_address(ElnaTacRegister.t0, "strings", 7); + instruction := _elna_tac_load_immediate(ElnaTacRegister.t1, offset, 0); + _elna_tac_instruction_set_next(first_instruction, instruction); + next_instruction := _elna_tac_add(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(instruction, next_instruction); + + return first_instruction +end; + +proc _elna_parser_simple_expression(); +var + current_character: Word; + parser_node: Word; + token_kind: Word; +begin + parser_node := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.character then + parser_node := _elna_parser_character_literal() + elsif token_kind = ElnaLexerKind.integer then + parser_node := _elna_parser_integer_literal() + elsif token_kind = ElnaLexerKind.string then + parser_node := _elna_parser_string_literal() + elsif token_kind = ElnaLexerKind.identifier then + parser_node := _elna_parser_variable_expression() + end; + return parser_node +end; + +proc _dereference_expression_size(); + return 8 +end; + +proc _dereference_expression_get_pointer(this: Word); +begin + this := this + 4; + return this^ +end; + +proc 
_dereference_expression_set_pointer(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _elna_parser_dereference_expression(simple_expression: Word); +var + result: Word; + memory_size: Word; +begin + memory_size := _dereference_expression_size(); + result := _allocate(memory_size); + + _node_set_kind(result, NodeKind.dereference_expression); + _dereference_expression_set_pointer(result, simple_expression); + _elna_lexer_skip_token(); + + return result +end; + +proc _elna_parser_designator(); +var + simple_expression: Word; + token_kind: Word; +begin + simple_expression := _elna_parser_simple_expression(); + + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.hat then + simple_expression := _elna_parser_dereference_expression(simple_expression) + elsif token_kind = ElnaLexerKind.dot then + simple_expression := _elna_parser_field_access_expression(simple_expression) + elsif token_kind = ElnaLexerKind.left_paren then + simple_expression := _elna_parser_call(simple_expression) + end; + return simple_expression +end; + +(** + * Generates instructions for a literal or a variable expression. + * + * Parameters: + * parser_node - Expression node. + * symbol_table - Symbol table passed on to the variable expression. + * is_address - Pointer to a word that is set to 1 for a variable expression and to 0 for a literal. + * + * Returns the generated instruction. + *) +proc _elna_tac_simple_expression(parser_node: Word, symbol_table: Word, is_address: Word); +var + node_kind: Word; + instruction: Word; +begin + is_address^ := 0; + node_kind := _node_get_kind(parser_node); + + if node_kind = NodeKind.character_literal then + instruction := _elna_tac_character_literal(parser_node) + elsif node_kind = NodeKind.string_literal then + instruction := _elna_tac_string_literal(parser_node) + elsif node_kind = NodeKind.integer_literal then + instruction := _elna_tac_integer_literal(parser_node) + else + instruction := _elna_tac_variable_expression(parser_node, symbol_table); + is_address^ := 1 + end; + return instruction +end; + +proc _unary_expression_size(); + return 12 +end; + +proc _unary_expression_get_operand(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _unary_expression_set_operand(this: Word, value: Word); +begin + this :=
this + 4; + this^ := value +end; + +proc _unary_expression_get_operator(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _unary_expression_set_operator(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _elna_parser_unary_expression(); +var + token_kind: Word; + result: Word; + memory_size: Word; + operand: Word; + operator: Word; +begin + _elna_lexer_read_token(@token_kind); + operator := 0; + + if token_kind = ElnaLexerKind.at then + operator := '@' + elsif token_kind = ElnaLexerKind.minus then + operator := '-' + elsif token_kind = ElnaLexerKind.not then + operator := '~' + end; + if operator <> 0 then + _elna_lexer_skip_token() + end; + result := _elna_parser_designator(); + + if operator <> 0 then + operand := result; + memory_size := _unary_expression_size(); + result := _allocate(memory_size); + + _node_set_kind(result, NodeKind.unary_expression); + _unary_expression_set_operand(result, operand); + _unary_expression_set_operator(result, operator) + end; + + return result +end; + +proc _elna_tac_unary_expression(parser_node: Word, symbol_table: Word); +var + current_character: Word; + token_kind: Word; + expression_kind: Word; + operator: Word; + operand: Word; + is_address: Word; + first_instruction: Word; + instruction: Word; +begin + operator := 0; + operand := 0; + + expression_kind := _node_get_kind(parser_node); + + if expression_kind = NodeKind.unary_expression then + operator := _unary_expression_get_operator(parser_node); + operand := _unary_expression_get_operand(parser_node) + else + operand := parser_node + end; + + if operator = '@' then + first_instruction := _elna_tac_designator(operand, symbol_table, @is_address) + else + first_instruction := _elna_tac_designator(operand, symbol_table, @is_address); + if is_address then + instruction := _elna_tac_load_word(ElnaTacRegister.t0, ElnaTacRegister.t0, 0); + _elna_tac_instruction_set_next(first_instruction, instruction) + end + end; + if operator = 
'-' then + instruction := _elna_tac_neg(ElnaTacRegister.t0, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(first_instruction, instruction) + elsif operator = '~' then + instruction := _elna_tac_not(ElnaTacRegister.t0, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(first_instruction, instruction) + end; + return first_instruction +end; + +proc _binary_expression_size(); + return 16 +end; + +proc _binary_expression_get_lhs(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _binary_expression_set_lhs(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _binary_expression_get_rhs(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _binary_expression_set_rhs(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _binary_expression_get_operator(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _binary_expression_set_operator(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _elna_parser_binary_expression(); +var + lhs_node: Word; + rhs_node: Word; + token_kind: Word; + memory_size: Word; + result: Word; +begin + lhs_node := _elna_parser_unary_expression(); + rhs_node := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.plus then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.minus then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.multiplication then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.and then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind._or then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind._xor then + _elna_lexer_skip_token(); + rhs_node := 
_elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.equals then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.remainder then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.division then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.less_than then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.greater_than then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.less_equal then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.not_equal then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.greater_equal then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + end; + if rhs_node <> 0 then + memory_size := _binary_expression_size(); + result := _allocate(memory_size); + + _node_set_kind(result, NodeKind.binary_expression); + _binary_expression_set_lhs(result, lhs_node); + _binary_expression_set_rhs(result, rhs_node); + _binary_expression_set_operator(result, token_kind) + else + result := lhs_node + end; + return result +end; + +proc _elna_tac_binary_expression(parser_node: Word, symbol_table: Word); +var + token_kind: Word; + expression_kind: Word; + operand_node: Word; + first_instruction: Word; + instruction: Word; + current_instruction: Word; +begin + expression_kind := _node_get_kind(parser_node); + + if expression_kind <> NodeKind.binary_expression then + first_instruction := _elna_tac_unary_expression(parser_node, symbol_table) + else + token_kind := _binary_expression_get_operator(parser_node); + + operand_node := _binary_expression_get_lhs(parser_node); + first_instruction := 
_elna_tac_unary_expression(operand_node, symbol_table); + + (* Save the value of the left expression on the stack. *) + instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.sp, 64); + _elna_tac_instruction_set_next(first_instruction, instruction); + current_instruction := instruction; + + operand_node := _binary_expression_get_rhs(parser_node); + instruction := _elna_tac_unary_expression(operand_node, symbol_table); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + (* Load the left expression from the stack; *) + instruction := _elna_tac_load_word(ElnaTacRegister.t1, ElnaTacRegister.sp, 64); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + if token_kind = ElnaLexerKind.plus then + instruction := _elna_tac_add(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.minus then + instruction := _elna_tac_sub(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.multiplication then + instruction := _elna_tac_mul(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.and then + instruction := _elna_tac_and(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind._or then + instruction := _elna_tac_or(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind._xor then + instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + 
_elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.equals then + instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_seqz(ElnaTacRegister.t0, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.remainder then + instruction := _elna_tac_rem(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.division then + instruction := _elna_tac_div(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.less_than then + instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.greater_than then + instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.less_equal then + instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_xor_immediate(ElnaTacRegister.t0, ElnaTacRegister.t0, 1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.not_equal then + instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_snez(ElnaTacRegister.t0, 
ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.greater_equal then + instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_xor_immediate(ElnaTacRegister.t0, ElnaTacRegister.t0, 1); + _elna_tac_instruction_set_next(current_instruction, instruction) + end + end; + return first_instruction +end; + +(* Call node size: 4 bytes node kind, 4 bytes next statement pointer, 4 byte pointer to the callee variable expression, 4 * 7 for arguments and 4 bytes for the terminating nil argument. *) +proc _call_size(); + return 44 +end; + +proc _call_get_name(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _call_set_name(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _call_get_argument(this: Word, n: Word); +begin + n := n * 4; + this := this + 8; + this := this + n; + return this^ +end; + +proc _call_set_argument(this: Word, n: Word, value: Word); +begin + n := n * 4; + this := this + 8; + this := this + n; + this^ := value +end; + +proc _elna_parser_call(callee: Word); +var + parsed_expression: Word; + result: Word; + argument_number: Word; + token_kind: Word; + call_size: Word; +begin + call_size := _call_size(); + result := _allocate(call_size); + _node_set_kind(result, NodeKind.call); + _statement_set_next(result, 0); + + argument_number := 1; + _call_set_name(result, callee); + + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.right_paren then + _elna_lexer_skip_token(); + goto elna_parser_call_end + end; + + .elna_parser_call_loop; + parsed_expression := _elna_parser_binary_expression(); + _call_set_argument(result, argument_number, parsed_expression); + argument_number := argument_number + 1; + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + if token_kind =
ElnaLexerKind.comma then + goto elna_parser_call_loop + end; + + .elna_parser_call_end; + (* Set the trailing argument to nil. *) + _call_set_argument(result, argument_number, 0); + + return result +end; + +proc _elna_tac_call(parsed_call: Word, symbol_table: Word); +var + name_length: Word; + name: Word; + argument_count: Word; + stack_offset: Word; + parsed_expression: Word; + instruction: Word; + first_instruction: Word; + current_instruction: Word; +begin + parsed_expression := _call_get_name(parsed_call); + name := _variable_expression_get_name(parsed_expression); + name_length := _variable_expression_get_length(parsed_expression); + argument_count := 0; + first_instruction := 0; + + .elna_tac_call_loop; + + parsed_expression := _call_get_argument(parsed_call, argument_count + 1); + if parsed_expression = 0 then + goto elna_tac_call_finalize + else + instruction := _elna_tac_binary_expression(parsed_expression, symbol_table); + if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + current_instruction := instruction; + + (* Save the argument on the stack. *) + stack_offset := argument_count * 4; + + instruction := _elna_tac_store_word(ElnaTacRegister.t0, + ElnaTacRegister.sp, 116 - stack_offset); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + argument_count := argument_count + 1; + goto elna_tac_call_loop + end; + .elna_tac_call_finalize; + + (* Load the argument from the stack. *) + if argument_count <> 0 then + (* Decrement the argument counter. 
*) + argument_count := argument_count - 1; + stack_offset := argument_count * 4; + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + instruction := _elna_tac_load_word(ElnaTacRegister.a0 + argument_count, + ElnaTacRegister.sp, 116 - stack_offset); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + goto elna_tac_call_finalize + end; + instruction := _elna_tac_jal(name, name_length); + if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + return first_instruction +end; + +(** + * All statements are chained into a list. Next contains a pointer to the next + * statement in the statement list. + *) +proc _statement_get_next(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _statement_set_next(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _goto_statement_size(); + return 16 +end; + +proc _goto_statement_get_label(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _goto_statement_set_label(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _goto_statement_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _goto_statement_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _elna_parser_goto_statement(); +var + token_kind: Word; + label_name: Word; + label_length: Word; + statement_size: Word; + result: Word; +begin + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + label_name := _elna_lexer_global_get_start(); + label_length := _elna_lexer_global_get_end() - label_name; + _elna_lexer_skip_token(); + + statement_size := _goto_statement_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.goto_statement); + _statement_set_next(result, 0); + _goto_statement_set_label(result, 
label_name); + _goto_statement_set_length(result, label_length); + + return result +end; + +proc _elna_tac_goto_statement(parser_node: Word); +var + label_name: Word; + label_length: Word; + label_with_dot: Word; + instruction: Word; +begin + label_name := _goto_statement_get_label(parser_node); + label_length := _goto_statement_get_length(parser_node); + label_with_dot := _allocate(label_length + 1); + + _store_byte('.', label_with_dot); + _memcpy(label_with_dot + 1, label_name, label_length); + + return _elna_tac_jump(label_with_dot, label_length + 1) +end; + +proc _label_declaration_size(); + return 16 +end; + +proc _label_declaration_get_label(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _label_declaration_set_label(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _label_declaration_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _label_declaration_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +(* Parses a label declaration: skips the current token, takes the text of the next token as the label name and returns a label_declaration node holding the name pointer and its length. *) +proc _elna_parser_label_declaration(); +var + token_kind: Word; + label_name: Word; + label_length: Word; + statement_size: Word; + result: Word; +begin + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + label_name := _elna_lexer_global_get_start(); + label_length := _elna_lexer_global_get_end() - label_name; + _elna_lexer_skip_token(); + + statement_size := _label_declaration_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.label_declaration); + _statement_set_next(result, 0); + (* Use the node's own accessors rather than the goto statement ones that merely share the same offsets. *) + _label_declaration_set_label(result, label_name); + _label_declaration_set_length(result, label_length); + + return result +end; + +proc _elna_tac_label_declaration(parser_node: Word); +var + label_name: Word; + label_length: Word; +begin + label_name := _label_declaration_get_label(parser_node); + label_length := _label_declaration_get_length(parser_node); + + return _elna_tac_label(label_name, label_length) +end;
+ +proc _elna_tac_local_designator(symbol: Word); +var + variable_offset: Word; +begin + variable_offset := _parameter_info_get_offset(symbol); + + return _elna_tac_add_immediate(ElnaTacRegister.t0, ElnaTacRegister.sp, variable_offset) +end; + +proc _elna_tac_global_designator(variable_expression: Word); +var + name: Word; + token_length: Word; +begin + name := _variable_expression_get_name(variable_expression); + token_length := _variable_expression_get_length(variable_expression); + + return _elna_tac_load_address(ElnaTacRegister.t0, name, token_length) +end; + +proc _field_access_expression_size(); + return 16 +end; + +proc _field_access_expression_get_aggregate(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _field_access_expression_set_aggregate(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _field_access_expression_get_field(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _field_access_expression_set_field(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _field_access_expression_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _field_access_expression_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _elna_tac_enumeration_value(field_access_expression: Word); +var + enumeration_type: Word; + members: Word; + members_length: Word; + token_type: Word; + value_name: Word; + name_length: Word; + member_name: Word; + member_length: Word; + counter: Word; + symbol: Word; + instruction: Word; +begin + symbol := _field_access_expression_get_aggregate(field_access_expression); + value_name := _variable_expression_get_name(symbol); + name_length := _variable_expression_get_length(symbol); + + symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length); + + enumeration_type := _type_info_get_type(symbol); + members := 
_enumeration_type_get_members(enumeration_type); + members_length := _enumeration_type_get_length(enumeration_type); + + _elna_lexer_read_token(@token_type); + + value_name := _field_access_expression_get_field(field_access_expression); + name_length := _field_access_expression_get_length(field_access_expression); + counter := 1; + + instruction := 0; + .elna_tac_enumeration_value_members; + if members_length > 0 then + member_name := members^; + member_length := members + 4; + member_length := member_length^; + + if _string_compare(value_name, name_length, member_name, member_length) = 0 then + members_length := members_length - 1; + members := members + 8; + counter := counter + 1; + goto elna_tac_enumeration_value_members + end; + instruction := _elna_tac_load_immediate(ElnaTacRegister.t0, counter, 0) + end; + return instruction +end; + +proc _elna_parser_field_access_expression(aggregate: Word); +var + token_kind: Word; + name: Word; + name_token: Word; + result: Word; + memory_size: Word; +begin + (* Skip dot. Read the enumeration value. 
*) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + name := _elna_lexer_global_get_start(); + name_token := _elna_lexer_global_get_end(); + name_token := name_token - name; + _elna_lexer_skip_token(); + memory_size := _field_access_expression_size(); + result := _allocate(memory_size); + + _node_set_kind(result, NodeKind.field_access_expression); + _field_access_expression_set_aggregate(result, aggregate); + _field_access_expression_set_field(result, name); + _field_access_expression_set_length(result, name_token); + + return result +end; + +proc _elna_tac_designator(parser_node: Word, symbol_table: Word, is_address: Word); +var + name_token: Word; + lookup_result: Word; + token_kind: Word; + parser_node: Word; + node_kind: Word; + first_instruction: Word; + instruction: Word; +begin + node_kind := _node_get_kind(parser_node); + + if node_kind = NodeKind.dereference_expression then + parser_node := _dereference_expression_get_pointer(parser_node); + first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address); + instruction := _elna_tac_load_word(ElnaTacRegister.t0, ElnaTacRegister.t0, 0); + _elna_tac_instruction_set_next(first_instruction, instruction) + elsif node_kind = NodeKind.field_access_expression then + first_instruction := _elna_tac_enumeration_value(parser_node); + is_address^ := 0 + elsif node_kind = NodeKind.call then + first_instruction := _elna_tac_call(parser_node, symbol_table); + instruction := _elna_tac_move(ElnaTacRegister.t0, ElnaTacRegister.a0); + _elna_tac_instruction_set_next(first_instruction, instruction); + is_address^ := 0 + else + first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address) + end; + return first_instruction +end; + +proc _assign_statement_size(); + return 16 +end; + +proc _assign_statement_get_assignee(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _assign_statement_set_assignee(this: Word, value: Word); +begin + this := 
this + 8; + this^ := value +end; + +proc _assign_statement_get_assignment(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _assign_statement_set_assignment(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _elna_parser_assign_statement(assignee: Word); +var + statement_size: Word; + result: Word; + token_kind: Word; + assignment_node: Word; +begin + statement_size := _assign_statement_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.assign_statement); + _statement_set_next(result, 0); + _assign_statement_set_assignee(result, assignee); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + assignment_node := _elna_parser_binary_expression(); + _assign_statement_set_assignment(result, assignment_node); + + return result +end; + +proc _elna_tac_assign_statement(parser_tree: Word, symbol_table: Word); +var + current_expression: Word; + is_address: Word; + first_instruction: Word; + instruction: Word; + current_instruction: Word; +begin + current_expression := _assign_statement_get_assignee(parser_tree); + first_instruction := _elna_tac_designator(current_expression, symbol_table, @is_address); + + (* Save the assignee address on the stack. *) + current_instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.sp, 60); + _elna_tac_instruction_set_next(first_instruction, current_instruction); + + (* Compile the assignment. 
*) + current_expression := _assign_statement_get_assignment(parser_tree); + instruction := _elna_tac_binary_expression(current_expression, symbol_table); + _elna_tac_instruction_set_next(current_instruction, instruction); + + current_instruction := _elna_tac_load_word(ElnaTacRegister.t1, ElnaTacRegister.sp, 60); + _elna_tac_instruction_set_next(instruction, current_instruction); + + instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.t1, 0); + _elna_tac_instruction_set_next(current_instruction, instruction); + + return first_instruction +end; + +proc _return_statement_size(); + return 12 +end; + +proc _return_statement_get_returned(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _return_statement_set_returned(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _elna_parser_return_statement(); +var + token_kind: Word; + returned: Word; + label_length: Word; + statement_size: Word; + result: Word; +begin + (* Skip "return" keyword and whitespace after it. *) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + returned := _elna_parser_binary_expression(); + + statement_size := _return_statement_size(); + result := _allocate(statement_size); + + _node_set_kind(result, NodeKind.return_statement); + _statement_set_next(result, 0); + _return_statement_set_returned(result, returned); + + return result +end; + +proc _elna_tac_return_statement(parser_node: Word, symbol_table: Word); +var + return_expression: Word; + first_instruction: Word; + instruction: Word; +begin + return_expression := _return_statement_get_returned(parser_node); + first_instruction := _elna_tac_binary_expression(return_expression, symbol_table); + instruction := _elna_tac_move(ElnaTacRegister.a0, ElnaTacRegister.t0); + _elna_tac_instruction_set_next(first_instruction, instruction); + return first_instruction +end; + +(** + * Writes a label, .Ln, where n is a unique number. 
+ * If length is not 0, counter is instead the address of a label name of that + * length, written with a leading dot unless the name already starts with one. + * + * Parameters: + * counter - Label counter, or the address of the label name if length is not 0. + * length - Length of the label name, or 0 for a numbered label. + *) +proc _write_label(counter: Word, length: Word); +var + first_byte: Word; +begin + if length = 0 then + _write_s(".L", 2); + _write_i(counter) + else + first_byte := _load_byte(counter); + if first_byte <> '.' then + _write_c('.') + end; + _write_s(counter, length) + end +end; + +proc _elna_parser_conditional_statements(); +var + conditional_size: Word; + token_kind: Word; + current_node: Word; + result: Word; +begin + conditional_size := _conditional_statements_size(); + result := _allocate(conditional_size); + + (* Skip "if", "while" or "elsif". *) + _elna_lexer_skip_token(); + + current_node := _elna_parser_binary_expression(); + _conditional_statements_set_condition(result, current_node); + + (* Skip "then" or "do". *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + current_node := _elna_parser_statements(); + _conditional_statements_set_statements(result, current_node); + + _conditional_statements_set_next(result, 0); + return result +end; + +proc _elna_tac_conditional_statements(parser_node: Word, after_end_label: Word, symbol_table: Word); +var + condition_label: Word; + current_node: Word; + instruction: Word; + current_instruction: Word; + first_instruction: Word; +begin + (* Compile condition. *) + current_node := _conditional_statements_get_condition(parser_node); + first_instruction := _elna_tac_binary_expression(current_node, symbol_table); + + (* condition_label is the label in front of the next elsif condition or end.
*) + condition_label := label_counter; + label_counter := label_counter + 1; + + current_instruction := _elna_tac_beqz(ElnaTacRegister.t0, condition_label, 0); + _elna_tac_instruction_set_next(first_instruction, current_instruction); + + current_node := _conditional_statements_get_statements(parser_node); + instruction := _elna_tac_statements(current_node, symbol_table); + if instruction <> 0 then + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction + end; + + instruction := _elna_tac_jump(after_end_label, 0); + _elna_tac_instruction_set_next(current_instruction, instruction); + + current_instruction := _elna_tac_label(condition_label, 0); + _elna_tac_instruction_set_next(instruction, current_instruction); + + return first_instruction +end; + +(** + * Conditional statements form a list of pairs: a condition and its statements. + * Used for example to represent if and elsif blocks with the statements belonging to them. + * Each entry is 12 bytes: condition at offset 0, statements at offset 4, next entry at offset 8. + *) +proc _conditional_statements_size(); + return 12 +end; + +proc _conditional_statements_get_condition(this: Word); + return this^ +end; + +proc _conditional_statements_set_condition(this: Word, value: Word); +begin + this^ := value +end; + +proc _conditional_statements_get_statements(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _conditional_statements_set_statements(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _conditional_statements_get_next(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _conditional_statements_set_next(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _if_statement_size(); + return 16 +end; + +proc _if_statement_get_conditionals(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _if_statement_set_conditionals(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _if_statement_get_else(this: Word); +begin + this := this + 12; + return
this^ +end; + +proc _if_statement_set_else(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +(* Parses an if statement into an if_statement node: the first condition/statements pair, then every elsif pair linked through the next pointer of the pair before it, then the optional else statements (0 when absent). The final skip consumes the token that closes the statement (presumably "end"). *) +proc _elna_parser_if_statement(); +var + current_node: Word; + result: Word; + object_size: Word; + token_kind: Word; + previous_conditional: Word; + next_conditional: Word; +begin + object_size := _if_statement_size(); + result := _allocate(object_size); + + _node_set_kind(result, NodeKind.if_statement); + _statement_set_next(result, 0); + + previous_conditional := _elna_parser_conditional_statements(); + _if_statement_set_conditionals(result, previous_conditional); + + .elna_parser_if_statement_loop; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind._elsif then + next_conditional := _elna_parser_conditional_statements(); + _conditional_statements_set_next(previous_conditional, next_conditional); + (* Advance the tail so that the next elsif is linked after this one. *) + previous_conditional := next_conditional; + + goto elna_parser_if_statement_loop + elsif token_kind = ElnaLexerKind._else then + _elna_lexer_skip_token(); + + current_node := _elna_parser_statements(); + _if_statement_set_else(result, current_node) + else + _if_statement_set_else(result, 0) + end; + _elna_lexer_skip_token(); + + return result +end; + +proc _elna_parser_statement(); +var + token_kind: Word; + result : Word; +begin + result := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind._goto then + result := _elna_parser_goto_statement() + elsif token_kind = ElnaLexerKind._if then + result := _elna_parser_if_statement() + elsif token_kind = ElnaLexerKind._return then + result := _elna_parser_return_statement() + elsif token_kind = ElnaLexerKind.dot then + result := _elna_parser_label_declaration() + elsif token_kind = ElnaLexerKind.identifier then + result := _elna_parser_designator(); + + if _node_get_kind(result) <> NodeKind.call then + result := _elna_parser_assign_statement(result) + end + end; + return result +end; + +proc _elna_parser_statements(); +var + token_kind: Word; +
previous_statement: Word; + next_statement: Word; + first_statement: Word; +begin + _skip_empty_lines(); + + first_statement := _elna_parser_statement(); + previous_statement := first_statement; + if previous_statement = 0 then + goto elna_parser_statements_end + end; + + .elna_parser_statement_loop; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.semicolon then + _elna_lexer_skip_token(); + _skip_empty_lines(); + next_statement := _elna_parser_statement(); + _statement_set_next(previous_statement, next_statement); + previous_statement := next_statement; + + if previous_statement <> 0 then + goto elna_parser_statement_loop + end + end; + .elna_parser_statements_end; + _skip_empty_lines(); + + return first_statement +end; + +proc _elna_tac_statements(parser_node: Word, symbol_table: Word); +var + current_statement: Word; + instruction: Word; + first_instruction: Word; + current_instruction: Word; +begin + current_statement := parser_node; + first_instruction := 0; + + .elna_tac_statements_loop; + if current_statement <> 0 then + instruction := _elna_tac_statement(current_statement, symbol_table); + current_statement := _statement_get_next(current_statement); + if instruction = 0 then + goto elna_tac_statements_loop + end; + if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + current_instruction := instruction; + goto elna_tac_statements_loop + end; + return first_instruction +end; + +proc _elna_tac_if_statement(parser_node: Word, symbol_table: Word); +var + current_node: Word; + after_end_label: Word; + condition_label: Word; + first_instruction: Word; + instruction: Word; + current_instruction: Word; +begin + after_end_label := label_counter; + label_counter := label_counter + 1; + + current_node := _if_statement_get_conditionals(parser_node); + first_instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); + 
current_instruction := first_instruction; + + .elna_tac_if_statement_loop; + current_node := _conditional_statements_get_next(current_node); + if current_node <> 0 then + instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + goto elna_tac_if_statement_loop + end; + current_node := _if_statement_get_else(parser_node); + + if current_node <> 0 then + instruction := _elna_tac_statements(current_node, symbol_table); + if instruction <> 0 then + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction + end + end; + instruction := _elna_tac_label(after_end_label, 0); + _elna_tac_instruction_set_next(current_instruction, instruction); + + return first_instruction +end; + +proc _elna_tac_statement(parser_node: Word, symbol_table: Word); +var + statement_kind: Word; + instruction: Word; +begin + statement_kind := _node_get_kind(parser_node); + + if statement_kind = NodeKind.goto_statement then + instruction := _elna_tac_goto_statement(parser_node) + elsif statement_kind = NodeKind.if_statement then + instruction := _elna_tac_if_statement(parser_node, symbol_table) + elsif statement_kind = NodeKind.return_statement then + instruction := _elna_tac_return_statement(parser_node, symbol_table) + elsif statement_kind = NodeKind.label_declaration then + instruction := _elna_tac_label_declaration(parser_node) + elsif statement_kind = NodeKind.call then + instruction := _elna_tac_call(parser_node, symbol_table) + elsif statement_kind = NodeKind.assign_statement then + instruction := _elna_tac_assign_statement(parser_node, symbol_table) + else + instruction := 0 + end; + return instruction +end; + +(** + * Writes a register name to the standard output. + * + * Parameters: + * register_character - Register character. + * register_number - Register number.
+ *) +proc _write_register(register_character: Word, register_number: Word); +begin + (* The number is added to the zero character, which yields a digit only for the numbers 0 to 9. *) + _write_c(register_character); + _write_c(register_number + '0') +end; + +(* Accessors used for enumeration and record type descriptions: kind at offset 0, size at offset 4. *) +proc _type_get_kind(this: Word); + return this^ +end; + +proc _type_set_kind(this: Word, value: Word); +begin + this^ := value +end; + +proc _type_get_size(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _type_set_size(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +(* Enumeration type description: 16 bytes, members at offset 8, length at offset 12. *) +proc _enumeration_type_size(); + return 16 +end; + +proc _enumeration_type_get_members(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _enumeration_type_set_members(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _enumeration_type_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _enumeration_type_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +(* Enumeration type expression node: 12 bytes, members at offset 4, length at offset 8. *) +proc _enumeration_type_expression_size(); + return 12 +end; + +proc _enumeration_type_expression_get_members(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _enumeration_type_expression_set_members(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _enumeration_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _enumeration_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +(* Record type description: 16 bytes, members at offset 8, length at offset 12. *) +proc _record_type_size(); + return 16 +end; + +proc _record_type_get_members(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _record_type_set_members(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _record_type_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _record_type_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +(* Size in bytes of a record type expression node. *) +proc _record_type_expression_size(); + return 12 +end; + 
+proc _record_type_expression_get_members(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _record_type_expression_set_members(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _record_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _record_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _named_type_expression_size(); + return 12 +end; + +proc _named_type_expression_get_name(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _named_type_expression_set_name(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _named_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _named_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _elna_parser_record_type_expression(); +var + entry: Word; + member_count: Word; + memory_start: Word; + field_name: Word; + field_length: Word; + field_type: Word; + token_kind: Word; + type_expression_size: Word; + result: Word; + previous_entry: Word; +begin + _elna_lexer_skip_token(); + member_count := 0; + memory_start := 0; + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind._end then + goto elna_parser_record_type_expression_end + end; + .elna_parser_record_type_expression_loop; + entry := _allocate(16); + member_count := member_count + 1; + + field_name := _elna_lexer_global_get_start(); + field_length := _elna_lexer_global_get_end() - field_name; + + entry^ := field_name; + entry := entry + 4; + + entry^ := field_length; + entry := entry + 4; + + (* Skip the identifier. 
*) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + field_type := _elna_parser_type_expression(); + + entry^ := field_type; + entry := entry + 4; + + (* entry is now at offset 12 of the element, the link to the next one. Clear it and hook the element into the list. *) + entry^ := 0; + if memory_start = 0 then + memory_start := entry - 12 + else + previous_entry^ := entry - 12 + end; + previous_entry := entry; + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.semicolon then + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + goto elna_parser_record_type_expression_loop + end; + + .elna_parser_record_type_expression_end; + _elna_lexer_skip_token(); + + type_expression_size := _record_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.record_type_expression); + _record_type_expression_set_members(result, memory_start); + _record_type_expression_set_length(result, member_count); + + return result +end; + +(** + * Parses an enumeration type expression. The current token is skipped, then + * member names separated by commas are collected into a linked list of + * 12 byte elements (name, name length, next element). + * + * Returns an enumeration type expression node with the list and the member count. + *) +proc _elna_parser_enumeration_type_expression(); +var + token_kind: Word; + enumeration_name: Word; + name_length: Word; + memory_start: Word; + member_count: Word; + result: Word; + type_expression_size: Word; + entry: Word; + previous_entry: Word; +begin + _elna_lexer_skip_token(); + memory_start := 0; + member_count := 0; + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.right_paren then + goto elna_parser_enumeration_type_expression_end + end; + .elna_parser_enumeration_type_expression_loop; + entry := _allocate(12); + member_count := member_count + 1; + + enumeration_name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - enumeration_name; + + entry^ := enumeration_name; + entry := entry + 4; + + entry^ := name_length; + entry := entry + 4; + + (* entry is now at offset 8 of the element, the link to the next one. Clear it and hook the element into the list. *) + entry^ := 0; + if memory_start = 0 then + memory_start := entry - 8 + else + previous_entry^ := entry - 8 + end; + previous_entry := entry; + + (* Skip the identifier. 
*) + _elna_lexer_skip_token(); + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.comma then + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + goto elna_parser_enumeration_type_expression_loop + end; + + .elna_parser_enumeration_type_expression_end; + _elna_lexer_skip_token(); + + type_expression_size := _enumeration_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.enumeration_type_expression); + _enumeration_type_expression_set_members(result, memory_start); + _enumeration_type_expression_set_length(result, member_count); + + return result +end; + +(** + * Reads and creates enumeration type representation. + * + * record + * type_kind: Word; + * size: Word; + * members: StringArray; + * length: Word + * end; + * + * Returns enumeration type description. + *) +proc _elna_name_type_enumeration(parser_node: Word); +var + result: Word; + memory_start: Word; + member_count: Word; + member_array_size: Word; + member_array_start: Word; + member_array_current: Word; +begin + member_array_size := _enumeration_type_size(); + result := _allocate(member_array_size); + + memory_start := _enumeration_type_expression_get_members(parser_node); + member_count := _enumeration_type_expression_get_length(parser_node); + + (* Copy the list of enumeration members into an array of strings. 
*) + member_array_size := member_count * 8; + member_array_start := _allocate(member_array_size); + member_array_current := member_array_start; + + .elna_name_type_enumeration_loop; + if member_count > 0 then + member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + memory_start := memory_start^; + member_count := member_count - 1; + goto elna_name_type_enumeration_loop + end; + member_count := _enumeration_type_expression_get_length(parser_node); + + _type_set_kind(result, TypeKind.enumeration); + _type_set_size(result, 4); + _enumeration_type_set_members(result, member_array_start); + _enumeration_type_set_length(result, member_count); + + return _type_info_create(result) +end; + +proc _elna_name_type_record(parser_node: Word); +var + result: Word; + memory_start: Word; + member_count: Word; + member_array_size: Word; + member_array_start: Word; + member_array_current: Word; +begin + member_array_size := _record_type_size(); + result := _allocate(member_array_size); + + memory_start := _record_type_expression_get_members(parser_node); + member_count := _record_type_expression_get_length(parser_node); + + member_array_size := member_count * 12; + member_array_start := _allocate(member_array_size); + member_array_current := member_array_start; + + .elna_name_type_record_loop; + if member_count > 0 then + member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + member_array_current^ := _elna_name_type_expression(memory_start^); + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + memory_start := memory_start^; + 
member_count := member_count - 1; + goto elna_name_type_record_loop + end; + member_count := _record_type_expression_get_length(parser_node); + + _type_set_kind(result, TypeKind._record); + (* 4 bytes are counted per field regardless of the field type. *) + _type_set_size(result, member_count * 4); + _record_type_set_members(result, member_array_start); + _record_type_set_length(result, member_count); + + return _type_info_create(result) +end; + +(** + * Creates a named type expression node from the current token and skips that token. + *) +proc _elna_parser_named_type_expression(); +var + type_expression_size: Word; + result: Word; + type_name: Word; + name_length: Word; +begin + type_expression_size := _named_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.named_type_expression); + type_name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - type_name; + _named_type_expression_set_name(result, type_name); + _named_type_expression_set_length(result, name_length); + _elna_lexer_skip_token(); + + return result +end; + +(** + * Parses a type expression starting with an identifier, a left paren or the record keyword. + * + * Returns the parsed node or 0 if the current token is none of these. + *) +proc _elna_parser_type_expression(); +var + token_kind: Word; + result: Word; +begin + result := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.identifier then + result := _elna_parser_named_type_expression() + elsif token_kind = ElnaLexerKind.left_paren then + result := _elna_parser_enumeration_type_expression() + elsif token_kind = ElnaLexerKind._record then + result := _elna_parser_record_type_expression() + end; + return result +end; + +(** + * Resolves a type expression node. + * + * Returns 0 for a node kind that is neither a named, an enumeration nor + * a record type expression. + * + * NOTE(review): the named branch returns the result of _type_info_get_type, + * the other two branches return what _type_info_create produces. Confirm + * that the callers expect both. + *) +proc _elna_name_type_expression(parser_node: Word); +var + token_kind: Word; + type_name: Word; + name_length: Word; + result: Word; +begin + result := 0; + token_kind := _node_get_kind(parser_node); + + if token_kind = NodeKind.named_type_expression then + type_name := _named_type_expression_get_name(parser_node); + name_length := _named_type_expression_get_length(parser_node); + + result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); + result := _type_info_get_type(result) + elsif token_kind = NodeKind.enumeration_type_expression then + result := 
_elna_name_type_enumeration(parser_node) + elsif token_kind = NodeKind.record_type_expression then + result := _elna_name_type_record(parser_node) + end; + + return result +end; + +proc _type_info_get_type(this: Word); +begin + this := this + 4; + return this^ +end; + +(** + * Parameters: + * parameter_index - Parameter index. + *) +proc _parameter_info_create(parameter_index: Word); +var + offset: Word; + current_word: Word; + result: Word; +begin + result := _allocate(8); + current_word := result; + current_word^ := InfoKind.parameter_info; + + current_word := current_word + 4; + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + offset := parameter_index * 4; + current_word^ := 88 - offset; + + return result +end; + +proc _parameter_info_get_offset(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _type_info_create(type_representation: Word); +var + result: Word; + current_word: Word; +begin + result := _allocate(8); + current_word := result; + current_word^ := InfoKind.type_info; + + current_word := current_word + 4; + current_word^ := type_representation; + + return result +end; + +(** + * Parameters: + * temporary_index - Parameter index. + *) +proc _temporary_info_create(temporary_index: Word); +var + offset: Word; + current_word: Word; + result: Word; +begin + result := _allocate(8); + current_word := result; + current_word^ := InfoKind.temporary_info; + current_word := current_word + 4; + + (* Calculate the stack offset: 4 * variable_counter. *) + current_word^ := temporary_index * 4; + + return result +end; + +(** + * Parameters: + * symbol_table - Local symbol table. 
+ *) +proc _procedure_info_create(symbol_table: Word); +var + current_word: Word; + result: Word; +begin + result := _allocate(8); + current_word := result; + current_word^ := InfoKind.procedure_info; + current_word := current_word + 4; + + current_word^ := symbol_table; + + return result +end; + +proc _procedure_info_get_symbol_table(this: Word); +begin + this := this + 4; + return this^ +end; + +(** + * Parameters: + * parameter_index - Parameter index. + *) +proc _elna_name_procedure_parameter(parser_node: Word, parameter_index: Word, symbol_table: Word); +var + name_length: Word; + info: Word; + name_position: Word; +begin + name_position := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + info := _parameter_info_create(parameter_index); + _symbol_table_enter(symbol_table, name_position, name_length, info) +end; + +(** + * Parameters: + * variable_index - Variable index. + *) +proc _elna_name_procedure_temporary(parser_node: Word, variable_index: Word, symbol_table: Word); +var + name_length: Word; + info: Word; + name_position: Word; +begin + name_position := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + info := _temporary_info_create(variable_index); + _symbol_table_enter(symbol_table, name_position, name_length, info) +end; + +proc _elna_name_procedure_temporaries(parser_node: Word, symbol_table: Word); +var + temporary_counter: Word; +begin + temporary_counter := 0; + + .elna_name_procedure_temporaries_loop; + if parser_node <> 0 then + _elna_name_procedure_temporary(parser_node, temporary_counter, symbol_table); + + temporary_counter := temporary_counter + 1; + parser_node := _declaration_get_next(parser_node); + goto elna_name_procedure_temporaries_loop + end +end; + +proc _declaration_get_next(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _declaration_set_next(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + 
+proc _declaration_get_name(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _declaration_set_name(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _declaration_get_length(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _declaration_set_length(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +(* Kind + next declaration pointer + argument list + procedure name + statement list pointer + temporary list pointer. *) +proc _procedure_declaration_size(); + return 28 +end; + +proc _procedure_declaration_get_body(this: Word); +begin + this := this + 16; + return this^ +end; + +proc _procedure_declaration_set_body(this: Word, value: Word); +begin + this := this + 16; + this^ := value +end; + +proc _procedure_declaration_get_temporaries(this: Word); +begin + this := this + 20; + return this^ +end; + +proc _procedure_declaration_set_temporaries(this: Word, value: Word); +begin + this := this + 20; + this^ := value +end; + +proc _procedure_declaration_get_parameters(this: Word); +begin + this := this + 24; + return this^ +end; + +proc _procedure_declaration_set_parameters(this: Word, value: Word); +begin + this := this + 24; + this^ := value +end; + +proc _elna_parser_procedure_declaration(); +var + name_pointer: Word; + name_length: Word; + token_kind: Word; + result: Word; + declaration_size: Word; + parameter_head: Word; +begin + declaration_size := _procedure_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.procedure_declaration); + _declaration_set_next(result, 0); + + (* Skip "proc ". *) + _elna_lexer_skip_token(); + + _elna_lexer_read_token(@token_kind); + name_pointer := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - name_pointer; + + _declaration_set_name(result, name_pointer); + _declaration_set_length(result, name_length); + (* Skip procedure name. 
*) + _elna_lexer_skip_token(); + + (* Skip open paren. *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + parameter_head := 0; + + .elna_parser_procedure_declaration_parameter; + _elna_lexer_read_token(@token_kind); + + if token_kind <> ElnaLexerKind.right_paren then + (* name_pointer and name_length are reused here: name_pointer holds the parameter node just parsed, name_length the node parsed before it. *) + name_pointer := _elna_parser_variable_declaration(); + if parameter_head = 0 then + parameter_head := name_pointer + else + _declaration_set_next(name_length, name_pointer) + end; + name_length := name_pointer; + + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.comma then + _elna_lexer_skip_token(); + goto elna_parser_procedure_declaration_parameter + end + end; + (* Skip close paren. *) + _elna_lexer_skip_token(); + _procedure_declaration_set_parameters(result, parameter_head); + + (* Skip semicolon and newline. *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + (* parameter_head is reused for the temporaries and afterwards for the body. *) + parameter_head := _elna_parser_var_part(); + _procedure_declaration_set_temporaries(result, parameter_head); + + (* The body is either a "begin" block or a single return statement. *) + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind._begin then + _elna_lexer_skip_token(); + parameter_head := _elna_parser_statements() + elsif token_kind = ElnaLexerKind._return then + parameter_head := _elna_parser_return_statement() + end; + (* NOTE(review): if neither branch is taken, parameter_head still holds the result of _elna_parser_var_part and is stored as the body. *) + _procedure_declaration_set_body(result, parameter_head); + + (* Skip the "end" keyword. 
*) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + return result +end; + +proc _elna_tac_parameters(current_parameter: Word, new_symbol_table: Word); +var + name_pointer: Word; + name_length: Word; + parameter_counter: Word; + instruction: Word; + first_instruction: Word; + current_instruction: Word; + symbol_info: Word; +begin + first_instruction := 0; + parameter_counter := 0; + + .elna_tac_parameters_loop; + if current_parameter <> 0 then + name_pointer := _declaration_get_name(current_parameter); + name_length := _declaration_get_length(current_parameter); + symbol_info := _symbol_table_lookup(new_symbol_table, name_pointer, name_length); + + symbol_info := _parameter_info_get_offset(symbol_info); + + instruction := _elna_tac_store_word(ElnaTacRegister.a0 + parameter_counter, + ElnaTacRegister.sp, symbol_info); + if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + current_instruction := instruction; + + parameter_counter := parameter_counter + 1; + + current_parameter := _declaration_get_next(current_parameter); + goto elna_tac_parameters_loop + end; + return first_instruction +end; + +proc _elna_tac_procedure_declaration(parser_node: Word); +var + name_pointer: Word; + name_length: Word; + current_parameter: Word; + body: Word; + new_symbol_table: Word; + symbol_info: Word; + instruction: Word; + first_instruction: Word; + result: Word; + result_size: Word; +begin + result_size := _elna_tac_declaration_size(); + result := _allocate(result_size); + + _elna_tac_declaration_set_next(result, 0); + + name_pointer := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + _elna_tac_declaration_set_name(result, name_pointer); + _elna_tac_declaration_set_length(result, name_length); + + symbol_info := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); + new_symbol_table := 
_procedure_info_get_symbol_table(symbol_info); + + (* Write the prologue. *) + first_instruction := _elna_tac_instruction_create(ElnaTacOperator.start); + + (* NOTE(review): the parameter stores, the body and the epilogue are all linked through first_instruction. This presumably relies on _elna_tac_instruction_set_next appending to the end of the chain; confirm. *) + current_parameter := _procedure_declaration_get_parameters(parser_node); + current_parameter := _elna_tac_parameters(current_parameter, new_symbol_table); + _elna_tac_instruction_set_next(first_instruction, current_parameter); + + body := _procedure_declaration_get_body(parser_node); + instruction := _elna_tac_statements(body, new_symbol_table); + _elna_tac_instruction_set_next(first_instruction, instruction); + + (* Write the epilogue. *) + instruction := _elna_tac_instruction_create(ElnaTacOperator.ret); + _elna_tac_instruction_set_next(first_instruction, instruction); + + _elna_tac_declaration_set_body(result, first_instruction); + + return result +end; + +(** + * Parses consecutive procedure declarations and links them with _declaration_set_next. + * + * Returns the first declaration or 0 if the next token is not the proc keyword. + *) +proc _elna_parser_procedures(); +var + parser_node: Word; + result: Word; + current_declaration: Word; + token_kind: Word; +begin + result := 0; + + .elna_parser_procedures_loop; + _skip_empty_lines(); + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind._proc then + parser_node := _elna_parser_procedure_declaration(); + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + + (* Skip semicolon. 
*) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + goto elna_parser_procedures_loop + end; + return result +end; + +proc _elna_tac_procedures(parser_node: Word); +var + result: Word; + current_procedure: Word; + first_procedure: Word; +begin + first_procedure := 0; + + .elna_tac_procedures_loop; + if parser_node = 0 then + goto elna_tac_procedures_end + end; + result := _elna_tac_procedure_declaration(parser_node); + if first_procedure = 0 then + first_procedure := result + else + _elna_tac_declaration_set_next(current_procedure, result) + end; + current_procedure := result; + + parser_node := _declaration_get_next(parser_node); + goto elna_tac_procedures_loop; + + .elna_tac_procedures_end; + return first_procedure +end; + +(** + * Skips comments. + *) +proc _skip_empty_lines(); +var + token_kind: Word; +begin + .skip_empty_lines_rerun; + + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.comment then + _elna_lexer_skip_token(); + goto skip_empty_lines_rerun + end +end; + + +proc _type_declaration_size(); + return 20 +end; + +proc _type_declaration_get_type(this: Word); +begin + this := this + 16; + return this^ +end; + +proc _type_declaration_set_type(this: Word, value: Word); +begin + this := this + 16; + this^ := value +end; + +proc _elna_parser_type_declaration(); +var + token_kind: Word; + type_name: Word; + name_length: Word; + parser_node: Word; + result: Word; + declaration_size: Word; +begin + _elna_lexer_read_token(@token_kind); + type_name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - type_name; + + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + parser_node := _elna_parser_type_expression(); + declaration_size := _type_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.type_declaration); + _declaration_set_next(result, 0); + _declaration_set_name(result, type_name); + 
_declaration_set_length(result, name_length); + _type_declaration_set_type(result, parser_node); + + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + return result +end; + +proc _elna_name_type_declaration(parser_node: Word); +var + type_name: Word; + name_length: Word; + type_info: Word; +begin + type_name := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + parser_node := _type_declaration_get_type(parser_node); + type_info := _elna_name_type_expression(parser_node); + + _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info) +end; + +proc _elna_type_type_declaration(parser_node: Word); +begin +end; + +proc _elna_parser_type_part(); +var + token_kind: Word; + parser_node: Word; + result: Word; + current_declaration: Word; +begin + result := 0; + _skip_empty_lines(); + _elna_lexer_read_token(@token_kind); + + if token_kind <> ElnaLexerKind._type then + goto elna_parser_type_part_end + end; + _elna_lexer_skip_token(); + + .elna_parser_type_part_loop; + _skip_empty_lines(); + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.identifier then + parser_node := _elna_parser_type_declaration(); + + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + goto elna_parser_type_part_loop + end; + + .elna_parser_type_part_end; + return result +end; + +proc _variable_declaration_size(); + return 20 +end; + +proc _variable_declaration_get_type(this: Word); +begin + this := this + 16; + return this^ +end; + +proc _variable_declaration_set_type(this: Word, value: Word); +begin + this := this + 16; + this^ := value +end; + +proc _elna_parser_variable_declaration(); +var + token_kind: Word; + name: Word; + name_length: Word; + variable_type: Word; + result: Word; + declaration_size: Word; +begin + _elna_lexer_read_token(@token_kind); + + name := _elna_lexer_global_get_start(); + 
name_length := _elna_lexer_global_get_end() - name; + + (* Skip the variable name and colon with the type. *) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + variable_type := _elna_parser_type_expression(); + + declaration_size := _variable_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.variable_declaration); + _declaration_set_next(result, 0); + _declaration_set_name(result, name); + _declaration_set_length(result, name_length); + _variable_declaration_set_type(result, variable_type); + + return result +end; + +(** + * Creates a TAC declaration carrying the name of a variable declaration. + * The body is set to 4096 if the type is named Array and to 4 otherwise. + * + * Parameters: + * parser_tree - Variable declaration node. + *) +proc _elna_tac_variable_declaration(parser_tree: Word); +var + name: Word; + name_length: Word; + variable_type: Word; + result: Word; + result_size: Word; +begin + result_size := _elna_tac_declaration_size(); + result := _allocate(result_size); + + _elna_tac_declaration_set_next(result, 0); + + name := _declaration_get_name(parser_tree); + name_length := _declaration_get_length(parser_tree); + variable_type := _variable_declaration_get_type(parser_tree); + + _elna_tac_declaration_set_name(result, name); + _elna_tac_declaration_set_length(result, name_length); + + (* name and name_length are reused for the type name from here on. NOTE(review): variable_type is read as a named type expression without checking its node kind. *) + name := _named_type_expression_get_name(variable_type); + name_length := _named_type_expression_get_length(variable_type); + + if _string_compare("Array", 5, name, name_length) then + (* A variable of the type Array is assumed to be a zeroed 4096 bytes big array. 
*) + _elna_tac_declaration_set_body(result, 4096) + else + _elna_tac_declaration_set_body(result, 4) + end; + return result +end; + +proc _elna_tac_type_field(name_pointer: Word, name_length: Word, field_pointer: Word, field_offset: Word); +var + result_size: Word; + first_result: Word; + second_result: Word; + new_name: Word; + new_length: Word; + field_length: Word; + instruction: Word; + name_target: Word; + next_instruction: Word; +begin + result_size := _elna_tac_declaration_size(); + field_length := field_pointer + 4; + field_length := field_length^; + new_length := field_length + name_length; + new_length := new_length + 5; + + first_result := _allocate(result_size); + _elna_tac_declaration_set_next(first_result, 0); + + new_name := _allocate(new_length); + + name_target := new_name; + _memcpy(name_target, name_pointer, name_length); + name_target := name_target + name_length; + _memcpy(name_target, "_get_", 5); + name_target := name_target + 5; + _memcpy(name_target, field_pointer^, field_length); + + _elna_tac_declaration_set_name(first_result, new_name); + _elna_tac_declaration_set_length(first_result, new_length); + + instruction := _elna_tac_add_immediate(ElnaTacRegister.a0, ElnaTacRegister.a0, field_offset, 0); + next_instruction := _elna_tac_load_word(ElnaTacRegister.a0, ElnaTacRegister.a0, 0); + _elna_tac_instruction_set_next(instruction, next_instruction); + _elna_tac_declaration_set_body(first_result, instruction); + + second_result := _allocate(result_size); + _elna_tac_declaration_set_next(second_result, 0); + + new_name := _allocate(new_length); + + name_target := new_name; + _memcpy(name_target, name_pointer, name_length); + name_target := name_target + name_length; + _memcpy(name_target, "_set_", 5); + name_target := name_target + 5; + _memcpy(name_target, field_pointer^, field_length); + + _elna_tac_declaration_set_name(second_result, new_name); + _elna_tac_declaration_set_length(second_result, new_length); + + instruction := 
_elna_tac_add_immediate(ElnaTacRegister.a0, ElnaTacRegister.a0, field_offset, 0); + next_instruction := _elna_tac_store_word(ElnaTacRegister.a1, ElnaTacRegister.a0, 0); + _elna_tac_instruction_set_next(instruction, next_instruction); + _elna_tac_declaration_set_body(second_result, instruction); + + _elna_tac_declaration_set_next(first_result, second_result); + + return first_result +end; + +proc _elna_tac_type_record(name_pointer: Word, name_length: Word, type_representation: Word, current_result: Word); +var + result_size: Word; + first_result: Word; + result: Word; + type_size: Word; + new_name: Word; + new_length: Word; + instruction: Word; + field_count: Word; + field_offset: Word; + field_pointer: Word; +begin + result_size := _elna_tac_declaration_size(); + first_result := _allocate(result_size); + result := 0; + + (* Debug. Error stream output. + _syscall(2, name_pointer, name_length, 0, 0, 0, 64); *) + + type_size := _type_get_size(type_representation); + new_length := name_length + 5; + new_name := _allocate(new_length); + + _memcpy(new_name, name_pointer, name_length); + _memcpy(new_name + name_length, "_size", 5); + + _elna_tac_declaration_set_name(first_result, new_name); + _elna_tac_declaration_set_length(first_result, new_length); + + instruction := _elna_tac_load_immediate(ElnaTacRegister.a0, type_size, 0); + _elna_tac_declaration_set_body(first_result, instruction); + + field_count := _record_type_get_length(type_representation); + field_pointer := _record_type_get_members(type_representation); + field_offset := 0; + current_result^ := first_result; + + .elna_tac_type_record_fields; + if field_count > 0 then + result := _elna_tac_type_field(name_pointer, name_length, field_pointer, field_offset); + + _elna_tac_declaration_set_next(current_result^, result); + current_result^ := _elna_tac_declaration_get_next(result); + + field_offset := field_offset + 4; + field_count := field_count - 1; + field_pointer := field_pointer + 12; + goto 
elna_tac_type_record_fields + end; + + return first_result +end; + +proc _elna_tac_type_part(parser_node: Word); +var + name_pointer: Word; + name_length: Word; + result: Word; + first_result: Word; + symbol: Word; + info_type: Word; + type_kind: Word; + current_result: Word; + out_result: Word; +begin + first_result := 0; + + .elna_tac_type_part_loop; + if parser_node = 0 then + goto elna_tac_type_part_end + end; + + name_pointer := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + symbol := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); + + info_type := _type_info_get_type(symbol); + type_kind := _type_get_kind(info_type); + + if type_kind = TypeKind._record then + result := _elna_tac_type_record(name_pointer, name_length, info_type, @out_result) + else + result := 0; + out_result := 0 + end; + if first_result = 0 then + first_result := result; + current_result := out_result + elsif result <> 0 then + _elna_tac_declaration_set_next(current_result, result); + current_result := out_result + end; + parser_node := _declaration_get_next(parser_node); + goto elna_tac_type_part_loop; + + .elna_tac_type_part_end; + return first_result +end; + +proc _elna_parser_var_part(); +var + result: Word; + token_kind: Word; + variable_node: Word; + current_declaration: Word; +begin + result := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind <> ElnaLexerKind._var then + goto elna_parser_var_part_end + end; + (* Skip "var". *) + _elna_lexer_skip_token(); + + .elna_parser_var_part_loop; + _skip_empty_lines(); + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.identifier then + variable_node := _elna_parser_variable_declaration(); + + (* Skip semicolon. 
*) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + if result = 0 then + result := variable_node + else + _declaration_set_next(current_declaration, variable_node) + end; + current_declaration := variable_node; + goto elna_parser_var_part_loop + end; + + .elna_parser_var_part_end; + return result +end; + +proc _elna_tac_var_part(parser_node: Word); +var + node: Word; + current_variable: Word; + first_variable: Word; +begin + first_variable := 0; + if parser_node = 0 then + goto elna_tac_var_part_end + end; + + .elna_tac_var_part_loop; + node := _elna_tac_variable_declaration(parser_node); + if first_variable = 0 then + first_variable := node + else + _elna_tac_declaration_set_next(current_variable, node) + end; + current_variable := node; + + parser_node := _declaration_get_next(parser_node); + if parser_node <> 0 then + goto elna_tac_var_part_loop + end; + + .elna_tac_var_part_end; + return first_variable +end; + +proc _module_declaration_size(); + return 16 +end; + +proc _module_declaration_get_types(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _module_declaration_set_types(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _module_declaration_get_globals(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _module_declaration_set_globals(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _module_declaration_get_procedures(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _module_declaration_set_procedures(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _elna_parser_module_declaration(); +var + parser_node: Word; + declaration_size: Word; + result: Word; +begin + declaration_size := _module_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.module_declaration); + + parser_node := _elna_parser_type_part(); + 
_module_declaration_set_types(result, parser_node); + + parser_node := _elna_parser_var_part(); + _module_declaration_set_globals(result, parser_node); + + parser_node := _elna_parser_procedures(); + _module_declaration_set_procedures(result, parser_node); + + return result +end; + +(** + * Translates a parsed module declaration: converts its type, global variable + * and procedure parts with the _elna_tac_* procedures and appends the type + * declarations to the end of the procedure declaration list. + * + * Parameters: + * parser_node - Module declaration node. + * + * Returns the value produced by _elna_tac_module_create for the data and code parts. + *) +proc _elna_tac_module_declaration(parser_node: Word); +var + data_part: Word; + code_part: Word; + type_part: Word; + current_declaration: Word; + next_declaration: Word; +begin + type_part := _module_declaration_get_types(parser_node); + type_part := _elna_tac_type_part(type_part); + + data_part := _module_declaration_get_globals(parser_node); + data_part := _elna_tac_var_part(data_part); + + code_part := _module_declaration_get_procedures(parser_node); + code_part := _elna_tac_procedures(code_part); + + (* Without procedures there is no list to append to, so the type declarations become the code part. *) + if code_part = 0 then + code_part := type_part; + goto elna_tac_module_declaration_end + end; + current_declaration := code_part; + + (* Advance to the last declaration of the procedure list. *) + .elna_tac_module_declaration_types; + next_declaration := _elna_tac_declaration_get_next(current_declaration); + if next_declaration <> 0 then + current_declaration := next_declaration; + + goto elna_tac_module_declaration_types + end; + _elna_tac_declaration_set_next(current_declaration, type_part); + + .elna_tac_module_declaration_end; + return _elna_tac_module_create(data_part, code_part) +end; + +proc _elna_name_procedure_declaration(parser_node: Word); +var + name_pointer: Word; + name_length: Word; + new_symbol_table: Word; + parameter_counter: Word; + symbol_info: Word; + current_parameter: Word; +begin + new_symbol_table := _symbol_table_create(); + symbol_info := _procedure_info_create(new_symbol_table); + + name_pointer := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + current_parameter := _procedure_declaration_get_parameters(parser_node); + parameter_counter := 0; + .elna_name_procedure_declaration_parameter; + if current_parameter <> 0 then + _elna_name_procedure_parameter(current_parameter, parameter_counter, new_symbol_table); + parameter_counter := 
parameter_counter + 1; + + current_parameter := _declaration_get_next(current_parameter); + goto elna_name_procedure_declaration_parameter + end; + current_parameter := _procedure_declaration_get_temporaries(parser_node); + _elna_name_procedure_temporaries(current_parameter, new_symbol_table); + + _symbol_table_enter(@symbol_table_global, name_pointer, name_length, symbol_info) +end; + +proc _elna_type_procedure_declaration(parser_node: Word); +begin +end; + +proc _elna_name_module_declaration(parser_node: Word); +var + current_part: Word; + result: Word; +begin + current_part := _module_declaration_get_types(parser_node); + .elna_name_module_declaration_type; + if current_part <> 0 then + _elna_name_type_declaration(current_part); + current_part := _declaration_get_next(current_part); + + goto elna_name_module_declaration_type + end; + + current_part := _module_declaration_get_procedures(parser_node); + .elna_name_module_declaration_procedure; + if current_part <> 0 then + _elna_name_procedure_declaration(current_part); + current_part := _declaration_get_next(current_part); + + goto elna_name_module_declaration_procedure + end +end; + +proc _elna_type_module_declaration(parser_node: Word); +var + current_part: Word; +begin + current_part := _module_declaration_get_types(parser_node); + .elna_type_module_declaration_type; + if current_part <> 0 then + _elna_type_type_declaration(current_part); + current_part := _declaration_get_next(current_part); + + goto elna_type_module_declaration_type + end; + + current_part := _module_declaration_get_procedures(parser_node); + .elna_type_module_declaration_procedure; + if current_part <> 0 then + _elna_type_procedure_declaration(current_part); + current_part := _declaration_get_next(current_part); + + goto elna_type_module_declaration_procedure + end +end; + +proc _compile(); +var + parser_node: Word; + tac: Word; +begin + parser_node := _elna_parser_module_declaration(); + _elna_name_module_declaration(parser_node); + 
_elna_type_module_declaration(parser_node); + tac := _elna_tac_module_declaration(parser_node); + _elna_writer_module(tac) +end; + +(** + * Terminates the program by calling _syscall with 93 as its last argument + * (presumably the exit system call number, confirm against _syscall). + * + * Parameters: + * status - Status code. + *) +proc _exit(status: Word); +begin + _syscall(status, 0, 0, 0, 0, 0, 93) +end; + +(** + * Looks for a symbol in the given symbol table. + * + * The table starts with a word holding the number of entries. Each entry is + * 12 bytes long: name pointer, name length and symbol pointer. + * + * Parameters: + * symbol_table - Symbol table. + * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * + * Returns the symbol pointer or 0 in a0. + *) +proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); +var + result: Word; + symbol_table_length: Word; + current_name: Word; + current_length: Word; +begin + result := 0; + + (* The first word in the symbol table is its length, get it. *) + symbol_table_length := symbol_table^; + + (* Go to the first symbol position. *) + symbol_table := symbol_table + 4; + + .symbol_table_lookup_loop; + if symbol_table_length = 0 then + goto symbol_table_lookup_end + end; + + (* Symbol name pointer and length. *) + current_name := symbol_table^; + current_length := symbol_table + 4; + current_length := current_length^; + + (* If lengths don't match, continue with the next entry. *) + if name_length <> current_length then + goto symbol_table_lookup_repeat + end; + (* If names don't match, continue with the next entry. *) + if _memcmp(symbol_name, current_name, name_length) then + goto symbol_table_lookup_repeat + end; + (* Otherwise, the symbol is found. *) + result := symbol_table + 8; + result := result^; + goto symbol_table_lookup_end; + + (* Advance to the next 12 byte entry. *) + .symbol_table_lookup_repeat; + symbol_table := symbol_table + 12; + symbol_table_length := symbol_table_length - 1; + goto symbol_table_lookup_loop; + + .symbol_table_lookup_end; + return result +end; + +(** + * Create a new local symbol table in the symbol memory region after the last + * known symbol table. 
+ *) +proc _symbol_table_create(); +var + new_symbol_table: Word; + table_length: Word; + current_table: Word; +begin + new_symbol_table := symbol_table_store; + + .symbol_table_create_loop; + table_length := new_symbol_table^; + + if table_length <> 0 then + table_length := table_length * 12; + table_length := table_length + 4; + new_symbol_table := new_symbol_table + table_length; + goto symbol_table_create_loop + end; + + return new_symbol_table +end; + +(** + * Inserts a symbol into the table. + * + * Parameters: + * symbol_table - Symbol table. + * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * symbol - Symbol pointer. + *) +proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); +var + table_length: Word; + symbol_pointer: Word; +begin + (* The first word in the symbol table is its length, get it. *) + table_length := symbol_table^; + + (* Calculate the offset for the new symbol. *) + symbol_pointer := table_length * 12; + symbol_pointer := symbol_pointer + 4; + symbol_pointer := symbol_table + symbol_pointer; + + symbol_pointer^ := symbol_name; + symbol_pointer := symbol_pointer + 4; + symbol_pointer^ := name_length; + symbol_pointer := symbol_pointer + 4; + symbol_pointer^ := symbol; + + (* Increment the symbol table length. *) + table_length := table_length + 1; + symbol_table^ := table_length +end; + +proc _symbol_table_build(); +var + current_info: Word; + current_type: Word; +begin + (* Set the table length to 0. *) + symbol_table_global := 0; + + current_type := _allocate(8); + _type_set_kind(current_type, TypeKind.primitive); + _type_set_size(current_type, 4); + + (* Enter built-in symbols. 
*) + current_info := _type_info_create(current_type); + _symbol_table_enter(@symbol_table_global, "Word", 4, current_info); + + current_info := _type_info_create(current_type); + _symbol_table_enter(@symbol_table_global, "Array", 5, current_info) +end; + +(** + * Assigns a value to the array element at the given index. + * + * Parameters: + * array - Array pointer. + * index - One-based element index. Elements are 4 bytes wide, index 1 is the word at the array pointer itself. + * data - Data to assign. + *) +proc _assign_at(array: Word, index: Word, data: Word); +var + target: Word; +begin + target := index - 1; + target := target * 4; + target := array + target; + + target^ := data +end; + +(** + * Reads the array element at the given index. + * + * Parameters: + * array - Array pointer. + * index - One-based element index, addressed the same way as in _assign_at. + * + * Returns the word stored in that element. + *) +proc _get_at(array: Word, index: Word); +var + target: Word; +begin + target := index - 1; + target := target * 4; + target := array + target; + + return target^ +end; + +(** + * Initializes the classification array with character classes. + * The indices are one-based, the entry at index n holds the class + * of the character with the code n - 1. + *) +proc _elna_lexer_classifications(); +var + code: Word; +begin + _assign_at(@classification, 1, ElnaLexerClass.eof); + _assign_at(@classification, 2, ElnaLexerClass.invalid); + _assign_at(@classification, 3, ElnaLexerClass.invalid); + _assign_at(@classification, 4, ElnaLexerClass.invalid); + _assign_at(@classification, 5, ElnaLexerClass.invalid); + _assign_at(@classification, 6, ElnaLexerClass.invalid); + _assign_at(@classification, 7, ElnaLexerClass.invalid); + _assign_at(@classification, 8, ElnaLexerClass.invalid); + _assign_at(@classification, 9, ElnaLexerClass.invalid); + _assign_at(@classification, 10, ElnaLexerClass.space); + _assign_at(@classification, 11, ElnaLexerClass.space); + _assign_at(@classification, 12, ElnaLexerClass.invalid); + _assign_at(@classification, 13, ElnaLexerClass.invalid); + _assign_at(@classification, 14, ElnaLexerClass.space); + _assign_at(@classification, 15, ElnaLexerClass.invalid); + _assign_at(@classification, 16, ElnaLexerClass.invalid); + _assign_at(@classification, 17, ElnaLexerClass.invalid); + _assign_at(@classification, 18, ElnaLexerClass.invalid); + _assign_at(@classification, 19, 
ElnaLexerClass.invalid); + _assign_at(@classification, 20, ElnaLexerClass.invalid); + _assign_at(@classification, 21, ElnaLexerClass.invalid); + _assign_at(@classification, 22, ElnaLexerClass.invalid); + _assign_at(@classification, 23, ElnaLexerClass.invalid); + _assign_at(@classification, 24, ElnaLexerClass.invalid); + _assign_at(@classification, 25, ElnaLexerClass.invalid); + _assign_at(@classification, 26, ElnaLexerClass.invalid); + _assign_at(@classification, 27, ElnaLexerClass.invalid); + _assign_at(@classification, 28, ElnaLexerClass.invalid); + _assign_at(@classification, 29, ElnaLexerClass.invalid); + _assign_at(@classification, 30, ElnaLexerClass.invalid); + _assign_at(@classification, 31, ElnaLexerClass.invalid); + _assign_at(@classification, 32, ElnaLexerClass.invalid); + _assign_at(@classification, 33, ElnaLexerClass.space); + _assign_at(@classification, 34, ElnaLexerClass.single); + _assign_at(@classification, 35, ElnaLexerClass.double_quote); + _assign_at(@classification, 36, ElnaLexerClass.other); + _assign_at(@classification, 37, ElnaLexerClass.other); + _assign_at(@classification, 38, ElnaLexerClass.single); + _assign_at(@classification, 39, ElnaLexerClass.single); + _assign_at(@classification, 40, ElnaLexerClass.single_quote); + _assign_at(@classification, 41, ElnaLexerClass.left_paren); + _assign_at(@classification, 42, ElnaLexerClass.right_paren); + _assign_at(@classification, 43, ElnaLexerClass.asterisk); + _assign_at(@classification, 44, ElnaLexerClass.single); + _assign_at(@classification, 45, ElnaLexerClass.single); + _assign_at(@classification, 46, ElnaLexerClass.minus); + _assign_at(@classification, 47, ElnaLexerClass.dot); + _assign_at(@classification, 48, ElnaLexerClass.single); + _assign_at(@classification, 49, ElnaLexerClass.zero); + _assign_at(@classification, 50, ElnaLexerClass.digit); + _assign_at(@classification, 51, ElnaLexerClass.digit); + _assign_at(@classification, 52, ElnaLexerClass.digit); + _assign_at(@classification, 53, 
ElnaLexerClass.digit); + _assign_at(@classification, 54, ElnaLexerClass.digit); + _assign_at(@classification, 55, ElnaLexerClass.digit); + _assign_at(@classification, 56, ElnaLexerClass.digit); + _assign_at(@classification, 57, ElnaLexerClass.digit); + _assign_at(@classification, 58, ElnaLexerClass.digit); + _assign_at(@classification, 59, ElnaLexerClass.colon); + _assign_at(@classification, 60, ElnaLexerClass.single); + _assign_at(@classification, 61, ElnaLexerClass.less); + _assign_at(@classification, 62, ElnaLexerClass.equals); + _assign_at(@classification, 63, ElnaLexerClass.greater); + _assign_at(@classification, 64, ElnaLexerClass.other); + _assign_at(@classification, 65, ElnaLexerClass.single); + _assign_at(@classification, 66, ElnaLexerClass.alpha); + _assign_at(@classification, 67, ElnaLexerClass.alpha); + _assign_at(@classification, 68, ElnaLexerClass.alpha); + _assign_at(@classification, 69, ElnaLexerClass.alpha); + _assign_at(@classification, 70, ElnaLexerClass.alpha); + _assign_at(@classification, 71, ElnaLexerClass.alpha); + _assign_at(@classification, 72, ElnaLexerClass.alpha); + _assign_at(@classification, 73, ElnaLexerClass.alpha); + _assign_at(@classification, 74, ElnaLexerClass.alpha); + _assign_at(@classification, 75, ElnaLexerClass.alpha); + _assign_at(@classification, 76, ElnaLexerClass.alpha); + _assign_at(@classification, 77, ElnaLexerClass.alpha); + _assign_at(@classification, 78, ElnaLexerClass.alpha); + _assign_at(@classification, 79, ElnaLexerClass.alpha); + _assign_at(@classification, 80, ElnaLexerClass.alpha); + _assign_at(@classification, 81, ElnaLexerClass.alpha); + _assign_at(@classification, 82, ElnaLexerClass.alpha); + _assign_at(@classification, 83, ElnaLexerClass.alpha); + _assign_at(@classification, 84, ElnaLexerClass.alpha); + _assign_at(@classification, 85, ElnaLexerClass.alpha); + _assign_at(@classification, 86, ElnaLexerClass.alpha); + _assign_at(@classification, 87, ElnaLexerClass.alpha); + _assign_at(@classification, 88, 
ElnaLexerClass.alpha); + _assign_at(@classification, 89, ElnaLexerClass.alpha); + _assign_at(@classification, 90, ElnaLexerClass.alpha); + _assign_at(@classification, 91, ElnaLexerClass.alpha); + _assign_at(@classification, 92, ElnaLexerClass.single); + _assign_at(@classification, 93, ElnaLexerClass.backslash); + _assign_at(@classification, 94, ElnaLexerClass.single); + _assign_at(@classification, 95, ElnaLexerClass.single); + _assign_at(@classification, 96, ElnaLexerClass.alpha); + _assign_at(@classification, 97, ElnaLexerClass.other); + _assign_at(@classification, 98, ElnaLexerClass.hex); + _assign_at(@classification, 99, ElnaLexerClass.hex); + _assign_at(@classification, 100, ElnaLexerClass.hex); + _assign_at(@classification, 101, ElnaLexerClass.hex); + _assign_at(@classification, 102, ElnaLexerClass.hex); + _assign_at(@classification, 103, ElnaLexerClass.hex); + _assign_at(@classification, 104, ElnaLexerClass.alpha); + _assign_at(@classification, 105, ElnaLexerClass.alpha); + _assign_at(@classification, 106, ElnaLexerClass.alpha); + _assign_at(@classification, 107, ElnaLexerClass.alpha); + _assign_at(@classification, 108, ElnaLexerClass.alpha); + _assign_at(@classification, 109, ElnaLexerClass.alpha); + _assign_at(@classification, 110, ElnaLexerClass.alpha); + _assign_at(@classification, 111, ElnaLexerClass.alpha); + _assign_at(@classification, 112, ElnaLexerClass.alpha); + _assign_at(@classification, 113, ElnaLexerClass.alpha); + _assign_at(@classification, 114, ElnaLexerClass.alpha); + _assign_at(@classification, 115, ElnaLexerClass.alpha); + _assign_at(@classification, 116, ElnaLexerClass.alpha); + _assign_at(@classification, 117, ElnaLexerClass.alpha); + _assign_at(@classification, 118, ElnaLexerClass.alpha); + _assign_at(@classification, 119, ElnaLexerClass.alpha); + _assign_at(@classification, 120, ElnaLexerClass.alpha); + _assign_at(@classification, 121, ElnaLexerClass.x); + _assign_at(@classification, 122, ElnaLexerClass.alpha); + 
_assign_at(@classification, 123, ElnaLexerClass.alpha); + _assign_at(@classification, 124, ElnaLexerClass.other); + _assign_at(@classification, 125, ElnaLexerClass.single); + _assign_at(@classification, 126, ElnaLexerClass.other); + _assign_at(@classification, 127, ElnaLexerClass.single); + _assign_at(@classification, 128, ElnaLexerClass.invalid); + + code := 129; + + (* Set the remaining 129 - 256 bytes to transitionClassOther. *) + .create_classification_loop; + _assign_at(@classification, code, ElnaLexerClass.other); + code := code + 1; + + if code < 257 then + goto create_classification_loop + end +end; + +proc _elna_lexer_get_transition(current_state: Word, character_class: Word); +var + transition_table: Word; + row_position: Word; + column_position: Word; + target: Word; +begin + (* Each state is 8 bytes long (2 words: action and next state). + There are 22 character classes, so a transition row 8 * 22 = 176 bytes long. *) + row_position := current_state - 1; + row_position := row_position * 176; + + column_position := character_class - 1; + column_position := column_position * 8; + + target := _elna_lexer_get_transition_table(); + target := target + row_position; + + return target + column_position +end; + +(** + * Parameters: + * current_state - First index into transitions table. + * character_class - Second index into transitions table. + * action - Action to assign. + * next_state - Next state to assign. + *) +proc _elna_lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); +var + transition: Word; +begin + transition := _elna_lexer_get_transition(current_state, character_class); + + _elna_lexer_transition_set_action(transition, action); + _elna_lexer_transition_set_state(transition, next_state) +end; + +(* Sets same action and state transition for all character classes in one transition row. *) + +(** + * Parameters: + * current_state - Current state (Transition state enumeration). 
+ * default_action - Default action (Callback). + * next_state - Next state (Transition state enumeration). + *) +proc _elna_lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); +begin + _elna_lexer_set_transition(current_state, ElnaLexerClass.invalid, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.digit, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.alpha, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.space, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.colon, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.equals, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.left_paren, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.right_paren, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.asterisk, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.backslash, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.single, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.hex, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.zero, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.x, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.eof, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.dot, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.minus, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.single_quote, default_action, next_state); + _elna_lexer_set_transition(current_state, 
ElnaLexerClass.double_quote, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.greater, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.less, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.other, default_action, next_state) +end; + +(** + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + *) +proc _elna_lexer_transitions(); +begin + (* Start state. *) + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.space, ElnaLexerAction.skip, ElnaLexerState.start); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.colon, ElnaLexerAction.accumulate, ElnaLexerState.colon); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.equals, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.left_paren, ElnaLexerAction.accumulate, ElnaLexerState.left_paren); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.right_paren, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.asterisk, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.backslash, ElnaLexerAction.none, 
ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.leading_zero); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.eof, ElnaLexerAction.eof, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.dot, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.minus, ElnaLexerAction.accumulate, ElnaLexerState.minus); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single_quote, ElnaLexerAction.accumulate, ElnaLexerState.character); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.double_quote, ElnaLexerAction.accumulate, ElnaLexerState.string); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.greater, ElnaLexerAction.accumulate, ElnaLexerState.greater); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.less, ElnaLexerAction.accumulate, ElnaLexerState.less); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.other, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Colon state. *) + _elna_lexer_default_transition(ElnaLexerState.colon, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.colon, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Identifier state. 
*) + _elna_lexer_default_transition(ElnaLexerState.identifier, ElnaLexerAction.key_id, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + + (* Decimal state. *) + _elna_lexer_default_transition(ElnaLexerState.decimal, ElnaLexerAction.integer, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.decimal); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Leading zero. 
*) + _elna_lexer_default_transition(ElnaLexerState.leading_zero, ElnaLexerAction.integer, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.digit, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.zero, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.dot); + + (* Greater state. *) + _elna_lexer_default_transition(ElnaLexerState.greater, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.greater, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Minus state. *) + _elna_lexer_default_transition(ElnaLexerState.minus, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.minus, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Left paren state. *) + _elna_lexer_default_transition(ElnaLexerState.left_paren, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.left_paren, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.comment); + + (* Less state. *) + _elna_lexer_default_transition(ElnaLexerState.less, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Hexadecimal after 0x. 
*) + _elna_lexer_default_transition(ElnaLexerState.dot, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.dot, ElnaLexerClass.dot, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Comment. *) + _elna_lexer_default_transition(ElnaLexerState.comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); + _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); + _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Closing comment. *) + _elna_lexer_default_transition(ElnaLexerState.closing_comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.right_paren, ElnaLexerAction.delimited, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Character. 
*) + _elna_lexer_default_transition(ElnaLexerState.character, ElnaLexerAction.accumulate, ElnaLexerState.character); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.single_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.character_escape); + + (* Escape sequence in a character. *) + _elna_lexer_default_transition(ElnaLexerState.character_escape, ElnaLexerAction.accumulate, ElnaLexerState.character); + _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + + (* String. *) + _elna_lexer_default_transition(ElnaLexerState.string, ElnaLexerAction.accumulate, ElnaLexerState.string); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.double_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.string_escape); + + (* Escape sequence in a string. 
*) + _elna_lexer_default_transition(ElnaLexerState.string_escape, ElnaLexerAction.accumulate, ElnaLexerState.string); + _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish) +end; + +(** + * Transition table is saved after character classification table. + * Each character entry is 1 word long and there are 256 characters. + * 1024 = 256 * 4 + *) +proc _elna_lexer_get_transition_table(); + return @classification + 1024 +end; + +(** + * Lexer state is saved after the transition tables. + * Each transition table entry is 8 bytes long and a row has 22 columns (character classes). + * The offset skips 17 rows, so 2992 = 8 * 17 * 22. + * + * The first word of the lexer state is the current transition state, the words + * at the offsets 4 and 8 are the token start and the token end. + *) +proc _elna_lexer_global_state(); +var + result: Word; +begin + result := _elna_lexer_get_transition_table(); + return result + 2992 +end; + +(** + * Gets pointer to the token start. + *) +proc _elna_lexer_global_get_start(); +var + target: Word; +begin + target := _elna_lexer_global_state() + 4; + return target^ +end; + +(** + * Sets pointer to the token start. + *) +proc _elna_lexer_global_set_start(new_start: Word); +var + target: Word; +begin + target := _elna_lexer_global_state() + 4; + target^ := new_start +end; + +(** + * Gets pointer to the token end. + *) +proc _elna_lexer_global_get_end(); +var + target: Word; +begin + target := _elna_lexer_global_state() + 8; + return target^ +end; + +(** + * Sets pointer to the token end. 
+ *) +proc _elna_lexer_global_set_end(new_start: Word); +var + target: Word; +begin + target := _elna_lexer_global_state() + 8; + target^ := new_start +end; + +proc _elna_lexer_transition_get_action(this: Word); + return this^ +end; + +proc _elna_lexer_transition_set_action(this: Word, value: Word); +begin + this^ := value +end; + +proc _elna_lexer_transition_get_state(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _elna_lexer_transition_set_state(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +(** + * Resets the lexer state for reading the next token. + *) +proc _elna_lexer_reset(); +var + state: Word; +begin + (* Transition start state is 1. *) + state := _elna_lexer_global_state(); + state^ := ElnaLexerState.start; + + state := _elna_lexer_global_get_start(); + _elna_lexer_global_set_end(state) +end; + +(** + * One time lexer initialization. + *) +proc _elna_lexer_initialize(code_pointer: Word); +begin + _elna_lexer_classifications(); + _elna_lexer_transitions(); + + _elna_lexer_global_set_start(code_pointer); + _elna_lexer_global_set_end(code_pointer) +end; + +proc _elna_lexer_next_transition(); +var + current_character: Word; + character_class: Word; + current_state: Word; +begin + current_character := _elna_lexer_global_get_end(); + current_character := _load_byte(current_character); + + character_class := _get_at(@classification, current_character + 1); + + current_state := _elna_lexer_global_state(); + current_state := current_state^; + + return _elna_lexer_get_transition(current_state, character_class) +end; + +proc _string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := _memcmp(lhs_pointer, rhs_pointer, lhs_length); + result := result = 0 + end; + return result +end; + +proc _elna_lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + 
token_length: Word; +begin + result := ElnaLexerKind.identifier; + token_length := position_end - position_start; + + if _string_compare(position_start, token_length, "const", 5) then + result := ElnaLexerKind._const + elsif _string_compare(position_start, token_length, "var", 3) then + result := ElnaLexerKind._var + elsif _string_compare(position_start, token_length, "proc", 4) then + result := ElnaLexerKind._proc + elsif _string_compare(position_start, token_length, "type", 4) then + result := ElnaLexerKind._type + elsif _string_compare(position_start, token_length, "begin", 5) then + result := ElnaLexerKind._begin + elsif _string_compare(position_start, token_length, "end", 3) then + result := ElnaLexerKind._end + elsif _string_compare(position_start, token_length, "return", 6) then + result := ElnaLexerKind._return + elsif _string_compare(position_start, token_length, "goto", 4) then + result := ElnaLexerKind._goto + elsif _string_compare(position_start, token_length, "if", 2) then + result := ElnaLexerKind._if + elsif _string_compare(position_start, token_length, "while", 5) then + result := ElnaLexerKind._while + elsif _string_compare(position_start, token_length, "then", 4) then + result := ElnaLexerKind._then + elsif _string_compare(position_start, token_length, "else", 4) then + result := ElnaLexerKind._else + elsif _string_compare(position_start, token_length, "elsif", 5) then + result := ElnaLexerKind._elsif + elsif _string_compare(position_start, token_length, "record", 6) then + result := ElnaLexerKind._record + elsif _string_compare(position_start, token_length, "or", 2) then + result := ElnaLexerKind._or + elsif _string_compare(position_start, token_length, "xor", 3) then + result := ElnaLexerKind._xor + end; + return result +end; + +proc _elna_lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := 
ElnaLexerKind.colon + elsif character = '.' then + result := ElnaLexerKind.dot + elsif character = '(' then + result := ElnaLexerKind.left_paren + elsif character = '-' then + result := ElnaLexerKind.minus + elsif character = '<' then + result := ElnaLexerKind.less_than + elsif character = '>' then + result := ElnaLexerKind.greater_than + end; + return result +end; + +proc _elna_lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := ElnaLexerKind.semicolon + elsif character = ',' then + result := ElnaLexerKind.comma + elsif character = ')' then + result := ElnaLexerKind.right_paren + elsif character = '@' then + result := ElnaLexerKind.at + elsif character = '~' then + result := ElnaLexerKind.not + elsif character = '&' then + result := ElnaLexerKind.and + elsif character = '+' then + result := ElnaLexerKind.plus + elsif character = '*' then + result := ElnaLexerKind.multiplication + elsif character = '=' then + result := ElnaLexerKind.equals + elsif character = '%' then + result := ElnaLexerKind.remainder + elsif character = '/' then + result := ElnaLexerKind.division + elsif character = '.' 
then + result := ElnaLexerKind.dot + elsif character = '^' then + result := ElnaLexerKind.hat + end; + return result +end; + +proc _elna_lexer_classify_composite(start_position: Word, one_before_last: Word); +var + first_character: Word; + last_character: Word; + result: Word; +begin + first_character := _load_byte(start_position); + last_character := _load_byte(one_before_last); + + if first_character = ':' then + result := ElnaLexerKind.assignment + elsif first_character = '<' then + if last_character = '=' then + result := ElnaLexerKind.less_equal + elsif last_character = '>' then + result := ElnaLexerKind.not_equal + end + elsif first_character = '>' then + if last_character = '=' then + result := ElnaLexerKind.greater_equal + end + end; + + return result +end; + +proc _elna_lexer_classify_delimited(start_position: Word, end_position: Word); +var + token_length: Word; + delimiter: Word; + result: Word; +begin + token_length := end_position - start_position; + delimiter := _load_byte(start_position); + + if delimiter = '(' then + result := ElnaLexerKind.comment + elsif delimiter = '\'' then + result := ElnaLexerKind.character + elsif delimiter = '"' then + result := ElnaLexerKind.string + end; + return result +end; + +proc _elna_lexer_classify_integer(start_position: Word, end_position: Word); + return ElnaLexerKind.integer +end; + +proc _elna_lexer_execute_action(action_to_perform: Word, kind: Word); +var + position_start: Word; + position_end: Word; + intermediate: Word; +begin + position_start := _elna_lexer_global_get_start(); + position_end := _elna_lexer_global_get_end(); + + if action_to_perform = ElnaLexerAction.none then + elsif action_to_perform = ElnaLexerAction.accumulate then + _elna_lexer_global_set_end(position_end + 1) + elsif action_to_perform = ElnaLexerAction.skip then + _elna_lexer_global_set_start(position_start + 1); + _elna_lexer_global_set_end(position_end + 1) + elsif action_to_perform = ElnaLexerAction.single then + 
_elna_lexer_global_set_end(position_end + 1); + + intermediate := _elna_lexer_classify_single(position_start); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.eof then + intermediate := ElnaLexerKind.eof; + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.finalize then + intermediate := _elna_lexer_classify_finalize(position_start); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.composite then + _elna_lexer_global_set_end(position_end + 1); + + intermediate := _elna_lexer_classify_composite(position_start, position_end); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.key_id then + intermediate := _elna_lexer_classify_keyword(position_start, position_end); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.integer then + intermediate := _elna_lexer_classify_integer(position_start, position_end); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.delimited then + _elna_lexer_global_set_end(position_end + 1); + + intermediate := _elna_lexer_classify_delimited(position_start, position_end + 1); + kind^ := intermediate + end +end; + +proc _elna_lexer_execute_transition(kind: Word); +var + next_transition: Word; + next_state: Word; + global_state: Word; + action_to_perform: Word; +begin + next_transition := _elna_lexer_next_transition(); + next_state := _elna_lexer_transition_get_state(next_transition); + action_to_perform := _elna_lexer_transition_get_action(next_transition); + + global_state := _elna_lexer_global_state(); + + global_state^ := next_state; + _elna_lexer_execute_action(action_to_perform, kind); + + return next_state +end; + +proc _elna_lexer_advance_token(kind: Word); +var + result_state: Word; +begin + result_state := _elna_lexer_execute_transition(kind); + if result_state <> ElnaLexerState.finish then + _elna_lexer_advance_token(kind) + end +end; + +(** + * Reads the next token and writes its type into the address in the kind parameter. 
+ *) +proc _elna_lexer_read_token(kind: Word); +begin + _elna_lexer_reset(); + _elna_lexer_advance_token(kind) +end; + +(** + * Advances the token stream past the last read token. + *) +proc _elna_lexer_skip_token(); +var + old_end: Word; +begin + old_end := _elna_lexer_global_get_end(); + _elna_lexer_global_set_start(old_end) +end; + +proc _initialize_global_state(); +begin + compiler_strings_position := @compiler_strings; + memory_free_pointer := _mmap(4194304); + source_code := _mmap(495616); + symbol_table_store := _mmap(495616) +end; + +(* + * Entry point. + *) +proc _start(); +var + last_read: Word; + offset: Word; +begin + _initialize_global_state(); + _elna_lexer_initialize(source_code); + _symbol_table_build(); + + (* Read the source from the standard input. *) + offset := source_code; + + .start_read; + (* Second argument is buffer size. When modifying it, update the source_code definition as well. *) + last_read := _read_file(offset, 409600); + if last_read > 0 then + offset := offset + last_read; + goto start_read + end; + _compile(); + + _exit(0) +end; diff --git a/boot/stage16.elna b/boot/stage16.elna deleted file mode 100644 index cd56cc2..0000000 --- a/boot/stage16.elna +++ /dev/null @@ -1,4772 +0,0 @@ -(* - * This Source Code Form is subject to the terms of the Mozilla Public License, - * v. 2.0. If a copy of the MPL was not distributed with this file, You can - * obtain one at https://mozilla.org/MPL/2.0/. - *) - -(* Stage 15 compiler. 
*) - -type - _elna_tac_declaration = record - next: Word; - name: Word; - length: Word; - body: Word - end; - _node = record - kind: Word - end; - _integer_literal_node = record - kind: Word; - value: Word; - length: Word - end; - _character_literal_node = record - kind: Word; - value: Word; - length: Word - end; - _variable_expression = record - kind: Word; - name: Word; - length: Word - end; - _string_literal_node = record - kind: Word; - value: Word; - length: Word - end; - _dereference_expression = record - kind: Word; - pointer: Word - end; - _binary_expression = record - kind: Word; - lhs: Word; - rhs: Word; - operator: Word - end; - _unary_expression = record - kind: Word; - operand: Word; - operator: Word - end; - _if_statement = record - kind: Word; - next: Word; - conditionals: Word; - _else: Word - end; - - (** - * All statements are chained into a list. Next contains a pointer to the next - * statement in the statement list. - *) - _statement = record - kind: Word; - next: Word - end; - _goto_statement = record - kind: Word; - next: Word; - label: Word; - length: Word - end; - _label_declaration = record - kind: Word; - next: Word; - label: Word; - length: Word - end; - _field_access_expression = record - kind: Word; - aggregate: Word; - field: Word; - length: Word - end; - _elna_tac_module = record - data: Word; - code: Word - end; - _module_declaration = record - kind: Word; - types: Word; - globals: Word; - procedures: Word - end; - _assign_statement = record - kind: Word; - next: Word; - assignee: Word; - assignment: Word - end; - _return_statement = record - kind: Word; - next: Word; - returned: Word - end; - _type = record - kind: Word; - size: Word - end; - _enumeration_type = record - kind: Word; - size: Word; - members: Word; - length: Word - end; - _enumeration_type_expression = record - kind: Word; - members: Word; - length: Word - end; - _record_type = record - kind: Word; - size: Word; - members: Word; - length: Word - end; - 
_record_type_expression = record - kind: Word; - members: Word; - length: Word - end; - _named_type_expression = record - kind: Word; - name: Word; - length: Word - end; - - (** - * Conditional statements is a list of pairs: condition and statements. - * Used for example to represent if and elsif blocks with beloning statements. - *) - _conditional_statements = record - condition: Word; - statements: Word; - next: Word - end; - - _declaration = record - kind: Word; - next: Word; - name: Word; - length: Word - end; - _procedure_declaration = record - kind: Word; - next: Word; - name: Word; - length: Word; - body: Word; - temporaries: Word; - parameters: Word - end; - _type_declaration = record - kind: Word; - next: Word; - name: Word; - length: Word; - _type: Word - end; - _variable_declaration = record - kind: Word; - next: Word; - name: Word; - length: Word; - _type: Word - end; - - ElnaLexerAction = (none, accumulate, skip, single, eof, finalize, composite, key_id, integer, delimited); - - (** - * Classification table assigns each possible character to a group (class). All - * characters of the same group a handled equivalently. 
- * - * Transition = record - * action: TransitionAction; - * next_state: TransitionState - * end; - *) - ElnaLexerClass = ( - invalid, - digit, - alpha, - space, - colon, - equals, - left_paren, - right_paren, - asterisk, - backslash, - single, - hex, - zero, - x, - eof, - dot, - minus, - single_quote, - double_quote, - greater, - less, - other - ); - ElnaLexerState = ( - start, - colon, - identifier, - decimal, - leading_zero, - greater, - minus, - left_paren, - less, - dot, - comment, - closing_comment, - character, - character_escape, - string, - string_escape, - finish - ); - ElnaLexerKind = ( - identifier, - _const, - _var, - _proc, - _type, - _begin, - _end, - _if, - _then, - _else, - _elsif, - _while, - _do, - _extern, - _record, - _true, - _false, - null, - and, - _or, - _xor, - pipe, - not, - _return, - _module, - _program, - _import, - _cast, - _defer, - _case, - _of, - trait, - left_paren, - right_paren, - left_square, - right_square, - shift_left, - shift_right, - greater_equal, - less_equal, - greater_than, - less_than, - not_equal, - equals, - semicolon, - dot, - comma, - plus, - arrow, - minus, - multiplication, - division, - remainder, - assignment, - colon, - hat, - at, - comment, - string, - character, - integer, - word, - _goto, - eof - ); - NodeKind = ( - integer_literal, - string_literal, - character_literal, - variable_expression, - field_access_expression, - dereference_expression, - unary_expression, - binary_expression, - call, - goto_statement, - label_declaration, - return_statement, - assign_statement, - if_statement, - procedure_declaration, - variable_declaration, - enumeration_type_expression, - named_type_expression, - type_declaration, - module_declaration, - record_type_expression - ); - InfoKind = (type_info, parameter_info, temporary_info, procedure_info); - TypeKind = (primitive, enumeration, _record); - ElnaTacOperator = ( - load_immediate, - load_address, - add, - add_immediate, - load_word, - store_word, - jal, - move, - 
sub, - div, - rem, - mul, - _xor, - _or, - and, - seqz, - snez, - slt, - xor_immediate, - neg, - not, - jump, - beqz, - label, - start, - ret - ); - ElnaTacOperand = (register, immediate, symbol, offset); - ElnaTacRegister = ( - zero, - ra, - sp, - gp, - tp, - t0, - t1, - t2, - s0, - s1, - a0, - a1, - a2, - a3, - a4, - a5, - a6, - a7, - s2, - s3, - s4, - s5, - s6, - s7, - s8, - s9, - s10, - s11, - t3, - t4, - t5, - t6 - ); - -var - symbol_table_global: Array; - compiler_strings: Array; - classification: Array; - - source_code: Word; - compiler_strings_position: Word; - compiler_strings_length: Word; - label_counter: Word; - symbol_table_store: Word; - - (* Points to a segment of free memory. *) - memory_free_pointer: Word; - -(** - * Calculates and returns the string token length between quotes, including the - * escaping slash characters. - * - * Parameters: - * string - String token pointer. - * - * Returns the length in a0. - *) -proc _string_length(string: Word); -var - counter: Word; - current_byte: Word; -begin - (* Reset the counter. *) - counter := 0; - - .string_length_loop; - string := string + 1; - - current_byte := _load_byte(string); - if current_byte <> '"' then - counter := counter + 1; - goto string_length_loop - end; - - return counter -end; - -(** - * Adds a string to the global, read-only string storage. - * - * Parameters: - * string - String token. - * - * Returns the offset from the beginning of the storage to the new string in a0. 
- *) -proc _add_string(string: Word); -var - contents: Word; - result: Word; - current_byte: Word; -begin - contents := string + 1; - result := compiler_strings_length; - - .add_string_loop; - current_byte := _load_byte(contents); - if current_byte <> '"' then - _store_byte(current_byte, compiler_strings_position); - compiler_strings_position := compiler_strings_position + 1; - contents := contents + 1; - - if current_byte <> '\\' then - compiler_strings_length := compiler_strings_length + 1 - end; - goto add_string_loop - end; - - return result -end; - -(** - * Reads standard input into a buffer. - * - * Parameters: - * buffer - Buffer pointer. - * size - Buffer size. - * - * Returns the amount of bytes written in a0. - *) -proc _read_file(buffer: Word, size: Word); - return _syscall(0, buffer, size, 0, 0, 0, 63) -end; - -(** - * MAP_ANONYMOUS is 32. - * PROT_READ | PORT_WRITE is (1 | 2). - * MAP_ANONYMOUS | MAP_PRIVATE is (32 | 2) - *) -proc _mmap(length: Word); - return _syscall(0, length, 1 or 2, 32 or 2, -1, 0, 222) -end; - -(** - * Writes to the standard output. - * - * Parameters: - * buffer - Buffer. - * size - Buffer length. - *) -proc _write_s(buffer: Word, size: Word); -begin - _syscall(1, buffer, size, 0, 0, 0, 64) -end; - -(** - * Writes a number to a string buffer. - * - * Parameters: - * number - Whole number. - * output_buffer - Buffer pointer. - * - * Sets a0 to the length of the written number. 
- *) -proc _print_i(number: Word, output_buffer: Word); -var - local_buffer: Word; - is_negative: Word; - current_character: Word; - result: Word; -begin - local_buffer := @result + 11; - - if number >= 0 then - is_negative := 0 - else - number = -number; - is_negative := 1 - end; - - .print_i_digit10; - current_character := number % 10; - _store_byte(current_character + '0', local_buffer); - - number := number / 10; - local_buffer := local_buffer - 1; - - if number <> 0 then - goto print_i_digit10 - end; - if is_negative = 1 then - _store_byte('-', local_buffer); - local_buffer := local_buffer - 1 - end; - result := @result + 11; - result := result - local_buffer; - _memcpy(output_buffer, local_buffer + 1, result); - - return result -end; - -(** - * Writes a number to the standard output. - * - * Parameters: - * number - Whole number. - *) -proc _write_i(number: Word); -var - local_buffer: Word; - length: Word; -begin - length := _print_i(number, @local_buffer); - _write_s(@local_buffer, length) -end; - -(** - * Writes a character from a0 into the standard output. - * - * Parameters: - * character - Character to write. - *) -proc _write_c(character: Word); -begin - _write_s(@character, 1) -end; - -(** - * Write null terminated string. - * - * Parameters: - * string - String. - *) -proc _write_z(string: Word); -var - next_byte: Word; -begin - (* Check for 0 character. *) - next_byte := _load_byte(string); - - if next_byte <> 0 then - (* Print a character. *) - _write_c(next_byte); - - (* Advance the input string by one byte. *) - _write_z(string + 1) - end -end; - -(** - * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. - *) -proc _is_upper(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'A'; - rhs := character <= 'Z'; - - return lhs & rhs -end; - -(** - * Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. 
- *) -proc _is_lower(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= 'a'; - rhs := character <= 'z'; - - return lhs & rhs -end; - -(** - * Detects if the passed character is a 7-bit alpha character or an underscore. - * - * Paramters: - * character - Tested character. - * - * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. - *) -proc _is_alpha(character: Word); -var - is_upper_result: Word; - is_lower_result: Word; - is_alpha_result: Word; - is_underscore: Word; -begin - is_upper_result := _is_upper(character); - is_lower_result := _is_lower(character); - is_underscore := character = '_'; - - is_alpha_result := is_lower_result or is_upper_result; - return is_alpha_result or is_underscore -end; - -(** - * Detects whether the passed character is a digit (a value between 0 and 9). - * - * Parameters: - * character - Exemined value. - * - * Sets a0 to 1 if it is a digit, to 0 otherwise. - *) -proc _is_digit(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := character >= '0'; - rhs := character <= '9'; - - return lhs & rhs -end; - -proc _is_alnum(character: Word); -var - lhs: Word; - rhs: Word; -begin - lhs := _is_alpha(character); - rhs := _is_digit(character); - - return lhs or rhs -end; - -(** - * Parameters: - * lhs - First pointer. - * rhs - Second pointer. - * count - The length to compare. - * - * Returns 0 if memory regions are equal. - *) -proc _memcmp(lhs: Word, rhs: Word, count: Word); -var - lhs_byte: Word; - rhs_byte: Word; - result: Word; -begin - result := 0; - - .memcmp_loop; - if count <> 0 then - lhs_byte := _load_byte(lhs); - rhs_byte := _load_byte(rhs); - result := lhs_byte - rhs_byte; - - lhs := lhs + 1; - rhs := rhs + 1; - count := count - 1; - - if result = 0 then - goto memcmp_loop - end - end; - - return result -end; - -(** - * Copies memory. - * - * Parameters: - * destination - Destination. - * source - Source. - * count - Size. 
- * - * Returns the destination. - *) -proc _memcpy(destination: Word, source: Word, count: Word); -var - current_byte: Word; -begin - .memcpy_loop; - if count <> 0 then - current_byte := _load_byte(source); - _store_byte(current_byte, destination); - - destination := destination + 1; - source := source + 1; - count := count - 1; - goto memcpy_loop - end; - - return destination -end; - -proc _elna_tac_instruction_size(); - return 44 -end; - -proc _elna_tac_instruction_get_kind(this: Word); - return this^ -end; - -proc _elna_tac_instruction_set_kind(this: Word, value: Word); -begin - this^ := value -end; - -proc _elna_tac_instruction_get_next(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_tac_instruction_set_next(this: Word, value: Word); -begin - .elna_tac_instruction_set_next_loop; - this := this + 4; - if value <> 0 then - if this^ <> 0 then - this := this^; - goto elna_tac_instruction_set_next_loop - end - end; - this^ := value -end; - -proc _elna_tac_instruction_get_operand_type(this: Word, n: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - return this^ -end; - -proc _elna_tac_instruction_get_operand_value(this: Word, n: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - this := this + 4; - return this^ -end; - -proc _elna_tac_instruction_get_operand_length(this: Word, n: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - this := this + 8; - return this^ -end; - -proc _elna_tac_instruction_set_operand(this: Word, n: Word, operand_type: Word, operand_value: Word, operand_length: Word); -begin - n := n - 1; - n := n * 12; - this := this + 8; - this := this + n; - - this^ := operand_type; - this := this + 4; - this^ := operand_value; - this := this + 4; - this^ := operand_length -end; - -proc _elna_tac_instruction_create(kind: Word); -var - result: Word; - instruction_size: Word; -begin - instruction_size := 
_elna_tac_instruction_size(); - result := _allocate(instruction_size); - - _elna_tac_instruction_set_kind(result, kind); - _elna_tac_instruction_set_next(result, 0); - - return result -end; - -proc _elna_tac_module_create(data: Word, code: Word); -var - result: Word; - result_size: Word; -begin - result_size := _elna_tac_module_size(); - result := _allocate(result_size); - - _elna_tac_module_set_data(result, data); - _elna_tac_module_set_code(result, code); - - return result -end; - -proc _elna_tac_load_immediate(target_register: Word, source_immediate: Word, immediate_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.load_immediate); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.immediate, source_immediate, immediate_length); - - return result -end; - -proc _elna_tac_load_address(target_register: Word, source_symbol: Word, symbol_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.load_address); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); - - return result -end; - -proc _elna_tac_beqz(target_register: Word, source_symbol: Word, symbol_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.beqz); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); - - return result -end; - -proc _elna_tac_jump(source_symbol: Word, symbol_length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.jump); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, source_symbol, 
symbol_length); - - return result -end; - -proc _elna_tac_add(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.add); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_mul(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.mul); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_sub(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.sub); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_div(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.div); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_rem(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.rem); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, 
destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_xor(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator._xor); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_xor_immediate(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator._xor); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); - - return result -end; - -proc _elna_tac_or(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator._or); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_and(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.and); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_add_immediate(destination: Word, lhs: 
Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.add_immediate); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); - - return result -end; - -proc _elna_tac_slt(destination: Word, lhs: Word, rhs: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.slt); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); - _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); - - return result -end; - -proc _elna_tac_jal(symbol: Word, length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.jal); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, symbol, length); - - return result -end; - -proc _elna_tac_load_word(target: Word, register: Word, offset: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.load_word); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); - - return result -end; - -proc _elna_tac_store_word(target: Word, register: Word, offset: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.store_word); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); - - return result -end; - -proc _elna_tac_move(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.move); - - 
_elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -(* Creates a seqz instruction with two register operands: destination (1) and source (2). Returns the new instruction. *) proc _elna_tac_seqz(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.seqz); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -(* Creates a snez instruction with two register operands: destination (1) and source (2). Returns the new instruction. *) proc _elna_tac_snez(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.snez); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -(* Creates a neg instruction with two register operands: destination (1) and source (2). Returns the new instruction. *) proc _elna_tac_neg(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.neg); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -(* Creates a not instruction with two register operands: destination (1) and source (2). Returns the new instruction. *) proc _elna_tac_not(destination: Word, source: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.not); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); - _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); - - return result -end; - -(* Creates a label instruction whose single operand is a symbol: counter is kept as the operand value and length as the operand length. Returns the new instruction. *) proc _elna_tac_label(counter: Word, length: Word); -var - result: Word; -begin - result := _elna_tac_instruction_create(ElnaTacOperator.label); - - _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, counter, length); - - return result -end; - -(* Writes the assembler mnemonic for instruction_kind, preceded by a tab, and returns the number of operands the caller has to write after it. For start and ret a fixed multi-line stack frame sequence is written and 0 is returned. An operator that matches no branch writes nothing and yields 0. *) proc _elna_writer_instruction_name(instruction_kind: Word); -var - argument_count: Word; -begin - (* Default for operators without a branch below, so that the return value is never an uninitialised word. *) - argument_count := 0; - if instruction_kind = ElnaTacOperator.load_immediate
then - argument_count := 2; - _write_s("\tli", 3) - elsif instruction_kind = ElnaTacOperator.load_address then - argument_count := 2; - _write_s("\tla", 3) - elsif instruction_kind = ElnaTacOperator.add then - argument_count := 3; - _write_s("\tadd", 4) - elsif instruction_kind = ElnaTacOperator.add_immediate then - argument_count := 3; - _write_s("\taddi", 5) - elsif instruction_kind = ElnaTacOperator.load_word then - argument_count := 2; - _write_s("\tlw", 3) - elsif instruction_kind = ElnaTacOperator.store_word then - argument_count := 2; - _write_s("\tsw", 3) - elsif instruction_kind = ElnaTacOperator.jal then - argument_count := 1; - _write_s("\tcall", 5) - elsif instruction_kind = ElnaTacOperator.move then - argument_count := 2; - _write_s("\tmv", 3) - elsif instruction_kind = ElnaTacOperator.sub then - argument_count := 3; - _write_s("\tsub", 4) - elsif instruction_kind = ElnaTacOperator.mul then - argument_count := 3; - _write_s("\tmul", 4) - elsif instruction_kind = ElnaTacOperator.div then - argument_count := 3; - _write_s("\tdiv", 4) - elsif instruction_kind = ElnaTacOperator.rem then - argument_count := 3; - _write_s("\trem", 4) - elsif instruction_kind = ElnaTacOperator._xor then - argument_count := 3; - _write_s("\txor", 4) - elsif instruction_kind = ElnaTacOperator.xor_immediate then - argument_count := 3; - _write_s("\txori", 5) - elsif instruction_kind = ElnaTacOperator._or then - argument_count := 3; - _write_s("\tor", 3) - elsif instruction_kind = ElnaTacOperator.and then - argument_count := 3; - _write_s("\tand", 4) - elsif instruction_kind = ElnaTacOperator.seqz then - argument_count := 2; - _write_s("\tseqz", 5) - elsif instruction_kind = ElnaTacOperator.snez then - argument_count := 2; - _write_s("\tsnez", 5) - elsif instruction_kind = ElnaTacOperator.slt then - argument_count := 3; - _write_s("\tslt", 4) - elsif instruction_kind = ElnaTacOperator.neg then - argument_count := 2; - _write_s("\tneg", 4) - elsif instruction_kind =
ElnaTacOperator.not then - argument_count := 2; - _write_s("\tnot", 4) - elsif instruction_kind = ElnaTacOperator.jump then - argument_count := 1; - _write_s("\tj", 2) - elsif instruction_kind = ElnaTacOperator.beqz then - argument_count := 2; - _write_s("\tbeqz", 5) - elsif instruction_kind = ElnaTacOperator.start then - argument_count := 0; - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\0") - elsif instruction_kind = ElnaTacOperator.ret then - argument_count := 0; - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\0") - end; - return argument_count -end; - -(* Writes a register operand as the character x followed by register - 1. *) proc _elna_writer_register(register: Word); -begin - _write_c('x'); - _write_i(register - 1) -end; - -(* Writes operand n of the instruction, preceded by a space. Registers are written by _elna_writer_register, offset operands as length(register). For symbols and immediates an operand length of 0 selects the numeric form (_write_label or _write_i); otherwise the operand value is written as a string of that length. *) proc _elna_writer_operand(instruction: Word, n: Word); -var - operand_value: Word; - operand_length: Word; - operand_type: Word; -begin - operand_type := _elna_tac_instruction_get_operand_type(instruction, n); - operand_value := _elna_tac_instruction_get_operand_value(instruction, n); - operand_length := _elna_tac_instruction_get_operand_length(instruction, n); - - _write_c(' '); - if operand_type = ElnaTacOperand.register then - _elna_writer_register(operand_value) - elsif operand_type = ElnaTacOperand.offset then - _write_i(operand_length); - _write_c('('); - _elna_writer_register(operand_value); - _write_c(')') - elsif operand_type = ElnaTacOperand.symbol then - if operand_length = 0 then - _write_label(operand_value, 0) - else - _write_s(operand_value, operand_length) - end - elsif operand_length = 0 then (* ElnaTacOperand.immediate *) - _write_i(operand_value) - else - _write_s(operand_value, operand_length) - end -end; - -(* Writes one instruction followed by a newline: either a label definition (the label and a colon) or a mnemonic with its comma-separated operands. *) proc _elna_writer_instruction(instruction: Word); -var - instruction_kind: Word; - argument_count: Word; - current_argument: Word; - operand_value: Word; - operand_length: Word; -begin - instruction_kind := _elna_tac_instruction_get_kind(instruction); - - if instruction_kind = ElnaTacOperator.label then - argument_count := 0; -
operand_value := _elna_tac_instruction_get_operand_value(instruction, 1); - operand_length := _elna_tac_instruction_get_operand_length(instruction, 1); - _write_label(operand_value, operand_length); - _write_c(':') - else - argument_count := _elna_writer_instruction_name(instruction_kind) - end; - current_argument := 1; - - .elna_writer_instruction_loop; - if current_argument <= argument_count then - _elna_writer_operand(instruction, current_argument); - current_argument := current_argument + 1 - end; - if current_argument <= argument_count then - _write_c(','); - goto elna_writer_instruction_loop - end; - - _write_c('\n') -end; - -(* Writes every instruction of the list starting at instruction, following the next links until a 0 link is reached. Implemented as a loop so that the depth of the call stack does not grow with the length of the list. *) proc _elna_writer_instructions(instruction: Word); -begin - .elna_writer_instructions_loop; - if instruction <> 0 then - _elna_writer_instruction(instruction); - instruction := _elna_tac_instruction_get_next(instruction); - goto elna_writer_instructions_loop - end -end; - -(* Writes every procedure declaration of the list starting at procedure: a .type directive, the procedure label, the body instructions and a trailing ret. An empty list (0) writes nothing, matching _elna_writer_variable. *) proc _elna_writer_procedure(procedure: Word); -var - name_pointer: Word; - name_length: Word; - body_statements: Word; -begin - .elna_writer_procedure_loop; - if procedure <> 0 then - name_pointer := _elna_tac_declaration_get_name(procedure); - name_length := _elna_tac_declaration_get_length(procedure); - body_statements := _elna_tac_declaration_get_body(procedure); - - (* Write .type _procedure_name, @function.
*) - _write_z(".type \0"); - - _write_s(name_pointer, name_length); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_s(name_pointer, name_length); - _write_z(":\n\0"); - - _elna_writer_instructions(body_statements); - _write_z("\tret\n\0"); - - procedure := _elna_tac_declaration_get_next(procedure); - goto elna_writer_procedure_loop - end -end; - -(* Writes every variable declaration of the list starting at variable: a .type directive, the variable label and a .zero directive with the size that is kept in the body field of the declaration. *) proc _elna_writer_variable(variable: Word); -var - name: Word; - name_length: Word; - size: Word; -begin - .elna_writer_variable_loop; - if variable <> 0 then - name := _elna_tac_declaration_get_name(variable); - name_length := _elna_tac_declaration_get_length(variable); - size := _elna_tac_declaration_get_body(variable); - - _write_z(".type \0"); - _write_s(name, name_length); - _write_z(", @object\n\0"); - - _write_s(name, name_length); - _write_c(':'); - - _write_z(" .zero \0"); - _write_i(size); - - _write_c('\n'); - variable := _elna_tac_declaration_get_next(variable); - - goto elna_writer_variable_loop - end -end; - -(* Writes the whole assembly module: the data section with the variables, the text section with three built-in routines followed by the procedures, and a read-only strings object filled with the bytes between compiler_strings and compiler_strings_position. *) proc _elna_writer_module(pair: Word); -var - compiler_strings_copy: Word; - compiler_strings_end: Word; - current_byte: Word; - current_part: Word; -begin - _write_z(".globl _start\n\n\0"); - _write_z(".section .data\n\0"); - - current_part := _elna_tac_module_get_data(pair); - _elna_writer_variable(current_part); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - - current_part := _elna_tac_module_get_code(pair); - _elna_writer_procedure(current_part); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - compiler_strings_copy := @compiler_strings; - compiler_strings_end := compiler_strings_position; - - .elna_writer_module_loop; - if compiler_strings_copy
< compiler_strings_end then - current_byte := _load_byte(compiler_strings_copy); - compiler_strings_copy := compiler_strings_copy + 1; - _write_c(current_byte); - - goto elna_writer_module_loop - end; - _write_c('"'); - _write_c('\n'); -end; - -(* Parses the current token as an integer literal node. The node keeps a pointer to the token text and its length (token end minus token start); the token is skipped. *) proc _elna_parser_integer_literal(); -var - integer_token: Word; - integer_length: Word; - result: Word; - literal_size: Word; -begin - literal_size := _integer_literal_node_size(); - result := _allocate(literal_size); - - integer_token := _elna_lexer_global_get_start(); - integer_length := _elna_lexer_global_get_end(); - integer_length := integer_length - integer_token; - _elna_lexer_skip_token(); - - _node_set_kind(result, NodeKind.integer_literal); - _integer_literal_node_set_value(result, integer_token); - _integer_literal_node_set_length(result, integer_length); - - return result -end; - -(* Creates a load_immediate into t0 whose immediate is the literal's source text (pointer and length). *) proc _elna_tac_integer_literal(integer_literal_node: Word); -var - integer_token: Word; - integer_length: Word; -begin - integer_token := _integer_literal_node_get_value(integer_literal_node); - integer_length := _integer_literal_node_get_length(integer_literal_node); - - return _elna_tac_load_immediate(ElnaTacRegister.t0, integer_token, integer_length) -end; - -(* Parses the current token as a character literal node keeping a pointer to the token text and its length; the token is skipped. NOTE(review): the fields are written with the _integer_literal_node_* setters but read back with the _character_literal_node_* getters, which presumably relies on both node kinds sharing one layout - confirm. *) proc _elna_parser_character_literal(); -var - character: Word; - character_length: Word; - result: Word; - literal_size: Word; -begin - literal_size := _character_literal_node_size(); - result := _allocate(literal_size); - - character := _elna_lexer_global_get_start(); - character_length := _elna_lexer_global_get_end(); - character_length := character_length - character; - _elna_lexer_skip_token(); - - _node_set_kind(result, NodeKind.character_literal); - _integer_literal_node_set_value(result, character); - _integer_literal_node_set_length(result, character_length); - - return result -end; - -(* Creates a load_immediate into t0 whose immediate is the literal's source text (pointer and length). *) proc _elna_tac_character_literal(character_literal_node: Word); -var - character: Word; - character_length: Word; -begin - character :=
_character_literal_node_get_value(character_literal_node); - character_length := _character_literal_node_get_length(character_literal_node); - - return _elna_tac_load_immediate(ElnaTacRegister.t0, character, character_length) -end; - -(* Bump allocation: returns the current value of memory_free_pointer and advances the pointer by size. The returned block is not initialised here. *) proc _allocate(size: Word); -var - result: Word; -begin - result := memory_free_pointer; - memory_free_pointer := memory_free_pointer + size; - return result -end; - -(* Parses the current token as a variable expression node keeping a pointer to the name and its length (token end minus token start); the token is skipped. *) proc _elna_parser_variable_expression(); -var - name: Word; - name_token: Word; - result: Word; - memory_size: Word; -begin - name := _elna_lexer_global_get_start(); - name_token := _elna_lexer_global_get_end(); - name_token := name_token - name; - _elna_lexer_skip_token(); - - memory_size := _variable_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.variable_expression); - _variable_expression_set_name(result, name); - _variable_expression_set_length(result, name_token); - - return result -end; - -(* Produces the instruction that designates a variable: a name found in symbol_table goes through _elna_tac_local_designator, any other name through _elna_tac_global_designator. *) proc _elna_tac_variable_expression(variable_expression: Word, symbol_table: Word); -var - name: Word; - name_token: Word; - lookup_result: Word; - instruction: Word; -begin - name := _variable_expression_get_name(variable_expression); - name_token := _variable_expression_get_length(variable_expression); - - lookup_result := _symbol_table_lookup(symbol_table, name, name_token); - if lookup_result <> 0 then - instruction := _elna_tac_local_designator(lookup_result) - else - instruction := _elna_tac_global_designator(variable_expression) - end; - return instruction -end; - -(* Parses the current token as a string literal node. The node keeps the token start; its length is whatever _string_length reports for that start. The token is skipped. *) proc _elna_parser_string_literal(); -var - length: Word; - token_start: Word; - result: Word; - memory_size: Word; -begin - memory_size := _string_literal_node_size(); - result := _allocate(memory_size); - - token_start := _elna_lexer_global_get_start(); - length := _string_length(token_start); - _elna_lexer_skip_token(); - - _node_set_kind(result, NodeKind.string_literal); - _string_literal_node_set_value(result, token_start); - _string_literal_node_set_length(result,
length); - - return result -end; - -(* Registers the string with _add_string and produces three instructions: load the address of strings into t0, load the returned offset into t1, add both into t0. Returns the first instruction of the sequence. *) proc _elna_tac_string_literal(string_literal_node: Word); -var - token_start: Word; - length: Word; - offset: Word; - instruction: Word; - first_instruction: Word; - next_instruction: Word; -begin - token_start := _string_literal_node_get_value(string_literal_node); - length := _string_literal_node_get_length(string_literal_node); - offset := _add_string(token_start); - - first_instruction := _elna_tac_load_address(ElnaTacRegister.t0, "strings", 7); - instruction := _elna_tac_load_immediate(ElnaTacRegister.t1, offset, 0); - _elna_tac_instruction_set_next(first_instruction, instruction); - next_instruction := _elna_tac_add(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(instruction, next_instruction); - - return first_instruction -end; - -(* Parses a character, integer or string literal or an identifier, chosen by the kind of the current token. Returns the parsed node, or 0 when the current token is none of these. *) proc _elna_parser_simple_expression(); -var - parser_node: Word; - token_kind: Word; -begin - parser_node := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.character then - parser_node := _elna_parser_character_literal() - elsif token_kind = ElnaLexerKind.integer then - parser_node := _elna_parser_integer_literal() - elsif token_kind = ElnaLexerKind.string then - parser_node := _elna_parser_string_literal() - elsif token_kind = ElnaLexerKind.identifier then - parser_node := _elna_parser_variable_expression() - end; - return parser_node -end; - -(* Wraps simple_expression into a dereference expression node and skips the current token, which is the hat that _elna_parser_designator has just read. *) proc _elna_parser_dereference_expression(simple_expression: Word); -var - result: Word; - memory_size: Word; -begin - memory_size := _dereference_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.dereference_expression); - _dereference_expression_set_pointer(result, simple_expression); - _elna_lexer_skip_token(); - - return result -end; - -(* Parses a simple expression optionally followed by one postfix form: a hat (dereference), a dot (field access) or a left parenthesis (call). *) proc _elna_parser_designator(); -var - simple_expression: Word; - token_kind: Word; -begin - simple_expression := _elna_parser_simple_expression(); - -
_elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.hat then - simple_expression := _elna_parser_dereference_expression(simple_expression) - elsif token_kind = ElnaLexerKind.dot then - simple_expression := _elna_parser_field_access_expression(simple_expression) - elsif token_kind = ElnaLexerKind.left_paren then - simple_expression := _elna_parser_call(simple_expression) - end; - return simple_expression -end; - -(* Generates code for a literal or a variable expression and returns its first instruction. is_address is a pointer to a word owned by the caller: it is set to 1 for variable expressions and to 0 for literals. The parameter is used directly; it must not be redeclared as a local. *) proc _elna_tac_simple_expression(parser_node: Word, symbol_table: Word, is_address: Word); -var - node_kind: Word; - instruction: Word; -begin - is_address^ := 0; - node_kind := _node_get_kind(parser_node); - - if node_kind = NodeKind.character_literal then - instruction := _elna_tac_character_literal(parser_node) - elsif node_kind = NodeKind.string_literal then - instruction := _elna_tac_string_literal(parser_node) - elsif node_kind = NodeKind.integer_literal then - instruction := _elna_tac_integer_literal(parser_node) - else - instruction := _elna_tac_variable_expression(parser_node, symbol_table); - is_address^ := 1 - end; - return instruction -end; - -(* Parses an optional prefix operator (at, minus or not) followed by a designator. Without an operator the designator node is returned as is; otherwise it becomes the operand of a new unary expression node that stores the operator as the character '@', '-' or '~'. *) proc _elna_parser_unary_expression(); -var - token_kind: Word; - result: Word; - memory_size: Word; - operand: Word; - operator: Word; -begin - _elna_lexer_read_token(@token_kind); - operator := 0; - - if token_kind = ElnaLexerKind.at then - operator := '@' - elsif token_kind = ElnaLexerKind.minus then - operator := '-' - elsif token_kind = ElnaLexerKind.not then - operator := '~' - end; - if operator <> 0 then - _elna_lexer_skip_token() - end; - result := _elna_parser_designator(); - - if operator <> 0 then - operand := result; - memory_size := _unary_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.unary_expression); - _unary_expression_set_operand(result, operand); - _unary_expression_set_operator(result, operator) - end; - - return result -end; - -proc _elna_tac_unary_expression(parser_node: Word, symbol_table: Word); -var
- expression_kind: Word; - operator: Word; - operand: Word; - is_address: Word; - first_instruction: Word; - instruction: Word; -begin - operator := 0; - operand := 0; - - expression_kind := _node_get_kind(parser_node); - - if expression_kind = NodeKind.unary_expression then - operator := _unary_expression_get_operator(parser_node); - operand := _unary_expression_get_operand(parser_node) - else - operand := parser_node - end; - - (* The designator is generated the same way for every operator. Only when the address itself is not requested with '@' is an address result replaced by the word stored at it. *) - first_instruction := _elna_tac_designator(operand, symbol_table, @is_address); - if operator <> '@' then - if is_address then - instruction := _elna_tac_load_word(ElnaTacRegister.t0, ElnaTacRegister.t0, 0); - _elna_tac_instruction_set_next(first_instruction, instruction) - end - end; - if operator = '-' then - instruction := _elna_tac_neg(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(first_instruction, instruction) - elsif operator = '~' then - instruction := _elna_tac_not(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(first_instruction, instruction) - end; - return first_instruction -end; - -(* Parses a unary expression optionally followed by one binary operator and a second unary expression. With an operator a binary expression node is returned that stores the operator's token kind; otherwise the left node is returned unchanged. Only a single operator is handled here. *) proc _elna_parser_binary_expression(); -var - lhs_node: Word; - rhs_node: Word; - token_kind: Word; - memory_size: Word; - result: Word; -begin - lhs_node := _elna_parser_unary_expression(); - rhs_node := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.plus then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.minus then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.multiplication then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.and then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind._or then -
_elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind._xor then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.equals then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.remainder then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.division then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.less_than then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.greater_than then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.less_equal then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.not_equal then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - elsif token_kind = ElnaLexerKind.greater_equal then - _elna_lexer_skip_token(); - rhs_node := _elna_parser_unary_expression() - end; - (* rhs_node stays 0 when the token after the left operand is not a binary operator. *) - if rhs_node <> 0 then - memory_size := _binary_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.binary_expression); - _binary_expression_set_lhs(result, lhs_node); - _binary_expression_set_rhs(result, rhs_node); - _binary_expression_set_operator(result, token_kind) - else - result := lhs_node - end; - return result -end; - -(* Generates code for a binary expression node; any other node is passed on to _elna_tac_unary_expression. Returns the first instruction of the generated sequence. *) proc _elna_tac_binary_expression(parser_node: Word, symbol_table: Word); -var - token_kind: Word; - expression_kind: Word; - operand_node: Word; - first_instruction: Word; - instruction: Word; - current_instruction: Word; -begin - expression_kind := _node_get_kind(parser_node); - - if expression_kind <> NodeKind.binary_expression then - first_instruction := _elna_tac_unary_expression(parser_node, symbol_table) - else - token_kind
:= _binary_expression_get_operator(parser_node); - - operand_node := _binary_expression_get_lhs(parser_node); - first_instruction := _elna_tac_unary_expression(operand_node, symbol_table); - - (* Save the value of the left expression on the stack. *) - instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.sp, 64); - _elna_tac_instruction_set_next(first_instruction, instruction); - current_instruction := instruction; - - operand_node := _binary_expression_get_rhs(parser_node); - instruction := _elna_tac_unary_expression(operand_node, symbol_table); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - (* Load the left expression from the stack. From here on t1 holds the left and t0 the right operand. *) - instruction := _elna_tac_load_word(ElnaTacRegister.t1, ElnaTacRegister.sp, 64); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - if token_kind = ElnaLexerKind.plus then - instruction := _elna_tac_add(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.minus then - (* Operand order matters: left (t1) minus right (t0). *) - instruction := _elna_tac_sub(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.multiplication then - instruction := _elna_tac_mul(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.and then - instruction := _elna_tac_and(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind._or then - instruction := _elna_tac_or(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind._xor then -
instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.equals then - (* Equality: xor both operands, then seqz on the result. *) - instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_seqz(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.remainder then - (* Left operand (t1) first, right operand (t0) second. *) - instruction := _elna_tac_rem(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.division then - (* Left operand (t1) first, right operand (t0) second. *) - instruction := _elna_tac_div(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.less_than then - (* slt with left (t1) and right (t0): left < right. *) - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.greater_than then - (* slt with the operands swapped: right < left. *) - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.less_equal then - (* left <= right is built as the inverse of right < left: slt followed by xor with the immediate 1. *) - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_xor_immediate(ElnaTacRegister.t0, ElnaTacRegister.t0, 1); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.not_equal then - (* Inequality: xor both operands, then snez on the result. *) - instruction := _elna_tac_xor(ElnaTacRegister.t0, ElnaTacRegister.t0, ElnaTacRegister.t1); - _elna_tac_instruction_set_next(current_instruction, instruction); -
current_instruction := instruction; - - instruction := _elna_tac_snez(ElnaTacRegister.t0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction) - elsif token_kind = ElnaLexerKind.greater_equal then - (* left >= right is built as the inverse of left < right: slt followed by xor with the immediate 1. *) - instruction := _elna_tac_slt(ElnaTacRegister.t0, ElnaTacRegister.t1, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - instruction := _elna_tac_xor_immediate(ElnaTacRegister.t0, ElnaTacRegister.t0, 1); - _elna_tac_instruction_set_next(current_instruction, instruction) - end - end; - return first_instruction -end; - -(* Size of a call node in bytes. The accessors below place the callee expression at offset 8 and argument n (counted from 1) at offset 8 + 4 * n, so 44 bytes hold 8 bytes of node header, the 4 byte callee pointer and 8 argument slots: up to 7 arguments plus the terminating 0. *) -proc _call_size(); - return 44 -end; - -(* Returns the callee expression, the word at offset 8 of the call node. *) proc _call_get_name(this: Word); -begin - this := this + 8; - return this^ -end; - -(* Stores the callee expression in the word at offset 8 of the call node. *) proc _call_set_name(this: Word, value: Word); -begin - this := this + 8; - this^ := value -end; - -(* Returns argument n of the call node, the word at offset 8 + 4 * n. *) proc _call_get_argument(this: Word, n: Word); -begin - n := n * 4; - this := this + 8; - this := this + n; - return this^ -end; - -(* Stores value as argument n of the call node, in the word at offset 8 + 4 * n. *) proc _call_set_argument(this: Word, n: Word, value: Word); -begin - n := n * 4; - this := this + 8; - this := this + n; - this^ := value -end; - -(* Parses the argument list of a call whose callee expression has already been parsed. Arguments are stored from index 1 on and are followed by a 0 entry. NOTE(review): the argument count is not checked against the slots reserved by _call_size - confirm that callers never exceed them. *) proc _elna_parser_call(callee: Word); -var - parsed_expression: Word; - result: Word; - argument_number: Word; - token_kind: Word; - call_size: Word; -begin - call_size := _call_size(); - result := _allocate(call_size); - _node_set_kind(result, NodeKind.call); - _statement_set_next(result, 0); - - argument_number := 1; - _call_set_name(result, callee); - - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.right_paren then - _elna_lexer_skip_token(); - goto elna_parser_call_end - end; - - .elna_parser_call_loop; - parsed_expression := _elna_parser_binary_expression(); - _call_set_argument(result, argument_number, parsed_expression); - argument_number := argument_number + 1; -
_elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - (* The token after an argument is always skipped; only a comma continues the list. *) - if token_kind = ElnaLexerKind.comma then - goto elna_parser_call_loop - end; - - .elna_parser_call_end; - (* Set the trailing argument to nil. *) - _call_set_argument(result, argument_number, 0); - - return result -end; - -(* Generates code for a call. Each argument is evaluated into t0 and stored at sp + 116 - 4 * index (index counted from 0); afterwards the stored values are loaded into the registers a0 + index, last argument first, and a jal to the callee name is appended. Returns the first instruction. NOTE(review): the slots are fixed, so a call nested inside a later argument appears to store into slots that already hold arguments of the outer call - confirm. *) proc _elna_tac_call(parsed_call: Word, symbol_table: Word); -var - name_length: Word; - name: Word; - argument_count: Word; - stack_offset: Word; - parsed_expression: Word; - instruction: Word; - first_instruction: Word; - current_instruction: Word; -begin - parsed_expression := _call_get_name(parsed_call); - name := _variable_expression_get_name(parsed_expression); - name_length := _variable_expression_get_length(parsed_expression); - argument_count := 0; - first_instruction := 0; - - .elna_tac_call_loop; - - parsed_expression := _call_get_argument(parsed_call, argument_count + 1); - if parsed_expression = 0 then - goto elna_tac_call_finalize - else - instruction := _elna_tac_binary_expression(parsed_expression, symbol_table); - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - current_instruction := instruction; - - (* Save the argument on the stack. *) - stack_offset := argument_count * 4; - - instruction := _elna_tac_store_word(ElnaTacRegister.t0, - ElnaTacRegister.sp, 116 - stack_offset); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - argument_count := argument_count + 1; - goto elna_tac_call_loop - end; - .elna_tac_call_finalize; - - (* Load the argument from the stack. *) - if argument_count <> 0 then - (* Decrement the argument counter.
*) - argument_count := argument_count - 1; - stack_offset := argument_count * 4; - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - instruction := _elna_tac_load_word(ElnaTacRegister.a0 + argument_count, - ElnaTacRegister.sp, 116 - stack_offset); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - - goto elna_tac_call_finalize - end; - instruction := _elna_tac_jal(name, name_length); - (* Without arguments the jal is the only instruction of the call. *) - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - return first_instruction -end; - -(* Parses a goto statement: skips the current token, then records the start and length of the following token as the label name and skips it as well. Returns a goto statement node with a 0 next link. *) proc _elna_parser_goto_statement(); -var - token_kind: Word; - label_name: Word; - label_length: Word; - statement_size: Word; - result: Word; -begin - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - label_name := _elna_lexer_global_get_start(); - label_length := _elna_lexer_global_get_end() - label_name; - _elna_lexer_skip_token(); - - statement_size := _goto_statement_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.goto_statement); - _statement_set_next(result, 0); - _goto_statement_set_label(result, label_name); - _goto_statement_set_length(result, label_length); - - return result -end; - -(* Creates a jump to the label of a goto statement. The target name is the label name with a dot in front, built in a freshly allocated buffer of label_length + 1 bytes. *) proc _elna_tac_goto_statement(parser_node: Word); -var - label_name: Word; - label_length: Word; - label_with_dot: Word; - instruction: Word; -begin - label_name := _goto_statement_get_label(parser_node); - label_length := _goto_statement_get_length(parser_node); - label_with_dot := _allocate(label_length + 1); - - _store_byte('.', label_with_dot); - _memcpy(label_with_dot + 1, label_name, label_length); - - return _elna_tac_jump(label_with_dot, label_length + 1) -end; - -(* Parses a label declaration: skips the current token, then records the start and length of the following token as the label name and skips it as well. *) proc _elna_parser_label_declaration(); -var - token_kind: Word; - label_name: Word; - label_length: Word; - statement_size: Word; - result: Word; -begin - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); -
- label_name := _elna_lexer_global_get_start(); - label_length := _elna_lexer_global_get_end() - label_name; - _elna_lexer_skip_token(); - - statement_size := _label_declaration_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.label_declaration); - _statement_set_next(result, 0); - (* NOTE(review): the name is stored with the _goto_statement_* setters but read back in _elna_tac_label_declaration with the _label_declaration_* getters; this presumably relies on both node kinds sharing one layout - confirm. *) - _goto_statement_set_label(result, label_name); - _goto_statement_set_length(result, label_length); - - return result -end; - -(* Creates a label instruction from the name and length stored in a label declaration node. *) proc _elna_tac_label_declaration(parser_node: Word); -var - label_name: Word; - label_length: Word; -begin - label_name := _label_declaration_get_label(parser_node); - label_length := _label_declaration_get_length(parser_node); - - return _elna_tac_label(label_name, label_length) -end; - -(* Creates an add_immediate that puts sp plus the offset recorded in the symbol into t0. *) proc _elna_tac_local_designator(symbol: Word); -var - variable_offset: Word; -begin - variable_offset := _parameter_info_get_offset(symbol); - - return _elna_tac_add_immediate(ElnaTacRegister.t0, ElnaTacRegister.sp, variable_offset) -end; - -(* Creates a load_address of the variable's name into t0. *) proc _elna_tac_global_designator(variable_expression: Word); -var - name: Word; - token_length: Word; -begin - name := _variable_expression_get_name(variable_expression); - token_length := _variable_expression_get_length(variable_expression); - - return _elna_tac_load_address(ElnaTacRegister.t0, name, token_length) -end; - -(* Generates code for an enumeration member written as Type.member. The aggregate name is looked up in symbol_table_global and the members of its enumeration type are searched for the field name; the result is a load_immediate of the member's position counted from 1, or 0 when no member matches. *) proc _elna_tac_enumeration_value(field_access_expression: Word); -var - enumeration_type: Word; - members: Word; - members_length: Word; - token_type: Word; - value_name: Word; - name_length: Word; - member_name: Word; - member_length: Word; - counter: Word; - symbol: Word; - instruction: Word; -begin - symbol := _field_access_expression_get_aggregate(field_access_expression); - value_name := _variable_expression_get_name(symbol); - name_length := _variable_expression_get_length(symbol); - - symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length); - - enumeration_type := _type_info_get_type(symbol); - members := _enumeration_type_get_members(enumeration_type); -
members_length := _enumeration_type_get_length(enumeration_type); - - _elna_lexer_read_token(@token_type); - - value_name := _field_access_expression_get_field(field_access_expression); - name_length := _field_access_expression_get_length(field_access_expression); - counter := 1; - - instruction := 0; - .elna_tac_enumeration_value_members; - if members_length > 0 then - member_name := members^; - member_length := members + 4; - member_length := member_length^; - - if _string_compare(value_name, name_length, member_name, member_length) = 0 then - members_length := members_length - 1; - members := members + 8; - counter := counter + 1; - goto elna_tac_enumeration_value_members - end; - instruction := _elna_tac_load_immediate(ElnaTacRegister.t0, counter, 0) - end; - return instruction -end; - -proc _elna_parser_field_access_expression(aggregate: Word); -var - token_kind: Word; - name: Word; - name_token: Word; - result: Word; - memory_size: Word; -begin - (* Skip dot. Read the enumeration value. 
*) - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - name := _elna_lexer_global_get_start(); - name_token := _elna_lexer_global_get_end(); - name_token := name_token - name; - _elna_lexer_skip_token(); - memory_size := _field_access_expression_size(); - result := _allocate(memory_size); - - _node_set_kind(result, NodeKind.field_access_expression); - _field_access_expression_set_aggregate(result, aggregate); - _field_access_expression_set_field(result, name); - _field_access_expression_set_length(result, name_token); - - return result -end; - -proc _elna_tac_designator(parser_node: Word, symbol_table: Word, is_address: Word); -var - name_token: Word; - lookup_result: Word; - token_kind: Word; - parser_node: Word; - node_kind: Word; - first_instruction: Word; - instruction: Word; -begin - node_kind := _node_get_kind(parser_node); - - if node_kind = NodeKind.dereference_expression then - parser_node := _dereference_expression_get_pointer(parser_node); - first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address); - instruction := _elna_tac_load_word(ElnaTacRegister.t0, ElnaTacRegister.t0, 0); - _elna_tac_instruction_set_next(first_instruction, instruction) - elsif node_kind = NodeKind.field_access_expression then - first_instruction := _elna_tac_enumeration_value(parser_node); - is_address^ := 0 - elsif node_kind = NodeKind.call then - first_instruction := _elna_tac_call(parser_node, symbol_table); - instruction := _elna_tac_move(ElnaTacRegister.t0, ElnaTacRegister.a0); - _elna_tac_instruction_set_next(first_instruction, instruction); - is_address^ := 0 - else - first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address) - end; - return first_instruction -end; - -proc _elna_parser_assign_statement(assignee: Word); -var - statement_size: Word; - result: Word; - token_kind: Word; - assignment_node: Word; -begin - statement_size := _assign_statement_size(); - result := _allocate(statement_size); 
- - _node_set_kind(result, NodeKind.assign_statement); - _statement_set_next(result, 0); - _assign_statement_set_assignee(result, assignee); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - assignment_node := _elna_parser_binary_expression(); - _assign_statement_set_assignment(result, assignment_node); - - return result -end; - -proc _elna_tac_assign_statement(parser_tree: Word, symbol_table: Word); -var - current_expression: Word; - is_address: Word; - first_instruction: Word; - instruction: Word; - current_instruction: Word; -begin - current_expression := _assign_statement_get_assignee(parser_tree); - first_instruction := _elna_tac_designator(current_expression, symbol_table, @is_address); - - (* Save the assignee address on the stack. *) - current_instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.sp, 60); - _elna_tac_instruction_set_next(first_instruction, current_instruction); - - (* Compile the assignment. *) - current_expression := _assign_statement_get_assignment(parser_tree); - instruction := _elna_tac_binary_expression(current_expression, symbol_table); - _elna_tac_instruction_set_next(current_instruction, instruction); - - current_instruction := _elna_tac_load_word(ElnaTacRegister.t1, ElnaTacRegister.sp, 60); - _elna_tac_instruction_set_next(instruction, current_instruction); - - instruction := _elna_tac_store_word(ElnaTacRegister.t0, ElnaTacRegister.t1, 0); - _elna_tac_instruction_set_next(current_instruction, instruction); - - return first_instruction -end; - -proc _elna_parser_return_statement(); -var - token_kind: Word; - returned: Word; - label_length: Word; - statement_size: Word; - result: Word; -begin - (* Skip "return" keyword and whitespace after it. 
*) - _elna_lexer_skip_token(); - _elna_lexer_read_token(@token_kind); - - returned := _elna_parser_binary_expression(); - - statement_size := _return_statement_size(); - result := _allocate(statement_size); - - _node_set_kind(result, NodeKind.return_statement); - _statement_set_next(result, 0); - _return_statement_set_returned(result, returned); - - return result -end; - -proc _elna_tac_return_statement(parser_node: Word, symbol_table: Word); -var - return_expression: Word; - first_instruction: Word; - instruction: Word; -begin - return_expression := _return_statement_get_returned(parser_node); - first_instruction := _elna_tac_binary_expression(return_expression, symbol_table); - instruction := _elna_tac_move(ElnaTacRegister.a0, ElnaTacRegister.t0); - _elna_tac_instruction_set_next(first_instruction, instruction); - return first_instruction -end; - -(** - * Writes a label, .Ln, where n is a unique number. - * - * Parameters: - * counter - Label counter. - *) -proc _write_label(counter: Word, length: Word); -var - first_byte: Word; -begin - if length = 0 then - _write_s(".L", 2); - _write_i(counter) - else - first_byte := _load_byte(counter); - if first_byte <> '.' then - _write_c('.') - end; - _write_s(counter, length) - end -end; - -proc _elna_parser_conditional_statements(); -var - conditional_size: Word; - token_kind: Word; - current_node: Word; - result: Word; -begin - conditional_size := _conditional_statements_size(); - result := _allocate(conditional_size); - - (* Skip "if", "while" or "elsif". *) - _elna_lexer_skip_token(); - - current_node := _elna_parser_binary_expression(); - _conditional_statements_set_condition(result, current_node); - - (* Skip "then" or "do". 
*) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - current_node := _elna_parser_statements(); - _conditional_statements_set_statements(result, current_node); - - _conditional_statements_set_next(result, 0); - return result -end; - -proc _elna_tac_conditional_statements(parser_node: Word, after_end_label: Word, symbol_table: Word); -var - condition_label: Word; - current_node: Word; - instruction: Word; - current_instruction: Word; - first_instruction: Word; -begin - (* Compile condition. *) - current_node := _conditional_statements_get_condition(parser_node); - first_instruction := _elna_tac_binary_expression(current_node, symbol_table); - - (* condition_label is the label in front of the next elsif condition or end. *) - condition_label := label_counter; - label_counter := label_counter + 1; - - current_instruction := _elna_tac_beqz(ElnaTacRegister.t0, condition_label, 0); - _elna_tac_instruction_set_next(first_instruction, current_instruction); - - current_node := _conditional_statements_get_statements(parser_node); - instruction := _elna_tac_statements(current_node, symbol_table); - if instruction <> 0 then - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction - end; - - instruction := _elna_tac_jump(after_end_label, 0); - _elna_tac_instruction_set_next(current_instruction, instruction); - - current_instruction := _elna_tac_label(condition_label, 0); - _elna_tac_instruction_set_next(instruction, current_instruction); - - return first_instruction -end; - -proc _elna_parser_if_statement(); -var - current_node: Word; - result: Word; - object_size: Word; - token_kind: Word; - previous_conditional: Word; - next_conditional: Word; -begin - object_size := _if_statement_size(); - result := _allocate(object_size); - - _node_set_kind(result, NodeKind.if_statement); - _statement_set_next(result, 0); - - previous_conditional := _elna_parser_conditional_statements(); - 
_if_statement_set_conditionals(result, previous_conditional); - - .elna_parser_if_statement_loop; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind._elsif then - next_conditional := _elna_parser_conditional_statements(); - _conditional_statements_set_next(previous_conditional, next_conditional); - previous_conditional = next_conditional; - - goto elna_parser_if_statement_loop - elsif token_kind = ElnaLexerKind._else then - _elna_lexer_skip_token(); - - current_node := _elna_parser_statements(); - _if_statement_set__else(result, current_node) - else - _if_statement_set__else(result, 0) - end; - _elna_lexer_skip_token(); - - return result -end; - -proc _elna_parser_statement(); -var - token_kind: Word; - result : Word; -begin - result := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind._goto then - result := _elna_parser_goto_statement() - elsif token_kind = ElnaLexerKind._if then - result := _elna_parser_if_statement() - elsif token_kind = ElnaLexerKind._return then - result := _elna_parser_return_statement() - elsif token_kind = ElnaLexerKind.dot then - result := _elna_parser_label_declaration() - elsif token_kind = ElnaLexerKind.identifier then - result := _elna_parser_designator(); - - if _node_get_kind(result) <> NodeKind.call then - result := _elna_parser_assign_statement(result) - end - end; - return result -end; - -proc _elna_parser_statements(); -var - token_kind: Word; - previous_statement: Word; - next_statement: Word; - first_statement: Word; -begin - _skip_empty_lines(); - - first_statement := _elna_parser_statement(); - previous_statement := first_statement; - if previous_statement = 0 then - goto elna_parser_statements_end - end; - - .elna_parser_statement_loop; - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.semicolon then - _elna_lexer_skip_token(); - _skip_empty_lines(); - next_statement := _elna_parser_statement(); - _statement_set_next(previous_statement, next_statement); - 
previous_statement := next_statement; - - if previous_statement <> 0 then - goto elna_parser_statement_loop - end - end; - .elna_parser_statements_end; - _skip_empty_lines(); - - return first_statement -end; - -proc _elna_tac_statements(parser_node: Word, symbol_table: Word); -var - current_statement: Word; - instruction: Word; - first_instruction: Word; - current_instruction: Word; -begin - current_statement := parser_node; - first_instruction := 0; - - .elna_tac_statements_loop; - if current_statement <> 0 then - instruction := _elna_tac_statement(current_statement, symbol_table); - current_statement := _statement_get_next(current_statement); - if instruction = 0 then - goto elna_tac_statements_loop - end; - if first_instruction = 0 then - first_instruction := instruction - else - _elna_tac_instruction_set_next(current_instruction, instruction) - end; - current_instruction := instruction; - goto elna_tac_statements_loop - end; - return first_instruction -end; - -proc _elna_tac_if_statement(parser_node: Word, symbol_table: Word); -var - current_node: Word; - after_end_label: Word; - condition_label: Word; - first_instruction: Word; - instruction: Word; - current_instruction: Word; -begin - after_end_label := label_counter; - label_counter := label_counter + 1; - - current_node := _if_statement_get_conditionals(parser_node); - first_instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); - current_instruction := first_instruction; - - .elna_tac_if_statement_loop; - current_node := _conditional_statements_get_next(current_node); - if current_node <> 0 then - instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction; - goto elna_tac_if_statement_loop - end; - current_node := _if_statement_get__else(parser_node); - - if current_node <> 0 then - instruction := 
_elna_tac_statements(current_node, symbol_table); - if instruction <> 0 then - _elna_tac_instruction_set_next(current_instruction, instruction); - current_instruction := instruction - end - end; - instruction := _elna_tac_label(after_end_label, 0); - _elna_tac_instruction_set_next(current_instruction, instruction); - - return first_instruction -end; - -proc _elna_tac_statement(parser_node: Word, symbol_table: Word); -var - statement_kind: Word; - instruction: Word; -begin - statement_kind := _node_get_kind(parser_node); - - if statement_kind = NodeKind.goto_statement then - instruction := _elna_tac_goto_statement(parser_node) - elsif statement_kind = NodeKind.if_statement then - instruction := _elna_tac_if_statement(parser_node, symbol_table) - elsif statement_kind = NodeKind.return_statement then - instruction := _elna_tac_return_statement(parser_node, symbol_table) - elsif statement_kind = NodeKind.label_declaration then - instruction := _elna_tac_label_declaration(parser_node) - elsif statement_kind = NodeKind.call then - instruction := _elna_tac_call(parser_node, symbol_table) - elsif statement_kind = NodeKind.assign_statement then - instruction := _elna_tac_assign_statement(parser_node, symbol_table) - else - instruction := 0 - end; - return instruction -end; - -(** - * Writes a regster name to the standard output. - * - * Parameters: - * register_character - Register character. - * register_number - Register number. 
- *)
-proc _write_register(register_character: Word, register_number: Word);
-begin
-  _write_c(register_character);
-  (* The number is written as one character, '0' + register_number. *)
-  _write_c(register_number + '0')
-end;
-
-(**
- * Parses a record type expression.
- *
- * Fields are collected into a linked list of 16-byte entries laid out as
- * name pointer, name length, field type node, next entry.
- *
- * Returns a newly allocated record_type_expression node holding the list
- * head (0 when there are no fields) and the field count.
- *)
-proc _elna_parser_record_type_expression();
-var
-  entry: Word;
-  member_count: Word;
-  memory_start: Word;
-  field_name: Word;
-  field_length: Word;
-  field_type: Word;
-  token_kind: Word;
-  type_expression_size: Word;
-  result: Word;
-  previous_entry: Word;
-begin
-  (* Skip the token that opened the record. *)
-  _elna_lexer_skip_token();
-  member_count := 0;
-  memory_start := 0;
-
-  _elna_lexer_read_token(@token_kind);
-  if token_kind = ElnaLexerKind._end then
-    goto elna_parser_record_type_expression_end
-  end;
-  .elna_parser_record_type_expression_loop;
-  entry := _allocate(16);
-  member_count := member_count + 1;
-
-  field_name := _elna_lexer_global_get_start();
-  field_length := _elna_lexer_global_get_end() - field_name;
-
-  (* entry is advanced one word at a time while the fields are filled in. *)
-  entry^ := field_name;
-  entry := entry + 4;
-
-  entry^ := field_length;
-  entry := entry + 4;
-
-  (* Skip the identifier. *)
-  _elna_lexer_skip_token();
-  (* Skip the token between the name and the type. *)
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-
-  field_type := _elna_parser_type_expression();
-
-  entry^ := field_type;
-  entry := entry + 4;
-
-  (* entry now points at the "next" word; entry - 12 is the entry start. *)
-  entry^ := 0;
-  if memory_start = 0 then
-    memory_start := entry - 12
-  else
-    previous_entry^ := entry - 12
-  end;
-  (* Remember this "next" word so the following entry can be linked in. *)
-  previous_entry := entry;
-
-  _elna_lexer_read_token(@token_kind);
-  if token_kind = ElnaLexerKind.semicolon then
-    _elna_lexer_skip_token();
-    _elna_lexer_read_token(@token_kind);
-    goto elna_parser_record_type_expression_loop
-  end;
-
-  .elna_parser_record_type_expression_end;
-  (* Skip the closing token. *)
-  _elna_lexer_skip_token();
-
-  (* NOTE(review): the node is sized with the enumeration helper although a
-     record_type_expression is built; presumably both nodes have the same
-     size - confirm. *)
-  type_expression_size := _enumeration_type_expression_size();
-  result := _allocate(type_expression_size);
-
-  _node_set_kind(result, NodeKind.record_type_expression);
-  _record_type_expression_set_members(result, memory_start);
-  _record_type_expression_set_length(result, member_count);
-
-  return result
-end;
-
-(**
- * Parses an enumeration type expression.
- *
- * Members are collected into a linked list of 12-byte entries laid out as
- * name pointer, name length, next entry.
- *
- * Returns a newly allocated enumeration_type_expression node holding the
- * list head (0 when there are no members) and the member count.
- *)
-proc _elna_parser_enumeration_type_expression();
-var
-  token_kind: Word;
-  enumeration_name: Word;
-  name_length: Word;
-  memory_start: Word;
-  member_count: Word;
-  result: Word;
-  type_expression_size: Word;
-  entry: Word;
-  previous_entry: Word;
-begin
-  (* Skip the opening token. *)
-  _elna_lexer_skip_token();
-  memory_start := 0;
-  member_count := 0;
-
-  _elna_lexer_read_token(@token_kind);
-  if token_kind = ElnaLexerKind.right_paren then
-    goto elna_parser_enumeration_type_expression_end
-  end;
-  .elna_parser_enumeration_type_expression_loop;
-  entry := _allocate(12);
-  member_count := member_count + 1;
-
-  enumeration_name := _elna_lexer_global_get_start();
-  name_length := _elna_lexer_global_get_end() - enumeration_name;
-
-  entry^ := enumeration_name;
-  entry := entry + 4;
-
-  entry^ := name_length;
-  entry := entry + 4;
-
-  (* entry now points at the "next" word; entry - 8 is the entry start. *)
-  entry^ := 0;
-  if memory_start = 0 then
-    memory_start := entry - 8
-  else
-    previous_entry^ := entry - 8
-  end;
-  previous_entry := entry;
-
-  (* Skip the identifier. *)
-  _elna_lexer_skip_token();
-
-  _elna_lexer_read_token(@token_kind);
-  if token_kind = ElnaLexerKind.comma then
-    _elna_lexer_skip_token();
-    _elna_lexer_read_token(@token_kind);
-    goto elna_parser_enumeration_type_expression_loop
-  end;
-
-  .elna_parser_enumeration_type_expression_end;
-  (* Skip the closing token. *)
-  _elna_lexer_skip_token();
-
-  type_expression_size := _enumeration_type_expression_size();
-  result := _allocate(type_expression_size);
-
-  _node_set_kind(result, NodeKind.enumeration_type_expression);
-  _enumeration_type_expression_set_members(result, memory_start);
-  _enumeration_type_expression_set_length(result, member_count);
-
-  return result
-end;
-
-(**
- * Reads and creates enumeration type representation.
- *
- * record
- *   type_kind: Word;
- *   size: Word;
- *   members: StringArray;
- *   length: Word
- * end;
- *
- * Returns enumeration type description.
- *)
-proc _elna_name_type_enumeration(parser_node: Word);
-var
-  result: Word;
-  memory_start: Word;
-  member_count: Word;
-  member_array_size: Word;
-  member_array_start: Word;
-  member_array_current: Word;
-begin
-  member_array_size := _enumeration_type_size();
-  result := _allocate(member_array_size);
-
-  memory_start := _enumeration_type_expression_get_members(parser_node);
-  member_count := _enumeration_type_expression_get_length(parser_node);
-
-  (* Copy the list of enumeration members into an array of strings. *)
-  member_array_size := member_count * 8;
-  member_array_start := _allocate(member_array_size);
-  member_array_current := member_array_start;
-
-  .elna_name_type_enumeration_loop;
-  if member_count > 0 then
-    (* Name pointer. *)
-    member_array_current^ := memory_start^;
-    member_array_current := member_array_current + 4;
-    memory_start := memory_start + 4;
-
-    (* Name length. *)
-    member_array_current^ := memory_start^;
-    member_array_current := member_array_current + 4;
-    memory_start := memory_start + 4;
-
-    (* Follow the "next" word of the list entry. *)
-    memory_start := memory_start^;
-    member_count := member_count - 1;
-    goto elna_name_type_enumeration_loop
-  end;
-  (* The loop counted member_count down to zero; read it again. *)
-  member_count := _enumeration_type_expression_get_length(parser_node);
-
-  _type_set_kind(result, TypeKind.enumeration);
-  _type_set_size(result, 4);
-  _enumeration_type_set_members(result, member_array_start);
-  _enumeration_type_set_length(result, member_count);
-
-  return _type_info_create(result)
-end;
-
-(**
- * Creates the record type representation from a record type expression.
- *
- * The field list is copied into an array of 12-byte entries: name pointer,
- * name length and the result of _elna_name_type_expression for the field
- * type. The record size is set to 4 bytes per field.
- *
- * Parameters:
- *   parser_node - record_type_expression node.
- *
- * Returns a type info wrapping the record type.
- *)
-proc _elna_name_type_record(parser_node: Word);
-var
-  result: Word;
-  memory_start: Word;
-  member_count: Word;
-  member_array_size: Word;
-  member_array_start: Word;
-  member_array_current: Word;
-begin
-  member_array_size := _record_type_size();
-  result := _allocate(member_array_size);
-
-  memory_start := _record_type_expression_get_members(parser_node);
-  member_count := _record_type_expression_get_length(parser_node);
-
-  member_array_size := member_count * 12;
-  member_array_start := _allocate(member_array_size);
-  member_array_current := member_array_start;
-
-  .elna_name_type_record_loop;
-  if member_count > 0 then
-    (* Name pointer. *)
-    member_array_current^ := memory_start^;
-    member_array_current := member_array_current + 4;
-    memory_start := memory_start + 4;
-
-    (* Name length. *)
-    member_array_current^ := memory_start^;
-    member_array_current := member_array_current + 4;
-    memory_start := memory_start + 4;
-
-    (* Field type. *)
-    member_array_current^ := _elna_name_type_expression(memory_start^);
-    member_array_current := member_array_current + 4;
-    memory_start := memory_start + 4;
-
-    (* Follow the "next" word of the list entry. *)
-    memory_start := memory_start^;
-    member_count := member_count - 1;
-    goto elna_name_type_record_loop
-  end;
-  (* The loop counted member_count down to zero; read it again. *)
-  member_count := _record_type_expression_get_length(parser_node);
-
-  _type_set_kind(result, TypeKind._record);
-  _type_set_size(result, member_count * 4);
-  _record_type_set_members(result, member_array_start);
-  _record_type_set_length(result, member_count);
-
-  return _type_info_create(result)
-end;
-
-(**
- * Parses a type referred to by name; the current token is the name.
- *
- * Returns a newly allocated named_type_expression node.
- *)
-proc _elna_parser_named_type_expression();
-var
-  type_expression_size: Word;
-  result: Word;
-  type_name: Word;
-  name_length: Word;
-begin
-  type_expression_size := _named_type_expression_size();
-  result := _allocate(type_expression_size);
-
-  _node_set_kind(result, NodeKind.named_type_expression);
-  type_name := _elna_lexer_global_get_start();
-  name_length := _elna_lexer_global_get_end() - type_name;
-  _named_type_expression_set_name(result, type_name);
-  _named_type_expression_set_length(result, name_length);
-  _elna_lexer_skip_token();
-
-  return result
-end;
-
-(**
- * Parses a type expression, dispatching on the current token kind.
- *
- * Returns the type expression node, or 0 if the token starts no known type
- * expression.
- *)
-proc _elna_parser_type_expression();
-var
-  token_kind: Word;
-  result: Word;
-begin
-  result := 0;
-  _elna_lexer_read_token(@token_kind);
-
-  if token_kind = ElnaLexerKind.identifier then
-    result := _elna_parser_named_type_expression()
-  elsif token_kind = ElnaLexerKind.left_paren then
-    result := _elna_parser_enumeration_type_expression()
-  elsif token_kind = ElnaLexerKind._record then
-    result := _elna_parser_record_type_expression()
-  end;
-  return result
-end;
-
-(**
- * Resolves a type expression node.
- *
- * A named type is looked up in the global symbol table and the type stored in
- * the found info is returned. Enumeration and record expressions return the
- * type info created for them.
- *
- * Parameters:
- *   parser_node - Type expression node.
- *
- * Returns the resolved value, or 0 for an unknown node kind.
- *)
-proc _elna_name_type_expression(parser_node: Word);
-var
-  token_kind: Word;
-  type_name: Word;
-  name_length: Word;
-  result: Word;
-begin
-  (* Defined result for node kinds not handled below. *)
-  result := 0;
-  token_kind := _node_get_kind(parser_node);
-
-  if token_kind = NodeKind.named_type_expression then
-    type_name := _named_type_expression_get_name(parser_node);
-    name_length := _named_type_expression_get_length(parser_node);
-
-    result := _symbol_table_lookup(@symbol_table_global, type_name, name_length);
-    result := _type_info_get_type(result)
-  elsif token_kind = NodeKind.enumeration_type_expression then
-    result := _elna_name_type_enumeration(parser_node)
-  elsif token_kind = NodeKind.record_type_expression then
-    result := _elna_name_type_record(parser_node)
-  end;
-
-  return result
-end;
-
-(**
- * Returns the second word of a type info, the type representation.
- *)
-proc _type_info_get_type(this: Word);
-begin
-  this := this + 4;
-  return this^
-end;
-
-(**
- * Creates a parameter info: the info kind followed by the stack offset.
- *
- * Parameters:
- *   parameter_index - Parameter index.
- *)
-proc _parameter_info_create(parameter_index: Word);
-var
-  offset: Word;
-  current_word: Word;
-  result: Word;
-begin
-  result := _allocate(8);
-  current_word := result;
-  current_word^ := InfoKind.parameter_info;
-
-  current_word := current_word + 4;
-
-  (* Calculate the stack offset: 88 - (4 * parameter_counter) *)
-  offset := parameter_index * 4;
-  current_word^ := 88 - offset;
-
-  return result
-end;
-
-(**
- * Returns the second word of a parameter info, the stack offset.
- *)
-proc _parameter_info_get_offset(this: Word);
-begin
-  this := this + 4;
-  return this^
-end;
-
-(**
- * Creates a type info: the info kind followed by the type representation.
- *
- * Parameters:
- *   type_representation - Type description to wrap.
- *)
-proc _type_info_create(type_representation: Word);
-var
-  result: Word;
-  current_word: Word;
-begin
-  result := _allocate(8);
-  current_word := result;
-  current_word^ := InfoKind.type_info;
-
-  current_word := current_word + 4;
-  current_word^ := type_representation;
-
-  return result
-end;
-
-(**
- * Parameters:
- *   temporary_index - Temporary variable index.
- *)
-proc _temporary_info_create(temporary_index: Word);
-var
-  offset: Word;
-  current_word: Word;
-  result: Word;
-begin
-  (* Two words: the info kind, then the stack offset. *)
-  result := _allocate(8);
-  current_word := result;
-  current_word^ := InfoKind.temporary_info;
-  current_word := current_word + 4;
-
-  (* Calculate the stack offset: 4 * variable_counter. *)
-  current_word^ := temporary_index * 4;
-
-  return result
-end;
-
-(**
- * Parameters:
- *   symbol_table - Local symbol table.
- *)
-proc _procedure_info_create(symbol_table: Word);
-var
-  current_word: Word;
-  result: Word;
-begin
-  (* Two words: the info kind, then the local symbol table. *)
-  result := _allocate(8);
-  current_word := result;
-  current_word^ := InfoKind.procedure_info;
-  current_word := current_word + 4;
-
-  current_word^ := symbol_table;
-
-  return result
-end;
-
-(* Returns the second word of a procedure info, the local symbol table. *)
-proc _procedure_info_get_symbol_table(this: Word);
-begin
-  this := this + 4;
-  return this^
-end;
-
-(**
- * Parameters:
- *   parameter_index - Parameter index.
- *)
-proc _elna_name_procedure_parameter(parser_node: Word, parameter_index: Word, symbol_table: Word);
-var
-  name_length: Word;
-  info: Word;
-  name_position: Word;
-begin
-  name_position := _declaration_get_name(parser_node);
-  name_length := _declaration_get_length(parser_node);
-
-  (* Enter the parameter into symbol_table under the declared name. *)
-  info := _parameter_info_create(parameter_index);
-  _symbol_table_enter(symbol_table, name_position, name_length, info)
-end;
-
-(**
- * Parameters:
- *   variable_index - Variable index.
- *)
-proc _elna_name_procedure_temporary(parser_node: Word, variable_index: Word, symbol_table: Word);
-var
-  name_length: Word;
-  info: Word;
-  name_position: Word;
-begin
-  name_position := _declaration_get_name(parser_node);
-  name_length := _declaration_get_length(parser_node);
-
-  (* Enter the temporary into symbol_table under the declared name. *)
-  info := _temporary_info_create(variable_index);
-  _symbol_table_enter(symbol_table, name_position, name_length, info)
-end;
-
-(**
- * Enters every declaration of a linked list as a temporary, numbering them
- * from 0 in list order.
- *
- * Parameters:
- *   parser_node - First declaration of the list, or 0.
- *   symbol_table - Symbol table receiving the entries.
- *)
-proc _elna_name_procedure_temporaries(parser_node: Word, symbol_table: Word);
-var
-  temporary_counter: Word;
-begin
-  temporary_counter := 0;
-
-  .elna_name_procedure_temporaries_loop;
-  if parser_node <> 0 then
-    _elna_name_procedure_temporary(parser_node, temporary_counter, symbol_table);
-
-    temporary_counter := temporary_counter + 1;
-    parser_node := _declaration_get_next(parser_node);
-    goto elna_name_procedure_temporaries_loop
-  end
-end;
-
-(**
- * Parses a procedure declaration: name, parameter list, var part and body.
- *
- * Returns a newly allocated procedure_declaration node with no successor.
- *)
-proc _elna_parser_procedure_declaration();
-var
-  name_pointer: Word;
-  name_length: Word;
-  token_kind: Word;
-  result: Word;
-  declaration_size: Word;
-  parameter_head: Word;
-begin
-  declaration_size := _procedure_declaration_size();
-  result := _allocate(declaration_size);
-
-  _node_set_kind(result, NodeKind.procedure_declaration);
-  _declaration_set_next(result, 0);
-
-  (* Skip "proc ". *)
-  _elna_lexer_skip_token();
-
-  _elna_lexer_read_token(@token_kind);
-  name_pointer := _elna_lexer_global_get_start();
-  name_length := _elna_lexer_global_get_end() - name_pointer;
-
-  _declaration_set_name(result, name_pointer);
-  _declaration_set_length(result, name_length);
-  (* Skip procedure name. *)
-  _elna_lexer_skip_token();
-
-  (* Skip open paren. *)
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-  parameter_head := 0;
-
-  .elna_parser_procedure_declaration_parameter;
-  _elna_lexer_read_token(@token_kind);
-
-  if token_kind <> ElnaLexerKind.right_paren then
-    (* From here on name_pointer holds the parameter node just parsed and
-       name_length holds the previous parameter node. *)
-    name_pointer := _elna_parser_variable_declaration();
-    if parameter_head = 0 then
-      parameter_head := name_pointer
-    else
-      _declaration_set_next(name_length, name_pointer)
-    end;
-    name_length := name_pointer;
-
-    _elna_lexer_read_token(@token_kind);
-
-    if token_kind = ElnaLexerKind.comma then
-      _elna_lexer_skip_token();
-      goto elna_parser_procedure_declaration_parameter
-    end
-  end;
-  (* Skip close paren. *)
-  _elna_lexer_skip_token();
-  _procedure_declaration_set_parameters(result, parameter_head);
-
-  (* Skip semicolon and newline. *)
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-
-  (* parameter_head is reused for the temporaries and then for the body. *)
-  parameter_head := _elna_parser_var_part();
-  _procedure_declaration_set_temporaries(result, parameter_head);
-
-  (* Skip semicolon, "begin" and newline. *)
-  _elna_lexer_read_token(@token_kind);
-  if token_kind = ElnaLexerKind._begin then
-    _elna_lexer_skip_token();
-    parameter_head := _elna_parser_statements()
-  elsif token_kind = ElnaLexerKind._return then
-    parameter_head := _elna_parser_return_statement()
-  end;
-  (* NOTE(review): if neither branch ran, parameter_head still holds the var
-     part and is stored as the body. *)
-  _procedure_declaration_set_body(result, parameter_head);
-
-  (* Skip the "end" keyword. *)
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-
-  return result
-end;
-
-(**
- * Emits one store per parameter: register a0 + index is stored at sp plus
- * the offset recorded for the parameter in new_symbol_table.
- *
- * Parameters:
- *   current_parameter - First parameter declaration, or 0.
- *   new_symbol_table - Symbol table of the procedure.
- *
- * Returns the first store instruction, or 0 without parameters.
- *)
-proc _elna_tac_parameters(current_parameter: Word, new_symbol_table: Word);
-var
-  name_pointer: Word;
-  name_length: Word;
-  parameter_counter: Word;
-  instruction: Word;
-  first_instruction: Word;
-  current_instruction: Word;
-  symbol_info: Word;
-begin
-  first_instruction := 0;
-  parameter_counter := 0;
-
-  .elna_tac_parameters_loop;
-  if current_parameter <> 0 then
-    name_pointer := _declaration_get_name(current_parameter);
-    name_length := _declaration_get_length(current_parameter);
-    symbol_info := _symbol_table_lookup(new_symbol_table, name_pointer, name_length);
-
-    (* symbol_info is reused to hold the stack offset. *)
-    symbol_info := _parameter_info_get_offset(symbol_info);
-
-    instruction := _elna_tac_store_word(ElnaTacRegister.a0 + parameter_counter,
-      ElnaTacRegister.sp, symbol_info);
-    if first_instruction = 0 then
-      first_instruction := instruction
-    else
-      _elna_tac_instruction_set_next(current_instruction, instruction)
-    end;
-    current_instruction := instruction;
-
-    parameter_counter := parameter_counter + 1;
-
-    current_parameter := _declaration_get_next(current_parameter);
-    goto elna_tac_parameters_loop
-  end;
-  return first_instruction
-end;
-
-(**
- * Lowers a procedure declaration into a TAC declaration whose body is:
- * start instruction, parameter stores, statements, ret instruction.
- *
- * Parameters:
- *   parser_node - procedure_declaration node.
- *
- * Returns the newly allocated TAC declaration.
- *)
-proc _elna_tac_procedure_declaration(parser_node: Word);
-var
-  name_pointer: Word;
-  name_length: Word;
-  current_parameter: Word;
-  body: Word;
-  new_symbol_table: Word;
-  symbol_info: Word;
-  instruction: Word;
-  first_instruction: Word;
-  result: Word;
-  result_size: Word;
-begin
-  result_size := _elna_tac_declaration_size();
-  result := _allocate(result_size);
-
-  _elna_tac_declaration_set_next(result, 0);
-
-  name_pointer := _declaration_get_name(parser_node);
-  name_length := _declaration_get_length(parser_node);
-
-  _elna_tac_declaration_set_name(result, name_pointer);
-  _elna_tac_declaration_set_length(result, name_length);
-
-  symbol_info := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length);
-  new_symbol_table := _procedure_info_get_symbol_table(symbol_info);
-
-  (* Write the prologue. *)
-  first_instruction := _elna_tac_instruction_create(ElnaTacOperator.start);
-
-  (* NOTE(review): the three calls below all pass first_instruction, so
-     _elna_tac_instruction_set_next presumably appends at the end of the
-     chain - confirm. *)
-  current_parameter := _procedure_declaration_get_parameters(parser_node);
-  current_parameter := _elna_tac_parameters(current_parameter, new_symbol_table);
-  _elna_tac_instruction_set_next(first_instruction, current_parameter);
-
-  body := _procedure_declaration_get_body(parser_node);
-  instruction := _elna_tac_statements(body, new_symbol_table);
-  _elna_tac_instruction_set_next(first_instruction, instruction);
-
-  (* Write the epilogue. *)
-  instruction := _elna_tac_instruction_create(ElnaTacOperator.ret);
-  _elna_tac_instruction_set_next(first_instruction, instruction);
-
-  _elna_tac_declaration_set_body(result, first_instruction);
-
-  return result
-end;
-
-(**
- * Parses consecutive procedure declarations.
- *
- * Returns the first declaration of the linked list, or 0 if there is none.
- *)
-proc _elna_parser_procedures();
-var
-  parser_node: Word;
-  result: Word;
-  current_declaration: Word;
-  token_kind: Word;
-begin
-  result := 0;
-
-  .elna_parser_procedures_loop;
-  _skip_empty_lines();
-  _elna_lexer_read_token(@token_kind);
-
-  if token_kind = ElnaLexerKind._proc then
-    parser_node := _elna_parser_procedure_declaration();
-    if result = 0 then
-      result := parser_node
-    else
-      _declaration_set_next(current_declaration, parser_node)
-    end;
-    current_declaration := parser_node;
-
-    (* Skip semicolon. *)
-    _elna_lexer_read_token(@token_kind);
-    _elna_lexer_skip_token();
-
-    goto elna_parser_procedures_loop
-  end;
-  return result
-end;
-
-(**
- * Lowers a linked list of procedure declarations.
- *
- * Parameters:
- *   parser_node - First procedure declaration, or 0.
- *
- * Returns the first TAC declaration of the resulting list, or 0.
- *)
-proc _elna_tac_procedures(parser_node: Word);
-var
-  result: Word;
-  current_procedure: Word;
-  first_procedure: Word;
-begin
-  first_procedure := 0;
-
-  .elna_tac_procedures_loop;
-  if parser_node = 0 then
-    goto elna_tac_procedures_end
-  end;
-  result := _elna_tac_procedure_declaration(parser_node);
-  if first_procedure = 0 then
-    first_procedure := result
-  else
-    _elna_tac_declaration_set_next(current_procedure, result)
-  end;
-  current_procedure := result;
-
-  parser_node := _declaration_get_next(parser_node);
-  goto elna_tac_procedures_loop;
-
-  .elna_tac_procedures_end;
-  return first_procedure
-end;
-
-(**
- * Skips comments.
- *)
-proc _skip_empty_lines();
-var
-  token_kind: Word;
-begin
-  .skip_empty_lines_rerun;
-
-  _elna_lexer_read_token(@token_kind);
-
-  (* Keep skipping while the current token is a comment. *)
-  if token_kind = ElnaLexerKind.comment then
-    _elna_lexer_skip_token();
-    goto skip_empty_lines_rerun
-  end
-end;
-
-(**
- * Parses one type declaration; the current token is the type name.
- *
- * Returns a newly allocated type_declaration node with no successor.
- *)
-proc _elna_parser_type_declaration();
-var
-  token_kind: Word;
-  type_name: Word;
-  name_length: Word;
-  parser_node: Word;
-  result: Word;
-  declaration_size: Word;
-begin
-  _elna_lexer_read_token(@token_kind);
-  type_name := _elna_lexer_global_get_start();
-  name_length := _elna_lexer_global_get_end() - type_name;
-
-  (* Skip the name and the token that follows it. *)
-  _elna_lexer_skip_token();
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-
-  parser_node := _elna_parser_type_expression();
-  declaration_size := _type_declaration_size();
-  result := _allocate(declaration_size);
-
-  _node_set_kind(result, NodeKind.type_declaration);
-  _declaration_set_next(result, 0);
-  _declaration_set_name(result, type_name);
-  _declaration_set_length(result, name_length);
-  _type_declaration_set__type(result, parser_node);
-
-  (* Skip the token after the type expression. *)
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-
-  return result
-end;
-
-(**
- * Resolves the type of a type declaration and enters it into the global
- * symbol table under the declared name.
- *
- * Parameters:
- *   parser_node - type_declaration node.
- *)
-proc _elna_name_type_declaration(parser_node: Word);
-var
-  type_name: Word;
-  name_length: Word;
-  type_info: Word;
-begin
-  type_name := _declaration_get_name(parser_node);
-  name_length := _declaration_get_length(parser_node);
-
-  parser_node := _type_declaration_get__type(parser_node);
-  type_info := _elna_name_type_expression(parser_node);
-
-  _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info)
-end;
-
-(* Intentionally empty in this stage. *)
-proc _elna_type_type_declaration(parser_node: Word);
-begin
-end;
-
-(**
- * Parses the optional type part: the type keyword followed by type
- * declarations.
- *
- * Returns the first declaration of the linked list, or 0 if the part is
- * absent or empty.
- *)
-proc _elna_parser_type_part();
-var
-  token_kind: Word;
-  parser_node: Word;
-  result: Word;
-  current_declaration: Word;
-begin
-  result := 0;
-  _skip_empty_lines();
-  _elna_lexer_read_token(@token_kind);
-
-  if token_kind <> ElnaLexerKind._type then
-    goto elna_parser_type_part_end
-  end;
-  _elna_lexer_skip_token();
-
-  .elna_parser_type_part_loop;
-  _skip_empty_lines();
-
-  _elna_lexer_read_token(@token_kind);
-  if token_kind = ElnaLexerKind.identifier then
-    parser_node := _elna_parser_type_declaration();
-
-    if result = 0 then
-      result := parser_node
-    else
-      _declaration_set_next(current_declaration, parser_node)
-    end;
-    current_declaration := parser_node;
-    goto elna_parser_type_part_loop
-  end;
-
-  .elna_parser_type_part_end;
-  return result
-end;
-
-(**
- * Parses "name, separator token, type expression"; the current token is the
- * name.
- *
- * Returns a newly allocated variable_declaration node with no successor.
- *)
-proc _elna_parser_variable_declaration();
-var
-  token_kind: Word;
-  name: Word;
-  name_length: Word;
-  variable_type: Word;
-  result: Word;
-  declaration_size: Word;
-begin
-  _elna_lexer_read_token(@token_kind);
-
-  name := _elna_lexer_global_get_start();
-  name_length := _elna_lexer_global_get_end() - name;
-
-  (* Skip the variable name and colon with the type. *)
-  _elna_lexer_skip_token();
-  _elna_lexer_read_token(@token_kind);
-  _elna_lexer_skip_token();
-  variable_type := _elna_parser_type_expression();
-
-  declaration_size := _variable_declaration_size();
-  result := _allocate(declaration_size);
-
-  _node_set_kind(result, NodeKind.variable_declaration);
-  _declaration_set_next(result, 0);
-  _declaration_set_name(result, name);
-  _declaration_set_length(result, name_length);
-  _variable_declaration_set__type(result, variable_type);
-
-  return result
-end;
-
-proc _elna_tac_variable_declaration(parser_tree: Word);
-var
-  name: Word;
-  name_length: Word;
-  variable_type: Word;
-  result: Word;
-  result_size: Word;
-begin
-  result_size := _elna_tac_declaration_size();
-  result := _allocate(result_size);
-
-  _elna_tac_declaration_set_next(result, 0);
-
-  name := _declaration_get_name(parser_tree);
-  name_length := _declaration_get_length(parser_tree);
-  variable_type := _variable_declaration_get__type(parser_tree);
-
-  _elna_tac_declaration_set_name(result, name);
-  _elna_tac_declaration_set_length(result, name_length);
-
-  name := _named_type_expression_get_name(variable_type);
-  name_length := _named_type_expression_get_length(variable_type);
-
-  if _string_compare("Array", 5, name, name_length) then
-    (* Else we assume this is a zeroed 4096 bytes big array.
*) - _elna_tac_declaration_set_body(result, 4096) - else - _elna_tac_declaration_set_body(result, 4) - end; - return result -end; - -proc _elna_tac_type_field(name_pointer: Word, name_length: Word, field_pointer: Word, field_offset: Word); -var - result_size: Word; - first_result: Word; - second_result: Word; - new_name: Word; - new_length: Word; - field_length: Word; - instruction: Word; - name_target: Word; - next_instruction: Word; -begin - result_size := _elna_tac_declaration_size(); - field_length := field_pointer + 4; - field_length := field_length^; - new_length := field_length + name_length; - new_length := new_length + 5; - - first_result := _allocate(result_size); - _elna_tac_declaration_set_next(first_result, 0); - - new_name := _allocate(new_length); - - name_target := new_name; - _memcpy(name_target, name_pointer, name_length); - name_target := name_target + name_length; - _memcpy(name_target, "_get_", 5); - name_target := name_target + 5; - _memcpy(name_target, field_pointer^, field_length); - - _elna_tac_declaration_set_name(first_result, new_name); - _elna_tac_declaration_set_length(first_result, new_length); - - instruction := _elna_tac_add_immediate(ElnaTacRegister.a0, ElnaTacRegister.a0, field_offset, 0); - next_instruction := _elna_tac_load_word(ElnaTacRegister.a0, ElnaTacRegister.a0, 0); - _elna_tac_instruction_set_next(instruction, next_instruction); - _elna_tac_declaration_set_body(first_result, instruction); - - second_result := _allocate(result_size); - _elna_tac_declaration_set_next(second_result, 0); - - new_name := _allocate(new_length); - - name_target := new_name; - _memcpy(name_target, name_pointer, name_length); - name_target := name_target + name_length; - _memcpy(name_target, "_set_", 5); - name_target := name_target + 5; - _memcpy(name_target, field_pointer^, field_length); - - _elna_tac_declaration_set_name(second_result, new_name); - _elna_tac_declaration_set_length(second_result, new_length); - - instruction := 
_elna_tac_add_immediate(ElnaTacRegister.a0, ElnaTacRegister.a0, field_offset, 0); - next_instruction := _elna_tac_store_word(ElnaTacRegister.a1, ElnaTacRegister.a0, 0); - _elna_tac_instruction_set_next(instruction, next_instruction); - _elna_tac_declaration_set_body(second_result, instruction); - - _elna_tac_declaration_set_next(first_result, second_result); - - return first_result -end; - -proc _elna_tac_type_record(name_pointer: Word, name_length: Word, type_representation: Word, current_result: Word); -var - result_size: Word; - first_result: Word; - result: Word; - type_size: Word; - new_name: Word; - new_length: Word; - instruction: Word; - field_count: Word; - field_offset: Word; - field_pointer: Word; -begin - result_size := _elna_tac_declaration_size(); - first_result := _allocate(result_size); - result := 0; - - (* Debug. Error stream output. - _syscall(2, name_pointer, name_length, 0, 0, 0, 64); *) - - type_size := _type_get_size(type_representation); - new_length := name_length + 5; - new_name := _allocate(new_length); - - _memcpy(new_name, name_pointer, name_length); - _memcpy(new_name + name_length, "_size", 5); - - _elna_tac_declaration_set_name(first_result, new_name); - _elna_tac_declaration_set_length(first_result, new_length); - - instruction := _elna_tac_load_immediate(ElnaTacRegister.a0, type_size, 0); - _elna_tac_declaration_set_body(first_result, instruction); - - field_count := _record_type_get_length(type_representation); - field_pointer := _record_type_get_members(type_representation); - field_offset := 0; - current_result^ := first_result; - - .elna_tac_type_record_fields; - if field_count > 0 then - result := _elna_tac_type_field(name_pointer, name_length, field_pointer, field_offset); - - _elna_tac_declaration_set_next(current_result^, result); - current_result^ := _elna_tac_declaration_get_next(result); - - field_offset := field_offset + 4; - field_count := field_count - 1; - field_pointer := field_pointer + 12; - goto 
elna_tac_type_record_fields - end; - - return first_result -end; - -proc _elna_tac_type_part(parser_node: Word); -var - name_pointer: Word; - name_length: Word; - result: Word; - first_result: Word; - symbol: Word; - info_type: Word; - type_kind: Word; - current_result: Word; - out_result: Word; -begin - first_result := 0; - - .elna_tac_type_part_loop; - if parser_node = 0 then - goto elna_tac_type_part_end - end; - - name_pointer := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - symbol := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); - - info_type := _type_info_get_type(symbol); - type_kind := _type_get_kind(info_type); - - if type_kind = TypeKind._record then - result := _elna_tac_type_record(name_pointer, name_length, info_type, @out_result) - else - result := 0; - out_result := 0 - end; - if first_result = 0 then - first_result := result; - current_result := out_result - elsif result <> 0 then - _elna_tac_declaration_set_next(current_result, result); - current_result := out_result - end; - parser_node := _declaration_get_next(parser_node); - goto elna_tac_type_part_loop; - - .elna_tac_type_part_end; - return first_result -end; - -proc _elna_parser_var_part(); -var - result: Word; - token_kind: Word; - variable_node: Word; - current_declaration: Word; -begin - result := 0; - _elna_lexer_read_token(@token_kind); - - if token_kind <> ElnaLexerKind._var then - goto elna_parser_var_part_end - end; - (* Skip "var". *) - _elna_lexer_skip_token(); - - .elna_parser_var_part_loop; - _skip_empty_lines(); - _elna_lexer_read_token(@token_kind); - - if token_kind = ElnaLexerKind.identifier then - variable_node := _elna_parser_variable_declaration(); - - (* Skip semicolon. 
*) - _elna_lexer_read_token(@token_kind); - _elna_lexer_skip_token(); - - if result = 0 then - result := variable_node - else - _declaration_set_next(current_declaration, variable_node) - end; - current_declaration := variable_node; - goto elna_parser_var_part_loop - end; - - .elna_parser_var_part_end; - return result -end; - -proc _elna_tac_var_part(parser_node: Word); -var - node: Word; - current_variable: Word; - first_variable: Word; -begin - first_variable := 0; - if parser_node = 0 then - goto elna_tac_var_part_end - end; - - .elna_tac_var_part_loop; - node := _elna_tac_variable_declaration(parser_node); - if first_variable = 0 then - first_variable := node - else - _elna_tac_declaration_set_next(current_variable, node) - end; - current_variable := node; - - parser_node := _declaration_get_next(parser_node); - if parser_node <> 0 then - goto elna_tac_var_part_loop - end; - - .elna_tac_var_part_end; - return first_variable -end; - -proc _elna_parser_module_declaration(); -var - parser_node: Word; - declaration_size: Word; - result: Word; -begin - declaration_size := _module_declaration_size(); - result := _allocate(declaration_size); - - _node_set_kind(result, NodeKind.module_declaration); - - parser_node := _elna_parser_type_part(); - _module_declaration_set_types(result, parser_node); - - parser_node := _elna_parser_var_part(); - _module_declaration_set_globals(result, parser_node); - - parser_node := _elna_parser_procedures(); - _module_declaration_set_procedures(result, parser_node); - - return result -end; - -(** - * Process the source code and print the generated code. 
- *) -proc _elna_tac_module_declaration(parser_node: Word); -var - data_part: Word; - code_part: Word; - type_part: Word; - current_declaration: Word; - next_declaration: Word; -begin - type_part := _module_declaration_get_types(parser_node); - type_part := _elna_tac_type_part(type_part); - - data_part := _module_declaration_get_globals(parser_node); - data_part := _elna_tac_var_part(data_part); - - code_part := _module_declaration_get_procedures(parser_node); - code_part := _elna_tac_procedures(code_part); - - current_declaration := code_part; - - .elna_tac_module_declaration_types; - next_declaration := _elna_tac_declaration_get_next(current_declaration); - if next_declaration <> 0 then - current_declaration := next_declaration; - - goto elna_tac_module_declaration_types - end; - _elna_tac_declaration_set_next(current_declaration, type_part); - - return _elna_tac_module_create(data_part, code_part) -end; - -proc _elna_name_procedure_declaration(parser_node: Word); -var - name_pointer: Word; - name_length: Word; - new_symbol_table: Word; - parameter_counter: Word; - symbol_info: Word; - current_parameter: Word; -begin - new_symbol_table := _symbol_table_create(); - symbol_info := _procedure_info_create(new_symbol_table); - - name_pointer := _declaration_get_name(parser_node); - name_length := _declaration_get_length(parser_node); - - current_parameter := _procedure_declaration_get_parameters(parser_node); - parameter_counter := 0; - .elna_name_procedure_declaration_parameter; - if current_parameter <> 0 then - _elna_name_procedure_parameter(current_parameter, parameter_counter, new_symbol_table); - parameter_counter := parameter_counter + 1; - - current_parameter := _declaration_get_next(current_parameter); - goto elna_name_procedure_declaration_parameter - end; - current_parameter := _procedure_declaration_get_temporaries(parser_node); - _elna_name_procedure_temporaries(current_parameter, new_symbol_table); - - _symbol_table_enter(@symbol_table_global, 
name_pointer, name_length, symbol_info) -end; - -proc _elna_type_procedure_declaration(parser_node: Word); -begin -end; - -proc _elna_name_module_declaration(parser_node: Word); -var - current_part: Word; - result: Word; -begin - current_part := _module_declaration_get_types(parser_node); - .elna_name_module_declaration_type; - if current_part <> 0 then - _elna_name_type_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_name_module_declaration_type - end; - - current_part := _module_declaration_get_procedures(parser_node); - .elna_name_module_declaration_procedure; - if current_part <> 0 then - _elna_name_procedure_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_name_module_declaration_procedure - end -end; - -proc _elna_type_module_declaration(parser_node: Word); -var - current_part: Word; -begin - current_part := _module_declaration_get_types(parser_node); - .elna_type_module_declaration_type; - if current_part <> 0 then - _elna_type_type_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_type_module_declaration_type - end; - - current_part := _module_declaration_get_procedures(parser_node); - .elna_type_module_declaration_procedure; - if current_part <> 0 then - _elna_type_procedure_declaration(current_part); - current_part := _declaration_get_next(current_part); - - goto elna_type_module_declaration_procedure - end -end; - -proc _compile(); -var - parser_node: Word; - tac: Word; -begin - parser_node := _elna_parser_module_declaration(); - _elna_name_module_declaration(parser_node); - _elna_type_module_declaration(parser_node); - tac := _elna_tac_module_declaration(parser_node); - _elna_writer_module(tac) -end; - -(** - * Terminates the program. a0 contains the return code. - * - * Parameters: - * a0 - Status code. 
- *) -proc _exit(status: Word); -begin - _syscall(status, 0, 0, 0, 0, 0, 93) -end; - -(** - * Looks for a symbol in the given symbol table. - * - * Parameters: - * symbol_table - Symbol table. - * symbol_name - Symbol name pointer. - * name_length - Symbol name length. - * - * Returns the symbol pointer or 0 in a0. - *) -proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word); -var - result: Word; - symbol_table_length: Word; - current_name: Word; - current_length: Word; -begin - result := 0; - - (* The first word in the symbol table is its length, get it. *) - symbol_table_length := symbol_table^; - - (* Go to the first symbol position. *) - symbol_table := symbol_table + 4; - - .symbol_table_lookup_loop; - if symbol_table_length = 0 then - goto symbol_table_lookup_end - end; - - (* Symbol name pointer and length. *) - current_name := symbol_table^; - current_length := symbol_table + 4; - current_length := current_length^; - - (* If lengths don't match, exit and return nil. *) - if name_length <> current_length then - goto symbol_table_lookup_repeat - end; - (* If names don't match, exit and return nil. *) - if _memcmp(symbol_name, current_name, name_length) then - goto symbol_table_lookup_repeat - end; - (* Otherwise, the symbol is found. *) - result := symbol_table + 8; - result := result^; - goto symbol_table_lookup_end; - - .symbol_table_lookup_repeat; - symbol_table := symbol_table + 12; - symbol_table_length := symbol_table_length - 1; - goto symbol_table_lookup_loop; - - .symbol_table_lookup_end; - return result -end; - -(** - * Create a new local symbol table in the symbol memory region after the last - * known symbol table. 
- *) -proc _symbol_table_create(); -var - new_symbol_table: Word; - table_length: Word; - current_table: Word; -begin - new_symbol_table := symbol_table_store; - - .symbol_table_create_loop; - table_length := new_symbol_table^; - - if table_length <> 0 then - table_length := table_length * 12; - table_length := table_length + 4; - new_symbol_table := new_symbol_table + table_length; - goto symbol_table_create_loop - end; - - return new_symbol_table -end; - -(** - * Inserts a symbol into the table. - * - * Parameters: - * symbol_table - Symbol table. - * symbol_name - Symbol name pointer. - * name_length - Symbol name length. - * symbol - Symbol pointer. - *) -proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); -var - table_length: Word; - symbol_pointer: Word; -begin - (* The first word in the symbol table is its length, get it. *) - table_length := symbol_table^; - - (* Calculate the offset for the new symbol. *) - symbol_pointer := table_length * 12; - symbol_pointer := symbol_pointer + 4; - symbol_pointer := symbol_table + symbol_pointer; - - symbol_pointer^ := symbol_name; - symbol_pointer := symbol_pointer + 4; - symbol_pointer^ := name_length; - symbol_pointer := symbol_pointer + 4; - symbol_pointer^ := symbol; - - (* Increment the symbol table length. *) - table_length := table_length + 1; - symbol_table^ := table_length -end; - -proc _symbol_table_build(); -var - current_info: Word; - current_type: Word; -begin - (* Set the table length to 0. *) - symbol_table_global := 0; - - current_type := _allocate(8); - _type_set_kind(current_type, TypeKind.primitive); - _type_set_size(current_type, 4); - - (* Enter built-in symbols. 
*) - current_info := _type_info_create(current_type); - _symbol_table_enter(@symbol_table_global, "Word", 4, current_info); - - current_info := _type_info_create(current_type); - _symbol_table_enter(@symbol_table_global, "Array", 5, current_info) -end; - -(** - * Assigns some value to at array index. - * - * Parameters: - * array - Array pointer. - * index - Index (word offset into the array). - * data - Data to assign. - *) -proc _assign_at(array: Word, index: Word, data: Word); -var - target: Word; -begin - target := index - 1; - target := target * 4; - target := array + target; - - target^ := data -end; - -proc _get_at(array: Word, index: Word); -var - target: Word; -begin - target := index - 1; - target := target * 4; - target := array + target; - - return target^ -end; - -(** - * Initializes the array with character classes. - *) -proc _elna_lexer_classifications(); -var - code: Word; -begin - _assign_at(@classification, 1, ElnaLexerClass.eof); - _assign_at(@classification, 2, ElnaLexerClass.invalid); - _assign_at(@classification, 3, ElnaLexerClass.invalid); - _assign_at(@classification, 4, ElnaLexerClass.invalid); - _assign_at(@classification, 5, ElnaLexerClass.invalid); - _assign_at(@classification, 6, ElnaLexerClass.invalid); - _assign_at(@classification, 7, ElnaLexerClass.invalid); - _assign_at(@classification, 8, ElnaLexerClass.invalid); - _assign_at(@classification, 9, ElnaLexerClass.invalid); - _assign_at(@classification, 10, ElnaLexerClass.space); - _assign_at(@classification, 11, ElnaLexerClass.space); - _assign_at(@classification, 12, ElnaLexerClass.invalid); - _assign_at(@classification, 13, ElnaLexerClass.invalid); - _assign_at(@classification, 14, ElnaLexerClass.space); - _assign_at(@classification, 15, ElnaLexerClass.invalid); - _assign_at(@classification, 16, ElnaLexerClass.invalid); - _assign_at(@classification, 17, ElnaLexerClass.invalid); - _assign_at(@classification, 18, ElnaLexerClass.invalid); - _assign_at(@classification, 19, 
ElnaLexerClass.invalid); - _assign_at(@classification, 20, ElnaLexerClass.invalid); - _assign_at(@classification, 21, ElnaLexerClass.invalid); - _assign_at(@classification, 22, ElnaLexerClass.invalid); - _assign_at(@classification, 23, ElnaLexerClass.invalid); - _assign_at(@classification, 24, ElnaLexerClass.invalid); - _assign_at(@classification, 25, ElnaLexerClass.invalid); - _assign_at(@classification, 26, ElnaLexerClass.invalid); - _assign_at(@classification, 27, ElnaLexerClass.invalid); - _assign_at(@classification, 28, ElnaLexerClass.invalid); - _assign_at(@classification, 29, ElnaLexerClass.invalid); - _assign_at(@classification, 30, ElnaLexerClass.invalid); - _assign_at(@classification, 31, ElnaLexerClass.invalid); - _assign_at(@classification, 32, ElnaLexerClass.invalid); - _assign_at(@classification, 33, ElnaLexerClass.space); - _assign_at(@classification, 34, ElnaLexerClass.single); - _assign_at(@classification, 35, ElnaLexerClass.double_quote); - _assign_at(@classification, 36, ElnaLexerClass.other); - _assign_at(@classification, 37, ElnaLexerClass.other); - _assign_at(@classification, 38, ElnaLexerClass.single); - _assign_at(@classification, 39, ElnaLexerClass.single); - _assign_at(@classification, 40, ElnaLexerClass.single_quote); - _assign_at(@classification, 41, ElnaLexerClass.left_paren); - _assign_at(@classification, 42, ElnaLexerClass.right_paren); - _assign_at(@classification, 43, ElnaLexerClass.asterisk); - _assign_at(@classification, 44, ElnaLexerClass.single); - _assign_at(@classification, 45, ElnaLexerClass.single); - _assign_at(@classification, 46, ElnaLexerClass.minus); - _assign_at(@classification, 47, ElnaLexerClass.dot); - _assign_at(@classification, 48, ElnaLexerClass.single); - _assign_at(@classification, 49, ElnaLexerClass.zero); - _assign_at(@classification, 50, ElnaLexerClass.digit); - _assign_at(@classification, 51, ElnaLexerClass.digit); - _assign_at(@classification, 52, ElnaLexerClass.digit); - _assign_at(@classification, 53, 
ElnaLexerClass.digit); - _assign_at(@classification, 54, ElnaLexerClass.digit); - _assign_at(@classification, 55, ElnaLexerClass.digit); - _assign_at(@classification, 56, ElnaLexerClass.digit); - _assign_at(@classification, 57, ElnaLexerClass.digit); - _assign_at(@classification, 58, ElnaLexerClass.digit); - _assign_at(@classification, 59, ElnaLexerClass.colon); - _assign_at(@classification, 60, ElnaLexerClass.single); - _assign_at(@classification, 61, ElnaLexerClass.less); - _assign_at(@classification, 62, ElnaLexerClass.equals); - _assign_at(@classification, 63, ElnaLexerClass.greater); - _assign_at(@classification, 64, ElnaLexerClass.other); - _assign_at(@classification, 65, ElnaLexerClass.single); - _assign_at(@classification, 66, ElnaLexerClass.alpha); - _assign_at(@classification, 67, ElnaLexerClass.alpha); - _assign_at(@classification, 68, ElnaLexerClass.alpha); - _assign_at(@classification, 69, ElnaLexerClass.alpha); - _assign_at(@classification, 70, ElnaLexerClass.alpha); - _assign_at(@classification, 71, ElnaLexerClass.alpha); - _assign_at(@classification, 72, ElnaLexerClass.alpha); - _assign_at(@classification, 73, ElnaLexerClass.alpha); - _assign_at(@classification, 74, ElnaLexerClass.alpha); - _assign_at(@classification, 75, ElnaLexerClass.alpha); - _assign_at(@classification, 76, ElnaLexerClass.alpha); - _assign_at(@classification, 77, ElnaLexerClass.alpha); - _assign_at(@classification, 78, ElnaLexerClass.alpha); - _assign_at(@classification, 79, ElnaLexerClass.alpha); - _assign_at(@classification, 80, ElnaLexerClass.alpha); - _assign_at(@classification, 81, ElnaLexerClass.alpha); - _assign_at(@classification, 82, ElnaLexerClass.alpha); - _assign_at(@classification, 83, ElnaLexerClass.alpha); - _assign_at(@classification, 84, ElnaLexerClass.alpha); - _assign_at(@classification, 85, ElnaLexerClass.alpha); - _assign_at(@classification, 86, ElnaLexerClass.alpha); - _assign_at(@classification, 87, ElnaLexerClass.alpha); - _assign_at(@classification, 88, 
ElnaLexerClass.alpha); - _assign_at(@classification, 89, ElnaLexerClass.alpha); - _assign_at(@classification, 90, ElnaLexerClass.alpha); - _assign_at(@classification, 91, ElnaLexerClass.alpha); - _assign_at(@classification, 92, ElnaLexerClass.single); - _assign_at(@classification, 93, ElnaLexerClass.backslash); - _assign_at(@classification, 94, ElnaLexerClass.single); - _assign_at(@classification, 95, ElnaLexerClass.single); - _assign_at(@classification, 96, ElnaLexerClass.alpha); - _assign_at(@classification, 97, ElnaLexerClass.other); - _assign_at(@classification, 98, ElnaLexerClass.hex); - _assign_at(@classification, 99, ElnaLexerClass.hex); - _assign_at(@classification, 100, ElnaLexerClass.hex); - _assign_at(@classification, 101, ElnaLexerClass.hex); - _assign_at(@classification, 102, ElnaLexerClass.hex); - _assign_at(@classification, 103, ElnaLexerClass.hex); - _assign_at(@classification, 104, ElnaLexerClass.alpha); - _assign_at(@classification, 105, ElnaLexerClass.alpha); - _assign_at(@classification, 106, ElnaLexerClass.alpha); - _assign_at(@classification, 107, ElnaLexerClass.alpha); - _assign_at(@classification, 108, ElnaLexerClass.alpha); - _assign_at(@classification, 109, ElnaLexerClass.alpha); - _assign_at(@classification, 110, ElnaLexerClass.alpha); - _assign_at(@classification, 111, ElnaLexerClass.alpha); - _assign_at(@classification, 112, ElnaLexerClass.alpha); - _assign_at(@classification, 113, ElnaLexerClass.alpha); - _assign_at(@classification, 114, ElnaLexerClass.alpha); - _assign_at(@classification, 115, ElnaLexerClass.alpha); - _assign_at(@classification, 116, ElnaLexerClass.alpha); - _assign_at(@classification, 117, ElnaLexerClass.alpha); - _assign_at(@classification, 118, ElnaLexerClass.alpha); - _assign_at(@classification, 119, ElnaLexerClass.alpha); - _assign_at(@classification, 120, ElnaLexerClass.alpha); - _assign_at(@classification, 121, ElnaLexerClass.x); - _assign_at(@classification, 122, ElnaLexerClass.alpha); - 
_assign_at(@classification, 123, ElnaLexerClass.alpha); - _assign_at(@classification, 124, ElnaLexerClass.other); - _assign_at(@classification, 125, ElnaLexerClass.single); - _assign_at(@classification, 126, ElnaLexerClass.other); - _assign_at(@classification, 127, ElnaLexerClass.single); - _assign_at(@classification, 128, ElnaLexerClass.invalid); - - code := 129; - - (* Set the remaining 129 - 256 bytes to transitionClassOther. *) - .create_classification_loop; - _assign_at(@classification, code, ElnaLexerClass.other); - code := code + 1; - - if code < 257 then - goto create_classification_loop - end -end; - -proc _elna_lexer_get_transition(current_state: Word, character_class: Word); -var - transition_table: Word; - row_position: Word; - column_position: Word; - target: Word; -begin - (* Each state is 8 bytes long (2 words: action and next state). - There are 22 character classes, so a transition row 8 * 22 = 176 bytes long. *) - row_position := current_state - 1; - row_position := row_position * 176; - - column_position := character_class - 1; - column_position := column_position * 8; - - target := _elna_lexer_get_transition_table(); - target := target + row_position; - - return target + column_position -end; - -(** - * Parameters: - * current_state - First index into transitions table. - * character_class - Second index into transitions table. - * action - Action to assign. - * next_state - Next state to assign. - *) -proc _elna_lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); -var - transition: Word; -begin - transition := _elna_lexer_get_transition(current_state, character_class); - - _elna_lexer_transition_set_action(transition, action); - _elna_lexer_transition_set_state(transition, next_state) -end; - -(* Sets same action and state transition for all character classes in one transition row. *) - -(** - * Parameters: - * current_state - Current state (Transition state enumeration). 
- * default_action - Default action (Callback). - * next_state - Next state (Transition state enumeration). - *) -proc _elna_lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); -begin - _elna_lexer_set_transition(current_state, ElnaLexerClass.invalid, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.digit, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.alpha, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.space, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.colon, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.equals, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.left_paren, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.right_paren, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.asterisk, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.backslash, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.single, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.hex, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.zero, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.x, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.eof, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.dot, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.minus, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.single_quote, default_action, next_state); - _elna_lexer_set_transition(current_state, 
ElnaLexerClass.double_quote, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.greater, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.less, default_action, next_state); - _elna_lexer_set_transition(current_state, ElnaLexerClass.other, default_action, next_state) -end; - -(** - * The transition table describes transitions from one state to another, given - * a symbol (character class). - * - * The table has m rows and n columns, where m is the amount of states and n is - * the amount of classes. So given the current state and a classified character - * the table can be used to look up the next state. - *) -proc _elna_lexer_transitions(); -begin - (* Start state. *) - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.space, ElnaLexerAction.skip, ElnaLexerState.start); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.colon, ElnaLexerAction.accumulate, ElnaLexerState.colon); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.equals, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.left_paren, ElnaLexerAction.accumulate, ElnaLexerState.left_paren); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.right_paren, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.asterisk, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.backslash, ElnaLexerAction.none, 
ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.leading_zero); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.eof, ElnaLexerAction.eof, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.dot, ElnaLexerAction.single, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.minus, ElnaLexerAction.accumulate, ElnaLexerState.minus); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single_quote, ElnaLexerAction.accumulate, ElnaLexerState.character); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.double_quote, ElnaLexerAction.accumulate, ElnaLexerState.string); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.greater, ElnaLexerAction.accumulate, ElnaLexerState.greater); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.less, ElnaLexerAction.accumulate, ElnaLexerState.less); - _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.other, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Colon state. *) - _elna_lexer_default_transition(ElnaLexerState.colon, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.colon, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Identifier state. 
*) - _elna_lexer_default_transition(ElnaLexerState.identifier, ElnaLexerAction.key_id, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); - - (* Decimal state. *) - _elna_lexer_default_transition(ElnaLexerState.decimal, ElnaLexerAction.integer, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.decimal); - _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Leading zero. 
*) - _elna_lexer_default_transition(ElnaLexerState.leading_zero, ElnaLexerAction.integer, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.digit, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.zero, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.dot); - - (* Greater state. *) - _elna_lexer_default_transition(ElnaLexerState.greater, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.greater, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Minus state. *) - _elna_lexer_default_transition(ElnaLexerState.minus, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.minus, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Left paren state. *) - _elna_lexer_default_transition(ElnaLexerState.left_paren, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.left_paren, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.comment); - - (* Less state. *) - _elna_lexer_default_transition(ElnaLexerState.less, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Hexadecimal after 0x. 
*) - _elna_lexer_default_transition(ElnaLexerState.dot, ElnaLexerAction.finalize, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.dot, ElnaLexerClass.dot, ElnaLexerAction.composite, ElnaLexerState.finish); - - (* Comment. *) - _elna_lexer_default_transition(ElnaLexerState.comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); - _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); - _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Closing comment. *) - _elna_lexer_default_transition(ElnaLexerState.closing_comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.right_paren, ElnaLexerAction.delimited, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); - _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - - (* Character. 
*) - _elna_lexer_default_transition(ElnaLexerState.character, ElnaLexerAction.accumulate, ElnaLexerState.character); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.single_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.character_escape); - - (* Escape sequence in a character. *) - _elna_lexer_default_transition(ElnaLexerState.character_escape, ElnaLexerAction.accumulate, ElnaLexerState.character); - _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - - (* String. *) - _elna_lexer_default_transition(ElnaLexerState.string, ElnaLexerAction.accumulate, ElnaLexerState.string); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.double_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.string_escape); - - (* Escape sequence in a string. 
*) - _elna_lexer_default_transition(ElnaLexerState.string_escape, ElnaLexerAction.accumulate, ElnaLexerState.string); - _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); - _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish) -end; - -(** - * Transition table is saved after character classification table. - * Each character entry is 1 word long and there are 256 characters. - * 1024 = 256 * 4 - *) -proc _elna_lexer_get_transition_table(); - return @classification + 1024 -end; - -(** - * Lexer state is saved after the transition tables. - * Each transition table entry is 8 bytes long. The table has 17 rows (transition states) - * and 22 columns (character classes), so 2992 = 8 * 17 * 22. - *) -proc _elna_lexer_global_state(); -var - result: Word; -begin - result := _elna_lexer_get_transition_table(); - return result + 2992 -end; - -(** - * Gets pointer to the token start. - *) -proc _elna_lexer_global_get_start(); -var - target: Word; -begin - target := _elna_lexer_global_state() + 4; - return target^ -end; - -(** - * Sets pointer to the token start. - *) -proc _elna_lexer_global_set_start(new_start: Word); -var - target: Word; -begin - target := _elna_lexer_global_state() + 4; - target^ := new_start -end; - -(** - * Gets pointer to the token end. - *) -proc _elna_lexer_global_get_end(); -var - target: Word; -begin - target := _elna_lexer_global_state() + 8; - return target^ -end; - -(** - * Sets pointer to the token end. 
- *) -proc _elna_lexer_global_set_end(new_start: Word); -var - target: Word; -begin - target := _elna_lexer_global_state() + 8; - target^ := new_start -end; - -proc _elna_lexer_transition_get_action(this: Word); - return this^ -end; - -proc _elna_lexer_transition_set_action(this: Word, value: Word); -begin - this^ := value -end; - -proc _elna_lexer_transition_get_state(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_lexer_transition_set_state(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -(** - * Resets the lexer state for reading the next token. - *) -proc _elna_lexer_reset(); -var - state: Word; -begin - (* Transition start state is 1. *) - state := _elna_lexer_global_state(); - state^ := ElnaLexerState.start; - - state := _elna_lexer_global_get_start(); - _elna_lexer_global_set_end(state) -end; - -(** - * One time lexer initialization. - *) -proc _elna_lexer_initialize(code_pointer: Word); -begin - _elna_lexer_classifications(); - _elna_lexer_transitions(); - - _elna_lexer_global_set_start(code_pointer); - _elna_lexer_global_set_end(code_pointer) -end; - -proc _elna_lexer_next_transition(); -var - current_character: Word; - character_class: Word; - current_state: Word; -begin - current_character := _elna_lexer_global_get_end(); - current_character := _load_byte(current_character); - - character_class := _get_at(@classification, current_character + 1); - - current_state := _elna_lexer_global_state(); - current_state := current_state^; - - return _elna_lexer_get_transition(current_state, character_class) -end; - -proc _string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); -var - result: Word; -begin - result := 0; - - if lhs_length = rhs_length then - result := _memcmp(lhs_pointer, rhs_pointer, lhs_length); - result := result = 0 - end; - return result -end; - -proc _elna_lexer_classify_keyword(position_start: Word, position_end: Word); -var - result: Word; - 
token_length: Word; -begin - (* Anything that is not a keyword stays an identifier. The length passed with - each literal must be the exact byte length of that keyword, because - _string_compare only matches when both lengths are equal. *) - result := ElnaLexerKind.identifier; - token_length := position_end - position_start; - - if _string_compare(position_start, token_length, "const", 5) then - result := ElnaLexerKind._const - elsif _string_compare(position_start, token_length, "var", 3) then - result := ElnaLexerKind._var - elsif _string_compare(position_start, token_length, "proc", 4) then - result := ElnaLexerKind._proc - elsif _string_compare(position_start, token_length, "type", 4) then - result := ElnaLexerKind._type - elsif _string_compare(position_start, token_length, "begin", 5) then - result := ElnaLexerKind._begin - elsif _string_compare(position_start, token_length, "end", 3) then - result := ElnaLexerKind._end - elsif _string_compare(position_start, token_length, "return", 6) then - result := ElnaLexerKind._return - elsif _string_compare(position_start, token_length, "goto", 4) then - result := ElnaLexerKind._goto - elsif _string_compare(position_start, token_length, "if", 2) then - result := ElnaLexerKind._if - elsif _string_compare(position_start, token_length, "while", 5) then - result := ElnaLexerKind._while - elsif _string_compare(position_start, token_length, "then", 4) then - result := ElnaLexerKind._then - elsif _string_compare(position_start, token_length, "else", 4) then - result := ElnaLexerKind._else - elsif _string_compare(position_start, token_length, "elsif", 5) then - result := ElnaLexerKind._elsif - elsif _string_compare(position_start, token_length, "record", 6) then - result := ElnaLexerKind._record - elsif _string_compare(position_start, token_length, "or", 2) then - result := ElnaLexerKind._or - elsif _string_compare(position_start, token_length, "xor", 3) then - result := ElnaLexerKind._xor - end; - return result -end; - -proc _elna_lexer_classify_finalize(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ':' then - result := 
ElnaLexerKind.colon - elsif character = '.' then - result := ElnaLexerKind.dot - elsif character = '(' then - result := ElnaLexerKind.left_paren - elsif character = '-' then - result := ElnaLexerKind.minus - elsif character = '<' then - result := ElnaLexerKind.less_than - elsif character = '>' then - result := ElnaLexerKind.greater_than - end; - return result -end; - -proc _elna_lexer_classify_single(start_position: Word); -var - character: Word; - result: Word; -begin - result := 0; - character := _load_byte(start_position); - - if character = ';' then - result := ElnaLexerKind.semicolon - elsif character = ',' then - result := ElnaLexerKind.comma - elsif character = ')' then - result := ElnaLexerKind.right_paren - elsif character = '@' then - result := ElnaLexerKind.at - elsif character = '~' then - result := ElnaLexerKind.not - elsif character = '&' then - result := ElnaLexerKind.and - elsif character = '+' then - result := ElnaLexerKind.plus - elsif character = '*' then - result := ElnaLexerKind.multiplication - elsif character = '=' then - result := ElnaLexerKind.equals - elsif character = '%' then - result := ElnaLexerKind.remainder - elsif character = '/' then - result := ElnaLexerKind.division - elsif character = '.' 
then - result := ElnaLexerKind.dot - elsif character = '^' then - result := ElnaLexerKind.hat - end; - return result -end; - -(** - * Classifies a two-character token by its first and last character. - * - * Parameters: - * start_position - Pointer to the first character of the token. - * one_before_last - Pointer to the last character of the token. - * - * Returns the token kind, or 0 if the pair is not a known composite. - *) -proc _elna_lexer_classify_composite(start_position: Word, one_before_last: Word); -var - first_character: Word; - last_character: Word; - result: Word; -begin - (* Start from 0 like the other classifiers so that an unhandled pair - does not return an undefined value. *) - result := 0; - first_character := _load_byte(start_position); - last_character := _load_byte(one_before_last); - - if first_character = ':' then - result := ElnaLexerKind.assignment - elsif first_character = '<' then - if last_character = '=' then - result := ElnaLexerKind.less_equal - elsif last_character = '>' then - result := ElnaLexerKind.not_equal - end - elsif first_character = '>' then - if last_character = '=' then - result := ElnaLexerKind.greater_equal - end - end; - - return result -end; - -(** - * Classifies a delimited token (comment, character or string literal) by - * its opening delimiter. - * - * Parameters: - * start_position - Pointer to the opening delimiter. - * end_position - Pointer past the closing delimiter. - * - * Returns the token kind, or 0 if the delimiter is not recognized. - *) -proc _elna_lexer_classify_delimited(start_position: Word, end_position: Word); -var - token_length: Word; - delimiter: Word; - result: Word; -begin - result := 0; - token_length := end_position - start_position; - delimiter := _load_byte(start_position); - - if delimiter = '(' then - result := ElnaLexerKind.comment - elsif delimiter = '\'' then - result := ElnaLexerKind.character - elsif delimiter = '"' then - result := ElnaLexerKind.string - end; - return result -end; - -proc _elna_lexer_classify_integer(start_position: Word, end_position: Word); - return ElnaLexerKind.integer -end; - -proc _elna_lexer_execute_action(action_to_perform: Word, kind: Word); -var - position_start: Word; - position_end: Word; - intermediate: Word; -begin - position_start := _elna_lexer_global_get_start(); - position_end := _elna_lexer_global_get_end(); - - if action_to_perform = ElnaLexerAction.none then - elsif action_to_perform = ElnaLexerAction.accumulate then - _elna_lexer_global_set_end(position_end + 1) - elsif action_to_perform = ElnaLexerAction.skip then - _elna_lexer_global_set_start(position_start + 1); - _elna_lexer_global_set_end(position_end + 1) - elsif action_to_perform = ElnaLexerAction.single then - 
_elna_lexer_global_set_end(position_end + 1); - - intermediate := _elna_lexer_classify_single(position_start); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.eof then - intermediate := ElnaLexerKind.eof; - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.finalize then - intermediate := _elna_lexer_classify_finalize(position_start); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.composite then - _elna_lexer_global_set_end(position_end + 1); - - intermediate := _elna_lexer_classify_composite(position_start, position_end); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.key_id then - intermediate := _elna_lexer_classify_keyword(position_start, position_end); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.integer then - intermediate := _elna_lexer_classify_integer(position_start, position_end); - kind^ := intermediate - elsif action_to_perform = ElnaLexerAction.delimited then - _elna_lexer_global_set_end(position_end + 1); - - intermediate := _elna_lexer_classify_delimited(position_start, position_end + 1); - kind^ := intermediate - end -end; - -proc _elna_lexer_execute_transition(kind: Word); -var - next_transition: Word; - next_state: Word; - global_state: Word; - action_to_perform: Word; -begin - next_transition := _elna_lexer_next_transition(); - next_state := _elna_lexer_transition_get_state(next_transition); - action_to_perform := _elna_lexer_transition_get_action(next_transition); - - global_state := _elna_lexer_global_state(); - - global_state^ := next_state; - _elna_lexer_execute_action(action_to_perform, kind); - - return next_state -end; - -proc _elna_lexer_advance_token(kind: Word); -var - result_state: Word; -begin - result_state := _elna_lexer_execute_transition(kind); - if result_state <> ElnaLexerState.finish then - _elna_lexer_advance_token(kind) - end -end; - -(** - * Reads the next token and writes its type into the address in the kind parameter. 
- *) -proc _elna_lexer_read_token(kind: Word); -begin - _elna_lexer_reset(); - _elna_lexer_advance_token(kind) -end; - -(** - * Advances the token stream past the last read token. - *) -proc _elna_lexer_skip_token(); -var - old_end: Word; -begin - old_end := _elna_lexer_global_get_end(); - _elna_lexer_global_set_start(old_end) -end; - -proc _initialize_global_state(); -begin - compiler_strings_position := @compiler_strings; - memory_free_pointer := _mmap(4194304); - source_code := _mmap(495616); - symbol_table_store := _mmap(495616) -end; - -(* - * Entry point. - *) -proc _start(); -var - last_read: Word; - offset: Word; -begin - _initialize_global_state(); - _elna_lexer_initialize(source_code); - _symbol_table_build(); - - (* Read the source from the standard input. *) - offset := source_code; - - .start_read; - (* Second argument is buffer size. Modifying update the source_code definition. *) - last_read := _read_file(offset, 409600); - if last_read > 0 then - offset := offset + last_read; - goto start_read - end; - _compile(); - - _exit(0) -end; diff --git a/boot/stage16/cl.elna b/boot/stage16/cl.elna new file mode 100644 index 0000000..f76edae --- /dev/null +++ b/boot/stage16/cl.elna @@ -0,0 +1,4821 @@ +(* + * This Source Code Form is subject to the terms of the Mozilla Public License, + * v. 2.0. If a copy of the MPL was not distributed with this file, You can + * obtain one at https://mozilla.org/MPL/2.0/. + *) + +(* Stage 16 compiler. 
*) + +type + _elna_tac_declaration = record + next: Word; + name: Word; + length: Word; + body: Word + end; + elna_rtl_declaration = record + next: Word; + name: Word; + length: Word; + body: Word + end; + _node = record + kind: Word + end; + _integer_literal_node = record + kind: Word; + value: Word; + length: Word + end; + _character_literal_node = record + kind: Word; + value: Word; + length: Word + end; + _variable_expression = record + kind: Word; + name: Word; + length: Word + end; + _string_literal_node = record + kind: Word; + value: Word; + length: Word + end; + _dereference_expression = record + kind: Word; + pointer: Word + end; + _binary_expression = record + kind: Word; + lhs: Word; + rhs: Word; + operator: Word + end; + _unary_expression = record + kind: Word; + operand: Word; + operator: Word + end; + _if_statement = record + kind: Word; + next: Word; + conditionals: Word; + _else: Word + end; + + (** + * All statements are chained into a list. Next contains a pointer to the next + * statement in the statement list. 
+ *) + _statement = record + kind: Word; + next: Word + end; + _goto_statement = record + kind: Word; + next: Word; + label: Word; + length: Word + end; + _label_declaration = record + kind: Word; + next: Word; + label: Word; + length: Word + end; + _field_access_expression = record + kind: Word; + aggregate: Word; + field: Word; + length: Word + end; + _elna_tac_module = record + data: Word; + code: Word + end; + elna_rtl_module = record + data: Word; + code: Word + end; + _module_declaration = record + kind: Word; + types: Word; + globals: Word; + procedures: Word + end; + _assign_statement = record + kind: Word; + next: Word; + assignee: Word; + assignment: Word + end; + _return_statement = record + kind: Word; + next: Word; + returned: Word + end; + _type = record + kind: Word; + size: Word + end; + _enumeration_type = record + kind: Word; + size: Word; + members: Word; + length: Word + end; + _enumeration_type_expression = record + kind: Word; + members: Word; + length: Word + end; + _record_type = record + kind: Word; + size: Word; + members: Word; + length: Word + end; + _record_type_expression = record + kind: Word; + members: Word; + length: Word + end; + _named_type_expression = record + kind: Word; + name: Word; + length: Word + end; + _info = record + kind: Word + end; + _type_info = record + kind: Word; + _type: Word + end; + _parameter_info = record + kind: Word; + offset: Word + end; + _temporary_info = record + kind: Word; + offset: Word + end; + _procedure_info = record + kind: Word; + symbol_table: Word + end; + + (** + * Conditional statements is a list of pairs: condition and statements. + * Used for example to represent if and elsif blocks with belonging statements. 
+ *) + _conditional_statements = record + condition: Word; + statements: Word; + next: Word + end; + + _declaration = record + kind: Word; + next: Word; + name: Word; + length: Word + end; + _procedure_declaration = record + kind: Word; + next: Word; + name: Word; + length: Word; + body: Word; + temporaries: Word; + parameters: Word + end; + _type_declaration = record + kind: Word; + next: Word; + name: Word; + length: Word; + _type: Word + end; + _variable_declaration = record + kind: Word; + next: Word; + name: Word; + length: Word; + _type: Word + end; + + ElnaLexerAction = (none, accumulate, skip, single, eof, finalize, composite, key_id, integer, delimited); + + (** + * Classification table assigns each possible character to a group (class). All + * characters of the same group are handled equivalently. + * + * Transition = record + * action: TransitionAction; + * next_state: TransitionState + * end; + *) + ElnaLexerClass = ( + invalid, + digit, + alpha, + space, + colon, + equals, + left_paren, + right_paren, + asterisk, + backslash, + single, + hex, + zero, + x, + eof, + dot, + minus, + single_quote, + double_quote, + greater, + less, + other + ); + ElnaLexerState = ( + start, + colon, + identifier, + decimal, + leading_zero, + greater, + minus, + left_paren, + less, + dot, + comment, + closing_comment, + character, + character_escape, + string, + string_escape, + finish + ); + ElnaLexerKind = ( + identifier, + _const, + _var, + _proc, + _type, + _begin, + _end, + _if, + _then, + _else, + _elsif, + _while, + _do, + _extern, + _record, + _true, + _false, + null, + and, + _or, + _xor, + pipe, + not, + _return, + _module, + _program, + _import, + _cast, + _defer, + _case, + _of, + trait, + left_paren, + right_paren, + left_square, + right_square, + shift_left, + shift_right, + greater_equal, + less_equal, + greater_than, + less_than, + not_equal, + equals, + semicolon, + dot, + comma, + plus, + arrow, + minus, + multiplication, + division, + remainder, + 
assignment, + colon, + hat, + at, + comment, + string, + character, + integer, + word, + _goto, + eof + ); + NodeKind = ( + integer_literal, + string_literal, + character_literal, + variable_expression, + field_access_expression, + dereference_expression, + unary_expression, + binary_expression, + call, + goto_statement, + label_declaration, + return_statement, + assign_statement, + if_statement, + procedure_declaration, + variable_declaration, + enumeration_type_expression, + named_type_expression, + type_declaration, + module_declaration, + record_type_expression + ); + InfoKind = (type_info, parameter_info, temporary_info, procedure_info); + TypeKind = (primitive, enumeration, _record); + ElnaTacOperator = ( + load_immediate, + load_address, + add, + add_immediate, + load_word, + store_word, + jal, + move, + sub, + div, + rem, + mul, + _xor, + _or, + and, + seqz, + snez, + slt, + xor_immediate, + neg, + not, + jump, + beqz, + label, + start, + ret + ); + ElnaRtlOperator = ( + load_immediate, + load_address, + add, + add_immediate, + load_word, + store_word, + jal, + move, + sub, + div, + rem, + mul, + _xor, + _or, + and, + seqz, + snez, + slt, + xor_immediate, + neg, + not, + jump, + beqz, + label, + start, + ret + ); + ElnaTacOperand = (register, immediate, symbol, offset); + ElnaRtlOperand = (register, immediate, symbol, offset); + ElnaRtlRegister = ( + zero, + ra, + sp, + gp, + tp, + t0, + t1, + t2, + s0, + s1, + a0, + a1, + a2, + a3, + a4, + a5, + a6, + a7, + s2, + s3, + s4, + s5, + s6, + s7, + s8, + s9, + s10, + s11, + t3, + t4, + t5, + t6 + ); + +var + symbol_table_global: Array; + compiler_strings: Array; + classification: Array; + + source_code: Word; + compiler_strings_position: Word; + compiler_strings_length: Word; + label_counter: Word; + symbol_table_store: Word; + +(** + * Calculates and returns the string token length between quotes, including the + * escaping slash characters. + * + * Parameters: + * string - String token pointer. 
+ * + * Returns the length in a0. + *) +proc _string_length(string: Word); +var + counter: Word; + current_byte: Word; +begin + (* Reset the counter. *) + counter := 0; + + .string_length_loop; + string := string + 1; + + current_byte := _load_byte(string); + if current_byte <> '"' then + counter := counter + 1; + goto string_length_loop + end; + + return counter +end; + +(** + * Adds a string to the global, read-only string storage. + * + * Parameters: + * string - String token. + * + * Returns the offset from the beginning of the storage to the new string in a0. + *) +proc _add_string(string: Word); +var + contents: Word; + result: Word; + current_byte: Word; +begin + contents := string + 1; + result := compiler_strings_length; + + .add_string_loop; + current_byte := _load_byte(contents); + if current_byte <> '"' then + _store_byte(current_byte, compiler_strings_position); + compiler_strings_position := compiler_strings_position + 1; + contents := contents + 1; + + if current_byte <> '\\' then + compiler_strings_length := compiler_strings_length + 1 + end; + goto add_string_loop + end; + + return result +end; + +(** + * Reads standard input into a buffer. + * + * Parameters: + * buffer - Buffer pointer. + * size - Buffer size. + * + * Returns the amount of bytes written in a0. + *) +proc _read_file(buffer: Word, size: Word); + return _syscall(0, buffer, size, 0, 0, 0, 63) +end; + +(** + * Writes to the standard output. + * + * Parameters: + * buffer - Buffer. + * size - Buffer length. + *) +proc _write_s(buffer: Word, size: Word); +begin + _syscall(1, buffer, size, 0, 0, 0, 64) +end; + +(** + * Writes a number to a string buffer. + * + * Parameters: + * number - Whole number. + * output_buffer - Buffer pointer. + * + * Sets a0 to the length of the written number. 
+ *) +proc _print_i(number: Word, output_buffer: Word); +var + local_buffer: Word; + is_negative: Word; + current_character: Word; + result: Word; +begin + (* Digits are written right to left, starting 11 bytes past the address of result. *) + local_buffer := @result + 11; + + if number >= 0 then + is_negative := 0 + else + (* Continue with the magnitude so that % and / below yield the decimal digits; + the sign is written separately after the digits. *) + number := -number; + is_negative := 1 + end; + + .print_i_digit10; + current_character := number % 10; + _store_byte(current_character + '0', local_buffer); + + number := number / 10; + local_buffer := local_buffer - 1; + + if number <> 0 then + goto print_i_digit10 + end; + if is_negative = 1 then + _store_byte('-', local_buffer); + local_buffer := local_buffer - 1 + end; + (* local_buffer now points one byte before the first written character. *) + result := @result + 11; + result := result - local_buffer; + memcpy(output_buffer, local_buffer + 1, result); + + return result +end; + +(** + * Writes a number to the standard output. + * + * Parameters: + * number - Whole number. + *) +proc _write_i(number: Word); +var + local_buffer: Word; + length: Word; +begin + length := _print_i(number, @local_buffer); + _write_s(@local_buffer, length) +end; + +(** + * Writes a character from a0 into the standard output. + * + * Parameters: + * character - Character to write. + *) +proc _write_c(character: Word); +begin + _write_s(@character, 1) +end; + +(** + * Write null terminated string. + * + * Parameters: + * string - String. + *) +proc _write_z(string: Word); +var + next_byte: Word; +begin + (* Check for 0 character. *) + next_byte := _load_byte(string); + + if next_byte <> 0 then + (* Print a character. *) + _write_c(next_byte); + + (* Advance the input string by one byte. *) + _write_z(string + 1) + end +end; + +(** + * Detects if the passed character is a 7-bit alpha character or an underscore. + * + * Parameters: + * character - Tested character. + * + * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. 
+ *) +proc _is_alpha(character: Word); +var + is_underscore: Word; +begin + is_underscore := character = '_'; + + return isalpha(character) or is_underscore +end; + +proc _is_alnum(character: Word); + return _is_alpha(character) or isdigit(character) +end; + +proc _elna_tac_instruction_size(); + return 44 +end; + +proc _elna_tac_instruction_get_kind(this: Word); + return this^ +end; + +proc _elna_tac_instruction_set_kind(this: Word, value: Word); +begin + this^ := value +end; + +proc _elna_tac_instruction_get_next(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _elna_tac_instruction_set_next(this: Word, value: Word); +begin + .elna_tac_instruction_set_next_loop; + this := this + 4; + if value <> 0 then + if this^ <> 0 then + this := this^; + goto elna_tac_instruction_set_next_loop + end + end; + this^ := value +end; + +proc _elna_tac_instruction_get_operand_type(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + return this^ +end; + +proc _elna_tac_instruction_get_operand_value(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this := this + 4; + return this^ +end; + +proc _elna_tac_instruction_get_operand_length(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this := this + 8; + return this^ +end; + +proc _elna_tac_instruction_set_operand(this: Word, n: Word, operand_type: Word, operand_value: Word, operand_length: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this^ := operand_type; + this := this + 4; + this^ := operand_value; + this := this + 4; + this^ := operand_length +end; + +proc _elna_tac_instruction_create(kind: Word); +var + result: Word; +begin + result := malloc(_elna_tac_instruction_size()); + + _elna_tac_instruction_set_kind(result, kind); + _elna_tac_instruction_set_next(result, 0); + + return result +end; + +proc 
elna_rtl_instruction_size(); + return 44 +end; + +proc elna_rtl_instruction_get_kind(this: Word); + return this^ +end; + +proc elna_rtl_instruction_get_next(this: Word); +begin + this := this + 4; + return this^ +end; + +proc elna_rtl_instruction_set_next(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc elna_rtl_instruction_set_kind(this: Word, value: Word); +begin + this^ := value +end; + +proc elna_rtl_instruction_get_operand_type(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + return this^ +end; + +proc elna_rtl_instruction_get_operand_value(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this := this + 4; + return this^ +end; + +proc elna_rtl_instruction_get_operand_length(this: Word, n: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this := this + 8; + return this^ +end; + +proc elna_rtl_instruction_set_operand(this: Word, n: Word, operand_type: Word, operand_value: Word, operand_length: Word); +begin + n := n - 1; + n := n * 12; + this := this + 8; + this := this + n; + + this^ := operand_type; + this := this + 4; + this^ := operand_value; + this := this + 4; + this^ := operand_length +end; + +proc elna_rtl_instruction(tac_instruction: Word); +var + result: Word; + instruction_size: Word; +begin + instruction_size := elna_rtl_instruction_size(); + result := malloc(instruction_size); + + memcpy(result, tac_instruction, instruction_size); + + return result +end; + +proc _elna_tac_module_create(data: Word, code: Word); +var + result: Word; +begin + result := malloc(_elna_tac_module_size()); + + _elna_tac_module_set_data(result, data); + _elna_tac_module_set_code(result, code); + + return result +end; + +proc _elna_tac_load_immediate(target_register: Word, source_immediate: Word, immediate_length: Word); +var + result: Word; +begin + result := 
_elna_tac_instruction_create(ElnaTacOperator.load_immediate); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.immediate, source_immediate, immediate_length); + + return result +end; + +proc _elna_tac_load_address(target_register: Word, source_symbol: Word, symbol_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.load_address); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); + + return result +end; + +proc _elna_tac_beqz(target_register: Word, source_symbol: Word, symbol_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.beqz); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target_register, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.symbol, source_symbol, symbol_length); + + return result +end; + +proc _elna_tac_jump(source_symbol: Word, symbol_length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.jump); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, source_symbol, symbol_length); + + return result +end; + +proc _elna_tac_add(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.add); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_mul(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.mul); + + 
_elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_sub(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.sub); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_div(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.div); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_rem(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.rem); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_xor(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator._xor); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result 
+end; + +proc _elna_tac_xor_immediate(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator._xor); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); + + return result +end; + +proc _elna_tac_or(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator._or); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_and(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.and); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_add_immediate(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.add_immediate); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.immediate, rhs, 0); + + return result +end; + +proc _elna_tac_slt(destination: Word, lhs: Word, rhs: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.slt); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, 
destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, lhs, 0); + _elna_tac_instruction_set_operand(result, 3, ElnaTacOperand.register, rhs, 0); + + return result +end; + +proc _elna_tac_jal(symbol: Word, length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.jal); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, symbol, length); + + return result +end; + +proc _elna_tac_load_word(target: Word, register: Word, offset: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.load_word); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); + + return result +end; + +proc _elna_tac_store_word(target: Word, register: Word, offset: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.store_word); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, target, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.offset, register, offset); + + return result +end; + +proc _elna_tac_move(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.move); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_seqz(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.seqz); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_snez(destination: Word, source: Word); +var + result: Word; +begin + result := 
_elna_tac_instruction_create(ElnaTacOperator.snez); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_neg(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.neg); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_not(destination: Word, source: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.not); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.register, destination, 0); + _elna_tac_instruction_set_operand(result, 2, ElnaTacOperand.register, source, 0); + + return result +end; + +proc _elna_tac_label(counter: Word, length: Word); +var + result: Word; +begin + result := _elna_tac_instruction_create(ElnaTacOperator.label); + + _elna_tac_instruction_set_operand(result, 1, ElnaTacOperand.symbol, counter, length); + + return result +end; + +proc _elna_writer_instruction_name(instruction_kind: Word); +var + argument_count: Word; +begin + if instruction_kind = ElnaRtlOperator.load_immediate then + argument_count := 2; + _write_s("\tli", 3) + elsif instruction_kind = ElnaRtlOperator.load_address then + argument_count := 2; + _write_s("\tla", 3) + elsif instruction_kind = ElnaRtlOperator.add then + argument_count := 3; + _write_s("\tadd", 4) + elsif instruction_kind = ElnaRtlOperator.add_immediate then + argument_count := 3; + _write_s("\taddi", 5) + elsif instruction_kind = ElnaRtlOperator.load_word then + argument_count := 2; + _write_s("\tlw", 3) + elsif instruction_kind = ElnaRtlOperator.store_word then + argument_count := 2; + _write_s("\tsw", 3) + elsif instruction_kind = ElnaRtlOperator.jal then + argument_count := 
1; + _write_s("\tcall", 5) + elsif instruction_kind = ElnaRtlOperator.move then + argument_count := 2; + _write_s("\tmv", 3) + elsif instruction_kind = ElnaRtlOperator.sub then + argument_count := 3; + _write_s("\tsub", 4) + elsif instruction_kind = ElnaRtlOperator.mul then + argument_count := 3; + _write_s("\tmul", 4) + elsif instruction_kind = ElnaRtlOperator.div then + argument_count := 3; + _write_s("\tdiv", 4) + elsif instruction_kind = ElnaRtlOperator.rem then + argument_count := 3; + _write_s("\trem", 4) + elsif instruction_kind = ElnaRtlOperator._xor then + argument_count := 3; + _write_s("\txor", 4) + elsif instruction_kind = ElnaRtlOperator.xor_immediate then + argument_count := 3; + _write_s("\txori", 5) + elsif instruction_kind = ElnaRtlOperator._or then + argument_count := 3; + _write_s("\tor", 3) + elsif instruction_kind = ElnaRtlOperator.and then + argument_count := 3; + _write_s("\tand", 4) + elsif instruction_kind = ElnaRtlOperator.seqz then + argument_count := 2; + _write_s("\tseqz", 5) + elsif instruction_kind = ElnaRtlOperator.snez then + argument_count := 2; + _write_s("\tsnez", 5) + elsif instruction_kind = ElnaRtlOperator.slt then + argument_count := 3; + _write_s("\tslt", 4) + elsif instruction_kind = ElnaRtlOperator.neg then + argument_count := 2; + _write_s("\tneg", 4) + elsif instruction_kind = ElnaRtlOperator.not then + argument_count := 2; + _write_s("\tnot", 4) + elsif instruction_kind = ElnaRtlOperator.jump then + argument_count := 1; + _write_s("\tj", 2) + elsif instruction_kind = ElnaRtlOperator.beqz then + argument_count := 2; + _write_s("\tbeqz", 5) + elsif instruction_kind = ElnaRtlOperator.start then + argument_count := 0; + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\0") + elsif instruction_kind = ElnaRtlOperator.ret then + argument_count := 0; + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\0") + end; + return argument_count +end; + +proc 
_elna_writer_register(register: Word); +begin + (* Emits the register as 'x' followed by register - 1. *) + _write_c('x'); + _write_i(register - 1) +end; + +(* Writes operand n of an RTL instruction, preceded by a space. A register goes through _elna_writer_register. An offset operand prints its length word, then the register from its value word in parentheses. A symbol with length 0 goes through _write_label, an immediate with length 0 through _write_i; with a non-zero length both are written as text of that length. *) +proc _elna_writer_operand(instruction: Word, n: Word); +var + operand_value: Word; + operand_length: Word; + operand_type: Word; +begin + operand_type := elna_rtl_instruction_get_operand_type(instruction, n); + operand_value := elna_rtl_instruction_get_operand_value(instruction, n); + operand_length := elna_rtl_instruction_get_operand_length(instruction, n); + + _write_c(' '); + if operand_type = ElnaRtlOperand.register then + _elna_writer_register(operand_value) + elsif operand_type = ElnaRtlOperand.offset then + _write_i(operand_length); + _write_c('('); + _elna_writer_register(operand_value); + _write_c(')') + elsif operand_type = ElnaRtlOperand.symbol then + if operand_length = 0 then + _write_label(operand_value, 0) + else + _write_s(operand_value, operand_length) + end + elsif operand_length = 0 then (* ElnaRtlOperand.immediate *) + _write_i(operand_value) + else + _write_s(operand_value, operand_length) + end +end; + +(* Writes one RTL instruction followed by a newline. A label instruction prints operand 1 through _write_label and a colon. Any other kind prints what _elna_writer_instruction_name writes for it, then as many comma-separated operands as that call returns. *) +proc _elna_writer_instruction(instruction: Word); +var + instruction_kind: Word; + argument_count: Word; + current_argument: Word; + operand_value: Word; + operand_length: Word; +begin + instruction_kind := elna_rtl_instruction_get_kind(instruction); + + if instruction_kind = ElnaRtlOperator.label then + argument_count := 0; + operand_value := elna_rtl_instruction_get_operand_value(instruction, 1); + operand_length := elna_rtl_instruction_get_operand_length(instruction, 1); + _write_label(operand_value, operand_length); + _write_c(':') + else + argument_count := _elna_writer_instruction_name(instruction_kind) + end; + current_argument := 1; + + .elna_writer_instruction_loop; + if current_argument <= argument_count then + _elna_writer_operand(instruction, current_argument); + current_argument := current_argument + 1 + end; + (* A comma is written only when another operand follows. *) + if current_argument <= argument_count then + _write_c(','); + goto elna_writer_instruction_loop + end; + + _write_c('\n') +end; 
+ +(* Copies a chain of TAC instructions into a chain of RTL instructions, one elna_rtl_instruction copy per element, and returns the first copy (0 for an empty chain). *) +proc elna_rtl_instructions(instruction: Word); +var + current_copy: Word; + next_copy: Word; + first_copy: Word; +begin + if instruction <> 0 then + first_copy := elna_rtl_instruction(instruction); + instruction := _elna_tac_instruction_get_next(instruction) + else + first_copy := 0; + end; + current_copy := first_copy; + + .elna_rtl_instructions_start; + + if instruction <> 0 then + next_copy := elna_rtl_instruction(instruction); + + instruction := _elna_tac_instruction_get_next(instruction); + (* Relink the previous copy so that it points at the new copy. *) + elna_rtl_instruction_set_next(current_copy, next_copy); + current_copy := next_copy; + goto elna_rtl_instructions_start + end; + + return first_copy +end; + +(* Writes every instruction of an RTL chain in order. *) +proc _elna_writer_instructions(instruction: Word); +begin + .elna_writer_instructions_start; + if instruction <> 0 then + _elna_writer_instruction(instruction); + instruction := elna_rtl_instruction_get_next(instruction); + goto elna_writer_instructions_start + end +end; + +(* Writes every procedure of the declaration list that starts at procedure: a .type directive, the label, the body instructions and a closing ret. An empty list (0) writes nothing; the loop used to read the first declaration before testing the pointer. *) +proc _elna_writer_procedure(procedure: Word); +var + name_pointer: Word; + name_length: Word; + body_statements: Word; +begin + .elna_writer_procedure_loop; + (* Test the pointer before the first read, as _elna_writer_variable does. *) + if procedure <> 0 then + name_pointer := elna_rtl_declaration_get_name(procedure); + name_length := elna_rtl_declaration_get_length(procedure); + body_statements := elna_rtl_declaration_get_body(procedure); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_s(name_pointer, name_length); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_s(name_pointer, name_length); + _write_z(":\n\0"); + + _elna_writer_instructions(body_statements); + _write_z("\tret\n\0"); + + procedure := elna_rtl_declaration_get_next(procedure); + goto elna_writer_procedure_loop + end +end; 
+ +(* Writes each variable of the declaration list as an @object symbol followed by a .zero directive; the size comes from the body field of the declaration. *) +proc _elna_writer_variable(variable: Word); +var + name: Word; + name_length: Word; + size: Word; +begin + .elna_writer_variable_loop; + if variable <> 0 then + name := elna_rtl_declaration_get_name(variable); + name_length := elna_rtl_declaration_get_length(variable); + size := elna_rtl_declaration_get_body(variable); + + _write_z(".type \0"); + _write_s(name, name_length); + _write_z(", @object\n\0"); + + _write_s(name, name_length); + _write_c(':'); + + _write_z(" .zero \0"); + _write_i(size); + + _write_c('\n'); + variable := elna_rtl_declaration_get_next(variable); + + goto elna_writer_variable_loop + end +end; + +(* Builds an RTL module from a TAC module: the data part goes through elna_rtl_globals, the code part through elna_rtl_procedures. *) +proc elna_rtl_module_declaration(tac_module: Word); +var + result: Word; + current_part: Word; +begin + result := malloc(elna_rtl_module_size()); + + current_part := _elna_tac_module_get_data(tac_module); + current_part := elna_rtl_globals(current_part); + elna_rtl_module_set_data(result, current_part); + + current_part := _elna_tac_module_get_code(tac_module); + current_part := elna_rtl_procedures(current_part); + elna_rtl_module_set_code(result, current_part); + + return result +end; + +proc _elna_writer_module(pair: Word); +var + compiler_strings_copy: Word; + compiler_strings_end: Word; + current_byte: Word; + current_part: Word; +begin + _write_z(".globl main\n\n\0"); + _write_z(".section .data\n\0"); + + current_part := elna_rtl_module_get_data(pair); + _elna_writer_variable(current_part); + + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type 
_load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + + current_part := elna_rtl_module_get_code(pair); + _elna_writer_procedure(current_part); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + compiler_strings_copy := @compiler_strings; + compiler_strings_end := compiler_strings_position; + + .elna_writer_module_loop; + if compiler_strings_copy < compiler_strings_end then + current_byte := _load_byte(compiler_strings_copy); + compiler_strings_copy := compiler_strings_copy + 1; + _write_c(current_byte); + + goto elna_writer_module_loop + end; + _write_c('"'); + _write_c('\n'); +end; + +proc _elna_parser_integer_literal(); +var + integer_token: Word; + integer_length: Word; + result: Word; +begin + result := malloc(_integer_literal_node_size()); + + integer_token := _elna_lexer_global_get_start(); + integer_length := _elna_lexer_global_get_end(); + integer_length := integer_length - integer_token; + _elna_lexer_skip_token(); + + _node_set_kind(result, NodeKind.integer_literal); + _integer_literal_node_set_value(result, integer_token); + _integer_literal_node_set_length(result, integer_length); + + return result +end; + +proc _elna_tac_integer_literal(integer_literal_node: Word); +var + integer_token: Word; + integer_length: Word; + token_kind: Word; +begin + integer_token := _integer_literal_node_get_value(integer_literal_node); + integer_length := _integer_literal_node_get_length(integer_literal_node); + + return _elna_tac_load_immediate(ElnaRtlRegister.t0, integer_token, integer_length) +end; + +proc _elna_parser_character_literal(); +var + character: Word; + character_length: Word; + result: Word; +begin + result := malloc(_character_literal_node_size()); + + character := _elna_lexer_global_get_start(); + character_length := _elna_lexer_global_get_end(); + character_length := character_length - character; + 
_elna_lexer_skip_token(); + + _node_set_kind(result, NodeKind.character_literal); + (* NOTE(review): the node is sized by _character_literal_node_size() and read back through the _character_literal_node_get_ accessors, yet it is filled through the integer literal setters. This presumably relies on both node kinds sharing one layout; confirm. *) + _integer_literal_node_set_value(result, character); + _integer_literal_node_set_length(result, character_length); + + return result +end; + +(* Emits a load_immediate into t0 whose operand is the value and length stored in the literal node. *) +proc _elna_tac_character_literal(character_literal_node: Word); +var + character: Word; + character_length: Word; +begin + character := _character_literal_node_get_value(character_literal_node); + character_length := _character_literal_node_get_length(character_literal_node); + + return _elna_tac_load_immediate(ElnaRtlRegister.t0, character, character_length) +end; + +(* Consumes the current token and returns a variable expression node that records the token start and its length (end minus start). Despite its name, name_token holds that length. *) +proc _elna_parser_variable_expression(); +var + name: Word; + name_token: Word; + result: Word; +begin + name := _elna_lexer_global_get_start(); + name_token := _elna_lexer_global_get_end(); + name_token := name_token - name; + _elna_lexer_skip_token(); + + result := malloc(_variable_expression_size()); + + _node_set_kind(result, NodeKind.variable_expression); + _variable_expression_set_name(result, name); + _variable_expression_set_length(result, name_token); + + return result +end; + +(* Looks the name up in symbol_table. A hit is lowered by _elna_tac_local_designator, a miss by _elna_tac_global_designator. *) +proc _elna_tac_variable_expression(variable_expression: Word, symbol_table: Word); +var + name: Word; + name_token: Word; + lookup_result: Word; + instruction: Word; +begin + name := _variable_expression_get_name(variable_expression); + name_token := _variable_expression_get_length(variable_expression); + + lookup_result := _symbol_table_lookup(symbol_table, name, name_token); + if lookup_result <> 0 then + instruction := _elna_tac_local_designator(lookup_result) + else + instruction := _elna_tac_global_designator(variable_expression) + end; + return instruction +end; + +proc _elna_parser_string_literal(); +var + length: Word; + token_start: Word; + result: Word; +begin + result := malloc(_string_literal_node_size()); + + token_start := _elna_lexer_global_get_start(); + length := _string_length(token_start); + _elna_lexer_skip_token(); + + _node_set_kind(result, NodeKind.string_literal); + 
_string_literal_node_set_value(result, token_start); + _string_literal_node_set_length(result, length); + + return result +end; + +proc _elna_tac_string_literal(string_literal_node: Word); +var + token_start: Word; + length: Word; + offset: Word; + instruction: Word; + first_instruction: Word; + next_instruction: Word; +begin + token_start := _string_literal_node_get_value(string_literal_node); + length := _string_literal_node_get_length(string_literal_node); + offset := _add_string(token_start); + + first_instruction := _elna_tac_load_address(ElnaRtlRegister.t0, "strings", 7); + instruction := _elna_tac_load_immediate(ElnaRtlRegister.t1, offset, 0); + _elna_tac_instruction_set_next(first_instruction, instruction); + next_instruction := _elna_tac_add(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(instruction, next_instruction); + + return first_instruction +end; + +proc _elna_parser_simple_expression(); +var + current_character: Word; + parser_node: Word; + token_kind: Word; +begin + parser_node := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.character then + parser_node := _elna_parser_character_literal() + elsif token_kind = ElnaLexerKind.integer then + parser_node := _elna_parser_integer_literal() + elsif token_kind = ElnaLexerKind.string then + parser_node := _elna_parser_string_literal() + elsif token_kind = ElnaLexerKind.identifier then + parser_node := _elna_parser_variable_expression() + end; + return parser_node +end; + +proc _elna_parser_dereference_expression(simple_expression: Word); +var + result: Word; +begin + result := malloc(_dereference_expression_size()); + + _node_set_kind(result, NodeKind.dereference_expression); + _dereference_expression_set_pointer(result, simple_expression); + _elna_lexer_skip_token(); + + return result +end; + +proc _elna_parser_designator(); +var + simple_expression: Word; + token_kind: Word; +begin + simple_expression := 
_elna_parser_simple_expression(); + + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.hat then + simple_expression := _elna_parser_dereference_expression(simple_expression) + elsif token_kind = ElnaLexerKind.dot then + simple_expression := _elna_parser_field_access_expression(simple_expression) + elsif token_kind = ElnaLexerKind.left_paren then + simple_expression := _elna_parser_call(simple_expression) + end; + return simple_expression +end; + +(* Generates TAC for a simple expression node and returns the generated instruction. is_address is an out-parameter: it points at a word that receives 0 for character, string and integer literals and 1 for every other node, which is lowered as a variable expression. *) +proc _elna_tac_simple_expression(parser_node: Word, symbol_table: Word, is_address: Word); +var + node_kind: Word; + instruction: Word; +begin + (* is_address is the pointer passed in by the caller. A local of the same name used to be declared here and redeclared the parameter, so the flag could be written through an uninitialised local. *) + is_address^ := 0; + node_kind := _node_get_kind(parser_node); + + if node_kind = NodeKind.character_literal then + instruction := _elna_tac_character_literal(parser_node) + elsif node_kind = NodeKind.string_literal then + instruction := _elna_tac_string_literal(parser_node) + elsif node_kind = NodeKind.integer_literal then + instruction := _elna_tac_integer_literal(parser_node) + else + instruction := _elna_tac_variable_expression(parser_node, symbol_table); + is_address^ := 1 + end; + return instruction +end; + +(* Parses an optional prefix operator followed by a designator. The at, minus and not tokens map to the operator characters '@', '-' and '~'. Without a prefix the designator node is returned unchanged; otherwise it becomes the operand of a new unary expression node. *) +proc _elna_parser_unary_expression(); +var + token_kind: Word; + result: Word; + operand: Word; + operator: Word; +begin + _elna_lexer_read_token(@token_kind); + operator := 0; + + if token_kind = ElnaLexerKind.at then + operator := '@' + elsif token_kind = ElnaLexerKind.minus then + operator := '-' + elsif token_kind = ElnaLexerKind.not then + operator := '~' + end; + if operator <> 0 then + _elna_lexer_skip_token() + end; + result := _elna_parser_designator(); + + if operator <> 0 then + operand := result; + result := malloc(_unary_expression_size()); + + _node_set_kind(result, NodeKind.unary_expression); + _unary_expression_set_operand(result, operand); + _unary_expression_set_operator(result, operator) + end; + + return result +end; + +proc _elna_tac_unary_expression(parser_node: Word, symbol_table: Word); +var + 
current_character: Word; + token_kind: Word; + expression_kind: Word; + operator: Word; + operand: Word; + is_address: Word; + first_instruction: Word; + instruction: Word; +begin + operator := 0; + operand := 0; + + expression_kind := _node_get_kind(parser_node); + + if expression_kind = NodeKind.unary_expression then + operator := _unary_expression_get_operator(parser_node); + operand := _unary_expression_get_operand(parser_node) + else + operand := parser_node + end; + + if operator = '@' then + first_instruction := _elna_tac_designator(operand, symbol_table, @is_address) + else + first_instruction := _elna_tac_designator(operand, symbol_table, @is_address); + if is_address then + instruction := _elna_tac_load_word(ElnaRtlRegister.t0, ElnaRtlRegister.t0, 0); + _elna_tac_instruction_set_next(first_instruction, instruction) + end + end; + if operator = '-' then + instruction := _elna_tac_neg(ElnaRtlRegister.t0, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(first_instruction, instruction) + elsif operator = '~' then + instruction := _elna_tac_not(ElnaRtlRegister.t0, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(first_instruction, instruction) + end; + return first_instruction +end; + +proc _elna_parser_binary_expression(); +var + lhs_node: Word; + rhs_node: Word; + token_kind: Word; + result: Word; +begin + lhs_node := _elna_parser_unary_expression(); + rhs_node := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.plus then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.minus then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.multiplication then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.and then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind._or then + 
_elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind._xor then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.equals then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.remainder then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.division then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.less_than then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.greater_than then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.less_equal then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.not_equal then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + elsif token_kind = ElnaLexerKind.greater_equal then + _elna_lexer_skip_token(); + rhs_node := _elna_parser_unary_expression() + end; + if rhs_node <> 0 then + result := malloc(_binary_expression_size()); + + _node_set_kind(result, NodeKind.binary_expression); + _binary_expression_set_lhs(result, lhs_node); + _binary_expression_set_rhs(result, rhs_node); + _binary_expression_set_operator(result, token_kind) + else + result := lhs_node + end; + return result +end; + +proc _elna_tac_binary_expression(parser_node: Word, symbol_table: Word); +var + token_kind: Word; + expression_kind: Word; + operand_node: Word; + first_instruction: Word; + instruction: Word; + current_instruction: Word; +begin + expression_kind := _node_get_kind(parser_node); + + if expression_kind <> NodeKind.binary_expression then + first_instruction := _elna_tac_unary_expression(parser_node, symbol_table) + else + token_kind := 
_binary_expression_get_operator(parser_node); + + operand_node := _binary_expression_get_lhs(parser_node); + first_instruction := _elna_tac_unary_expression(operand_node, symbol_table); + + (* Save the value of the left expression on the stack. *) + instruction := _elna_tac_store_word(ElnaRtlRegister.t0, ElnaRtlRegister.sp, 64); + _elna_tac_instruction_set_next(first_instruction, instruction); + current_instruction := instruction; + + operand_node := _binary_expression_get_rhs(parser_node); + instruction := _elna_tac_unary_expression(operand_node, symbol_table); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + (* Load the left expression from the stack; *) + instruction := _elna_tac_load_word(ElnaRtlRegister.t1, ElnaRtlRegister.sp, 64); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + if token_kind = ElnaLexerKind.plus then + instruction := _elna_tac_add(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.minus then + instruction := _elna_tac_sub(ElnaRtlRegister.t0, ElnaRtlRegister.t1, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.multiplication then + instruction := _elna_tac_mul(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.and then + instruction := _elna_tac_and(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind._or then + instruction := _elna_tac_or(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind._xor then + instruction 
:= _elna_tac_xor(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.equals then + instruction := _elna_tac_xor(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_seqz(ElnaRtlRegister.t0, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.remainder then + instruction := _elna_tac_rem(ElnaRtlRegister.t0, ElnaRtlRegister.t1, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.division then + instruction := _elna_tac_div(ElnaRtlRegister.t0, ElnaRtlRegister.t1, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.less_than then + instruction := _elna_tac_slt(ElnaRtlRegister.t0, ElnaRtlRegister.t1, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.greater_than then + instruction := _elna_tac_slt(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.less_equal then + instruction := _elna_tac_slt(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_xor_immediate(ElnaRtlRegister.t0, ElnaRtlRegister.t0, 1); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.not_equal then + instruction := _elna_tac_xor(ElnaRtlRegister.t0, ElnaRtlRegister.t0, ElnaRtlRegister.t1); + _elna_tac_instruction_set_next(current_instruction, instruction); + 
current_instruction := instruction; + + instruction := _elna_tac_snez(ElnaRtlRegister.t0, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction) + elsif token_kind = ElnaLexerKind.greater_equal then + instruction := _elna_tac_slt(ElnaRtlRegister.t0, ElnaRtlRegister.t1, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + instruction := _elna_tac_xor_immediate(ElnaRtlRegister.t0, ElnaRtlRegister.t0, 1); + _elna_tac_instruction_set_next(current_instruction, instruction) + end + end; + return first_instruction +end; + +(* 4 bytes node kind + 4 byte pointer to variable expression + 4 * 7 for arguments. NOTE(review): these sizes add up to 36, but 44 is returned and the accessors below keep the name at offset 8 - confirm the node layout. *) +proc _call_size(); + return 44 +end; + +(* Returns the word stored at offset 8 of the call node. *) proc _call_get_name(this: Word); +begin + this := this + 8; + return this^ +end; + +(* Stores value at offset 8 of the call node. *) proc _call_set_name(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +(* Returns the word at offset 8 + 4 * n of the call node. Callers in this file number arguments from 1. *) proc _call_get_argument(this: Word, n: Word); +begin + n := n * 4; + this := this + 8; + this := this + n; + return this^ +end; + +(* Stores value at offset 8 + 4 * n of the call node. *) proc _call_set_argument(this: Word, n: Word, value: Word); +begin + n := n * 4; + this := this + 8; + this := this + n; + this^ := value +end; + +(* Parses a call of callee: allocates a call node, then reads argument expressions for as long as the token following an argument is a comma. The argument list ends with a 0 entry. *) proc _elna_parser_call(callee: Word); +var + parsed_expression: Word; + result: Word; + argument_number: Word; + token_kind: Word; +begin + result := malloc(_call_size()); + _node_set_kind(result, NodeKind.call); + _statement_set_next(result, 0); + + argument_number := 1; + _call_set_name(result, callee); + + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.right_paren then + _elna_lexer_skip_token(); + goto elna_parser_call_end + end; + + .elna_parser_call_loop; + parsed_expression := _elna_parser_binary_expression(); + _call_set_argument(result, argument_number, parsed_expression); + argument_number := argument_number + 1; + _elna_lexer_read_token(@token_kind); +
_elna_lexer_skip_token(); + + if token_kind = ElnaLexerKind.comma then + goto elna_parser_call_loop + end; + + .elna_parser_call_end; + (* Set the trailing argument to nil. NOTE(review): argument_number is not bounded in this procedure while the node has the fixed size _call_size() - confirm the argument limit is enforced elsewhere. *) + _call_set_argument(result, argument_number, 0); + + return result +end; + +(* Generates code for a call. Each argument is evaluated and its value in t0 is stored at sp + 116 - 4 * index; afterwards the saved values are loaded into register a0 + index, last argument first, and a jal to the callee name is appended. Returns the first instruction. *) proc _elna_tac_call(parsed_call: Word, symbol_table: Word); +var + name_length: Word; + name: Word; + argument_count: Word; + stack_offset: Word; + parsed_expression: Word; + instruction: Word; + first_instruction: Word; + current_instruction: Word; +begin + parsed_expression := _call_get_name(parsed_call); + name := _variable_expression_get_name(parsed_expression); + name_length := _variable_expression_get_length(parsed_expression); + argument_count := 0; + first_instruction := 0; + + .elna_tac_call_loop; + + (* Arguments are stored from index 1; a 0 entry terminates the list. *) parsed_expression := _call_get_argument(parsed_call, argument_count + 1); + if parsed_expression = 0 then + goto elna_tac_call_finalize + else + instruction := _elna_tac_binary_expression(parsed_expression, symbol_table); + if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + current_instruction := instruction; + + (* Save the argument on the stack. *) + stack_offset := argument_count * 4; + + instruction := _elna_tac_store_word(ElnaRtlRegister.t0, + ElnaRtlRegister.sp, 116 - stack_offset); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + argument_count := argument_count + 1; + goto elna_tac_call_loop + end; + .elna_tac_call_finalize; + + (* Load the argument from the stack. *) + if argument_count <> 0 then + (* Decrement the argument counter.
*) + argument_count := argument_count - 1; + stack_offset := argument_count * 4; + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + instruction := _elna_tac_load_word(ElnaRtlRegister.a0 + argument_count, + ElnaRtlRegister.sp, 116 - stack_offset); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + + goto elna_tac_call_finalize + end; + instruction := _elna_tac_jal(name, name_length); + (* A call without arguments consists of the jal alone. *) if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + return first_instruction +end; + +(* Parses a goto statement: skips the current token, records start and length of the following token as the label and returns a goto_statement node. *) proc _elna_parser_goto_statement(); +var + token_kind: Word; + label_name: Word; + label_length: Word; + result: Word; +begin + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + label_name := _elna_lexer_global_get_start(); + label_length := _elna_lexer_global_get_end() - label_name; + _elna_lexer_skip_token(); + + result := malloc(_goto_statement_size()); + + _node_set_kind(result, NodeKind.goto_statement); + _statement_set_next(result, 0); + _goto_statement_set_label(result, label_name); + _goto_statement_set_length(result, label_length); + + return result +end; + +(* Builds a jump to the label of a goto statement. The label name is copied into a new buffer behind a leading dot. NOTE(review): the local variable instruction is not used. *) proc _elna_tac_goto_statement(parser_node: Word); +var + label_name: Word; + label_length: Word; + label_with_dot: Word; + instruction: Word; +begin + label_name := _goto_statement_get_label(parser_node); + label_length := _goto_statement_get_length(parser_node); + label_with_dot := malloc(label_length + 1); + + _store_byte('.', label_with_dot); + memcpy(label_with_dot + 1, label_name, label_length); + + return _elna_tac_jump(label_with_dot, label_length + 1) +end; + +(* Parses a label declaration: skips the current token and stores the following token as the label of a label_declaration node. NOTE(review): the label is written with the _goto_statement_ setters but read with the _label_declaration_ getters - presumably both nodes share one layout; confirm. *) proc _elna_parser_label_declaration(); +var + token_kind: Word; + label_name: Word; + label_length: Word; + result: Word; +begin + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + label_name := _elna_lexer_global_get_start(); + label_length :=
_elna_lexer_global_get_end() - label_name; + _elna_lexer_skip_token(); + + result := malloc(_label_declaration_size()); + + _node_set_kind(result, NodeKind.label_declaration); + _statement_set_next(result, 0); + _goto_statement_set_label(result, label_name); + _goto_statement_set_length(result, label_length); + + return result +end; + +(* Emits a label instruction carrying the declared label name. *) proc _elna_tac_label_declaration(parser_node: Word); +var + label_name: Word; + label_length: Word; +begin + label_name := _label_declaration_get_label(parser_node); + label_length := _label_declaration_get_length(parser_node); + + return _elna_tac_label(label_name, label_length) +end; + +(* Emits an add-immediate of sp and the offset stored in symbol, with t0 as the target. The offset is read through _parameter_info_get_offset. *) proc _elna_tac_local_designator(symbol: Word); +var + variable_offset: Word; +begin + variable_offset := _parameter_info_get_offset(symbol); + + return _elna_tac_add_immediate(ElnaRtlRegister.t0, ElnaRtlRegister.sp, variable_offset) +end; + +(* Emits a load-address of the named variable into t0. *) proc _elna_tac_global_designator(variable_expression: Word); +var + name: Word; + token_length: Word; +begin + name := _variable_expression_get_name(variable_expression); + token_length := _variable_expression_get_length(variable_expression); + + return _elna_tac_load_address(ElnaRtlRegister.t0, name, token_length) +end; + +(* Generates code for an enumeration value written as a field access: the aggregate name is looked up in the global symbol table and the members of its type are searched for the field name. The position, counted from 1, is loaded into t0. Returns 0 if the member list is exhausted. *) proc _elna_tac_enumeration_value(field_access_expression: Word); +var + enumeration_type: Word; + members: Word; + members_length: Word; + token_type: Word; + value_name: Word; + name_length: Word; + member_name: Word; + member_length: Word; + counter: Word; + symbol: Word; + instruction: Word; +begin + symbol := _field_access_expression_get_aggregate(field_access_expression); + value_name := _variable_expression_get_name(symbol); + name_length := _variable_expression_get_length(symbol); + + symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length); + + enumeration_type := _type_info_get__type(symbol); + members := _enumeration_type_get_members(enumeration_type); + members_length := _enumeration_type_get_length(enumeration_type); + + (* NOTE(review): token_type is not used afterwards - confirm this lexer read is still needed. *) _elna_lexer_read_token(@token_type); +
+ value_name := _field_access_expression_get_field(field_access_expression); + name_length := _field_access_expression_get_length(field_access_expression); + counter := 1; + + instruction := 0; + .elna_tac_enumeration_value_members; + if members_length > 0 then + member_name := members^; + member_length := members + 4; + member_length := member_length^; + + (* Advance to the next 8-byte member entry while string_compare yields 0 (presumably "not equal" - confirm). *) if string_compare(value_name, name_length, member_name, member_length) = 0 then + members_length := members_length - 1; + members := members + 8; + counter := counter + 1; + goto elna_tac_enumeration_value_members + end; + instruction := _elna_tac_load_immediate(ElnaRtlRegister.t0, counter, 0) + end; + return instruction +end; + +(* Parses the field part of a field access on aggregate: skips the current token, records the following token as the field name and returns a field_access_expression node. *) proc _elna_parser_field_access_expression(aggregate: Word); +var + token_kind: Word; + name: Word; + name_token: Word; + result: Word; +begin + (* Skip dot. Read the enumeration value. *) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + name := _elna_lexer_global_get_start(); + name_token := _elna_lexer_global_get_end(); + name_token := name_token - name; + + _elna_lexer_skip_token(); + result := malloc(_field_access_expression_size()); + + _node_set_kind(result, NodeKind.field_access_expression); + _field_access_expression_set_aggregate(result, aggregate); + _field_access_expression_set_field(result, name); + _field_access_expression_set_length(result, name_token); + + return result +end; + +(* Generates code for a designator depending on the node kind: dereference, field access (enumeration value), call or simple expression. Writes 0 through is_address for enumeration values and calls. NOTE(review): parser_node is declared both as a parameter and as a local variable; name_token, lookup_result and token_kind are not used. *) proc _elna_tac_designator(parser_node: Word, symbol_table: Word, is_address: Word); +var + name_token: Word; + lookup_result: Word; + token_kind: Word; + parser_node: Word; + node_kind: Word; + first_instruction: Word; + instruction: Word; +begin + node_kind := _node_get_kind(parser_node); + + if node_kind = NodeKind.dereference_expression then + parser_node := _dereference_expression_get_pointer(parser_node); + first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address); + instruction := _elna_tac_load_word(ElnaRtlRegister.t0, ElnaRtlRegister.t0,
0); + _elna_tac_instruction_set_next(first_instruction, instruction) + elsif node_kind = NodeKind.field_access_expression then + first_instruction := _elna_tac_enumeration_value(parser_node); + is_address^ := 0 + elsif node_kind = NodeKind.call then + first_instruction := _elna_tac_call(parser_node, symbol_table); + (* The call result is taken from a0. *) instruction := _elna_tac_move(ElnaRtlRegister.t0, ElnaRtlRegister.a0); + _elna_tac_instruction_set_next(first_instruction, instruction); + is_address^ := 0 + else + first_instruction := _elna_tac_simple_expression(parser_node, symbol_table, is_address) + end; + return first_instruction +end; + +(* Creates an assign_statement node for assignee and parses the assigned expression. The token following the assignee is skipped without its kind being checked. *) proc _elna_parser_assign_statement(assignee: Word); +var + result: Word; + token_kind: Word; + assignment_node: Word; +begin + result := malloc(_assign_statement_size()); + + _node_set_kind(result, NodeKind.assign_statement); + _statement_set_next(result, 0); + _assign_statement_set_assignee(result, assignee); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + assignment_node := _elna_parser_binary_expression(); + _assign_statement_set_assignment(result, assignment_node); + + return result +end; + +(* Generates code for an assignment: the designator result in t0 is saved at sp + 60, the assigned expression is evaluated, the saved word is reloaded into t1 and t0 is stored at offset 0 from t1. The value written to is_address is not inspected here. *) proc _elna_tac_assign_statement(parser_tree: Word, symbol_table: Word); +var + current_expression: Word; + is_address: Word; + first_instruction: Word; + instruction: Word; + current_instruction: Word; +begin + current_expression := _assign_statement_get_assignee(parser_tree); + first_instruction := _elna_tac_designator(current_expression, symbol_table, @is_address); + + (* Save the assignee address on the stack. *) + current_instruction := _elna_tac_store_word(ElnaRtlRegister.t0, ElnaRtlRegister.sp, 60); + _elna_tac_instruction_set_next(first_instruction, current_instruction); + + (* Compile the assignment.
*) + current_expression := _assign_statement_get_assignment(parser_tree); + instruction := _elna_tac_binary_expression(current_expression, symbol_table); + _elna_tac_instruction_set_next(current_instruction, instruction); + + (* Reload the saved assignee word into t1. *) current_instruction := _elna_tac_load_word(ElnaRtlRegister.t1, ElnaRtlRegister.sp, 60); + _elna_tac_instruction_set_next(instruction, current_instruction); + + instruction := _elna_tac_store_word(ElnaRtlRegister.t0, ElnaRtlRegister.t1, 0); + _elna_tac_instruction_set_next(current_instruction, instruction); + + return first_instruction +end; + +(* Parses a return statement: skips the current token, parses the returned expression and wraps it in a return_statement node. NOTE(review): label_length is not used. *) proc _elna_parser_return_statement(); +var + token_kind: Word; + returned: Word; + label_length: Word; + result: Word; +begin + (* Skip "return" keyword and whitespace after it. *) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + + returned := _elna_parser_binary_expression(); + result := malloc(_return_statement_size()); + + _node_set_kind(result, NodeKind.return_statement); + _statement_set_next(result, 0); + _return_statement_set_returned(result, returned); + + return result +end; + +(* Evaluates the returned expression and appends a move of t0 into a0. *) proc _elna_tac_return_statement(parser_node: Word, symbol_table: Word); +var + return_expression: Word; + first_instruction: Word; + instruction: Word; +begin + return_expression := _return_statement_get_returned(parser_node); + first_instruction := _elna_tac_binary_expression(return_expression, symbol_table); + instruction := _elna_tac_move(ElnaRtlRegister.a0, ElnaRtlRegister.t0); + _elna_tac_instruction_set_next(first_instruction, instruction); + return first_instruction +end; + +(** + * Writes a label, .Ln, where n is a unique number. If length is not 0, a named label is written instead. + * + * Parameters: + * counter - Label counter. With a non-zero length it is the address of a label name of length bytes, written with a leading dot added when the name does not start with one. + *) +proc _write_label(counter: Word, length: Word); +var + first_byte: Word; +begin + if length = 0 then + _write_s(".L", 2); + _write_i(counter) + else + first_byte := _load_byte(counter); + if first_byte <> '.'
then + _write_c('.') + end; + _write_s(counter, length) + end +end; + +(* Parses one condition with its statement list: skips the leading keyword, parses the condition, skips the token after it and parses the statements. The next link of the returned node is 0. *) proc _elna_parser_conditional_statements(); +var + token_kind: Word; + current_node: Word; + result: Word; +begin + result := malloc(_conditional_statements_size()); + + (* Skip "if", "while" or "elsif". *) + _elna_lexer_skip_token(); + + current_node := _elna_parser_binary_expression(); + _conditional_statements_set_condition(result, current_node); + + (* Skip "then" or "do". *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + current_node := _elna_parser_statements(); + _conditional_statements_set_statements(result, current_node); + + _conditional_statements_set_next(result, 0); + return result +end; + +(* Generates code for one conditional branch: the condition, a beqz on t0 to a newly numbered label, the branch statements, a jump to after_end_label and finally the new label itself. Returns the first instruction. *) proc _elna_tac_conditional_statements(parser_node: Word, after_end_label: Word, symbol_table: Word); +var + condition_label: Word; + current_node: Word; + instruction: Word; + current_instruction: Word; + first_instruction: Word; +begin + (* Compile condition. *) + current_node := _conditional_statements_get_condition(parser_node); + first_instruction := _elna_tac_binary_expression(current_node, symbol_table); + + (* condition_label is the label in front of the next elsif condition or end.
*) + condition_label := label_counter; + label_counter := label_counter + 1; + + current_instruction := _elna_tac_beqz(ElnaRtlRegister.t0, condition_label, 0); + _elna_tac_instruction_set_next(first_instruction, current_instruction); + + current_node := _conditional_statements_get_statements(parser_node); + instruction := _elna_tac_statements(current_node, symbol_table); + if instruction <> 0 then + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction + end; + + instruction := _elna_tac_jump(after_end_label, 0); + _elna_tac_instruction_set_next(current_instruction, instruction); + + current_instruction := _elna_tac_label(condition_label, 0); + _elna_tac_instruction_set_next(instruction, current_instruction); + + return first_instruction +end; + +(* Parses an if statement: the leading conditional, any number of elsif conditionals linked through _conditional_statements_set_next and an optional else part. The token that closes the statement is skipped at the end. *) proc _elna_parser_if_statement(); +var + current_node: Word; + result: Word; + token_kind: Word; + previous_conditional: Word; + next_conditional: Word; +begin + result := malloc(_if_statement_size()); + + _node_set_kind(result, NodeKind.if_statement); + _statement_set_next(result, 0); + + previous_conditional := _elna_parser_conditional_statements(); + _if_statement_set_conditionals(result, previous_conditional); + + .elna_parser_if_statement_loop; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind._elsif then + next_conditional := _elna_parser_conditional_statements(); + _conditional_statements_set_next(previous_conditional, next_conditional); + (* Advance the tail so that the next elsif is linked behind this one. *) previous_conditional := next_conditional; + + goto elna_parser_if_statement_loop + elsif token_kind = ElnaLexerKind._else then + _elna_lexer_skip_token(); + + current_node := _elna_parser_statements(); + _if_statement_set__else(result, current_node) + else + _if_statement_set__else(result, 0) + end; + _elna_lexer_skip_token(); + + return result +end; + +(* Parses one statement selected by the current token kind and returns its node, or 0 if the token starts no statement. An identifier yields a call or, for any other designator, an assignment. *) proc _elna_parser_statement(); +var + token_kind: Word; + result : Word; +begin + result := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind =
ElnaLexerKind._goto then + result := _elna_parser_goto_statement() + elsif token_kind = ElnaLexerKind._if then + result := _elna_parser_if_statement() + elsif token_kind = ElnaLexerKind._return then + result := _elna_parser_return_statement() + elsif token_kind = ElnaLexerKind.dot then + result := _elna_parser_label_declaration() + elsif token_kind = ElnaLexerKind.identifier then + result := _elna_parser_designator(); + + (* A designator that is not a call is the left side of an assignment. *) if _node_get_kind(result) <> NodeKind.call then + result := _elna_parser_assign_statement(result) + end + end; + return result +end; + +(* Parses statements separated by semicolons and links the nodes through _statement_set_next. Parsing stops at the first place where no statement is recognized. Returns the first node, or 0 for an empty list. *) proc _elna_parser_statements(); +var + token_kind: Word; + previous_statement: Word; + next_statement: Word; + first_statement: Word; +begin + _skip_empty_lines(); + + first_statement := _elna_parser_statement(); + previous_statement := first_statement; + if previous_statement = 0 then + goto elna_parser_statements_end + end; + + .elna_parser_statement_loop; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.semicolon then + _elna_lexer_skip_token(); + _skip_empty_lines(); + next_statement := _elna_parser_statement(); + _statement_set_next(previous_statement, next_statement); + previous_statement := next_statement; + + if previous_statement <> 0 then + goto elna_parser_statement_loop + end + end; + .elna_parser_statements_end; + _skip_empty_lines(); + + return first_statement +end; + +(* Generates code for every statement of the list starting at parser_node and links the results through _elna_tac_instruction_set_next. Statements that yield 0 are skipped. Returns the first instruction, or 0 if nothing was generated. *) proc _elna_tac_statements(parser_node: Word, symbol_table: Word); +var + current_statement: Word; + instruction: Word; + first_instruction: Word; + current_instruction: Word; +begin + current_statement := parser_node; + first_instruction := 0; + + .elna_tac_statements_loop; + if current_statement <> 0 then + instruction := _elna_tac_statement(current_statement, symbol_table); + current_statement := _statement_get_next(current_statement); + if instruction = 0 then + goto elna_tac_statements_loop + end; + if first_instruction = 0 then + first_instruction := instruction + else +
_elna_tac_instruction_set_next(current_instruction, instruction) + end; + current_instruction := instruction; + goto elna_tac_statements_loop + end; + return first_instruction +end; + +(* Generates code for an if statement: reserves a numbered end label, emits every conditional branch in order, then the else statements if there are any, and finally the end label. NOTE(review): condition_label is declared but not used in this procedure. *) proc _elna_tac_if_statement(parser_node: Word, symbol_table: Word); +var + current_node: Word; + after_end_label: Word; + condition_label: Word; + first_instruction: Word; + instruction: Word; + current_instruction: Word; +begin + after_end_label := label_counter; + label_counter := label_counter + 1; + + current_node := _if_statement_get_conditionals(parser_node); + first_instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); + current_instruction := first_instruction; + + .elna_tac_if_statement_loop; + current_node := _conditional_statements_get_next(current_node); + if current_node <> 0 then + instruction := _elna_tac_conditional_statements(current_node, after_end_label, symbol_table); + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction; + goto elna_tac_if_statement_loop + end; + current_node := _if_statement_get__else(parser_node); + + if current_node <> 0 then + instruction := _elna_tac_statements(current_node, symbol_table); + if instruction <> 0 then + _elna_tac_instruction_set_next(current_instruction, instruction); + current_instruction := instruction + end + end; + instruction := _elna_tac_label(after_end_label, 0); + _elna_tac_instruction_set_next(current_instruction, instruction); + + return first_instruction +end; + +(* Dispatches on the node kind to the matching _elna_tac_ generator and returns its result; yields 0 for a node kind that is not handled. *) proc _elna_tac_statement(parser_node: Word, symbol_table: Word); +var + statement_kind: Word; + instruction: Word; +begin + statement_kind := _node_get_kind(parser_node); + + if statement_kind = NodeKind.goto_statement then + instruction := _elna_tac_goto_statement(parser_node) + elsif statement_kind = NodeKind.if_statement then + instruction := _elna_tac_if_statement(parser_node, symbol_table) + elsif statement_kind = NodeKind.return_statement then + instruction :=
_elna_tac_return_statement(parser_node, symbol_table) + elsif statement_kind = NodeKind.label_declaration then + instruction := _elna_tac_label_declaration(parser_node) + elsif statement_kind = NodeKind.call then + instruction := _elna_tac_call(parser_node, symbol_table) + elsif statement_kind = NodeKind.assign_statement then + instruction := _elna_tac_assign_statement(parser_node, symbol_table) + else + instruction := 0 + end; + return instruction +end; + +(** + * Writes a register name to the standard output. + * + * Parameters: + * register_character - Register character. + * register_number - Register number, written as the single character register_number + '0'. + *) +proc _write_register(register_character: Word, register_number: Word); +begin + _write_c(register_character); + _write_c(register_number + '0') +end; + +(* Parses a record type expression: a list of fields separated by semicolons, each made of a name token, one skipped token and a type expression. Every field becomes a 16-byte entry (name, length, type node, next entry) of a linked list. Returns a record_type_expression node holding the list head and the field count. *) proc _elna_parser_record_type_expression(); +var + entry: Word; + member_count: Word; + memory_start: Word; + field_name: Word; + field_length: Word; + field_type: Word; + token_kind: Word; + result: Word; + previous_entry: Word; +begin + _elna_lexer_skip_token(); + member_count := 0; + memory_start := 0; + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind._end then + goto elna_parser_record_type_expression_end + end; + .elna_parser_record_type_expression_loop; + entry := malloc(16); + member_count := member_count + 1; + + field_name := _elna_lexer_global_get_start(); + field_length := _elna_lexer_global_get_end() - field_name; + + entry^ := field_name; + entry := entry + 4; + + entry^ := field_length; + entry := entry + 4; + + (* Skip the identifier.
*) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + field_type := _elna_parser_type_expression(); + + entry^ := field_type; + entry := entry + 4; + + (* entry now addresses the link word at offset 12; entry - 12 is the start of the entry. *) entry^ := 0; + if memory_start = 0 then + memory_start := entry - 12 + else + previous_entry^ := entry - 12 + end; + previous_entry := entry; + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.semicolon then + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + goto elna_parser_record_type_expression_loop + end; + + .elna_parser_record_type_expression_end; + _elna_lexer_skip_token(); + + (* NOTE(review): a record node is allocated with the enumeration size function - presumably both nodes have the same size; confirm. *) result := malloc(_enumeration_type_expression_size()); + + _node_set_kind(result, NodeKind.record_type_expression); + _record_type_expression_set_members(result, memory_start); + _record_type_expression_set_length(result, member_count); + + return result +end; + +(* Parses an enumeration type expression: member names separated by commas. Every member becomes a 12-byte entry (name, length, next entry) of a linked list. Returns an enumeration_type_expression node holding the list head and the member count. *) proc _elna_parser_enumeration_type_expression(); +var + token_kind: Word; + enumeration_name: Word; + name_length: Word; + memory_start: Word; + member_count: Word; + result: Word; + entry: Word; + previous_entry: Word; +begin + _elna_lexer_skip_token(); + memory_start := 0; + member_count := 0; + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.right_paren then + goto elna_parser_enumeration_type_expression_end + end; + .elna_parser_enumeration_type_expression_loop; + entry := malloc(12); + member_count := member_count + 1; + + enumeration_name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - enumeration_name; + + entry^ := enumeration_name; + entry := entry + 4; + + entry^ := name_length; + entry := entry + 4; + + (* entry now addresses the link word at offset 8; entry - 8 is the start of the entry. *) entry^ := 0; + if memory_start = 0 then + memory_start := entry - 8 + else + previous_entry^ := entry - 8 + end; + previous_entry := entry; + + (* Skip the identifier.
*) + _elna_lexer_skip_token(); + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.comma then + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + goto elna_parser_enumeration_type_expression_loop + end; + + .elna_parser_enumeration_type_expression_end; + (* Skip the token that ended the member list. *) _elna_lexer_skip_token(); + + result := malloc(_enumeration_type_expression_size()); + + _node_set_kind(result, NodeKind.enumeration_type_expression); + _enumeration_type_expression_set_members(result, memory_start); + _enumeration_type_expression_set_length(result, member_count); + + return result +end; + +(** + * Creates the enumeration type representation for an enumeration type expression node. + * + * record + * type_kind: Word; + * size: Word; + * members: StringArray; + * length: Word + * end; + * + * Returns the enumeration type description wrapped by _type_info_create. + *) +proc _elna_name_type_enumeration(parser_node: Word); +var + result: Word; + memory_start: Word; + member_count: Word; + member_array_start: Word; + member_array_current: Word; +begin + result := malloc(_enumeration_type_size()); + + memory_start := _enumeration_type_expression_get_members(parser_node); + member_count := _enumeration_type_expression_get_length(parser_node); + + (* Copy the list of enumeration members into an array of strings.
*) + member_array_start := malloc(member_count * 8); + member_array_current := member_array_start; + + .elna_name_type_enumeration_loop; + if member_count > 0 then + (* Copy the name pointer. *) member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + (* Copy the name length. *) member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + (* Follow the link to the next list entry. *) memory_start := memory_start^; + member_count := member_count - 1; + goto elna_name_type_enumeration_loop + end; + (* The loop counted member_count down, so the length is read again. *) member_count := _enumeration_type_expression_get_length(parser_node); + + _type_set_kind(result, TypeKind.enumeration); + _type_set_size(result, 4); + _enumeration_type_set_members(result, member_array_start); + _enumeration_type_set_length(result, member_count); + + return _type_info_create(result) +end; + +(* Creates the record type for a record type expression node: the field list is copied into an array of 12-byte entries (name, length, result of _elna_name_type_expression for the field type), the type size is set to 4 bytes per field and the type is returned wrapped by _type_info_create. *) proc _elna_name_type_record(parser_node: Word); +var + result: Word; + memory_start: Word; + member_count: Word; + member_array_start: Word; + member_array_current: Word; +begin + result := malloc(_record_type_size()); + + memory_start := _record_type_expression_get_members(parser_node); + member_count := _record_type_expression_get_length(parser_node); + + member_array_start := malloc(member_count * 12); + member_array_current := member_array_start; + + .elna_name_type_record_loop; + if member_count > 0 then + member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + member_array_current^ := memory_start^; + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + member_array_current^ := _elna_name_type_expression(memory_start^); + member_array_current := member_array_current + 4; + memory_start := memory_start + 4; + + (* Follow the link to the next list entry. *) memory_start := memory_start^; + member_count := member_count - 1; + goto elna_name_type_record_loop + end; + member_count := _record_type_expression_get_length(parser_node); + + _type_set_kind(result,
TypeKind._record); + _type_set_size(result, member_count * 4); + _record_type_set_members(result, member_array_start); + _record_type_set_length(result, member_count); + + return _type_info_create(result) +end; + +(* Creates a named_type_expression node from the current token (start and length) and skips that token. *) proc _elna_parser_named_type_expression(); +var + result: Word; + type_name: Word; + name_length: Word; +begin + result := malloc(_named_type_expression_size()); + + _node_set_kind(result, NodeKind.named_type_expression); + type_name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - type_name; + _named_type_expression_set_name(result, type_name); + _named_type_expression_set_length(result, name_length); + _elna_lexer_skip_token(); + + return result +end; + +(* Parses a type expression selected by the current token: an identifier (named type), a left parenthesis (enumeration) or the record keyword. Returns 0 for any other token. *) proc _elna_parser_type_expression(); +var + token_kind: Word; + result: Word; +begin + result := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.identifier then + result := _elna_parser_named_type_expression() + elsif token_kind = ElnaLexerKind.left_paren then + result := _elna_parser_enumeration_type_expression() + elsif token_kind = ElnaLexerKind._record then + result := _elna_parser_record_type_expression() + end; + return result +end; + +(* Resolves a type expression node. NOTE(review): the named branch returns the result of _type_info_get__type, whereas the enumeration and record helpers return the wrapper made by _type_info_create - confirm that callers expect both. result is not assigned for other node kinds. *) proc _elna_name_type_expression(parser_node: Word); +var + token_kind: Word; + type_name: Word; + name_length: Word; + result: Word; +begin + token_kind := _node_get_kind(parser_node); + + if token_kind = NodeKind.named_type_expression then + type_name := _named_type_expression_get_name(parser_node); + name_length := _named_type_expression_get_length(parser_node); + + result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); + result := _type_info_get__type(result) + elsif token_kind = NodeKind.enumeration_type_expression then + result := _elna_name_type_enumeration(parser_node) + elsif token_kind = NodeKind.record_type_expression then + result := _elna_name_type_record(parser_node) + end; + + return result +end; + +(** + * Parameters: + * parameter_index - Parameter index.
+ *) +(* Allocates a parameter_info whose stack offset is 88 - 4 * parameter_index. *) proc _parameter_info_create(parameter_index: Word); +var + offset: Word; + result: Word; +begin + result := malloc(_parameter_info_size()); + _info_set_kind(result, InfoKind.parameter_info); + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + offset := parameter_index * 4; + _parameter_info_set_offset(result, 88 - offset); + + return result +end; + +(* Allocates a type_info that refers to type_representation. *) proc _type_info_create(type_representation: Word); +var + result: Word; +begin + result := malloc(_type_info_size()); + _info_set_kind(result, InfoKind.type_info); + _type_info_set__type(result, type_representation); + + return result +end; + +(** + * Allocates a temporary_info whose stack offset is 4 * temporary_index. + * Parameters: + * temporary_index - Temporary variable index. + *) +proc _temporary_info_create(temporary_index: Word); +var + result: Word; +begin + result := malloc(_temporary_info_size()); + _info_set_kind(result, InfoKind.temporary_info); + + (* Calculate the stack offset: 4 * variable_counter. *) + _temporary_info_set_offset(result, temporary_index * 4); + + return result +end; + +(** + * Allocates a procedure_info that refers to the given symbol table. + * Parameters: + * symbol_table - Local symbol table. + *) +proc _procedure_info_create(symbol_table: Word); +var + result: Word; +begin + result := malloc(_procedure_info_size()); + _info_set_kind(result, InfoKind.procedure_info); + _procedure_info_set_symbol_table(result, symbol_table); + + return result +end; + +(** + * Enters the declaration name of parser_node into symbol_table as a parameter. + * Parameters: + * parameter_index - Parameter index. + *) +proc _elna_name_procedure_parameter(parser_node: Word, parameter_index: Word, symbol_table: Word); +var + name_length: Word; + info: Word; + name_position: Word; +begin + name_position := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + info := _parameter_info_create(parameter_index); + _symbol_table_enter(symbol_table, name_position, name_length, info) +end; + +(** + * Parameters: + * variable_index - Variable index.
+ *) +(* Enters the declaration name of parser_node into symbol_table as a temporary with the given index. *) proc _elna_name_procedure_temporary(parser_node: Word, variable_index: Word, symbol_table: Word); +var + name_length: Word; + info: Word; + name_position: Word; +begin + name_position := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + info := _temporary_info_create(variable_index); + _symbol_table_enter(symbol_table, name_position, name_length, info) +end; + +(* Enters every declaration of the list starting at parser_node into symbol_table as a temporary; indices are counted from 0. *) proc _elna_name_procedure_temporaries(parser_node: Word, symbol_table: Word); +var + temporary_counter: Word; +begin + temporary_counter := 0; + + .elna_name_procedure_temporaries_loop; + if parser_node <> 0 then + _elna_name_procedure_temporary(parser_node, temporary_counter, symbol_table); + + temporary_counter := temporary_counter + 1; + parser_node := _declaration_get_next(parser_node); + goto elna_name_procedure_temporaries_loop + end +end; + +(* Parses a procedure: its name, the parameter list, the variable part and the body. Returns a procedure_declaration node. *) proc _elna_parser_procedure_declaration(); +var + name_pointer: Word; + name_length: Word; + token_kind: Word; + result: Word; + parameter_head: Word; +begin + result := malloc(_procedure_declaration_size()); + + _node_set_kind(result, NodeKind.procedure_declaration); + _declaration_set_next(result, 0); + + (* Skip "proc ". *) + _elna_lexer_skip_token(); + + _elna_lexer_read_token(@token_kind); + name_pointer := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - name_pointer; + + _declaration_set_name(result, name_pointer); + _declaration_set_length(result, name_length); + (* Skip procedure name. *) + _elna_lexer_skip_token(); + + (* Skip open paren.
*) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + parameter_head := 0; + + .elna_parser_procedure_declaration_parameter; + _elna_lexer_read_token(@token_kind); + + if token_kind <> ElnaLexerKind.right_paren then + name_pointer := _elna_parser_variable_declaration(); + if parameter_head = 0 then + parameter_head := name_pointer + else + _declaration_set_next(name_length, name_pointer) + end; + name_length := name_pointer; + + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.comma then + _elna_lexer_skip_token(); + goto elna_parser_procedure_declaration_parameter + end + end; + (* Skip close paren. *) + _elna_lexer_skip_token(); + _procedure_declaration_set_parameters(result, parameter_head); + + (* Skip semicolon and newline. *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + parameter_head := _elna_parser_var_part(); + _procedure_declaration_set_temporaries(result, parameter_head); + + (* Skip semicolon, "begin" and newline. *) + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind._begin then + _elna_lexer_skip_token(); + parameter_head := _elna_parser_statements() + elsif token_kind = ElnaLexerKind._return then + parameter_head := _elna_parser_return_statement() + end; + _procedure_declaration_set_body(result, parameter_head); + + (* Skip the "end" keyword. 
*) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + return result +end; + +proc _elna_tac_parameters(current_parameter: Word, new_symbol_table: Word); +var + name_pointer: Word; + name_length: Word; + parameter_counter: Word; + instruction: Word; + first_instruction: Word; + current_instruction: Word; + symbol_info: Word; +begin + first_instruction := 0; + parameter_counter := 0; + + .elna_tac_parameters_loop; + if current_parameter <> 0 then + name_pointer := _declaration_get_name(current_parameter); + name_length := _declaration_get_length(current_parameter); + symbol_info := _symbol_table_lookup(new_symbol_table, name_pointer, name_length); + + symbol_info := _parameter_info_get_offset(symbol_info); + + instruction := _elna_tac_store_word(ElnaRtlRegister.a0 + parameter_counter, + ElnaRtlRegister.sp, symbol_info); + if first_instruction = 0 then + first_instruction := instruction + else + _elna_tac_instruction_set_next(current_instruction, instruction) + end; + current_instruction := instruction; + + parameter_counter := parameter_counter + 1; + + current_parameter := _declaration_get_next(current_parameter); + goto elna_tac_parameters_loop + end; + return first_instruction +end; + +proc elna_rtl_global_declaration(tac_declaration: Word); +var + name: Word; + length: Word; + body: Word; + result: Word; +begin + result := malloc(elna_rtl_declaration_size()); + name := _elna_tac_declaration_get_name(tac_declaration); + length := _elna_tac_declaration_get_length(tac_declaration); + body := _elna_tac_declaration_get_body(tac_declaration); + + elna_rtl_declaration_set_next(result, 0); + elna_rtl_declaration_set_name(result, name); + elna_rtl_declaration_set_length(result, length); + elna_rtl_declaration_set_body(result, body); + + return result +end; + +proc elna_rtl_procedure_declaration(tac_declaration: Word); +var + name: Word; + length: Word; + body: Word; + result: Word; +begin + result := malloc(elna_rtl_declaration_size()); + name := 
_elna_tac_declaration_get_name(tac_declaration); + length := _elna_tac_declaration_get_length(tac_declaration); + body := _elna_tac_declaration_get_body(tac_declaration); + body := elna_rtl_instructions(body); + + elna_rtl_declaration_set_next(result, 0); + elna_rtl_declaration_set_name(result, name); + elna_rtl_declaration_set_length(result, length); + elna_rtl_declaration_set_body(result, body); + + return result +end; + +proc _elna_tac_procedure_declaration(parser_node: Word); +var + name_pointer: Word; + name_length: Word; + current_parameter: Word; + body: Word; + new_symbol_table: Word; + symbol_info: Word; + instruction: Word; + first_instruction: Word; + result: Word; + result_size: Word; +begin + result := malloc(_elna_tac_declaration_size()); + + _elna_tac_declaration_set_next(result, 0); + + name_pointer := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + _elna_tac_declaration_set_name(result, name_pointer); + _elna_tac_declaration_set_length(result, name_length); + + symbol_info := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); + new_symbol_table := _procedure_info_get_symbol_table(symbol_info); + + (* Write the prologue. *) + first_instruction := _elna_tac_instruction_create(ElnaTacOperator.start); + + current_parameter := _procedure_declaration_get_parameters(parser_node); + current_parameter := _elna_tac_parameters(current_parameter, new_symbol_table); + _elna_tac_instruction_set_next(first_instruction, current_parameter); + + body := _procedure_declaration_get_body(parser_node); + instruction := _elna_tac_statements(body, new_symbol_table); + _elna_tac_instruction_set_next(first_instruction, instruction); + + (* Write the epilogue. 
*) + instruction := _elna_tac_instruction_create(ElnaTacOperator.ret); + _elna_tac_instruction_set_next(first_instruction, instruction); + + _elna_tac_declaration_set_body(result, first_instruction); + + return result +end; + +proc _elna_parser_procedures(); +var + parser_node: Word; + result: Word; + current_declaration: Word; + token_kind: Word; +begin + result := 0; + + .elna_parser_procedures_loop; + _skip_empty_lines(); + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind._proc then + parser_node := _elna_parser_procedure_declaration(); + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + + (* Skip semicolon. *) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + goto elna_parser_procedures_loop + end; + return result +end; + +proc elna_rtl_globals(tac_procedure: Word); +var + current_copy: Word; + next_copy: Word; + first_copy: Word; +begin + if tac_procedure <> 0 then + first_copy := elna_rtl_global_declaration(tac_procedure); + tac_procedure := _elna_tac_declaration_get_next(tac_procedure) + else + first_copy := 0; + end; + current_copy := first_copy; + + .elna_rtl_globals_start; + + if tac_procedure <> 0 then + next_copy := elna_rtl_global_declaration(tac_procedure); + + tac_procedure := _elna_tac_declaration_get_next(tac_procedure); + elna_rtl_declaration_set_next(current_copy, next_copy); + current_copy := next_copy; + goto elna_rtl_globals_start + end; + + return first_copy +end; + +proc elna_rtl_procedures(tac_procedure: Word); +var + current_copy: Word; + next_copy: Word; + first_copy: Word; +begin + if tac_procedure <> 0 then + first_copy := elna_rtl_procedure_declaration(tac_procedure); + tac_procedure := _elna_tac_declaration_get_next(tac_procedure) + else + first_copy := 0; + end; + current_copy := first_copy; + + .elna_rtl_procedures_start; + + if tac_procedure <> 0 then + next_copy := 
elna_rtl_procedure_declaration(tac_procedure); + + tac_procedure := _elna_tac_declaration_get_next(tac_procedure); + elna_rtl_declaration_set_next(current_copy, next_copy); + current_copy := next_copy; + goto elna_rtl_procedures_start + end; + + return first_copy +end; + +proc _elna_tac_procedures(parser_node: Word); +var + result: Word; + current_procedure: Word; + first_procedure: Word; +begin + first_procedure := 0; + + .elna_tac_procedures_loop; + if parser_node = 0 then + goto elna_tac_procedures_end + end; + result := _elna_tac_procedure_declaration(parser_node); + if first_procedure = 0 then + first_procedure := result + else + _elna_tac_declaration_set_next(current_procedure, result) + end; + current_procedure := result; + + parser_node := _declaration_get_next(parser_node); + goto elna_tac_procedures_loop; + + .elna_tac_procedures_end; + return first_procedure +end; + +(** + * Skips comments. + *) +proc _skip_empty_lines(); +var + token_kind: Word; +begin + .skip_empty_lines_rerun; + + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.comment then + _elna_lexer_skip_token(); + goto skip_empty_lines_rerun + end +end; + +proc _elna_parser_type_declaration(); +var + token_kind: Word; + type_name: Word; + name_length: Word; + parser_node: Word; + result: Word; +begin + _elna_lexer_read_token(@token_kind); + type_name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - type_name; + + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + parser_node := _elna_parser_type_expression(); + result := malloc(_type_declaration_size()); + + _node_set_kind(result, NodeKind.type_declaration); + _declaration_set_next(result, 0); + _declaration_set_name(result, type_name); + _declaration_set_length(result, name_length); + _type_declaration_set__type(result, parser_node); + + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + return result +end; + +proc 
_elna_name_type_declaration(parser_node: Word); +var + type_name: Word; + name_length: Word; + type_info: Word; +begin + type_name := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + parser_node := _type_declaration_get__type(parser_node); + type_info := _elna_name_type_expression(parser_node); + + _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info) +end; + +proc _elna_type_type_declaration(parser_node: Word); +begin +end; + +proc _elna_parser_type_part(); +var + token_kind: Word; + parser_node: Word; + result: Word; + current_declaration: Word; +begin + result := 0; + _skip_empty_lines(); + _elna_lexer_read_token(@token_kind); + + if token_kind <> ElnaLexerKind._type then + goto elna_parser_type_part_end + end; + _elna_lexer_skip_token(); + + .elna_parser_type_part_loop; + _skip_empty_lines(); + + _elna_lexer_read_token(@token_kind); + if token_kind = ElnaLexerKind.identifier then + parser_node := _elna_parser_type_declaration(); + + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + goto elna_parser_type_part_loop + end; + + .elna_parser_type_part_end; + return result +end; + +proc _elna_parser_variable_declaration(); +var + token_kind: Word; + name: Word; + name_length: Word; + variable_type: Word; + result: Word; +begin + _elna_lexer_read_token(@token_kind); + + name := _elna_lexer_global_get_start(); + name_length := _elna_lexer_global_get_end() - name; + + (* Skip the variable name and colon with the type. 
*) + _elna_lexer_skip_token(); + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + variable_type := _elna_parser_type_expression(); + result := malloc(_variable_declaration_size()); + + _node_set_kind(result, NodeKind.variable_declaration); + _declaration_set_next(result, 0); + _declaration_set_name(result, name); + _declaration_set_length(result, name_length); + _variable_declaration_set__type(result, variable_type); + + return result +end; + +proc _elna_tac_variable_declaration(parser_tree: Word); +var + name: Word; + name_length: Word; + variable_type: Word; + result: Word; +begin + result := malloc(_elna_tac_declaration_size()); + + _elna_tac_declaration_set_next(result, 0); + + name := _declaration_get_name(parser_tree); + name_length := _declaration_get_length(parser_tree); + variable_type := _variable_declaration_get__type(parser_tree); + + _elna_tac_declaration_set_name(result, name); + _elna_tac_declaration_set_length(result, name_length); + + name := _named_type_expression_get_name(variable_type); + name_length := _named_type_expression_get_length(variable_type); + + if string_compare("Array", 5, name, name_length) then + (* Else we assume this is a zeroed 4096 bytes big array. 
*) + _elna_tac_declaration_set_body(result, 4096) + else + _elna_tac_declaration_set_body(result, 4) + end; + return result +end; + +proc _elna_tac_type_field(name_pointer: Word, name_length: Word, field_pointer: Word, field_offset: Word); +var + first_result: Word; + second_result: Word; + new_name: Word; + new_length: Word; + field_length: Word; + instruction: Word; + name_target: Word; + next_instruction: Word; +begin + field_length := field_pointer + 4; + field_length := field_length^; + new_length := field_length + name_length; + new_length := new_length + 5; + + first_result := malloc(_elna_tac_declaration_size()); + _elna_tac_declaration_set_next(first_result, 0); + + new_name := malloc(new_length); + + name_target := new_name; + memcpy(name_target, name_pointer, name_length); + name_target := name_target + name_length; + memcpy(name_target, "_get_", 5); + name_target := name_target + 5; + memcpy(name_target, field_pointer^, field_length); + + _elna_tac_declaration_set_name(first_result, new_name); + _elna_tac_declaration_set_length(first_result, new_length); + + instruction := _elna_tac_add_immediate(ElnaRtlRegister.a0, ElnaRtlRegister.a0, field_offset, 0); + next_instruction := _elna_tac_load_word(ElnaRtlRegister.a0, ElnaRtlRegister.a0, 0); + _elna_tac_instruction_set_next(instruction, next_instruction); + _elna_tac_declaration_set_body(first_result, instruction); + + second_result := malloc(_elna_tac_declaration_size()); + _elna_tac_declaration_set_next(second_result, 0); + + new_name := malloc(new_length); + + name_target := new_name; + memcpy(name_target, name_pointer, name_length); + name_target := name_target + name_length; + memcpy(name_target, "_set_", 5); + name_target := name_target + 5; + memcpy(name_target, field_pointer^, field_length); + + _elna_tac_declaration_set_name(second_result, new_name); + _elna_tac_declaration_set_length(second_result, new_length); + + instruction := _elna_tac_add_immediate(ElnaRtlRegister.a0, ElnaRtlRegister.a0, 
field_offset, 0); + next_instruction := _elna_tac_store_word(ElnaRtlRegister.a1, ElnaRtlRegister.a0, 0); + _elna_tac_instruction_set_next(instruction, next_instruction); + _elna_tac_declaration_set_body(second_result, instruction); + + _elna_tac_declaration_set_next(first_result, second_result); + + return first_result +end; + +proc _elna_tac_type_record(name_pointer: Word, name_length: Word, type_representation: Word, current_result: Word); +var + first_result: Word; + result: Word; + type_size: Word; + new_name: Word; + new_length: Word; + instruction: Word; + field_count: Word; + field_offset: Word; + field_pointer: Word; +begin + first_result := malloc(_elna_tac_declaration_size()); + result := 0; + + (* Debug. Error stream output. + _syscall(2, name_pointer, name_length, 0, 0, 0, 64); *) + + type_size := _type_get_size(type_representation); + new_length := name_length + 5; + new_name := malloc(new_length); + + memcpy(new_name, name_pointer, name_length); + memcpy(new_name + name_length, "_size", 5); + + _elna_tac_declaration_set_name(first_result, new_name); + _elna_tac_declaration_set_length(first_result, new_length); + + instruction := _elna_tac_load_immediate(ElnaRtlRegister.a0, type_size, 0); + _elna_tac_declaration_set_body(first_result, instruction); + + field_count := _record_type_get_length(type_representation); + field_pointer := _record_type_get_members(type_representation); + field_offset := 0; + current_result^ := first_result; + + .elna_tac_type_record_fields; + if field_count > 0 then + result := _elna_tac_type_field(name_pointer, name_length, field_pointer, field_offset); + + _elna_tac_declaration_set_next(current_result^, result); + current_result^ := _elna_tac_declaration_get_next(result); + + field_offset := field_offset + 4; + field_count := field_count - 1; + field_pointer := field_pointer + 12; + goto elna_tac_type_record_fields + end; + + return first_result +end; + +proc _elna_tac_type_part(parser_node: Word); +var + name_pointer: Word; 
+ name_length: Word; + result: Word; + first_result: Word; + symbol: Word; + info_type: Word; + type_kind: Word; + current_result: Word; + out_result: Word; +begin + first_result := 0; + + .elna_tac_type_part_loop; + if parser_node = 0 then + goto elna_tac_type_part_end + end; + + name_pointer := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + symbol := _symbol_table_lookup(@symbol_table_global, name_pointer, name_length); + + info_type := _type_info_get__type(symbol); + type_kind := _type_get_kind(info_type); + + if type_kind = TypeKind._record then + result := _elna_tac_type_record(name_pointer, name_length, info_type, @out_result) + else + result := 0; + out_result := 0 + end; + if first_result = 0 then + first_result := result; + current_result := out_result + elsif result <> 0 then + _elna_tac_declaration_set_next(current_result, result); + current_result := out_result + end; + parser_node := _declaration_get_next(parser_node); + goto elna_tac_type_part_loop; + + .elna_tac_type_part_end; + return first_result +end; + +proc _elna_parser_var_part(); +var + result: Word; + token_kind: Word; + variable_node: Word; + current_declaration: Word; +begin + result := 0; + _elna_lexer_read_token(@token_kind); + + if token_kind <> ElnaLexerKind._var then + goto elna_parser_var_part_end + end; + (* Skip "var". *) + _elna_lexer_skip_token(); + + .elna_parser_var_part_loop; + _skip_empty_lines(); + _elna_lexer_read_token(@token_kind); + + if token_kind = ElnaLexerKind.identifier then + variable_node := _elna_parser_variable_declaration(); + + (* Skip semicolon. 
*) + _elna_lexer_read_token(@token_kind); + _elna_lexer_skip_token(); + + if result = 0 then + result := variable_node + else + _declaration_set_next(current_declaration, variable_node) + end; + current_declaration := variable_node; + goto elna_parser_var_part_loop + end; + + .elna_parser_var_part_end; + return result +end; + +proc _elna_tac_var_part(parser_node: Word); +var + node: Word; + current_variable: Word; + first_variable: Word; +begin + first_variable := 0; + if parser_node = 0 then + goto elna_tac_var_part_end + end; + + .elna_tac_var_part_loop; + node := _elna_tac_variable_declaration(parser_node); + if first_variable = 0 then + first_variable := node + else + _elna_tac_declaration_set_next(current_variable, node) + end; + current_variable := node; + + parser_node := _declaration_get_next(parser_node); + if parser_node <> 0 then + goto elna_tac_var_part_loop + end; + + .elna_tac_var_part_end; + return first_variable +end; + +proc _elna_parser_module_declaration(); +var + parser_node: Word; + result: Word; +begin + result := malloc(_module_declaration_size()); + + _node_set_kind(result, NodeKind.module_declaration); + + parser_node := _elna_parser_type_part(); + _module_declaration_set_types(result, parser_node); + + parser_node := _elna_parser_var_part(); + _module_declaration_set_globals(result, parser_node); + + parser_node := _elna_parser_procedures(); + _module_declaration_set_procedures(result, parser_node); + + return result +end; + +(** + * Process the source code and print the generated code. 
+ *)
+proc _elna_tac_module_declaration(parser_node: Word);
+var
+  data_part: Word;
+  code_part: Word;
+  type_part: Word;
+  current_declaration: Word;
+  next_declaration: Word;
+begin
+  type_part := _module_declaration_get_types(parser_node);
+  type_part := _elna_tac_type_part(type_part);
+
+  data_part := _module_declaration_get_globals(parser_node);
+  data_part := _elna_tac_var_part(data_part);
+
+  code_part := _module_declaration_get_procedures(parser_node);
+  code_part := _elna_tac_procedures(code_part);
+
+  current_declaration := code_part;
+
+  (* Find the last procedure declaration and append the declarations
+     generated for the types after it.
+     NOTE(review): if the module has no procedures, code_part is 0 and
+     _elna_tac_declaration_get_next is called with 0. *)
+  .elna_tac_module_declaration_types;
+  next_declaration := _elna_tac_declaration_get_next(current_declaration);
+  if next_declaration <> 0 then
+    current_declaration := next_declaration;
+
+    goto elna_tac_module_declaration_types
+  end;
+  _elna_tac_declaration_set_next(current_declaration, type_part);
+
+  return _elna_tac_module_create(data_part, code_part)
+end;
+
+(**
+ * Creates the local symbol table of a procedure, enters its parameters
+ * and temporaries there and registers the procedure itself in the global
+ * symbol table.
+ *
+ * Parameters:
+ * parser_node - Procedure declaration node.
+ *)
+proc _elna_name_procedure_declaration(parser_node: Word);
+var
+  name_pointer: Word;
+  name_length: Word;
+  new_symbol_table: Word;
+  parameter_counter: Word;
+  symbol_info: Word;
+  current_parameter: Word;
+begin
+  new_symbol_table := _symbol_table_create();
+  symbol_info := _procedure_info_create(new_symbol_table);
+
+  name_pointer := _declaration_get_name(parser_node);
+  name_length := _declaration_get_length(parser_node);
+
+  (* Parameters are numbered from 0 in declaration order. *)
+  current_parameter := _procedure_declaration_get_parameters(parser_node);
+  parameter_counter := 0;
+  .elna_name_procedure_declaration_parameter;
+  if current_parameter <> 0 then
+    _elna_name_procedure_parameter(current_parameter, parameter_counter, new_symbol_table);
+    parameter_counter := parameter_counter + 1;
+
+    current_parameter := _declaration_get_next(current_parameter);
+    goto elna_name_procedure_declaration_parameter
+  end;
+  (* current_parameter is reused for the list of temporaries. *)
+  current_parameter := _procedure_declaration_get_temporaries(parser_node);
+  _elna_name_procedure_temporaries(current_parameter, new_symbol_table);
+
+  _symbol_table_enter(@symbol_table_global, name_pointer, name_length, symbol_info)
+end;
+
+(* Type analysis of a procedure declaration. Currently does nothing. *)
+proc _elna_type_procedure_declaration(parser_node: Word);
+begin
+end;
+
+(**
+ * Runs the name analysis for all type declarations and then for all
+ * procedure declarations of a module.
+ *
+ * Parameters:
+ * parser_node - Module declaration node.
+ *
+ * NOTE(review): the local variable result is declared but never used.
+ *)
+proc _elna_name_module_declaration(parser_node: Word);
+var
+  current_part: Word;
+  result: Word;
+begin
+  current_part := _module_declaration_get_types(parser_node);
+  .elna_name_module_declaration_type;
+  if current_part <> 0 then
+    _elna_name_type_declaration(current_part);
+    current_part := _declaration_get_next(current_part);
+
+    goto elna_name_module_declaration_type
+  end;
+
+  current_part := _module_declaration_get_procedures(parser_node);
+  .elna_name_module_declaration_procedure;
+  if current_part <> 0 then
+    _elna_name_procedure_declaration(current_part);
+    current_part := _declaration_get_next(current_part);
+
+    goto elna_name_module_declaration_procedure
+  end
+end;
+
+(**
+ * Runs the type analysis for all type declarations and then for all
+ * procedure declarations of a module.
+ *
+ * Parameters:
+ * parser_node - Module declaration node.
+ *)
+proc _elna_type_module_declaration(parser_node: Word);
+var
+  current_part: Word;
+begin
+  current_part := _module_declaration_get_types(parser_node);
+  .elna_type_module_declaration_type;
+  if current_part <> 0 then
+    _elna_type_type_declaration(current_part);
+    current_part := _declaration_get_next(current_part);
+
+    goto elna_type_module_declaration_type
+  end;
+
+  current_part := _module_declaration_get_procedures(parser_node);
+  .elna_type_module_declaration_procedure;
+  if current_part <> 0 then
+    _elna_type_procedure_declaration(current_part);
+    current_part := _declaration_get_next(current_part);
+
+    goto elna_type_module_declaration_procedure
+  end
+end;
+
+(**
+ * Runs all compilation passes in order: parsing, name analysis, type
+ * analysis, TAC generation, RTL generation and output.
+ *)
+proc _compile();
+var
+  parser_node: Word;
+  tac: Word;
+  rtl: Word;
+begin
+  parser_node := _elna_parser_module_declaration();
+  _elna_name_module_declaration(parser_node);
+  _elna_type_module_declaration(parser_node);
+  tac := _elna_tac_module_declaration(parser_node);
+  rtl := elna_rtl_module_declaration(tac);
+  _elna_writer_module(rtl)
+end;
+
+(**
+ * Terminates the program. a0 contains the return code.
+ *
+ * Parameters:
+ * a0 - Status code. 
+ *)
+proc _exit(status: Word);
+begin
+  (* The status is the first argument and 93 the last one. 93 is presumably
+     the number of the exit system call - confirm against _syscall. *)
+  _syscall(status, 0, 0, 0, 0, 0, 93)
+end;
+
+(**
+ * Looks for a symbol in the given symbol table.
+ *
+ * The table starts with a word holding the number of entries. Each entry
+ * is 12 bytes long: name pointer, name length and symbol pointer.
+ *
+ * Parameters:
+ * symbol_table - Symbol table.
+ * symbol_name - Symbol name pointer.
+ * name_length - Symbol name length.
+ *
+ * Returns the symbol pointer or 0 in a0.
+ *)
+proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word);
+var
+  result: Word;
+  symbol_table_length: Word;
+  current_name: Word;
+  current_length: Word;
+begin
+  result := 0;
+
+  (* The first word in the symbol table is its length, get it. *)
+  symbol_table_length := symbol_table^;
+
+  (* Go to the first symbol position. *)
+  symbol_table := symbol_table + 4;
+
+  (* symbol_table_length counts the entries that are still to be checked. *)
+  .symbol_table_lookup_loop;
+  if symbol_table_length = 0 then
+    goto symbol_table_lookup_end
+  end;
+
+  (* Symbol name pointer and length. *)
+  current_name := symbol_table^;
+  current_length := symbol_table + 4;
+  current_length := current_length^;
+
+  (* If lengths don't match, continue with the next entry. *)
+  if name_length <> current_length then
+    goto symbol_table_lookup_repeat
+  end;
+  (* If names don't match (memcmp result is not 0), continue with the next entry. *)
+  if memcmp(symbol_name, current_name, name_length) then
+    goto symbol_table_lookup_repeat
+  end;
+  (* Otherwise, the symbol is found. *)
+  result := symbol_table + 8;
+  result := result^;
+  goto symbol_table_lookup_end;
+
+  (* Advance to the next 12 byte entry. *)
+  .symbol_table_lookup_repeat;
+  symbol_table := symbol_table + 12;
+  symbol_table_length := symbol_table_length - 1;
+  goto symbol_table_lookup_loop;
+
+  .symbol_table_lookup_end;
+  return result
+end;
+
+(**
+ * Create a new local symbol table in the symbol memory region after the last
+ * known symbol table. 
+ *) +proc _symbol_table_create(); +var + new_symbol_table: Word; + table_length: Word; + current_table: Word; +begin + new_symbol_table := symbol_table_store; + + .symbol_table_create_loop; + table_length := new_symbol_table^; + + if table_length <> 0 then + table_length := table_length * 12; + table_length := table_length + 4; + new_symbol_table := new_symbol_table + table_length; + goto symbol_table_create_loop + end; + + return new_symbol_table +end; + +(** + * Inserts a symbol into the table. + * + * Parameters: + * symbol_table - Symbol table. + * symbol_name - Symbol name pointer. + * name_length - Symbol name length. + * symbol - Symbol pointer. + *) +proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word); +var + table_length: Word; + symbol_pointer: Word; +begin + (* The first word in the symbol table is its length, get it. *) + table_length := symbol_table^; + + (* Calculate the offset for the new symbol. *) + symbol_pointer := table_length * 12; + symbol_pointer := symbol_pointer + 4; + symbol_pointer := symbol_table + symbol_pointer; + + symbol_pointer^ := symbol_name; + symbol_pointer := symbol_pointer + 4; + symbol_pointer^ := name_length; + symbol_pointer := symbol_pointer + 4; + symbol_pointer^ := symbol; + + (* Increment the symbol table length. *) + table_length := table_length + 1; + symbol_table^ := table_length +end; + +proc _symbol_table_build(); +var + current_info: Word; + current_type: Word; +begin + (* Set the table length to 0. *) + symbol_table_global := 0; + + current_type := malloc(_type_size()); + _type_set_kind(current_type, TypeKind.primitive); + _type_set_size(current_type, 4); + + (* Enter built-in symbols. 
*) + current_info := _type_info_create(current_type); + _symbol_table_enter(@symbol_table_global, "Word", 4, current_info); + + current_info := _type_info_create(current_type); + _symbol_table_enter(@symbol_table_global, "Array", 5, current_info) +end; + +(** + * Assigns some value to at array index. + * + * Parameters: + * array - Array pointer. + * index - Index (word offset into the array). + * data - Data to assign. + *) +proc _assign_at(array: Word, index: Word, data: Word); +var + target: Word; +begin + target := index - 1; + target := target * 4; + target := array + target; + + target^ := data +end; + +proc _get_at(array: Word, index: Word); +var + target: Word; +begin + target := index - 1; + target := target * 4; + target := array + target; + + return target^ +end; + +(** + * Initializes the array with character classes. + *) +proc _elna_lexer_classifications(); +var + code: Word; +begin + _assign_at(@classification, 1, ElnaLexerClass.eof); + _assign_at(@classification, 2, ElnaLexerClass.invalid); + _assign_at(@classification, 3, ElnaLexerClass.invalid); + _assign_at(@classification, 4, ElnaLexerClass.invalid); + _assign_at(@classification, 5, ElnaLexerClass.invalid); + _assign_at(@classification, 6, ElnaLexerClass.invalid); + _assign_at(@classification, 7, ElnaLexerClass.invalid); + _assign_at(@classification, 8, ElnaLexerClass.invalid); + _assign_at(@classification, 9, ElnaLexerClass.invalid); + _assign_at(@classification, 10, ElnaLexerClass.space); + _assign_at(@classification, 11, ElnaLexerClass.space); + _assign_at(@classification, 12, ElnaLexerClass.invalid); + _assign_at(@classification, 13, ElnaLexerClass.invalid); + _assign_at(@classification, 14, ElnaLexerClass.space); + _assign_at(@classification, 15, ElnaLexerClass.invalid); + _assign_at(@classification, 16, ElnaLexerClass.invalid); + _assign_at(@classification, 17, ElnaLexerClass.invalid); + _assign_at(@classification, 18, ElnaLexerClass.invalid); + _assign_at(@classification, 19, 
ElnaLexerClass.invalid); + _assign_at(@classification, 20, ElnaLexerClass.invalid); + _assign_at(@classification, 21, ElnaLexerClass.invalid); + _assign_at(@classification, 22, ElnaLexerClass.invalid); + _assign_at(@classification, 23, ElnaLexerClass.invalid); + _assign_at(@classification, 24, ElnaLexerClass.invalid); + _assign_at(@classification, 25, ElnaLexerClass.invalid); + _assign_at(@classification, 26, ElnaLexerClass.invalid); + _assign_at(@classification, 27, ElnaLexerClass.invalid); + _assign_at(@classification, 28, ElnaLexerClass.invalid); + _assign_at(@classification, 29, ElnaLexerClass.invalid); + _assign_at(@classification, 30, ElnaLexerClass.invalid); + _assign_at(@classification, 31, ElnaLexerClass.invalid); + _assign_at(@classification, 32, ElnaLexerClass.invalid); + _assign_at(@classification, 33, ElnaLexerClass.space); + _assign_at(@classification, 34, ElnaLexerClass.single); + _assign_at(@classification, 35, ElnaLexerClass.double_quote); + _assign_at(@classification, 36, ElnaLexerClass.other); + _assign_at(@classification, 37, ElnaLexerClass.other); + _assign_at(@classification, 38, ElnaLexerClass.single); + _assign_at(@classification, 39, ElnaLexerClass.single); + _assign_at(@classification, 40, ElnaLexerClass.single_quote); + _assign_at(@classification, 41, ElnaLexerClass.left_paren); + _assign_at(@classification, 42, ElnaLexerClass.right_paren); + _assign_at(@classification, 43, ElnaLexerClass.asterisk); + _assign_at(@classification, 44, ElnaLexerClass.single); + _assign_at(@classification, 45, ElnaLexerClass.single); + _assign_at(@classification, 46, ElnaLexerClass.minus); + _assign_at(@classification, 47, ElnaLexerClass.dot); + _assign_at(@classification, 48, ElnaLexerClass.single); + _assign_at(@classification, 49, ElnaLexerClass.zero); + _assign_at(@classification, 50, ElnaLexerClass.digit); + _assign_at(@classification, 51, ElnaLexerClass.digit); + _assign_at(@classification, 52, ElnaLexerClass.digit); + _assign_at(@classification, 53, 
ElnaLexerClass.digit); + _assign_at(@classification, 54, ElnaLexerClass.digit); + _assign_at(@classification, 55, ElnaLexerClass.digit); + _assign_at(@classification, 56, ElnaLexerClass.digit); + _assign_at(@classification, 57, ElnaLexerClass.digit); + _assign_at(@classification, 58, ElnaLexerClass.digit); + _assign_at(@classification, 59, ElnaLexerClass.colon); + _assign_at(@classification, 60, ElnaLexerClass.single); + _assign_at(@classification, 61, ElnaLexerClass.less); + _assign_at(@classification, 62, ElnaLexerClass.equals); + _assign_at(@classification, 63, ElnaLexerClass.greater); + _assign_at(@classification, 64, ElnaLexerClass.other); + _assign_at(@classification, 65, ElnaLexerClass.single); + _assign_at(@classification, 66, ElnaLexerClass.alpha); + _assign_at(@classification, 67, ElnaLexerClass.alpha); + _assign_at(@classification, 68, ElnaLexerClass.alpha); + _assign_at(@classification, 69, ElnaLexerClass.alpha); + _assign_at(@classification, 70, ElnaLexerClass.alpha); + _assign_at(@classification, 71, ElnaLexerClass.alpha); + _assign_at(@classification, 72, ElnaLexerClass.alpha); + _assign_at(@classification, 73, ElnaLexerClass.alpha); + _assign_at(@classification, 74, ElnaLexerClass.alpha); + _assign_at(@classification, 75, ElnaLexerClass.alpha); + _assign_at(@classification, 76, ElnaLexerClass.alpha); + _assign_at(@classification, 77, ElnaLexerClass.alpha); + _assign_at(@classification, 78, ElnaLexerClass.alpha); + _assign_at(@classification, 79, ElnaLexerClass.alpha); + _assign_at(@classification, 80, ElnaLexerClass.alpha); + _assign_at(@classification, 81, ElnaLexerClass.alpha); + _assign_at(@classification, 82, ElnaLexerClass.alpha); + _assign_at(@classification, 83, ElnaLexerClass.alpha); + _assign_at(@classification, 84, ElnaLexerClass.alpha); + _assign_at(@classification, 85, ElnaLexerClass.alpha); + _assign_at(@classification, 86, ElnaLexerClass.alpha); + _assign_at(@classification, 87, ElnaLexerClass.alpha); + _assign_at(@classification, 88, 
ElnaLexerClass.alpha); + _assign_at(@classification, 89, ElnaLexerClass.alpha); + _assign_at(@classification, 90, ElnaLexerClass.alpha); + _assign_at(@classification, 91, ElnaLexerClass.alpha); + _assign_at(@classification, 92, ElnaLexerClass.single); + _assign_at(@classification, 93, ElnaLexerClass.backslash); + _assign_at(@classification, 94, ElnaLexerClass.single); + _assign_at(@classification, 95, ElnaLexerClass.single); + _assign_at(@classification, 96, ElnaLexerClass.alpha); + _assign_at(@classification, 97, ElnaLexerClass.other); + _assign_at(@classification, 98, ElnaLexerClass.hex); + _assign_at(@classification, 99, ElnaLexerClass.hex); + _assign_at(@classification, 100, ElnaLexerClass.hex); + _assign_at(@classification, 101, ElnaLexerClass.hex); + _assign_at(@classification, 102, ElnaLexerClass.hex); + _assign_at(@classification, 103, ElnaLexerClass.hex); + _assign_at(@classification, 104, ElnaLexerClass.alpha); + _assign_at(@classification, 105, ElnaLexerClass.alpha); + _assign_at(@classification, 106, ElnaLexerClass.alpha); + _assign_at(@classification, 107, ElnaLexerClass.alpha); + _assign_at(@classification, 108, ElnaLexerClass.alpha); + _assign_at(@classification, 109, ElnaLexerClass.alpha); + _assign_at(@classification, 110, ElnaLexerClass.alpha); + _assign_at(@classification, 111, ElnaLexerClass.alpha); + _assign_at(@classification, 112, ElnaLexerClass.alpha); + _assign_at(@classification, 113, ElnaLexerClass.alpha); + _assign_at(@classification, 114, ElnaLexerClass.alpha); + _assign_at(@classification, 115, ElnaLexerClass.alpha); + _assign_at(@classification, 116, ElnaLexerClass.alpha); + _assign_at(@classification, 117, ElnaLexerClass.alpha); + _assign_at(@classification, 118, ElnaLexerClass.alpha); + _assign_at(@classification, 119, ElnaLexerClass.alpha); + _assign_at(@classification, 120, ElnaLexerClass.alpha); + _assign_at(@classification, 121, ElnaLexerClass.x); + _assign_at(@classification, 122, ElnaLexerClass.alpha); + 
_assign_at(@classification, 123, ElnaLexerClass.alpha); + _assign_at(@classification, 124, ElnaLexerClass.other); + _assign_at(@classification, 125, ElnaLexerClass.single); + _assign_at(@classification, 126, ElnaLexerClass.other); + _assign_at(@classification, 127, ElnaLexerClass.single); + _assign_at(@classification, 128, ElnaLexerClass.invalid); + + code := 129; + + (* Set the remaining 129 - 256 bytes to transitionClassOther. *) + .create_classification_loop; + _assign_at(@classification, code, ElnaLexerClass.other); + code := code + 1; + + if code < 257 then + goto create_classification_loop + end +end; + +proc _elna_lexer_get_transition(current_state: Word, character_class: Word); +var + transition_table: Word; + row_position: Word; + column_position: Word; + target: Word; +begin + (* Each transition entry is 8 bytes long (2 words: action and next state). + There are 22 character classes, so a transition row is 8 * 22 = 176 bytes long. *) + row_position := current_state - 1; + row_position := row_position * 176; + + column_position := character_class - 1; + column_position := column_position * 8; + + target := _elna_lexer_get_transition_table(); + target := target + row_position; + + return target + column_position +end; + +(** + * Parameters: + * current_state - First index into transitions table. + * character_class - Second index into transitions table. + * action - Action to assign. + * next_state - Next state to assign. + *) +proc _elna_lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); +var + transition: Word; +begin + transition := _elna_lexer_get_transition(current_state, character_class); + + _elna_lexer_transition_set_action(transition, action); + _elna_lexer_transition_set_state(transition, next_state) +end; + +(* Sets same action and state transition for all character classes in one transition row. *) + +(** + * Parameters: + * current_state - Current state (Transition state enumeration). 
+ * default_action - Default action (Callback). + * next_state - Next state (Transition state enumeration). + *) +proc _elna_lexer_default_transition(current_state: Word, default_action: Word, next_state: Word); +begin + _elna_lexer_set_transition(current_state, ElnaLexerClass.invalid, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.digit, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.alpha, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.space, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.colon, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.equals, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.left_paren, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.right_paren, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.asterisk, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.backslash, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.single, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.hex, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.zero, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.x, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.eof, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.dot, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.minus, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.single_quote, default_action, next_state); + _elna_lexer_set_transition(current_state, 
ElnaLexerClass.double_quote, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.greater, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.less, default_action, next_state); + _elna_lexer_set_transition(current_state, ElnaLexerClass.other, default_action, next_state) +end; + +(** + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + *) +proc _elna_lexer_transitions(); +begin + (* Start state. *) + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.space, ElnaLexerAction.skip, ElnaLexerState.start); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.colon, ElnaLexerAction.accumulate, ElnaLexerState.colon); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.equals, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.left_paren, ElnaLexerAction.accumulate, ElnaLexerState.left_paren); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.right_paren, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.asterisk, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.backslash, ElnaLexerAction.none, 
ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.leading_zero); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.eof, ElnaLexerAction.eof, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.dot, ElnaLexerAction.single, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.minus, ElnaLexerAction.accumulate, ElnaLexerState.minus); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.single_quote, ElnaLexerAction.accumulate, ElnaLexerState.character); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.double_quote, ElnaLexerAction.accumulate, ElnaLexerState.string); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.greater, ElnaLexerAction.accumulate, ElnaLexerState.greater); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.less, ElnaLexerAction.accumulate, ElnaLexerState.less); + _elna_lexer_set_transition(ElnaLexerState.start, ElnaLexerClass.other, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Colon state. *) + _elna_lexer_default_transition(ElnaLexerState.colon, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.colon, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Identifier state. 
*) + _elna_lexer_default_transition(ElnaLexerState.identifier, ElnaLexerAction.key_id, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.alpha, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.hex, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + _elna_lexer_set_transition(ElnaLexerState.identifier, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.identifier); + + (* Decimal state. *) + _elna_lexer_default_transition(ElnaLexerState.decimal, ElnaLexerAction.integer, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.digit, ElnaLexerAction.accumulate, ElnaLexerState.decimal); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.zero, ElnaLexerAction.accumulate, ElnaLexerState.decimal); + _elna_lexer_set_transition(ElnaLexerState.decimal, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Leading zero. 
*) + _elna_lexer_default_transition(ElnaLexerState.leading_zero, ElnaLexerAction.integer, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.digit, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.alpha, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.hex, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.zero, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.leading_zero, ElnaLexerClass.x, ElnaLexerAction.none, ElnaLexerState.dot); + + (* Greater state. *) + _elna_lexer_default_transition(ElnaLexerState.greater, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.greater, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Minus state. *) + _elna_lexer_default_transition(ElnaLexerState.minus, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.minus, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Left paren state. *) + _elna_lexer_default_transition(ElnaLexerState.left_paren, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.left_paren, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.comment); + + (* Less state. *) + _elna_lexer_default_transition(ElnaLexerState.less, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.equals, ElnaLexerAction.composite, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.less, ElnaLexerClass.greater, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Hexadecimal after 0x. 
*) + _elna_lexer_default_transition(ElnaLexerState.dot, ElnaLexerAction.finalize, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.dot, ElnaLexerClass.dot, ElnaLexerAction.composite, ElnaLexerState.finish); + + (* Comment. *) + _elna_lexer_default_transition(ElnaLexerState.comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); + _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); + _elna_lexer_set_transition(ElnaLexerState.comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Closing comment. *) + _elna_lexer_default_transition(ElnaLexerState.closing_comment, ElnaLexerAction.accumulate, ElnaLexerState.comment); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.right_paren, ElnaLexerAction.delimited, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.asterisk, ElnaLexerAction.accumulate, ElnaLexerState.closing_comment); + _elna_lexer_set_transition(ElnaLexerState.closing_comment, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + + (* Character. 
*) + _elna_lexer_default_transition(ElnaLexerState.character, ElnaLexerAction.accumulate, ElnaLexerState.character); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.single_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.character_escape); + + (* Escape sequence in a character. *) + _elna_lexer_default_transition(ElnaLexerState.character_escape, ElnaLexerAction.accumulate, ElnaLexerState.character); + _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.character_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + + (* String. *) + _elna_lexer_default_transition(ElnaLexerState.string, ElnaLexerAction.accumulate, ElnaLexerState.string); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.double_quote, ElnaLexerAction.delimited, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string, ElnaLexerClass.backslash, ElnaLexerAction.accumulate, ElnaLexerState.string_escape); + + (* Escape sequence in a string. 
*) + _elna_lexer_default_transition(ElnaLexerState.string_escape, ElnaLexerAction.accumulate, ElnaLexerState.string); + _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.invalid, ElnaLexerAction.none, ElnaLexerState.finish); + _elna_lexer_set_transition(ElnaLexerState.string_escape, ElnaLexerClass.eof, ElnaLexerAction.none, ElnaLexerState.finish) +end; + +(** + * Transition table is saved after character classification table. + * Each character entry is 1 word long and there are 256 characters. + * 1024 = 256 * 4 + *) +proc _elna_lexer_get_transition_table(); + return @classification + 1024 +end; + +(** + * Lexer state is saved after the transition tables. + * Each transition table entry is 8 bytes long. The table has 17 rows (transition states) + * and 22 columns (character classes), so 2992 = 8 * 17 * 22. + *) +proc _elna_lexer_global_state(); +var + result: Word; +begin + result := _elna_lexer_get_transition_table(); + return result + 2992 +end; + +(** + * Gets pointer to the token start. + *) +proc _elna_lexer_global_get_start(); +var + target: Word; +begin + target := _elna_lexer_global_state() + 4; + return target^ +end; + +(** + * Sets pointer to the token start. + *) +proc _elna_lexer_global_set_start(new_start: Word); +var + target: Word; +begin + target := _elna_lexer_global_state() + 4; + target^ := new_start +end; + +(** + * Gets pointer to the token end. + *) +proc _elna_lexer_global_get_end(); +var + target: Word; +begin + target := _elna_lexer_global_state() + 8; + return target^ +end; + +(** + * Sets pointer to the token end. 
+ *) +proc _elna_lexer_global_set_end(new_start: Word); +var + target: Word; +begin + target := _elna_lexer_global_state() + 8; + target^ := new_start +end; + +proc _elna_lexer_transition_get_action(this: Word); + return this^ +end; + +proc _elna_lexer_transition_set_action(this: Word, value: Word); +begin + this^ := value +end; + +proc _elna_lexer_transition_get_state(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _elna_lexer_transition_set_state(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +(** + * Resets the lexer state for reading the next token. + *) +proc _elna_lexer_reset(); +var + state: Word; +begin + (* Transition start state is 1. *) + state := _elna_lexer_global_state(); + state^ := ElnaLexerState.start; + + state := _elna_lexer_global_get_start(); + _elna_lexer_global_set_end(state) +end; + +(** + * One time lexer initialization. + *) +proc _elna_lexer_initialize(code_pointer: Word); +begin + _elna_lexer_classifications(); + _elna_lexer_transitions(); + + _elna_lexer_global_set_start(code_pointer); + _elna_lexer_global_set_end(code_pointer) +end; + +proc _elna_lexer_next_transition(); +var + current_character: Word; + character_class: Word; + current_state: Word; +begin + current_character := _elna_lexer_global_get_end(); + current_character := _load_byte(current_character); + + character_class := _get_at(@classification, current_character + 1); + + current_state := _elna_lexer_global_state(); + current_state := current_state^; + + return _elna_lexer_get_transition(current_state, character_class) +end; + +proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); +var + result: Word; +begin + result := 0; + + if lhs_length = rhs_length then + result := memcmp(lhs_pointer, rhs_pointer, lhs_length); + result := result = 0 + end; + return result +end; + +proc _elna_lexer_classify_keyword(position_start: Word, position_end: Word); +var + result: Word; + token_length: 
Word; +begin + result := ElnaLexerKind.identifier; + token_length := position_end - position_start; + + (* The length passed with each literal must be the literal's exact length: + string_compare only reports a match when both lengths are equal. *) + if string_compare(position_start, token_length, "const", 5) then + result := ElnaLexerKind._const + elsif string_compare(position_start, token_length, "var", 3) then + result := ElnaLexerKind._var + elsif string_compare(position_start, token_length, "proc", 4) then + result := ElnaLexerKind._proc + elsif string_compare(position_start, token_length, "type", 4) then + result := ElnaLexerKind._type + elsif string_compare(position_start, token_length, "begin", 5) then + result := ElnaLexerKind._begin + elsif string_compare(position_start, token_length, "end", 3) then + result := ElnaLexerKind._end + elsif string_compare(position_start, token_length, "return", 6) then + result := ElnaLexerKind._return + elsif string_compare(position_start, token_length, "goto", 4) then + result := ElnaLexerKind._goto + elsif string_compare(position_start, token_length, "if", 2) then + result := ElnaLexerKind._if + elsif string_compare(position_start, token_length, "while", 5) then + result := ElnaLexerKind._while + elsif string_compare(position_start, token_length, "then", 4) then + result := ElnaLexerKind._then + elsif string_compare(position_start, token_length, "else", 4) then + result := ElnaLexerKind._else + elsif string_compare(position_start, token_length, "elsif", 5) then + result := ElnaLexerKind._elsif + elsif string_compare(position_start, token_length, "record", 6) then + result := ElnaLexerKind._record + elsif string_compare(position_start, token_length, "or", 2) then + result := ElnaLexerKind._or + elsif string_compare(position_start, token_length, "xor", 3) then + result := ElnaLexerKind._xor + end; + return result +end; + +proc _elna_lexer_classify_finalize(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ':' then + result := ElnaLexerKind.colon + elsif character = '.' 
then + result := ElnaLexerKind.dot + elsif character = '(' then + result := ElnaLexerKind.left_paren + elsif character = '-' then + result := ElnaLexerKind.minus + elsif character = '<' then + result := ElnaLexerKind.less_than + elsif character = '>' then + result := ElnaLexerKind.greater_than + end; + return result +end; + +proc _elna_lexer_classify_single(start_position: Word); +var + character: Word; + result: Word; +begin + result := 0; + character := _load_byte(start_position); + + if character = ';' then + result := ElnaLexerKind.semicolon + elsif character = ',' then + result := ElnaLexerKind.comma + elsif character = ')' then + result := ElnaLexerKind.right_paren + elsif character = '@' then + result := ElnaLexerKind.at + elsif character = '~' then + result := ElnaLexerKind.not + elsif character = '&' then + result := ElnaLexerKind.and + elsif character = '+' then + result := ElnaLexerKind.plus + elsif character = '*' then + result := ElnaLexerKind.multiplication + elsif character = '=' then + result := ElnaLexerKind.equals + elsif character = '%' then + result := ElnaLexerKind.remainder + elsif character = '/' then + result := ElnaLexerKind.division + elsif character = '.' 
then + result := ElnaLexerKind.dot + elsif character = '^' then + result := ElnaLexerKind.hat + end; + return result +end; + +(** + * Classifies a composite token by its first and last character. + * + * Parameters: + * start_position - Position of the first character. + * one_before_last - Position of the last character. + * + * Returns the token kind, or 0 if the character pair is not handled here. + *) +proc _elna_lexer_classify_composite(start_position: Word, one_before_last: Word); +var + first_character: Word; + last_character: Word; + result: Word; +begin + (* Start from 0, like _elna_lexer_classify_single and _elna_lexer_classify_finalize, + so that an unhandled pair never returns an unassigned value. *) + result := 0; + first_character := _load_byte(start_position); + last_character := _load_byte(one_before_last); + + if first_character = ':' then + result := ElnaLexerKind.assignment + elsif first_character = '<' then + if last_character = '=' then + result := ElnaLexerKind.less_equal + elsif last_character = '>' then + result := ElnaLexerKind.not_equal + end + elsif first_character = '>' then + if last_character = '=' then + result := ElnaLexerKind.greater_equal + end + end; + + return result +end; + +(** + * Classifies a delimited token (comment, character or string) by its opening delimiter. + * + * Parameters: + * start_position - Position of the opening delimiter. + * end_position - Position past the closing delimiter. + * + * Returns the token kind, or 0 for an unknown delimiter. + *) +proc _elna_lexer_classify_delimited(start_position: Word, end_position: Word); +var + token_length: Word; + delimiter: Word; + result: Word; +begin + (* Start from 0 so that an unknown delimiter never returns an unassigned value. *) + result := 0; + token_length := end_position - start_position; + delimiter := _load_byte(start_position); + + if delimiter = '(' then + result := ElnaLexerKind.comment + elsif delimiter = '\'' then + result := ElnaLexerKind.character + elsif delimiter = '"' then + result := ElnaLexerKind.string + end; + return result +end; + +proc _elna_lexer_classify_integer(start_position: Word, end_position: Word); + return ElnaLexerKind.integer +end; + +proc _elna_lexer_execute_action(action_to_perform: Word, kind: Word); +var + position_start: Word; + position_end: Word; + intermediate: Word; +begin + position_start := _elna_lexer_global_get_start(); + position_end := _elna_lexer_global_get_end(); + + if action_to_perform = ElnaLexerAction.none then + elsif action_to_perform = ElnaLexerAction.accumulate then + _elna_lexer_global_set_end(position_end + 1) + elsif action_to_perform = ElnaLexerAction.skip then + _elna_lexer_global_set_start(position_start + 1); + _elna_lexer_global_set_end(position_end + 1) + elsif action_to_perform = ElnaLexerAction.single then + 
_elna_lexer_global_set_end(position_end + 1); + + intermediate := _elna_lexer_classify_single(position_start); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.eof then + intermediate := ElnaLexerKind.eof; + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.finalize then + intermediate := _elna_lexer_classify_finalize(position_start); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.composite then + _elna_lexer_global_set_end(position_end + 1); + + intermediate := _elna_lexer_classify_composite(position_start, position_end); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.key_id then + intermediate := _elna_lexer_classify_keyword(position_start, position_end); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.integer then + intermediate := _elna_lexer_classify_integer(position_start, position_end); + kind^ := intermediate + elsif action_to_perform = ElnaLexerAction.delimited then + _elna_lexer_global_set_end(position_end + 1); + + intermediate := _elna_lexer_classify_delimited(position_start, position_end + 1); + kind^ := intermediate + end +end; + +proc _elna_lexer_execute_transition(kind: Word); +var + next_transition: Word; + next_state: Word; + global_state: Word; + action_to_perform: Word; +begin + next_transition := _elna_lexer_next_transition(); + next_state := _elna_lexer_transition_get_state(next_transition); + action_to_perform := _elna_lexer_transition_get_action(next_transition); + + global_state := _elna_lexer_global_state(); + + global_state^ := next_state; + _elna_lexer_execute_action(action_to_perform, kind); + + return next_state +end; + +proc _elna_lexer_advance_token(kind: Word); +var + result_state: Word; +begin + result_state := _elna_lexer_execute_transition(kind); + if result_state <> ElnaLexerState.finish then + _elna_lexer_advance_token(kind) + end +end; + +(** + * Reads the next token and writes its type into the address in the kind parameter. 
+ *) +proc _elna_lexer_read_token(kind: Word); +begin + _elna_lexer_reset(); + _elna_lexer_advance_token(kind) +end; + +(** + * Advances the token stream past the last read token. + *) +proc _elna_lexer_skip_token(); +var + old_end: Word; +begin + old_end := _elna_lexer_global_get_end(); + _elna_lexer_global_set_start(old_end) +end; + +proc _initialize_global_state(); +begin + compiler_strings_position := @compiler_strings; + source_code := malloc(495616); + symbol_table_store := malloc(4194304) +end; + +(* + * Entry point. + *) +proc main(); +var + last_read: Word; + offset: Word; +begin + _initialize_global_state(); + _elna_lexer_initialize(source_code); + _symbol_table_build(); + + (* Read the source from the standard input. *) + offset := source_code; + + .start_read; + (* Second argument is buffer size. Modifying update the source_code definition. *) + last_read := _read_file(offset, 409600); + if last_read > 0 then + offset := offset + last_read; + goto start_read + end; + _compile(); + + _exit(0) +end; diff --git a/boot/stage16/linker.arg b/boot/stage16/linker.arg new file mode 100644 index 0000000..e69de29 diff --git a/boot/stage2.elna b/boot/stage2.elna deleted file mode 100644 index 0423b3b..0000000 --- a/boot/stage2.elna +++ /dev/null @@ -1,859 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# Stage 2 compiler. -# -# - Procedures without none or one argument. -# - Goto statements. -# - Character and integer literals. -# - Passing local variables to procedures. -# - Local variables should have the format: v00, -# where 00 is its offset from the sp register. 
- -.section .rodata - -.type keyword_equ, @object -keyword_equ: .ascii ".equ" -.equ KEYWORD_EQU_SIZE, 4 - -.type keyword_section, @object -keyword_section: .ascii ".section" -.equ KEYWORD_SECTION_SIZE, 8 - -.type keyword_type, @object -keyword_type: .ascii ".type" -.equ KEYWORD_TYPE_SIZE, 5 - -.type keyword_ret, @object -keyword_ret: .ascii "ret" -.equ KEYWORD_RET_SIZE, 3 - -.type keyword_global, @object -keyword_global: .ascii ".globl" -.equ KEYWORD_GLOBAL_SIZE, 6 - -.type keyword_proc, @object -keyword_proc: .ascii "proc " -.equ KEYWORD_PROC_SIZE, 5 - -.type keyword_end, @object -keyword_end: .ascii "end" -.equ KEYWORD_END_SIZE, 3 - -.type keyword_begin, @object -keyword_begin: .ascii "begin" -.equ KEYWORD_BEGIN_SIZE, 5 - -.type keyword_var, @object -keyword_var: .ascii "var" -.equ KEYWORD_VAR_SIZE, 3 - -.type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" - -.type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" - -.type asm_type_directive, @object -asm_type_directive: .string ".type " - -.type asm_type_function, @object -asm_type_function: .string ", @function\n" - -.type asm_colon, @object -asm_colon: .string ":\n" - -.type asm_call, @object -asm_call: .string "\tcall " - -.type asm_j, @object -asm_j: .string "\tj " - -.type asm_li, @object -asm_li: .string "\tli " - -.type asm_lw, @object -asm_lw: .string "\tlw " - -.type asm_t0, @object -asm_t0: .string "t0" - -.type asm_a0, @object -asm_a0: .string "a0" - -.type asm_comma, @object -asm_comma: .string ", " - -.type asm_sp, @object -asm_sp: .string "(sp)" - -.section .bss - -.equ SOURCE_BUFFER_SIZE, 81920 -.type source_code, @object -source_code: .zero SOURCE_BUFFER_SIZE - -.section .data - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. 
-# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. - li a0, 0 - li a7, 63 # SYS_READ. - ecall -end; - -# Writes to the standard output. -# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - li a7, 64 # SYS_WRITE. - ecall -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - sb a0, 20(sp) - addi a0, sp, 20 - li a1, 1 - _write(); -end; - -# Write null terminated string. -# -# Parameters: -# a0 - String. -proc _write_z(); -begin - sw a0, 20(sp) - -.write_z_loop: - # Check for 0 character. - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. - lw a0, 20(sp) - lb a0, (a0) - _write_c(); - - # Advance the input string by one byte. - lw a0, 20(sp) - addi a0, a0, 1 - sw a0, 20(sp) - - j .write_z_loop - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_upper(); -begin - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' - - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' - - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 20(sp) - - _is_upper(); - sw a0, 16(sp) - - lw a0, 20(sp) - _is_lower(); - - lw t0, 20(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 16(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. 
-# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - li t0, '0' - 1 - sltu t1, t0, a0 # t1 = a0 >= '0' - - sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - - and a0, t1, t2 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - la t0, source_code_position # Token pointer. - lw t0, (t0) - sw t0, 20(sp) # Current token position. - sw zero, 16(sp) # Token length. - -.read_token_loop: - lb t0, (t0) # Current character. - - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' - beq t0, t1, .read_token_next - - lw a0, 20(sp) - lb a0, (a0) - _is_alpha(); - bnez a0, .read_token_next - - lw a0, 20(sp) - lb a0, (a0) - _is_digit(); - bnez a0, .read_token_next - - j .read_token_end - -.read_token_next: - # Advance the source code position and token length. - lw t0, 16(sp) - addi t0, t0, 1 - sw t0, 16(sp) - - lw t0, 20(sp) - addi t0, t0, 1 - sw t0, 20(sp) - - j .read_token_loop - -.read_token_end: - lw a0, 16(sp) -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.Lmemcmp_loop: - beqz a2, .Lmemcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .Lmemcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - j .Lmemcmp_loop - -.Lmemcmp_end: -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - # Skip the .equ directive. - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. -proc _write_token(); -begin - sw a0, 20(sp) - - la a0, source_code_position - lw a0, (a0) - lw a1, 20(sp) - _write(); - - lw a0, 20(sp) -end; - -proc _compile_section(); -begin - # Print and skip the .section directive and a space after it. 
- li a0, KEYWORD_SECTION_SIZE + 1 - _write_token(); - _advance_token(); - - # Read the section name. - _read_token(); - addi a0, a0, 1 - - _write_token(); - _advance_token(); -end; - -# Prints and skips a line. -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. - addi t1, t1, 1 - sw t1, (t0) - - j .skip_comment_loop - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Prints and skips a line. -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. - li a0, 1 - _advance_token(); - - j .compile_line_loop - -.compile_line_end: - li a0, '\n' - _write_c(); - - li a0, 1 - _advance_token(); -end; - -proc _compile_integer_literal(); -begin - la a0, asm_li - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _read_token(); - _write_token(); - _advance_token(); - - li a0, '\n' - _write_c(); -end; - -proc _compile_character_literal(); -begin - la a0, asm_li - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - -.compile_character_literal_loop: - la a0, source_code_position - lw a0, (a0) - li a1, 1 - _write(); - li a0, 1 - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\'' - beq a0, t1, .compile_character_literal_end - - j .compile_character_literal_loop - -.compile_character_literal_end: - li a0, '\'' - _write_c(); - - li a0, '\n' - _write_c(); - - li a0, 1 - _advance_token(); -end; - -proc _compile_variable_expression(); -begin - la a0, asm_lw - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - - la a0, source_code_position - lw a0, (a0) 
- addi a0, a0, 1 - li a1, 2 - _write(); - - la a0, asm_sp - _write_z(); - - li a0, '\n' - _write_c(); - - li a0, 3 - _advance_token(); - -end; - -proc _compile_expression(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, '\'' - beq a0, t1, .compile_expression_character_literal - - li t1, 'v' - beq a0, t1, .compile_expression_variable - - _is_digit(); - bnez a0, .compile_expression_integer_literal - - j .compile_expression_end - -.compile_expression_character_literal: - _compile_character_literal(); - j .compile_expression_end - -.compile_expression_integer_literal: - _compile_integer_literal(); - j .compile_expression_end - -.compile_expression_variable: - _compile_variable_expression(); - j .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - _read_token(); - sw a0, 20(sp) - la t0, source_code_position - lw t0, (t0) - sw t0, 16(sp) - - # Skip the identifier and left paren. - addi a0, a0, 1 - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - - _compile_expression(); - -.compile_call_finalize: - la a0, asm_call - _write_z(); - - lw a0, 16(sp) - lw a1, 20(sp) - _write(); - - # Skip the right paren. - li a0, 1 - _advance_token(); -end; - -proc _compile_goto(); -begin - li a0, 5 - _advance_token(); - - _read_token(); - sw a0, 20(sp) - - la a0, asm_j - _write_z(); - - lw a0, 20(sp) - _write_token(); - _advance_token(); -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. 
- addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - _compile_line(); - j .compile_statement_end - -.compile_statement_call: - li a0, 1 - _advance_token(); - _compile_call(); - - j .compile_statement_semicolon - -.compile_statement_goto: - li a0, 1 - _advance_token(); - _compile_goto(); - - j .compile_statement_semicolon - -.compile_statement_semicolon: - li a0, 2 - _advance_token(); - - li a0, '\n' - _write_c(); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - la a0, source_code_position - lw a0, (a0) - la a1, keyword_end - li a2, KEYWORD_END_SIZE - _memcmp(); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - j .compile_procedure_body_loop - -.compile_procedure_body_epilogue: -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - li a0, KEYWORD_PROC_SIZE - _advance_token(); - - _read_token(); - sw a0, 20(sp) # Save the procedure name length. - - # Write .type _procedure_name, @function. - la a0, asm_type_directive - _write_z(); - - lw a0, 20(sp) - _write_token(); - - la a0, asm_type_function - _write_z(); - - # Write procedure label, _procedure_name: - lw a0, 20(sp) - _write_token(); - - la a0, asm_colon - _write_z(); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - lw a0, 20(sp) - addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 - _advance_token(); - - la a0, asm_prologue - _write_z(); - - _compile_procedure_body(); - - # Write the epilogue. - la a0, asm_epilogue - _write_z(); - - li a0, KEYWORD_END_SIZE + 2 - _advance_token(); -end; - -proc _compile_type(); -begin - # Print and skip the .type directive and a space after it. - li a0, KEYWORD_TYPE_SIZE + 1 - _write_token(); - _advance_token(); - - # Read and print the symbol name. - _read_token(); - sw a0, 20(sp) - - # Print and skip the symbol name, comma, space and @. 
- lw a0, 20(sp) - addi a0, a0, 3 - _write_token(); - _advance_token(); - - # Read the symbol type. - _read_token(); - sw a0, 16(sp) - la t0, source_code_position - lw t0, (t0) - sw t0, 12(sp) - - # Print the symbol type and newline. - lw a0, 16(sp) - addi a0, a0, 1 - _write_token(); - _advance_token(); - - # Write the object definition itself. - _compile_line(); - -.compile_type_end: -end; - -proc _compile_equ(); -begin - # Print and skip the .equ directive and a space after it. - li a0, KEYWORD_EQU_SIZE + 1 - _write_token(); - _advance_token(); - - # Read and print the constant name. - _read_token(); - sw a0, 20(sp) - - # Print and skip the constant name, comma and space. - lw a0, 20(sp) - addi a0, a0, 2 - _write_token(); - _advance_token(); - - # Read the constant value. - _read_token(); - sw a0, 16(sp) - - # Print and skip the constant value and newline. - lw a0, 16(sp) - addi a0, a0, 1 - _write_token(); - _advance_token(); -end; - -proc _skip_newlines(); -begin - # Skip newlines. - la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - j .skip_newlines_loop - -.skip_newlines_end: -end; - -# Process the source code and print the generated code. 
-proc _compile(); -begin -.compile_loop: - _skip_newlines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_end - li t1, '#' - beq t0, t1, .compile_comment - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_equ - li a2, KEYWORD_EQU_SIZE - _memcmp(); - - beqz a0, .compile_equ - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_section - li a2, KEYWORD_SECTION_SIZE - _memcmp(); - - beqz a0, .compile_section - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_type - li a2, KEYWORD_TYPE_SIZE - _memcmp(); - - beqz a0, .compile_type - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_proc - li a2, KEYWORD_PROC_SIZE - _memcmp(); - - beqz a0, .compile_procedure - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_global - li a2, KEYWORD_GLOBAL_SIZE - _memcmp(); - - beqz a0, .compile_global - # Not a known token, exit. - j .compile_end - -.compile_equ: - _compile_equ(); - - j .compile_loop - -.compile_section: - _compile_section(); - - j .compile_loop - -.compile_type: - _compile_type(); - - j .compile_loop - -.compile_global: - _compile_line(); - - j .compile_loop - -.compile_comment: - _skip_comment(); - - j .compile_loop - -.compile_procedure: - _compile_procedure(); - - j .compile_loop - -.compile_end: -end; - -# Entry point. -.globl _start -proc _start(); -begin - # Read the source from the standard input. - la a0, source_code - li a1, SOURCE_BUFFER_SIZE # Buffer size. - _read_file(); - _compile(); - - # Call exit. - li a0, 0 # Use 0 return code. - li a7, 93 # SYS_EXIT. - ecall -end; diff --git a/boot/stage2/cl.elna b/boot/stage2/cl.elna new file mode 100644 index 0000000..0423b3b --- /dev/null +++ b/boot/stage2/cl.elna @@ -0,0 +1,859 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 2 compiler. 
+# +# - Procedures without none or one argument. +# - Goto statements. +# - Character and integer literals. +# - Passing local variables to procedures. +# - Local variables should have the format: v00, +# where 00 is its offset from the sp register. + +.section .rodata + +.type keyword_equ, @object +keyword_equ: .ascii ".equ" +.equ KEYWORD_EQU_SIZE, 4 + +.type keyword_section, @object +keyword_section: .ascii ".section" +.equ KEYWORD_SECTION_SIZE, 8 + +.type keyword_type, @object +keyword_type: .ascii ".type" +.equ KEYWORD_TYPE_SIZE, 5 + +.type keyword_ret, @object +keyword_ret: .ascii "ret" +.equ KEYWORD_RET_SIZE, 3 + +.type keyword_global, @object +keyword_global: .ascii ".globl" +.equ KEYWORD_GLOBAL_SIZE, 6 + +.type keyword_proc, @object +keyword_proc: .ascii "proc " +.equ KEYWORD_PROC_SIZE, 5 + +.type keyword_end, @object +keyword_end: .ascii "end" +.equ KEYWORD_END_SIZE, 3 + +.type keyword_begin, @object +keyword_begin: .ascii "begin" +.equ KEYWORD_BEGIN_SIZE, 5 + +.type keyword_var, @object +keyword_var: .ascii "var" +.equ KEYWORD_VAR_SIZE, 3 + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +.equ SOURCE_BUFFER_SIZE, 81920 +.type source_code, @object 
+source_code: .zero SOURCE_BUFFER_SIZE + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 20(sp) + addi a0, sp, 20 + li a1, 1 + _write(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 20(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 20(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 20(sp) + addi a0, a0, 1 + sw a0, 20(sp) + + j .write_z_loop + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. 
+proc _is_alpha(); +begin + sw a0, 20(sp) + + _is_upper(); + sw a0, 16(sp) + + lw a0, 20(sp) + _is_lower(); + + lw t0, 20(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 16(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 20(sp) # Current token position. + sw zero, 16(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 20(sp) + lb a0, (a0) + _is_alpha(); + bnez a0, .read_token_next + + lw a0, 20(sp) + lb a0, (a0) + _is_digit(); + bnez a0, .read_token_next + + j .read_token_end + +.read_token_next: + # Advance the source code position and token length. + lw t0, 16(sp) + addi t0, t0, 1 + sw t0, 16(sp) + + lw t0, 20(sp) + addi t0, t0, 1 + sw t0, 20(sp) + + j .read_token_loop + +.read_token_end: + lw a0, 16(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.Lmemcmp_loop: + beqz a2, .Lmemcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .Lmemcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + j .Lmemcmp_loop + +.Lmemcmp_end: +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + # Skip the .equ directive. + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. 
+# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + sw a0, 20(sp) + + la a0, source_code_position + lw a0, (a0) + lw a1, 20(sp) + _write(); + + lw a0, 20(sp) +end; + +proc _compile_section(); +begin + # Print and skip the .section directive and a space after it. + li a0, KEYWORD_SECTION_SIZE + 1 + _write_token(); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Prints and skips a line. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + j .skip_comment_loop + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte. 
+ li a0, 1 + _advance_token(); + + j .compile_line_loop + +.compile_line_end: + li a0, '\n' + _write_c(); + + li a0, 1 + _advance_token(); +end; + +proc _compile_integer_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _read_token(); + _write_token(); + _advance_token(); + + li a0, '\n' + _write_c(); +end; + +proc _compile_character_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + +.compile_character_literal_loop: + la a0, source_code_position + lw a0, (a0) + li a1, 1 + _write(); + li a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\'' + beq a0, t1, .compile_character_literal_end + + j .compile_character_literal_loop + +.compile_character_literal_end: + li a0, '\'' + _write_c(); + + li a0, '\n' + _write_c(); + + li a0, 1 + _advance_token(); +end; + +proc _compile_variable_expression(); +begin + la a0, asm_lw + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + la a0, source_code_position + lw a0, (a0) + addi a0, a0, 1 + li a1, 2 + _write(); + + la a0, asm_sp + _write_z(); + + li a0, '\n' + _write_c(); + + li a0, 3 + _advance_token(); + +end; + +proc _compile_expression(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, '\'' + beq a0, t1, .compile_expression_character_literal + + li t1, 'v' + beq a0, t1, .compile_expression_variable + + _is_digit(); + bnez a0, .compile_expression_integer_literal + + j .compile_expression_end + +.compile_expression_character_literal: + _compile_character_literal(); + j .compile_expression_end + +.compile_expression_integer_literal: + _compile_integer_literal(); + j .compile_expression_end + +.compile_expression_variable: + _compile_variable_expression(); + j .compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + _read_token(); + sw a0, 20(sp) + la t0, 
source_code_position + lw t0, (t0) + sw t0, 16(sp) + + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + + _compile_expression(); + +.compile_call_finalize: + la a0, asm_call + _write_z(); + + lw a0, 16(sp) + lw a1, 20(sp) + _write(); + + # Skip the right paren. + li a0, 1 + _advance_token(); +end; + +proc _compile_goto(); +begin + li a0, 5 + _advance_token(); + + _read_token(); + sw a0, 20(sp) + + la a0, asm_j + _write_z(); + + lw a0, 20(sp) + _write_token(); + _advance_token(); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + _compile_line(); + j .compile_statement_end + +.compile_statement_call: + li a0, 1 + _advance_token(); + _compile_call(); + + j .compile_statement_semicolon + +.compile_statement_goto: + li a0, 1 + _advance_token(); + _compile_goto(); + + j .compile_statement_semicolon + +.compile_statement_semicolon: + li a0, 2 + _advance_token(); + + li a0, '\n' + _write_c(); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, KEYWORD_END_SIZE + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + j .compile_procedure_body_loop + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + li a0, KEYWORD_PROC_SIZE + _advance_token(); + + _read_token(); + sw a0, 20(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. 
+ la a0, asm_type_directive + _write_z(); + + lw a0, 20(sp) + _write_token(); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + lw a0, 20(sp) + _write_token(); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 20(sp) + addi a0, a0, KEYWORD_BEGIN_SIZE + 1 + 4 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + li a0, KEYWORD_END_SIZE + 2 + _advance_token(); +end; + +proc _compile_type(); +begin + # Print and skip the .type directive and a space after it. + li a0, KEYWORD_TYPE_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the symbol name, comma, space and @. + lw a0, 20(sp) + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + sw a0, 16(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _compile_equ(); +begin + # Print and skip the .equ directive and a space after it. + li a0, KEYWORD_EQU_SIZE + 1 + _write_token(); + _advance_token(); + + # Read and print the constant name. + _read_token(); + sw a0, 20(sp) + + # Print and skip the constant name, comma and space. + lw a0, 20(sp) + addi a0, a0, 2 + _write_token(); + _advance_token(); + + # Read the constant value. + _read_token(); + sw a0, 16(sp) + + # Print and skip the constant value and newline. + lw a0, 16(sp) + addi a0, a0, 1 + _write_token(); + _advance_token(); +end; + +proc _skip_newlines(); +begin + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + j .skip_newlines_loop + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_equ + li a2, KEYWORD_EQU_SIZE + _memcmp(); + + beqz a0, .compile_equ + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, KEYWORD_SECTION_SIZE + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, KEYWORD_TYPE_SIZE + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, KEYWORD_PROC_SIZE + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, KEYWORD_GLOBAL_SIZE + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. + j .compile_end + +.compile_equ: + _compile_equ(); + + j .compile_loop + +.compile_section: + _compile_section(); + + j .compile_loop + +.compile_type: + _compile_type(); + + j .compile_loop + +.compile_global: + _compile_line(); + + j .compile_loop + +.compile_comment: + _skip_comment(); + + j .compile_loop + +.compile_procedure: + _compile_procedure(); + + j .compile_loop + +.compile_end: +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, SOURCE_BUFFER_SIZE # Buffer size. + _read_file(); + _compile(); + + # Call exit. + li a0, 0 # Use 0 return code. + li a7, 93 # SYS_EXIT. 
+ ecall +end; diff --git a/boot/stage3.elna b/boot/stage3.elna deleted file mode 100644 index aec9832..0000000 --- a/boot/stage3.elna +++ /dev/null @@ -1,971 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# Stage 3 compiler. -# -# - Procedures with multiple arguments. -# - Character literals with and without escaping. - -.section .rodata - -.type keyword_section, @object -keyword_section: .ascii ".section" - -.type keyword_type, @object -keyword_type: .ascii ".type" - -.type keyword_ret, @object -keyword_ret: .ascii "ret" - -.type keyword_global, @object -keyword_global: .ascii ".globl" - -.type keyword_proc, @object -keyword_proc: .ascii "proc " - -.type keyword_end, @object -keyword_end: .ascii "end" - -.type keyword_begin, @object -keyword_begin: .ascii "begin" - -.type keyword_var, @object -keyword_var: .ascii "var" - -.type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" - -.type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" - -.type asm_type_directive, @object -asm_type_directive: .string ".type " - -.type asm_type_function, @object -asm_type_function: .string ", @function\n" - -.type asm_colon, @object -asm_colon: .string ":\n" - -.type asm_call, @object -asm_call: .string "\tcall " - -.type asm_j, @object -asm_j: .string "\tj " - -.type asm_li, @object -asm_li: .string "\tli " - -.type asm_lw, @object -asm_lw: .string "\tlw " - -.type asm_sw, @object -asm_sw: .string "\tsw " - -.type asm_mv, @object -asm_mv: .string "mv " - -.type asm_t0, @object -asm_t0: .string "t0" - -.type asm_a0, @object -asm_a0: .string "a0" - -.type asm_comma, @object -asm_comma: .string ", " - -.type asm_sp, @object -asm_sp: .string "(sp)" - -.section .bss - -# When modifiying also 
change the read size in the entry point procedure. -.type source_code, @object -source_code: .zero 81920 - -.section .data - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. -# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. - li a0, 0 - li a7, 63 # SYS_READ. - ecall -end; - -# Writes to the standard output. -# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write_s(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - li a7, 64 # SYS_WRITE. - ecall -end; - -# Writes a number to a string buffer. -# -# t0 - Local buffer. -# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - lw a0, 0(sp) -end; - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. -proc _write_i(); -begin - addi a1, sp, 0 - _print_i(); - - mv a1, a0 - addi a0, sp, 0 - _write_s(); - -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - sb a0, 0(sp) - addi a0, sp, 0 - li a1, 1 - _write_s(); -end; - -# Write null terminated string. -# -# Parameters: -# a0 - String. -proc _write_z(); -begin - sw a0, 0(sp) - -.write_z_loop: - # Check for 0 character. - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. 
- lw a0, 0(sp) - lb a0, (a0) - _write_c(); - - # Advance the input string by one byte. - lw a0, 0(sp) - addi a0, a0, 1 - sw a0, 0(sp) - - goto .write_z_loop; - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_upper(); -begin - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' - - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' - - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 0(sp) - - _is_upper(); - sw a0, 4(sp) - - _is_lower(v00); - - lw t0, 0(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 4(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - li t0, '0' - 1 - sltu t1, t0, a0 # t1 = a0 >= '0' - - sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - - and a0, t1, t2 -end; - -proc _is_alnum(); -begin - sw a0, 4(sp) - - _is_alpha(); - sw a0, 0(sp) - - _is_digit(v04); - - lw a1, 0(sp) - or a0, a0, a1 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - la t0, source_code_position # Token pointer. - lw t0, (t0) - sw t0, 0(sp) # Current token position. - sw zero, 4(sp) # Token length. - -.read_token_loop: - lb t0, (t0) # Current character. - - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' 
- beq t0, t1, .read_token_next - - lw a0, 0(sp) - lb a0, (a0) - _is_alnum(); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - # Advance the source code position and token length. - lw t0, 4(sp) - addi t0, t0, 1 - sw t0, 4(sp) - - lw t0, 0(sp) - addi t0, t0, 1 - sw t0, 0(sp) - - goto .read_token_loop; - -.read_token_end: - lw a0, 4(sp) -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. -proc _memcpy(); -begin - mv t0, a0 - -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - mv a0, t0 -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. -proc _write_token(); -begin - sw a0, 0(sp) - - la a0, source_code_position - lw a0, (a0) - lw a1, 0(sp) - _write_s(); - - lw a0, 0(sp) -end; - -proc _compile_section(); -begin - # Print and skip the ".section" (8 characters) directive and a space after it. - _write_token(9); - _advance_token(); - - # Read the section name. - _read_token(); - addi a0, a0, 1 - - _write_token(); - _advance_token(); -end; - -# Prints and skips a line. -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. 
- lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Prints and skips a line. -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. - _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - la a0, asm_li - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - la a0, asm_li - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - - li a0, '\'' - _write_c(); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - li a0, '\\' - _write_c(); - _advance_token(1); - -.compile_character_literal_end: - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - _write_c(); - - li a0, '\'' - _write_c(); - - _write_c('\n'); - - _advance_token(2); - -end; - -proc _compile_variable_expression(); -begin - la a0, asm_lw - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _advance_token(1); - _read_token(); - _write_token(); - _advance_token(); - - la a0, asm_sp - _write_z(); - - _write_c('\n'); - -end; - -proc _compile_expression(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, '\'' - beq a0, t1, .compile_expression_character_literal - - li t1, 'v' - beq a0, t1, .compile_expression_variable - - _is_digit(); - bnez a0, .compile_expression_integer_literal - - goto 
.compile_expression_end; - -.compile_expression_character_literal: - _compile_character_literal(); - goto .compile_expression_end; - -.compile_expression_integer_literal: - _compile_integer_literal(); - goto .compile_expression_end; - -.compile_expression_variable: - _compile_variable_expression(); - goto .compile_expression_end;; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - # Stack variables: - # v0 - Procedure name length. - # v4 - Procedure name pointer. - # v8 - Argument count. - - _read_token(); - sw a0, 0(sp) - la t0, source_code_position - lw t0, (t0) - sw t0, 4(sp) - - sw zero, 8(sp) - - # Skip the identifier and left paren. - addi a0, a0, 1 - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - # Save the argument on the stack. - la a0, asm_sw - _write_z(); - - la a0, asm_a0 - _write_z(); - - la a0, asm_comma - _write_z(); - - # Calculate the stack offset: 20 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 20 - sub a0, t1, t0 - _write_i(); - - la a0, asm_sp - _write_z(); - - _write_c('\n'); - - # Add one to the argument counter. - lw t0, 8(sp) - addi t0, t0, 1 - sw t0, 8(sp) - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - # Load the argument from the stack. - - lw t0, 8(sp) - beqz t0, .compile_call_end - - # Decrement the argument counter. 
- lw t0, 8(sp) - addi t0, t0, -1 - sw t0, 8(sp) - - la a0, asm_lw - _write_z(); - - _write_c('a'); - lw a0, 8(sp) - _write_i(); - - la a0, asm_comma - _write_z(); - - # Calculate the stack offset: 20 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 20 - sub a0, t1, t0 - _write_i(); - - la a0, asm_sp - _write_z(); - - _write_c('\n'); - - goto .compile_call_finalize; - -.compile_call_end: - la a0, asm_call - _write_z(); - - lw a0, 4(sp) - lw a1, 0(sp) - _write_s(); - - # Skip the right paren. - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - _read_token(); - sw a0, 0(sp) - - la a0, asm_j - _write_z(); - - _write_token(v00); - _advance_token(); -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. - addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - _advance_token(1); - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - _advance_token(1); - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_semicolon: - _advance_token(2); - - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - la a0, source_code_position - lw a0, (a0) - la a1, keyword_end - li a2, 3 # "end" length. - _memcmp(); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - _advance_token(5); - - _read_token(); - sw a0, 0(sp) # Save the procedure name length. - - # Write .type _procedure_name, @function. 
- la a0, asm_type_directive - _write_z(); - - _write_token(v00); - - la a0, asm_type_function - _write_z(); - - # Write procedure label, _procedure_name: - _write_token(v00); - - la a0, asm_colon - _write_z(); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - lw a0, 0(sp) - addi a0, a0, 10 - _advance_token(); - - la a0, asm_prologue - _write_z(); - - _compile_procedure_body(); - - # Write the epilogue. - la a0, asm_epilogue - _write_z(); - - # Skip the "end" keyword, semicolon and newline. - _advance_token(5); -end; - -proc _compile_type(); -begin - # Print and skip the ".type" (5 characters) directive and a space after it. - _write_token(6); - _advance_token(); - - # Read and print the symbol name. - _read_token(); - - # Print and skip the symbol name, comma, space and @. - addi a0, a0, 3 - _write_token(); - _advance_token(); - - # Read the symbol type. - _read_token(); - la t0, source_code_position - lw t0, (t0) - sw t0, 12(sp) - - # Print the symbol type and newline. - addi a0, a0, 1 - _write_token(); - _advance_token(); - - # Write the object definition itself. - _compile_line(); - -.compile_type_end: -end; - -proc _skip_newlines(); -begin - # Skip newlines. - la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_newlines_loop; - -.skip_newlines_end: -end; - -# Process the source code and print the generated code. -proc _compile(); -begin -.compile_loop: - _skip_newlines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_end - li t1, '#' - beq t0, t1, .compile_comment - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_section - li a2, 8 # ".section" length. - _memcmp(); - - beqz a0, .compile_section - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_type - li a2, 5 # ".type" length. 
- _memcmp(); - - beqz a0, .compile_type - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_proc - li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". - _memcmp(); - - beqz a0, .compile_procedure - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_global - li a2, 6 # ".globl" length. - _memcmp(); - - beqz a0, .compile_global - # Not a known token, exit. - goto .compile_end; - -.compile_section: - _compile_section(); - - goto .compile_loop; - -.compile_type: - _compile_type(); - - goto .compile_loop; - -.compile_global: - _compile_line(); - - goto .compile_loop; - -.compile_comment: - _skip_comment(); - - goto .compile_loop; - -.compile_procedure: - _compile_procedure(); - - goto .compile_loop; - -.compile_end: -end; - -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -proc _exit(); -begin - li a7, 93 # SYS_EXIT - ecall -end; - -# Entry point. -.globl _start -proc _start(); -begin - # Read the source from the standard input. - la a0, source_code - li a1, 81920 # Buffer size. - _read_file(); - _compile(); - - _exit(0); - -end; diff --git a/boot/stage3/cl.elna b/boot/stage3/cl.elna new file mode 100644 index 0000000..aec9832 --- /dev/null +++ b/boot/stage3/cl.elna @@ -0,0 +1,971 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 3 compiler. +# +# - Procedures with multiple arguments. +# - Character literals with and without escaping. 
+ +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "ret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_mv, @object +asm_mv: .string "mv " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_a0, @object +asm_a0: .string "a0" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "(sp)" + +.section .bss + +# When modifiying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. 
+ ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + lw a0, 0(sp) +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + addi a1, sp, 0 + _print_i(); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 0(sp) + addi a0, sp, 0 + li a1, 1 + _write_s(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 0(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 0(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v00); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v04); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 0(sp) # Current token position. + sw zero, 4(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. 
+ lw t0, 4(sp) + addi t0, t0, 1 + sw t0, 4(sp) + + lw t0, 0(sp) + addi t0, t0, 1 + sw t0, 0(sp) + + goto .read_token_loop; + +.read_token_end: + lw a0, 4(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory. +# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + mv t0, a0 + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + sw a0, 0(sp) + + la a0, source_code_position + lw a0, (a0) + lw a1, 0(sp) + _write_s(); + + lw a0, 0(sp) +end; + +proc _compile_section(); +begin + # Print and skip the ".section" (8 characters) directive and a space after it. + _write_token(9); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Prints and skips a line. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. 
+proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + li a0, '\'' + _write_c(); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + li a0, '\\' + _write_c(); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + li a0, '\'' + _write_c(); + + _write_c('\n'); + + _advance_token(2); + +end; + +proc _compile_variable_expression(); +begin + la a0, asm_lw + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _advance_token(1); + _read_token(); + _write_token(); + _advance_token(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + +end; + +proc _compile_expression(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, '\'' + beq a0, t1, .compile_expression_character_literal + + li t1, 'v' + beq a0, t1, .compile_expression_variable + + _is_digit(); + bnez a0, .compile_expression_integer_literal + + goto .compile_expression_end; + +.compile_expression_character_literal: + _compile_character_literal(); + goto .compile_expression_end; + +.compile_expression_integer_literal: + _compile_integer_literal(); + goto .compile_expression_end; + +.compile_expression_variable: + 
_compile_variable_expression(); + goto .compile_expression_end;; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 4(sp) + + sw zero, 8(sp) + + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. + la a0, asm_sw + _write_z(); + + la a0, asm_a0 + _write_z(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 20 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 20 + sub a0, t1, t0 + _write_i(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + + # Add one to the argument counter. + lw t0, 8(sp) + addi t0, t0, 1 + sw t0, 8(sp) + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack. + + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + lw t0, 8(sp) + addi t0, t0, -1 + sw t0, 8(sp) + + la a0, asm_lw + _write_z(); + + _write_c('a'); + lw a0, 8(sp) + _write_i(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 20 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 20 + sub a0, t1, t0 + _write_i(); + + la a0, asm_sp + _write_z(); + + _write_c('\n'); + + goto .compile_call_finalize; + +.compile_call_end: + la a0, asm_call + _write_z(); + + lw a0, 4(sp) + lw a1, 0(sp) + _write_s(); + + # Skip the right paren. 
+ _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + la a0, asm_j + _write_z(); + + _write_token(v00); + _advance_token(); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_semicolon: + _advance_token(2); + + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, 3 # "end" length. + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + _write_z(); + + _write_token(v00); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + _write_token(v00); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 0(sp) + addi a0, a0, 10 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + # Skip the "end" keyword, semicolon and newline. 
+ _advance_token(5); +end; + +proc _compile_type(); +begin + # Print and skip the ".type" (5 characters) directive and a space after it. + _write_token(6); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + + # Print and skip the symbol name, comma, space and @. + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _skip_newlines(); +begin + # Skip newlines. + la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, 8 # ".section" length. + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, 5 # ".type" length. + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, 6 # ".globl" length. + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. 
+ goto .compile_end; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, 81920 # Buffer size. + _read_file(); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage4.elna b/boot/stage4.elna deleted file mode 100644 index d873b9a..0000000 --- a/boot/stage4.elna +++ /dev/null @@ -1,1129 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# Stage 4 compiler. -# -# - Taking value of local and global variables. Variables that doesn't begin -# with "v" are considered global. -# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable; -# 7 words on the stack, 28 - 56, are reversed for procedure arguments (caller side). -# - Take address unary operation "@". 
- -.section .rodata - -.type keyword_section, @object -keyword_section: .ascii ".section" - -.type keyword_type, @object -keyword_type: .ascii ".type" - -.type keyword_ret, @object -keyword_ret: .ascii "ret" - -.type keyword_global, @object -keyword_global: .ascii ".globl" - -.type keyword_proc, @object -keyword_proc: .ascii "proc " - -.type keyword_end, @object -keyword_end: .ascii "end" - -.type keyword_begin, @object -keyword_begin: .ascii "begin" - -.type keyword_var, @object -keyword_var: .ascii "var" - -.type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n" - -.type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n" - -.type asm_type_directive, @object -asm_type_directive: .string ".type " - -.type asm_type_function, @object -asm_type_function: .string ", @function\n" - -.type asm_colon, @object -asm_colon: .string ":\n" - -.type asm_call, @object -asm_call: .string "\tcall " - -.type asm_j, @object -asm_j: .string "\tj " - -.type asm_li, @object -asm_li: .string "\tli " - -.type asm_lw, @object -asm_lw: .string "\tlw " - -.type asm_la, @object -asm_la: .string "\tla " - -.type asm_sw, @object -asm_sw: .string "\tsw " - -.type asm_addi, @object -asm_addi: .string "\taddi " - -.type asm_t0, @object -asm_t0: .string "t0" - -.type asm_t1, @object -asm_t1: .string "t1" - -.type asm_comma, @object -asm_comma: .string ", " - -.type asm_sp, @object -asm_sp: .string "sp" - -.section .bss - -# When modifiying also change the read size in the entry point procedure. -.type source_code, @object -source_code: .zero 81920 - -.section .data - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. -# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. 
- li a0, 0 - li a7, 63 # SYS_READ. - ecall -end; - -# Writes to the standard output. -# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write_s(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - li a7, 64 # SYS_WRITE. - ecall -end; - -# Writes a number to a string buffer. -# -# t0 - Local buffer. -# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - lw a0, 0(sp) -end; - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. -proc _write_i(); -begin - addi a1, sp, 0 - _print_i(); - - mv a1, a0 - addi a0, sp, 0 - _write_s(); - -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - sb a0, 0(sp) - addi a0, sp, 0 - li a1, 1 - _write_s(); -end; - -# Write null terminated string. -# -# Parameters: -# a0 - String. -proc _write_z(); -begin - sw a0, 0(sp) - -.write_z_loop: - # Check for 0 character. - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. - lw a0, 0(sp) - lb a0, (a0) - _write_c(); - - # Advance the input string by one byte. - lw a0, 0(sp) - addi a0, a0, 1 - sw a0, 0(sp) - - goto .write_z_loop; - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. 
-proc _is_upper(); -begin - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' - - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' - - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 0(sp) - - _is_upper(); - sw a0, 4(sp) - - _is_lower(v0); - - lw t0, 0(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 4(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - li t0, '0' - 1 - sltu t1, t0, a0 # t1 = a0 >= '0' - - sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - - and a0, t1, t2 -end; - -proc _is_alnum(); -begin - sw a0, 4(sp) - - _is_alpha(); - sw a0, 0(sp) - - _is_digit(v4); - - lw a1, 0(sp) - or a0, a0, a1 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - la t0, source_code_position # Token pointer. - lw t0, (t0) - sw t0, 0(sp) # Current token position. - sw zero, 4(sp) # Token length. - -.read_token_loop: - lb t0, (t0) # Current character. - - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' - beq t0, t1, .read_token_next - - lw a0, 0(sp) - lb a0, (a0) - _is_alnum(); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - # Advance the source code position and token length. 
- lw t0, 4(sp) - addi t0, t0, 1 - sw t0, 4(sp) - - lw t0, 0(sp) - addi t0, t0, 1 - sw t0, 0(sp) - - goto .read_token_loop; - -.read_token_end: - lw a0, 4(sp) -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. -proc _memcpy(); -begin - mv t0, a0 - -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - mv a0, t0 -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. -proc _write_token(); -begin - sw a0, 0(sp) - - la a0, source_code_position - lw a0, (a0) - lw a1, 0(sp) - _write_s(); - - lw a0, 0(sp) -end; - -proc _compile_section(); -begin - # Print and skip the ".section" (8 characters) directive and a space after it. - _write_token(9); - _advance_token(); - - # Read the section name. - _read_token(); - addi a0, a0, 1 - - _write_token(); - _advance_token(); -end; - -# Prints and skips a line. -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Prints and skips a line. 
-proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. - _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - la a0, asm_li - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - la a0, asm_li - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _write_c('\''); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - _write_c('\\'); - _advance_token(1); - -.compile_character_literal_end: - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - _write_c(); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); - -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - - la a0, asm_lw - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _write_c('('); - la a0, asm_t0 - _write_z(); - - _write_c(')'); - _write_c('\n'); - -end; - -proc _compile_address_expression(); -begin - # Skip the "@" sign. 
- _advance_token(1); - _compile_designator(); - -end; - -proc _compile_expression(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - sw a0, 0(sp) - - li t1, '\'' - beq a0, t1, .compile_expression_character_literal - - li t1, '@' - beq a0, t1, .compile_expression_address - - _is_digit(v0); - bnez a0, .compile_expression_integer_literal - - goto .compile_expression_variable; - -.compile_expression_character_literal: - _compile_character_literal(); - goto .compile_expression_end; - -.compile_expression_integer_literal: - _compile_integer_literal(); - goto .compile_expression_end; - -.compile_expression_address: - _compile_address_expression(); - goto .compile_expression_end; - -.compile_expression_variable: - _compile_variable_expression(); - goto .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - # Stack variables: - # v0 - Procedure name length. - # v4 - Procedure name pointer. - # v8 - Argument count. - - _read_token(); - sw a0, 0(sp) - la t0, source_code_position - lw t0, (t0) - sw t0, 4(sp) - - sw zero, 8(sp) - - # Skip the identifier and left paren. - addi a0, a0, 1 - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - # Save the argument on the stack. - la a0, asm_sw - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - # Calculate the stack offset: 52 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 52 - sub a0, t1, t0 - _write_i(); - - _write_c('(') - la a0, asm_sp - _write_z(); - _write_c(')') - - _write_c('\n'); - - # Add one to the argument counter. 
- lw t0, 8(sp) - addi t0, t0, 1 - sw t0, 8(sp) - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - # Load the argument from the stack. - - lw t0, 8(sp) - beqz t0, .compile_call_end - - # Decrement the argument counter. - lw t0, 8(sp) - addi t0, t0, -1 - sw t0, 8(sp) - - la a0, asm_lw - _write_z(); - - _write_c('a'); - lw a0, 8(sp) - _write_i(); - - la a0, asm_comma - _write_z(); - - # Calculate the stack offset: 52 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 52 - sub a0, t1, t0 - _write_i(); - - _write_c('('); - la a0, asm_sp - _write_z(); - - _write_c(')'); - _write_c('\n'); - - goto .compile_call_finalize; - -.compile_call_end: - la a0, asm_call - _write_z(); - - _write_s(v4, v0); - - # Skip the right paren. - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - _read_token(); - sw a0, 0(sp) - - la a0, asm_j - _write_z(); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - # Skip "v" in the local variable name. - _advance_token(1); - - la a0, asm_addi - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - la a0, asm_sp - _write_z(); - - la a0, asm_comma - _write_z(); - - # Read local variable stack offset and save it. 
- la t0, source_code_position - lw t0, (t0) - sw t0, 0(sp) - - _read_token(); - sw a0, 4(sp) - - _write_token(); - _advance_token(); - - _write_c('\n'); - -end; - -proc _compile_global_designator(); -begin - la a0, asm_la - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); - -end; - -proc _compile_designator(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, 'v' - beq a0, t1, .compile_designator_local - - goto .compile_designator_global; - -.compile_designator_local: - _compile_local_designator(); - goto .compile_designator_end; - -.compile_designator_global: - _compile_global_designator(); - goto .compile_designator_end; - -.compile_designator_end: -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - # Save the assignee address on the stack. - la a0, asm_sw - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _write_i(20); - _write_c('('); - la a0, asm_sp - _write_z(); - _write_c(')'); - _write_c('\n'); - # Skip the assignment sign (:=) with surrounding whitespaces. - _advance_token(4); - - # Compile the assignment. - _compile_expression(); - - la a0, asm_lw - _write_z(); - - la a0, asm_t1 - _write_z(); - - la a0, asm_comma - _write_z(); - - _write_i(20); - _write_c('('); - la a0, asm_sp - _write_z(); - _write_c(')'); - _write_c('\n'); - - la a0, asm_sw - _write_z(); - - la a0, asm_t0 - _write_z(); - - la a0, asm_comma - _write_z(); - - _write_c('('); - la a0, asm_t1 - _write_z(); - _write_c(')'); -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. 
- addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - li t1, 'v' - beq t0, t1, .compile_statement_assignment - - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - _advance_token(1); - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - _advance_token(1); - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_assignment: - _advance_token(1); - _compile_assignment(); - - goto .compile_statement_semicolon; - -.compile_statement_semicolon: - _advance_token(2); - - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - la a0, source_code_position - lw a0, (a0) - la a1, keyword_end - li a2, 3 # "end" length. - _memcmp(); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - _advance_token(5); - - _read_token(); - sw a0, 0(sp) # Save the procedure name length. - - # Write .type _procedure_name, @function. - la a0, asm_type_directive - _write_z(); - - _write_token(v0); - - la a0, asm_type_function - _write_z(); - - # Write procedure label, _procedure_name: - _write_token(v0); - - la a0, asm_colon - _write_z(); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - lw a0, 0(sp) - addi a0, a0, 10 - _advance_token(); - - la a0, asm_prologue - _write_z(); - - _compile_procedure_body(); - - # Write the epilogue. - la a0, asm_epilogue - _write_z(); - - # Skip the "end" keyword, semicolon and newline. - _advance_token(5); -end; - -proc _compile_type(); -begin - # Print and skip the ".type" (5 characters) directive and a space after it. - _write_token(6); - _advance_token(); - - # Read and print the symbol name. 
- _read_token(); - - # Print and skip the symbol name, comma, space and @. - addi a0, a0, 3 - _write_token(); - _advance_token(); - - # Read the symbol type. - _read_token(); - la t0, source_code_position - lw t0, (t0) - sw t0, 12(sp) - - # Print the symbol type and newline. - addi a0, a0, 1 - _write_token(); - _advance_token(); - - # Write the object definition itself. - _compile_line(); - -.compile_type_end: -end; - -proc _skip_newlines(); -begin - # Skip newlines. - la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_newlines_loop; - -.skip_newlines_end: -end; - -# Process the source code and print the generated code. -proc _compile(); -begin -.compile_loop: - _skip_newlines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_end - li t1, '#' - beq t0, t1, .compile_comment - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_section - li a2, 8 # ".section" length. - _memcmp(); - - beqz a0, .compile_section - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_type - li a2, 5 # ".type" length. - _memcmp(); - - beqz a0, .compile_type - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_proc - li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". - _memcmp(); - - beqz a0, .compile_procedure - - la a0, source_code_position - lw a0, (a0) - la a1, keyword_global - li a2, 6 # ".globl" length. - _memcmp(); - - beqz a0, .compile_global - # Not a known token, exit. 
- goto .compile_end; - -.compile_section: - _compile_section(); - - goto .compile_loop; - -.compile_type: - _compile_type(); - - goto .compile_loop; - -.compile_global: - _compile_line(); - - goto .compile_loop; - -.compile_comment: - _skip_comment(); - - goto .compile_loop; - -.compile_procedure: - _compile_procedure(); - - goto .compile_loop; - -.compile_end: -end; - -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -proc _exit(); -begin - li a7, 93 # SYS_EXIT - ecall -end; - -# Entry point. -.globl _start -proc _start(); -begin - # Read the source from the standard input. - la a0, source_code - li a1, 81920 # Buffer size. - _read_file(); - _compile(); - - _exit(0); - -end; diff --git a/boot/stage4/cl.elna b/boot/stage4/cl.elna new file mode 100644 index 0000000..d873b9a --- /dev/null +++ b/boot/stage4/cl.elna @@ -0,0 +1,1129 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 4 compiler. +# +# - Taking value of local and global variables. Variables that doesn't begin +# with "v" are considered global. +# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable; +# 7 words on the stack, 28 - 56, are reversed for procedure arguments (caller side). +# - Take address unary operation "@". 
+ +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "ret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_la, @object +asm_la: .string "\tla " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_addi, @object +asm_addi: .string "\taddi " + +.type asm_t0, @object +asm_t0: .string "t0" + +.type asm_t1, @object +asm_t1: .string "t1" + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "sp" + +.section .bss + +# When modifiying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. 
+ li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + lw a0, 0(sp) +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + addi a1, sp, 0 + _print_i(); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 0(sp) + addi a0, sp, 0 + li a1, 1 + _write_s(); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 0(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 0(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. 
+proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v0); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v4); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 0(sp) # Current token position. + sw zero, 4(sp) # Token length. + +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. 
+ lw t0, 4(sp) + addi t0, t0, 1 + sw t0, 4(sp) + + lw t0, 0(sp) + addi t0, t0, 1 + sw t0, 0(sp) + + goto .read_token_loop; + +.read_token_end: + lw a0, 4(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory. +# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + mv t0, a0 + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + sw a0, 0(sp) + + la a0, source_code_position + lw a0, (a0) + lw a1, 0(sp) + _write_s(); + + lw a0, 0(sp) +end; + +proc _compile_section(); +begin + # Print and skip the ".section" (8 characters) directive and a space after it. + _write_token(9); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Prints and skips a line. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. 
+proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + la a0, asm_li + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); + +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + + la a0, asm_lw + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _write_c('('); + la a0, asm_t0 + _write_z(); + + _write_c(')'); + _write_c('\n'); + +end; + +proc _compile_address_expression(); +begin + # Skip the "@" sign. 
+ _advance_token(1); + _compile_designator(); + +end; + +proc _compile_expression(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + sw a0, 0(sp) + + li t1, '\'' + beq a0, t1, .compile_expression_character_literal + + li t1, '@' + beq a0, t1, .compile_expression_address + + _is_digit(v0); + bnez a0, .compile_expression_integer_literal + + goto .compile_expression_variable; + +.compile_expression_character_literal: + _compile_character_literal(); + goto .compile_expression_end; + +.compile_expression_integer_literal: + _compile_integer_literal(); + goto .compile_expression_end; + +.compile_expression_address: + _compile_address_expression(); + goto .compile_expression_end; + +.compile_expression_variable: + _compile_variable_expression(); + goto .compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) + la t0, source_code_position + lw t0, (t0) + sw t0, 4(sp) + + sw zero, 8(sp) + + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. + la a0, asm_sw + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 52 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 52 + sub a0, t1, t0 + _write_i(); + + _write_c('(') + la a0, asm_sp + _write_z(); + _write_c(')') + + _write_c('\n'); + + # Add one to the argument counter. 
+ lw t0, 8(sp) + addi t0, t0, 1 + sw t0, 8(sp) + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack. + + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + lw t0, 8(sp) + addi t0, t0, -1 + sw t0, 8(sp) + + la a0, asm_lw + _write_z(); + + _write_c('a'); + lw a0, 8(sp) + _write_i(); + + la a0, asm_comma + _write_z(); + + # Calculate the stack offset: 52 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 52 + sub a0, t1, t0 + _write_i(); + + _write_c('('); + la a0, asm_sp + _write_z(); + + _write_c(')'); + _write_c('\n'); + + goto .compile_call_finalize; + +.compile_call_end: + la a0, asm_call + _write_z(); + + _write_s(v4, v0); + + # Skip the right paren. + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + la a0, asm_j + _write_z(); + + _write_token(v0); + _advance_token(); +end; + +proc _compile_local_designator(); +begin + # Skip "v" in the local variable name. + _advance_token(1); + + la a0, asm_addi + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + la a0, asm_sp + _write_z(); + + la a0, asm_comma + _write_z(); + + # Read local variable stack offset and save it. 
+ la t0, source_code_position + lw t0, (t0) + sw t0, 0(sp) + + _read_token(); + sw a0, 4(sp) + + _write_token(); + _advance_token(); + + _write_c('\n'); + +end; + +proc _compile_global_designator(); +begin + la a0, asm_la + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); + +end; + +proc _compile_designator(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, 'v' + beq a0, t1, .compile_designator_local + + goto .compile_designator_global; + +.compile_designator_local: + _compile_local_designator(); + goto .compile_designator_end; + +.compile_designator_global: + _compile_global_designator(); + goto .compile_designator_end; + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + # Save the assignee address on the stack. + la a0, asm_sw + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _write_i(20); + _write_c('('); + la a0, asm_sp + _write_z(); + _write_c(')'); + _write_c('\n'); + # Skip the assignment sign (:=) with surrounding whitespaces. + _advance_token(4); + + # Compile the assignment. + _compile_expression(); + + la a0, asm_lw + _write_z(); + + la a0, asm_t1 + _write_z(); + + la a0, asm_comma + _write_z(); + + _write_i(20); + _write_c('('); + la a0, asm_sp + _write_z(); + _write_c(')'); + _write_c('\n'); + + la a0, asm_sw + _write_z(); + + la a0, asm_t0 + _write_z(); + + la a0, asm_comma + _write_z(); + + _write_c('('); + la a0, asm_t1 + _write_z(); + _write_c(')'); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. 
+ addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + li t1, 'v' + beq t0, t1, .compile_statement_assignment + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_assignment: + _advance_token(1); + _compile_assignment(); + + goto .compile_statement_semicolon; + +.compile_statement_semicolon: + _advance_token(2); + + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + la a0, source_code_position + lw a0, (a0) + la a1, keyword_end + li a2, 3 # "end" length. + _memcmp(); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + _write_z(); + + _write_token(v0); + + la a0, asm_type_function + _write_z(); + + # Write procedure label, _procedure_name: + _write_token(v0); + + la a0, asm_colon + _write_z(); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 0(sp) + addi a0, a0, 10 + _advance_token(); + + la a0, asm_prologue + _write_z(); + + _compile_procedure_body(); + + # Write the epilogue. + la a0, asm_epilogue + _write_z(); + + # Skip the "end" keyword, semicolon and newline. + _advance_token(5); +end; + +proc _compile_type(); +begin + # Print and skip the ".type" (5 characters) directive and a space after it. + _write_token(6); + _advance_token(); + + # Read and print the symbol name. 
+ _read_token(); + + # Print and skip the symbol name, comma, space and @. + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _skip_newlines(); +begin + # Skip newlines. + la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_section + li a2, 8 # ".section" length. + _memcmp(); + + beqz a0, .compile_section + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_type + li a2, 5 # ".type" length. + _memcmp(); + + beqz a0, .compile_type + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_proc + li a2, 5 # "proc " length. Space is needed to distinguish from "procedure". + _memcmp(); + + beqz a0, .compile_procedure + + la a0, source_code_position + lw a0, (a0) + la a1, keyword_global + li a2, 6 # ".globl" length. + _memcmp(); + + beqz a0, .compile_global + # Not a known token, exit. 
+ goto .compile_end; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + la a0, source_code + li a1, 81920 # Buffer size. + _read_file(); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage5.elna b/boot/stage5.elna deleted file mode 100644 index 69623db..0000000 --- a/boot/stage5.elna +++ /dev/null @@ -1,1487 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# Stage 5 compiler. -# -# - Stack size increased to 128 bytes per procedure. -# 7 words on the stack, 92 - 120, are reversed for procedure arguments (caller side). -# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side). -# The first parameter is in 88, the second in 84 and so forth. -# - Unary negate operation, e.g. -5. -# - Unary locical not operation "~". -# - Binary addition "+" and multiplication "*". -# - Binary logical operations: & (and), or and xor. -# - Binary comparison operations: =, <, <=, >, >=, <>. -# - Return statement. 
- -.section .rodata - -.type keyword_section, @object -keyword_section: .ascii ".section" - -.type keyword_type, @object -keyword_type: .ascii ".type" - -.type keyword_ret, @object -keyword_ret: .ascii "\tret" - -.type keyword_global, @object -keyword_global: .ascii ".globl" - -.type keyword_proc, @object -keyword_proc: .ascii "proc " - -.type keyword_end, @object -keyword_end: .ascii "end" - -.type keyword_begin, @object -keyword_begin: .ascii "begin" - -.type keyword_var, @object -keyword_var: .ascii "var" - -.type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n" - -.type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n" - -.type asm_type_directive, @object -asm_type_directive: .string ".type " - -.type asm_type_function, @object -asm_type_function: .string ", @function\n" - -.type asm_colon, @object -asm_colon: .string ":\n" - -.type asm_call, @object -asm_call: .string "\tcall " - -.type asm_j, @object -asm_j: .string "\tj " - -.type asm_li, @object -asm_li: .string "\tli " - -.type asm_lw, @object -asm_lw: .string "\tlw " - -.type asm_la, @object -asm_la: .string "\tla " - -.type asm_sw, @object -asm_sw: .string "\tsw " - -.type asm_addi, @object -asm_addi: .string "\taddi " - -.type asm_add, @object -asm_add: .string "\tadd " - -.type asm_mul, @object -asm_mul: .string "\tmul " - -.type asm_neg, @object -asm_neg: .string "\tneg " - -.type asm_not, @object -asm_not: .string "\tnot " - -.type asm_and, @object -asm_and: .string "\tand " - -.type asm_or, @object -asm_or: .string "\tor " - -.type asm_xor, @object -asm_xor: .string "\txor " - -.type asm_xori, @object -asm_xori: .string "\txori " - -.type asm_sub, @object -asm_sub: .string "\tsub " - -.type asm_seqz, @object -asm_seqz: .string "\tseqz " - -.type asm_snez, @object -asm_snez: .string "\tsnez " - -.type asm_slt, @object -asm_slt: .string "\tslt " - -.type asm_mv, 
@object -asm_mv: .string "\tmv " - -.type asm_comma, @object -asm_comma: .string ", " - -.type asm_sp, @object -asm_sp: .string "sp" - -.section .bss - -# When modifiying also change the read size in the entry point procedure. -.type source_code, @object -source_code: .zero 81920 - -.section .data - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. -# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. - li a0, 0 - li a7, 63 # SYS_READ. - ecall -end; - -# Writes to the standard output. -# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write_s(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - li a7, 64 # SYS_WRITE. - ecall -end; - -# Writes a number to a string buffer. -# -# t0 - Local buffer. -# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - lw a0, 0(sp) -end; - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. -proc _write_i(); -begin - addi a1, sp, 0 - _print_i(); - - mv a1, a0 - addi a0, sp, 0 - _write_s(); - -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - sb a0, 0(sp) - addi a0, sp, 0 - li a1, 1 - _write_s(); -end; - -# Write null terminated string. 
-# -# Parameters: -# a0 - String. -proc _write_z(); -begin - sw a0, 0(sp) - -.write_z_loop: - # Check for 0 character. - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. - lw a0, 0(sp) - lb a0, (a0) - _write_c(); - - # Advance the input string by one byte. - lw a0, 0(sp) - addi a0, a0, 1 - sw a0, 0(sp) - - goto .write_z_loop; - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_upper(); -begin - li t0, 'A' - 1 - sltu t1, t0, a0 # t1 = a0 >= 'A' - - sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' - and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - li t0, 'a' - 1 - sltu t2, t0, a0 # t2 = a0 >= 'a' - - sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' - and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 0(sp) - - _is_upper(); - sw a0, 4(sp) - - _is_lower(v0); - - lw t0, 0(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 4(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - li t0, '0' - 1 - sltu t1, t0, a0 # t1 = a0 >= '0' - - sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' - - and a0, t1, t2 -end; - -proc _is_alnum(); -begin - sw a0, 4(sp) - - _is_alpha(); - sw a0, 0(sp) - - _is_digit(v4); - - lw a1, 0(sp) - or a0, a0, a1 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - la t0, source_code_position # Token pointer. - lw t0, (t0) - sw t0, 0(sp) # Current token position. - sw zero, 4(sp) # Token length. 
- -.read_token_loop: - lb t0, (t0) # Current character. - - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' - beq t0, t1, .read_token_next - - lw a0, 0(sp) - lb a0, (a0) - _is_alnum(); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - # Advance the source code position and token length. - lw t0, 4(sp) - addi t0, t0, 1 - sw t0, 4(sp) - - lw t0, 0(sp) - addi t0, t0, 1 - sw t0, 0(sp) - - goto .read_token_loop; - -.read_token_end: - lw a0, 4(sp) -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. -proc _memcpy(); -begin - mv t0, a0 - -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - mv a0, t0 -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. -proc _write_token(); -begin - sw a0, 0(sp) - _write_s(source_code_position, v0); - lw a0, 0(sp) -end; - -proc _compile_section(); -begin - # Print and skip the ".section" (8 characters) directive and a space after it. - _write_token(9); - _advance_token(); - - # Read the section name. - _read_token(); - addi a0, a0, 1 - - _write_token(); - _advance_token(); -end; - -# Prints and skips a line. 
-proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Prints and skips a line. -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. - _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - _write_z(@asm_li); - _write_register('t', 0); - _write_z(@asm_comma); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z(@asm_li); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_c('\''); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - _write_c('\\'); - _advance_token(1); - -.compile_character_literal_end: - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - _write_c(); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); - -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - - _write_z(@asm_lw); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_c('('); - _write_register('t', 0); - - _write_c(')'); - _write_c('\n'); - -end; - -proc _compile_address_expression(); -begin - # Skip the "@" sign. - _advance_token(1); - _compile_designator(); - -end; - -proc _compile_negate_expression(); -begin - # Skip the "-" sign. 
- _advance_token(1); - _compile_term(); - - _write_z(@asm_neg); - _write_register('t', 0); - - _write_z(@asm_comma); - _write_register('t', 0); - - _write_c('\n'); - -end; - -proc _compile_not_expression(); -begin - # Skip the "~" sign. - _advance_token(1); - _compile_term(); - - _write_z(@asm_not); - _write_register('t', 0); - - _write_z(@asm_comma); - _write_register('t', 0); - - _write_c('\n'); - -end; - -proc _compile_term(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - sw a0, 0(sp) - - li t1, '\'' - beq a0, t1, .compile_term_character_literal - - li t1, '@' - beq a0, t1, .compile_term_address - - li t1, '-' - beq a0, t1, .compile_term_negation - - li t1, '~' - beq a0, t1, .compile_term_not - - _is_digit(v0); - bnez a0, .compile_term_integer_literal - - goto .compile_term_variable; - -.compile_term_character_literal: - _compile_character_literal(); - goto .compile_term_end; - -.compile_term_integer_literal: - _compile_integer_literal(); - goto .compile_term_end; - -.compile_term_address: - _compile_address_expression(); - goto .compile_term_end; - -.compile_term_negation: - _compile_negate_expression(); - goto .compile_term_end; - -.compile_term_not: - _compile_not_expression(); - goto .compile_term_end; - -.compile_term_variable: - _compile_variable_expression(); - goto .compile_term_end; - -.compile_term_end: -end; - -proc _compile_binary_rhs(); -begin - # Skip the whitespace after the binary operator. - _advance_token(1); - _compile_term(); - - # Load the left expression from the stack; - _write_z(@asm_lw); - _write_register('t', 1); - _write_z(@asm_comma); - _write_i(24); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - -end; - -proc _compile_expression(); -begin - _compile_term(); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, ' ' - bne a0, t1, .compile_expression_end - - # It is a binary expression. - - # Save the value of the left expression on the stack. 
- _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - _write_i(24); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - - # Skip surrounding whitespace in front of the operator. - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '+' - beq t0, t1, .compile_expression_add - - li t1, '*' - beq t0, t1, .compile_expression_mul - - li t1, '&' - beq t0, t1, .compile_expression_and - - li t1, 'o' - beq t0, t1, .compile_expression_or - - li t1, 'x' - beq t0, t1, .compile_expression_xor - - li t1, '=' - beq t0, t1, .compile_expression_equals - - li t1, '<' - beq t0, t1, .compile_expression_less - - li t1, '>' - beq t0, t1, .compile_expression_greater - - # Unknown binary operator. - unimp - -.compile_expression_add: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_add); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_mul: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_mul); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_and: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_and); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_or: - _advance_token(2); - _compile_binary_rhs(); - - # Execute the operation. 
- _write_z(@asm_or); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_xor: - _advance_token(3); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_xor); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_equals: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_xor); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - _write_z(@asm_seqz); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_less: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '>' - beq t0, t1, .compile_expression_not_equal - - li t1, '=' - beq t0, t1, .compile_expression_less_equal - - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_not_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_xor); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - _write_z(@asm_snez); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_less_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. 
- _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - # Execute the operation. - _write_z(@asm_xori); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_i(1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_greater: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '=' - beq t0, t1, .compile_expression_greater_equal - - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_greater_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - # Execute the operation. - _write_z(@asm_xori); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_i(1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - # Stack variables: - # v0 - Procedure name length. - # v4 - Procedure name pointer. - # v8 - Argument count. - - _read_token(); - sw a0, 0(sp) - v4 := source_code_position; - - sw zero, 8(sp) - - # Skip the identifier and left paren. - addi a0, a0, 1 - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - # Save the argument on the stack. 
- _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - - # Calculate the stack offset: 116 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 116 - sub a0, t1, t0 - _write_i(); - - _write_c('('); - _write_z(@asm_sp); - _write_c(')') - - _write_c('\n'); - - # Add one to the argument counter. - lw t0, 8(sp) - addi t0, t0, 1 - sw t0, 8(sp) - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - # Load the argument from the stack. - - lw t0, 8(sp) - beqz t0, .compile_call_end - - # Decrement the argument counter. - lw t0, 8(sp) - addi t0, t0, -1 - sw t0, 8(sp) - - _write_z(@asm_lw); - - _write_c('a'); - lw a0, 8(sp) - _write_i(); - - _write_z(@asm_comma); - - # Calculate the stack offset: 116 - (4 * argument_counter) - lw t0, 8(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 116 - sub a0, t1, t0 - _write_i(); - - _write_c('('); - _write_z(@asm_sp); - - _write_c(')'); - _write_c('\n'); - - goto .compile_call_finalize; - -.compile_call_end: - _write_z(@asm_call); - - _write_s(v4, v0); - - # Skip the right paren. - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - _read_token(); - sw a0, 0(sp) - - _write_z(@asm_j); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - # Skip "v" in the local variable name. - _advance_token(1); - - _write_z(@asm_addi); - _write_register('t', 0); - _write_z(@asm_comma); - _write_z(@asm_sp); - _write_z(@asm_comma); - - # Read local variable stack offset and save it. 
- v0 := source_code_position; - - _read_token(); - sw a0, 4(sp) - - _write_token(); - _advance_token(); - - _write_c('\n'); - -end; - -proc _compile_global_designator(); -begin - _write_z(@asm_la); - _write_register('t', 0); - _write_z(@asm_comma); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); - -end; - -proc _compile_designator(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, 'v' - beq a0, t1, .compile_designator_local - - goto .compile_designator_global; - -.compile_designator_local: - _compile_local_designator(); - goto .compile_designator_end; - -.compile_designator_global: - _compile_global_designator(); - goto .compile_designator_end; - -.compile_designator_end: -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - # Save the assignee address on the stack. - _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_i(20); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - # Skip the assignment sign (:=) with surrounding whitespaces. - _advance_token(4); - - # Compile the assignment. - _compile_expression(); - - _write_z(@asm_lw); - _write_register('t', 1); - _write_z(@asm_comma); - - _write_i(20); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - - _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_c('('); - _write_register('t', 1); - _write_c(')'); -end; - -proc _compile_return_statement(); -begin - # Skip "return" keyword and whitespace after it. - _advance_token(7); - _compile_expression(); - - _write_z(@asm_mv); - _write_register('a', 0); - _write_z(@asm_comma); - _write_register('t', 0); - -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. 
- addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - li t1, 'v' - beq t0, t1, .compile_statement_assignment - - # keyword_ret contains "\tret", so it's 4 bytes long. - _memcmp(source_code_position, @keyword_ret, 4); - beqz a0, .compile_statement_return - - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - _advance_token(1); - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - _advance_token(1); - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_assignment: - _advance_token(1); - _compile_assignment(); - - goto .compile_statement_semicolon; - -.compile_statement_return: - _advance_token(1); - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - -.compile_statement_semicolon: - _advance_token(2); - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - # 3 is "end" length. - _memcmp(source_code_position, @keyword_end, 3); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -# Writes a regster name to the standard output. -# -# Parameters: -# a0 - Register character. -# a1 - Register number. 
-proc _write_register(); -begin - sw a0, 0(sp) - sw a1, 4(sp) - - _write_c(); - - lw a0, 4(sp) - li t0, '0' - add a0, a0, t0 - _write_c(); -end; - -proc _compile_procedure_prologue(); -begin - _write_z(@asm_prologue); - - v0 := 0; - -.compile_procedure_prologue_loop: - _write_z(@asm_sw); - - li a0, 'a' - lw a1, 0(sp) - _write_register(); - - _write_z(@asm_comma); - - # Calculate the stack offset: 88 - (4 * parameter_counter) - lw t0, 0(sp) - li t1, 4 - mul t0, t0, t1 - li t1, 88 - sub a0, t1, t0 - _write_i(); - - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - - lw a0, 0(sp) - addi a0, a0, 1 - sw a0, 0(sp) - - li t0, 8 - bne a0, t0, .compile_procedure_prologue_loop -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - _advance_token(5); - - _read_token(); - sw a0, 0(sp) # Save the procedure name length. - - # Write .type _procedure_name, @function. - la a0, asm_type_directive - _write_z(); - - _write_token(v0); - - _write_z(@asm_type_function); - - # Write procedure label, _procedure_name: - _write_token(v0); - - _write_z(@asm_colon); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - lw a0, 0(sp) - addi a0, a0, 10 - _advance_token(); - - _compile_procedure_prologue(); - _compile_procedure_body(); - - # Write the epilogue. - _write_z(@asm_epilogue); - - # Skip the "end" keyword, semicolon and newline. - _advance_token(5); -end; - -proc _compile_type(); -begin - # Print and skip the ".type" (5 characters) directive and a space after it. - _write_token(6); - _advance_token(); - - # Read and print the symbol name. - _read_token(); - - # Print and skip the symbol name, comma, space and @. - addi a0, a0, 3 - _write_token(); - _advance_token(); - - # Read the symbol type. - _read_token(); - la t0, source_code_position - lw t0, (t0) - sw t0, 12(sp) - - # Print the symbol type and newline. - addi a0, a0, 1 - _write_token(); - _advance_token(); - - # Write the object definition itself. 
- _compile_line(); - -.compile_type_end: -end; - -proc _skip_newlines(); -begin - # Skip newlines. - la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_newlines_loop; - -.skip_newlines_end: -end; - -# Process the source code and print the generated code. -proc _compile(); -begin -.compile_loop: - _skip_newlines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_end - li t1, '#' - beq t0, t1, .compile_comment - - # 8 is ".section" length. - _memcmp(source_code_position, @keyword_section, 8); - beqz a0, .compile_section - - # 5 is ".type" length. - _memcmp(source_code_position, @keyword_type, 5); - beqz a0, .compile_type - - # 5 is "proc " length. Space is needed to distinguish from "procedure". - _memcmp(source_code_position, @keyword_proc, 5); - beqz a0, .compile_procedure - - # 6 is ".globl" length. - _memcmp(source_code_position, @keyword_global, 6); - beqz a0, .compile_global - - # Not a known token, exit. - goto .compile_end; - -.compile_section: - _compile_section(); - - goto .compile_loop; - -.compile_type: - _compile_type(); - - goto .compile_loop; - -.compile_global: - _compile_line(); - - goto .compile_loop; - -.compile_comment: - _skip_comment(); - - goto .compile_loop; - -.compile_procedure: - _compile_procedure(); - - goto .compile_loop; - -.compile_end: -end; - -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -proc _exit(); -begin - li a7, 93 # SYS_EXIT - ecall -end; - -# Entry point. -.globl _start -proc _start(); -begin - # Read the source from the standard input. - # Second argument is buffer size. Modifying update the source_code definition. 
- _read_file(@source_code, 81920); - _compile(); - - _exit(0); - -end; diff --git a/boot/stage5/cl.elna b/boot/stage5/cl.elna new file mode 100644 index 0000000..69623db --- /dev/null +++ b/boot/stage5/cl.elna @@ -0,0 +1,1487 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 5 compiler. +# +# - Stack size increased to 128 bytes per procedure. +# 7 words on the stack, 92 - 120, are reversed for procedure arguments (caller side). +# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side). +# The first parameter is in 88, the second in 84 and so forth. +# - Unary negate operation, e.g. -5. +# - Unary locical not operation "~". +# - Binary addition "+" and multiplication "*". +# - Binary logical operations: & (and), or and xor. +# - Binary comparison operations: =, <, <=, >, >=, <>. +# - Return statement. 
+ +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "\tret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_la, @object +asm_la: .string "\tla " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_addi, @object +asm_addi: .string "\taddi " + +.type asm_add, @object +asm_add: .string "\tadd " + +.type asm_mul, @object +asm_mul: .string "\tmul " + +.type asm_neg, @object +asm_neg: .string "\tneg " + +.type asm_not, @object +asm_not: .string "\tnot " + +.type asm_and, @object +asm_and: .string "\tand " + +.type asm_or, @object +asm_or: .string "\tor " + +.type asm_xor, @object +asm_xor: .string "\txor " + +.type asm_xori, @object +asm_xori: .string "\txori " + +.type asm_sub, @object +asm_sub: .string "\tsub " + +.type asm_seqz, @object +asm_seqz: .string "\tseqz " + +.type asm_snez, @object +asm_snez: .string "\tsnez " + +.type asm_slt, @object +asm_slt: .string "\tslt " + +.type asm_mv, 
@object +asm_mv: .string "\tmv " + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "sp" + +.section .bss + +# When modifiying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.section .data + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + lw a0, 0(sp) +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + addi a1, sp, 0 + _print_i(); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + sb a0, 0(sp) + addi a0, sp, 0 + li a1, 1 + _write_s(); +end; + +# Write null terminated string. 
+# +# Parameters: +# a0 - String. +proc _write_z(); +begin + sw a0, 0(sp) + +.write_z_loop: + # Check for 0 character. + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + lw a0, 0(sp) + lb a0, (a0) + _write_c(); + + # Advance the input string by one byte. + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + li t0, 'A' - 1 + sltu t1, t0, a0 # t1 = a0 >= 'A' + + sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z' + and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z' +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + li t0, 'a' - 1 + sltu t2, t0, a0 # t2 = a0 >= 'a' + + sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z' + and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z' +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v0); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + li t0, '0' - 1 + sltu t1, t0, a0 # t1 = a0 >= '0' + + sltiu t2, a0, '9' + 1 # t2 = a0 <= '9' + + and a0, t1, t2 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v4); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + la t0, source_code_position # Token pointer. + lw t0, (t0) + sw t0, 0(sp) # Current token position. + sw zero, 4(sp) # Token length. 
+ +.read_token_loop: + lb t0, (t0) # Current character. + + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. + lw t0, 4(sp) + addi t0, t0, 1 + sw t0, 4(sp) + + lw t0, 0(sp) + addi t0, t0, 1 + sw t0, 0(sp) + + goto .read_token_loop; + +.read_token_end: + lw a0, 4(sp) +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory. +# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + mv t0, a0 + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + sw a0, 0(sp) + _write_s(source_code_position, v0); + lw a0, 0(sp) +end; + +proc _compile_section(); +begin + # Print and skip the ".section" (8 characters) directive and a space after it. + _write_token(9); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Prints and skips a line. 
+proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + _write_z(@asm_li); + _write_register('t', 0); + _write_z(@asm_comma); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + _write_z(@asm_li); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); + +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + + _write_z(@asm_lw); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_c('('); + _write_register('t', 0); + + _write_c(')'); + _write_c('\n'); + +end; + +proc _compile_address_expression(); +begin + # Skip the "@" sign. + _advance_token(1); + _compile_designator(); + +end; + +proc _compile_negate_expression(); +begin + # Skip the "-" sign. 
+ _advance_token(1); + _compile_term(); + + _write_z(@asm_neg); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_not_expression(); +begin + # Skip the "~" sign. + _advance_token(1); + _compile_term(); + + _write_z(@asm_not); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_term(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + sw a0, 0(sp) + + li t1, '\'' + beq a0, t1, .compile_term_character_literal + + li t1, '@' + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not + + _is_digit(v0); + bnez a0, .compile_term_integer_literal + + goto .compile_term_variable; + +.compile_term_character_literal: + _compile_character_literal(); + goto .compile_term_end; + +.compile_term_integer_literal: + _compile_integer_literal(); + goto .compile_term_end; + +.compile_term_address: + _compile_address_expression(); + goto .compile_term_end; + +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_variable: + _compile_variable_expression(); + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_binary_rhs(); +begin + # Skip the whitespace after the binary operator. + _advance_token(1); + _compile_term(); + + # Load the left expression from the stack; + _write_z(@asm_lw); + _write_register('t', 1); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + +end; + +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, ' ' + bne a0, t1, .compile_expression_end + + # It is a binary expression. + + # Save the value of the left expression on the stack. 
+ _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + # Skip surrounding whitespace in front of the operator. + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '+' + beq t0, t1, .compile_expression_add + + li t1, '*' + beq t0, t1, .compile_expression_mul + + li t1, '&' + beq t0, t1, .compile_expression_and + + li t1, 'o' + beq t0, t1, .compile_expression_or + + li t1, 'x' + beq t0, t1, .compile_expression_xor + + li t1, '=' + beq t0, t1, .compile_expression_equals + + li t1, '<' + beq t0, t1, .compile_expression_less + + li t1, '>' + beq t0, t1, .compile_expression_greater + + # Unknown binary operator. + unimp + +.compile_expression_add: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_add); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_mul: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_mul); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_and: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_and); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_or: + _advance_token(2); + _compile_binary_rhs(); + + # Execute the operation. 
+ _write_z(@asm_or); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_xor: + _advance_token(3); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_xor); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_equals: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_xor); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + _write_z(@asm_seqz); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_less: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '>' + beq t0, t1, .compile_expression_not_equal + + li t1, '=' + beq t0, t1, .compile_expression_less_equal + + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_not_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_xor); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + _write_z(@asm_snez); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_less_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. 
+ _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + # Execute the operation. + _write_z(@asm_xori); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_greater: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '=' + beq t0, t1, .compile_expression_greater_equal + + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_greater_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + # Execute the operation. + _write_z(@asm_xori); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) + v4 := source_code_position; + + sw zero, 8(sp) + + # Skip the identifier and left paren. + addi a0, a0, 1 + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. 
+ _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + + # Calculate the stack offset: 116 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 116 + sub a0, t1, t0 + _write_i(); + + _write_c('('); + _write_z(@asm_sp); + _write_c(')') + + _write_c('\n'); + + # Add one to the argument counter. + lw t0, 8(sp) + addi t0, t0, 1 + sw t0, 8(sp) + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack. + + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + lw t0, 8(sp) + addi t0, t0, -1 + sw t0, 8(sp) + + _write_z(@asm_lw); + + _write_c('a'); + lw a0, 8(sp) + _write_i(); + + _write_z(@asm_comma); + + # Calculate the stack offset: 116 - (4 * argument_counter) + lw t0, 8(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 116 + sub a0, t1, t0 + _write_i(); + + _write_c('('); + _write_z(@asm_sp); + + _write_c(')'); + _write_c('\n'); + + goto .compile_call_finalize; + +.compile_call_end: + _write_z(@asm_call); + + _write_s(v4, v0); + + # Skip the right paren. + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + _write_z(@asm_j); + + _write_token(v0); + _advance_token(); +end; + +proc _compile_local_designator(); +begin + # Skip "v" in the local variable name. + _advance_token(1); + + _write_z(@asm_addi); + _write_register('t', 0); + _write_z(@asm_comma); + _write_z(@asm_sp); + _write_z(@asm_comma); + + # Read local variable stack offset and save it. 
+ v0 := source_code_position; + + _read_token(); + sw a0, 4(sp) + + _write_token(); + _advance_token(); + + _write_c('\n'); + +end; + +proc _compile_global_designator(); +begin + _write_z(@asm_la); + _write_register('t', 0); + _write_z(@asm_comma); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); + +end; + +proc _compile_designator(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, 'v' + beq a0, t1, .compile_designator_local + + goto .compile_designator_global; + +.compile_designator_local: + _compile_local_designator(); + goto .compile_designator_end; + +.compile_designator_global: + _compile_global_designator(); + goto .compile_designator_end; + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + # Save the assignee address on the stack. + _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_i(20); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + # Skip the assignment sign (:=) with surrounding whitespaces. + _advance_token(4); + + # Compile the assignment. + _compile_expression(); + + _write_z(@asm_lw); + _write_register('t', 1); + _write_z(@asm_comma); + + _write_i(20); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_c('('); + _write_register('t', 1); + _write_c(')'); +end; + +proc _compile_return_statement(); +begin + # Skip "return" keyword and whitespace after it. + _advance_token(7); + _compile_expression(); + + _write_z(@asm_mv); + _write_register('a', 0); + _write_z(@asm_comma); + _write_register('t', 0); + +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. 
+ addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + li t1, 'v' + beq t0, t1, .compile_statement_assignment + + # keyword_ret contains "\tret", so it's 4 bytes long. + _memcmp(source_code_position, @keyword_ret, 4); + beqz a0, .compile_statement_return + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_assignment: + _advance_token(1); + _compile_assignment(); + + goto .compile_statement_semicolon; + +.compile_statement_return: + _advance_token(1); + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + +.compile_statement_semicolon: + _advance_token(2); + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + # 3 is "end" length. + _memcmp(source_code_position, @keyword_end, 3); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +# Writes a regster name to the standard output. +# +# Parameters: +# a0 - Register character. +# a1 - Register number. 
+proc _write_register(); +begin + sw a0, 0(sp) + sw a1, 4(sp) + + _write_c(); + + lw a0, 4(sp) + li t0, '0' + add a0, a0, t0 + _write_c(); +end; + +proc _compile_procedure_prologue(); +begin + _write_z(@asm_prologue); + + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z(@asm_sw); + + li a0, 'a' + lw a1, 0(sp) + _write_register(); + + _write_z(@asm_comma); + + # Calculate the stack offset: 88 - (4 * parameter_counter) + lw t0, 0(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 88 + sub a0, t1, t0 + _write_i(); + + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + _write_z(); + + _write_token(v0); + + _write_z(@asm_type_function); + + # Write procedure label, _procedure_name: + _write_token(v0); + + _write_z(@asm_colon); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + lw a0, 0(sp) + addi a0, a0, 10 + _advance_token(); + + _compile_procedure_prologue(); + _compile_procedure_body(); + + # Write the epilogue. + _write_z(@asm_epilogue); + + # Skip the "end" keyword, semicolon and newline. + _advance_token(5); +end; + +proc _compile_type(); +begin + # Print and skip the ".type" (5 characters) directive and a space after it. + _write_token(6); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + + # Print and skip the symbol name, comma, space and @. + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + la t0, source_code_position + lw t0, (t0) + sw t0, 12(sp) + + # Print the symbol type and newline. + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. 
+ _compile_line(); + +.compile_type_end: +end; + +proc _skip_newlines(); +begin + # Skip newlines. + la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile(); +begin +.compile_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_end + li t1, '#' + beq t0, t1, .compile_comment + + # 8 is ".section" length. + _memcmp(source_code_position, @keyword_section, 8); + beqz a0, .compile_section + + # 5 is ".type" length. + _memcmp(source_code_position, @keyword_type, 5); + beqz a0, .compile_type + + # 5 is "proc " length. Space is needed to distinguish from "procedure". + _memcmp(source_code_position, @keyword_proc, 5); + beqz a0, .compile_procedure + + # 6 is ".globl" length. + _memcmp(source_code_position, @keyword_global, 6); + beqz a0, .compile_global + + # Not a known token, exit. + goto .compile_end; + +.compile_section: + _compile_section(); + + goto .compile_loop; + +.compile_type: + _compile_type(); + + goto .compile_loop; + +.compile_global: + _compile_line(); + + goto .compile_loop; + +.compile_comment: + _skip_comment(); + + goto .compile_loop; + +.compile_procedure: + _compile_procedure(); + + goto .compile_loop; + +.compile_end: +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + # Second argument is buffer size. Modifying update the source_code definition. 
+ _read_file(@source_code, 81920); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage6.elna b/boot/stage6.elna deleted file mode 100644 index 7d426f9..0000000 --- a/boot/stage6.elna +++ /dev/null @@ -1,1588 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# Stage 6 compiler. -# -# - String literals. - -.section .rodata - -.type keyword_section, @object -keyword_section: .ascii ".section" - -.type keyword_type, @object -keyword_type: .ascii ".type" - -.type keyword_ret, @object -keyword_ret: .ascii "\tret" - -.type keyword_global, @object -keyword_global: .ascii ".globl" - -.type keyword_proc, @object -keyword_proc: .ascii "proc " - -.type keyword_end, @object -keyword_end: .ascii "end" - -.type keyword_begin, @object -keyword_begin: .ascii "begin" - -.type keyword_var, @object -keyword_var: .ascii "var" - -.type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n" - -.type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n" - -.type asm_type_directive, @object -asm_type_directive: .string ".type " - -.type asm_type_function, @object -asm_type_function: .string ", @function\n" - -.type asm_type_object, @object -asm_type_object: .string ", @object\n" - -.type asm_colon, @object -asm_colon: .string ":\n" - -.type asm_call, @object -asm_call: .string "\tcall " - -.type asm_j, @object -asm_j: .string "\tj " - -.type asm_li, @object -asm_li: .string "\tli " - -.type asm_lw, @object -asm_lw: .string "\tlw " - -.type asm_la, @object -asm_la: .string "\tla " - -.type asm_sw, @object -asm_sw: .string "\tsw " - -.type asm_addi, @object -asm_addi: .string "\taddi " - -.type asm_add, @object -asm_add: .string "\tadd " - -.type asm_mul, @object -asm_mul: .string "\tmul " - -.type 
asm_neg, @object -asm_neg: .string "\tneg " - -.type asm_not, @object -asm_not: .string "\tnot " - -.type asm_and, @object -asm_and: .string "\tand " - -.type asm_or, @object -asm_or: .string "\tor " - -.type asm_xor, @object -asm_xor: .string "\txor " - -.type asm_xori, @object -asm_xori: .string "\txori " - -.type asm_sub, @object -asm_sub: .string "\tsub " - -.type asm_seqz, @object -asm_seqz: .string "\tseqz " - -.type asm_snez, @object -asm_snez: .string "\tsnez " - -.type asm_slt, @object -asm_slt: .string "\tslt " - -.type asm_mv, @object -asm_mv: .string "\tmv " - -.type asm_comma, @object -asm_comma: .string ", " - -.type asm_sp, @object -asm_sp: .string "sp" - -.type asm_rodata, @object -asm_rodata: .string ".section .rodata\n" - -.type asm_strings, @object -asm_strings: .string "strings" - -.type asm_ascii, @object -asm_ascii: .string " .ascii " - -.section .bss - -# When modifiying also change the read size in the entry point procedure. -.type source_code, @object -source_code: .zero 81920 - -.type compiler_strings, @object -compiler_strings: .zero 8192 - -.section .data - -.type compiler_strings_position, @object -compiler_strings_position: .word compiler_strings - -.type compiler_strings_length, @object -compiler_strings_length: .word 0 - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text - -# Calculates and returns the string token length between quotes, including the -# escaping slash characters. -# -# Parameters: -# a0 - String token pointer. -# -# Returns the length in a0. -proc _string_length(); -begin - # Reset the counter. - v0 := 0; - -.string_length_loop: - v88 := v88 + 1; - - lw t0, 88(sp) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .string_length_end - - v0 := v0 + 1; - goto .string_length_loop; - -.string_length_end: - return v0 -end; - -# Adds a string to the global, read-only string storage. -# -# Parameters: -# a0 - String token. 
-# -# Returns the offset from the beginning of the storage to the new string in a0. -proc _add_string(); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - -.add_string_loop: - lw t0, 0(sp) - lb t1, (t0) - li t2, '"' - - beq t1, t2, .add_string_end - - la t2, compiler_strings_position - lw t3, (t2) - sb t1, (t3) - - addi t3, t3, 1 - sw t3, (t2) - - addi t0, t0, 1 - sw t0, 0(sp) - - li t2, '\\' - bne t1, t2, .add_string_increment - - goto .add_string_loop; - -.add_string_increment: - la t2, compiler_strings_length - lw t4, (t2) - addi t4, t4, 1 - sw t4, (t2) - - goto .add_string_loop; - -.add_string_end: - return v4 -end; - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. -# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. - li a0, 0 - li a7, 63 # SYS_READ. - ecall -end; - -# Writes to the standard output. -# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write_s(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - li a7, 64 # SYS_WRITE. - ecall -end; - -# Writes a number to a string buffer. -# -# t0 - Local buffer. -# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - return v0 -end; - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. 
-proc _write_i(); -begin - _print_i(v88, @v0); - - mv a1, a0 - addi a0, sp, 0 - _write_s(); - -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -# Write null terminated string. -# -# Parameters: -# a0 - String. -proc _write_z(); -begin -.write_z_loop: - # Check for 0 character. - lw a0, 88(sp) - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. - _write_c(); - - # Advance the input string by one byte. - v88 := v88 + 1; - - goto .write_z_loop; - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 0(sp) - - _is_upper(); - sw a0, 4(sp) - - _is_lower(v0); - - lw t0, 0(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 4(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(); -begin - sw a0, 4(sp) - - _is_alpha(); - sw a0, 0(sp) - - _is_digit(v4); - - lw a1, 0(sp) - or a0, a0, a1 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - # Current token position. - v0 := source_code_position; - # Token length. - v4 := 0; - -.read_token_loop: - lw t0, 0(sp) - lb t0, (t0) # Current character. 
- - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' - beq t0, t1, .read_token_next - - lw a0, 0(sp) - lb a0, (a0) - _is_alnum(); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - # Advance the source code position and token length. - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - -.read_token_end: - return v4 -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. -proc _memcpy(); -begin - mv t0, a0 - -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - mv a0, t0 -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. -proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -proc _compile_section(); -begin - # Print and skip the ".section" (8 characters) directive and a space after it. - _write_token(9); - _advance_token(); - - # Read the section name. - _read_token(); - addi a0, a0, 1 - - _write_token(); - _advance_token(); -end; - -# Prints and skips a line. -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. 
- addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Prints and skips a line. -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. - _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - _write_z(@asm_li); - _write_register('t', 0); - _write_z(@asm_comma); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z(@asm_li); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_c('\''); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - _write_c('\\'); - _advance_token(1); - -.compile_character_literal_end: - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - _write_c(); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); - -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - - _write_z(@asm_lw); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_c('('); - _write_register('t', 0); - - _write_c(')'); - _write_c('\n'); - -end; - -proc _compile_address_expression(); -begin - # Skip the "@" sign. - _advance_token(1); - _compile_designator(); - -end; - -proc _compile_negate_expression(); -begin - # Skip the "-" sign. - _advance_token(1); - _compile_term(); - - _write_z(@asm_neg); - _write_register('t', 0); - - _write_z(@asm_comma); - _write_register('t', 0); - - _write_c('\n'); - -end; - -proc _compile_not_expression(); -begin - # Skip the "~" sign. 
- _advance_token(1); - _compile_term(); - - _write_z(@asm_not); - _write_register('t', 0); - - _write_z(@asm_comma); - _write_register('t', 0); - - _write_c('\n'); - -end; - -proc _compile_string_literal(); -begin - _string_length(source_code_position); - sw a0, 0(sp) - - _add_string(source_code_position); - sw a0, 4(sp) - - _advance_token(v0 + 2); - - _write_z(@asm_la); - _write_register('t', 0); - _write_z(@asm_comma); - _write_z(@asm_strings); - _write_c('\n'); - - _write_z(@asm_li); - _write_register('t', 1); - _write_z(@asm_comma); - _write_i(v4); - _write_c('\n'); - - _write_z(@asm_add); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); -end; - -proc _compile_term(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - sw a0, 0(sp) - - li t1, '\'' - beq a0, t1, .compile_term_character_literal - - li t1, '@' - beq a0, t1, .compile_term_address - - li t1, '-' - beq a0, t1, .compile_term_negation - - li t1, '~' - beq a0, t1, .compile_term_not - - li t1, '"' - beq a0, t1, .compile_term_string_literal - - _is_digit(v0); - bnez a0, .compile_term_integer_literal - - goto .compile_term_variable; - -.compile_term_character_literal: - _compile_character_literal(); - goto .compile_term_end; - -.compile_term_integer_literal: - _compile_integer_literal(); - goto .compile_term_end; - -.compile_term_address: - _compile_address_expression(); - goto .compile_term_end; - -.compile_term_negation: - _compile_negate_expression(); - goto .compile_term_end; - -.compile_term_not: - _compile_not_expression(); - goto .compile_term_end; - -.compile_term_string_literal: - _compile_string_literal(); - goto .compile_term_end; - -.compile_term_variable: - _compile_variable_expression(); - goto .compile_term_end; - -.compile_term_end: -end; - -proc _compile_binary_rhs(); -begin - # Skip the whitespace after the binary operator. 
- _advance_token(1); - _compile_term(); - - # Load the left expression from the stack; - _write_z(@asm_lw); - _write_register('t', 1); - _write_z(@asm_comma); - _write_i(24); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - -end; - -proc _compile_expression(); -begin - _compile_term(); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, ' ' - bne a0, t1, .compile_expression_end - - # It is a binary expression. - - # Save the value of the left expression on the stack. - _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - _write_i(24); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - - # Skip surrounding whitespace in front of the operator. - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '+' - beq t0, t1, .compile_expression_add - - li t1, '*' - beq t0, t1, .compile_expression_mul - - li t1, '&' - beq t0, t1, .compile_expression_and - - li t1, 'o' - beq t0, t1, .compile_expression_or - - li t1, 'x' - beq t0, t1, .compile_expression_xor - - li t1, '=' - beq t0, t1, .compile_expression_equals - - li t1, '<' - beq t0, t1, .compile_expression_less - - li t1, '>' - beq t0, t1, .compile_expression_greater - - # Unknown binary operator. - unimp - -.compile_expression_add: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_add); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_mul: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. 
- _write_z(@asm_mul); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_and: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_and); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_or: - _advance_token(2); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_or); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_xor: - _advance_token(3); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_xor); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_equals: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_xor); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - _write_z(@asm_seqz); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_less: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '>' - beq t0, t1, .compile_expression_not_equal - - li t1, '=' - beq t0, t1, .compile_expression_less_equal - - _compile_binary_rhs(); - - # Execute the operation. 
- _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_not_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_xor); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - _write_z(@asm_snez); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_less_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_c('\n'); - - # Execute the operation. - _write_z(@asm_xori); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_i(1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_greater: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '=' - beq t0, t1, .compile_expression_greater_equal - - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_greater_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z(@asm_slt); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 1); - _write_z(@asm_comma); - _write_register('t', 0); - _write_c('\n'); - - # Execute the operation. 
- _write_z(@asm_xori); - _write_register('t', 0); - _write_z(@asm_comma); - _write_register('t', 0); - _write_z(@asm_comma); - _write_i(1); - _write_c('\n'); - - goto .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - # Stack variables: - # v0 - Procedure name length. - # v4 - Procedure name pointer. - # v8 - Argument count. - - _read_token(); - sw a0, 0(sp) - v4 := source_code_position; - v8 := 0; - - # Skip the identifier and left paren. - _advance_token(v0 + 1); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - # Save the argument on the stack. - _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - - # Calculate the stack offset: 116 - (4 * argument_counter) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_c('('); - _write_z(@asm_sp); - _write_c(')') - - _write_c('\n'); - - # Add one to the argument counter. - v8 := v8 + 1; - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - # Load the argument from the stack. - - lw t0, 8(sp) - beqz t0, .compile_call_end - - # Decrement the argument counter. - v8 := v8 + -1; - - _write_z(@asm_lw); - - _write_c('a'); - _write_i(v8); - - _write_z(@asm_comma); - - # Calculate the stack offset: 116 - (4 * argument_counter) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_c('('); - _write_z(@asm_sp); - - _write_c(')'); - _write_c('\n'); - - goto .compile_call_finalize; - -.compile_call_end: - _write_z(@asm_call); - _write_s(v4, v0); - - # Skip the right paren. 
- _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - _read_token(); - sw a0, 0(sp) - - _write_z(@asm_j); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - # Skip "v" in the local variable name. - _advance_token(1); - - _write_z(@asm_addi); - _write_register('t', 0); - _write_z(@asm_comma); - _write_z(@asm_sp); - _write_z(@asm_comma); - - # Read local variable stack offset and save it. - _read_token(); - _write_token(); - _advance_token(); - _write_c('\n'); - -end; - -proc _compile_global_designator(); -begin - _write_z(@asm_la); - _write_register('t', 0); - _write_z(@asm_comma); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, 'v' - beq a0, t1, .compile_designator_local - - goto .compile_designator_global; - -.compile_designator_local: - _compile_local_designator(); - goto .compile_designator_end; - -.compile_designator_global: - _compile_global_designator(); - goto .compile_designator_end; - -.compile_designator_end: -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - # Save the assignee address on the stack. - _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_i(20); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - # Skip the assignment sign (:=) with surrounding whitespaces. - _advance_token(4); - - # Compile the assignment. - _compile_expression(); - - _write_z(@asm_lw); - _write_register('t', 1); - _write_z(@asm_comma); - - _write_i(20); - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - - _write_z(@asm_sw); - _write_register('t', 0); - _write_z(@asm_comma); - - _write_c('('); - _write_register('t', 1); - _write_c(')'); -end; - -proc _compile_return_statement(); -begin - # Skip "return" keyword and whitespace after it. 
- _advance_token(7); - _compile_expression(); - - _write_z(@asm_mv); - _write_register('a', 0); - _write_z(@asm_comma); - _write_register('t', 0); - -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. - addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - li t1, 'v' - beq t0, t1, .compile_statement_assignment - - # keyword_ret contains "\tret", so it's 4 bytes long. - _memcmp(source_code_position, @keyword_ret, 4); - beqz a0, .compile_statement_return - - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - _advance_token(1); - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - _advance_token(1); - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_assignment: - _advance_token(1); - _compile_assignment(); - - goto .compile_statement_semicolon; - -.compile_statement_return: - _advance_token(1); - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - -.compile_statement_semicolon: - _advance_token(2); - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - # 3 is "end" length. - _memcmp(source_code_position, @keyword_end, 3); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -# Writes a regster name to the standard output. -# -# Parameters: -# a0 - Register character. -# a1 - Register number. 
-proc _write_register(); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _compile_procedure_prologue(); -begin - _write_z(@asm_prologue); - v0 := 0; - -.compile_procedure_prologue_loop: - _write_z(@asm_sw); - _write_register('a', v0); - _write_z(@asm_comma); - - # Calculate the stack offset: 88 - (4 * parameter_counter) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_c('('); - _write_z(@asm_sp); - _write_c(')'); - _write_c('\n'); - - v0 := v0 + 1; - lw a0, 0(sp) - - li t0, 8 - bne a0, t0, .compile_procedure_prologue_loop -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - _advance_token(5); - - _read_token(); - sw a0, 0(sp) # Save the procedure name length. - - # Write .type _procedure_name, @function. - la a0, asm_type_directive - _write_z(); - - _write_token(v0); - _write_z(@asm_type_function); - - # Write procedure label, _procedure_name: - _write_token(v0); - _write_z(@asm_colon); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - _advance_token(v0 + 10); - - _compile_procedure_prologue(); - _compile_procedure_body(); - - # Write the epilogue. - _write_z(@asm_epilogue); - - # Skip the "end" keyword, semicolon and newline. - _advance_token(5); -end; - -proc _compile_type(); -begin - # Print and skip the ".type" (5 characters) directive and a space after it. - _write_token(6); - _advance_token(); - - # Read and print the symbol name. - _read_token(); - - # Print and skip the symbol name, comma, space and @. - addi a0, a0, 3 - _write_token(); - _advance_token(); - - # Read the symbol type. - _read_token(); - - # Print the symbol type and newline. - addi a0, a0, 1 - _write_token(); - _advance_token(); - - # Write the object definition itself. - _compile_line(); - -.compile_type_end: -end; - -proc _skip_newlines(); -begin - # Skip newlines. 
- la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_newlines_loop; - -.skip_newlines_end: -end; - -# Process the source code and print the generated code. -proc _compile_module(); -begin -.compile_module_loop: - _skip_newlines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_module_end - li t1, '#' - beq t0, t1, .compile_module_comment - - # 8 is ".section" length. - _memcmp(source_code_position, @keyword_section, 8); - beqz a0, .compile_module_section - - # 5 is ".type" length. - _memcmp(source_code_position, @keyword_type, 5); - beqz a0, .compile_module_type - - # 5 is "proc " length. Space is needed to distinguish from "procedure". - _memcmp(source_code_position, @keyword_proc, 5); - beqz a0, .compile_module_procedure - - # 6 is ".globl" length. - _memcmp(source_code_position, @keyword_global, 6); - beqz a0, .compile_module_global - - # Not a known token, exit. 
- goto .compile_module_end; - -.compile_module_section: - _compile_section(); - - goto .compile_module_loop; - -.compile_module_type: - _compile_type(); - - goto .compile_module_loop; - -.compile_module_global: - _compile_line(); - - goto .compile_module_loop; - -.compile_module_comment: - _skip_comment(); - - goto .compile_module_loop; - -.compile_module_procedure: - _compile_procedure(); - - goto .compile_module_loop; - -.compile_module_end: -end; - -proc _compile(); -begin - _compile_module(); - - _write_z(@asm_rodata); - _write_z(@asm_type_directive); - _write_z(@asm_strings); - _write_z(@asm_type_object); - _write_z(@asm_strings); - _write_c(':'); - _write_z(@asm_ascii); - _write_c('"'); - - la t0, compiler_strings - sw t0, 0(sp) - -.compile_loop: - lw t0, 0(sp) - la t1, compiler_strings_position - lw t1, (t1) - bge t0, t1, .compile_end - - lb a0, (t0) - - addi t0, t0, 1 - sw t0, 0(sp) - - _write_c(); - - j .compile_loop - -.compile_end: - _write_c('"'); - _write_c('\n'); -end; - -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -proc _exit(); -begin - li a7, 93 # SYS_EXIT - ecall -end; - -# Entry point. -.globl _start -proc _start(); -begin - # Read the source from the standard input. - # Second argument is buffer size. Modifying update the source_code definition. - _read_file(@source_code, 81920); - _compile(); - - _exit(0); - -end; diff --git a/boot/stage6/cl.elna b/boot/stage6/cl.elna new file mode 100644 index 0000000..7d426f9 --- /dev/null +++ b/boot/stage6/cl.elna @@ -0,0 +1,1588 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 6 compiler. +# +# - String literals. 
+ +.section .rodata + +.type keyword_section, @object +keyword_section: .ascii ".section" + +.type keyword_type, @object +keyword_type: .ascii ".type" + +.type keyword_ret, @object +keyword_ret: .ascii "\tret" + +.type keyword_global, @object +keyword_global: .ascii ".globl" + +.type keyword_proc, @object +keyword_proc: .ascii "proc " + +.type keyword_end, @object +keyword_end: .ascii "end" + +.type keyword_begin, @object +keyword_begin: .ascii "begin" + +.type keyword_var, @object +keyword_var: .ascii "var" + +.type asm_prologue, @object +asm_prologue: .string "\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n" + +.type asm_epilogue, @object +asm_epilogue: .string "\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n" + +.type asm_type_directive, @object +asm_type_directive: .string ".type " + +.type asm_type_function, @object +asm_type_function: .string ", @function\n" + +.type asm_type_object, @object +asm_type_object: .string ", @object\n" + +.type asm_colon, @object +asm_colon: .string ":\n" + +.type asm_call, @object +asm_call: .string "\tcall " + +.type asm_j, @object +asm_j: .string "\tj " + +.type asm_li, @object +asm_li: .string "\tli " + +.type asm_lw, @object +asm_lw: .string "\tlw " + +.type asm_la, @object +asm_la: .string "\tla " + +.type asm_sw, @object +asm_sw: .string "\tsw " + +.type asm_addi, @object +asm_addi: .string "\taddi " + +.type asm_add, @object +asm_add: .string "\tadd " + +.type asm_mul, @object +asm_mul: .string "\tmul " + +.type asm_neg, @object +asm_neg: .string "\tneg " + +.type asm_not, @object +asm_not: .string "\tnot " + +.type asm_and, @object +asm_and: .string "\tand " + +.type asm_or, @object +asm_or: .string "\tor " + +.type asm_xor, @object +asm_xor: .string "\txor " + +.type asm_xori, @object +asm_xori: .string "\txori " + +.type asm_sub, @object +asm_sub: .string "\tsub " + +.type asm_seqz, @object +asm_seqz: .string "\tseqz " + +.type asm_snez, @object +asm_snez: .string "\tsnez " 
+ +.type asm_slt, @object +asm_slt: .string "\tslt " + +.type asm_mv, @object +asm_mv: .string "\tmv " + +.type asm_comma, @object +asm_comma: .string ", " + +.type asm_sp, @object +asm_sp: .string "sp" + +.type asm_rodata, @object +asm_rodata: .string ".section .rodata\n" + +.type asm_strings, @object +asm_strings: .string "strings" + +.type asm_ascii, @object +asm_ascii: .string " .ascii " + +.section .bss + +# When modifiying also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +.type compiler_strings, @object +compiler_strings: .zero 8192 + +.section .data + +.type compiler_strings_position, @object +compiler_strings_position: .word compiler_strings + +.type compiler_strings_length, @object +compiler_strings_length: .word 0 + +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Calculates and returns the string token length between quotes, including the +# escaping slash characters. +# +# Parameters: +# a0 - String token pointer. +# +# Returns the length in a0. +proc _string_length(); +begin + # Reset the counter. + v0 := 0; + +.string_length_loop: + v88 := v88 + 1; + + lw t0, 88(sp) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .string_length_end + + v0 := v0 + 1; + goto .string_length_loop; + +.string_length_end: + return v0 +end; + +# Adds a string to the global, read-only string storage. +# +# Parameters: +# a0 - String token. +# +# Returns the offset from the beginning of the storage to the new string in a0. 
+proc _add_string(); +begin + v0 := v88 + 1; + v4 := compiler_strings_length; + +.add_string_loop: + lw t0, 0(sp) + lb t1, (t0) + li t2, '"' + + beq t1, t2, .add_string_end + + la t2, compiler_strings_position + lw t3, (t2) + sb t1, (t3) + + addi t3, t3, 1 + sw t3, (t2) + + addi t0, t0, 1 + sw t0, 0(sp) + + li t2, '\\' + bne t1, t2, .add_string_increment + + goto .add_string_loop; + +.add_string_increment: + la t2, compiler_strings_length + lw t4, (t2) + addi t4, t4, 1 + sw t4, (t2) + + goto .add_string_loop; + +.add_string_end: + return v4 +end; + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + return v0 +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. 
+proc _write_i(); +begin + _print_i(v88, @v0); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + _write_s(@v88, 1); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin +.write_z_loop: + # Check for 0 character. + lw a0, 88(sp) + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + _write_c(); + + # Advance the input string by one byte. + v88 := v88 + 1; + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v0); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + v0 := v88 >= '0'; + v4 := v88 <= '9'; + + return v0 & v4 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v4); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + # Current token position. + v0 := source_code_position; + # Token length. + v4 := 0; + +.read_token_loop: + lw t0, 0(sp) + lb t0, (t0) # Current character. 
+ + # First we try to read a derictive. + # A derictive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. + v4 := v4 + 1; + v0 := v0 + 1; + + goto .read_token_loop; + +.read_token_end: + return v4 +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory. +# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + mv t0, a0 + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + _write_s(source_code_position, v88); + return v88 +end; + +proc _compile_section(); +begin + # Print and skip the ".section" (8 characters) directive and a space after it. + _write_token(9); + _advance_token(); + + # Read the section name. + _read_token(); + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Prints and skips a line. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. 
+ addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + _write_z(@asm_li); + _write_register('t', 0); + _write_z(@asm_comma); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + _write_z(@asm_li); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); + +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + + _write_z(@asm_lw); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_c('('); + _write_register('t', 0); + + _write_c(')'); + _write_c('\n'); + +end; + +proc _compile_address_expression(); +begin + # Skip the "@" sign. + _advance_token(1); + _compile_designator(); + +end; + +proc _compile_negate_expression(); +begin + # Skip the "-" sign. + _advance_token(1); + _compile_term(); + + _write_z(@asm_neg); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_not_expression(); +begin + # Skip the "~" sign. 
+ _advance_token(1); + _compile_term(); + + _write_z(@asm_not); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_string_literal(); +begin + _string_length(source_code_position); + sw a0, 0(sp) + + _add_string(source_code_position); + sw a0, 4(sp) + + _advance_token(v0 + 2); + + _write_z(@asm_la); + _write_register('t', 0); + _write_z(@asm_comma); + _write_z(@asm_strings); + _write_c('\n'); + + _write_z(@asm_li); + _write_register('t', 1); + _write_z(@asm_comma); + _write_i(v4); + _write_c('\n'); + + _write_z(@asm_add); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); +end; + +proc _compile_term(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + sw a0, 0(sp) + + li t1, '\'' + beq a0, t1, .compile_term_character_literal + + li t1, '@' + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not + + li t1, '"' + beq a0, t1, .compile_term_string_literal + + _is_digit(v0); + bnez a0, .compile_term_integer_literal + + goto .compile_term_variable; + +.compile_term_character_literal: + _compile_character_literal(); + goto .compile_term_end; + +.compile_term_integer_literal: + _compile_integer_literal(); + goto .compile_term_end; + +.compile_term_address: + _compile_address_expression(); + goto .compile_term_end; + +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_string_literal: + _compile_string_literal(); + goto .compile_term_end; + +.compile_term_variable: + _compile_variable_expression(); + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_binary_rhs(); +begin + # Skip the whitespace after the binary operator. 
+ _advance_token(1); + _compile_term(); + + # Load the left expression from the stack; + _write_z(@asm_lw); + _write_register('t', 1); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + +end; + +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, ' ' + bne a0, t1, .compile_expression_end + + # It is a binary expression. + + # Save the value of the left expression on the stack. + _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + # Skip surrounding whitespace in front of the operator. + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '+' + beq t0, t1, .compile_expression_add + + li t1, '*' + beq t0, t1, .compile_expression_mul + + li t1, '&' + beq t0, t1, .compile_expression_and + + li t1, 'o' + beq t0, t1, .compile_expression_or + + li t1, 'x' + beq t0, t1, .compile_expression_xor + + li t1, '=' + beq t0, t1, .compile_expression_equals + + li t1, '<' + beq t0, t1, .compile_expression_less + + li t1, '>' + beq t0, t1, .compile_expression_greater + + # Unknown binary operator. + unimp + +.compile_expression_add: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_add); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_mul: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. 
+ _write_z(@asm_mul); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_and: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_and); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_or: + _advance_token(2); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_or); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_xor: + _advance_token(3); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_xor); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_equals: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_xor); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + _write_z(@asm_seqz); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_less: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '>' + beq t0, t1, .compile_expression_not_equal + + li t1, '=' + beq t0, t1, .compile_expression_less_equal + + _compile_binary_rhs(); + + # Execute the operation. 
+ _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_not_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_xor); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + _write_z(@asm_snez); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_less_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); + + # Execute the operation. + _write_z(@asm_xori); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_greater: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '=' + beq t0, t1, .compile_expression_greater_equal + + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_greater_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z(@asm_slt); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_z(@asm_comma); + _write_register('t', 0); + _write_c('\n'); + + # Execute the operation. 
+ _write_z(@asm_xori); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(1); + _write_c('\n'); + + goto .compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) + v4 := source_code_position; + v8 := 0; + + # Skip the identifier and left paren. + _advance_token(v0 + 1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. + _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + + # Calculate the stack offset: 116 - (4 * argument_counter) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_c('('); + _write_z(@asm_sp); + _write_c(')') + + _write_c('\n'); + + # Add one to the argument counter. + v8 := v8 + 1; + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack. + + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + v8 := v8 + -1; + + _write_z(@asm_lw); + + _write_c('a'); + _write_i(v8); + + _write_z(@asm_comma); + + # Calculate the stack offset: 116 - (4 * argument_counter) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_c('('); + _write_z(@asm_sp); + + _write_c(')'); + _write_c('\n'); + + goto .compile_call_finalize; + +.compile_call_end: + _write_z(@asm_call); + _write_s(v4, v0); + + # Skip the right paren. 
+ _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + _write_z(@asm_j); + + _write_token(v0); + _advance_token(); +end; + +proc _compile_local_designator(); +begin + # Skip "v" in the local variable name. + _advance_token(1); + + _write_z(@asm_addi); + _write_register('t', 0); + _write_z(@asm_comma); + _write_z(@asm_sp); + _write_z(@asm_comma); + + # Read local variable stack offset and save it. + _read_token(); + _write_token(); + _advance_token(); + _write_c('\n'); + +end; + +proc _compile_global_designator(); +begin + _write_z(@asm_la); + _write_register('t', 0); + _write_z(@asm_comma); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_designator(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, 'v' + beq a0, t1, .compile_designator_local + + goto .compile_designator_global; + +.compile_designator_local: + _compile_local_designator(); + goto .compile_designator_end; + +.compile_designator_global: + _compile_global_designator(); + goto .compile_designator_end; + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + # Save the assignee address on the stack. + _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_i(20); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + # Skip the assignment sign (:=) with surrounding whitespaces. + _advance_token(4); + + # Compile the assignment. + _compile_expression(); + + _write_z(@asm_lw); + _write_register('t', 1); + _write_z(@asm_comma); + + _write_i(20); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + + _write_c('('); + _write_register('t', 1); + _write_c(')'); +end; + +proc _compile_return_statement(); +begin + # Skip "return" keyword and whitespace after it. 
+ _advance_token(7); + _compile_expression(); + + _write_z(@asm_mv); + _write_register('a', 0); + _write_z(@asm_comma); + _write_register('t', 0); + +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + li t1, 'v' + beq t0, t1, .compile_statement_assignment + + # keyword_ret contains "\tret", so it's 4 bytes long. + _memcmp(source_code_position, @keyword_ret, 4); + beqz a0, .compile_statement_return + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_assignment: + _advance_token(1); + _compile_assignment(); + + goto .compile_statement_semicolon; + +.compile_statement_return: + _advance_token(1); + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + +.compile_statement_semicolon: + _advance_token(2); + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + # 3 is "end" length. + _memcmp(source_code_position, @keyword_end, 3); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +# Writes a regster name to the standard output. +# +# Parameters: +# a0 - Register character. +# a1 - Register number. 
+proc _write_register(); +begin + _write_c(v88); + v84 := v84 + '0'; + _write_c(v84); +end; + +proc _compile_procedure_prologue(); +begin + _write_z(@asm_prologue); + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z(@asm_sw); + _write_register('a', v0); + _write_z(@asm_comma); + + # Calculate the stack offset: 88 - (4 * parameter_counter) + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); + + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + v0 := v0 + 1; + lw a0, 0(sp) + + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + la a0, asm_type_directive + _write_z(); + + _write_token(v0); + _write_z(@asm_type_function); + + # Write procedure label, _procedure_name: + _write_token(v0); + _write_z(@asm_colon); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + _advance_token(v0 + 10); + + _compile_procedure_prologue(); + _compile_procedure_body(); + + # Write the epilogue. + _write_z(@asm_epilogue); + + # Skip the "end" keyword, semicolon and newline. + _advance_token(5); +end; + +proc _compile_type(); +begin + # Print and skip the ".type" (5 characters) directive and a space after it. + _write_token(6); + _advance_token(); + + # Read and print the symbol name. + _read_token(); + + # Print and skip the symbol name, comma, space and @. + addi a0, a0, 3 + _write_token(); + _advance_token(); + + # Read the symbol type. + _read_token(); + + # Print the symbol type and newline. + addi a0, a0, 1 + _write_token(); + _advance_token(); + + # Write the object definition itself. + _compile_line(); + +.compile_type_end: +end; + +proc _skip_newlines(); +begin + # Skip newlines. 
+ la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Process the source code and print the generated code. +proc _compile_module(); +begin +.compile_module_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_module_end + li t1, '#' + beq t0, t1, .compile_module_comment + + # 8 is ".section" length. + _memcmp(source_code_position, @keyword_section, 8); + beqz a0, .compile_module_section + + # 5 is ".type" length. + _memcmp(source_code_position, @keyword_type, 5); + beqz a0, .compile_module_type + + # 5 is "proc " length. Space is needed to distinguish from "procedure". + _memcmp(source_code_position, @keyword_proc, 5); + beqz a0, .compile_module_procedure + + # 6 is ".globl" length. + _memcmp(source_code_position, @keyword_global, 6); + beqz a0, .compile_module_global + + # Not a known token, exit. 
+ goto .compile_module_end; + +.compile_module_section: + _compile_section(); + + goto .compile_module_loop; + +.compile_module_type: + _compile_type(); + + goto .compile_module_loop; + +.compile_module_global: + _compile_line(); + + goto .compile_module_loop; + +.compile_module_comment: + _skip_comment(); + + goto .compile_module_loop; + +.compile_module_procedure: + _compile_procedure(); + + goto .compile_module_loop; + +.compile_module_end: +end; + +proc _compile(); +begin + _compile_module(); + + _write_z(@asm_rodata); + _write_z(@asm_type_directive); + _write_z(@asm_strings); + _write_z(@asm_type_object); + _write_z(@asm_strings); + _write_c(':'); + _write_z(@asm_ascii); + _write_c('"'); + + la t0, compiler_strings + sw t0, 0(sp) + +.compile_loop: + lw t0, 0(sp) + la t1, compiler_strings_position + lw t1, (t1) + bge t0, t1, .compile_end + + lb a0, (t0) + + addi t0, t0, 1 + sw t0, 0(sp) + + _write_c(); + + j .compile_loop + +.compile_end: + _write_c('"'); + _write_c('\n'); +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + # Second argument is buffer size. Modifying update the source_code definition. + _read_file(@source_code, 81920); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage7.elna b/boot/stage7.elna deleted file mode 100644 index f83a8a5..0000000 --- a/boot/stage7.elna +++ /dev/null @@ -1,1488 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -# Stage 7 compiler. -# -# - Static global variable and constant initialization. -# - Objct sections are determined automatically. -# - _start is always exported. 
- -.section .bss - -# When modifiying also change the read size in the entry point procedure. -.type source_code, @object -source_code: .zero 81920 - -.type compiler_strings, @object -compiler_strings: .zero 8192 - -.section .data - -.type compiler_strings_position, @object -compiler_strings_position: .word compiler_strings - -.type compiler_strings_length, @object -compiler_strings_length: .word 0 - -.type source_code_position, @object -source_code_position: .word source_code - -.section .text - -# Calculates and returns the string token length between quotes, including the -# escaping slash characters. -# -# Parameters: -# a0 - String token pointer. -# -# Returns the length in a0. -proc _string_length(); -begin - # Reset the counter. - v0 := 0; - -.string_length_loop: - v88 := v88 + 1; - - lw t0, 88(sp) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .string_length_end - - v0 := v0 + 1; - goto .string_length_loop; - -.string_length_end: - return v0 -end; - -# Adds a string to the global, read-only string storage. -# -# Parameters: -# a0 - String token. -# -# Returns the offset from the beginning of the storage to the new string in a0. -proc _add_string(); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - -.add_string_loop: - lw t0, 0(sp) - lb t1, (t0) - li t2, '"' - - beq t1, t2, .add_string_end - - la t2, compiler_strings_position - lw t3, (t2) - sb t1, (t3) - - addi t3, t3, 1 - sw t3, (t2) - - addi t0, t0, 1 - sw t0, 0(sp) - - li t2, '\\' - bne t1, t2, .add_string_increment - - goto .add_string_loop; - -.add_string_increment: - la t2, compiler_strings_length - lw t4, (t2) - addi t4, t4, 1 - sw t4, (t2) - - goto .add_string_loop; - -.add_string_end: - return v4 -end; - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. -# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. - li a0, 0 - li a7, 63 # SYS_READ. - ecall -end; - -# Writes to the standard output. 
-# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write_s(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - li a7, 64 # SYS_WRITE. - ecall -end; - -# Writes a number to a string buffer. -# -# t0 - Local buffer. -# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - return v0 -end; - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. -proc _write_i(); -begin - _print_i(v88, @v0); - - mv a1, a0 - addi a0, sp, 0 - _write_s(); - -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -# Write null terminated string. -# -# Parameters: -# a0 - String. -proc _write_z(); -begin -.write_z_loop: - # Check for 0 character. - lw a0, 88(sp) - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. - _write_c(); - - # Advance the input string by one byte. - v88 := v88 + 1; - - goto .write_z_loop; - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. 
-# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 0(sp) - - _is_upper(); - sw a0, 4(sp) - - _is_lower(v0); - - lw t0, 0(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 4(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). -# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(); -begin - sw a0, 4(sp) - - _is_alpha(); - sw a0, 0(sp) - - _is_digit(v4); - - lw a1, 0(sp) - or a0, a0, a1 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - # Current token position. - v0 := source_code_position; - # Token length. - v4 := 0; - -.read_token_loop: - lw t0, 0(sp) - lb t0, (t0) # Current character. - - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' - beq t0, t1, .read_token_next - - lw a0, 0(sp) - lb a0, (a0) - _is_alnum(); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - # Advance the source code position and token length. - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - -.read_token_end: - return v4 -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. 
-proc _memcpy(); -begin - mv t0, a0 - -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - mv a0, t0 -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. -proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -proc _compile_section(); -begin - # Print and skip the ".section" (8 characters) directive and a space after it. - _write_token(9); - _advance_token(); - - # Read the section name. - _read_token(); - addi a0, a0, 1 - - _write_token(); - _advance_token(); -end; - -# Prints and skips a line. -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Prints and skips a line. -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. 
- _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - _write_c('\''); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - _write_c('\\'); - _advance_token(1); - -.compile_character_literal_end: - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - _write_c(); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); - -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - # Skip the "@" sign. - _advance_token(1); - _compile_designator(); - -end; - -proc _compile_negate_expression(); -begin - # Skip the "-" sign. - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - # Skip the "~" sign. 
- _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -begin - _string_length(source_code_position); - sw a0, 0(sp) - - _add_string(source_code_position); - sw a0, 4(sp) - - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - sw a0, 0(sp) - - li t1, '\'' - beq a0, t1, .compile_term_character_literal - - li t1, '@' - beq a0, t1, .compile_term_address - - li t1, '-' - beq a0, t1, .compile_term_negation - - li t1, '~' - beq a0, t1, .compile_term_not - - li t1, '"' - beq a0, t1, .compile_term_string_literal - - _is_digit(v0); - bnez a0, .compile_term_integer_literal - - goto .compile_term_variable; - -.compile_term_character_literal: - _compile_character_literal(); - goto .compile_term_end; - -.compile_term_integer_literal: - _compile_integer_literal(); - goto .compile_term_end; - -.compile_term_address: - _compile_address_expression(); - goto .compile_term_end; - -.compile_term_negation: - _compile_negate_expression(); - goto .compile_term_end; - -.compile_term_not: - _compile_not_expression(); - goto .compile_term_end; - -.compile_term_string_literal: - _compile_string_literal(); - goto .compile_term_end; - -.compile_term_variable: - _compile_variable_expression(); - goto .compile_term_end; - -.compile_term_end: -end; - -proc _compile_binary_rhs(); -begin - # Skip the whitespace after the binary operator. - _advance_token(1); - _compile_term(); - - # Load the left expression from the stack; - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -proc _compile_expression(); -begin - _compile_term(); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, ' ' - bne a0, t1, .compile_expression_end - - # It is a binary expression. - - # Save the value of the left expression on the stack. 
- _write_z("sw t0, 24(sp)\n\0"); - - # Skip surrounding whitespace in front of the operator. - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '+' - beq t0, t1, .compile_expression_add - - li t1, '*' - beq t0, t1, .compile_expression_mul - - li t1, '&' - beq t0, t1, .compile_expression_and - - li t1, 'o' - beq t0, t1, .compile_expression_or - - li t1, 'x' - beq t0, t1, .compile_expression_xor - - li t1, '=' - beq t0, t1, .compile_expression_equals - - li t1, '<' - beq t0, t1, .compile_expression_less - - li t1, '>' - beq t0, t1, .compile_expression_greater - - # Unknown binary operator. - unimp - -.compile_expression_add: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_mul: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_and: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_or: - _advance_token(2); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_xor: - _advance_token(3); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_equals: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_less: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '>' - beq t0, t1, .compile_expression_not_equal - - li t1, '=' - beq t0, t1, .compile_expression_less_equal - - _compile_binary_rhs(); - - # Execute the operation. 
- _write_z("slt t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_not_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_less_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - -.compile_expression_greater: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '=' - beq t0, t1, .compile_expression_greater_equal - - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tslt t0, t1, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_greater_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - # Stack variables: - # v0 - Procedure name length. - # v4 - Procedure name pointer. - # v8 - Argument count. - - _read_token(); - sw a0, 0(sp) - v4 := source_code_position; - v8 := 0; - - # Skip the identifier and left paren. - _advance_token(v0 + 1); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - # Save the argument on the stack. - _write_z("\tsw t0, \0"); - - # Calculate the stack offset: 116 - (4 * argument_counter) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - # Add one to the argument counter. - v8 := v8 + 1; - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - # Load the argument from the stack. 
- - lw t0, 8(sp) - beqz t0, .compile_call_end - - # Decrement the argument counter. - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - # Calculate the stack offset: 116 - (4 * argument_counter) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - -.compile_call_end: - _write_z("\tcall \0"); - _write_s(v4, v0); - - # Skip the right paren. - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - _read_token(); - sw a0, 0(sp) - - _write_z("\tj \0"); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - # Skip "v" in the local variable name. - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - # Read local variable stack offset and save it. - _read_token(); - _write_token(); - _advance_token(); - _write_c('\n'); -end; - -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, 'v' - beq a0, t1, .compile_designator_local - - goto .compile_designator_global; - -.compile_designator_local: - _compile_local_designator(); - goto .compile_designator_end; - -.compile_designator_global: - _compile_global_designator(); - goto .compile_designator_end; - -.compile_designator_end: -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - # Save the assignee address on the stack. - _write_z("\tsw t0, 20(sp)\n\0"); - - # Skip the assignment sign (:=) with surrounding whitespaces. - _advance_token(4); - - # Compile the assignment. - _compile_expression(); - - _write_z("\tlw t1, 20(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -begin - # Skip "return" keyword and whitespace after it. 
- _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. - addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - li t1, 'v' - beq t0, t1, .compile_statement_assignment - - # keyword_ret contains "\tret", so it's 4 bytes long. - _memcmp(source_code_position, "\treturn", 7); - beqz a0, .compile_statement_return - - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - _advance_token(1); - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - _advance_token(1); - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_assignment: - _advance_token(1); - _compile_assignment(); - - goto .compile_statement_semicolon; - -.compile_statement_return: - _advance_token(1); - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - -.compile_statement_semicolon: - _advance_token(2); - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - # 3 is "end" length. - _memcmp(source_code_position, "end", 3); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -# Writes a regster name to the standard output. -# -# Parameters: -# a0 - Register character. -# a1 - Register number. 
-proc _write_register(); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _compile_procedure_prologue(); -begin - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - v0 := 0; - -.compile_procedure_prologue_loop: - _write_z("\tsw a\0"); - _write_i(v0); - _write_z(", \0"); - - # Calculate the stack offset: 88 - (4 * parameter_counter) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_z("(sp)\n\0"); - - v0 := v0 + 1; - lw a0, 0(sp) - - li t0, 8 - bne a0, t0, .compile_procedure_prologue_loop -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - _advance_token(5); - - _read_token(); - sw a0, 0(sp) # Save the procedure name length. - - # Write .type _procedure_name, @function. - _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - # Write procedure label, _procedure_name: - _write_token(v0); - _write_z(":\n\0"); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - _advance_token(v0 + 10); - - _compile_procedure_prologue(); - _compile_procedure_body(); - - # Write the epilogue. - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - # Skip the "end" keyword, semicolon and newline. - _advance_token(5); -end; - -proc _skip_newlines(); -begin - # Skip newlines. - la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_newlines_loop; - -.skip_newlines_end: -end; - -# Skip newlines and comments. 
-proc _skip_empty_lines(); -begin -.skip_empty_lines_loop: - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '#' - beq t0, t1, .skip_empty_lines_comment - - li t1, '\n' - beq t0, t1, .skip_empty_lines_newline - - goto .skip_empty_lines_end; - -.skip_empty_lines_comment: - _skip_comment(); - goto .skip_empty_lines_loop; - -.skip_empty_lines_newline: - _advance_token(1); - goto .skip_empty_lines_loop; - -.skip_empty_lines_end: -end; - -proc _compile_global_initializer(); -begin - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .compile_global_initializer_string - - li t1, 'S' - beq t0, t1, .compile_global_initializer_record - - li t1, '@' - beq t0, t1, .compile_global_initializer_pointer - - la a0, source_code_position - lw a0, (a0) - lb a0, (a0) - _is_digit(); - bnez a0, .compile_global_initializer_number - - unimp - -.compile_global_initializer_pointer: - # Skip @. - _advance_token(1); - _write_z("\n\t.word \0"); - _read_token(); - _write_token(); - _advance_token(); - - goto .compile_global_initializer_end; - -.compile_global_initializer_number: - _write_z("\n\t.word \0"); - _read_token(); - _write_token(); - _advance_token(1); - - goto .compile_global_initializer_end; - -.compile_global_initializer_record: - # Skip "S(". - _advance_token(2); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ')' - beq t0, t1, .compile_global_initializer_closing - -.compile_global_initializer_loop: - _compile_global_initializer(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ')' - beq t0, t1, .compile_global_initializer_closing - - # Skip comma and whitespace after it. 
- _advance_token(2); - - goto .compile_global_initializer_loop; - -.compile_global_initializer_closing: - # Skip ")" - _advance_token(1); - - goto .compile_global_initializer_end; - -.compile_global_initializer_string: - _write_z("\n\t.word strings + \0"); - _string_length(source_code_position); - sw a0, 4(sp) - - _add_string(source_code_position); - _write_i(); - - # Skip the quoted string. - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - -.compile_global_initializer_end: -end; - -proc _compile_constant_declaration(); -begin - _read_token(); - sw a0, 0(sp) - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - # Skip the constant name with assignment sign and surrounding whitespaces. - _advance_token(v0 + 4); - _compile_global_initializer(); - # Skip semicolon and newline. - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_const_part(); -begin - _skip_empty_lines(); - - _memcmp(source_code_position, "const\0", 5); - bnez a0, .compile_const_part_end - - # Skip "const" with the newline after it. - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - -.compile_const_part_loop: - _skip_empty_lines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - # If the character at the line beginning is not indentation, - # it is probably the next code section. - li t1, '\t' - bne t0, t1, .compile_const_part_end - - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, '#' - beq t0, t1, .compile_const_part_loop - - _compile_constant_declaration(); - goto .compile_const_part_loop; - -.compile_const_part_end: -end; - -proc _compile_variable_declaration(); -begin - _read_token(); - sw a0, 0(sp) - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - # Skip the variable name and colon with space before the type. 
- _advance_token(v0 + 2); - - # Skip the type name. - _read_token(); - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ' ' - beq t0, t1, .compile_variable_declaration_initializer - - # Else we assume this is a zeroed 81920 bytes big array. - _write_z(" .zero 81920\0"); - goto .compile_variable_declaration_finalize; - -.compile_variable_declaration_initializer: - # Skip the assignment sign with surrounding whitespaces. - _advance_token(4); - _compile_global_initializer(); - goto .compile_variable_declaration_finalize; - -.compile_variable_declaration_finalize: - # Skip semicolon and newline. - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_var_part(); -begin - _memcmp(source_code_position, "var\0", 3); - bnez a0, .compile_var_part_end - - # Skip "var" and newline. - _advance_token(4); - _write_z(".section .data\n\0"); - -.compile_var_part_loop: - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, 'p' - beq t0, t1, .compile_var_part_end - - li t1, '\t' - beq t0, t1, .compile_var_part_declaration - - _compile_line(); - goto .compile_var_part_loop; - -.compile_var_part_declaration: - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - -.compile_var_part_end: -end; - -# Process the source code and print the generated code. -proc _compile_module(); -begin - _compile_const_part(); - _write_z(".section .bss\n\0"); - -.compile_module_bss: - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, 'v' - beq t0, t1, .compile_module_code - - li t1, 'p' - beq t0, t1, .compile_module_code - - _compile_line(); - goto .compile_module_bss; - -.compile_module_code: - _compile_var_part(); - _write_z(".section .text\n\0"); -.compile_module_loop: - _skip_newlines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_module_end - li t1, '#' - beq t0, t1, .compile_module_comment - - # 8 is ".section" length. 
- _memcmp(source_code_position, ".section", 8); - beqz a0, .compile_module_section - - # 5 is "proc " length. Space is needed to distinguish from "procedure". - _memcmp(source_code_position, "proc ", 5); - beqz a0, .compile_module_procedure - - # 6 is ".globl" length. - _memcmp(source_code_position, ".globl", 6); - beqz a0, .compile_module_global - - # Not a known token, exit. - goto .compile_module_end; - -.compile_module_section: - _compile_section(); - - goto .compile_module_loop; - -.compile_module_global: - _compile_line(); - - goto .compile_module_loop; - -.compile_module_comment: - _skip_comment(); - - goto .compile_module_loop; - -.compile_module_procedure: - _compile_procedure(); - - goto .compile_module_loop; - -.compile_module_end: -end; - -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - la t0, compiler_strings - sw t0, 0(sp) - -.compile_loop: - lw t0, 0(sp) - la t1, compiler_strings_position - lw t1, (t1) - bge t0, t1, .compile_end - - lb a0, (t0) - - addi t0, t0, 1 - sw t0, 0(sp) - - _write_c(); - - j .compile_loop - -.compile_end: - _write_c('"'); - _write_c('\n'); -end; - -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -proc _exit(); -begin - li a7, 93 # SYS_EXIT - ecall -end; - -# Entry point. -.globl _start -proc _start(); -begin - # Read the source from the standard input. - # Second argument is buffer size. Modifying update the source_code definition. - _read_file(@source_code, 81920); - _compile(); - - _exit(0); - -end; diff --git a/boot/stage7/cl.elna b/boot/stage7/cl.elna new file mode 100644 index 0000000..f83a8a5 --- /dev/null +++ b/boot/stage7/cl.elna @@ -0,0 +1,1488 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. 
If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 7 compiler. +# +# - Static global variable and constant initialization. +# - Object sections are determined automatically. +# - _start is always exported. + +.section .bss + +# When modifying the size also change the read size in the entry point procedure. +.type source_code, @object +source_code: .zero 81920 + +# Storage the string literals are copied into. +.type compiler_strings, @object +compiler_strings: .zero 8192 + +.section .data + +# Points to the next free byte of compiler_strings. +.type compiler_strings_position, @object +compiler_strings_position: .word compiler_strings + +# Length of the stored strings. +.type compiler_strings_length, @object +compiler_strings_length: .word 0 + +# Current read position inside source_code. +.type source_code_position, @object +source_code_position: .word source_code + +.section .text + +# Calculates and returns the string token length between quotes, including the +# escaping backslash characters. +# +# Parameters: +# a0 - String token pointer. +# +# Returns the length in a0. +proc _string_length(); +begin + # Reset the counter. + v0 := 0; + +.string_length_loop: + # The pointer is advanced before each read, so the character a0 + # initially points at is never examined. + v88 := v88 + 1; + + lw t0, 88(sp) + lb t0, (t0) + + # Stop at the first double quote; a preceding backslash is not taken into account. + li t1, '"' + beq t0, t1, .string_length_end + + v0 := v0 + 1; + goto .string_length_loop; + +.string_length_end: + return v0 +end; + +# Adds a string to the global, read-only string storage. +# +# Parameters: +# a0 - String token. +# +# Returns the offset from the beginning of the storage to the new string in a0.
+proc _add_string(); +begin + # v0 - Read pointer, starting one byte past the address passed in a0. + # v4 - Value of compiler_strings_length on entry, used as the result. + v0 := v88 + 1; + v4 := compiler_strings_length; + +.add_string_loop: + lw t0, 0(sp) + lb t1, (t0) + li t2, '"' + + # A double quote ends the copy. + beq t1, t2, .add_string_end + + # Append the character to the string storage. + la t2, compiler_strings_position + lw t3, (t2) + sb t1, (t3) + + addi t3, t3, 1 + sw t3, (t2) + + # Advance the read pointer. + addi t0, t0, 1 + sw t0, 0(sp) + + # A backslash is stored but does not increase compiler_strings_length. + li t2, '\\' + bne t1, t2, .add_string_increment + + goto .add_string_loop; + +.add_string_increment: + la t2, compiler_strings_length + lw t4, (t2) + addi t4, t4, 1 + sw t4, (t2) + + goto .add_string_loop; + +.add_string_end: + return v4 +end; + +# Reads standard input into a buffer. +# +# Parameters: +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the result of the read system call (the number of bytes read) in a0. +proc _read_file(); +begin + # Shift the arguments to make room for the file descriptor in a0. + mv a2, a1 + mv a1, a0 + # STDIN. + li a0, 0 + li a7, 63 # SYS_READ. + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + # Shift the arguments to make room for the file descriptor in a0. + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + li a7, 64 # SYS_WRITE. + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + # The digits are produced starting with the least significant one, + # so the local buffer is filled backwards from s0 - 9. + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + # Prepend the minus sign for negative numbers. + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + # t0 points one byte below the first written character. + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + # Keep the length in v0 so it can be returned after the _memcpy call. + sw a2, 0(sp) + + _memcpy(); + + return v0 +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number.
+proc _write_i(); +begin + _print_i(v88, @v0); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + _write_s(@v88, 1); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin +.write_z_loop: + # Check for 0 character. + lw a0, 88(sp) + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + _write_c(); + + # Advance the input string by one byte. + v88 := v88 + 1; + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_lower(); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Paramters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v0); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Exemined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + v0 := v88 >= '0'; + v4 := v88 <= '9'; + + return v0 & v4 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v4); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + # Current token position. + v0 := source_code_position; + # Token length. + v4 := 0; + +.read_token_loop: + lw t0, 0(sp) + lb t0, (t0) # Current character. 
+ + # First we try to read a directive. + # A directive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. + v4 := v4 + 1; + v0 := v0 + 1; + + goto .read_token_loop; + +.read_token_end: + return v4 +end; + +# Compares two memory regions byte by byte. +# +# Parameters: +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal, otherwise the difference +# between the first pair of differing bytes (first minus second). +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory. +# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + # a0 is used as the write cursor, keep the original value in t0. + mv t0, a0 + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop; + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + _write_s(source_code_position, v88); + return v88 +end; + +# Copies a ".section" directive to the output and skips it in the input. +proc _compile_section(); +begin + # Print and skip the ".section" (8 characters) directive and a space after it. + _write_token(9); + _advance_token(); + + # Read the section name. + _read_token(); + # Print one more character than the name itself (presumably the line break). + addi a0, a0, 1 + + _write_token(); + _advance_token(); +end; + +# Skips the rest of the current line including the newline, without printing it. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte.
+ addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Prints and skips a line. +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. Only one byte is needed and the source pointer has + # no particular alignment, so load a byte instead of a whole word. + lb a0, (a1) + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +# Emits code loading an integer literal into t0 and skips the literal. +proc _compile_integer_literal(); +begin + _write_z("\tli t0, \0"); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +# Emits code loading a character literal into t0 and skips the literal. +proc _compile_character_literal(); +begin + _write_z("\tli t0, \0"); + + # Opening quote. + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + # Escape sequence: copy the backslash, the escaped character follows. + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + _write_c('\''); + _write_c('\n'); + + # Skip the character and the closing quote. + _advance_token(2); + +end; + +# Emits code loading the value of a designator into t0. +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +# Emits code leaving the address of a designator in t0. +proc _compile_address_expression(); +begin + # Skip the "@" sign. + _advance_token(1); + _compile_designator(); + +end; + +# Emits code negating the following term. +proc _compile_negate_expression(); +begin + # Skip the "-" sign. + _advance_token(1); + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +# Emits code for the bitwise complement of the following term. +proc _compile_not_expression(); +begin + # Skip the "~" sign.
+ _advance_token(1); + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +proc _compile_string_literal(); +begin + _string_length(source_code_position); + sw a0, 0(sp) + + _add_string(source_code_position); + sw a0, 4(sp) + + _advance_token(v0 + 2); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(v4); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +proc _compile_term(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + sw a0, 0(sp) + + li t1, '\'' + beq a0, t1, .compile_term_character_literal + + li t1, '@' + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not + + li t1, '"' + beq a0, t1, .compile_term_string_literal + + _is_digit(v0); + bnez a0, .compile_term_integer_literal + + goto .compile_term_variable; + +.compile_term_character_literal: + _compile_character_literal(); + goto .compile_term_end; + +.compile_term_integer_literal: + _compile_integer_literal(); + goto .compile_term_end; + +.compile_term_address: + _compile_address_expression(); + goto .compile_term_end; + +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_string_literal: + _compile_string_literal(); + goto .compile_term_end; + +.compile_term_variable: + _compile_variable_expression(); + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_binary_rhs(); +begin + # Skip the whitespace after the binary operator. + _advance_token(1); + _compile_term(); + + # Load the left expression from the stack; + _write_z("\tlw t1, 24(sp)\n\0"); +end; + +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, ' ' + bne a0, t1, .compile_expression_end + + # It is a binary expression. + + # Save the value of the left expression on the stack. 
+ _write_z("sw t0, 24(sp)\n\0"); + + # Skip surrounding whitespace in front of the operator. + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '+' + beq t0, t1, .compile_expression_add + + li t1, '*' + beq t0, t1, .compile_expression_mul + + li t1, '&' + beq t0, t1, .compile_expression_and + + li t1, 'o' + beq t0, t1, .compile_expression_or + + li t1, 'x' + beq t0, t1, .compile_expression_xor + + li t1, '=' + beq t0, t1, .compile_expression_equals + + li t1, '<' + beq t0, t1, .compile_expression_less + + li t1, '>' + beq t0, t1, .compile_expression_greater + + # Unknown binary operator. + unimp + +.compile_expression_add: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_mul: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_and: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_or: + _advance_token(2); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_xor: + _advance_token(3); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_equals: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_less: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '>' + beq t0, t1, .compile_expression_not_equal + + li t1, '=' + beq t0, t1, .compile_expression_less_equal + + _compile_binary_rhs(); + + # Execute the operation. 
+ # After _compile_binary_rhs t0 holds the right operand and t1 the left one, + # so "left < right" is "slt t0, t1, t0". + _write_z("slt t0, t1, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_not_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_less_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation: not (right < left). + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + +.compile_expression_greater: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '=' + beq t0, t1, .compile_expression_greater_equal + + _compile_binary_rhs(); + + # Execute the operation: "left > right" is "right < left", + # with the right operand in t0 and the left one in t1. + _write_z("\tslt t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_greater_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation: not (left < right). + _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + +.compile_expression_end: +end; + +# Compiles a procedure call: every argument is evaluated and stored on the +# stack, then the arguments are loaded into the a-registers in reverse order +# and the call instruction is written. +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) + v4 := source_code_position; + v8 := 0; + + # Skip the identifier and left paren. + _advance_token(v0 + 1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. + _write_z("\tsw t0, \0"); + + # Calculate the stack offset: 116 - (4 * argument_counter) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + # Add one to the argument counter. + v8 := v8 + 1; + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack.
+ + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + v8 := v8 + -1; + + _write_z("\tlw a\0"); + _write_i(v8); + + _write_z(", \0"); + + # Calculate the stack offset: 116 - (4 * argument_counter) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize; + +.compile_call_end: + _write_z("\tcall \0"); + _write_s(v4, v0); + + # Skip the right paren. + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + _write_z("\tj \0"); + + _write_token(v0); + _advance_token(); +end; + +proc _compile_local_designator(); +begin + # Skip "v" in the local variable name. + _advance_token(1); + _write_z("\t addi t0, sp, \0"); + + # Read local variable stack offset and save it. + _read_token(); + _write_token(); + _advance_token(); + _write_c('\n'); +end; + +proc _compile_global_designator(); +begin + _write_z("\tla t0, \0"); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_designator(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, 'v' + beq a0, t1, .compile_designator_local + + goto .compile_designator_global; + +.compile_designator_local: + _compile_local_designator(); + goto .compile_designator_end; + +.compile_designator_global: + _compile_global_designator(); + goto .compile_designator_end; + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + # Save the assignee address on the stack. + _write_z("\tsw t0, 20(sp)\n\0"); + + # Skip the assignment sign (:=) with surrounding whitespaces. + _advance_token(4); + + # Compile the assignment. + _compile_expression(); + + _write_z("\tlw t1, 20(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +begin + # Skip "return" keyword and whitespace after it. 
+ _advance_token(7); + _compile_expression(); + + _write_z("mv a0, t0\n\0"); +end; + +# Compiles one line of a procedure body. Calls, goto statements, assignments +# and return statements are translated, any other line is copied to the +# output unchanged. +proc _compile_statement(); +begin + # The statement kind is detected by the character following the indentation. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + # This is a call if the statement starts with an underscore. + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + li t1, 'v' + beq t0, t1, .compile_statement_assignment + + # The compared prefix is a tab followed by "return", 7 bytes in total. + _memcmp(source_code_position, "\treturn", 7); + beqz a0, .compile_statement_return + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + # Skip the indentation. + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + # Skip the indentation. + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_assignment: + # Skip the indentation. + _advance_token(1); + _compile_assignment(); + + goto .compile_statement_semicolon; + +.compile_statement_return: + # Skip the indentation. Nothing is skipped after the return expression. + _advance_token(1); + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + +.compile_statement_semicolon: + # Skip the semicolon and the newline after the statement. + _advance_token(2); + _write_c('\n'); + +.compile_statement_end: +end; + +# Compiles statements until the input position is at "end". +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + # 3 is "end" length. + _memcmp(source_code_position, "end", 3); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +# Writes a register name to the standard output. +# +# Parameters: +# a0 - Register character. +# a1 - Register number.
+proc _write_register(); +begin + _write_c(v88); + # Turn the register number into its ASCII digit. + v84 := v84 + '0'; + _write_c(v84); +end; + +# Writes the procedure prologue: the stack frame setup followed by +# stores of the registers a0 to a7 into the frame. +proc _compile_procedure_prologue(); +begin + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + # v0 - Parameter counter. + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z("\tsw a\0"); + _write_i(v0); + _write_z(", \0"); + + # Calculate the stack offset: 88 - (4 * parameter_counter) + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); + + _write_z("(sp)\n\0"); + + v0 := v0 + 1; + lw a0, 0(sp) + + # Repeat for all 8 argument registers. + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + +# Compiles a procedure definition: symbol type, label, prologue, body and epilogue. +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + sw a0, 0(sp) # Save the procedure name length. + + # Write .type _procedure_name, @function. + _write_z(".type \0"); + + _write_token(v0); + _write_z(", @function\n\0"); + + # Write procedure label, _procedure_name: + _write_token(v0); + _write_z(":\n\0"); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + _advance_token(v0 + 10); + + _compile_procedure_prologue(); + _compile_procedure_body(); + + # Write the epilogue. + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + # Skip the "end" keyword, semicolon and newline. + _advance_token(5); +end; + +# Advances the source position past consecutive newline characters. +proc _skip_newlines(); +begin + # Skip newlines. + la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + # Any character other than a newline, the terminating zero included, ends the loop. + bne t2, t3, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Skip newlines and comments.
+proc _skip_empty_lines(); +begin +.skip_empty_lines_loop: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '#' + beq t0, t1, .skip_empty_lines_comment + + li t1, '\n' + beq t0, t1, .skip_empty_lines_newline + + goto .skip_empty_lines_end; + +.skip_empty_lines_comment: + _skip_comment(); + goto .skip_empty_lines_loop; + +.skip_empty_lines_newline: + _advance_token(1); + goto .skip_empty_lines_loop; + +.skip_empty_lines_end: +end; + +# Compiles the initializer of a global constant or variable. The kind is +# chosen by the first character: a double quote starts a string, "S" a record, +# "@" a pointer to another symbol and a digit a number. +proc _compile_global_initializer(); +begin + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .compile_global_initializer_string + + li t1, 'S' + beq t0, t1, .compile_global_initializer_record + + li t1, '@' + beq t0, t1, .compile_global_initializer_pointer + + la a0, source_code_position + lw a0, (a0) + lb a0, (a0) + _is_digit(); + bnez a0, .compile_global_initializer_number + + # Unknown initializer. + unimp + +.compile_global_initializer_pointer: + # Skip @. + _advance_token(1); + _write_z("\n\t.word \0"); + _read_token(); + _write_token(); + _advance_token(); + + goto .compile_global_initializer_end; + +.compile_global_initializer_number: + _write_z("\n\t.word \0"); + _read_token(); + _write_token(); + # Skip the whole number: _write_token leaves the token length in a0. + _advance_token(); + + goto .compile_global_initializer_end; + +.compile_global_initializer_record: + # Skip "S(". + _advance_token(2); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + +.compile_global_initializer_loop: + _compile_global_initializer(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + + # Skip comma and whitespace after it.
+ _advance_token(2); + + goto .compile_global_initializer_loop; + +.compile_global_initializer_closing: + # Skip ")" + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_string: + _write_z("\n\t.word strings + \0"); + _string_length(source_code_position); + sw a0, 4(sp) + + _add_string(source_code_position); + _write_i(); + + # Skip the quoted string. + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + +.compile_global_initializer_end: +end; + +proc _compile_constant_declaration(); +begin + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the constant name with assignment sign and surrounding whitespaces. + _advance_token(v0 + 4); + _compile_global_initializer(); + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_const_part(); +begin + _skip_empty_lines(); + + _memcmp(source_code_position, "const\0", 5); + bnez a0, .compile_const_part_end + + # Skip "const" with the newline after it. + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + +.compile_const_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + # If the character at the line beginning is not indentation, + # it is probably the next code section. + li t1, '\t' + bne t0, t1, .compile_const_part_end + + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, '#' + beq t0, t1, .compile_const_part_loop + + _compile_constant_declaration(); + goto .compile_const_part_loop; + +.compile_const_part_end: +end; + +proc _compile_variable_declaration(); +begin + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the variable name and colon with space before the type. 
+ _advance_token(v0 + 2); + + # Skip the type name. + _read_token(); + _advance_token(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ' ' + beq t0, t1, .compile_variable_declaration_initializer + + # Else we assume this is a zeroed 81920 bytes big array. + _write_z(" .zero 81920\0"); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_initializer: + # Skip the assignment sign with surrounding whitespaces. + _advance_token(4); + _compile_global_initializer(); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_finalize: + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_var_part(); +begin + _memcmp(source_code_position, "var\0", 3); + bnez a0, .compile_var_part_end + + # Skip "var" and newline. + _advance_token(4); + _write_z(".section .data\n\0"); + +.compile_var_part_loop: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, 'p' + beq t0, t1, .compile_var_part_end + + li t1, '\t' + beq t0, t1, .compile_var_part_declaration + + _compile_line(); + goto .compile_var_part_loop; + +.compile_var_part_declaration: + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + +.compile_var_part_end: +end; + +# Process the source code and print the generated code. +proc _compile_module(); +begin + _compile_const_part(); + _write_z(".section .bss\n\0"); + +.compile_module_bss: + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, 'v' + beq t0, t1, .compile_module_code + + li t1, 'p' + beq t0, t1, .compile_module_code + + _compile_line(); + goto .compile_module_bss; + +.compile_module_code: + _compile_var_part(); + _write_z(".section .text\n\0"); +.compile_module_loop: + _skip_newlines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_module_end + li t1, '#' + beq t0, t1, .compile_module_comment + + # 8 is ".section" length. 
+ _memcmp(source_code_position, ".section", 8); + beqz a0, .compile_module_section + + # 5 is "proc " length. Space is needed to distinguish from "procedure". + _memcmp(source_code_position, "proc ", 5); + beqz a0, .compile_module_procedure + + # 6 is ".globl" length. + _memcmp(source_code_position, ".globl", 6); + beqz a0, .compile_module_global + + # Not a known token, exit. + goto .compile_module_end; + +.compile_module_section: + _compile_section(); + + goto .compile_module_loop; + +.compile_module_global: + _compile_line(); + + goto .compile_module_loop; + +.compile_module_comment: + _skip_comment(); + + goto .compile_module_loop; + +.compile_module_procedure: + _compile_procedure(); + + goto .compile_module_loop; + +.compile_module_end: +end; + +proc _compile(); +begin + _write_z(".globl _start\n\n\0"); + _compile_module(); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + la t0, compiler_strings + sw t0, 0(sp) + +.compile_loop: + lw t0, 0(sp) + la t1, compiler_strings_position + lw t1, (t1) + bge t0, t1, .compile_end + + lb a0, (t0) + + addi t0, t0, 1 + sw t0, 0(sp) + + _write_c(); + + j .compile_loop + +.compile_end: + _write_c('"'); + _write_c('\n'); +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Entry point. +.globl _start +proc _start(); +begin + # Read the source from the standard input. + # Second argument is buffer size. Modifying update the source_code definition. + _read_file(@source_code, 81920); + _compile(); + + _exit(0); + +end; diff --git a/boot/stage8.elna b/boot/stage8.elna deleted file mode 100644 index 75c24d7..0000000 --- a/boot/stage8.elna +++ /dev/null @@ -1,1989 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. 
- -# Stage 8 compiler. -# -# - Procedure calls in expressions. -# - Comments between (* and *) are supported. These are still single line -# comments and they should be on a separate line. -# - _syscall builtin. _syscall takes 7 arguments, -# the 7th argument gets stored in a7 before invoking ecall. -# Other arguments are saved in a0 through a5. -# - New intrinsics: _load_byte, _load_word, _store_byte, _store_word. -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - # Every type info starts with a word describing what type it is. - # - # PRIMITIVE_TYPE = 1 - # - # Primitive types have only type size. - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - # Info objects start with a word describing its type. - # - # INFO_TYPE = 1 - # - # Type info has the type it belongs to. - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - source_code_position: Pointer := @source_code; - -# Calculates and returns the string token length between quotes, including the -# escaping slash characters. -# -# Parameters: -# a0 - String token pointer. -# -# Returns the length in a0. -proc _string_length(); -begin - # Reset the counter. 
- v0 := 0; - -.string_length_loop: - v88 := v88 + 1; - - lw t0, 88(sp) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .string_length_end - - v0 := v0 + 1; - goto .string_length_loop; - -.string_length_end: - return v0 -end; - -# Adds a string to the global, read-only string storage. -# -# Parameters: -# a0 - String token. -# -# Returns the offset from the beginning of the storage to the new string in a0. -proc _add_string(); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - -.add_string_loop: - lw t0, 0(sp) - lb t1, (t0) - li t2, '"' - - beq t1, t2, .add_string_end - - la t2, compiler_strings_position - lw t3, (t2) - sb t1, (t3) - - addi t3, t3, 1 - sw t3, (t2) - - addi t0, t0, 1 - sw t0, 0(sp) - - li t2, '\\' - bne t1, t2, .add_string_increment - - goto .add_string_loop; - -.add_string_increment: - la t2, compiler_strings_length - lw t4, (t2) - addi t4, t4, 1 - sw t4, (t2) - - goto .add_string_loop; - -.add_string_end: - return v4 -end; - -# Reads standard input into a buffer. -# a0 - Buffer pointer. -# a1 - Buffer size. -# -# Returns the amount of bytes written in a0. -proc _read_file(); -begin - mv a2, a1 - mv a1, a0 - # STDIN. - li a0, 0 - # SYS_READ. - li a7, 63 - ecall -end; - -# Writes to the standard output. -# -# Parameters: -# a0 - Buffer. -# a1 - Buffer length. -proc _write_s(); -begin - mv a2, a1 - mv a1, a0 - # STDOUT. - li a0, 1 - # SYS_WRITE. - li a7, 64 - ecall -end; - -# Writes a number to a string buffer. -# -# t0 - Local buffer. -# t1 - Constant 10. -# t2 - Current character. -# t3 - Whether the number is negative. -# -# Parameters: -# a0 - Whole number. -# a1 - Buffer pointer. -# -# Sets a0 to the length of the written number. 
-proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - return v0 -end; - -# Writes a number to the standard output. -# -# Parameters: -# a0 - Whole number. -proc _write_i(); -begin - _print_i(v88, @v0); - - mv a1, a0 - addi a0, sp, 0 - _write_s(); - -end; - -# Writes a character from a0 into the standard output. -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -# Write null terminated string. -# -# Parameters: -# a0 - String. -proc _write_z(); -begin -.write_z_loop: - # Check for 0 character. - lw a0, 88(sp) - lb a0, (a0) - beqz a0, .write_z_end - - # Print a character. - _write_c(); - - # Advance the input string by one byte. - v88 := v88 + 1; - - goto .write_z_loop; - -.write_z_end: -end; - -# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -# Detects if a0 is an lowercase character. Sets a0 to 1 if so, otherwise to 0. -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -# Detects if the passed character is a 7-bit alpha character or an underscore. -# -# Paramters: -# a0 - Tested character. -# -# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. -proc _is_alpha(); -begin - sw a0, 0(sp) - - _is_upper(); - sw a0, 4(sp) - - _is_lower(v0); - - lw t0, 0(sp) - xori t1, t0, '_' - seqz t1, t1 - - lw t0, 4(sp) - or a0, a0, t0 - or a0, a0, t1 -end; - -# Detects whether the passed character is a digit -# (a value between 0 and 9). 
-# -# Parameters: -# a0 - Exemined value. -# -# Sets a0 to 1 if it is a digit, to 0 otherwise. -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(); -begin - sw a0, 4(sp) - - _is_alpha(); - sw a0, 0(sp) - - _is_digit(v4); - - lw a1, 0(sp) - or a0, a0, a1 -end; - -# Reads the next token. -# -# Returns token length in a0. -proc _read_token(); -begin - # Current token position. - v0 := source_code_position; - # Token length. - v4 := 0; - -.read_token_loop: - lw t0, 0(sp) - # Current character. - lb t0, (t0) - - # First we try to read a derictive. - # A derictive can contain a dot and characters. - li t1, '.' - beq t0, t1, .read_token_next - - lw a0, 0(sp) - lb a0, (a0) - _is_alnum(); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - # Advance the source code position and token length. - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - -.read_token_end: - return v4 -end; - -# a0 - First pointer. -# a1 - Second pointer. -# a2 - The length to compare. -# -# Returns 0 in a0 if memory regions are equal. -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -# Copies memory. -# -# Parameters: -# a0 - Destination. -# a1 - Source. -# a2 - Size. -# -# Preserves a0. -proc _memcpy(); -begin - mv t0, a0 - -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - mv a0, t0 -end; - -# Advances the token stream by a0 bytes. -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -# Prints the current token. -# -# Parameters: -# a0 - Token length. -# -# Returns a0 unchanged. 
-proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -# Prints and skips a line. -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - # Print a character. - lw a0, (a1) - _write_c(); - - # Advance the input string by one byte. - _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - _advance_token(1); -end; - -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - _write_c('\''); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - _write_c('\\'); - _advance_token(1); - -.compile_character_literal_end: - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - _write_c(); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); - -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - # Skip the "@" sign. - _advance_token(1); - _compile_designator(); - -end; - -proc _compile_negate_expression(); -begin - # Skip the "-" sign. - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - # Skip the "~" sign. 
- _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -begin - _string_length(source_code_position); - sw a0, 0(sp) - - _add_string(source_code_position); - sw a0, 4(sp) - - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - sw a0, 0(sp) - - li t1, '\'' - beq a0, t1, .compile_term_character_literal - - li t1, '@' - beq a0, t1, .compile_term_address - - li t1, '-' - beq a0, t1, .compile_term_negation - - li t1, '~' - beq a0, t1, .compile_term_not - - li t1, '"' - beq a0, t1, .compile_term_string_literal - - li t1, '_' - beq a0, t1, .compile_term_call - - _is_digit(v0); - bnez a0, .compile_term_integer_literal - - goto .compile_term_variable; - -.compile_term_character_literal: - _compile_character_literal(); - goto .compile_term_end; - -.compile_term_integer_literal: - _compile_integer_literal(); - goto .compile_term_end; - -.compile_term_address: - _compile_address_expression(); - goto .compile_term_end; - -.compile_term_negation: - _compile_negate_expression(); - goto .compile_term_end; - -.compile_term_not: - _compile_not_expression(); - goto .compile_term_end; - -.compile_term_string_literal: - _compile_string_literal(); - goto .compile_term_end; - -.compile_term_call: - _compile_call(); - _write_z("\nmv t0, a0\n\0"); - goto .compile_term_end; - -.compile_term_variable: - _compile_variable_expression(); - goto .compile_term_end; - -.compile_term_end: -end; - -proc _compile_binary_rhs(); -begin - # Skip the whitespace after the binary operator. 
- _advance_token(1); - _compile_term(); - - # Load the left expression from the stack; - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -proc _compile_expression(); -begin - _compile_term(); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, ' ' - bne a0, t1, .compile_expression_end - - # It is a binary expression. - - # Save the value of the left expression on the stack. - _write_z("sw t0, 24(sp)\n\0"); - - # Skip surrounding whitespace in front of the operator. - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '+' - beq t0, t1, .compile_expression_add - - li t1, '*' - beq t0, t1, .compile_expression_mul - - li t1, '&' - beq t0, t1, .compile_expression_and - - li t1, 'o' - beq t0, t1, .compile_expression_or - - li t1, 'x' - beq t0, t1, .compile_expression_xor - - li t1, '=' - beq t0, t1, .compile_expression_equals - - li t1, '<' - beq t0, t1, .compile_expression_less - - li t1, '>' - beq t0, t1, .compile_expression_greater - - # Unknown binary operator. - unimp - -.compile_expression_add: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_mul: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_and: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_or: - _advance_token(2); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_xor: - _advance_token(3); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_equals: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. 
- _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_less: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '>' - beq t0, t1, .compile_expression_not_equal - - li t1, '=' - beq t0, t1, .compile_expression_less_equal - - _compile_binary_rhs(); - - # Execute the operation. - _write_z("slt t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_not_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_less_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - -.compile_expression_greater: - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '=' - beq t0, t1, .compile_expression_greater_equal - - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tslt t0, t1, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_greater_equal: - _advance_token(1); - _compile_binary_rhs(); - - # Execute the operation. - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - # Stack variables: - # v0 - Procedure name length. - # v4 - Procedure name pointer. - # v8 - Argument count. - - _read_token(); - sw a0, 0(sp) - v4 := source_code_position; - v8 := 0; - - # Skip the identifier and left paren. - _advance_token(v0 + 1); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - # Save the argument on the stack. 
- _write_z("\tsw t0, \0"); - - # Calculate the stack offset: 116 - (4 * argument_counter) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - # Add one to the argument counter. - v8 := v8 + 1; - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - # Load the argument from the stack. - - lw t0, 8(sp) - beqz t0, .compile_call_end - - # Decrement the argument counter. - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - # Calculate the stack offset: 116 - (4 * argument_counter) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - -.compile_call_end: - _write_z("\tcall \0"); - _write_s(v4, v0); - - # Skip the right paren. - _advance_token(1); -end; - -proc _compile_goto(); -begin - _advance_token(5); - - _read_token(); - sw a0, 0(sp) - - _write_z("\tj \0"); - - _write_token(v0); - _advance_token(); -end; - -proc _compile_local_designator(); -begin - # Skip "v" in the local variable name. - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - # Read local variable stack offset and save it. 
- _read_token(); - _write_token(); - _advance_token(); - _write_c('\n'); -end; - -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - _read_token(); - _write_token(); - _advance_token(); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, 'v' - beq a0, t1, .compile_designator_local - - goto .compile_designator_global; - -.compile_designator_local: - _compile_local_designator(); - goto .compile_designator_end; - -.compile_designator_global: - _compile_global_designator(); - goto .compile_designator_end; - -.compile_designator_end: -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - # Save the assignee address on the stack. - _write_z("\tsw t0, 20(sp)\n\0"); - - # Skip the assignment sign (:=) with surrounding whitespaces. - _advance_token(4); - - # Compile the assignment. - _compile_expression(); - - _write_z("\tlw t1, 20(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -begin - # Skip "return" keyword and whitespace after it. - _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -proc _compile_statement(); -begin - # This is a call if the statement starts with an underscore. - la t0, source_code_position - lw t0, (t0) - # First character after alignment tab. - addi t0, t0, 1 - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - li t1, 'v' - beq t0, t1, .compile_statement_assignment - - # keyword_ret contains "\tret", so it's 4 bytes long. 
- _memcmp(source_code_position, "\treturn", 7); - beqz a0, .compile_statement_return - - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - _advance_token(1); - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - _advance_token(1); - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_assignment: - _advance_token(1); - _compile_assignment(); - - goto .compile_statement_semicolon; - -.compile_statement_return: - _advance_token(1); - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - -.compile_statement_semicolon: - _advance_token(2); - _write_c('\n'); - -.compile_statement_end: -end; - -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - _skip_empty_lines(); - - # 3 is "end" length. - _memcmp(source_code_position, "end", 3); - - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -# Writes a regster name to the standard output. -# -# Parameters: -# a0 - Register character. -# a1 - Register number. -proc _write_register(); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _compile_procedure_prologue(); -begin - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - v0 := 0; - -.compile_procedure_prologue_loop: - _write_z("\tsw a\0"); - _write_i(v0); - _write_z(", \0"); - - # Calculate the stack offset: 88 - (4 * parameter_counter) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_z("(sp)\n\0"); - - v0 := v0 + 1; - lw a0, 0(sp) - - li t0, 8 - bne a0, t0, .compile_procedure_prologue_loop -end; - -proc _compile_procedure(); -begin - # Skip "proc ". - _advance_token(5); - - _read_token(); - # Save the procedure name length. - sw a0, 0(sp) - - # Write .type _procedure_name, @function. 
- _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - # Write procedure label, _procedure_name: - _write_token(v0); - _write_z(":\n\0"); - - # Skip the function name and trailing parens, semicolon, "begin" and newline. - _advance_token(v0 + 10); - - _compile_procedure_prologue(); - _compile_procedure_body(); - - # Write the epilogue. - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - # Skip the "end" keyword, semicolon and newline. - _advance_token(5); -end; - -proc _skip_newlines(); -begin - # Skip newlines. - la t0, source_code_position - lw t1, (t0) - -.skip_newlines_loop: - lb t2, (t1) - li t3, '\n' - bne t2, t3, .skip_newlines_end - beqz t2, .skip_newlines_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_newlines_loop; - -.skip_newlines_end: -end; - -# Prints and skips a line. -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - # Check for newline character. - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - # Advance the input string by one byte. - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - # Skip the newline. - addi t1, t1, 1 - sw t1, (t0) -end; - -# Skip newlines and comments. 
-proc _skip_empty_lines(); -begin -.skip_empty_lines_rerun: - la t0, source_code_position - lw t0, (t0) - sw t0, 0(sp) - -.skip_empty_lines_loop: - lw t2, 0(sp) - lb t0, (t2) - - li t1, '#' - beq t0, t1, .skip_empty_lines_comment - - li t1, '\n' - beq t0, t1, .skip_empty_lines_newline - - li t1, '\t' - beq t0, t1, .skip_empty_lines_tab - - li t1, '(' - bne t0, t1, .skip_empty_lines_end - addi t2, t2, 1 - lb t0, (t2) - li t1, '*' - beq t0, t1, .skip_empty_lines_comment - - goto .skip_empty_lines_end; - -.skip_empty_lines_comment: - la t0, source_code_position - lw t1, 0(sp) - sw t1, (t0) - _skip_comment(); - goto .skip_empty_lines_rerun; - -.skip_empty_lines_newline: - la t0, source_code_position - lw t1, 0(sp) - addi t1, t1, 1 - sw t1, (t0) - goto .skip_empty_lines_rerun; - -.skip_empty_lines_tab: - v0 := v0 + 1; - goto .skip_empty_lines_loop - -.skip_empty_lines_end: -end; - -proc _compile_global_initializer(); -begin - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .compile_global_initializer_string - - li t1, 'S' - beq t0, t1, .compile_global_initializer_record - - li t1, '@' - beq t0, t1, .compile_global_initializer_pointer - - la a0, source_code_position - lw a0, (a0) - lb a0, (a0) - _is_digit(); - bnez a0, .compile_global_initializer_number - - unimp - -.compile_global_initializer_pointer: - # Skip @. - _advance_token(1); - _write_z("\n\t.word \0"); - _read_token(); - _write_token(); - _advance_token(); - - goto .compile_global_initializer_end; - -.compile_global_initializer_number: - _write_z("\n\t.word \0"); - _read_token(); - _write_token(); - _advance_token(1); - - goto .compile_global_initializer_end; - -.compile_global_initializer_record: - # Skip "S(". 
- _advance_token(2); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ')' - beq t0, t1, .compile_global_initializer_closing - -.compile_global_initializer_loop: - _compile_global_initializer(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ')' - beq t0, t1, .compile_global_initializer_closing - - # Skip comma and whitespace after it. - _advance_token(2); - - goto .compile_global_initializer_loop; - -.compile_global_initializer_closing: - # Skip ")" - _advance_token(1); - - goto .compile_global_initializer_end; - -.compile_global_initializer_string: - _write_z("\n\t.word strings + \0"); - _string_length(source_code_position); - sw a0, 4(sp) - - _add_string(source_code_position); - _write_i(); - - # Skip the quoted string. - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - -.compile_global_initializer_end: -end; - -proc _compile_constant_declaration(); -begin - _read_token(); - sw a0, 0(sp) - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - # Skip the constant name with assignment sign and surrounding whitespaces. - _advance_token(v0 + 4); - _compile_global_initializer(); - # Skip semicolon and newline. - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_const_part(); -begin - _skip_empty_lines(); - - _memcmp(source_code_position, "const\0", 5); - bnez a0, .compile_const_part_end - - # Skip "const" with the newline after it. - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - -.compile_const_part_loop: - _skip_empty_lines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - # If the character at the line beginning is not indentation, - # it is probably the next code section. 
- li t1, '\t' - bne t0, t1, .compile_const_part_end - - _advance_token(1); - - _compile_constant_declaration(); - goto .compile_const_part_loop; - -.compile_const_part_end: -end; - -proc _compile_variable_declaration(); -begin - _read_token(); - sw a0, 0(sp) - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - # Skip the variable name and colon with space before the type. - _advance_token(v0 + 2); - - # Skip the type name. - _read_token(); - _advance_token(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ' ' - beq t0, t1, .compile_variable_declaration_initializer - - # Else we assume this is a zeroed 81920 bytes big array. - _write_z(" .zero 81920\0"); - goto .compile_variable_declaration_finalize; - -.compile_variable_declaration_initializer: - # Skip the assignment sign with surrounding whitespaces. - _advance_token(4); - _compile_global_initializer(); - goto .compile_variable_declaration_finalize; - -.compile_variable_declaration_finalize: - # Skip semicolon and newline. - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_var_part(); -begin - _memcmp(source_code_position, "var\0", 3); - bnez a0, .compile_var_part_end - - # Skip "var" and newline. - _advance_token(4); - _write_z(".section .data\n\0"); - -.compile_var_part_loop: - _skip_empty_lines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '\t' - beq t0, t1, .compile_var_part_declaration - - goto .compile_var_part_end; - -.compile_var_part_declaration: - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - -.compile_var_part_end: -end; - -# Process the source code and print the generated code. 
-proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - -.compile_module_loop: - _skip_empty_lines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_module_end - - # 5 is "proc " length. Space is needed to distinguish from "procedure". - _memcmp(source_code_position, "proc ", 5); - beqz a0, .compile_module_procedure - - # Not a known token, exit. - goto .compile_module_end; - -.compile_module_procedure: - _compile_procedure(); - - goto .compile_module_loop; - -.compile_module_end: -end; - -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - la t0, compiler_strings - sw t0, 0(sp) - -.compile_loop: - lw t0, 0(sp) - la t1, compiler_strings_position - lw t1, (t1) - bge t0, t1, .compile_end - - lb a0, (t0) - - addi t0, t0, 1 - sw t0, 0(sp) - - _write_c(); - - j .compile_loop - -.compile_end: - _write_c('"'); - _write_c('\n'); -end; - -# Terminates the program. a0 contains the return code. -# -# Parameters: -# a0 - Status code. -proc _exit(); -begin - li a7, 93 # SYS_EXIT - ecall -end; - -# Inserts a symbol into the table. -# -# Parameters: -# a0 - Symbol pointer. -# a1 - Symbol name length. -# a2 - Symbol name pointer. -# a3 - Symbol table. -proc _symbol_table_enter(); -begin - # The first word in the symbol table is its length, get it. 
- lw a0, 76(sp) - lw a0, (a0) - sw a0, 0(sp) - - # Calculate the offset for the new symbol. - v4 := v0 * 4; - v4 := v4 + 4; - v4 := v76 + 4; - - _memcpy(v4, @v80, 12); - - # Increment the symbol table length. - v0 := v0 + 1; - lw t0, 0(sp) - lw t1, 76(sp) - sw t0, (t1) -end; - -proc _symbol_table_build(); -begin - _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); -end; - -# -# Classification table assigns each possible character to a group (class). All -# characters of the same group a handled equivalently. -# -# Classification: -# -# TransitionClass = ( -# transitionClassInvalid = 1, -# transitionClassDigit = 2, -# transitionClassAlpha = 3, -# transitionClassSpace = 4, -# transitionClassColon = 5, -# transitionClassEquals = 6, -# transitionClassLeftParen = 7, -# transitionClassRightParen = 8, -# transitionClassAsterisk = 9, -# transitionClassUnderscore = 10, -# transitionClassSingle = 11, -# transitionClassHex = 12, -# transitionClassZero = 13, -# transitionClassX = 14, -# transitionClassEof = 15, -# transitionClassDot = 16, -# transitionClassMinus = 17, -# transitionClassSingleQuote = 18, -# transitionClassDoubleQuote = 19, -# transitionClassGreater = 20, -# transitionClassLess = 21, -# transitionClassOther = 22 -# ); -# TransitionState = ( -# transitionStateStart = 1, -# transitionStateColon = 2, -# transitionStateIdentifier = 3, -# transitionStateDecimal = 4, -# transitionStateGreater = 5, -# transitionStateMinus = 6, -# transitionStateLeftParen = 7, -# transitionStateLess = 8, -# transitionStateDot = 9, -# 
transitionStateComment = 10, -# transitionStateClosingComment = 11, -# transitionStateCharacter = 12, -# transitionStateString = 13, -# transitionStateLeadingZero = 14, -# transitionStateDecimalSuffix = 15, -# transitionStateEnd = 16 -# ); -# Transition = record -# action: TransitionAction; -# next_state: TransitionState -# end; -# TransitionAction = ( -# none = 1, -# accumulate = 2, -# skip = 3, -# single = 4, -# eof = 5, -# finalize = 6, -# composite = 7, -# key_id = 8, -# integer = 9, -# delimited = 10 -# ); - -# Assigns some value to at array index. -# -# Parameters: -# a0 - Array pointer. -# a1 - Index (word offset into the array). -# a2 - Data to assign. -proc _assign_at(); -begin - v0 := v84 + -1; - v0 := v0 * 4; - v0 := v88 + v0; - - lw t0, 0(sp) - lw t1, 80(sp) - sw t1, (t0) -end; - -proc _initialize_classification(); -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - 
_assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - 
_assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); - - v0 := 129; - -# Set the remaining 129 - 256 bytes to transitionClassOther. 
-.initialize_classification_loop: - _assign_at(@classification, v0, 22); - v0 := v0 + 1; - - lw t0, 0(sp) - li t1, 257 - blt t0, t1, .initialize_classification_loop -end; - -# Parameters: -# a0 - Current state (first index into transitions table). -# a1 - Transition (second index into transitions table).. -# a2 - Action to assign. -# a3 - Next state to assign. -proc _set_transition(); -begin - # Transitions start at offset in classification array. Save the transitions start in v0. - la t0, classification - addi t0, t0, 256 - sw t0, 0(sp) - - # Each state is 8 bytes long (2 words: action and next state). - # There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. - - v4 := v88 + -1; - v4 := v4 * 128; - - v8 := v84 + -1; - v8 := v8 * 8; - - v12 := v0 + v4; - v12 := v12 + v8; - - lw t0, 12(sp) - lw t1, 80(sp) - lw t2, 76(sp) - sw t1, (t0) - addi t0, t0, 4 - sw t2, (t0) -end; - -# Parameters: -# a0 - Current state (Transition state enumeration). -# a1 - Default action (Callback). -# a2 - Next state (Transition state enumeration). 
-proc _set_default_transition(); -begin - _set_transition(v88, 1, v84, v80); - _set_transition(v88, 2, v84, v80); - _set_transition(v88, 3, v84, v80); - _set_transition(v88, 4, v84, v80); - _set_transition(v88, 5, v84, v80); - _set_transition(v88, 6, v84, v80); - _set_transition(v88, 7, v84, v80); - _set_transition(v88, 8, v84, v80); - _set_transition(v88, 9, v84, v80); - _set_transition(v88, 10, v84, v80); - _set_transition(v88, 11, v84, v80); - _set_transition(v88, 12, v84, v80); - _set_transition(v88, 13, v84, v80); - _set_transition(v88, 14, v84, v80); - _set_transition(v88, 15, v84, v80); - _set_transition(v88, 16, v84, v80); - _set_transition(v88, 17, v84, v80); - _set_transition(v88, 18, v84, v80); - _set_transition(v88, 19, v84, v80); - _set_transition(v88, 20, v84, v80); - _set_transition(v88, 21, v84, v80); - _set_transition(v88, 22, v84, v80); -end; - - -# The transition table describes transitions from one state to another, given -# a symbol (character class). -# -# The table has m rows and n columns, where m is the amount of states and n is -# the amount of classes. So given the current state and a classified character -# the table can be used to look up the next state. -# -# Each cell is a word long. -# - The least significant byte of the word is a row number (beginning with 0). -# It specifies the target state. "ff" means that this is an end state and no -# transition is possible. -# - The next byte is the action that should be performed when transitioning. -# For the meaning of actions see labels in the lex_next function, which -# handles each action. -proc _initialize_transitions(); -begin - # Start state. 
- _set_transition(1, 1, 1, 16); - _set_transition(1, 2, 2, 4); - _set_transition(1, 3, 2, 3); - _set_transition(1, 4, 3, 1); - _set_transition(1, 5, 2, 5); - _set_transition(1, 6, 4, 16); - _set_transition(1, 7, 2, 7); - _set_transition(1, 8, 4, 16); - _set_transition(1, 9, 4, 16); - _set_transition(1, 10, 2, 3); - _set_transition(1, 11, 4, 16); - _set_transition(1, 12, 2, 3); - _set_transition(1, 13, 2, 14); - _set_transition(1, 14, 2, 3); - _set_transition(1, 15, 5, 16); - _set_transition(1, 16, 2, 9); - _set_transition(1, 17, 2, 6); - _set_transition(1, 18, 2, 12); - _set_transition(1, 19, 2, 13); - _set_transition(1, 20, 2, 5); - _set_transition(1, 21, 2, 8); - _set_transition(1, 22, 1, 16); - - # Colon state. - _set_default_transition(2, 6, 16); - _set_transition(2, 6, 7, 16); - - # Identifier state. - _set_default_transition(3, 8, 16); - _set_transition(3, 2, 2, 3); - _set_transition(3, 3, 2, 3); - _set_transition(3, 10, 2, 3); - _set_transition(3, 12, 2, 3); - _set_transition(3, 13, 2, 3); - _set_transition(3, 14, 2, 3); - - # Decimal state. - _set_default_transition(4, 9, 16); - _set_transition(4, 2, 2, 4); - _set_transition(4, 3, 2, 15); - _set_transition(4, 10, 1, 16); - _set_transition(4, 12, 2, 15); - _set_transition(4, 13, 2, 4); - _set_transition(4, 14, 2, 15); - - # Greater state. - _set_default_transition(5, 6, 16); - _set_transition(5, 6, 7, 16); - - # Minus state. - _set_default_transition(6, 6, 16); - _set_transition(6, 20, 7, 16); - - # Left paren state. - _set_default_transition(7, 6, 16); - _set_transition(7, 9, 2, 10); - - # Less state. - _set_default_transition(8, 6, 16); - _set_transition(8, 6, 7, 16); - _set_transition(8, 20, 7, 16); - - # Hexadecimal after 0x. - _set_default_transition(9, 6, 16); - _set_transition(9, 16, 7, 16); - - # Comment. - _set_default_transition(10, 2, 10); - _set_transition(10, 9, 2, 11); - _set_transition(10, 15, 1, 16); - - # Closing comment. 
- _set_default_transition(11, 2, 10); - _set_transition(11, 1, 1, 16); - _set_transition(11, 8, 10, 16); - _set_transition(11, 9, 2, 11); - _set_transition(11, 15, 1, 16); - - # Character. - _set_default_transition(12, 2, 12); - _set_transition(12, 1, 1, 16); - _set_transition(12, 15, 1, 16); - _set_transition(12, 18, 10, 16); - - # String. - _set_default_transition(13, 2, 13); - _set_transition(13, 1, 1, 16); - _set_transition(13, 15, 1, 16); - _set_transition(13, 19, 10, 16); - - # Leading zero. - _set_default_transition(14, 9, 16); - _set_transition(14, 2, 1, 16); - _set_transition(14, 3, 1, 16); - _set_transition(14, 10, 1, 16); - _set_transition(14, 12, 1, 16); - _set_transition(14, 13, 1, 16); - _set_transition(14, 14, 1, 16); - - # Digit with a character suffix. - _set_default_transition(15, 9, 16); - _set_transition(15, 3, 1, 16); - _set_transition(15, 2, 1, 16); - _set_transition(15, 12, 1, 16); - _set_transition(15, 13, 1, 16); - _set_transition(15, 14, 1, 16); -end; - -proc _lexer_get_state(); -begin - # Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. - v0 := @classification; - v4 := 16 * 22; - v0 := v0 + 256; - - return v0 + v4 -end; - -# Gets pointer to the current source text. -proc _lexer_get_current(); -begin - _lexer_get_state(); - sw a0, 0(sp) - - return v0 + 4 -end; - -# Resets the lexer state for reading the next token. -proc _lexer_reset(); -begin - # Transition start state is 1. - _lexer_get_state(); - li t0, 1 - sw t0, (a0) - sw a0, 0(sp) - - # Text pointer to the beginning of the currently read token. - _lexer_get_current(); - la t0, source_code_position - lw t0, (t0) - sw t0, (a0) - - # Initial length of the token is 0. - addi t0, t0, 4 - sw zero, (t0) -end; - -# One time lexer initialization. -proc _lexer_initialize(); -begin - _initialize_classification(); - _initialize_transitions(); -end; - -# Entry point. 
-proc _start(); -begin - _lexer_initialize(); - _symbol_table_build(); - - # Read the source from the standard input. - # Second argument is buffer size. Modifying update the source_code definition. - _read_file(@source_code, 81920); - _compile(); - - _exit(0); -end; diff --git a/boot/stage8/cl.elna b/boot/stage8/cl.elna new file mode 100644 index 0000000..75c24d7 --- /dev/null +++ b/boot/stage8/cl.elna @@ -0,0 +1,1989 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Stage 8 compiler. +# +# - Procedure calls in expressions. +# - Comments between (* and *) are supported. These are still single line +# comments and they should be on a separate line. +# - _syscall builtin. _syscall takes 7 arguments, +# the 7th argument gets stored in a7 before invoking ecall. +# Other arguments are saved in a0 through a5. +# - New intrinsics: _load_byte, _load_word, _store_byte, _store_word. +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + # Every type info starts with a word describing what type it is. + # + # PRIMITIVE_TYPE = 1 + # + # Primitive types have only type size. + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + # Info objects start with a word describing its type. + # + # INFO_TYPE = 1 + # + # Type info has the type it belongs to. 
+ symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + source_code_position: Pointer := @source_code; + +# Calculates and returns the string token length between quotes, including the +# escaping slash characters. +# +# Parameters: +# a0 - String token pointer. +# +# Returns the length in a0. +proc _string_length(); +begin + # Reset the counter. + v0 := 0; + +.string_length_loop: + v88 := v88 + 1; + + lw t0, 88(sp) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .string_length_end + + v0 := v0 + 1; + goto .string_length_loop; + +.string_length_end: + return v0 +end; + +# Adds a string to the global, read-only string storage. +# +# Parameters: +# a0 - String token. +# +# Returns the offset from the beginning of the storage to the new string in a0. +proc _add_string(); +begin + v0 := v88 + 1; + v4 := compiler_strings_length; + +.add_string_loop: + lw t0, 0(sp) + lb t1, (t0) + li t2, '"' + + beq t1, t2, .add_string_end + + la t2, compiler_strings_position + lw t3, (t2) + sb t1, (t3) + + addi t3, t3, 1 + sw t3, (t2) + + addi t0, t0, 1 + sw t0, 0(sp) + + li t2, '\\' + bne t1, t2, .add_string_increment + + goto .add_string_loop; + +.add_string_increment: + la t2, compiler_strings_length + lw t4, (t2) + addi t4, t4, 1 + sw t4, (t2) + + goto .add_string_loop; + +.add_string_end: + return v4 +end; + +# Reads standard input into a buffer. +# a0 - Buffer pointer. +# a1 - Buffer size. +# +# Returns the amount of bytes written in a0. +proc _read_file(); +begin + mv a2, a1 + mv a1, a0 + # STDIN. 
+ li a0, 0 + # SYS_READ. + li a7, 63 + ecall +end; + +# Writes to the standard output. +# +# Parameters: +# a0 - Buffer. +# a1 - Buffer length. +proc _write_s(); +begin + mv a2, a1 + mv a1, a0 + # STDOUT. + li a0, 1 + # SYS_WRITE. + li a7, 64 + ecall +end; + +# Writes a number to a string buffer. +# +# t0 - Local buffer. +# t1 - Constant 10. +# t2 - Current character. +# t3 - Whether the number is negative. +# +# Parameters: +# a0 - Whole number. +# a1 - Buffer pointer. +# +# Sets a0 to the length of the written number. +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + return v0 +end; + +# Writes a number to the standard output. +# +# Parameters: +# a0 - Whole number. +proc _write_i(); +begin + _print_i(v88, @v0); + + mv a1, a0 + addi a0, sp, 0 + _write_s(); + +end; + +# Writes a character from a0 into the standard output. +proc _write_c(); +begin + _write_s(@v88, 1); +end; + +# Write null terminated string. +# +# Parameters: +# a0 - String. +proc _write_z(); +begin +.write_z_loop: + # Check for 0 character. + lw a0, 88(sp) + lb a0, (a0) + beqz a0, .write_z_end + + # Print a character. + _write_c(); + + # Advance the input string by one byte. + v88 := v88 + 1; + + goto .write_z_loop; + +.write_z_end: +end; + +# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. +proc _is_upper(); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
+proc _is_lower(); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +# Detects if the passed character is a 7-bit alpha character or an underscore. +# +# Parameters: +# a0 - Tested character. +# +# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. +proc _is_alpha(); +begin + sw a0, 0(sp) + + _is_upper(); + sw a0, 4(sp) + + _is_lower(v0); + + lw t0, 0(sp) + xori t1, t0, '_' + seqz t1, t1 + + lw t0, 4(sp) + or a0, a0, t0 + or a0, a0, t1 +end; + +# Detects whether the passed character is a digit +# (a value between 0 and 9). +# +# Parameters: +# a0 - Examined value. +# +# Sets a0 to 1 if it is a digit, to 0 otherwise. +proc _is_digit(); +begin + v0 := v88 >= '0'; + v4 := v88 <= '9'; + + return v0 & v4 +end; + +proc _is_alnum(); +begin + sw a0, 4(sp) + + _is_alpha(); + sw a0, 0(sp) + + _is_digit(v4); + + lw a1, 0(sp) + or a0, a0, a1 +end; + +# Reads the next token. +# +# Returns token length in a0. +proc _read_token(); +begin + # Current token position. + v0 := source_code_position; + # Token length. + v4 := 0; + +.read_token_loop: + lw t0, 0(sp) + # Current character. + lb t0, (t0) + + # First we try to read a directive. + # A directive can contain a dot and characters. + li t1, '.' + beq t0, t1, .read_token_next + + lw a0, 0(sp) + lb a0, (a0) + _is_alnum(); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + # Advance the source code position and token length. + v4 := v4 + 1; + v0 := v0 + 1; + + goto .read_token_loop; + +.read_token_end: + return v4 +end; + +# a0 - First pointer. +# a1 - Second pointer. +# a2 - The length to compare. +# +# Returns 0 in a0 if memory regions are equal. +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +# Copies memory.
+# +# Parameters: +# a0 - Destination. +# a1 - Source. +# a2 - Size. +# +# Preserves a0. +proc _memcpy(); +begin + mv t0, a0 + +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop + +.memcpy_end: + mv a0, t0 +end; + +# Advances the token stream by a0 bytes. +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +# Prints the current token. +# +# Parameters: +# a0 - Token length. +# +# Returns a0 unchanged. +proc _write_token(); +begin + _write_s(source_code_position, v88); + return v88 +end; + +# Prints and skips a line. +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + # Print a character. + lw a0, (a1) + _write_c(); + + # Advance the input string by one byte. + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +proc _compile_integer_literal(); +begin + _write_z("\tli t0, \0"); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_character_literal(); +begin + _write_z("\tli t0, \0"); + + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + _write_c(); + + _write_c('\''); + _write_c('\n'); + + _advance_token(2); + +end; + +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +proc _compile_address_expression(); +begin + # Skip the "@" sign. + _advance_token(1); + _compile_designator(); + +end; + +proc _compile_negate_expression(); +begin + # Skip the "-" sign. 
+ _advance_token(1); + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +proc _compile_not_expression(); +begin + # Skip the "~" sign. + _advance_token(1); + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +proc _compile_string_literal(); +begin + _string_length(source_code_position); + sw a0, 0(sp) + + _add_string(source_code_position); + sw a0, 4(sp) + + _advance_token(v0 + 2); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(v4); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +proc _compile_term(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + sw a0, 0(sp) + + li t1, '\'' + beq a0, t1, .compile_term_character_literal + + li t1, '@' + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not + + li t1, '"' + beq a0, t1, .compile_term_string_literal + + li t1, '_' + beq a0, t1, .compile_term_call + + _is_digit(v0); + bnez a0, .compile_term_integer_literal + + goto .compile_term_variable; + +.compile_term_character_literal: + _compile_character_literal(); + goto .compile_term_end; + +.compile_term_integer_literal: + _compile_integer_literal(); + goto .compile_term_end; + +.compile_term_address: + _compile_address_expression(); + goto .compile_term_end; + +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_string_literal: + _compile_string_literal(); + goto .compile_term_end; + +.compile_term_call: + _compile_call(); + _write_z("\nmv t0, a0\n\0"); + goto .compile_term_end; + +.compile_term_variable: + _compile_variable_expression(); + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_binary_rhs(); +begin + # Skip the whitespace after the binary operator. 
+ _advance_token(1); + _compile_term(); + + # Load the left expression from the stack; + _write_z("\tlw t1, 24(sp)\n\0"); +end; + +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, ' ' + bne a0, t1, .compile_expression_end + + # It is a binary expression. + + # Save the value of the left expression on the stack. + _write_z("sw t0, 24(sp)\n\0"); + + # Skip surrounding whitespace in front of the operator. + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '+' + beq t0, t1, .compile_expression_add + + li t1, '*' + beq t0, t1, .compile_expression_mul + + li t1, '&' + beq t0, t1, .compile_expression_and + + li t1, 'o' + beq t0, t1, .compile_expression_or + + li t1, 'x' + beq t0, t1, .compile_expression_xor + + li t1, '=' + beq t0, t1, .compile_expression_equals + + li t1, '<' + beq t0, t1, .compile_expression_less + + li t1, '>' + beq t0, t1, .compile_expression_greater + + # Unknown binary operator. + unimp + +.compile_expression_add: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_mul: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_and: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_or: + _advance_token(2); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_xor: + _advance_token(3); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_equals: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. 
+ _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_less: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '>' + beq t0, t1, .compile_expression_not_equal + + li t1, '=' + beq t0, t1, .compile_expression_less_equal + + _compile_binary_rhs(); + + # Execute the operation. + _write_z("slt t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_not_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_less_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + +.compile_expression_greater: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '=' + beq t0, t1, .compile_expression_greater_equal + + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tslt t0, t1, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_greater_equal: + _advance_token(1); + _compile_binary_rhs(); + + # Execute the operation. + _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + # Stack variables: + # v0 - Procedure name length. + # v4 - Procedure name pointer. + # v8 - Argument count. + + _read_token(); + sw a0, 0(sp) + v4 := source_code_position; + v8 := 0; + + # Skip the identifier and left paren. + _advance_token(v0 + 1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + # Save the argument on the stack. 
+ _write_z("\tsw t0, \0"); + + # Calculate the stack offset: 116 - (4 * argument_counter) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + # Add one to the argument counter. + v8 := v8 + 1; + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + # Load the argument from the stack. + + lw t0, 8(sp) + beqz t0, .compile_call_end + + # Decrement the argument counter. + v8 := v8 + -1; + + _write_z("\tlw a\0"); + _write_i(v8); + + _write_z(", \0"); + + # Calculate the stack offset: 116 - (4 * argument_counter) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize; + +.compile_call_end: + _write_z("\tcall \0"); + _write_s(v4, v0); + + # Skip the right paren. + _advance_token(1); +end; + +proc _compile_goto(); +begin + _advance_token(5); + + _read_token(); + sw a0, 0(sp) + + _write_z("\tj \0"); + + _write_token(v0); + _advance_token(); +end; + +proc _compile_local_designator(); +begin + # Skip "v" in the local variable name. + _advance_token(1); + _write_z("\t addi t0, sp, \0"); + + # Read local variable stack offset and save it. 
+ _read_token(); + _write_token(); + _advance_token(); + _write_c('\n'); +end; + +proc _compile_global_designator(); +begin + _write_z("\tla t0, \0"); + + _read_token(); + _write_token(); + _advance_token(); + + _write_c('\n'); +end; + +proc _compile_designator(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, 'v' + beq a0, t1, .compile_designator_local + + goto .compile_designator_global; + +.compile_designator_local: + _compile_local_designator(); + goto .compile_designator_end; + +.compile_designator_global: + _compile_global_designator(); + goto .compile_designator_end; + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + # Save the assignee address on the stack. + _write_z("\tsw t0, 20(sp)\n\0"); + + # Skip the assignment sign (:=) with surrounding whitespaces. + _advance_token(4); + + # Compile the assignment. + _compile_expression(); + + _write_z("\tlw t1, 20(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +begin + # Skip "return" keyword and whitespace after it. + _advance_token(7); + _compile_expression(); + + _write_z("mv a0, t0\n\0"); +end; + +proc _compile_statement(); +begin + # This is a call if the statement starts with an underscore. + la t0, source_code_position + lw t0, (t0) + # First character after alignment tab. + addi t0, t0, 1 + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + li t1, 'v' + beq t0, t1, .compile_statement_assignment + + # "\treturn" is 7 bytes long: the alignment tab plus the keyword.
+ _memcmp(source_code_position, "\treturn", 7); + beqz a0, .compile_statement_return + + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + _advance_token(1); + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + _advance_token(1); + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_assignment: + _advance_token(1); + _compile_assignment(); + + goto .compile_statement_semicolon; + +.compile_statement_return: + _advance_token(1); + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + +.compile_statement_semicolon: + _advance_token(2); + _write_c('\n'); + +.compile_statement_end: +end; + +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + _skip_empty_lines(); + + # 3 is "end" length. + _memcmp(source_code_position, "end", 3); + + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +# Writes a regster name to the standard output. +# +# Parameters: +# a0 - Register character. +# a1 - Register number. +proc _write_register(); +begin + _write_c(v88); + v84 := v84 + '0'; + _write_c(v84); +end; + +proc _compile_procedure_prologue(); +begin + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z("\tsw a\0"); + _write_i(v0); + _write_z(", \0"); + + # Calculate the stack offset: 88 - (4 * parameter_counter) + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); + + _write_z("(sp)\n\0"); + + v0 := v0 + 1; + lw a0, 0(sp) + + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + +proc _compile_procedure(); +begin + # Skip "proc ". + _advance_token(5); + + _read_token(); + # Save the procedure name length. + sw a0, 0(sp) + + # Write .type _procedure_name, @function. 
+ _write_z(".type \0"); + + _write_token(v0); + _write_z(", @function\n\0"); + + # Write procedure label, _procedure_name: + _write_token(v0); + _write_z(":\n\0"); + + # Skip the function name and trailing parens, semicolon, "begin" and newline. + _advance_token(v0 + 10); + + _compile_procedure_prologue(); + _compile_procedure_body(); + + # Write the epilogue. + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + # Skip the "end" keyword, semicolon and newline. + _advance_token(5); +end; + +proc _skip_newlines(); +begin + # Skip newlines. + la t0, source_code_position + lw t1, (t0) + +.skip_newlines_loop: + lb t2, (t1) + li t3, '\n' + bne t2, t3, .skip_newlines_end + beqz t2, .skip_newlines_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_newlines_loop; + +.skip_newlines_end: +end; + +# Prints and skips a line. +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + # Check for newline character. + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + # Advance the input string by one byte. + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + # Skip the newline. + addi t1, t1, 1 + sw t1, (t0) +end; + +# Skip newlines and comments. 
+# Leading tabs are only looked past: they are consumed together with a comment or newline that follows them, otherwise the source position is left unchanged. +proc _skip_empty_lines(); +begin +.skip_empty_lines_rerun: + # v0 is the scan pointer, it starts at the current source position. + la t0, source_code_position + lw t0, (t0) + sw t0, 0(sp) + +.skip_empty_lines_loop: + lw t2, 0(sp) + lb t0, (t2) + + li t1, '#' + beq t0, t1, .skip_empty_lines_comment + + li t1, '\n' + beq t0, t1, .skip_empty_lines_newline + + li t1, '\t' + beq t0, t1, .skip_empty_lines_tab + + # An opening paren followed by an asterisk also starts a comment. + li t1, '(' + bne t0, t1, .skip_empty_lines_end + addi t2, t2, 1 + lb t0, (t2) + li t1, '*' + beq t0, t1, .skip_empty_lines_comment + + goto .skip_empty_lines_end; + +.skip_empty_lines_comment: + # Commit the scan pointer and drop the rest of the line. + la t0, source_code_position + lw t1, 0(sp) + sw t1, (t0) + _skip_comment(); + goto .skip_empty_lines_rerun; + +.skip_empty_lines_newline: + # Commit the scan pointer advanced past the newline. + la t0, source_code_position + lw t1, 0(sp) + addi t1, t1, 1 + sw t1, (t0) + goto .skip_empty_lines_rerun; + +.skip_empty_lines_tab: + v0 := v0 + 1; + goto .skip_empty_lines_loop; + +.skip_empty_lines_end: +end; + +# Compiles the initializer of a global: a string literal, an S(...) record, an @ pointer or a number. +# Any other first character ends in unimp. +proc _compile_global_initializer(); +begin + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .compile_global_initializer_string + + li t1, 'S' + beq t0, t1, .compile_global_initializer_record + + li t1, '@' + beq t0, t1, .compile_global_initializer_pointer + + la a0, source_code_position + lw a0, (a0) + lb a0, (a0) + _is_digit(); + bnez a0, .compile_global_initializer_number + + unimp + +.compile_global_initializer_pointer: + # Skip @. + _advance_token(1); + _write_z("\n\t.word \0"); + _read_token(); + _write_token(); + _advance_token(); + + goto .compile_global_initializer_end; + +.compile_global_initializer_number: + _write_z("\n\t.word \0"); + _read_token(); + _write_token(); + # Advance by the whole token, exactly like the pointer branch does, so that + # numbers with more than one digit are skipped completely. + _advance_token(); + + goto .compile_global_initializer_end; + +.compile_global_initializer_record: + # Skip "S(". 
+ _advance_token(2); + + # An empty record closes immediately. + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + +.compile_global_initializer_loop: + # Every record field is itself an initializer. + _compile_global_initializer(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + + # Skip comma and whitespace after it. + _advance_token(2); + + goto .compile_global_initializer_loop; + +.compile_global_initializer_closing: + # Skip ")" + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_string: + _write_z("\n\t.word strings + \0"); + # Remember the string length in v4, it is needed to skip the literal below. + _string_length(source_code_position); + sw a0, 4(sp) + + # The value returned by _add_string is printed as the offset into strings. + _add_string(source_code_position); + _write_i(); + + # Skip the quoted string. + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + +.compile_global_initializer_end: +end; + +# Compiles one "name := initializer;" line of the const part into a labelled object. +proc _compile_constant_declaration(); +begin + # v0 is the constant name length. + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the constant name with assignment sign and surrounding whitespaces. + _advance_token(v0 + 4); + _compile_global_initializer(); + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); +end; + +# Compiles the optional const part into the .rodata section. +proc _compile_const_part(); +begin + _skip_empty_lines(); + + _memcmp(source_code_position, "const\0", 5); + bnez a0, .compile_const_part_end + + # Skip "const" with the newline after it. + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + +.compile_const_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + # If the character at the line beginning is not indentation, + # it is probably the next code section. 
+ li t1, '\t' + bne t0, t1, .compile_const_part_end + + # Skip the indentation tab. + _advance_token(1); + + _compile_constant_declaration(); + goto .compile_const_part_loop; + +.compile_const_part_end: +end; + +# Compiles one "name: Type" or "name: Type := initializer" line of the var part into a labelled object. +proc _compile_variable_declaration(); +begin + # v0 is the variable name length. + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + # Skip the variable name and colon with space before the type. + _advance_token(v0 + 2); + + # Skip the type name. + _read_token(); + _advance_token(); + + # A space after the type name means that an initializer follows. + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ' ' + beq t0, t1, .compile_variable_declaration_initializer + + # Else we assume this is a zeroed 81920 bytes big array. + _write_z(" .zero 81920\0"); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_initializer: + # Skip the assignment sign with surrounding whitespaces. + _advance_token(4); + _compile_global_initializer(); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_finalize: + # Skip semicolon and newline. + _advance_token(2); + _write_c('\n'); +end; + +# Compiles the optional var part into the .data section. +proc _compile_var_part(); +begin + _memcmp(source_code_position, "var\0", 3); + bnez a0, .compile_var_part_end + + # Skip "var" and newline. + _advance_token(4); + _write_z(".section .data\n\0"); + +.compile_var_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + # Declarations are indented with a tab, anything else ends the var part. + li t1, '\t' + beq t0, t1, .compile_var_part_declaration + + goto .compile_var_part_end; + +.compile_var_part_declaration: + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + +.compile_var_part_end: +end; + +# Process the source code and print the generated code. 
+proc _compile_module(); +begin + _compile_const_part(); + _skip_empty_lines(); + _compile_var_part(); + + # Emit the built-in helper procedures ahead of the compiled ones. + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); + +.compile_module_loop: + _skip_empty_lines(); + + # A zero byte marks the end of the source text. + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + beqz t0, .compile_module_end + + # 5 is "proc " length. Space is needed to distinguish from "procedure". + _memcmp(source_code_position, "proc ", 5); + beqz a0, .compile_module_procedure + + # Not a known token, exit. + goto .compile_module_end; + +.compile_module_procedure: + _compile_procedure(); + + goto .compile_module_loop; + +.compile_module_end: +end; + +# Compiles the module and then emits the collected string literals as one .ascii object named strings. +proc _compile(); +begin + _write_z(".globl _start\n\n\0"); + _compile_module(); + + _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); + _write_c('"'); + + # v0 walks from the start of compiler_strings up to compiler_strings_position. + la t0, compiler_strings + sw t0, 0(sp) + +.compile_loop: + lw t0, 0(sp) + la t1, compiler_strings_position + lw t1, (t1) + bge t0, t1, .compile_end + + lb a0, (t0) + + addi t0, t0, 1 + sw t0, 0(sp) + + # The byte to print is already in a0. + _write_c(); + + j .compile_loop + +.compile_end: + _write_c('"'); + _write_c('\n'); +end; + +# Terminates the program. a0 contains the return code. +# +# Parameters: +# a0 - Status code. +proc _exit(); +begin + li a7, 93 # SYS_EXIT + ecall +end; + +# Inserts a symbol into the table. +# +# Parameters: +# a0 - Symbol pointer. +# a1 - Symbol name length. +# a2 - Symbol name pointer. +# a3 - Symbol table. +proc _symbol_table_enter(); +begin + # The first word in the symbol table is its length, get it. 
+ lw a0, 76(sp) + lw a0, (a0) + sw a0, 0(sp) + + # Calculate the address for the new symbol: the table start, the length word + # and 12 bytes for every entry that is already stored. + v4 := v0 * 12; + v4 := v4 + 4; + v4 := v76 + v4; + + # An entry is the three argument words a2, a1 and a0 as they were saved at 80(sp) to 88(sp). + _memcpy(v4, @v80, 12); + + # Increment the symbol table length. + v0 := v0 + 1; + lw t0, 0(sp) + lw t1, 76(sp) + sw t0, (t1) +end; + +# Enters the built-in types Int, Word, Pointer, Char and Bool into the global symbol table. +proc _symbol_table_build(); +begin + _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); + _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); +end; + +# +# Classification table assigns each possible character to a group (class). All +# characters of the same group are handled equivalently. +# +# Classification: +# +# TransitionClass = ( +# transitionClassInvalid = 1, +# transitionClassDigit = 2, +# transitionClassAlpha = 3, +# transitionClassSpace = 4, +# transitionClassColon = 5, +# transitionClassEquals = 6, +# transitionClassLeftParen = 7, +# transitionClassRightParen = 8, +# transitionClassAsterisk = 9, +# transitionClassUnderscore = 10, +# transitionClassSingle = 11, +# transitionClassHex = 12, +# transitionClassZero = 13, +# transitionClassX = 14, +# transitionClassEof = 15, +# transitionClassDot = 16, +# transitionClassMinus = 17, +# transitionClassSingleQuote = 18, +# transitionClassDoubleQuote = 19, +# transitionClassGreater = 20, +# transitionClassLess = 21, +# transitionClassOther = 22 +# ); +# TransitionState = ( +# transitionStateStart = 1, +# transitionStateColon = 2, +# transitionStateIdentifier = 3, +# transitionStateDecimal = 4, +# transitionStateGreater = 5, +# transitionStateMinus = 6, +# transitionStateLeftParen = 7, +# transitionStateLess = 8, +# transitionStateDot = 9, +# 
transitionStateComment = 10, +# transitionStateClosingComment = 11, +# transitionStateCharacter = 12, +# transitionStateString = 13, +# transitionStateLeadingZero = 14, +# transitionStateDecimalSuffix = 15, +# transitionStateEnd = 16 +# ); +# Transition = record +# action: TransitionAction; +# next_state: TransitionState +# end; +# TransitionAction = ( +# none = 1, +# accumulate = 2, +# skip = 3, +# single = 4, +# eof = 5, +# finalize = 6, +# composite = 7, +# key_id = 8, +# integer = 9, +# delimited = 10 +# ); + +# Assigns a word sized value at a 1-based array index. +# +# Parameters: +# a0 - Array pointer. +# a1 - Index (1-based, counted in words). +# a2 - Data to assign. +proc _assign_at(); +begin + v0 := v84 + -1; + v0 := v0 * 4; + v0 := v88 + v0; + + lw t0, 0(sp) + lw t1, 80(sp) + sw t1, (t0) +end; + +# Fills the classification table: entry i (1-based) holds the TransitionClass of the character with the code i - 1. +proc _initialize_classification(); +begin + _assign_at(@classification, 1, 15); + _assign_at(@classification, 2, 1); + _assign_at(@classification, 3, 1); + _assign_at(@classification, 4, 1); + _assign_at(@classification, 5, 1); + _assign_at(@classification, 6, 1); + _assign_at(@classification, 7, 1); + _assign_at(@classification, 8, 1); + _assign_at(@classification, 9, 1); + _assign_at(@classification, 10, 4); + _assign_at(@classification, 11, 4); + _assign_at(@classification, 12, 1); + _assign_at(@classification, 13, 1); + _assign_at(@classification, 14, 4); + _assign_at(@classification, 15, 1); + _assign_at(@classification, 16, 1); + _assign_at(@classification, 17, 1); + _assign_at(@classification, 18, 1); + _assign_at(@classification, 19, 1); + _assign_at(@classification, 20, 1); + _assign_at(@classification, 21, 1); + _assign_at(@classification, 22, 1); + _assign_at(@classification, 23, 1); + _assign_at(@classification, 24, 1); + _assign_at(@classification, 25, 1); + _assign_at(@classification, 26, 1); + _assign_at(@classification, 27, 1); + _assign_at(@classification, 28, 1); + _assign_at(@classification, 29, 1); + _assign_at(@classification, 30, 1); + 
_assign_at(@classification, 31, 1); + _assign_at(@classification, 32, 1); + _assign_at(@classification, 33, 4); + _assign_at(@classification, 34, 11); + _assign_at(@classification, 35, 19); + _assign_at(@classification, 36, 22); + _assign_at(@classification, 37, 22); + _assign_at(@classification, 38, 11); + _assign_at(@classification, 39, 11); + _assign_at(@classification, 40, 18); + _assign_at(@classification, 41, 7); + _assign_at(@classification, 42, 8); + _assign_at(@classification, 43, 9); + _assign_at(@classification, 44, 11); + _assign_at(@classification, 45, 11); + _assign_at(@classification, 46, 17); + _assign_at(@classification, 47, 16); + _assign_at(@classification, 48, 11); + _assign_at(@classification, 49, 13); + _assign_at(@classification, 50, 2); + _assign_at(@classification, 51, 2); + _assign_at(@classification, 52, 2); + _assign_at(@classification, 53, 2); + _assign_at(@classification, 54, 2); + _assign_at(@classification, 55, 2); + _assign_at(@classification, 56, 2); + _assign_at(@classification, 57, 2); + _assign_at(@classification, 58, 2); + _assign_at(@classification, 59, 5); + _assign_at(@classification, 60, 11); + _assign_at(@classification, 61, 21); + _assign_at(@classification, 62, 6); + _assign_at(@classification, 63, 20); + _assign_at(@classification, 64, 22); + _assign_at(@classification, 65, 11); + _assign_at(@classification, 66, 3); + _assign_at(@classification, 67, 3); + _assign_at(@classification, 68, 3); + _assign_at(@classification, 69, 3); + _assign_at(@classification, 70, 3); + _assign_at(@classification, 71, 3); + _assign_at(@classification, 72, 3); + _assign_at(@classification, 73, 3); + _assign_at(@classification, 74, 3); + _assign_at(@classification, 75, 3); + _assign_at(@classification, 76, 3); + _assign_at(@classification, 77, 3); + _assign_at(@classification, 78, 3); + _assign_at(@classification, 79, 3); + _assign_at(@classification, 80, 3); + _assign_at(@classification, 81, 3); + _assign_at(@classification, 82, 3); + 
_assign_at(@classification, 83, 3); + _assign_at(@classification, 84, 3); + _assign_at(@classification, 85, 3); + _assign_at(@classification, 86, 3); + _assign_at(@classification, 87, 3); + _assign_at(@classification, 88, 3); + _assign_at(@classification, 89, 3); + _assign_at(@classification, 90, 3); + _assign_at(@classification, 91, 3); + _assign_at(@classification, 92, 11); + _assign_at(@classification, 93, 22); + _assign_at(@classification, 94, 11); + _assign_at(@classification, 95, 11); + _assign_at(@classification, 96, 10); + _assign_at(@classification, 97, 22); + _assign_at(@classification, 98, 12); + _assign_at(@classification, 99, 12); + _assign_at(@classification, 100, 12); + _assign_at(@classification, 101, 12); + _assign_at(@classification, 102, 12); + _assign_at(@classification, 103, 12); + _assign_at(@classification, 104, 3); + _assign_at(@classification, 105, 3); + _assign_at(@classification, 106, 3); + _assign_at(@classification, 107, 3); + _assign_at(@classification, 108, 3); + _assign_at(@classification, 109, 3); + _assign_at(@classification, 110, 3); + _assign_at(@classification, 111, 3); + _assign_at(@classification, 112, 3); + _assign_at(@classification, 113, 3); + _assign_at(@classification, 114, 3); + _assign_at(@classification, 115, 3); + _assign_at(@classification, 116, 3); + _assign_at(@classification, 117, 3); + _assign_at(@classification, 118, 3); + _assign_at(@classification, 119, 3); + _assign_at(@classification, 120, 3); + _assign_at(@classification, 121, 14); + _assign_at(@classification, 122, 3); + _assign_at(@classification, 123, 3); + _assign_at(@classification, 124, 22); + _assign_at(@classification, 125, 11); + _assign_at(@classification, 126, 22); + _assign_at(@classification, 127, 11); + _assign_at(@classification, 128, 1); + + v0 := 129; + +# Set the remaining entries 129 to 256 to transitionClassOther. 
+.initialize_classification_loop: + _assign_at(@classification, v0, 22); + v0 := v0 + 1; + + lw t0, 0(sp) + li t1, 257 + blt t0, t1, .initialize_classification_loop +end; + +# Stores one cell (action and next state) of the transition table. +# +# Parameters: +# a0 - Current state (first index into transitions table). +# a1 - Transition class (second index into transitions table). +# a2 - Action to assign. +# a3 - Next state to assign. +proc _set_transition(); +begin + # The classification table holds 256 word sized entries (see _assign_at), so the + # transitions start 1024 bytes into the classification array. Save the transitions start in v0. + la t0, classification + addi t0, t0, 1024 + sw t0, 0(sp) + + # Each cell is 8 bytes long (2 words: action and next state). + # There are 22 transition classes, so the row of a state is 8 * 22 = 176 bytes long. + + v4 := v88 + -1; + v4 := v4 * 176; + + v8 := v84 + -1; + v8 := v8 * 8; + + v12 := v0 + v4; + v12 := v12 + v8; + + # The action goes into the first word of the cell, the next state into the second. + lw t0, 12(sp) + lw t1, 80(sp) + lw t2, 76(sp) + sw t1, (t0) + addi t0, t0, 4 + sw t2, (t0) +end; + +# Fills the whole row of a state, all 22 transition classes, with the same action and next state. +# +# Parameters: +# a0 - Current state (Transition state enumeration). +# a1 - Default action (Callback). +# a2 - Next state (Transition state enumeration). 
+proc _set_default_transition(); +begin + _set_transition(v88, 1, v84, v80); + _set_transition(v88, 2, v84, v80); + _set_transition(v88, 3, v84, v80); + _set_transition(v88, 4, v84, v80); + _set_transition(v88, 5, v84, v80); + _set_transition(v88, 6, v84, v80); + _set_transition(v88, 7, v84, v80); + _set_transition(v88, 8, v84, v80); + _set_transition(v88, 9, v84, v80); + _set_transition(v88, 10, v84, v80); + _set_transition(v88, 11, v84, v80); + _set_transition(v88, 12, v84, v80); + _set_transition(v88, 13, v84, v80); + _set_transition(v88, 14, v84, v80); + _set_transition(v88, 15, v84, v80); + _set_transition(v88, 16, v84, v80); + _set_transition(v88, 17, v84, v80); + _set_transition(v88, 18, v84, v80); + _set_transition(v88, 19, v84, v80); + _set_transition(v88, 20, v84, v80); + _set_transition(v88, 21, v84, v80); + _set_transition(v88, 22, v84, v80); +end; + + +# The transition table describes transitions from one state to another, given +# a symbol (character class). +# +# The table has m rows and n columns, where m is the amount of states and n is +# the amount of classes. So given the current state and a classified character +# the table can be used to look up the next state. +# +# Each cell is two words (8 bytes) long, see _set_transition. +# - The first word is the action that should be performed when transitioning +# (a TransitionAction value). +# - The second word is the target state (a TransitionState value). +# transitionStateEnd (16) has no row of its own; presumably it means that +# the token is complete and no further transition is possible. +# The calls below pass: state, class, action, next state. +proc _initialize_transitions(); +begin + # Start state. 
+ _set_transition(1, 1, 1, 16); + _set_transition(1, 2, 2, 4); + _set_transition(1, 3, 2, 3); + _set_transition(1, 4, 3, 1); + _set_transition(1, 5, 2, 2); + _set_transition(1, 6, 4, 16); + _set_transition(1, 7, 2, 7); + _set_transition(1, 8, 4, 16); + _set_transition(1, 9, 4, 16); + _set_transition(1, 10, 2, 3); + _set_transition(1, 11, 4, 16); + _set_transition(1, 12, 2, 3); + _set_transition(1, 13, 2, 14); + _set_transition(1, 14, 2, 3); + _set_transition(1, 15, 5, 16); + _set_transition(1, 16, 2, 9); + _set_transition(1, 17, 2, 6); + _set_transition(1, 18, 2, 12); + _set_transition(1, 19, 2, 13); + _set_transition(1, 20, 2, 5); + _set_transition(1, 21, 2, 8); + _set_transition(1, 22, 1, 16); + + # Colon state. + _set_default_transition(2, 6, 16); + _set_transition(2, 6, 7, 16); + + # Identifier state. + _set_default_transition(3, 8, 16); + _set_transition(3, 2, 2, 3); + _set_transition(3, 3, 2, 3); + _set_transition(3, 10, 2, 3); + _set_transition(3, 12, 2, 3); + _set_transition(3, 13, 2, 3); + _set_transition(3, 14, 2, 3); + + # Decimal state. + _set_default_transition(4, 9, 16); + _set_transition(4, 2, 2, 4); + _set_transition(4, 3, 2, 15); + _set_transition(4, 10, 1, 16); + _set_transition(4, 12, 2, 15); + _set_transition(4, 13, 2, 4); + _set_transition(4, 14, 2, 15); + + # Greater state. + _set_default_transition(5, 6, 16); + _set_transition(5, 6, 7, 16); + + # Minus state. + _set_default_transition(6, 6, 16); + _set_transition(6, 20, 7, 16); + + # Left paren state. + _set_default_transition(7, 6, 16); + _set_transition(7, 9, 2, 10); + + # Less state. + _set_default_transition(8, 6, 16); + _set_transition(8, 6, 7, 16); + _set_transition(8, 20, 7, 16); + + # Dot state. + _set_default_transition(9, 6, 16); + _set_transition(9, 16, 7, 16); + + # Comment. + _set_default_transition(10, 2, 10); + _set_transition(10, 9, 2, 11); + _set_transition(10, 15, 1, 16); + + # Closing comment. 
+ _set_default_transition(11, 2, 10); + _set_transition(11, 1, 1, 16); + _set_transition(11, 8, 10, 16); + _set_transition(11, 9, 2, 11); + _set_transition(11, 15, 1, 16); + + # Character. + _set_default_transition(12, 2, 12); + _set_transition(12, 1, 1, 16); + _set_transition(12, 15, 1, 16); + _set_transition(12, 18, 10, 16); + + # String. + _set_default_transition(13, 2, 13); + _set_transition(13, 1, 1, 16); + _set_transition(13, 15, 1, 16); + _set_transition(13, 19, 10, 16); + + # Leading zero. + _set_default_transition(14, 9, 16); + _set_transition(14, 2, 1, 16); + _set_transition(14, 3, 1, 16); + _set_transition(14, 10, 1, 16); + _set_transition(14, 12, 1, 16); + _set_transition(14, 13, 1, 16); + _set_transition(14, 14, 1, 16); + + # Digit with a character suffix. + _set_default_transition(15, 9, 16); + _set_transition(15, 3, 1, 16); + _set_transition(15, 2, 1, 16); + _set_transition(15, 12, 1, 16); + _set_transition(15, 13, 1, 16); + _set_transition(15, 14, 1, 16); +end; + +# Returns a pointer to the lexer state record: state word, token start pointer and token length. +proc _lexer_get_state(); +begin + # Lexer state is saved after the transition table. The offset is 1024 bytes of + # classification entries plus 16 state rows of 176 bytes each. + v0 := @classification; + v4 := 16 * 176; + v0 := v0 + 1024; + + return v0 + v4 +end; + +# Gets pointer to the current source text. +proc _lexer_get_current(); +begin + _lexer_get_state(); + sw a0, 0(sp) + + # The text pointer is the second word of the lexer state. + return v0 + 4 +end; + +# Resets the lexer state for reading the next token. +proc _lexer_reset(); +begin + # Transition start state is 1. + _lexer_get_state(); + li t0, 1 + sw t0, (a0) + sw a0, 0(sp) + + # Text pointer to the beginning of the currently read token. + _lexer_get_current(); + la t0, source_code_position + lw t0, (t0) + sw t0, (a0) + + # Initial length of the token is 0. It lives in the word after the text pointer, + # whose address is still in a0. + addi a0, a0, 4 + sw zero, (a0) +end; + +# One time lexer initialization. +proc _lexer_initialize(); +begin + _initialize_classification(); + _initialize_transitions(); +end; + +# Entry point. 
+proc _start(); +begin + _lexer_initialize(); + _symbol_table_build(); + + # Read the source from the standard input. + # Second argument is buffer size. Modifying update the source_code definition. + _read_file(@source_code, 81920); + _compile(); + + _exit(0); +end; diff --git a/boot/stage9.elna b/boot/stage9.elna deleted file mode 100644 index 21d87ef..0000000 --- a/boot/stage9.elna +++ /dev/null @@ -1,1993 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) - -(* Stage 9 compiler. *) - -(* - Procedure calls in expressions. *) -(* - Comments between (* and *) are supported. These are still single line *) -(* comments and they should be on a separate line. *) -(* - if-else statements. *) -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - (* Every type info starts with a word describing what type it is. *) - - (* PRIMITIVE_TYPE = 1 *) - - (* Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - (* Info objects start with a word describing its type. *) - - (* INFO_TYPE = 1 *) - - (* Type info has the type it belongs to. 
*) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - source_code_position: Pointer := @source_code; - -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. *) - -(* Parameters: *) -(* a0 - String token pointer. *) - -(* Returns the length in a0. *) -proc _string_length(); -begin - (* Reset the counter. *) - v0 := 0; - -.string_length_loop: - v88 := v88 + 1; - - lw t0, 88(sp) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .string_length_end - - v0 := v0 + 1; - goto .string_length_loop; - -.string_length_end: - return v0 -end; - -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* a0 - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. *) -proc _add_string(); -begin - v0 := v88 + 1; - v4 := compiler_strings_length; - -.add_string_loop: - lw t0, 0(sp) - lb t1, (t0) - li t2, '"' - - beq t1, t2, .add_string_end - - v8 := _load_byte(v0); - _store_byte(v8, compiler_strings_position); - _store_word(compiler_strings_position + 1, @compiler_strings_position); - v0 := v0 + 1; - - lb t1, 8(sp) - li t2, '\\' - bne t1, t2, .add_string_increment - - goto .add_string_loop; - -.add_string_increment: - la t2, compiler_strings_length - lw t4, (t2) - addi t4, t4, 1 - sw t4, (t2) - - goto .add_string_loop; - -.add_string_end: - return v4 -end; - -(* Reads standard input into a buffer. *) -(* a0 - Buffer pointer. 
*) -(* a1 - Buffer size. *) - -(* Returns the amount of bytes written in a0. *) -proc _read_file(); -begin - _syscall(0, v88, v84, 0, 0, 0, 63); -end; - -(* Writes to the standard output. *) - -(* Parameters: *) -(* a0 - Buffer. *) -(* a1 - Buffer length. *) -proc _write_s(); -begin - _syscall(1, v88, v84, 0, 0, 0, 64); -end; - -(* Writes a number to a string buffer. *) - -(* t0 - Local buffer. *) -(* t1 - Constant 10. *) -(* t2 - Current character. *) -(* t3 - Whether the number is negative. *) - -(* Parameters: *) -(* a0 - Whole number. *) -(* a1 - Buffer pointer. *) - -(* Sets a0 to the length of the written number. *) -proc _print_i(); -begin - li t1, 10 - addi t0, s0, -9 - - li t3, 0 - bgez a0, .print_i_digit10 - li t3, 1 - neg a0, a0 - -.print_i_digit10: - rem t2, a0, t1 - addi t2, t2, '0' - sb t2, 0(t0) - div a0, a0, t1 - addi t0, t0, -1 - bne zero, a0, .print_i_digit10 - - beq zero, t3, .print_i_write_call - addi t2, zero, '-' - sb t2, 0(t0) - addi t0, t0, -1 - -.print_i_write_call: - mv a0, a1 - addi a1, t0, 1 - sub a2, s0, t0 - addi a2, a2, -9 - sw a2, 0(sp) - - _memcpy(); - - return v0 -end; - -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* a0 - Whole number. *) -proc _write_i(); -begin - v4 := _print_i(v88, @v0); - _write_s(@v0, v4); -end; - -(* Writes a character from a0 into the standard output. *) -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -(* Write null terminated string. *) - -(* Parameters: *) -(* a0 - String. *) -proc _write_z(); -begin -.write_z_loop: - (* Check for 0 character. *) - lw a0, 88(sp) - lb a0, (a0) - beqz a0, .write_z_end - - (* Print a character. *) - _write_c(); - - (* Advance the input string by one byte. *) - v88 := v88 + 1; - - goto .write_z_loop; - -.write_z_end: -end; - -(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -(* Detects if a0 is an lowercase character. 
Sets a0 to 1 if so, otherwise to 0. *) -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Paramters: *) -(* a0 - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) -proc _is_alpha(); -begin - v0 := _is_upper(v88); - v4 := _is_lower(v88); - v8 := v88 = '_'; - - v12 := v0 or v4; - return v12 or v8 -end; - -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* a0 - Exemined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -proc _is_alnum(); -begin - v0 := _is_alpha(v88); - v4 := _is_digit(v88); - - return v0 or v4 -end; - -(* Reads the next token. *) - -(* Returns token length in a0. *) -proc _read_token(); -begin - (* Current token position. *) - v0 := source_code_position; - (* Token length. *) - v4 := 0; - -.read_token_loop: - lw t0, 0(sp) - (* Current character. *) - lb t0, (t0) - - (* First we try to read a derictive. *) - (* A derictive can contain a dot and characters. *) - li t1, '.' - beq t0, t1, .read_token_next - - v8 := _load_byte(v0); - _is_alnum(v8); - bnez a0, .read_token_next - - goto .read_token_end; - -.read_token_next: - (* Advance the source code position and token length. *) - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - -.read_token_end: - return v4 -end; - -(* a0 - First pointer. *) -(* a1 - Second pointer. *) -(* a2 - The length to compare. *) - -(* Returns 0 in a0 if memory regions are equal. 
*) -proc _memcmp(); -begin - mv t0, a0 - li a0, 0 - -.memcmp_loop: - beqz a2, .memcmp_end - - lbu t1, (t0) - lbu t2, (a1) - sub a0, t1, t2 - - bnez a0, .memcmp_end - - addi t0, t0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcmp_loop; - -.memcmp_end: -end; - -(* Copies memory. *) - -(* Parameters: *) -(* a0 - Destination. *) -(* a1 - Source. *) -(* a2 - Size. *) - -(* Preserves a0. *) -proc _memcpy(); -begin -.memcpy_loop: - beqz a2, .memcpy_end - - lbu t1, (a1) - sb t1, (a0) - - addi a0, a0, 1 - addi a1, a1, 1 - addi a2, a2, -1 - - goto .memcpy_loop - -.memcpy_end: - return v88 -end; - -(* Advances the token stream by a0 bytes. *) -proc _advance_token(); -begin - la t0, source_code_position - lw t1, (t0) - add t1, t1, a0 - sw t1, (t0) -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* a0 - Token length. *) - -(* Returns a0 unchanged. *) -proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -(* Prints and skips a line. *) -proc _compile_line(); -begin -.compile_line_loop: - la a0, source_code_position - lw a1, (a0) - - lb t0, (a1) - li t1, '\n' - beq t0, t1, .compile_line_end - - (* Print a character. *) - lw a0, (a1) - _write_c(); - - (* Advance the input string by one byte. 
*) - _advance_token(1); - - goto .compile_line_loop; - -.compile_line_end: - _write_c('\n'); - - (* Skip the newline. *) - _advance_token(1); -end; - -(* Compiles an integer literal: emits "li t0, " followed by the literal token. *) -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -(* Compiles a character literal: emits "li t0, " followed by the quoted character, *) -(* keeping the backslash of an escape sequence if there is one. *) -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - (* Write the opening quote and skip one byte of the source. *) - _write_c('\''); - _advance_token(1); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - li t1, '\\' - bne a0, t1, .compile_character_literal_end - - (* Write and skip the backslash of an escape sequence. *) - _write_c('\\'); - _advance_token(1); - -.compile_character_literal_end: - v0 := _load_byte(source_code_position); - _write_c(v0); - - _write_c('\''); - _write_c('\n'); - - (* Skip the character and, presumably, the closing quote. *) - _advance_token(2); -end; - -(* Compiles a designator and emits a load of the word it addresses into t0. *) -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -(* Compiles an address expression: only the designator code is emitted, no load. *) -proc _compile_address_expression(); -begin - (* Skip the "@" sign. *) - _advance_token(1); - _compile_designator(); -end; - -(* Compiles a negated term: the term code followed by "neg t0, t0". *) -proc _compile_negate_expression(); -begin - (* Skip the "-" sign. *) - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -(* Compiles an inverted term: the term code followed by "not t0, t0". *) -proc _compile_not_expression(); -begin - (* Skip the "~" sign.
*) - _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -(* Compiles a string literal: passes it to _add_string and emits code that adds *) -(* the value returned by _add_string to the address of "strings". *) -proc _compile_string_literal(); -begin - v0 := _string_length(source_code_position); - v4 := _add_string(source_code_position); - - (* Skip the string; the 2 extra bytes are presumably the quotes. *) - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -(* Compiles a single term. The kind of the term is chosen by the first *) -(* character at the current source position. *) -proc _compile_term(); -begin - v0 := _load_byte(source_code_position); - lb a0, 0(sp) - - li t1, '\'' - beq a0, t1, .compile_term_character_literal - - li t1, '@' - beq a0, t1, .compile_term_address - - li t1, '-' - beq a0, t1, .compile_term_negation - - li t1, '~' - beq a0, t1, .compile_term_not - - li t1, '"' - beq a0, t1, .compile_term_string_literal - - li t1, '_' - beq a0, t1, .compile_term_call - - _is_digit(v0); - bnez a0, .compile_term_integer_literal - - (* Everything else is treated as a variable. *) - goto .compile_term_variable; - -.compile_term_character_literal: - _compile_character_literal(); - goto .compile_term_end; - -.compile_term_integer_literal: - _compile_integer_literal(); - goto .compile_term_end; - -.compile_term_address: - _compile_address_expression(); - goto .compile_term_end; - -.compile_term_negation: - _compile_negate_expression(); - goto .compile_term_end; - -.compile_term_not: - _compile_not_expression(); - goto .compile_term_end; - -.compile_term_string_literal: - _compile_string_literal(); - goto .compile_term_end; - -.compile_term_call: - _compile_call(); - (* The leading newline ends the "call" line; the call result is moved to t0. *) - _write_z("\nmv t0, a0\n\0"); - goto .compile_term_end; - -.compile_term_variable: - _compile_variable_expression(); - goto .compile_term_end; - -.compile_term_end: -end; - -proc _compile_binary_rhs(); -begin - (* Skip the whitespace after the binary operator.
*) - _advance_token(1); - _compile_term(); - - (* Load the left expression from the stack into t1; the right one stays in t0. *) - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -(* Compiles a term that may be followed by one binary operator and a second term. *) -(* A space after the first term marks a binary expression. *) -proc _compile_expression(); -begin - _compile_term(); - - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, ' ' - bne a0, t1, .compile_expression_end - - (* It is a binary expression. *) - - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 24(sp)\n\0"); - - (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - (* The operator is chosen by its first character. *) - li t1, '+' - beq t0, t1, .compile_expression_add - - li t1, '*' - beq t0, t1, .compile_expression_mul - - li t1, '&' - beq t0, t1, .compile_expression_and - - li t1, 'o' - beq t0, t1, .compile_expression_or - - li t1, 'x' - beq t0, t1, .compile_expression_xor - - li t1, '=' - beq t0, t1, .compile_expression_equals - - li t1, '<' - beq t0, t1, .compile_expression_less - - li t1, '>' - beq t0, t1, .compile_expression_greater - - (* Unknown binary operator. *) - unimp - -.compile_expression_add: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_mul: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_and: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_or: - (* Skip the 2 bytes of the operator. *) - _advance_token(2); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_xor: - (* Skip the 3 bytes of the operator. *) - _advance_token(3); - _compile_binary_rhs(); - - (* Execute the operation.
*) - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_equals: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_less: - (* Skip "<" and look at the next character to tell "<", "<>" and "<=" apart. *) - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '>' - beq t0, t1, .compile_expression_not_equal - - li t1, '=' - beq t0, t1, .compile_expression_less_equal - - _compile_binary_rhs(); - - (* Execute the operation: t1 is the left operand, t0 the right one. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_not_equal: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - -.compile_expression_less_equal: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation: left <= right is the negation of right < left. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - -.compile_expression_greater: - (* Skip ">" and look at the next character to tell ">" and ">=" apart. *) - _advance_token(1); - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '=' - beq t0, t1, .compile_expression_greater_equal - - _compile_binary_rhs(); - - (* Execute the operation: t1 is the left operand and t0 the right one, *) - (* so left > right is computed as right < left. *) - _write_z("\tslt t0, t0, t1\n\0"); - - goto .compile_expression_end; - -.compile_expression_greater_equal: - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation: left >= right is the negation of left < right. *) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - -.compile_expression_end: -end; - -proc _compile_call(); -begin - (* Stack variables: *) - (* v0 - Procedure name length. *) - (* v4 - Procedure name pointer. *) - (* v8 - Argument count. *) - - v0 := _read_token(); - v4 := source_code_position; - v8 := 0; - - (* Skip the identifier and left paren.
*) - _advance_token(v0 + 1); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - (* A right paren right after the left one means there are no arguments. *) - li t1, ')' - beq t0, t1, .compile_call_finalize - -.compile_call_loop: - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - v8 := v8 + 1; - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, ',' - bne t0, t1, .compile_call_finalize - - (* Skip the comma and the whitespace after it. *) - _advance_token(2); - goto .compile_call_loop; - -.compile_call_finalize: - (* Load the arguments from the stack into a registers, last argument first. *) - - lw t0, 8(sp) - beqz t0, .compile_call_end - - (* Decrement the argument counter. *) - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - -.compile_call_end: - (* The call line is not terminated with a newline here. *) - _write_z("\tcall \0"); - _write_s(v4, v0); - - (* Skip the right paren. *) - _advance_token(1); -end; - -(* Compiles a goto statement: emits "j " followed by the label. *) -proc _compile_goto(); -begin - (* Skip "goto" and the whitespace after it. *) - _advance_token(5); - - v0 := _read_token(); - _write_z("\tj \0"); - - _write_token(v0); - (* Skip the label. The length is passed explicitly instead of relying on *) - (* the value _write_token leaves in a0. *) - _advance_token(v0); -end; - -proc _compile_local_designator(); -begin - (* Skip "v" in the local variable name. *) - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - (* Read local variable stack offset and save it.
*) - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - _write_c('\n'); -end; - -(* Compiles a global designator: emits "la t0, " followed by the identifier. *) -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -(* Compiles a designator. A designator starting with "v" is treated as a local *) -(* variable, any other designator as a global one. *) -proc _compile_designator(); -begin - la t0, source_code_position - lw t0, (t0) - lb a0, (t0) - - li t1, 'v' - beq a0, t1, .compile_designator_local - - goto .compile_designator_global; - -.compile_designator_local: - _compile_local_designator(); - goto .compile_designator_end; - -.compile_designator_global: - _compile_global_designator(); - goto .compile_designator_end; - -.compile_designator_end: -end; - -(* Compiles an assignment: the emitted code stores the value of the expression *) -(* at the address of the designator. *) -proc _compile_assignment(); -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 20(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); - - (* Compile the assigned expression. *) - _compile_expression(); - - (* Reload the assignee address and store the expression value there. *) - _write_z("\tlw t1, 20(sp)\nsw t0, (t1)\n\0"); -end; - -(* Compiles a return statement: the emitted code moves the expression value to a0. *) -proc _compile_return_statement(); -begin - (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* a0 - Label counter. *) -proc _write_label(); -begin - _write_z(".L\0"); - _write_i(v88); -end; - -(* Compiles an if statement with an optional else branch. *) -proc _compile_if(); -begin - (* Skip "if ". *) - _advance_token(3); - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _advance_token(6); - - (* v0 is the label after the if statement. *) - v0 := label_counter; - _store_word(label_counter + 1, @label_counter); - (* v4 is the label in front of the else branch or the end.
*) - v4 := label_counter; - _store_word(label_counter + 1, @label_counter); - - (* Jump over the body if the condition is 0. *) - _write_z("\tbeqz t0, \0"); - _write_label(v4); - _write_c('\n'); - - _compile_procedure_body(); - - (* At the end of the body jump behind the whole statement. *) - _write_z("\tj \0"); - _write_label(v0); - _write_c('\n'); - - _write_label(v4); - _write_z(":\n\0"); - - _memcmp(source_code_position, "end", 3); - beqz a0, .compile_if_end - - (* Compare all 4 bytes of the keyword like _compile_procedure_body does. *) - (* Anything that is not "end" is handled as an else branch. *) - _memcmp(source_code_position, "else", 4); - beqz a0, .compile_if_else - -.compile_if_else: - (* Skip "else" and newline. *) - _advance_token(5); - _compile_procedure_body(); - -.compile_if_end: - (* Skip "end". *) - _advance_token(3); - - _write_label(v0); - _write_z(":\n\0"); -end; - -(* Compiles a single statement. The kind of the statement is chosen by its first character. *) -proc _compile_statement(); -begin - _skip_spaces(); - (* This is a call if the statement starts with an underscore. *) - la t0, source_code_position - lw t0, (t0) - (* First character after alignment tab. *) - (* addi t0, t0, 1 *) - lb t0, (t0) - - li t1, '_' - beq t0, t1, .compile_statement_call - - li t1, 'g' - beq t0, t1, .compile_statement_goto - - li t1, 'v' - beq t0, t1, .compile_statement_assignment - - li t1, 'i' - beq t0, t1, .compile_statement_if - - (* Check for the "return" keyword, which is 6 bytes long.
*) - _memcmp(source_code_position, "return", 6); - beqz a0, .compile_statement_return - - (* Anything else is printed unchanged by _compile_line. *) - _compile_line(); - goto .compile_statement_end; - -.compile_statement_call: - (* _advance_token(1); *) - _compile_call(); - - goto .compile_statement_semicolon; - -.compile_statement_goto: - (* _advance_token(1); *) - _compile_goto(); - - goto .compile_statement_semicolon; - -.compile_statement_assignment: - (* _advance_token(1); *) - _compile_assignment(); - - goto .compile_statement_semicolon; - -.compile_statement_if: - (* _advance_token(1); *) - _compile_if(); - - goto .compile_statement_semicolon; - -.compile_statement_return: - (* _advance_token(1); *) - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - -.compile_statement_semicolon: - (* Skip 2 bytes after the statement, presumably the semicolon and the newline. *) - _advance_token(2); - _write_c('\n'); - -.compile_statement_end: -end; - -(* Compiles statements until "end" or "else" is found at the current position. *) -(* The keyword itself is not skipped. *) -proc _compile_procedure_body(); -begin -.compile_procedure_body_loop: - _skip_empty_lines(); - _skip_spaces(); - - _memcmp(source_code_position, "end", 3); - beqz a0, .compile_procedure_body_epilogue - - _memcmp(source_code_position, "else", 4); - beqz a0, .compile_procedure_body_epilogue - - _compile_statement(); - goto .compile_procedure_body_loop; - -.compile_procedure_body_epilogue: -end; - -(* Writes a register name to the standard output. *) - -(* Parameters: *) -(* a0 - Register character. *) -(* a1 - Register number.
*) -proc _write_register(); -begin - _write_c(v88); - (* The number is written as a single character, the number added to '0'. *) - v84 := v84 + '0'; - _write_c(v84); -end; - -(* Writes the procedure prologue: allocates a 128 byte frame, saves ra and s0 *) -(* and stores the registers a0 - a7 on the stack. *) -proc _compile_procedure_prologue(); -begin - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - v0 := 0; - -.compile_procedure_prologue_loop: - _write_z("\tsw a\0"); - _write_i(v0); - _write_z(", \0"); - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_z("(sp)\n\0"); - - v0 := v0 + 1; - lw a0, 0(sp) - - (* Repeat until the parameter counter reaches 8. *) - li t0, 8 - bne a0, t0, .compile_procedure_prologue_loop -end; - -(* Compiles a procedure: label, prologue, body and epilogue. *) -proc _compile_procedure(); -begin - (* Skip "proc ". *) - _advance_token(5); - - (* Save the procedure name length. *) - v0 := _read_token(); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_token(v0); - _write_z(":\n\0"); - - (* Skip the function name and trailing parens, semicolon, "begin" and newline. *) - _advance_token(v0 + 10); - - _compile_procedure_prologue(); - _compile_procedure_body(); - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); -end; - -(* Advances source_code_position over tab characters. *) -proc _skip_spaces(); -begin - (* Skip tabs. *) - la t0, source_code_position - lw t1, (t0) - -.skip_spaces_loop: - lb t2, (t1) - li t3, '\t' - bne t2, t3, .skip_spaces_end - (* t2 is a tab at this point, so the following branch is never taken. *) - beqz t2, .skip_spaces_end - - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_spaces_loop; - -.skip_spaces_end: -end; - -(* Skips the rest of the current line including the newline. Nothing is printed. *) -proc _skip_comment(); -begin - la t0, source_code_position - lw t1, (t0) - -.skip_comment_loop: - (* Check for newline character. *) - lb t2, (t1) - li t3, '\n' - beq t2, t3, .skip_comment_end - - (* Advance the input string by one byte. *) - addi t1, t1, 1 - sw t1, (t0) - - goto .skip_comment_loop; - -.skip_comment_end: - (* Skip the newline.
*) - addi t1, t1, 1 - sw t1, (t0) -end; - -(* Skip newlines and comments. *) -(* Tabs in front of a newline or a comment are skipped too, tabs in front of *) -(* anything else are left in place. *) -proc _skip_empty_lines(); -begin -.skip_empty_lines_rerun: - (* v0 is a local copy of the source position. *) - la t0, source_code_position - lw t0, (t0) - sw t0, 0(sp) - -.skip_empty_lines_loop: - lw t2, 0(sp) - lb t0, (t2) - - li t1, '\n' - beq t0, t1, .skip_empty_lines_newline - - li t1, '\t' - beq t0, t1, .skip_empty_lines_tab - - (* A comment starts with a left paren followed by an asterisk. *) - li t1, '(' - bne t0, t1, .skip_empty_lines_end - addi t2, t2, 1 - lb t0, (t2) - li t1, '*' - beq t0, t1, .skip_empty_lines_comment - - goto .skip_empty_lines_end; - -.skip_empty_lines_comment: - la t0, source_code_position - lw t1, 0(sp) - sw t1, (t0) - _skip_comment(); - goto .skip_empty_lines_rerun; - -.skip_empty_lines_newline: - la t0, source_code_position - lw t1, 0(sp) - addi t1, t1, 1 - sw t1, (t0) - goto .skip_empty_lines_rerun; - -.skip_empty_lines_tab: - (* Only the local copy is advanced over the tab. *) - v0 := v0 + 1; - goto .skip_empty_lines_loop; - -.skip_empty_lines_end: -end; - -(* Compiles the initializer of a constant or a variable. Supported are strings, *) -(* records written as S(...), pointers written as @name and numbers. *) -proc _compile_global_initializer(); -begin - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - li t1, '"' - beq t0, t1, .compile_global_initializer_string - - li t1, 'S' - beq t0, t1, .compile_global_initializer_record - - li t1, '@' - beq t0, t1, .compile_global_initializer_pointer - - v0 := _load_byte(source_code_position); - _is_digit(v0); - bnez a0, .compile_global_initializer_number - - (* Unknown initializer. *) - unimp - -.compile_global_initializer_pointer: - (* Skip @. *) - _advance_token(1); - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - goto .compile_global_initializer_end; - -.compile_global_initializer_number: - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - (* Skip the whole number, which can be longer than one digit. *) - _advance_token(v0); - - goto .compile_global_initializer_end; - -.compile_global_initializer_record: - (* Skip "S(".
*) - _advance_token(2); - - (* An empty record has no fields to compile. *) - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ')' - beq t0, t1, .compile_global_initializer_closing - -.compile_global_initializer_loop: - (* Each record field is an initializer itself. *) - _compile_global_initializer(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ')' - beq t0, t1, .compile_global_initializer_closing - - (* Skip comma and whitespace after it. *) - _advance_token(2); - - goto .compile_global_initializer_loop; - -.compile_global_initializer_closing: - (* Skip ")" *) - _advance_token(1); - - goto .compile_global_initializer_end; - -.compile_global_initializer_string: - _write_z("\n\t.word strings + \0"); - v4 := _string_length(source_code_position); - - _add_string(source_code_position); - (* _write_i is called without arguments, so it gets the value _add_string left in a0. *) - _write_i(); - - (* Skip the quoted string. *) - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - -.compile_global_initializer_end: -end; - -(* Compiles a constant declaration: emits the type directive, the label and the initializer. *) -proc _compile_constant_declaration(); -begin - _read_token(); - (* Save the returned name length in v0. *) - sw a0, 0(sp) - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(v0 + 4); - _compile_global_initializer(); - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -(* Compiles the const section if the source continues with "const". *) -proc _compile_const_part(); -begin - _skip_empty_lines(); - - _memcmp(source_code_position, "const\0", 5); - bnez a0, .compile_const_part_end - - (* Skip "const" with the newline after it. *) - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - -.compile_const_part_loop: - _skip_empty_lines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section.
*) - li t1, '\t' - bne t0, t1, .compile_const_part_end - - (* Skip the indentation tab. *) - _advance_token(1); - - _compile_constant_declaration(); - goto .compile_const_part_loop; - -.compile_const_part_end: -end; - -(* Compiles a variable declaration: emits the type directive, the label and *) -(* either the initializer or a zeroed block. *) -proc _compile_variable_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _advance_token(v0 + 2); - - (* Skip the type name. *) - v4 := _read_token(); - _advance_token(v4); - - (* A space after the type name means that an initializer follows. *) - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - li t1, ' ' - beq t0, t1, .compile_variable_declaration_initializer - - (* Else we assume this is a zeroed 81920 bytes big array. *) - _write_z(" .zero 81920\0"); - goto .compile_variable_declaration_finalize; - -.compile_variable_declaration_initializer: - (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); - _compile_global_initializer(); - goto .compile_variable_declaration_finalize; - -.compile_variable_declaration_finalize: - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -(* Compiles the var section if the source continues with "var". *) -proc _compile_var_part(); -begin - _memcmp(source_code_position, "var\0", 3); - bnez a0, .compile_var_part_end - - (* Skip "var" and newline. *) - _advance_token(4); - _write_z(".section .data\n\0"); - -.compile_var_part_loop: - _skip_empty_lines(); - - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - - (* Declarations are indented with a tab; anything else ends the section. *) - li t1, '\t' - beq t0, t1, .compile_var_part_declaration - - goto .compile_var_part_end; - -.compile_var_part_declaration: - (* Skip the indentation tab. *) - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - -.compile_var_part_end: -end; - -(* Process the source code and print the generated code.
*) -proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - (* The text section starts with hand written helper procedures. *) - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - -.compile_module_loop: - _skip_empty_lines(); - - (* A zero byte ends the source text. *) - la t0, source_code_position - lw t0, (t0) - lb t0, (t0) - beqz t0, .compile_module_end - - (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) - _memcmp(source_code_position, "proc ", 5); - beqz a0, .compile_module_procedure - - (* Not a known token, exit. *) - goto .compile_module_end; - -.compile_module_procedure: - _compile_procedure(); - - goto .compile_module_loop; - -.compile_module_end: -end; - -(* Compiles the module and writes the collected strings as the "strings" object. *) -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - v0 := @compiler_strings; - v4 := compiler_strings_position; - -.compile_loop: - (* Write the bytes from v0 up to, but not including, v4. *) - lw t0, 0(sp) - lw t1, 4(sp) - bge t0, t1, .compile_end - - v8 := _load_byte(v0); - v0 := v0 + 1; - _write_c(v8); - - j .compile_loop - -.compile_end: - _write_c('"'); - _write_c('\n'); -end; - -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) -proc _exit(); -begin - (* The last argument, 93, is moved to a7 by _syscall; the status code is passed on in a0. *) - _syscall(v88, 0, 0, 0, 0, 0, 93); -end; - -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* a0 - Symbol pointer. *) -(* a1 - Symbol name length. *) -(* a2 - Symbol name pointer. *) -(* a3 - Symbol table. *) -proc _symbol_table_enter(); -begin - (* The first word in the symbol table is its length, get it.
*) - lw a0, 76(sp) - lw a0, (a0) - sw a0, 0(sp) - - (* Calculate the address of the new symbol. An entry is 12 bytes long, *) - (* the entries follow the length word of the table. *) - v4 := v0 * 12; - v4 := v4 + 4; - v4 := v76 + v4; - - (* Copy the three parameter words saved at 80(sp), 84(sp) and 88(sp): *) - (* name pointer, name length and symbol pointer. *) - _memcpy(v4, @v80, 12); - - (* Increment the symbol table length. *) - v0 := v0 + 1; - lw t0, 0(sp) - lw t1, 76(sp) - sw t0, (t1) -end; - -(* Enters the built-in types into the global symbol table. *) -proc _symbol_table_build(); -begin - _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); -end; - - -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group are handled equivalently. *) - -(* Classification: *) - -(* TransitionClass = ( *) -(* transitionClassInvalid = 1, *) -(* transitionClassDigit = 2, *) -(* transitionClassAlpha = 3, *) -(* transitionClassSpace = 4, *) -(* transitionClassColon = 5, *) -(* transitionClassEquals = 6, *) -(* transitionClassLeftParen = 7, *) -(* transitionClassRightParen = 8, *) -(* transitionClassAsterisk = 9, *) -(* transitionClassUnderscore = 10, *) -(* transitionClassSingle = 11, *) -(* transitionClassHex = 12, *) -(* transitionClassZero = 13, *) -(* transitionClassX = 14, *) -(* transitionClassEof = 15, *) -(* transitionClassDot = 16, *) -(* transitionClassMinus = 17, *) -(* transitionClassSingleQuote = 18, *) -(* transitionClassDoubleQuote = 19, *) -(* transitionClassGreater = 20, *) -(* transitionClassLess = 21, *) -(* transitionClassOther = 22 *) -(* ); *) -(* TransitionState = ( *) -(* transitionStateStart = 1, *) -(* transitionStateColon = 2, *) -(* transitionStateIdentifier = 3, *) -(* transitionStateDecimal = 4, *) -(* transitionStateGreater = 5, *)
-(* transitionStateMinus = 6, *) -(* transitionStateLeftParen = 7, *) -(* transitionStateLess = 8, *) -(* transitionStateDot = 9, *) -(* transitionStateComment = 10, *) -(* transitionStateClosingComment = 11, *) -(* transitionStateCharacter = 12, *) -(* transitionStateString = 13, *) -(* transitionStateLeadingZero = 14, *) -(* transitionStateDecimalSuffix = 15, *) -(* transitionStateEnd = 16 *) -(* ); *) -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) -(* TransitionAction = ( *) -(* none = 1, *) -(* accumulate = 2, *) -(* skip = 3, *) -(* single = 4, *) -(* eof = 5, *) -(* finalize = 6, *) -(* composite = 7, *) -(* key_id = 8, *) -(* integer = 9, *) -(* delimited = 10 *) -(* ); *) - -(* Assigns a value to a word sized array element. *) - -(* Parameters: *) -(* a0 - Array pointer. *) -(* a1 - Index of the element, beginning with 1. The element is stored at the byte offset (index - 1) * 4. *) -(* a2 - Data to assign. *) -proc _assign_at(); -begin - v0 := v84 + -1; - v0 := v0 * 4; - v0 := v88 + v0; - - (* Store the data (a2) at the calculated address. *) - lw t0, 0(sp) - lw t1, 80(sp) - sw t1, (t0) -end; - -(* Fills the classification table, one entry per _assign_at call. The assigned *) -(* values are members of the TransitionClass enumeration. *) -(* NOTE(review): entry n presumably describes the character with the code n - 1; confirm against the table lookup. *) -proc _create_classification(); -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); -
_assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - _assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - (* NOTE(review): if entry n describes the character code n - 1, the entries from 33 on cover the printable ASCII characters; confirm. *) - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - (* Entry 49 is transitionClassZero, the entries 50 - 58 are transitionClassDigit. *) - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - (* The entries from 66 on are transitionClassAlpha. *) - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); -
_assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - _assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - (* Entry 91 is the last transitionClassAlpha entry of this run. *) - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - (* The entries 98 - 103 are transitionClassHex. *) - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - (* The entries 104 - 123 are transitionClassAlpha except 121, which is transitionClassX. *) - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); -
_assign_at(@classification, 128, 1); - - v0 := 129; - -(* Set the remaining entries 129 - 256 to transitionClassOther. *) -.create_classification_loop: - _assign_at(@classification, v0, 22); - v0 := v0 + 1; - - lw t0, 0(sp) - li t1, 257 - blt t0, t1, .create_classification_loop -end; - -(* Stores one cell of the transition table. *) - -(* Parameters: *) -(* a0 - Current state (first index into transitions table). *) -(* a1 - Transition class (second index into transitions table). *) -(* a2 - Action to assign. *) -(* a3 - Next state to assign. *) -proc _set_transition(); -begin - (* The classification table consists of 256 words, that is 1024 bytes, *) - (* so the transitions start at the offset 1024 in the classification array. *) - (* Save the transitions start in v0. *) - (* NOTE(review): assumes the classification array is big enough for both tables; confirm its declaration. *) - v0 := @classification + 1024; - - (* Each cell is 8 bytes long (2 words: action and next state). *) - (* There are 22 transition classes, so the row of a state is 8 * 22 = 176 bytes long. *) - - v4 := v88 + -1; - v4 := v4 * 176; - - v8 := v84 + -1; - v8 := v8 * 8; - - v12 := v0 + v4; - v12 := v12 + v8; - - (* Store the action (a2) and after it the next state (a3). *) - lw t0, 12(sp) - lw t1, 80(sp) - lw t2, 76(sp) - sw t1, (t0) - addi t0, t0, 4 - sw t2, (t0) -end; - -(* Parameters: *) -(* a0 - Current state (Transition state enumeration). *) -(* a1 - Default action (Callback). *) -(* a2 - Next state (Transition state enumeration).
*) -proc _set_default_transition(); -begin - (* Assign the same action and next state to each of the 22 transition classes. *) - _set_transition(v88, 1, v84, v80); - _set_transition(v88, 2, v84, v80); - _set_transition(v88, 3, v84, v80); - _set_transition(v88, 4, v84, v80); - _set_transition(v88, 5, v84, v80); - _set_transition(v88, 6, v84, v80); - _set_transition(v88, 7, v84, v80); - _set_transition(v88, 8, v84, v80); - _set_transition(v88, 9, v84, v80); - _set_transition(v88, 10, v84, v80); - _set_transition(v88, 11, v84, v80); - _set_transition(v88, 12, v84, v80); - _set_transition(v88, 13, v84, v80); - _set_transition(v88, 14, v84, v80); - _set_transition(v88, 15, v84, v80); - _set_transition(v88, 16, v84, v80); - _set_transition(v88, 17, v84, v80); - _set_transition(v88, 18, v84, v80); - _set_transition(v88, 19, v84, v80); - _set_transition(v88, 20, v84, v80); - _set_transition(v88, 21, v84, v80); - _set_transition(v88, 22, v84, v80); -end; - - -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) - -(* Each cell is two words long, see _set_transition. *) -(* - The first word is the action that should be performed when transitioning *) -(* (a member of the TransitionAction enumeration). *) -(* For the meaning of actions see labels in the lex_next function, which *) -(* handles each action. *) -(* - The second word is the target state (a member of the TransitionState *) -(* enumeration). The calls below use transitionStateEnd (16) where the *) -(* token ends. *) -proc _create_transitions(); -begin - (* Start state.
*) - (* _set_transition arguments: current state, transition class, action, next state. *) - (* _set_default_transition arguments: current state, default action, next state. *) - _set_transition(1, 1, 1, 16); - _set_transition(1, 2, 2, 4); - _set_transition(1, 3, 2, 3); - _set_transition(1, 4, 3, 1); - _set_transition(1, 5, 2, 5); - _set_transition(1, 6, 4, 16); - _set_transition(1, 7, 2, 7); - _set_transition(1, 8, 4, 16); - _set_transition(1, 9, 4, 16); - _set_transition(1, 10, 2, 3); - _set_transition(1, 11, 4, 16); - _set_transition(1, 12, 2, 3); - _set_transition(1, 13, 2, 14); - _set_transition(1, 14, 2, 3); - _set_transition(1, 15, 5, 16); - _set_transition(1, 16, 2, 9); - _set_transition(1, 17, 2, 6); - _set_transition(1, 18, 2, 12); - _set_transition(1, 19, 2, 13); - _set_transition(1, 20, 2, 5); - _set_transition(1, 21, 2, 8); - _set_transition(1, 22, 1, 16); - - (* Colon state. *) - _set_default_transition(2, 6, 16); - _set_transition(2, 6, 7, 16); - - (* Identifier state. *) - _set_default_transition(3, 8, 16); - _set_transition(3, 2, 2, 3); - _set_transition(3, 3, 2, 3); - _set_transition(3, 10, 2, 3); - _set_transition(3, 12, 2, 3); - _set_transition(3, 13, 2, 3); - _set_transition(3, 14, 2, 3); - - (* Decimal state. *) - _set_default_transition(4, 9, 16); - _set_transition(4, 2, 2, 4); - _set_transition(4, 3, 2, 15); - _set_transition(4, 10, 1, 16); - _set_transition(4, 12, 2, 15); - _set_transition(4, 13, 2, 4); - _set_transition(4, 14, 2, 15); - - (* Greater state. *) - _set_default_transition(5, 6, 16); - _set_transition(5, 6, 7, 16); - - (* Minus state. *) - _set_default_transition(6, 6, 16); - _set_transition(6, 20, 7, 16); - - (* Left paren state. *) - _set_default_transition(7, 6, 16); - _set_transition(7, 9, 2, 10); - - (* Less state. *) - _set_default_transition(8, 6, 16); - _set_transition(8, 6, 7, 16); - _set_transition(8, 20, 7, 16); - - (* Hexadecimal after 0x. *) - _set_default_transition(9, 6, 16); - _set_transition(9, 16, 7, 16); - - (* Comment. *) - _set_default_transition(10, 2, 10); - _set_transition(10, 9, 2, 11); - _set_transition(10, 15, 1, 16); - - (* Closing comment. 
*) - _set_default_transition(11, 2, 10); - _set_transition(11, 1, 1, 16); - _set_transition(11, 8, 10, 16); - _set_transition(11, 9, 2, 11); - _set_transition(11, 15, 1, 16); - - (* Character. *) - _set_default_transition(12, 2, 12); - _set_transition(12, 1, 1, 16); - _set_transition(12, 15, 1, 16); - _set_transition(12, 18, 10, 16); - - (* String. *) - _set_default_transition(13, 2, 13); - _set_transition(13, 1, 1, 16); - _set_transition(13, 15, 1, 16); - _set_transition(13, 19, 10, 16); - - (* Leading zero. *) - _set_default_transition(14, 9, 16); - _set_transition(14, 2, 1, 16); - _set_transition(14, 3, 1, 16); - _set_transition(14, 10, 1, 16); - _set_transition(14, 12, 1, 16); - _set_transition(14, 13, 1, 16); - _set_transition(14, 14, 1, 16); - - (* Digit with a character suffix. *) - _set_default_transition(15, 9, 16); - _set_transition(15, 3, 1, 16); - _set_transition(15, 2, 1, 16); - _set_transition(15, 12, 1, 16); - _set_transition(15, 13, 1, 16); - _set_transition(15, 14, 1, 16); -end; - -(* Returns the address of the lexer state in a0. *) -proc _lexer_get_state(); -begin - (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) - (* NOTE(review): _set_transition writes 8 byte cells with a row stride of 128 bytes starting at *) - (* offset 256, so this offset appears to lie inside the transitions table. Confirm. *) - v0 := @classification; - v4 := 16 * 22; - v0 := v0 + 256; - - return v0 + v4 -end; - -(* Gets pointer to the current source text. *) -(* It is the second word of the lexer state. *) -proc _lexer_get_current(); -begin - v0 := _lexer_get_state(); - - return v0 + 4 -end; - -(* Resets the lexer state for reading the next token. *) -proc _lexer_reset(); -begin - (* Transition start state is 1. *) - _lexer_get_state(); - li t0, 1 - sw t0, (a0) - sw a0, 0(sp) - - (* Text pointer to the beginning of the currently read token. *) - _lexer_get_current(); - la t0, source_code_position - lw t0, (t0) - sw t0, (a0) - - (* Initial length of the token is 0. *) - (* NOTE(review): t0 holds the text pointer loaded above, so the zero word is stored 4 bytes *) - (* into the source text and not into the lexer state. Confirm the intended target. *) - addi t0, t0, 4 - sw zero, (t0) -end; - -(* One time lexer initialization. *) -proc _lexer_initialize(); -begin - _create_classification(); - _create_transitions(); -end; - -(* Entry point. 
*) -proc _start(); -begin - _lexer_initialize(); - _symbol_table_build(); - - (* Read the source from the standard input. *) - (* The second argument is the buffer size. When changing it, update the source_code definition as well. *) - _read_file(@source_code, 81920); - _compile(); - - _exit(0); -end; diff --git a/boot/stage9/cl.elna b/boot/stage9/cl.elna new file mode 100644 index 0000000..21d87ef --- /dev/null +++ b/boot/stage9/cl.elna @@ -0,0 +1,1993 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, *) +(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) +(* obtain one at https://mozilla.org/MPL/2.0/. *) + +(* Stage 9 compiler. *) + +(* - Procedure calls in expressions. *) +(* - Comments between (* and *) are supported. These are still single line *) +(* comments and they should be on a separate line. *) +(* - if-else statements. *) +const + symbol_builtin_name_int := "Int"; + symbol_builtin_name_word := "Word"; + symbol_builtin_name_pointer := "Pointer"; + symbol_builtin_name_char := "Char"; + symbol_builtin_name_bool := "Bool"; + + (* Every type info starts with a word describing what type it is. *) + + (* PRIMITIVE_TYPE = 1 *) + + (* Primitive types have only type size. *) + symbol_builtin_type_int := S(1, 4); + symbol_builtin_type_word := S(1, 4); + symbol_builtin_type_pointer := S(1, 4); + symbol_builtin_type_char := S(1, 1); + symbol_builtin_type_bool := S(1, 1); + + (* Info objects start with a word describing its type. *) + + (* INFO_TYPE = 1 *) + + (* Type info has the type it belongs to. 
*) + symbol_type_info_int := S(1, @symbol_builtin_type_int); + symbol_type_info_word := S(1, @symbol_builtin_type_word); + symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); + symbol_type_info_char := S(1, @symbol_builtin_type_char); + symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + +var + source_code: Array; + compiler_strings: Array; + symbol_table_global: Array; + symbol_table_local: Array; + classification: Array; + + compiler_strings_position: Pointer := @compiler_strings; + compiler_strings_length: Word := 0; + label_counter: Word := 0; + source_code_position: Pointer := @source_code; + +(* Calculates and returns the string token length between quotes, including the *) +(* escaping slash characters. *) + +(* Parameters: *) +(* a0 - String token pointer. *) + +(* Returns the length in a0. *) +proc _string_length(); +begin + (* Reset the counter. *) + v0 := 0; + +.string_length_loop: + (* The pointer is advanced before the character is read, so the character it initially *) + (* points to is never examined. *) + v88 := v88 + 1; + + lw t0, 88(sp) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .string_length_end + + v0 := v0 + 1; + goto .string_length_loop; + +.string_length_end: + return v0 +end; + +(* Adds a string to the global, read-only string storage. *) + +(* Parameters: *) +(* a0 - String token. *) + +(* Returns the offset from the beginning of the storage to the new string in a0. *) +proc _add_string(); +begin + (* v0 is the read cursor, starting after the first character of the token. *) + (* v4 remembers the storage length before the string is added. It is the returned offset. *) + v0 := v88 + 1; + v4 := compiler_strings_length; + +.add_string_loop: + lw t0, 0(sp) + lb t1, (t0) + li t2, '"' + + beq t1, t2, .add_string_end + + v8 := _load_byte(v0); + _store_byte(v8, compiler_strings_position); + _store_word(compiler_strings_position + 1, @compiler_strings_position); + v0 := v0 + 1; + + (* A backslash is copied into the storage but does not count towards compiler_strings_length. *) + lb t1, 8(sp) + li t2, '\\' + bne t1, t2, .add_string_increment + + goto .add_string_loop; + +.add_string_increment: + la t2, compiler_strings_length + lw t4, (t2) + addi t4, t4, 1 + sw t4, (t2) + + goto .add_string_loop; + +.add_string_end: + return v4 +end; + +(* Reads standard input into a buffer. *) +(* a0 - Buffer pointer. 
*) +(* a1 - Buffer size. *) + +(* Returns the amount of bytes written in a0. *) +proc _read_file(); +begin + _syscall(0, v88, v84, 0, 0, 0, 63); +end; + +(* Writes to the standard output. *) + +(* Parameters: *) +(* a0 - Buffer. *) +(* a1 - Buffer length. *) +proc _write_s(); +begin + _syscall(1, v88, v84, 0, 0, 0, 64); +end; + +(* Writes a number to a string buffer. *) + +(* t0 - Local buffer. *) +(* t1 - Constant 10. *) +(* t2 - Current character. *) +(* t3 - Whether the number is negative. *) + +(* Parameters: *) +(* a0 - Whole number. *) +(* a1 - Buffer pointer. *) + +(* Sets a0 to the length of the written number. *) +proc _print_i(); +begin + li t1, 10 + addi t0, s0, -9 + + li t3, 0 + bgez a0, .print_i_digit10 + li t3, 1 + neg a0, a0 + +.print_i_digit10: + rem t2, a0, t1 + addi t2, t2, '0' + sb t2, 0(t0) + div a0, a0, t1 + addi t0, t0, -1 + bne zero, a0, .print_i_digit10 + + beq zero, t3, .print_i_write_call + addi t2, zero, '-' + sb t2, 0(t0) + addi t0, t0, -1 + +.print_i_write_call: + mv a0, a1 + addi a1, t0, 1 + sub a2, s0, t0 + addi a2, a2, -9 + sw a2, 0(sp) + + _memcpy(); + + return v0 +end; + +(* Writes a number to the standard output. *) + +(* Parameters: *) +(* a0 - Whole number. *) +proc _write_i(); +begin + (* The characters are written into the stack starting at v0. The length is kept in v16, *) + (* past the buffer, so that storing it does not overwrite characters of a longer number. *) + v16 := _print_i(v88, @v0); + _write_s(@v0, v16); +end; + +(* Writes a character from a0 into the standard output. *) +proc _write_c(); +begin + _write_s(@v88, 1); +end; + +(* Write null terminated string. *) + +(* Parameters: *) +(* a0 - String. *) +proc _write_z(); +begin +.write_z_loop: + (* Check for 0 character. *) + lw a0, 88(sp) + lb a0, (a0) + beqz a0, .write_z_end + + (* Print a character. *) + _write_c(); + + (* Advance the input string by one byte. *) + v88 := v88 + 1; + + goto .write_z_loop; + +.write_z_end: +end; + +(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) +proc _is_upper(); +begin + v0 := v88 >= 'A'; + v4 := v88 <= 'Z'; + + return v0 & v4 + +end; + +(* Detects if a0 is a lowercase character. 
Sets a0 to 1 if so, otherwise to 0. *) +proc _is_lower(); +begin + v0 := v88 >= 'a'; + v4 := v88 <= 'z'; + + return v0 & v4 + +end; + +(* Detects if the passed character is a 7-bit alpha character or an underscore. *) + +(* Parameters: *) +(* a0 - Tested character. *) + +(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) +proc _is_alpha(); +begin + v0 := _is_upper(v88); + v4 := _is_lower(v88); + v8 := v88 = '_'; + + v12 := v0 or v4; + return v12 or v8 +end; + +(* Detects whether the passed character is a digit *) +(* (a value between 0 and 9). *) + +(* Parameters: *) +(* a0 - Examined value. *) + +(* Sets a0 to 1 if it is a digit, to 0 otherwise. *) +proc _is_digit(); +begin + v0 := v88 >= '0'; + v4 := v88 <= '9'; + + return v0 & v4 +end; + +(* Detects whether the passed character is an alpha character, an underscore or a digit. *) + +(* Parameters: *) +(* a0 - Tested character. *) +proc _is_alnum(); +begin + v0 := _is_alpha(v88); + v4 := _is_digit(v88); + + return v0 or v4 +end; + +(* Reads the next token. *) +(* A token is a run of dots, alpha characters, underscores and digits. *) + +(* Returns token length in a0. *) +proc _read_token(); +begin + (* Current token position. *) + v0 := source_code_position; + (* Token length. *) + v4 := 0; + +.read_token_loop: + lw t0, 0(sp) + (* Current character. *) + lb t0, (t0) + + (* First we try to read a directive. *) + (* A directive can contain a dot and characters. *) + li t1, '.' + beq t0, t1, .read_token_next + + v8 := _load_byte(v0); + _is_alnum(v8); + bnez a0, .read_token_next + + goto .read_token_end; + +.read_token_next: + (* Advance the local cursor and the token length. source_code_position itself is not changed. *) + v4 := v4 + 1; + v0 := v0 + 1; + + goto .read_token_loop; + +.read_token_end: + return v4 +end; + +(* a0 - First pointer. *) +(* a1 - Second pointer. *) +(* a2 - The length to compare. *) + +(* Returns 0 in a0 if memory regions are equal. 
*) +proc _memcmp(); +begin + mv t0, a0 + li a0, 0 + +.memcmp_loop: + beqz a2, .memcmp_end + + (* a0 becomes the difference of the first pair of bytes that are not equal. *) + lbu t1, (t0) + lbu t2, (a1) + sub a0, t1, t2 + + bnez a0, .memcmp_end + + addi t0, t0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcmp_loop; + +.memcmp_end: +end; + +(* Copies memory. *) + +(* Parameters: *) +(* a0 - Destination. *) +(* a1 - Source. *) +(* a2 - Size. *) + +(* Preserves a0. *) +proc _memcpy(); +begin +.memcpy_loop: + beqz a2, .memcpy_end + + lbu t1, (a1) + sb t1, (a0) + + addi a0, a0, 1 + addi a1, a1, 1 + addi a2, a2, -1 + + goto .memcpy_loop; + +.memcpy_end: + (* The destination was saved on the stack by the prologue, return it from there. *) + return v88 +end; + +(* Advances the token stream by a0 bytes. *) +proc _advance_token(); +begin + la t0, source_code_position + lw t1, (t0) + add t1, t1, a0 + sw t1, (t0) +end; + +(* Prints the current token. *) + +(* Parameters: *) +(* a0 - Token length. *) + +(* Returns a0 unchanged. *) +proc _write_token(); +begin + _write_s(source_code_position, v88); + return v88 +end; + +(* Prints and skips a line. *) +proc _compile_line(); +begin +.compile_line_loop: + la a0, source_code_position + lw a1, (a0) + + lb t0, (a1) + li t1, '\n' + beq t0, t1, .compile_line_end + + (* Print a character. Only one byte is needed, so load a byte and not a word. *) + lb a0, (a1) + _write_c(); + + (* Advance the input string by one byte. 
*) + _advance_token(1); + + goto .compile_line_loop; + +.compile_line_end: + _write_c('\n'); + + _advance_token(1); +end; + +(* Writes an instruction loading the integer literal at the current position into t0. *) +proc _compile_integer_literal(); +begin + _write_z("\tli t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +(* Writes an instruction loading the character literal at the current position into t0. *) +proc _compile_character_literal(); +begin + _write_z("\tli t0, \0"); + + (* Write and skip the opening quote. *) + _write_c('\''); + _advance_token(1); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + li t1, '\\' + bne a0, t1, .compile_character_literal_end + + (* An escape sequence: the backslash is written and skipped separately. *) + _write_c('\\'); + _advance_token(1); + +.compile_character_literal_end: + v0 := _load_byte(source_code_position); + _write_c(v0); + + _write_c('\''); + _write_c('\n'); + + (* Skip the character and the closing quote. *) + _advance_token(2); +end; + +(* Compiles a designator and loads the word it refers to into t0. *) +proc _compile_variable_expression(); +begin + _compile_designator(); + _write_z("\tlw t0, (t0)\n\0"); +end; + +(* Compiles the address of a designator into t0. *) +proc _compile_address_expression(); +begin + (* Skip the "@" sign. *) + _advance_token(1); + _compile_designator(); +end; + +proc _compile_negate_expression(); +begin + (* Skip the "-" sign. *) + _advance_token(1); + _compile_term(); + + _write_z("\tneg t0, t0\n\0"); +end; + +proc _compile_not_expression(); +begin + (* Skip the "~" sign. 
*) + _advance_token(1); + _compile_term(); + + _write_z("\tnot t0, t0\n\0"); +end; + +(* Adds the string literal at the current position to the string storage and *) +(* writes the code calculating its address (strings + offset) into t0. *) +proc _compile_string_literal(); +begin + v0 := _string_length(source_code_position); + v4 := _add_string(source_code_position); + + (* Skip the string together with both quotes. *) + _advance_token(v0 + 2); + _write_z("\tla t0, strings\n\0"); + + _write_z("\tli t1, \0"); + _write_i(v4); + _write_c('\n'); + + _write_z("\tadd t0, t0, t1\n\0"); +end; + +(* Compiles a single term. The kind of the term is chosen by its first character. *) +proc _compile_term(); +begin + v0 := _load_byte(source_code_position); + lb a0, 0(sp) + + li t1, '\'' + beq a0, t1, .compile_term_character_literal + + li t1, '@' + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not + + li t1, '"' + beq a0, t1, .compile_term_string_literal + + li t1, '_' + beq a0, t1, .compile_term_call + + _is_digit(v0); + bnez a0, .compile_term_integer_literal + + (* Everything else is treated as a variable. *) + goto .compile_term_variable; + +.compile_term_character_literal: + _compile_character_literal(); + goto .compile_term_end; + +.compile_term_integer_literal: + _compile_integer_literal(); + goto .compile_term_end; + +.compile_term_address: + _compile_address_expression(); + goto .compile_term_end; + +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_string_literal: + _compile_string_literal(); + goto .compile_term_end; + +.compile_term_call: + _compile_call(); + (* Move the call result into t0, where the other terms leave their value. *) + _write_z("\nmv t0, a0\n\0"); + goto .compile_term_end; + +.compile_term_variable: + _compile_variable_expression(); + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_binary_rhs(); +begin + (* Skip the whitespace after the binary operator. 
*) + _advance_token(1); + _compile_term(); + + (* Load the left expression from the stack. After that t1 holds the left operand *) + (* and t0 the right one. *) + _write_z("\tlw t1, 24(sp)\n\0"); +end; + +(* Compiles a term, optionally followed by one binary operator and a second term. *) +(* The generated code leaves the result in t0. *) +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + (* A space after the term means that a binary operator follows. *) + li t1, ' ' + bne a0, t1, .compile_expression_end + + (* It is a binary expression. *) + + (* Save the value of the left expression on the stack. *) + _write_z("sw t0, 24(sp)\n\0"); + + (* Skip surrounding whitespace in front of the operator. *) + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '+' + beq t0, t1, .compile_expression_add + + li t1, '*' + beq t0, t1, .compile_expression_mul + + li t1, '&' + beq t0, t1, .compile_expression_and + + (* "or" and "xor" are recognized by their first letter. *) + li t1, 'o' + beq t0, t1, .compile_expression_or + + li t1, 'x' + beq t0, t1, .compile_expression_xor + + li t1, '=' + beq t0, t1, .compile_expression_equals + + li t1, '<' + beq t0, t1, .compile_expression_less + + li t1, '>' + beq t0, t1, .compile_expression_greater + + (* Unknown binary operator. *) + unimp + +.compile_expression_add: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("add t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_mul: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tmul t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_and: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tand t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_or: + _advance_token(2); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("or t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_xor: + _advance_token(3); + _compile_binary_rhs(); + + (* Execute the operation. 
*) + _write_z("xor t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_equals: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_less: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '>' + beq t0, t1, .compile_expression_not_equal + + li t1, '=' + beq t0, t1, .compile_expression_less_equal + + _compile_binary_rhs(); + + (* Execute the operation. t1 is the left operand, t0 is the right operand. *) + _write_z("slt t0, t1, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_not_equal: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); + + goto .compile_expression_end; + +.compile_expression_less_equal: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. Less or equal is the negation of right operand less than left operand. *) + _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + +.compile_expression_greater: + _advance_token(1); + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '=' + beq t0, t1, .compile_expression_greater_equal + + _compile_binary_rhs(); + + (* Execute the operation. Left operand (t1) greater than right operand (t0) means *) + (* right operand less than left operand. *) + _write_z("\tslt t0, t0, t1\n\0"); + + goto .compile_expression_end; + +.compile_expression_greater_equal: + _advance_token(1); + _compile_binary_rhs(); + + (* Execute the operation. Greater or equal is the negation of left operand less than right operand. *) + _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); + + goto .compile_expression_end; + +.compile_expression_end: +end; + +proc _compile_call(); +begin + (* Stack variables: *) + (* v0 - Procedure name length. *) + (* v4 - Procedure name pointer. *) + (* v8 - Argument count. *) + + v0 := _read_token(); + v4 := source_code_position; + v8 := 0; + + (* Skip the identifier and left paren. 
*) + _advance_token(v0 + 1); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + (* A right paren directly after the left one means there are no arguments. *) + li t1, ')' + beq t0, t1, .compile_call_finalize + +.compile_call_loop: + _compile_expression(); + + (* Save the argument on the stack. *) + _write_z("\tsw t0, \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + (* Add one to the argument counter. *) + v8 := v8 + 1; + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, ',' + bne t0, t1, .compile_call_finalize + + (* Skip the comma and the whitespace after it. *) + _advance_token(2); + goto .compile_call_loop; + +.compile_call_finalize: + (* Load the argument from the stack. *) + (* The arguments are loaded into the a registers from the last to the first one. *) + + lw t0, 8(sp) + beqz t0, .compile_call_end + + (* Decrement the argument counter. *) + v8 := v8 + -1; + + _write_z("\tlw a\0"); + _write_i(v8); + + _write_z(", \0"); + + (* Calculate the stack offset: 116 - (4 * argument_counter) *) + v12 := v8 * 4; + v12 := 116 + -v12; + _write_i(v12); + + _write_z("(sp)\n\0"); + + goto .compile_call_finalize; + +.compile_call_end: + _write_z("\tcall \0"); + _write_s(v4, v0); + + (* Skip the right paren. *) + _advance_token(1); +end; + +(* Compiles a goto statement into an unconditional jump. *) +proc _compile_goto(); +begin + (* Skip "goto ". *) + _advance_token(5); + + v0 := _read_token(); + _write_z("\tj \0"); + + _write_token(v0); + (* Skip the label. Pass the length explicitly instead of relying on the value *) + (* _write_token left in a0. *) + _advance_token(v0); +end; + +proc _compile_local_designator(); +begin + (* Skip "v" in the local variable name. *) + _advance_token(1); + _write_z("\t addi t0, sp, \0"); + + (* Read local variable stack offset and save it. 
*) + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + _write_c('\n'); +end; + +(* Writes an instruction loading the address of a global symbol into t0. *) +proc _compile_global_designator(); +begin + _write_z("\tla t0, \0"); + + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + _write_c('\n'); +end; + +(* Compiles a designator. Names starting with "v" are local variables, *) +(* everything else is treated as a global symbol. *) +proc _compile_designator(); +begin + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, 'v' + beq a0, t1, .compile_designator_local + + goto .compile_designator_global; + +.compile_designator_local: + _compile_local_designator(); + goto .compile_designator_end; + +.compile_designator_global: + _compile_global_designator(); + goto .compile_designator_end; + +.compile_designator_end: +end; + +proc _compile_assignment(); +begin + _compile_designator(); + + (* Save the assignee address on the stack. *) + _write_z("\tsw t0, 20(sp)\n\0"); + + (* Skip the assignment sign (:=) with surrounding whitespaces. *) + _advance_token(4); + + (* Compile the assignment. *) + _compile_expression(); + + (* Restore the assignee address and store the expression value there. *) + _write_z("\tlw t1, 20(sp)\nsw t0, (t1)\n\0"); +end; + +proc _compile_return_statement(); +begin + (* Skip "return" keyword and whitespace after it. *) + _advance_token(7); + _compile_expression(); + + _write_z("mv a0, t0\n\0"); +end; + +(* Writes a label, .Ln, where n is a unique number. *) + +(* Parameters: *) +(* a0 - Label counter. *) +proc _write_label(); +begin + _write_z(".L\0"); + _write_i(v88); +end; + +proc _compile_if(); +begin + (* Skip "if ". *) + _advance_token(3); + (* Compile condition. *) + _compile_expression(); + (* Skip " then" with newline. *) + _advance_token(6); + + (* v0 is the label after the if statement. *) + v0 := label_counter; + _store_word(label_counter + 1, @label_counter); + (* v4 is the label in front of the next elsif condition or end. 
*) + v4 := label_counter; + _store_word(label_counter + 1, @label_counter); + + (* Jump over the then branch if the condition is false. *) + _write_z("\tbeqz t0, \0"); + _write_label(v4); + _write_c('\n'); + + _compile_procedure_body(); + + (* Jump over the else branch at the end of the then branch. *) + _write_z("\tj \0"); + _write_label(v0); + _write_c('\n'); + + _write_label(v4); + _write_z(":\n\0"); + + _memcmp(source_code_position, "end", 3); + beqz a0, .compile_if_end + + (* Both outcomes of this comparison continue with the else branch. *) + _memcmp(source_code_position, "else", 4); + beqz a0, .compile_if_else + +.compile_if_else: + (* Skip "else" and newline. *) + _advance_token(5); + _compile_procedure_body(); + +.compile_if_end: + (* Skip "end". *) + _advance_token(3); + + _write_label(v0); + _write_z(":\n\0"); +end; + +(* Compiles one statement. The kind of the statement is chosen by its first character. *) +proc _compile_statement(); +begin + _skip_spaces(); + (* This is a call if the statement starts with an underscore. *) + la t0, source_code_position + lw t0, (t0) + (* First character after alignment tab. *) + (* addi t0, t0, 1 *) + lb t0, (t0) + + li t1, '_' + beq t0, t1, .compile_statement_call + + li t1, 'g' + beq t0, t1, .compile_statement_goto + + li t1, 'v' + beq t0, t1, .compile_statement_assignment + + li t1, 'i' + beq t0, t1, .compile_statement_if + + (* The "return" keyword is 6 bytes long. 
*) + _memcmp(source_code_position, "return", 6); + beqz a0, .compile_statement_return + + (* Everything else is copied to the output unchanged. *) + _compile_line(); + goto .compile_statement_end; + +.compile_statement_call: + (* _advance_token(1); *) + _compile_call(); + + goto .compile_statement_semicolon; + +.compile_statement_goto: + (* _advance_token(1); *) + _compile_goto(); + + goto .compile_statement_semicolon; + +.compile_statement_assignment: + (* _advance_token(1); *) + _compile_assignment(); + + goto .compile_statement_semicolon; + +.compile_statement_if: + (* _advance_token(1); *) + _compile_if(); + + goto .compile_statement_semicolon; + +.compile_statement_return: + (* _advance_token(1); *) + _compile_return_statement(); + _write_c('\n'); + + goto .compile_statement_end; + +.compile_statement_semicolon: + (* Skip two bytes after the statement (the semicolon and the newline). *) + _advance_token(2); + _write_c('\n'); + +.compile_statement_end: +end; + +(* Compiles statements until a line starting with "end" or "else" is reached. *) +(* The keyword itself is not skipped. *) +proc _compile_procedure_body(); +begin +.compile_procedure_body_loop: + _skip_empty_lines(); + _skip_spaces(); + + _memcmp(source_code_position, "end", 3); + beqz a0, .compile_procedure_body_epilogue + + _memcmp(source_code_position, "else", 4); + beqz a0, .compile_procedure_body_epilogue + + _compile_statement(); + goto .compile_procedure_body_loop; + +.compile_procedure_body_epilogue: +end; + +(* Writes a register name to the standard output. *) + +(* Parameters: *) +(* a0 - Register character. *) +(* a1 - Register number. 
*) +proc _write_register(); +begin + _write_c(v88); + (* Convert the register number into a single digit character. *) + v84 := v84 + '0'; + _write_c(v84); +end; + +(* Writes the procedure prologue: it allocates a 128 bytes big stack frame, saves ra and s0 *) +(* and stores the argument registers a0 to a7 at the offsets 88 down to 60. *) +proc _compile_procedure_prologue(); +begin + _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z("\tsw a\0"); + _write_i(v0); + _write_z(", \0"); + + (* Calculate the stack offset: 88 - (4 * parameter_counter) *) + v4 := v0 * 4; + v4 := 88 + -v4; + _write_i(v4); + + _write_z("(sp)\n\0"); + + v0 := v0 + 1; + lw a0, 0(sp) + + (* Repeat for all 8 argument registers. *) + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + +proc _compile_procedure(); +begin + (* Skip "proc ". *) + _advance_token(5); + + (* Save the procedure name length. *) + v0 := _read_token(); + + (* Write .type _procedure_name, @function. *) + _write_z(".type \0"); + + _write_token(v0); + _write_z(", @function\n\0"); + + (* Write procedure label, _procedure_name: *) + _write_token(v0); + _write_z(":\n\0"); + + (* Skip the function name and trailing parens, semicolon, "begin" and newline. *) + _advance_token(v0 + 10); + + _compile_procedure_prologue(); + _compile_procedure_body(); + + (* Write the epilogue. *) + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + + (* Skip the "end" keyword, semicolon and newline. *) + _advance_token(5); +end; + +(* Advances the source code position past tab characters. *) +proc _skip_spaces(); +begin + (* t0 is the address of source_code_position, t1 is the current position. *) + la t0, source_code_position + lw t1, (t0) + +.skip_spaces_loop: + lb t2, (t1) + li t3, '\t' + bne t2, t3, .skip_spaces_end + (* This branch is never taken: t2 is a tab here and so not zero. *) + beqz t2, .skip_spaces_end + + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_spaces_loop; + +.skip_spaces_end: +end; + +(* Skips the rest of the current line including the newline. Nothing is printed. *) +proc _skip_comment(); +begin + la t0, source_code_position + lw t1, (t0) + +.skip_comment_loop: + (* Check for newline character. *) + lb t2, (t1) + li t3, '\n' + beq t2, t3, .skip_comment_end + + (* Advance the input string by one byte. *) + addi t1, t1, 1 + sw t1, (t0) + + goto .skip_comment_loop; + +.skip_comment_end: + (* Skip the newline. 
*) + addi t1, t1, 1 + sw t1, (t0) +end; + +(* Skip newlines and comments. *) +(* Tabs are only skipped if a newline or a comment follows them. *) +proc _skip_empty_lines(); +begin +.skip_empty_lines_rerun: + (* v0 is a local cursor. source_code_position is only updated when a newline or a comment is found. *) + la t0, source_code_position + lw t0, (t0) + sw t0, 0(sp) + +.skip_empty_lines_loop: + lw t2, 0(sp) + lb t0, (t2) + + li t1, '\n' + beq t0, t1, .skip_empty_lines_newline + + li t1, '\t' + beq t0, t1, .skip_empty_lines_tab + + li t1, '(' + bne t0, t1, .skip_empty_lines_end + addi t2, t2, 1 + lb t0, (t2) + li t1, '*' + beq t0, t1, .skip_empty_lines_comment + + goto .skip_empty_lines_end; + +.skip_empty_lines_comment: + la t0, source_code_position + lw t1, 0(sp) + sw t1, (t0) + _skip_comment(); + goto .skip_empty_lines_rerun; + +.skip_empty_lines_newline: + la t0, source_code_position + lw t1, 0(sp) + addi t1, t1, 1 + sw t1, (t0) + goto .skip_empty_lines_rerun; + +.skip_empty_lines_tab: + v0 := v0 + 1; + goto .skip_empty_lines_loop; + +.skip_empty_lines_end: +end; + +(* Compiles the initializer of a constant or a variable: a string, a record S(...), *) +(* a pointer to a symbol or a number. *) +proc _compile_global_initializer(); +begin + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + li t1, '"' + beq t0, t1, .compile_global_initializer_string + + li t1, 'S' + beq t0, t1, .compile_global_initializer_record + + li t1, '@' + beq t0, t1, .compile_global_initializer_pointer + + v0 := _load_byte(source_code_position); + _is_digit(v0); + bnez a0, .compile_global_initializer_number + + (* Unknown initializer. *) + unimp + +.compile_global_initializer_pointer: + (* Skip @. *) + _advance_token(1); + _write_z("\n\t.word \0"); + v0 := _read_token(); + _write_token(v0); + _advance_token(v0); + + goto .compile_global_initializer_end; + +.compile_global_initializer_number: + _write_z("\n\t.word \0"); + v0 := _read_token(); + _write_token(v0); + (* Skip the whole number, it can have more than one digit. *) + _advance_token(v0); + + goto .compile_global_initializer_end; + +.compile_global_initializer_record: + (* Skip "S(". 
*) + _advance_token(2); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + +.compile_global_initializer_loop: + (* Each record field is an initializer itself. *) + _compile_global_initializer(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ')' + beq t0, t1, .compile_global_initializer_closing + + (* Skip comma and whitespace after it. *) + _advance_token(2); + + goto .compile_global_initializer_loop; + +.compile_global_initializer_closing: + (* Skip ")" *) + _advance_token(1); + + goto .compile_global_initializer_end; + +.compile_global_initializer_string: + _write_z("\n\t.word strings + \0"); + v4 := _string_length(source_code_position); + + (* _write_i is called without arguments: it receives the offset _add_string left in a0. *) + _add_string(source_code_position); + _write_i(); + + (* Skip the quoted string. *) + _advance_token(v4 + 2); + + goto .compile_global_initializer_end; + +.compile_global_initializer_end: +end; + +proc _compile_constant_declaration(); +begin + (* Save the constant name length in v0. *) + _read_token(); + sw a0, 0(sp) + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the constant name with assignment sign and surrounding whitespaces. *) + _advance_token(v0 + 4); + _compile_global_initializer(); + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_const_part(); +begin + _skip_empty_lines(); + + (* Nothing to do if the source does not continue with "const". *) + _memcmp(source_code_position, "const\0", 5); + bnez a0, .compile_const_part_end + + (* Skip "const" with the newline after it. *) + _advance_token(6); + _write_z(".section .rodata # Compiled from const section.\n\n\0"); + +.compile_const_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + (* If the character at the line beginning is not indentation, *) + (* it is probably the next code section. 
*) + li t1, '\t' + bne t0, t1, .compile_const_part_end + + (* Skip the indentation tab. *) + _advance_token(1); + + _compile_constant_declaration(); + goto .compile_const_part_loop; + +.compile_const_part_end: +end; + +proc _compile_variable_declaration(); +begin + (* v0 is the variable name length. *) + v0 := _read_token(); + + _write_z(".type \0"); + _write_token(v0); + _write_z(", @object\n\0"); + + _write_token(v0); + _write_c(':'); + + (* Skip the variable name and colon with space before the type. *) + _advance_token(v0 + 2); + + (* Skip the type name. *) + v4 := _read_token(); + _advance_token(v4); + + (* A space after the type name means that an initializer follows. *) + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + li t1, ' ' + beq t0, t1, .compile_variable_declaration_initializer + + (* Else we assume this is a zeroed 81920 bytes big array. *) + _write_z(" .zero 81920\0"); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_initializer: + (* Skip the assignment sign with surrounding whitespaces. *) + _advance_token(4); + _compile_global_initializer(); + goto .compile_variable_declaration_finalize; + +.compile_variable_declaration_finalize: + (* Skip semicolon and newline. *) + _advance_token(2); + _write_c('\n'); +end; + +proc _compile_var_part(); +begin + (* Nothing to do if the source does not continue with "var". *) + _memcmp(source_code_position, "var\0", 3); + bnez a0, .compile_var_part_end + + (* Skip "var" and newline. *) + _advance_token(4); + _write_z(".section .data\n\0"); + +.compile_var_part_loop: + _skip_empty_lines(); + + la t0, source_code_position + lw t0, (t0) + lb t0, (t0) + + (* Declarations are indented with a tab. Anything else ends the var part. *) + li t1, '\t' + beq t0, t1, .compile_var_part_declaration + + goto .compile_var_part_end; + +.compile_var_part_declaration: + _advance_token(1); + _compile_variable_declaration(); + goto .compile_var_part_loop; + +.compile_var_part_end: +end; + +(* Process the source code and print the generated code. 
*)
+proc _compile_module();
+begin
+	_compile_const_part();
+	_skip_empty_lines();
+	_compile_var_part();
+
+	(* Helper routines that are emitted into every compiled program. *)
+	_write_z(".section .text\n\n\0");
+	_write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0");
+	_write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0");
+	_write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0");
+	_write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0");
+	_write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0");
+
+.compile_module_loop:
+	_skip_empty_lines();
+
+	(* A zero byte marks the end of the source text. *)
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	beqz t0, .compile_module_end
+
+	(* 5 is "proc " length. Space is needed to distinguish from "procedure". *)
+	_memcmp(source_code_position, "proc ", 5);
+	beqz a0, .compile_module_procedure
+
+	(* Not a known token, exit. *)
+	goto .compile_module_end;
+
+.compile_module_procedure:
+	_compile_procedure();
+
+	goto .compile_module_loop;
+
+.compile_module_end:
+end;
+
+(* Compiles the whole program: the module itself followed by the *)
+(* "strings" object holding the bytes collected in compiler_strings. *)
+proc _compile();
+begin
+	_write_z(".globl _start\n\n\0");
+	_compile_module();
+
+	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
+	_write_c('"');
+
+	(* v0 walks over the collected strings, v4 is the position where they end. *)
+	v0 := @compiler_strings;
+	v4 := compiler_strings_position;
+
+.compile_loop:
+	lw t0, 0(sp)
+	lw t1, 4(sp)
+	bge t0, t1, .compile_end
+
+	v8 := _load_byte(v0);
+	v0 := v0 + 1;
+	_write_c(v8);
+
+	j .compile_loop
+
+.compile_end:
+	_write_c('"');
+	_write_c('\n');
+end;
+
+(* Terminates the program. a0 contains the return code. *)
+
+(* Parameters: *)
+(* a0 - Status code. *)
+proc _exit();
+begin
+	(* Forward the status code (first parameter, v88) to the exit system call (93). *)
+	_syscall(v88, 0, 0, 0, 0, 0, 93);
+end;
+
+(* Inserts a symbol into the table. *)
+(* Each entry is 12 bytes long: name pointer, name length and symbol pointer. *)
+
+(* Parameters: *)
+(* a0 - Symbol pointer. *)
+(* a1 - Symbol name length. *)
+(* a2 - Symbol name pointer. *)
+(* a3 - Symbol table. *)
+proc _symbol_table_enter();
+begin
+	(* The first word in the symbol table is its length, get it. *)
+	lw a0, 76(sp)
+	lw a0, (a0)
+	sw a0, 0(sp)
+
+	(* Calculate the address for the new symbol: skip the length word *)
+	(* and all the entries, that are already in the table. *)
+	v4 := v0 * 12;
+	v4 := v4 + 4;
+	v4 := v76 + v4;
+
+	(* The parameters a2, a1 and a0 lie next to each other on the stack *)
+	(* beginning with v80, so they can be copied as a complete entry. *)
+	_memcpy(v4, @v80, 12);
+
+	(* Increment the symbol table length. *)
+	v0 := v0 + 1;
+	lw t0, 0(sp)
+	lw t1, 76(sp)
+	sw t0, (t1)
+end;
+
+(* Registers the built-in types in the global symbol table. *)
+proc _symbol_table_build();
+begin
+	_symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global);
+	_symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global);
+	_symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global);
+	_symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global);
+	_symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global);
+end;
+
+
+(* Classification table assigns each possible character to a group (class). All *)
+(* characters of the same group are handled equivalently. *)
+
+(* Classification: *)
+
+(* TransitionClass = ( *)
+(* transitionClassInvalid = 1, *)
+(* transitionClassDigit = 2, *)
+(* transitionClassAlpha = 3, *)
+(* transitionClassSpace = 4, *)
+(* transitionClassColon = 5, *)
+(* transitionClassEquals = 6, *)
+(* transitionClassLeftParen = 7, *)
+(* transitionClassRightParen = 8, *)
+(* transitionClassAsterisk = 9, *)
+(* transitionClassUnderscore = 10, *)
+(* transitionClassSingle = 11, *)
+(* transitionClassHex = 12, *)
+(* transitionClassZero = 13, *)
+(* transitionClassX = 14, *)
+(* transitionClassEof = 15, *)
+(* transitionClassDot = 16, *)
+(* transitionClassMinus = 17, *)
+(* transitionClassSingleQuote = 18, *)
+(* transitionClassDoubleQuote = 19, *)
+(* transitionClassGreater = 20, *)
+(* transitionClassLess = 21, *)
+(* transitionClassOther = 22 *)
+(* ); *)
+(* TransitionState = ( *)
+(* transitionStateStart = 1, *)
+(* transitionStateColon = 2, *)
+(* transitionStateIdentifier = 3, *)
+(* transitionStateDecimal = 4, *)
+(* transitionStateGreater = 5, *)
+(* transitionStateMinus = 6, *)
+(* transitionStateLeftParen = 7, *)
+(* transitionStateLess = 8, *)
+(* transitionStateDot = 9, *)
+(* transitionStateComment = 10, *)
+(* transitionStateClosingComment = 11, *)
+(* transitionStateCharacter = 12, *)
+(* transitionStateString = 13, *)
+(* transitionStateLeadingZero = 14, *)
+(* transitionStateDecimalSuffix = 15, *)
+(* transitionStateEnd = 16 *)
+(* ); *)
+(* Transition = record *)
+(* action: TransitionAction; *)
+(* next_state: TransitionState *)
+(* end; *)
+(* TransitionAction = ( *)
+(* none = 1, *)
+(* accumulate = 2, *)
+(* skip = 3, *)
+(* single = 4, *)
+(* eof = 5, *)
+(* finalize = 6, *)
+(* composite = 7, *)
+(* key_id = 8, *)
+(* integer = 9, *)
+(* delimited = 10 *)
+(* ); *)
+
+(* Assigns a value to the array element at the given index. *)
+
+(* Parameters: *)
+(* a0 - Array pointer. *)
+(* a1 - Index of the element, beginning with 1. Elements are one word (4 bytes) long. *)
+(* a2 - Data to assign. *)
+proc _assign_at();
+begin
+	(* Convert the index into a byte offset and add it to the array pointer. *)
+	v0 := v84 + -1;
+	v0 := v0 * 4;
+	v0 := v88 + v0;
+
+	(* Store the data (a2) at the calculated address. *)
+	lw t0, 0(sp)
+	lw t1, 80(sp)
+	sw t1, (t0)
+end;
+
+(* Fills the classification table with a transition class for each of the 256 *)
+(* byte values. The entry for a byte is stored at the index "byte value + 1". *)
+proc _create_classification();
+begin
+	_assign_at(@classification, 1, 15);
+	_assign_at(@classification, 2, 1);
+	_assign_at(@classification, 3, 1);
+	_assign_at(@classification, 4, 1);
+	_assign_at(@classification, 5, 1);
+	_assign_at(@classification, 6, 1);
+	_assign_at(@classification, 7, 1);
+	_assign_at(@classification, 8, 1);
+	_assign_at(@classification, 9, 1);
+	_assign_at(@classification, 10, 4);
+	_assign_at(@classification, 11, 4);
+	_assign_at(@classification, 12, 1);
+	_assign_at(@classification, 13, 1);
+	_assign_at(@classification, 14, 4);
+	_assign_at(@classification, 15, 1);
+	_assign_at(@classification, 16, 1);
+	_assign_at(@classification, 17, 1);
+	_assign_at(@classification, 18, 1);
+	_assign_at(@classification, 19, 1);
+	_assign_at(@classification, 20, 1);
+	_assign_at(@classification, 21, 1);
+	_assign_at(@classification, 22, 1);
+	_assign_at(@classification, 23, 1);
+	_assign_at(@classification, 24, 1);
+	_assign_at(@classification, 25, 1);
+	_assign_at(@classification, 26, 1);
+	_assign_at(@classification, 27, 1);
+	_assign_at(@classification, 28, 1);
+	_assign_at(@classification, 29, 1);
+	_assign_at(@classification, 30, 1);
+	_assign_at(@classification, 31, 1);
+	_assign_at(@classification, 32, 1);
+	_assign_at(@classification, 33, 4);
+	_assign_at(@classification, 34, 11);
+	_assign_at(@classification, 35, 19);
+	_assign_at(@classification, 36, 22);
+	_assign_at(@classification, 37, 22);
+	_assign_at(@classification, 38, 11);
+	_assign_at(@classification, 39, 11);
+	_assign_at(@classification, 40, 18);
+	_assign_at(@classification, 41, 7);
+	_assign_at(@classification, 42, 8);
+	_assign_at(@classification, 43, 9);
+	_assign_at(@classification, 44, 11);
+	_assign_at(@classification, 45, 11);
+	_assign_at(@classification, 46, 17);
+	_assign_at(@classification, 47, 16);
+	_assign_at(@classification, 48, 11);
+	_assign_at(@classification, 49, 13);
+	_assign_at(@classification, 50, 2);
+	_assign_at(@classification, 51, 2);
+	_assign_at(@classification, 52, 2);
+	_assign_at(@classification, 53, 2);
+	_assign_at(@classification, 54, 2);
+	_assign_at(@classification, 55, 2);
+	_assign_at(@classification, 56, 2);
+	_assign_at(@classification, 57, 2);
+	_assign_at(@classification, 58, 2);
+	_assign_at(@classification, 59, 5);
+	_assign_at(@classification, 60, 11);
+	_assign_at(@classification, 61, 21);
+	_assign_at(@classification, 62, 6);
+	_assign_at(@classification, 63, 20);
+	_assign_at(@classification, 64, 22);
+	_assign_at(@classification, 65, 11);
+	_assign_at(@classification, 66, 3);
+	_assign_at(@classification, 67, 3);
+	_assign_at(@classification, 68, 3);
+	_assign_at(@classification, 69, 3);
+	_assign_at(@classification, 70, 3);
+	_assign_at(@classification, 71, 3);
+	_assign_at(@classification, 72, 3);
+	_assign_at(@classification, 73, 3);
+	_assign_at(@classification, 74, 3);
+	_assign_at(@classification, 75, 3);
+	_assign_at(@classification, 76, 3);
+	_assign_at(@classification, 77, 3);
+	_assign_at(@classification, 78, 3);
+	_assign_at(@classification, 79, 3);
+	_assign_at(@classification, 80, 3);
+	_assign_at(@classification, 81, 3);
+	_assign_at(@classification, 82, 3);
+	_assign_at(@classification, 83, 3);
+	_assign_at(@classification, 84, 3);
+	_assign_at(@classification, 85, 3);
+	_assign_at(@classification, 86, 3);
+	_assign_at(@classification, 87, 3);
+	_assign_at(@classification, 88, 3);
+	_assign_at(@classification, 89, 3);
+	_assign_at(@classification, 90, 3);
+	_assign_at(@classification, 91, 3);
+	_assign_at(@classification, 92, 11);
+	_assign_at(@classification, 93, 22);
+	_assign_at(@classification, 94, 11);
+	_assign_at(@classification, 95, 11);
+	_assign_at(@classification, 96, 10);
+	_assign_at(@classification, 97, 22);
+	_assign_at(@classification, 98, 12);
+	_assign_at(@classification, 99, 12);
+	_assign_at(@classification, 100, 12);
+	_assign_at(@classification, 101, 12);
+	_assign_at(@classification, 102, 12);
+	_assign_at(@classification, 103, 12);
+	_assign_at(@classification, 104, 3);
+	_assign_at(@classification, 105, 3);
+	_assign_at(@classification, 106, 3);
+	_assign_at(@classification, 107, 3);
+	_assign_at(@classification, 108, 3);
+	_assign_at(@classification, 109, 3);
+	_assign_at(@classification, 110, 3);
+	_assign_at(@classification, 111, 3);
+	_assign_at(@classification, 112, 3);
+	_assign_at(@classification, 113, 3);
+	_assign_at(@classification, 114, 3);
+	_assign_at(@classification, 115, 3);
+	_assign_at(@classification, 116, 3);
+	_assign_at(@classification, 117, 3);
+	_assign_at(@classification, 118, 3);
+	_assign_at(@classification, 119, 3);
+	_assign_at(@classification, 120, 3);
+	_assign_at(@classification, 121, 14);
+	_assign_at(@classification, 122, 3);
+	_assign_at(@classification, 123, 3);
+	_assign_at(@classification, 124, 22);
+	_assign_at(@classification, 125, 11);
+	_assign_at(@classification, 126, 22);
+	_assign_at(@classification, 127, 11);
+	_assign_at(@classification, 128, 1);
+
+	v0 := 129;
+
+(* Set the remaining entries 129 - 256 to transitionClassOther. *)
+.create_classification_loop:
+	_assign_at(@classification, v0, 22);
+	v0 := v0 + 1;
+
+	(* Repeat as long as the index in v0 is less than 257. *)
+	lw t0, 0(sp)
+	li t1, 257
+	blt t0, t1, .create_classification_loop
+end;
+
+(* Writes one cell (action and next state) of the transition table. *)
+
+(* Parameters: *)
+(* a0 - Current state (first index into transitions table). *)
+(* a1 - Transition class (second index into transitions table). *)
+(* a2 - Action to assign. *)
+(* a3 - Next state to assign. *)
+proc _set_transition();
+begin
+	(* Transitions start at byte offset 256 in the classification array. Save the transitions start in v0. *)
+	(* NOTE(review): _assign_at stores classification entries as 4 byte words with indices up to 256, *)
+	(* so this offset lies inside the classification entries. Confirm the intended start. *)
+	(* NOTE(review): the assignment below is not terminated with a semicolon, unlike all the others. *)
+	v0 := @classification + 256
+
+	(* Each cell is 8 bytes long (2 words: action and next state). *)
+	(* A row of 16 cells is 8 * 16 = 128 bytes long. *)
+	(* NOTE(review): 22 transition classes are listed in the TransitionClass enumeration and *)
+	(* used by the callers, for the classes 17 - 22 the cell offset exceeds the row size. Confirm. *)
+
+	(* Row offset of the state. *)
+	v4 := v88 + -1;
+	v4 := v4 * 128;
+
+	(* Cell offset inside the row. *)
+	v8 := v84 + -1;
+	v8 := v8 * 8;
+
+	v12 := v0 + v4;
+	v12 := v12 + v8;
+
+	(* Store the action (a2) in the first word and the next state (a3) in the second one. *)
+	lw t0, 12(sp)
+	lw t1, 80(sp)
+	lw t2, 76(sp)
+	sw t1, (t0)
+	addi t0, t0, 4
+	sw t2, (t0)
+end;
+
+(* Parameters: *)
+(* a0 - Current state (Transition state enumeration). *)
+(* a1 - Default action (Callback). *)
+(* a2 - Next state (Transition state enumeration).
*)
+proc _set_default_transition();
+begin
+	(* Assign the same action and next state to each of the 22 transition classes. *)
+	_set_transition(v88, 1, v84, v80);
+	_set_transition(v88, 2, v84, v80);
+	_set_transition(v88, 3, v84, v80);
+	_set_transition(v88, 4, v84, v80);
+	_set_transition(v88, 5, v84, v80);
+	_set_transition(v88, 6, v84, v80);
+	_set_transition(v88, 7, v84, v80);
+	_set_transition(v88, 8, v84, v80);
+	_set_transition(v88, 9, v84, v80);
+	_set_transition(v88, 10, v84, v80);
+	_set_transition(v88, 11, v84, v80);
+	_set_transition(v88, 12, v84, v80);
+	_set_transition(v88, 13, v84, v80);
+	_set_transition(v88, 14, v84, v80);
+	_set_transition(v88, 15, v84, v80);
+	_set_transition(v88, 16, v84, v80);
+	_set_transition(v88, 17, v84, v80);
+	_set_transition(v88, 18, v84, v80);
+	_set_transition(v88, 19, v84, v80);
+	_set_transition(v88, 20, v84, v80);
+	_set_transition(v88, 21, v84, v80);
+	_set_transition(v88, 22, v84, v80);
+end;
+
+
+(* The transition table describes transitions from one state to another, given *)
+(* a symbol (character class). *)
+
+(* The table has m rows and n columns, where m is the amount of states and n is *)
+(* the amount of classes. So given the current state and a classified character *)
+(* the table can be used to look up the next state. *)
+
+(* Each cell is a word long. *)
+(* - The least significant byte of the word is a row number (beginning with 0). *)
+(* It specifies the target state. "ff" means that this is an end state and no *)
+(* transition is possible. *)
+(* - The next byte is the action that should be performed when transitioning. *)
+(* For the meaning of actions see labels in the lex_next function, which *)
+(* handles each action. *)
+
+(* The arguments of the _set_transition calls below are: current state, *)
+(* transition class, action and next state. *)
+proc _create_transitions();
+begin
+	(* Start state. *)
+	_set_transition(1, 1, 1, 16);
+	_set_transition(1, 2, 2, 4);
+	_set_transition(1, 3, 2, 3);
+	_set_transition(1, 4, 3, 1);
+	_set_transition(1, 5, 2, 5);
+	_set_transition(1, 6, 4, 16);
+	_set_transition(1, 7, 2, 7);
+	_set_transition(1, 8, 4, 16);
+	_set_transition(1, 9, 4, 16);
+	_set_transition(1, 10, 2, 3);
+	_set_transition(1, 11, 4, 16);
+	_set_transition(1, 12, 2, 3);
+	_set_transition(1, 13, 2, 14);
+	_set_transition(1, 14, 2, 3);
+	_set_transition(1, 15, 5, 16);
+	_set_transition(1, 16, 2, 9);
+	_set_transition(1, 17, 2, 6);
+	_set_transition(1, 18, 2, 12);
+	_set_transition(1, 19, 2, 13);
+	_set_transition(1, 20, 2, 5);
+	_set_transition(1, 21, 2, 8);
+	_set_transition(1, 22, 1, 16);
+
+	(* Colon state. *)
+	_set_default_transition(2, 6, 16);
+	_set_transition(2, 6, 7, 16);
+
+	(* Identifier state. *)
+	_set_default_transition(3, 8, 16);
+	_set_transition(3, 2, 2, 3);
+	_set_transition(3, 3, 2, 3);
+	_set_transition(3, 10, 2, 3);
+	_set_transition(3, 12, 2, 3);
+	_set_transition(3, 13, 2, 3);
+	_set_transition(3, 14, 2, 3);
+
+	(* Decimal state. *)
+	_set_default_transition(4, 9, 16);
+	_set_transition(4, 2, 2, 4);
+	_set_transition(4, 3, 2, 15);
+	_set_transition(4, 10, 1, 16);
+	_set_transition(4, 12, 2, 15);
+	_set_transition(4, 13, 2, 4);
+	_set_transition(4, 14, 2, 15);
+
+	(* Greater state. *)
+	_set_default_transition(5, 6, 16);
+	_set_transition(5, 6, 7, 16);
+
+	(* Minus state. *)
+	_set_default_transition(6, 6, 16);
+	_set_transition(6, 20, 7, 16);
+
+	(* Left paren state. *)
+	_set_default_transition(7, 6, 16);
+	_set_transition(7, 9, 2, 10);
+
+	(* Less state. *)
+	_set_default_transition(8, 6, 16);
+	_set_transition(8, 6, 7, 16);
+	_set_transition(8, 20, 7, 16);
+
+	(* Dot state. *)
+	_set_default_transition(9, 6, 16);
+	_set_transition(9, 16, 7, 16);
+
+	(* Comment. *)
+	_set_default_transition(10, 2, 10);
+	_set_transition(10, 9, 2, 11);
+	_set_transition(10, 15, 1, 16);
+
+	(* Closing comment. *)
+	_set_default_transition(11, 2, 10);
+	_set_transition(11, 1, 1, 16);
+	_set_transition(11, 8, 10, 16);
+	_set_transition(11, 9, 2, 11);
+	_set_transition(11, 15, 1, 16);
+
+	(* Character. *)
+	_set_default_transition(12, 2, 12);
+	_set_transition(12, 1, 1, 16);
+	_set_transition(12, 15, 1, 16);
+	_set_transition(12, 18, 10, 16);
+
+	(* String. *)
+	_set_default_transition(13, 2, 13);
+	_set_transition(13, 1, 1, 16);
+	_set_transition(13, 15, 1, 16);
+	_set_transition(13, 19, 10, 16);
+
+	(* Leading zero. *)
+	_set_default_transition(14, 9, 16);
+	_set_transition(14, 2, 1, 16);
+	_set_transition(14, 3, 1, 16);
+	_set_transition(14, 10, 1, 16);
+	_set_transition(14, 12, 1, 16);
+	_set_transition(14, 13, 1, 16);
+	_set_transition(14, 14, 1, 16);
+
+	(* Digit with a character suffix. *)
+	_set_default_transition(15, 9, 16);
+	_set_transition(15, 3, 1, 16);
+	_set_transition(15, 2, 1, 16);
+	_set_transition(15, 12, 1, 16);
+	_set_transition(15, 13, 1, 16);
+	_set_transition(15, 14, 1, 16);
+end;
+
+(* Returns the address of the lexer state. *)
+proc _lexer_get_state();
+begin
+	(* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *)
+	(* NOTE(review): _set_transition writes 8 byte cells in 128 byte rows beginning at *)
+	(* the offset 256, so this address seems to lie inside the table. Confirm the offset. *)
+	v0 := @classification;
+	v4 := 16 * 22;
+	v0 := v0 + 256;
+
+	return v0 + v4
+end;
+
+(* Gets pointer to the current source text. *)
+(* It is saved in the word following the lexer state. *)
+proc _lexer_get_current();
+begin
+	v0 := _lexer_get_state();
+
+	return v0 + 4
+end;
+
+(* Resets the lexer state for reading the next token. *)
+proc _lexer_reset();
+begin
+	(* Transition start state is 1. *)
+	_lexer_get_state();
+	li t0, 1
+	sw t0, (a0)
+	sw a0, 0(sp)
+
+	(* Text pointer to the beginning of the currently read token. *)
+	_lexer_get_current();
+	la t0, source_code_position
+	lw t0, (t0)
+	sw t0, (a0)
+
+	(* Initial length of the token is 0. The length is saved in the word after *)
+	(* the text pointer, so the address is derived from a0 and not from the *)
+	(* source position in t0, which would overwrite the source text. *)
+	addi t0, a0, 4
+	sw zero, (t0)
+end;
+
+(* One time lexer initialization. *)
+proc _lexer_initialize();
+begin
+	_create_classification();
+	_create_transitions();
+end;
+
+(* Entry point.
*)
+proc _start();
+begin
+	_lexer_initialize();
+	_symbol_table_build();
+
+	(* Read the source from the standard input. *)
+	(* Second argument is buffer size. When modifying it, update the source_code definition as well. *)
+	_read_file(@source_code, 81920);
+	_compile();
+
+	_exit(0);
+end;
diff --git a/boot/symbol.s b/boot/symbol.s
deleted file mode 100644
index fdf4ad2..0000000
--- a/boot/symbol.s
+++ /dev/null
@@ -1,297 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-.global symbol_table
-.global symbol_table_build, symbol_table_lookup, symbol_table_enter, symbol_table_dump
-.global symbol_table_make_pointer, symbol_table_make_parameter, symbol_table_make_local, symbol_table_make_procedure
-
-.include "boot/definitions.inc"
-
-.equ SYMBOL_PRIME, 1543
-
-.section .rodata
-
-.type symbol_builtin_name_int, @object
-symbol_builtin_name_int: .ascii "Int"
-.type symbol_builtin_name_word, @object
-symbol_builtin_name_word: .ascii "Word"
-.type symbol_builtin_name_byte, @object
-symbol_builtin_name_byte: .ascii "Byte"
-.type symbol_builtin_name_char, @object
-symbol_builtin_name_char: .ascii "Char"
-.type symbol_builtin_name_bool, @object
-symbol_builtin_name_bool: .ascii "Bool"
-
-# Every type info starts with a word describing what type it is.
-
-# Primitive types have only type size.
-.type symbol_builtin_type_int, @object
-symbol_builtin_type_int: .word TYPE_PRIMITIVE
-	.word 4
-.type symbol_builtin_type_word, @object
-symbol_builtin_type_word: .word TYPE_PRIMITIVE
-	.word 4
-.type symbol_builtin_type_byte, @object
-symbol_builtin_type_byte: .word TYPE_PRIMITIVE
-	.word 1
-.type symbol_builtin_type_char, @object
-symbol_builtin_type_char: .word TYPE_PRIMITIVE
-	.word 1
-.type symbol_builtin_type_bool, @object
-symbol_builtin_type_bool: .word TYPE_PRIMITIVE
-	.word 1
-
-.section .bss
-
-# The first word of the symbol table is its length.
-# Then a list of type infos follows:
-#
-# record
-# name: String
-# info: ^TypeInfo
-# end
-.type symbol_table, @object
-symbol_table: .zero SYMBOL_PRIME
-
-.section .text
-
-# Prints the list of symbols in the table.
-.type symbol_table_dump, @function
-symbol_table_dump:
-	# Prologue.
-	addi sp, sp, -32
-	sw ra, 28(sp)
-	sw s0, 24(sp)
-	addi s0, sp, 32
-
-	sw s1, 20(sp) # Current symbol in the table.
-	sw s2, 16(sp) # Symbol table length.
-
-	la s1, symbol_table
-	lw s2, 0(s1)
-	addi s1, s1, 4 # Advance to the first symbol in the table.
-
-.Lsymbol_table_dump_loop:
-	beqz s2, .Lsymbol_table_dump_end
-
-	# Print the name of the current symbol (a0 - name pointer, a1 - name length).
-	lw a0, 4(s1)
-	lw a1, 0(s1)
-	call _write_error
-
-	addi s1, s1, 12
-	addi s2, s2, -1
-	j .Lsymbol_table_dump_loop
-
-.Lsymbol_table_dump_end:
-	lw s1, 20(sp)
-	lw s2, 16(sp)
-
-	# Epilogue.
-	lw ra, 28(sp)
-	lw s0, 24(sp)
-	addi sp, sp, 32
-	ret
-
-# Searches for a symbol by name.
-#
-# Parameters:
-# a0 - Length of the symbol to search.
-# a1 - Pointer to the symbol name.
-#
-# Sets a0 to the symbol info pointer or 0 if the symbol has not been found.
-.type symbol_table_lookup, @function
-symbol_table_lookup:
-	# Prologue.
-	addi sp, sp, -32
-	sw ra, 28(sp)
-	sw s0, 24(sp)
-	addi s0, sp, 32
-
-	sw s1, 20(sp) # Current symbol in the table.
-	sw s2, 16(sp) # Symbol table length.
-	sw s3, 12(sp) # Length of the symbol to search.
-	sw s4, 8(sp) # Pointer to the symbol to search.
-
-	mv s3, a0
-	mv s4, a1
-
-	la s1, symbol_table
-	lw s2, 0(s1)
-	addi s1, s1, 4 # Advance to the first symbol in the table.
-
-.Lsymbol_table_lookup_loop:
-	beqz s2, .Lsymbol_table_lookup_not_found
-
-	# Compare the searched name with the name of the current symbol.
-	mv a0, s3
-	mv a1, s4
-	lw a2, 0(s1)
-	lw a3, 4(s1)
-	call _string_equal
-
-	beqz a0, .Lsymbol_table_lookup_continue
-
-	lw a0, 8(s1) # Pointer to the symbol.
-	j .Lsymbol_table_lookup_end
-
-.Lsymbol_table_lookup_continue:
-	addi s1, s1, 12
-	addi s2, s2, -1
-	j .Lsymbol_table_lookup_loop
-
-.Lsymbol_table_lookup_not_found:
-	li a0, 0
-
-.Lsymbol_table_lookup_end:
-	lw s1, 20(sp)
-	lw s2, 16(sp)
-	lw s3, 12(sp)
-	lw s4, 8(sp)
-
-	# Epilogue.
-	lw ra, 28(sp)
-	lw s0, 24(sp)
-	addi sp, sp, 32
-	ret
-
-# Creates a pointer type.
-#
-# Parameters:
-# a0 - Pointer to the base type.
-# a1 - Output memory.
-#
-# Sets a0 to the size of newly created type in bytes.
-.type symbol_table_make_pointer, @function
-symbol_table_make_pointer:
-	li t0, TYPE_POINTER
-	sw t0, 0(a1)
-	sw a0, 4(a1)
-
-	li a0, 8
-	ret
-
-# Creates a parameter info.
-#
-# Parameters:
-# a0 - Pointer to the parameter type.
-# a1 - Parameter offset.
-# a2 - Output memory.
-#
-# Sets a0 to the size of newly created info object in bytes.
-.type symbol_table_make_parameter, @function
-symbol_table_make_parameter:
-	li t0, INFO_PARAMETER
-	sw t0, 0(a2)
-	sw a0, 4(a2)
-	sw a1, 8(a2)
-
-	li a0, 12
-	ret
-
-# Creates a local variable info.
-#
-# Parameters:
-# a0 - Pointer to the variable type.
-# a1 - Variable stack offset.
-# a2 - Output memory.
-#
-# Sets a0 to the size of newly created info object in bytes.
-.type symbol_table_make_local, @function
-symbol_table_make_local:
-	li t0, INFO_LOCAL
-	sw t0, 0(a2)
-	sw a0, 4(a2)
-	sw a1, 8(a2)
-
-	li a0, 12
-	ret
-
-# Creates a procedure type and procedure info objects referring to the type.
-#
-# Parameters:
-# a0 - Output memory.
-#
-# Sets a0 to the size of newly created info object in bytes.
-.type symbol_table_make_procedure, @function
-symbol_table_make_procedure:
-	li t0, TYPE_PROCEDURE
-	sw t0, 8(a0)
-
-	li t0, INFO_PROCEDURE
-	sw t0, 0(a0)
-	sw a0, 4(a0) # NOTE(review): stores the info address itself, the type word is written at offset 8. Confirm.
-
-	li a0, 12
-	ret
-
-# Inserts a symbol into the table.
-#
-# Parameters:
-# a0 - Symbol name length.
-# a1 - Symbol name pointer.
-# a2 - Symbol pointer.
-.type symbol_table_enter, @function
-symbol_table_enter:
-	la t0, symbol_table
-
-	lw t1, 0(t0) # Current table length.
-	li t2, 12 # Calculate the offset to the next entry.
-	mul t2, t1, t2
-	addi t2, t2, 4
-	add t2, t0, t2
-
-	sw a0, 0(t2)
-	sw a1, 4(t2)
-	sw a2, 8(t2)
-
-	addi t1, t1, 1 # Save the new length.
-	sw t1, 0(t0)
-
-	ret
-
-# Build the initial symbols.
-#
-# NOTE(review): a0 is not set to the symbol table before returning, t0 computed below is not used.
-.type symbol_table_build, @function
-symbol_table_build:
-	# Prologue.
-	addi sp, sp, -16
-	sw ra, 12(sp)
-	sw s0, 8(sp)
-	addi s0, sp, 16
-
-	la a0, symbol_table
-	addi t0, a0, 4
-
-	li a0, 3 # Length of the word "Int".
-	la a1, symbol_builtin_name_int
-	la a2, symbol_builtin_type_int
-	call symbol_table_enter
-
-	li a0, 4 # Length of the word "Word".
-	la a1, symbol_builtin_name_word
-	la a2, symbol_builtin_type_word
-	call symbol_table_enter
-
-	li a0, 4 # Length of the word "Byte".
-	la a1, symbol_builtin_name_byte
-	la a2, symbol_builtin_type_byte
-	call symbol_table_enter
-
-	li a0, 4 # Length of the word "Char".
-	la a1, symbol_builtin_name_char
-	la a2, symbol_builtin_type_char
-	call symbol_table_enter
-
-	li a0, 4 # Length of the word "Bool".
-	la a1, symbol_builtin_name_bool
-	la a2, symbol_builtin_type_bool
-	call symbol_table_enter
-
-	# Epilogue.
-	lw ra, 12(sp)
-	lw s0, 8(sp)
-	addi sp, sp, 16
-	ret
-- 
cgit v1.2.3