diff options
| author | Eugen Wissner <belka@caraus.de> | 2025-11-08 11:07:39 +0100 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2025-11-08 11:07:39 +0100 |
| commit | d144cb21012c911135d5047059449195a89ea239 (patch) | |
| tree | db8d8a69092192c07b2ab2c76bfd51848e3884fd /boot/stage11.elna | |
| parent | 0b516345666b52d29bb10521b4d3c2c2420b3368 (diff) | |
| download | elna-d144cb21012c911135d5047059449195a89ea239.tar.gz | |
Move stages into subdirectories
Diffstat (limited to 'boot/stage11.elna')
| -rw-r--r-- | boot/stage11.elna | 1738 |
1 files changed, 0 insertions, 1738 deletions
diff --git a/boot/stage11.elna b/boot/stage11.elna deleted file mode 100644 index 74447d6..0000000 --- a/boot/stage11.elna +++ /dev/null @@ -1,1738 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, *) -(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *) -(* obtain one at https://mozilla.org/MPL/2.0/. *) - -(* Stage 11 compiler. *) - -(* - Removed support for inline assembly statements. *) -(* - Assignment to global variables. *) -(* - In procedure declarations skip everything between parameter parens. *) - -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; - - (* Every type info starts with a word describing what type it is. *) - - (* PRIMITIVE_TYPE = 1 *) - - (* Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); - - (* Info objects start with a word describing its type. *) - - (* INFO_TYPE = 1 *) - - (* Type info has the type it belongs to. *) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); - -var - source_code: Array; - compiler_strings: Array; - symbol_table_global: Array; - symbol_table_local: Array; - classification: Array; - - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; - source_code_position: Pointer := @source_code; - -(* Calculates and returns the string token length between quotes, including the *) -(* escaping slash characters. 
*) - -(* Parameters: *) -(* a0 - String token pointer. It points at the opening quote, counting starts *) -(* with the character after it. *) - -(* Returns the length in a0. *) -proc _string_length(); -begin - (* Reset the counter. *) - v0 := 0; - - .string_length_loop; - v88 := v88 + 1; - - if _load_byte(v88) <> '"' then - v0 := v0 + 1; - goto .string_length_loop; - end; - - return v0 -end; - -(* Adds a string to the global, read-only string storage. *) - -(* Parameters: *) -(* a0 - String token. *) - -(* Returns the offset from the beginning of the storage to the new string in a0. *) -proc _add_string(); -begin - v0 := v88 + 1; - (* Remember the length before the copy, it is the returned offset. *) - v4 := compiler_strings_length; - - .add_string_loop; - if _load_byte(v0) <> '"' then - v8 := _load_byte(v0); - _store_byte(v8, compiler_strings_position); - _store_word(compiler_strings_position + 1, @compiler_strings_position); - v0 := v0 + 1; - - (* A backslash is copied into the storage, but it is not counted in the length. *) - if v8 <> '\\' then - _store_word(compiler_strings_length + 1, @compiler_strings_length); - end; - goto .add_string_loop; - end; - - return v4 -end; - -(* Reads standard input into a buffer. *) - -(* Parameters: *) -(* a0 - Buffer pointer. *) -(* a1 - Buffer size. *) - -(* Returns the amount of bytes read in a0. *) -proc _read_file(); -begin - _syscall(0, v88, v84, 0, 0, 0, 63); -end; - -(* Writes to the standard output. *) - -(* Parameters: *) -(* a0 - Buffer. *) -(* a1 - Buffer length. *) -proc _write_s(); -begin - _syscall(1, v88, v84, 0, 0, 0, 64); -end; - -(* Writes a number to a string buffer. *) - -(* t0 - Local buffer. *) -(* t1 - Constant 10. *) -(* t2 - Current character. *) -(* t3 - Whether the number is negative. *) - -(* Parameters: *) -(* a0 - Whole number. *) -(* a1 - Buffer pointer. *) - -(* Sets a0 to the length of the written number. 
*) -proc _print_i(); -begin - (* The digits are produced starting with the least significant one, *) - (* so the text is built backwards, beginning at v23. *) - v0 := @v23; - - if v88 >= 0 then - v4 := 0; - else - (* Continue with the absolute value and remember the sign in v4. *) - v88 := -v88; - v4 := 1; - end; - - .print_i_digit10; - v8 := v88 % 10; - _store_byte(v8 + '0', v0); - - v88 := v88 / 10; - v0 := v0 + -1; - - if v88 <> 0 then - goto .print_i_digit10; - end; - if v4 = 1 then - _store_byte('-', v0); - v0 := v0 + -1; - end; - (* v0 points one byte in front of the text, so the difference is the text length. *) - v4 := @v23 + -v0; - _memcpy(v84, v0 + 1, v4); - - return v4 -end; - -(* Writes a number to the standard output. *) - -(* Parameters: *) -(* a0 - Whole number. *) -proc _write_i(); -begin - (* The text buffer starts at v0. The length is kept in v12, behind the buffer, *) - (* because a length stored in v4 would overwrite the characters 5 to 8 of the number. *) - v12 := _print_i(v88, @v0); - _write_s(@v0, v12); -end; - -(* Writes a character from a0 into the standard output. *) -proc _write_c(); -begin - _write_s(@v88, 1); -end; - -(* Write null terminated string. *) - -(* Parameters: *) -(* a0 - String. *) -proc _write_z(); -begin - (* Check for 0 character. *) - v0 := _load_byte(v88); - - if v0 <> 0 then - (* Print a character. *) - _write_c(v0); - - (* Advance the input string by one byte. *) - _write_z(v88 + 1); - end; -end; - -(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_upper(); -begin - v0 := v88 >= 'A'; - v4 := v88 <= 'Z'; - - return v0 & v4 - -end; - -(* Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. *) -proc _is_lower(); -begin - v0 := v88 >= 'a'; - v4 := v88 <= 'z'; - - return v0 & v4 - -end; - -(* Detects if the passed character is a 7-bit alpha character or an underscore. *) - -(* Parameters: *) -(* a0 - Tested character. *) - -(* Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise. *) -proc _is_alpha(); -begin - v0 := _is_upper(v88); - v4 := _is_lower(v88); - v8 := v88 = '_'; - - v12 := v0 or v4; - return v12 or v8 -end; - -(* Detects whether the passed character is a digit *) -(* (a value between 0 and 9). *) - -(* Parameters: *) -(* a0 - Examined value. *) - -(* Sets a0 to 1 if it is a digit, to 0 otherwise. 
*) -proc _is_digit(); -begin - v0 := v88 >= '0'; - v4 := v88 <= '9'; - - return v0 & v4 -end; - -(* Detects whether the passed character is an alpha character, an underscore or a digit. *) - -(* Parameters: *) -(* a0 - Tested character. *) - -(* Sets a0 to 1 if _is_alpha or _is_digit accepts the character, to 0 otherwise. *) -proc _is_alnum(); -begin - v0 := _is_alpha(v88); - v4 := _is_digit(v88); - - return v0 or v4 -end; - -(* Reads the next token. *) - -(* Returns token length in a0. *) -proc _read_token(); -begin - (* Current token position. *) - v0 := source_code_position; - (* Token length. *) - v4 := 0; - - .read_token_loop; - (* Current character. *) - v8 := _load_byte(v0); - - (* First we try to read a directive. *) - (* A directive can contain a dot and characters. *) - v12 := v8 = '.'; - v16 := _is_alnum(v8); - - if v12 or v16 then - (* Advance the local cursor and the token length. *) - (* The global source_code_position is not changed here. *) - v4 := v4 + 1; - v0 := v0 + 1; - - goto .read_token_loop; - end; - - return v4 -end; - -(* Compares two memory regions byte by byte and stops at the first difference. *) - -(* Parameters: *) -(* a0 - First pointer. *) -(* a1 - Second pointer. *) -(* a2 - The length to compare. *) - -(* Returns 0 in a0 if memory regions are equal, otherwise the difference *) -(* between the first pair of bytes that are not equal. *) -proc _memcmp(); -begin - v8 := 0; - - .memcmp_loop; - if v80 <> 0 then - v0 := _load_byte(v88); - v4 := _load_byte(v84); - v8 := v0 + -v4; - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - - if v8 = 0 then - goto .memcmp_loop; - end; - end; - - return v8 -end; - -(* Copies memory. *) - -(* Parameters: *) -(* a0 - Destination. *) -(* a1 - Source. *) -(* a2 - Size. *) - -(* Returns the destination pointer advanced past the copied bytes in a0. *) -proc _memcpy(); -begin - .memcpy_loop; - if v80 <> 0 then - v0 := _load_byte(v84); - _store_byte(v0, v88); - - v88 := v88 + 1; - v84 := v84 + 1; - v80 := v80 + -1; - goto .memcpy_loop; - end; - - return v88 -end; - -(* Advances the token stream by a0 bytes. *) -proc _advance_token(); -begin - _store_word(source_code_position + v88, @source_code_position); -end; - -(* Prints the current token. *) - -(* Parameters: *) -(* a0 - Token length. *) - -(* Returns a0 unchanged. 
*) -proc _write_token(); -begin - _write_s(source_code_position, v88); - return v88 -end; - -proc _compile_integer_literal(); -begin - _write_z("\tli t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_character_literal(); -begin - _write_z("\tli t0, \0"); - - _write_c('\''); - _advance_token(1); - - v0 := _load_byte(source_code_position); - if v0 = '\\' then - _write_c('\\'); - _advance_token(1); - end; - - v0 := _load_byte(source_code_position); - _write_c(v0); - - _write_c('\''); - _write_c('\n'); - - _advance_token(2); -end; - -proc _compile_variable_expression(); -begin - _compile_designator(); - _write_z("\tlw t0, (t0)\n\0"); -end; - -proc _compile_address_expression(); -begin - (* Skip the "@" sign. *) - _advance_token(1); - _compile_designator(); -end; - -proc _compile_negate_expression(); -begin - (* Skip the "-" sign. *) - _advance_token(1); - _compile_term(); - - _write_z("\tneg t0, t0\n\0"); -end; - -proc _compile_not_expression(); -begin - (* Skip the "~" sign. 
*) - _advance_token(1); - _compile_term(); - - _write_z("\tnot t0, t0\n\0"); -end; - -proc _compile_string_literal(); -begin - v0 := _string_length(source_code_position); - v4 := _add_string(source_code_position); - - _advance_token(v0 + 2); - _write_z("\tla t0, strings\n\0"); - - _write_z("\tli t1, \0"); - _write_i(v4); - _write_c('\n'); - - _write_z("\tadd t0, t0, t1\n\0"); -end; - -proc _compile_term(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '\'' then - _compile_character_literal(); - end; - if v0 = '@' then - _compile_address_expression(); - end; - if v0 = '-' then - _compile_negate_expression(); - end; - if v0 = '~' then - _compile_not_expression(); - end; - if v0 = '"' then - _compile_string_literal(); - end; - if v0 = '_' then - _compile_call(); - _write_z("\nmv t0, a0\n\0"); - end; - if _is_digit(v0) = 1 then - _compile_integer_literal(); - end; - if _is_lower(v0) = 1 then - _compile_variable_expression(); - end; -end; - -proc _compile_binary_rhs(); -begin - (* Skip the whitespace after the binary operator. *) - _advance_token(1); - _compile_term(); - - (* Load the left expression from the stack; *) - _write_z("\tlw t1, 24(sp)\n\0"); -end; - -proc _compile_expression(); -begin - _compile_term(); - v0 := _load_byte(source_code_position); - - if v0 <> ' ' then - goto .compile_expression_end; - end; - (* It is a binary expression. *) - - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 24(sp)\n\0"); - - (* Skip surrounding whitespace in front of the operator. *) - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '+' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '*' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tmul t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '&' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tand t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'o' then - _advance_token(2); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = 'x' then - _advance_token(3); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '%' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("rem t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '/' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("div t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '<' then - _advance_token(1); - v0 := _load_byte(source_code_position); - - if v0 = '>' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0"); - - goto .compile_expression_end; - end; - if v0 = '>' then - _advance_token(1); - v0 := _load_byte(source_code_position); - if v0 = '=' then - _advance_token(1); - _compile_binary_rhs(); - - (* Execute the operation. 
*) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0"); - - goto .compile_expression_end; - end; - _compile_binary_rhs(); - - (* Greater than. t1 is the left operand and t0 is the right one, *) - (* so the result is set if the right operand is less than the left one. *) - _write_z("\tslt t0, t0, t1\n\0"); - - goto .compile_expression_end; - end; - - .compile_expression_end; -end; - -(* Compiles a procedure call. The arguments are evaluated from left to right *) -(* and saved on the stack, then they are loaded into the a registers *) -(* and the procedure is called. *) -proc _compile_call(); -begin - (* Stack variables: *) - (* v0 - Procedure name length. *) - (* v4 - Procedure name pointer. *) - (* v8 - Argument count. *) - - v0 := _read_token(); - v4 := source_code_position; - v8 := 0; - - (* Skip the identifier and left paren. *) - _advance_token(v0 + 1); - v12 := _load_byte(source_code_position); - - if v12 = ')' then - goto .compile_call_finalize; - end; - .compile_call_loop; - _compile_expression(); - - (* Save the argument on the stack. *) - _write_z("\tsw t0, \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - (* Add one to the argument counter. *) - v8 := v8 + 1; - - v12 := _load_byte(source_code_position); - - if v12 <> ',' then - goto .compile_call_finalize; - end; - (* Skip the comma and the whitespace after it. *) - _advance_token(2); - goto .compile_call_loop; - - .compile_call_finalize; - (* Load the argument from the stack. *) - if v8 <> 0 then - (* Decrement the argument counter. *) - v8 := v8 + -1; - - _write_z("\tlw a\0"); - _write_i(v8); - - _write_z(", \0"); - - (* Calculate the stack offset: 116 - (4 * argument_counter) *) - v12 := v8 * 4; - v12 := 116 + -v12; - _write_i(v12); - - _write_z("(sp)\n\0"); - - goto .compile_call_finalize; - end; - - .compile_call_end; - _write_z("\tcall \0"); - _write_s(v4, v0); - - (* Skip the right paren. *) - _advance_token(1); -end; - -(* Compiles a goto statement into a jump to the label. *) -proc _compile_goto(); -begin - (* Skip "goto" and the whitespace after it. *) - _advance_token(5); - - v0 := _read_token(); - _write_z("\tj \0"); - - _write_token(v0); - (* Pass the label length explicitly and do not rely on the value *) - (* _write_token has left in a0. *) - _advance_token(v0); -end; - -proc _compile_local_designator(); -begin - (* Skip "v" in the local variable name. 
*) - _advance_token(1); - _write_z("\t addi t0, sp, \0"); - - (* Read local variable stack offset and save it. *) - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - _write_c('\n'); -end; - -proc _compile_global_designator(); -begin - _write_z("\tla t0, \0"); - - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - _write_c('\n'); -end; - -proc _compile_designator(); -begin - if _load_byte(source_code_position) = 'v' then - _compile_local_designator(); - else - _compile_global_designator(); - end; -end; - -proc _compile_assignment(); -begin - _compile_designator(); - - (* Save the assignee address on the stack. *) - _write_z("\tsw t0, 60(sp)\n\0"); - - (* Skip the assignment sign (:=) with surrounding whitespaces. *) - _advance_token(4); - - (* Compile the assignment. *) - _compile_expression(); - - _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0"); -end; - -proc _compile_return_statement(); -begin - (* Skip "return" keyword and whitespace after it. *) - _advance_token(7); - _compile_expression(); - - _write_z("mv a0, t0\n\0"); -end; - -(* Writes a label, .Ln, where n is a unique number. *) - -(* Parameters: *) -(* a0 - Label counter. *) -proc _write_label(); -begin - _write_z(".L\0"); - _write_i(v88); -end; - -proc _compile_if(); -begin - (* Skip "if ". *) - _advance_token(3); - (* Compile condition. *) - _compile_expression(); - (* Skip " then" with newline. *) - _advance_token(6); - - (* v0 is the label after the if statement. *) - v0 := label_counter; - _store_word(label_counter + 1, @label_counter); - (* v4 is the label in front of the next elsif condition or end. 
*) - v4 := label_counter; - _store_word(label_counter + 1, @label_counter); - - _write_z("\tbeqz t0, \0"); - _write_label(v4); - _write_c('\n'); - - _compile_procedure_body(); - - _write_z("\tj \0"); - _write_label(v0); - _write_c('\n'); - - _write_label(v4); - _write_z(":\n\0"); - - if _memcmp(source_code_position, "end", 3) = 0 then - goto .compile_if_end; - end; - if _memcmp(source_code_position, "else", 3) = 0 then - goto .compile_if_else - end; - .compile_if_else; - (* Skip "else" and newline. *) - _advance_token(5); - _compile_procedure_body(); - - .compile_if_end; - (* Skip "end". *) - _advance_token(3); - - _write_label(v0); - _write_z(":\n\0"); -end; - -proc _compile_label_declaration(); -begin - (* Skip the dot. *) - _advance_token(1); - v0 := _read_token(); - _write_c('.'); - _write_s(source_code_position, v0); - _write_z(":\n\0"); - _advance_token(v0); -end; - -proc _compile_statement(); -begin - _skip_spaces(); - v0 := _load_byte(source_code_position); - - (* This is a call if the statement starts with an underscore. *) - if v0 = '_' then - _compile_call(); - goto .compile_statement_semicolon; - end; - if v0 = 'g' then - _compile_goto(); - goto .compile_statement_semicolon; - end; - if v0 = 'i' then - _compile_if(); - goto .compile_statement_semicolon; - end; - if v0 = 'r' then - _compile_return_statement(); - _write_c('\n'); - - goto .compile_statement_end; - end; - if v0 = '.' 
then - _compile_label_declaration(); - - goto .compile_statement_semicolon; - end; - _compile_assignment(); - goto .compile_statement_semicolon; - - .compile_statement_semicolon; - _advance_token(2); - _write_c('\n'); - - .compile_statement_end; -end; - -proc _compile_procedure_body(); -begin - .compile_procedure_body_loop; - _skip_empty_lines(); - _skip_spaces(); - - v0 := _memcmp(source_code_position, "end", 3) = 0; - v4 := _memcmp(source_code_position, "else", 4) = 0; - v4 := v0 or v4; - - if v4 = 0 then - _compile_statement(); - goto .compile_procedure_body_loop; - end; -end; - -(* Writes a regster name to the standard output. *) - -(* Parameters: *) -(* a0 - Register character. *) -(* a1 - Register number. *) -proc _write_register(); -begin - _write_c(v88); - v84 := v84 + '0'; - _write_c(v84); -end; - -proc _compile_procedure_prologue(); -begin - (* Skip open paren. *) - _advance_token(1); - v0 := 0; - - .compile_procedure_prologue_skip; - if _load_byte(source_code_position) <> ')' then - _advance_token(1); - goto .compile_procedure_prologue_skip; - end; - - .compile_procedure_prologue_loop; - _write_z("\tsw a\0"); - _write_i(v0); - _write_z(", \0"); - - (* Calculate the stack offset: 88 - (4 * parameter_counter) *) - v4 := v0 * 4; - v4 := 88 + -v4; - _write_i(v4); - - _write_z("(sp)\n\0"); - - v0 := v0 + 1; - if v0 <> 8 then - goto .compile_procedure_prologue_loop; - end; - (* Skip close paren. *) - _advance_token(1); -end; - -proc _compile_procedure(); -begin - (* Skip "proc ". *) - _advance_token(5); - - (* Save the procedure name length. *) - v0 := _read_token(); - - (* Write .type _procedure_name, @function. *) - _write_z(".type \0"); - - _write_token(v0); - _write_z(", @function\n\0"); - - (* Write procedure label, _procedure_name: *) - _write_token(v0); - _write_z(":\n\0"); - - (* Skip procedure name. 
*) - _advance_token(v0); - _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - _compile_procedure_prologue(); - - (* Skip semicolon, "begin" and newline. *) - _advance_token(8); - - _compile_procedure_body(); - - (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); - - (* Skip the "end" keyword, semicolon and newline. *) - _advance_token(5); -end; - -proc _skip_spaces(); -begin - v0 := _load_byte(source_code_position); - if v0 = '\t' then - _advance_token(1); - _skip_spaces(); - end; -end; - -(* Prints and skips a line. *) -proc _skip_comment(); -begin - .skip_comment_loop; - v0 := _load_byte(source_code_position); - - (* Check for newline character. *) - if v0 <> '\n' then - (* Advance the input string by one byte. *) - _advance_token(1); - - goto .skip_comment_loop; - end; - (* Skip the newline. *) - _advance_token(1); -end; - -(* Skip newlines and comments. *) -proc _skip_empty_lines(); -begin - .skip_empty_lines_rerun; - v0 := source_code_position; - - .skip_empty_lines_loop; - v4 := _load_byte(v0); - - if v4 = '\n' then - goto .skip_empty_lines_newline; - end; - if v4 = '\t' then - goto .skip_empty_lines_tab; - end; - if v4 <> '(' then - goto .skip_empty_lines_end; - end; - v4 := v0 + 1; - - if _load_byte(v4) = '*' then - goto .skip_empty_lines_comment - end; - goto .skip_empty_lines_end; - - .skip_empty_lines_comment; - _store_word(v0, @source_code_position); - _skip_comment(); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_newline; - _store_word(v0 + 1, @source_code_position); - goto .skip_empty_lines_rerun; - - .skip_empty_lines_tab; - v0 := v0 + 1; - goto .skip_empty_lines_loop - - .skip_empty_lines_end; -end; - -proc _compile_global_initializer(); -begin - v0 := _load_byte(source_code_position); - - if v0 = '"' then - _write_z("\n\t.word strings + \0"); - v4 := _string_length(source_code_position); - - _add_string(source_code_position); - _write_i(); - 
- (* Skip the quoted string. *) - _advance_token(v4 + 2); - - goto .compile_global_initializer_end; - end; - if v0 = 'S' then - (* Skip "S(". *) - _advance_token(2); - - if _load_byte(source_code_position) = ')' then - goto .compile_global_initializer_closing; - end; - goto .compile_global_initializer_loop; - end; - if v0 = '@' then - (* Skip @. *) - _advance_token(1); - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - _advance_token(v0); - - goto .compile_global_initializer_end; - end; - if _is_digit(v0) = 1 then - _write_z("\n\t.word \0"); - v0 := _read_token(); - _write_token(v0); - (* Skip the whole number and not only its first digit. *) - _advance_token(v0); - - goto .compile_global_initializer_end; - end; - - .compile_global_initializer_loop; - (* Each member of a structure is an initializer itself. *) - _compile_global_initializer(); - - if _load_byte(source_code_position) <> ')' then - (* Skip comma and whitespace after it. *) - _advance_token(2); - - goto .compile_global_initializer_loop; - end; - - .compile_global_initializer_closing; - (* Skip ")" *) - _advance_token(1); - - goto .compile_global_initializer_end; - - .compile_global_initializer_end; -end; - -(* Compiles a single constant declaration into an object label followed by its initializer. *) -proc _compile_constant_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _advance_token(v0 + 4); - _compile_global_initializer(); - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -(* Compiles the const section, if there is any, into the .rodata section. *) -proc _compile_const_part(); -begin - _skip_empty_lines(); - - if _memcmp(source_code_position, "const\0", 5) <> 0 then - goto .compile_const_part_end; - end; - (* Skip "const" with the newline after it. *) - _advance_token(6); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - - .compile_const_part_loop; - _skip_empty_lines(); - - (* If the character at the line beginning is not indentation, *) - (* it is probably the next code section. 
*) - if _load_byte(source_code_position) = '\t' then - _advance_token(1); - - _compile_constant_declaration(); - goto .compile_const_part_loop; - end; - - .compile_const_part_end; -end; - -proc _compile_variable_declaration(); -begin - v0 := _read_token(); - - _write_z(".type \0"); - _write_token(v0); - _write_z(", @object\n\0"); - - _write_token(v0); - _write_c(':'); - - (* Skip the variable name and colon with space before the type. *) - _advance_token(v0 + 2); - - (* Skip the type name. *) - v4 := _read_token(); - _advance_token(v4); - - if _load_byte(source_code_position) <> ' ' then - (* Else we assume this is a zeroed 81920 bytes big array. *) - _write_z(" .zero 81920\0"); - else - (* Skip the assignment sign with surrounding whitespaces. *) - _advance_token(4); - _compile_global_initializer(); - end; - - (* Skip semicolon and newline. *) - _advance_token(2); - _write_c('\n'); -end; - -proc _compile_var_part(); -begin - if _memcmp(source_code_position, "var\0", 3) <> 0 then - goto .compile_var_part_end; - end; - (* Skip "var" and newline. *) - _advance_token(4); - _write_z(".section .data\n\0"); - - .compile_var_part_loop; - _skip_empty_lines(); - v0 := _load_byte(source_code_position); - - if v0 = '\t' then - _advance_token(1); - _compile_variable_declaration(); - goto .compile_var_part_loop; - end; - - .compile_var_part_end; -end; - -(* Process the source code and print the generated code. 
*) -proc _compile_module(); -begin - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - (* The built-in procedures are written in assembly in front of the compiled ones. *) - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - - .compile_module_loop; - _skip_empty_lines(); - - if _load_byte(source_code_position) <> 0 then - (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) - if _memcmp(source_code_position, "proc ", 5) = 0 then - _compile_procedure(); - goto .compile_module_loop; - end; - end; - .compile_module_end; -end; - -(* Compiles the module and writes the collected strings after it as one .ascii object. *) -proc _compile(); -begin - _write_z(".globl _start\n\n\0"); - _compile_module(); - - _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); - _write_c('"'); - - v0 := @compiler_strings; - v4 := compiler_strings_position; - - .compile_loop; - if v0 < v4 then - v8 := _load_byte(v0); - v0 := v0 + 1; - _write_c(v8); - - goto .compile_loop; - end; - _write_c('"'); - _write_c('\n'); -end; - -(* Terminates the program. a0 contains the return code. *) - -(* Parameters: *) -(* a0 - Status code. *) -proc _exit(); -begin - (* Pass the status code on instead of always exiting with 0. *) - _syscall(v88, 0, 0, 0, 0, 0, 93); -end; - -(* Inserts a symbol into the table. *) - -(* Parameters: *) -(* a0 - Symbol pointer. *) -(* a1 - Symbol name length. *) -(* a2 - Symbol name pointer. *) -(* a3 - Symbol table. *) -proc _symbol_table_enter(); -begin - (* The first word in the symbol table is its length, get it. *) - v0 := _load_word(v76); - - (* Calculate the offset for the new symbol. An entry is 3 words (12 bytes) long *) - (* and the entries follow the length word. *) - v4 := v0 * 12; - v4 := v4 + 4; - v4 := v76 + v4; - - (* The saved a2, a1 and a0 lie next to each other on the stack, beginning *) - (* at v80, so they are copied as one entry: name pointer, name length, symbol pointer. *) - _memcpy(v4, @v80, 12); - - (* Increment the symbol table length. 
*) - v0 := v0 + 1; - _store_word(v0, v76); -end; - -proc _symbol_table_build(); -begin - _symbol_table_enter(@symbol_type_info_int, 3, symbol_builtin_name_int, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_word, 4, symbol_builtin_name_word, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_pointer, 7, symbol_builtin_name_pointer, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_char, 4, symbol_builtin_name_char, @symbol_table_global); - _symbol_table_enter(@symbol_type_info_bool, 4, symbol_builtin_name_bool, @symbol_table_global); -end; - - -(* Classification table assigns each possible character to a group (class). All *) -(* characters of the same group a handled equivalently. *) - -(* Classification: *) - -(* TransitionClass = ( *) -(* transitionClassInvalid = 1, *) -(* transitionClassDigit = 2, *) -(* transitionClassAlpha = 3, *) -(* transitionClassSpace = 4, *) -(* transitionClassColon = 5, *) -(* transitionClassEquals = 6, *) -(* transitionClassLeftParen = 7, *) -(* transitionClassRightParen = 8, *) -(* transitionClassAsterisk = 9, *) -(* transitionClassUnderscore = 10, *) -(* transitionClassSingle = 11, *) -(* transitionClassHex = 12, *) -(* transitionClassZero = 13, *) -(* transitionClassX = 14, *) -(* transitionClassEof = 15, *) -(* transitionClassDot = 16, *) -(* transitionClassMinus = 17, *) -(* transitionClassSingleQuote = 18, *) -(* transitionClassDoubleQuote = 19, *) -(* transitionClassGreater = 20, *) -(* transitionClassLess = 21, *) -(* transitionClassOther = 22 *) -(* ); *) -(* TransitionState = ( *) -(* transitionStateStart = 1, *) -(* transitionStateColon = 2, *) -(* transitionStateIdentifier = 3, *) -(* transitionStateDecimal = 4, *) -(* transitionStateGreater = 5, *) -(* transitionStateMinus = 6, *) -(* transitionStateLeftParen = 7, *) -(* transitionStateLess = 8, *) -(* transitionStateDot = 9, *) -(* transitionStateComment = 10, *) -(* transitionStateClosingComment = 11, *) -(* 
transitionStateCharacter = 12, *) -(* transitionStateString = 13, *) -(* transitionStateLeadingZero = 14, *) -(* transitionStateDecimalSuffix = 15, *) -(* transitionStateEnd = 16 *) -(* ); *) -(* Transition = record *) -(* action: TransitionAction; *) -(* next_state: TransitionState *) -(* end; *) -(* TransitionAction = ( *) -(* none = 1, *) -(* accumulate = 2, *) -(* skip = 3, *) -(* single = 4, *) -(* eof = 5, *) -(* finalize = 6, *) -(* composite = 7, *) -(* key_id = 8, *) -(* integer = 9, *) -(* delimited = 10 *) -(* ); *) - -(* Assigns some value to at array index. *) - -(* Parameters: *) -(* a0 - Array pointer. *) -(* a1 - Index (word offset into the array). *) -(* a2 - Data to assign. *) -proc _assign_at(); -begin - v0 := v84 + -1; - v0 := v0 * 4; - v0 := v88 + v0; - - _store_word(v80, v0); -end; - -proc _create_classification(); -begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - 
_assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - 
_assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); - - v0 := 129; - -(* Set the remaining 129 - 256 bytes to transitionClassOther. 
*) - .create_classification_loop; - _assign_at(@classification, v0, 22); - v0 := v0 + 1; - - if v0 < 257 then - goto .create_classification_loop; - end; -end; - -(* Parameters: *) -(* a0 - Current state (first index into transitions table). *) -(* a1 - Transition (second index into transitions table).. *) -(* a2 - Action to assign. *) -(* a3 - Next state to assign. *) -proc _set_transition(); -begin - (* Transitions start at offset in classification array. Save the transitions start in v0. *) - v0 := @classification + 256 - - (* Each state is 8 bytes long (2 words: action and next state). *) - (* There are 16 transition classes, so a transition 8 * 16 = 128 bytes long. *) - - v4 := v88 + -1; - v4 := v4 * 128; - - v8 := v84 + -1; - v8 := v8 * 8; - - v12 := v0 + v4; - v12 := v12 + v8; - - _store_word(v80, v12); - v12 := v12 + 4; - _store_word(v76, v12); -end; - -(* Parameters: *) -(* a0 - Current state (Transition state enumeration). *) -(* a1 - Default action (Callback). *) -(* a2 - Next state (Transition state enumeration). 
*) -proc _set_default_transition(); -begin - _set_transition(v88, 1, v84, v80); - _set_transition(v88, 2, v84, v80); - _set_transition(v88, 3, v84, v80); - _set_transition(v88, 4, v84, v80); - _set_transition(v88, 5, v84, v80); - _set_transition(v88, 6, v84, v80); - _set_transition(v88, 7, v84, v80); - _set_transition(v88, 8, v84, v80); - _set_transition(v88, 9, v84, v80); - _set_transition(v88, 10, v84, v80); - _set_transition(v88, 11, v84, v80); - _set_transition(v88, 12, v84, v80); - _set_transition(v88, 13, v84, v80); - _set_transition(v88, 14, v84, v80); - _set_transition(v88, 15, v84, v80); - _set_transition(v88, 16, v84, v80); - _set_transition(v88, 17, v84, v80); - _set_transition(v88, 18, v84, v80); - _set_transition(v88, 19, v84, v80); - _set_transition(v88, 20, v84, v80); - _set_transition(v88, 21, v84, v80); - _set_transition(v88, 22, v84, v80); -end; - - -(* The transition table describes transitions from one state to another, given *) -(* a symbol (character class). *) - -(* The table has m rows and n columns, where m is the amount of states and n is *) -(* the amount of classes. So given the current state and a classified character *) -(* the table can be used to look up the next state. *) - -(* Each cell is a word long. *) -(* - The least significant byte of the word is a row number (beginning with 0). *) -(* It specifies the target state. "ff" means that this is an end state and no *) -(* transition is possible. *) -(* - The next byte is the action that should be performed when transitioning. *) -(* For the meaning of actions see labels in the lex_next function, which *) -(* handles each action. *) -proc _create_transitions(); -begin - (* Start state. 
*) - _set_transition(1, 1, 1, 16); - _set_transition(1, 2, 2, 4); - _set_transition(1, 3, 2, 3); - _set_transition(1, 4, 3, 1); - _set_transition(1, 5, 2, 5); - _set_transition(1, 6, 4, 16); - _set_transition(1, 7, 2, 7); - _set_transition(1, 8, 4, 16); - _set_transition(1, 9, 4, 16); - _set_transition(1, 10, 2, 3); - _set_transition(1, 11, 4, 16); - _set_transition(1, 12, 2, 3); - _set_transition(1, 13, 2, 14); - _set_transition(1, 14, 2, 3); - _set_transition(1, 15, 5, 16); - _set_transition(1, 16, 2, 9); - _set_transition(1, 17, 2, 6); - _set_transition(1, 18, 2, 12); - _set_transition(1, 19, 2, 13); - _set_transition(1, 20, 2, 5); - _set_transition(1, 21, 2, 8); - _set_transition(1, 22, 1, 16); - - (* Colon state. *) - _set_default_transition(2, 6, 16); - _set_transition(2, 6, 7, 16); - - (* Identifier state. *) - _set_default_transition(3, 8, 16); - _set_transition(3, 2, 2, 3); - _set_transition(3, 3, 2, 3); - _set_transition(3, 10, 2, 3); - _set_transition(3, 12, 2, 3); - _set_transition(3, 13, 2, 3); - _set_transition(3, 14, 2, 3); - - (* Decimal state. *) - _set_default_transition(4, 9, 16); - _set_transition(4, 2, 2, 4); - _set_transition(4, 3, 2, 15); - _set_transition(4, 10, 1, 16); - _set_transition(4, 12, 2, 15); - _set_transition(4, 13, 2, 4); - _set_transition(4, 14, 2, 15); - - (* Greater state. *) - _set_default_transition(5, 6, 16); - _set_transition(5, 6, 7, 16); - - (* Minus state. *) - _set_default_transition(6, 6, 16); - _set_transition(6, 20, 7, 16); - - (* Left paren state. *) - _set_default_transition(7, 6, 16); - _set_transition(7, 9, 2, 10); - - (* Less state. *) - _set_default_transition(8, 6, 16); - _set_transition(8, 6, 7, 16); - _set_transition(8, 20, 7, 16); - - (* Hexadecimal after 0x. *) - _set_default_transition(9, 6, 16); - _set_transition(9, 16, 7, 16); - - (* Comment. *) - _set_default_transition(10, 2, 10); - _set_transition(10, 9, 2, 11); - _set_transition(10, 15, 1, 16); - - (* Closing comment. 
*) - _set_default_transition(11, 2, 10); - _set_transition(11, 1, 1, 16); - _set_transition(11, 8, 10, 16); - _set_transition(11, 9, 2, 11); - _set_transition(11, 15, 1, 16); - - (* Character. *) - _set_default_transition(12, 2, 12); - _set_transition(12, 1, 1, 16); - _set_transition(12, 15, 1, 16); - _set_transition(12, 18, 10, 16); - - (* String. *) - _set_default_transition(13, 2, 13); - _set_transition(13, 1, 1, 16); - _set_transition(13, 15, 1, 16); - _set_transition(13, 19, 10, 16); - - (* Leading zero. *) - _set_default_transition(14, 9, 16); - _set_transition(14, 2, 1, 16); - _set_transition(14, 3, 1, 16); - _set_transition(14, 10, 1, 16); - _set_transition(14, 12, 1, 16); - _set_transition(14, 13, 1, 16); - _set_transition(14, 14, 1, 16); - - (* Digit with a character suffix. *) - _set_default_transition(15, 9, 16); - _set_transition(15, 3, 1, 16); - _set_transition(15, 2, 1, 16); - _set_transition(15, 12, 1, 16); - _set_transition(15, 13, 1, 16); - _set_transition(15, 14, 1, 16); -end; - -proc _lexer_get_state(); -begin - (* Lexer state is saved after the transition tables. The offset is 256 + 16 * 22. *) - v0 := @classification; - v4 := 16 * 22; - v0 := v0 + 256; - - return v0 + v4 -end; - -(* Gets pointer to the current source text. *) -proc _lexer_get_current(); -begin - return _lexer_get_state() + 4 -end; - -(* Resets the lexer state for reading the next token. *) -proc _lexer_reset(); -begin - (* Transition start state is 1. *) - v0 := _lexer_get_state(); - _store_word(1, v4); - - (* Text pointer to the beginning of the currently read token. *) - v4 := _lexer_get_current(); - _store_word(source_code_position, v4); - - (* Initial length of the token is 0. *) - _store_word(0, source_code_position + 4); -end; - -(* One time lexer initialization. *) -proc _lexer_initialize(); -begin - _create_classification(); - _create_transitions(); -end; - -(* Entry point. 
 *)
-proc _start();
-begin
-  (* Prepare the lexer tables and the symbol table before any input is read. *)
-  _lexer_initialize();
-  _symbol_table_build();
-
-  (* Read the source from the standard input. *)
-  (* The second argument is the buffer size. When changing it, update the *)
-  (* source_code definition as well. *)
-  _read_file(@source_code, 81920);
-  _compile();
-
-  (* Terminate with exit status 0. *)
-  _exit(0);
-end; |
