(* This Source Code Form is subject to the terms of the Mozilla Public License, *)
(* v. 2.0. If a copy of the MPL was not distributed with this file, You can *)
(* obtain one at https://mozilla.org/MPL/2.0/. *)

(* Stage 13 compiler. *)

const
	(* Names of the built-in types as they are spelled in the source code. *)
	symbol_builtin_name_int := "Int";
	symbol_builtin_name_word := "Word";
	symbol_builtin_name_pointer := "Pointer";
	symbol_builtin_name_char := "Char";
	symbol_builtin_name_bool := "Bool";

	(* Every type info starts with a word describing what type it is. *)
	(* PRIMITIVE_TYPE = 1 *)
	(* Primitive types have only type size. *)
	symbol_builtin_type_int := S(1, 4);
	symbol_builtin_type_word := S(1, 4);
	symbol_builtin_type_pointer := S(1, 4);
	symbol_builtin_type_char := S(1, 1);
	symbol_builtin_type_bool := S(1, 1);

	(* Info objects start with a word describing its type. *)
	(* INFO_TYPE = 1 *)
	(* INFO_PARAMETER = 2 *)
	(* INFO_TEMPORARY = 3 *)
	(* Type info has the type it belongs to. *)
	symbol_type_info_int := S(1, @symbol_builtin_type_int);
	symbol_type_info_word := S(1, @symbol_builtin_type_word);
	symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer);
	symbol_type_info_char := S(1, @symbol_builtin_type_char);
	symbol_type_info_bool := S(1, @symbol_builtin_type_bool);

var
	(* Uninitialized variables are emitted as zeroed 81920 byte arrays. *)
	source_code: Array;
	compiler_strings: Array;
	symbol_table_global: Array;
	symbol_table_local: Array;
	classification: Array;
	memory: Array;

	(* Write position inside compiler_strings and the amount of bytes the *)
	(* assembler will produce for the strings collected so far. *)
	compiler_strings_position: Pointer := @compiler_strings;
	compiler_strings_length: Word := 0;
	(* Next free number for generated .Ln labels. *)
	label_counter: Word := 0;
	(* Current read position in the source code. *)
	source_code_position: Pointer := @source_code;
	(* Next free byte of the memory arena used for info objects. *)
	memory_free_pointer: Word := @memory;

(* Calculates and returns the string token length between quotes, including the *)
(* escaping slash characters. A backslash escapes the character after it, so an *)
(* escaped double quote does not terminate the token. *)
(* Parameters: *)
(* string - String token pointer (points to the opening quote). *)
(* Returns the length in a0. *)
proc _string_length(string: Word);
var
	counter: Word;
	current_byte: Word;
begin
	(* Reset the counter. *)
	counter := 0;

	.string_length_loop;
	string := string + 1;
	current_byte := _load_byte(string);

	if current_byte = '\\' then
		(* Count the backslash together with the character it escapes. *)
		string := string + 1;
		counter := counter + 2;
		goto .string_length_loop;
	end;
	if current_byte <> '"' then
		counter := counter + 1;
		goto .string_length_loop;
	end;

	return counter
end;

(* Adds a string to the global, read-only string storage. *)
(* The token bytes are copied verbatim, escape sequences included, because the *)
(* storage is later printed inside an .ascii directive. The storage length *)
(* however counts every two-character escape sequence as a single byte, since *)
(* this is what the assembler produces for it. *)
(* Parameters: *)
(* string - String token (points to the opening quote). *)
(* Returns the offset from the beginning of the storage to the new string in a0. *)
proc _add_string(string: Word);
var
	contents: Word;
	result: Word;
	current_byte: Word;
begin
	contents := string + 1;
	result := compiler_strings_length;

	.add_string_loop;
	current_byte := _load_byte(contents);

	if current_byte <> '"' then
		_store_byte(current_byte, compiler_strings_position);
		compiler_strings_position := compiler_strings_position + 1;
		contents := contents + 1;

		if current_byte = '\\' then
			(* Copy the escaped character without looking at it, so that an *)
			(* escaped quote or backslash is neither a terminator nor counted twice. *)
			current_byte := _load_byte(contents);
			_store_byte(current_byte, compiler_strings_position);
			compiler_strings_position := compiler_strings_position + 1;
			contents := contents + 1;
		end;
		compiler_strings_length := compiler_strings_length + 1;

		goto .add_string_loop;
	end;

	return result
end;

(* Reads standard input into a buffer. *)
(* Parameters: *)
(* buffer - Buffer pointer. *)
(* size - Buffer size. *)
(* Returns the amount of bytes read in a0. There is no explicit return, *)
(* a0 is left as the system call (number 63, descriptor 0) has set it. *)
proc _read_file(buffer: Word, size: Word);
begin
	_syscall(0, buffer, size, 0, 0, 0, 63);
end;

(* Writes to the standard output (system call number 64, descriptor 1). *)
(* Parameters: *)
(* buffer - Buffer. *)
(* size - Buffer length. *)
proc _write_s(buffer: Word, size: Word);
begin
	_syscall(1, buffer, size, 0, 0, 0, 64);
end;

(* Writes a number to a string buffer. *)
(* Parameters: *)
(* number - Whole number. *)
(* output_buffer - Buffer pointer. *)
(* Sets a0 to the length of the written number.
*)
proc _print_i(number: Word, output_buffer: Word);
var
	local_buffer: Word;
	is_negative: Word;
	current_character: Word;
	result: Word;
begin
	(* The digits are produced from the least significant one, so they are *)
	(* written backwards into the stack bytes following the address of result. *)
	local_buffer := @result + 11;

	if number >= 0 then
		is_negative := 0;
	else
		(* This has to be an assignment. With a comparison sign the sign of the *)
		(* number was never removed and negative numbers were printed as garbage. *)
		number := -number;
		is_negative := 1;
	end;

	.print_i_digit10;
	current_character := number % 10;
	_store_byte(current_character + '0', local_buffer);

	number := number / 10;
	local_buffer := local_buffer + -1;

	if number <> 0 then
		goto .print_i_digit10;
	end;
	if is_negative = 1 then
		_store_byte('-', local_buffer);
		local_buffer := local_buffer + -1;
	end;
	(* local_buffer points one byte before the first written character, *)
	(* the difference to the initial position is the text length. *)
	result := @result + 11;
	result := result + -local_buffer;
	_memcpy(output_buffer, local_buffer + 1, result);

	return result
end;

(* Writes a number to the standard output. *)
(* Parameters: *)
(* number - Whole number. *)
proc _write_i(number: Word);
var
	local_buffer: Word;
	length: Word;
begin
	(* The text is placed on the stack beginning at the address of local_buffer. *)
	(* It can be longer than one word and run over length, which is assigned *)
	(* only after _print_i has returned. *)
	length := _print_i(number, @local_buffer);
	_write_s(@local_buffer, length);
end;

(* Writes a character from a0 into the standard output. *)
(* Parameters: *)
(* character - Character to write. *)
proc _write_c(character: Word);
begin
	_write_s(@character, 1);
end;

(* Write null terminated string. The terminating 0 is not written. *)
(* Parameters: *)
(* string - String. *)
proc _write_z(string: Word);
var
	next_byte: Word;
begin
	(* Check for 0 character. *)
	next_byte := _load_byte(string);

	if next_byte <> 0 then
		(* Print a character. *)
		_write_c(next_byte);

		(* Advance the input string by one byte. *)
		_write_z(string + 1);
	end;
end;

(* Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0. *)
proc _is_upper(character: Word);
var
	lhs: Word;
	rhs: Word;
begin
	lhs := character >= 'A';
	rhs := character <= 'Z';

	return lhs & rhs
end;

(* Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0. *)
proc _is_lower(character: Word);
var
	lhs: Word;
	rhs: Word;
begin
	lhs := character >= 'a';
	rhs := character <= 'z';

	return lhs & rhs
end;

(* Detects if the passed character is a 7-bit alpha character or an underscore.
*)
(* Parameters: *)
(* character - Character to classify. *)
(* Sets a0 to 1 when the character is a letter or an underscore, otherwise to 0. *)
proc _is_alpha(character: Word);
var
	letter: Word;
	underscore: Word;
begin
	underscore := character = '_';
	letter := _is_lower(character);
	letter := letter or _is_upper(character);

	return letter or underscore
end;

(* Tells whether the passed character is a decimal digit, '0' through '9'. *)
(* Parameters: *)
(* character - Examined value. *)
(* Sets a0 to 1 if it is a digit, to 0 otherwise. *)
proc _is_digit(character: Word);
var
	at_least_zero: Word;
	at_most_nine: Word;
begin
	at_most_nine := character <= '9';
	at_least_zero := character >= '0';

	return at_least_zero & at_most_nine
end;

(* Tells whether the passed character is a letter, an underscore or a digit. *)
(* Sets a0 to a non-zero value if so, to 0 otherwise. *)
proc _is_alnum(character: Word);
var
	alpha: Word;
	digit: Word;
begin
	digit := _is_digit(character);
	alpha := _is_alpha(character);

	return alpha or digit
end;

(* Measures the token at the current source position without consuming it. *)
(* A token is the longest run of dots, letters, digits and underscores. *)
(* Returns token length in a0. *)
proc _read_token();
var
	cursor: Word;
	symbol: Word;
	accepted: Word;
begin
	cursor := source_code_position;

	.read_token_loop;
	symbol := _load_byte(cursor);

	(* Directives and labels may contain dots next to the usual characters. *)
	accepted := _is_alnum(symbol);
	if symbol = '.' then
		accepted := 1;
	end;
	if accepted <> 0 then
		cursor := cursor + 1;
		goto .read_token_loop;
	end;

	(* The distance walked from the token start is the token length. *)
	return cursor + -source_code_position
end;

(* Parameters: *)
(* lhs - First pointer. *)
(* rhs - Second pointer. *)
(* count - The length to compare. *)
(* Returns 0 if memory regions are equal.
*)
(* Otherwise returns the difference between the first pair of differing bytes. *)
proc _memcmp(lhs: Word, rhs: Word, count: Word);
var
	lhs_byte: Word;
	rhs_byte: Word;
	result: Word;
begin
	result := 0;

	.memcmp_loop;
	if count <> 0 then
		lhs_byte := _load_byte(lhs);
		rhs_byte := _load_byte(rhs);
		result := lhs_byte + -rhs_byte;
		lhs := lhs + 1;
		rhs := rhs + 1;
		count := count + -1;

		(* Stop at the first difference. *)
		if result = 0 then
			goto .memcmp_loop;
		end;
	end;

	return result
end;

(* Copies memory. *)
(* Parameters: *)
(* destination - Destination. *)
(* source - Source. *)
(* count - Size. *)
(* Returns the address following the last written byte, that is the original *)
(* destination advanced by count. *)
proc _memcpy(destination: Word, source: Word, count: Word);
var
	current_byte: Word;
begin
	.memcpy_loop;
	if count <> 0 then
		current_byte := _load_byte(source);
		_store_byte(current_byte, destination);
		destination := destination + 1;
		source := source + 1;
		count := count + -1;
		goto .memcpy_loop;
	end;

	return destination
end;

(* Advances the token stream by a0 bytes. *)
proc _advance_token(count: Word);
begin
	source_code_position := source_code_position + count;
end;

(* Prints the current token. *)
(* Parameters: *)
(* length - Token length. *)
(* Returns a0 unchanged. *)
proc _write_token(length: Word);
begin
	_write_s(source_code_position, length);
	return length
end;

(* Compiles an integer literal into a load of an immediate into t0. *)
proc _compile_integer_literal();
var
	integer_token: Word;
begin
	_write_z("\tli t0, \0");
	integer_token := _read_token();
	_write_token(integer_token);
	_advance_token(integer_token);
	_write_c('\n');
end;

(* Compiles a character literal into a load of an immediate into t0. *)
(* The literal, including a possible escaping backslash, is copied to the output. *)
proc _compile_character_literal();
var
	character: Word;
begin
	_write_z("\tli t0, '\0");
	(* Skip the opening quote. *)
	_advance_token(1);
	character := _load_byte(source_code_position);

	if character = '\\' then
		_write_c('\\');
		_advance_token(1);
	end;
	_write_s(source_code_position, 1);
	_write_s("'\n", 2);
	(* Skip the character and the closing quote. *)
	_advance_token(2);
end;

(* Compiles a variable read: the variable address is computed into t0 and *)
(* then replaced with the word stored there. *)
proc _compile_variable_expression();
begin
	_compile_designator();
	_write_z("\tlw t0, (t0)\n\0");
end;

(* Compiles taking the address of a variable, the address is left in t0. *)
proc _compile_address_expression();
begin
	(* Skip the "@" sign. *)
	_advance_token(1);
	_compile_designator();
end;

(* Compiles the arithmetic negation of a term. *)
proc _compile_negate_expression();
begin
	(* Skip the "-" sign. *)
	_advance_token(1);
	_compile_term();
	_write_z("\tneg t0, t0\n\0");
end;

(* Compiles the bitwise negation of a term. *)
proc _compile_not_expression();
begin
	(* Skip the "~" sign. *)
	_advance_token(1);
	_compile_term();
	_write_z("\tnot t0, t0\n\0");
end;

(* Compiles a string literal. The string is added to the string storage and *)
(* its address, the strings symbol plus the offset, is computed into t0. *)
proc _compile_string_literal();
var
	length: Word;
	offset: Word;
begin
	length := _string_length(source_code_position);
	offset := _add_string(source_code_position);
	(* Skip the string together with both quotes. *)
	_advance_token(length + 2);
	_write_z("\tla t0, strings\n\0");
	_write_z("\tli t1, \0");
	_write_i(offset);
	_write_c('\n');
	_write_z("\tadd t0, t0, t1\n\0");
end;

(* Compiles a term. The kind of the term is chosen by its first character. *)
(* The value of the term is left in t0. *)
proc _compile_term();
var
	current_character: Word;
begin
	current_character := _load_byte(source_code_position);

	if current_character = '\'' then
		_compile_character_literal();
	end;
	if current_character = '@' then
		_compile_address_expression();
	end;
	if current_character = '-' then
		_compile_negate_expression();
	end;
	if current_character = '~' then
		_compile_not_expression();
	end;
	if current_character = '"' then
		_compile_string_literal();
	end;
	(* Identifiers beginning with an underscore are procedure calls. *)
	if current_character = '_' then
		_compile_call();
		_write_z("\nmv t0, a0\n\0");
	end;
	if _is_digit(current_character) = 1 then
		_compile_integer_literal();
	end;
	if _is_lower(current_character) = 1 then
		_compile_variable_expression();
	end;
end;

(* Compiles the right operand of a binary expression. *)
(* Afterwards t0 holds the right and t1 the left operand. *)
proc _compile_binary_rhs();
begin
	(* Skip the whitespace after the binary operator. *)
	_advance_token(1);
	_compile_term();
	(* Load the left expression from the stack; *)
	_write_z("\tlw t1, 24(sp)\n\0");
end;

(* Compiles a term or a binary expression consisting of two terms. *)
(* A space after the first term means that an operator follows. *)
(* The left operand is kept in the single stack slot 24(sp) while the right *)
(* one is evaluated. In the operations t1 is the left and t0 the right operand. *)
proc _compile_expression();
var
	current_character: Word;
begin
	_compile_term();
	current_character := _load_byte(source_code_position);

	if current_character <> ' ' then
		goto .compile_expression_end;
	end;
	(* It is a binary expression. *)

	(* Save the value of the left expression on the stack. *)
	_write_z("sw t0, 24(sp)\n\0");

	(* Skip surrounding whitespace in front of the operator. *)
	_advance_token(1);
	current_character := _load_byte(source_code_position);

	if current_character = '+' then
		_advance_token(1);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("add t0, t0, t1\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '*' then
		_advance_token(1);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("\tmul t0, t0, t1\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '&' then
		_advance_token(1);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("\tand t0, t0, t1\n\0");

		goto .compile_expression_end;
	end;
	if current_character = 'o' then
		(* Skip "or". *)
		_advance_token(2);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("or t0, t0, t1\n\0");

		goto .compile_expression_end;
	end;
	if current_character = 'x' then
		(* Skip "xor". *)
		_advance_token(3);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("xor t0, t0, t1\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '=' then
		_advance_token(1);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("xor t0, t0, t1\nseqz t0, t0\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '%' then
		_advance_token(1);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("rem t0, t1, t0\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '/' then
		_advance_token(1);
		_compile_binary_rhs();

		(* Execute the operation. *)
		_write_z("div t0, t1, t0\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '<' then
		_advance_token(1);
		current_character := _load_byte(source_code_position);

		if current_character = '>' then
			_advance_token(1);
			_compile_binary_rhs();

			(* Execute the operation. *)
			_write_z("\txor t0, t0, t1\nsnez t0, t0\n\0");

			goto .compile_expression_end;
		end;
		if current_character = '=' then
			_advance_token(1);
			_compile_binary_rhs();

			(* Execute the operation: left <= right is not (right < left). *)
			_write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0");

			goto .compile_expression_end;
		end;
		_compile_binary_rhs();

		(* Execute the operation: left < right. *)
		_write_z("slt t0, t1, t0\n\0");

		goto .compile_expression_end;
	end;
	if current_character = '>' then
		_advance_token(1);
		current_character := _load_byte(source_code_position);

		if current_character = '=' then
			_advance_token(1);
			_compile_binary_rhs();

			(* Execute the operation: left >= right is not (left < right). *)
			_write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0");

			goto .compile_expression_end;
		end;
		_compile_binary_rhs();

		(* Execute the operation: left > right is right < left. The operands *)
		(* have to be swapped compared to the "less than" case above. *)
		_write_z("\tslt t0, t0, t1\n\0");

		goto .compile_expression_end;
	end;

	.compile_expression_end;
end;

(* Compiles a procedure call. The arguments are evaluated from left to right *)
(* and parked on the stack at 116(sp) downwards. When all of them are known *)
(* they are loaded into the argument registers starting with the last one. *)
proc _compile_call();
var
	name_length: Word;
	name: Word;
	argument_count: Word;
	stack_offset: Word;
begin
	name_length := _read_token();
	name := source_code_position;
	argument_count := 0;

	(* Skip the identifier and left paren. *)
	_advance_token(name_length + 1);

	if _load_byte(source_code_position) = ')' then
		goto .compile_call_finalize;
	end;
	.compile_call_loop;
	_compile_expression();

	(* Save the argument on the stack. *)
	_write_z("\tsw t0, \0");

	(* Calculate the stack offset: 116 - (4 * argument_counter) *)
	stack_offset := argument_count * 4;
	_write_i(116 + -stack_offset);

	_write_z("(sp)\n\0");

	(* Add one to the argument counter. *)
	argument_count := argument_count + 1;

	if _load_byte(source_code_position) <> ',' then
		goto .compile_call_finalize;
	end;
	(* Skip the comma and the space after it. *)
	_advance_token(2);
	goto .compile_call_loop;

	.compile_call_finalize;

	(* Load the argument from the stack. *)
	if argument_count <> 0 then
		(* Decrement the argument counter. *)
		argument_count := argument_count + -1;

		_write_z("\tlw a\0");
		_write_i(argument_count);
		_write_z(", \0");

		(* Calculate the stack offset: 116 - (4 * argument_counter) *)
		stack_offset := argument_count * 4;
		_write_i(116 + -stack_offset);

		_write_z("(sp)\n\0");

		goto .compile_call_finalize;
	end;

	.compile_call_end;
	_write_z("\tcall \0");
	_write_s(name, name_length);

	(* Skip the right paren. *)
	_advance_token(1);
end;

(* Compiles a goto statement into an unconditional jump to the label. *)
proc _compile_goto();
var
	next_token: Word;
begin
	(* Skip "goto ". *)
	_advance_token(5);

	next_token := _read_token();
	_write_z("\tj \0");
	_write_token(next_token);
	_advance_token(next_token);
end;

(* Computes the address of a parameter or a local variable into t0. *)
(* Parameters: *)
(* symbol - Info object found in the local symbol table. Parameter and *)
(* temporary infos keep the stack offset in the same place, so one *)
(* accessor serves both. *)
(* name_length - Length of the identifier to skip. *)
proc _compile_local_designator(symbol: Word, name_length: Word);
var
	variable_offset: Word;
begin
	_write_z("\taddi t0, sp, \0");
	variable_offset := _parameter_info_get_offset(symbol);
	_write_i(variable_offset);
	_write_c('\n');
	_advance_token(name_length);
end;

(* Computes the address of a global symbol into t0. *)
proc _compile_global_designator();
var
	name: Word;
begin
	_write_z("\tla t0, \0");
	name := _read_token();
	_write_token(name);
	_advance_token(name);
	_write_c('\n');
end;

(* Computes the address of the identifier at the current position into t0. *)
(* Names that are not in the local symbol table are treated as global. *)
proc _compile_designator();
var
	name_token: Word;
	lookup_result: Word;
begin
	name_token := _read_token();
	lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token);

	if lookup_result <> 0 then
		_compile_local_designator(lookup_result, name_token);
		goto .compile_designator_end;
	end;
	_compile_global_designator();

	.compile_designator_end;
end;

(* Compiles an assignment statement. *)
proc _compile_assignment();
begin
	_compile_designator();

	(* Save the assignee address on the stack. *)
	_write_z("\tsw t0, 60(sp)\n\0");

	(* Skip the assignment sign (:=) with surrounding whitespaces. *)
	_advance_token(4);

	(* Compile the assignment. *)
	_compile_expression();

	_write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0");
end;

(* Compiles a return statement. Only the return value is moved into a0, no *)
(* jump is generated, so the statement has to be the last one in a procedure. *)
proc _compile_return_statement();
begin
	(* Skip "return" keyword and whitespace after it. *)
	_advance_token(7);
	_compile_expression();
	_write_z("mv a0, t0\n\0");
end;

(* Writes a label, .Ln, where n is a unique number. *)
(* Parameters: *)
(* counter - Label counter. *)
proc _write_label(counter: Word);
begin
	_write_z(".L\0");
	_write_i(counter);
end;

(* Compiles a conditional statement with an optional else branch. *)
proc _compile_if();
var
	after_end_label: Word;
	condition_label: Word;
begin
	(* Skip "if ". *)
	_advance_token(3);
	(* Compile condition. *)
	_compile_expression();
	(* Skip " then" with newline. *)
	_advance_token(6);

	after_end_label := label_counter;
	label_counter := label_counter + 1;

	(* condition_label is the label in front of the next elsif condition or end. *)
	condition_label := label_counter;
	label_counter := label_counter + 1;

	_write_z("\tbeqz t0, \0");
	_write_label(condition_label);
	_write_c('\n');

	_compile_procedure_body();

	_write_z("\tj \0");
	_write_label(after_end_label);
	_write_c('\n');

	_write_label(condition_label);
	_write_z(":\n\0");

	(* The body stops either at "end" or at "else". *)
	if _memcmp(source_code_position, "end", 3) = 0 then
		goto .compile_if_end;
	end;
	(* The whole keyword is compared, like in _compile_procedure_body. *)
	if _memcmp(source_code_position, "else", 4) = 0 then
		goto .compile_if_else;
	end;

	.compile_if_else;
	(* Skip "else" and newline. *)
	_advance_token(5);
	_compile_procedure_body();

	.compile_if_end;
	(* Skip "end". *)
	_advance_token(3);

	_write_label(after_end_label);
	_write_z(":\n\0");
end;

(* Compiles a label declaration, a name with a leading dot, into an assembler label. *)
proc _compile_label_declaration();
var
	label_token: Word;
begin
	(* Skip the dot. *)
	_advance_token(1);
	label_token := _read_token();
	_write_c('.');
	_write_s(source_code_position, label_token);
	_write_z(":\n\0");
	_advance_token(label_token);
end;

(* Compiles a single statement. Everything that is not recognized as a call, *)
(* goto, if, return or label is compiled as an assignment. *)
proc _compile_statement();
var
	current_byte: Word;
begin
	_skip_spaces();
	current_byte := _load_byte(source_code_position);

	(* This is a call if the statement starts with an underscore. *)
	if current_byte = '_' then
		_compile_call();
		goto .compile_statement_semicolon;
	end;
	if _memcmp(source_code_position, "goto ", 5) = 0 then
		_compile_goto();
		goto .compile_statement_semicolon;
	end;
	if _memcmp(source_code_position, "if ", 3) = 0 then
		_compile_if();
		goto .compile_statement_semicolon;
	end;
	if _memcmp(source_code_position, "return ", 7) = 0 then
		_compile_return_statement();
		_write_c('\n');
		(* Nothing is skipped after a return statement. *)
		goto .compile_statement_end;
	end;
	if current_byte = '.' then
		_compile_label_declaration();
		goto .compile_statement_semicolon;
	end;
	_compile_assignment();
	goto .compile_statement_semicolon;

	.compile_statement_semicolon;
	(* Skip the two characters after the statement, the semicolon and the newline. *)
	_advance_token(2);
	_write_c('\n');

	.compile_statement_end;
end;

(* Compiles statements until a line beginning with "end" or "else" is reached. *)
(* The terminating keyword itself is not consumed. *)
proc _compile_procedure_body();
var
	lhs: Word;
	rhs: Word;
begin
	.compile_procedure_body_loop;
	_skip_empty_lines();
	_skip_spaces();
	lhs := _memcmp(source_code_position, "end", 3) = 0;
	rhs := _memcmp(source_code_position, "else", 4) = 0;
	lhs := lhs or rhs;

	if lhs = 0 then
		_compile_statement();
		goto .compile_procedure_body_loop;
	end;
end;

(* Writes a register name to the standard output. *)
(* Parameters: *)
(* register_character - Register character. *)
(* register_number - Register number, a single digit. *)
proc _write_register(register_character: Word, register_number: Word);
begin
	_write_c(register_character);
	register_number := register_number + '0';
	_write_c(register_number);
end;

(* Skips tab characters at the current position. *)
proc _skip_spaces();
var
	current_byte: Word;
begin
	current_byte := _load_byte(source_code_position);

	if current_byte = '\t' then
		_advance_token(1);
		_skip_spaces();
	end;
end;

(* Skips a type expression, which is a single token. The type is not used. *)
proc _read_type_expression();
var
	type_name: Word;
begin
	type_name := _read_token();
	_advance_token(type_name);
end;

(* Allocates a parameter info (two words: the info kind and the stack offset) *)
(* in the memory arena. *)
(* Parameters: *)
(* parameter_index - Parameter index. *)
(* Returns the pointer to the new info in a0. *)
proc _parameter_info_create(parameter_index: Word);
var
	offset: Word;
	current_word: Word;
	result: Word;
begin
	result := memory_free_pointer;
	current_word := result;

	(* 2 is INFO_PARAMETER *)
	_store_word(2, current_word);

	current_word := current_word + 4;

	(* Calculate the stack offset: 88 - (4 * parameter_counter) *)
	offset := parameter_index * 4;
	_store_word(88 + -offset, current_word);

	memory_free_pointer := current_word + 4;

	return result
end;

(* Returns the stack offset stored in the second word of the info object in a0. *)
proc _parameter_info_get_offset(info: Word);
begin
	info := info + 4;
	return _load_word(info)
end;

(* Parameters: *)
(* temporary_index - Temporary (local variable) index.
*)
(* Creates the info object of a local variable in the memory arena. *)
(* The object consists of the info kind and the stack offset of the variable. *)
(* Returns the pointer to the created object in a0. *)
proc _temporary_info_create(temporary_index: Word);
var
	created: Word;
	stack_offset: Word;
begin
	created := memory_free_pointer;

	(* Local variables occupy consecutive words from the bottom of the frame. *)
	stack_offset := temporary_index * 4;

	(* The first word is the kind, 3 stands for INFO_TEMPORARY. *)
	_store_word(3, created);
	_store_word(stack_offset, created + 4);

	(* Both words are taken from the arena. *)
	memory_free_pointer := created + 8;

	return created
end;

(* Reads the stack offset kept in the second word of a temporary info. *)
proc _temporary_info_get_offset(info: Word);
begin
	return _load_word(info + 4)
end;

(* Reads one "name: Type" parameter declaration, registers the name in the *)
(* local symbol table and emits the store of the argument register into *)
(* the stack slot of the parameter. *)
(* Parameters: *)
(* parameter_index - Parameter index. *)
proc _read_procedure_parameter(parameter_index: Word);
var
	identifier: Word;
	identifier_length: Word;
	parameter_info: Word;
	stack_offset: Word;
begin
	(* Consume the name, the colon with the following space and the type. *)
	identifier := source_code_position;
	identifier_length := _read_token();
	_advance_token(identifier_length + 2);
	_read_type_expression();

	(* Remember where the parameter lives. *)
	parameter_info := _parameter_info_create(parameter_index);
	_symbol_table_enter(@symbol_table_local, identifier, identifier_length, parameter_info);
	stack_offset := _parameter_info_get_offset(parameter_info);

	(* Spill the argument register to the slot of the parameter. *)
	_write_z("\tsw a\0");
	_write_i(parameter_index);
	_write_z(", \0");
	_write_i(stack_offset);
	_write_z("(sp)\n\0");
end;

(* Reads the parenthesized, comma separated parameter list of a procedure. *)
proc _read_procedure_parameters();
var
	index: Word;
begin
	index := 0;

	(* Skip open paren. *)
	_advance_token(1);

	.compile_procedure_prologue_skip;
	if _load_byte(source_code_position) = ')' then
		goto .read_procedure_parameters_close;
	end;
	_read_procedure_parameter(index);
	index := index + 1;

	if _load_byte(source_code_position) = ',' then
		(* Skip the comma and the space behind it, then read the next one. *)
		_advance_token(2);
		goto .compile_procedure_prologue_skip;
	end;

	.read_procedure_parameters_close;
	(* Skip close paren. *)
	_advance_token(1);
end;

(* Parameters: *)
(* variable_index - Variable index.
*)
(* Reads one "name: Type;" line of a local variable section and registers the *)
(* variable in the local symbol table. *)
proc _read_procedure_temporary(variable_index: Word);
var
	name_length: Word;
	info: Word;
	name_position: Word;
begin
	(* Skip the indentation in front of the declaration. *)
	_skip_spaces();
	name_position := source_code_position;

	(* Read and skip variable name, colon and the space *)
	name_length := _read_token();
	_advance_token(name_length + 2);

	_read_type_expression();

	info := _temporary_info_create(variable_index);
	_symbol_table_enter(@symbol_table_local, name_position, name_length, info);

	(* Skip semicolon and newline after the variable declaration *)
	_advance_token(2);
end;

(* Reads the optional local variable section of a procedure. *)
(* Declarations are read until a line starting with "begin" is found, *)
(* the "begin" keyword itself is not consumed. *)
proc _read_procedure_temporaries();
var
	temporary_counter: Word;
begin
	if _memcmp(source_code_position, "var", 3) <> 0 then
		goto .read_local_variables_end;
	end;
	(* Skip "var" and the character after it. *)
	_advance_token(4);
	temporary_counter := 0;

	.read_local_variables_loop;
	if _memcmp(source_code_position, "begin", 5) = 0 then
		goto .read_local_variables_end;
	end;
	_read_procedure_temporary(temporary_counter);
	temporary_counter := temporary_counter + 1;
	goto .read_local_variables_loop;

	.read_local_variables_end;
end;

(* Compiles a procedure: the symbol type and label, a prologue reserving a *)
(* fixed frame of 128 bytes, the parameters, local variables, the body and *)
(* the epilogue. *)
proc _compile_procedure();
var
	name_length: Word;
begin
	(* Skip "proc ". *)
	_advance_token(5);

	(* Clear local symbol table. *)
	_store_word(0, @symbol_table_local);

	name_length := _read_token();

	(* Write .type _procedure_name, @function. *)
	_write_z(".type \0");
	_write_token(name_length);
	_write_z(", @function\n\0");

	(* Write procedure label, _procedure_name: *)
	_write_token(name_length);
	_write_z(":\n\0");

	(* Skip procedure name. *)
	_advance_token(name_length);

	(* The return address and s0 are saved at the top of the frame. *)
	_write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0");

	_read_procedure_parameters();
	(* Skip semicolon and newline. *)
	_advance_token(2);
	_read_procedure_temporaries();

	(* Skip semicolon, "begin" and newline. *)
	(* NOTE(review): 6 bytes are skipped, which is "begin" plus one character. *)
	_advance_token(6);

	_compile_procedure_body();

	(* Write the epilogue. *)
	_write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0");

	(* Skip the "end" keyword, semicolon and newline. *)
	_advance_token(5);
end;

(* Skips the remainder of the current line, it is used to skip comments.
*)
(* Nothing is printed. The newline at the end of the line is skipped as well. *)
proc _skip_comment();
begin
	.skip_comment_loop;
	(* Check for newline character. *)
	if _load_byte(source_code_position) <> '\n' then
		(* Advance the input string by one byte. *)
		_advance_token(1);
		goto .skip_comment_loop;
	end;
	(* Skip the newline. *)
	_advance_token(1);
end;

(* Skip newlines and comments. *)
(* Indentation is looked through, but it is consumed only if the line turns *)
(* out to be empty or a comment. Otherwise the position stays at the *)
(* beginning of the line. *)
proc _skip_empty_lines();
var
	current_position: Word;
	current_byte: Word;
begin
	.skip_empty_lines_rerun;
	current_position := source_code_position;

	.skip_empty_lines_loop;
	current_byte := _load_byte(current_position);

	if current_byte = '\n' then
		goto .skip_empty_lines_newline;
	end;
	if current_byte = '\t' then
		goto .skip_empty_lines_tab;
	end;
	(* A comment begins with a paren followed by an asterisk. *)
	if current_byte <> '(' then
		goto .skip_empty_lines_end;
	end;
	current_byte := _load_byte(current_position + 1);
	if current_byte = '*' then
		goto .skip_empty_lines_comment;
	end;
	goto .skip_empty_lines_end;

	.skip_empty_lines_comment;
	source_code_position := current_position;
	_skip_comment();
	goto .skip_empty_lines_rerun;

	.skip_empty_lines_newline;
	source_code_position := current_position + 1;
	goto .skip_empty_lines_rerun;

	.skip_empty_lines_tab;
	current_position := current_position + 1;
	goto .skip_empty_lines_loop;

	.skip_empty_lines_end;
end;

(* Compiles the initializer of a constant or of a global variable into .word *)
(* directives. Supported are strings, S(...) lists of nested initializers, *)
(* symbol addresses and integers. *)
(* An initializer starting with any other character is not handled and *)
(* falls into the list loop below. *)
proc _compile_global_initializer();
var
	current_byte: Word;
	length: Word;
	offset: Word;
	token_length: Word;
begin
	current_byte := _load_byte(source_code_position);

	if current_byte = '"' then
		_write_z("\n\t.word strings + \0");
		length := _string_length(source_code_position);

		(* The offset is passed explicitly instead of relying on a0 still *)
		(* holding the result of the previous call. *)
		offset := _add_string(source_code_position);
		_write_i(offset);

		(* Skip the quoted string. *)
		_advance_token(length + 2);

		goto .compile_global_initializer_end;
	end;
	if current_byte = 'S' then
		(* Skip "S(". *)
		_advance_token(2);

		if _load_byte(source_code_position) = ')' then
			goto .compile_global_initializer_closing;
		end;
		goto .compile_global_initializer_loop;
	end;
	if current_byte = '@' then
		(* Skip @. *)
		_advance_token(1);
		_write_z("\n\t.word \0");
		token_length := _read_token();
		_write_token(token_length);
		_advance_token(token_length);
		goto .compile_global_initializer_end;
	end;
	if _is_digit(current_byte) = 1 then
		_write_z("\n\t.word \0");
		token_length := _read_token();
		_write_token(token_length);
		(* Skip the whole number, it can have more than one digit. *)
		_advance_token(token_length);
		goto .compile_global_initializer_end;
	end;

	.compile_global_initializer_loop;
	_compile_global_initializer();

	if _load_byte(source_code_position) <> ')' then
		(* Skip comma and whitespace after it. *)
		_advance_token(2);
		goto .compile_global_initializer_loop;
	end;

	.compile_global_initializer_closing;
	(* Skip ")" *)
	_advance_token(1);

	goto .compile_global_initializer_end;

	.compile_global_initializer_end;
end;

(* Compiles a constant declaration into an object symbol and its initializer. *)
proc _compile_constant_declaration();
var
	name_length: Word;
begin
	name_length := _read_token();

	_write_z(".type \0");
	_write_token(name_length);
	_write_z(", @object\n\0");

	_write_token(name_length);
	_write_c(':');

	(* Skip the constant name with assignment sign and surrounding whitespaces. *)
	_advance_token(name_length + 4);
	_compile_global_initializer();
	(* Skip semicolon and newline. *)
	_advance_token(2);

	_write_c('\n');
end;

(* Compiles the optional constant section into the read-only data section. *)
proc _compile_const_part();
begin
	_skip_empty_lines();

	if _memcmp(source_code_position, "const\0", 5) <> 0 then
		goto .compile_const_part_end;
	end;
	(* Skip "const" with the newline after it. *)
	_advance_token(6);
	_write_z(".section .rodata # Compiled from const section.\n\n\0");

	.compile_const_part_loop;
	_skip_empty_lines();

	(* If the character at the line beginning is not indentation, *)
	(* it is probably the next code section. *)
	if _load_byte(source_code_position) = '\t' then
		_advance_token(1);
		_compile_constant_declaration();
		goto .compile_const_part_loop;
	end;

	.compile_const_part_end;
end;

(* Compiles a global variable declaration into an object symbol followed by *)
(* its initializer or by a zeroed array. *)
proc _compile_variable_declaration();
var
	name_length: Word;
begin
	name_length := _read_token();

	_write_z(".type \0");
	_write_token(name_length);
	_write_z(", @object\n\0");

	_write_token(name_length);
	_write_c(':');

	(* Skip the variable name and colon with space before the type. *)
	_advance_token(name_length + 2);
	_read_type_expression();

	(* A space after the type means that an initializer follows. *)
	if _load_byte(source_code_position) <> ' ' then
		(* Else we assume this is a zeroed 81920 bytes big array. *)
		_write_z(" .zero 81920\0");
	else
		(* Skip the assignment sign with surrounding whitespaces. *)
		_advance_token(4);
		_compile_global_initializer();
	end;
	(* Skip semicolon and newline. *)
	_advance_token(2);
	_write_c('\n');
end;

(* Compiles the optional global variable section into the data section. *)
proc _compile_var_part();
var
	current_character: Word;
begin
	if _memcmp(source_code_position, "var\0", 3) <> 0 then
		goto .compile_var_part_end;
	end;
	(* Skip "var" and newline. *)
	_advance_token(4);
	_write_z(".section .data\n\0");

	.compile_var_part_loop;
	_skip_empty_lines();
	current_character := _load_byte(source_code_position);

	(* Declarations are indented, anything else ends the section. *)
	if current_character = '\t' then
		_advance_token(1);
		_compile_variable_declaration();
		goto .compile_var_part_loop;
	end;

	.compile_var_part_end;
end;

(* Process the source code and print the generated code.
*)
(* The constant and variable sections are compiled first. They are followed *)
(* by the text section with the built-in procedures and the procedures of *)
(* the module. *)
proc _compile_module();
begin
	_compile_const_part();
	_skip_empty_lines();
	_compile_var_part();

	_write_z(".section .text\n\n\0");
	(* Built-in procedures are written in assembly. _syscall expects the *)
	(* system call number as its seventh argument and moves it into a7. *)
	_write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0");
	_write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0");
	_write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0");
	(* The store procedures take the value first and the address second. *)
	_write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0");
	_write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0");

	.compile_module_loop;
	_skip_empty_lines();

	if _load_byte(source_code_position) <> 0 then
		(* 5 is "proc " length. Space is needed to distinguish from "procedure". *)
		if _memcmp(source_code_position, "proc ", 5) = 0 then
			_compile_procedure();
			goto .compile_module_loop;
		end;
	end;
	(* The loop ends at the 0 byte or at the first line that is not a procedure. *)

	.compile_module_end;
end;

(* Compiles the source code and appends the collected strings to the output *)
(* as one .ascii object named "strings". *)
proc _compile();
var
	compiler_strings_copy: Word;
	compiler_strings_end: Word;
	current_byte: Word;
begin
	_write_z(".globl _start\n\n\0");
	_compile_module();

	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
	_write_c('"');

	compiler_strings_copy := @compiler_strings;
	compiler_strings_end := compiler_strings_position;

	(* Print the stored string bytes one by one as they are. *)
	.compile_loop;
	if compiler_strings_copy < compiler_strings_end then
		current_byte := _load_byte(compiler_strings_copy);
		compiler_strings_copy := compiler_strings_copy + 1;
		_write_c(current_byte);

		goto .compile_loop;
	end;
	_write_c('"');
	_write_c('\n');
end;

(* Terminates the program with the system call number 93. *)
(* NOTE(review): the procedure declares no parameters and passes 0 as the *)
(* first system call argument, so a status code given by the caller in a0 *)
(* appears to be replaced with 0. Confirm against the callers. *)
proc _exit();
begin
	_syscall(0, 0, 0, 0, 0, 0, 93);
end;

(* Looks for a symbol in the given symbol table. *)
(* Parameters: *)
(* symbol_table - Symbol table. *)
(* symbol_name - Symbol name pointer. *)
(* name_length - Symbol name length. *)
(* Returns the symbol pointer or 0 in a0.
*)
(* A table begins with the number of its entries. Every entry takes three *)
(* words: the name pointer, the name length and the symbol pointer. *)
proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word);
var
	found: Word;
	remaining: Word;
	candidate: Word;
begin
	found := 0;

	(* Fetch the entry count and step over it to the first entry. *)
	remaining := _load_word(symbol_table);
	symbol_table := symbol_table + 4;

	.symbol_table_lookup_loop;
	if remaining <> 0 then
		(* The names are compared only if the lengths are the same. *)
		if _load_word(symbol_table + 4) = name_length then
			candidate := _load_word(symbol_table);

			if _memcmp(symbol_name, candidate, name_length) = 0 then
				(* Found, take the symbol pointer and stop searching. *)
				found := _load_word(symbol_table + 8);
				goto .symbol_table_lookup_end;
			end;
		end;
		(* No match, continue with the next entry. *)
		symbol_table := symbol_table + 12;
		remaining := remaining + -1;
		goto .symbol_table_lookup_loop;
	end;

	.symbol_table_lookup_end;
	return found
end;

(* Appends a symbol to the end of the table. *)
(* Parameters: *)
(* symbol_table - Symbol table. *)
(* symbol_name - Symbol name pointer. *)
(* name_length - Symbol name length. *)
(* symbol - Symbol pointer. *)
proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word);
var
	count: Word;
	slot: Word;
begin
	(* The table starts with the number of stored entries. *)
	count := _load_word(symbol_table);

	(* Entries are 12 bytes long. slot is the position of the new entry less *)
	(* the 4 bytes of the counter, which are added to each field offset. *)
	slot := count * 12;
	slot := slot + symbol_table;

	_store_word(symbol_name, slot + 4);
	_store_word(name_length, slot + 8);
	_store_word(symbol, slot + 12);

	(* The table has one entry more now. *)
	_store_word(count + 1, symbol_table);
end;

(* Empties the global symbol table and registers the built-in types in it. *)
proc _symbol_table_build();
begin
	(* An empty table has a zero entry count. *)
	_store_word(0, @symbol_table_global);

	(* Every built-in type is given by its name, name length and type info. *)
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_bool, 4, @symbol_type_info_bool);
end;

(* Classification table assigns each possible character to a group (class). All *)
(* characters of the same group are handled equivalently.
*) (* Classification: *) (* TransitionClass = ( *) (* transitionClassInvalid = 1, *) (* transitionClassDigit = 2, *) (* transitionClassAlpha = 3, *) (* transitionClassSpace = 4, *) (* transitionClassColon = 5, *) (* transitionClassEquals = 6, *) (* transitionClassLeftParen = 7, *) (* transitionClassRightParen = 8, *) (* transitionClassAsterisk = 9, *) (* transitionClassUnderscore = 10, *) (* transitionClassSingle = 11, *) (* transitionClassHex = 12, *) (* transitionClassZero = 13, *) (* transitionClassX = 14, *) (* transitionClassEof = 15, *) (* transitionClassDot = 16, *) (* transitionClassMinus = 17, *) (* transitionClassSingleQuote = 18, *) (* transitionClassDoubleQuote = 19, *) (* transitionClassGreater = 20, *) (* transitionClassLess = 21, *) (* transitionClassOther = 22 *) (* ); *) (* TransitionState = ( *) (* transitionStateStart = 1, *) (* transitionStateColon = 2, *) (* transitionStateIdentifier = 3, *) (* transitionStateDecimal = 4, *) (* transitionStateGreater = 5, *) (* transitionStateMinus = 6, *) (* transitionStateLeftParen = 7, *) (* transitionStateLess = 8, *) (* transitionStateDot = 9, *) (* transitionStateComment = 10, *) (* transitionStateClosingComment = 11, *) (* transitionStateCharacter = 12, *) (* transitionStateString = 13, *) (* transitionStateLeadingZero = 14, *) (* transitionStateDecimalSuffix = 15, *) (* transitionStateEnd = 16 *) (* ); *) (* Transition = record *) (* action: TransitionAction; *) (* next_state: TransitionState *) (* end; *) (* TransitionAction = ( *) (* none = 1, *) (* accumulate = 2, *) (* skip = 3, *) (* single = 4, *) (* eof = 5, *) (* finalize = 6, *) (* composite = 7, *) (* key_id = 8, *) (* integer = 9, *) (* delimited = 10 *) (* ); *) (* Assigns a value to the array element at the given index. *) (* Parameters: *) (* array - Array pointer. *) (* index - 1-based index of the word in the array. *) (* data - Data to assign. 
*)
proc _assign_at(array: Word, index: Word, data: Word);
var
	target: Word;
begin
	(* Indices start at 1 and every element is one word (4 bytes) long. *)
	target := index + -1;
	target := target * 4;
	target := array + target;

	_store_word(data, target);
end;

(* Fills the classification table. The entry for a character code is stored *)
(* at the index code + 1, the values are TransitionClass members. *)
proc _create_classification();
var
	code: Word;
begin
	(* NUL marks the end of the input. *)
	_assign_at(@classification, 1, 15);

	(* Control characters are invalid, except the tab, line feed and carriage *)
	(* return, which are spaces. *)
	_assign_at(@classification, 2, 1);
	_assign_at(@classification, 3, 1);
	_assign_at(@classification, 4, 1);
	_assign_at(@classification, 5, 1);
	_assign_at(@classification, 6, 1);
	_assign_at(@classification, 7, 1);
	_assign_at(@classification, 8, 1);
	_assign_at(@classification, 9, 1);
	_assign_at(@classification, 10, 4);
	_assign_at(@classification, 11, 4);
	_assign_at(@classification, 12, 1);
	_assign_at(@classification, 13, 1);
	_assign_at(@classification, 14, 4);
	_assign_at(@classification, 15, 1);
	_assign_at(@classification, 16, 1);
	_assign_at(@classification, 17, 1);
	_assign_at(@classification, 18, 1);
	_assign_at(@classification, 19, 1);
	_assign_at(@classification, 20, 1);
	_assign_at(@classification, 21, 1);
	_assign_at(@classification, 22, 1);
	_assign_at(@classification, 23, 1);
	_assign_at(@classification, 24, 1);
	_assign_at(@classification, 25, 1);
	_assign_at(@classification, 26, 1);
	_assign_at(@classification, 27, 1);
	_assign_at(@classification, 28, 1);
	_assign_at(@classification, 29, 1);
	_assign_at(@classification, 30, 1);
	_assign_at(@classification, 31, 1);
	_assign_at(@classification, 32, 1);

	(* Space and the punctuation preceding the digits. *)
	_assign_at(@classification, 33, 4);
	_assign_at(@classification, 34, 11);
	_assign_at(@classification, 35, 19);
	_assign_at(@classification, 36, 22);
	_assign_at(@classification, 37, 22);
	_assign_at(@classification, 38, 11);
	_assign_at(@classification, 39, 11);
	_assign_at(@classification, 40, 18);
	_assign_at(@classification, 41, 7);
	_assign_at(@classification, 42, 8);
	_assign_at(@classification, 43, 9);
	_assign_at(@classification, 44, 11);
	_assign_at(@classification, 45, 11);
	_assign_at(@classification, 46, 17);
	_assign_at(@classification, 47, 16);
	_assign_at(@classification, 48, 11);

	(* Digits, zero has its own class. *)
	_assign_at(@classification, 49, 13);
	_assign_at(@classification, 50, 2);
	_assign_at(@classification, 51, 2);
	_assign_at(@classification, 52, 2);
	_assign_at(@classification, 53, 2);
	_assign_at(@classification, 54, 2);
	_assign_at(@classification, 55, 2);
	_assign_at(@classification, 56, 2);
	_assign_at(@classification, 57, 2);
	_assign_at(@classification, 58, 2);

	(* Punctuation between the digits and the capital letters. *)
	_assign_at(@classification, 59, 5);
	_assign_at(@classification, 60, 11);
	_assign_at(@classification, 61, 21);
	_assign_at(@classification, 62, 6);
	_assign_at(@classification, 63, 20);
	_assign_at(@classification, 64, 22);
	_assign_at(@classification, 65, 11);

	(* Capital letters. *)
	_assign_at(@classification, 66, 3);
	_assign_at(@classification, 67, 3);
	_assign_at(@classification, 68, 3);
	_assign_at(@classification, 69, 3);
	_assign_at(@classification, 70, 3);
	_assign_at(@classification, 71, 3);
	_assign_at(@classification, 72, 3);
	_assign_at(@classification, 73, 3);
	_assign_at(@classification, 74, 3);
	_assign_at(@classification, 75, 3);
	_assign_at(@classification, 76, 3);
	_assign_at(@classification, 77, 3);
	_assign_at(@classification, 78, 3);
	_assign_at(@classification, 79, 3);
	_assign_at(@classification, 80, 3);
	_assign_at(@classification, 81, 3);
	_assign_at(@classification, 82, 3);
	_assign_at(@classification, 83, 3);
	_assign_at(@classification, 84, 3);
	_assign_at(@classification, 85, 3);
	_assign_at(@classification, 86, 3);
	_assign_at(@classification, 87, 3);
	_assign_at(@classification, 88, 3);
	_assign_at(@classification, 89, 3);
	_assign_at(@classification, 90, 3);
	_assign_at(@classification, 91, 3);

	(* Punctuation between the capital and the small letters. *)
	_assign_at(@classification, 92, 11);
	_assign_at(@classification, 93, 22);
	_assign_at(@classification, 94, 11);
	_assign_at(@classification, 95, 11);
	_assign_at(@classification, 96, 10);
	_assign_at(@classification, 97, 22);

	(* Small letters: a to f are hexadecimal digits, x has its own class. *)
	_assign_at(@classification, 98, 12);
	_assign_at(@classification, 99, 12);
	_assign_at(@classification, 100, 12);
	_assign_at(@classification, 101, 12);
	_assign_at(@classification, 102, 12);
	_assign_at(@classification, 103, 12);
	_assign_at(@classification, 104, 3);
	_assign_at(@classification, 105, 3);
	_assign_at(@classification, 106, 3);
	_assign_at(@classification, 107, 3);
	_assign_at(@classification, 108, 3);
	_assign_at(@classification, 109, 3);
	_assign_at(@classification, 110, 3);
	_assign_at(@classification, 111, 3);
	_assign_at(@classification, 112, 3);
	_assign_at(@classification, 113, 3);
	_assign_at(@classification, 114, 3);
	_assign_at(@classification, 115, 3);
	_assign_at(@classification, 116, 3);
	_assign_at(@classification, 117, 3);
	_assign_at(@classification, 118, 3);
	_assign_at(@classification, 119, 3);
	_assign_at(@classification, 120, 3);
	_assign_at(@classification, 121, 14);
	_assign_at(@classification, 122, 3);
	_assign_at(@classification, 123, 3);

	(* Remaining punctuation and the delete character. *)
	_assign_at(@classification, 124, 22);
	_assign_at(@classification, 125, 11);
	_assign_at(@classification, 126, 22);
	_assign_at(@classification, 127, 11);
	_assign_at(@classification, 128, 1);

	code := 129;

	(* Set the remaining 129 - 256 bytes to transitionClassOther. *)
	.create_classification_loop;
	_assign_at(@classification, code, 22);
	code := code + 1;

	if code < 257 then
		goto .create_classification_loop;
	end;
end;

(* Stores one cell of the transition table. *)
(* Parameters: *)
(* current_state - Current state (first index into transitions table). *)
(* transition - Transition (second index into transitions table). *)
(* action - Action to assign. *)
(* next_state - Next state to assign. *)
proc _set_transition(current_state: Word, transition: Word, action: Word, next_state: Word);
var
	transition_table: Word;
	row_position: Word;
	state_position: Word;
	target: Word;
begin
	(* Transitions are stored in the classification array after the *)
	(* classification table, which holds 256 words = 1024 bytes. A smaller *)
	(* offset lets the transitions overwrite the classification entries. *)
	transition_table := @classification + 1024;

	(* Each cell is 8 bytes long (2 words: action and next state). *)
	(* There are 22 transition classes, so a row is 8 * 22 = 176 bytes long. *)
	(* With a shorter row the last classes of a state would share memory *)
	(* with the first classes of the next state. *)
	row_position := current_state + -1;
	row_position := row_position * 176;

	state_position := transition + -1;
	state_position := state_position * 8;

	target := transition_table + row_position;
	target := target + state_position;

	_store_word(action, target);
	target := target + 4;
	_store_word(next_state, target);
end;

(* Assigns the same action and next state to all 22 classes of a state. *)
(* Parameters: *)
(* current_state - Current state (Transition state enumeration). *)
(* default_action - Default action (Callback). *)
(* next_state - Next state (Transition state enumeration). *)
proc _set_default_transition(current_state: Word, default_action: Word, next_state: Word);
var
	class_number: Word;
begin
	class_number := 1;

	(* Go through the classes 1 - 22. *)
	.set_default_transition_loop;
	_set_transition(current_state, class_number, default_action, next_state);
	class_number := class_number + 1;

	if class_number < 23 then
		goto .set_default_transition_loop;
	end;
end;

(* The transition table describes transitions from one state to another, given *)
(* a symbol (character class). *)
(* The table has m rows and n columns, where m is the amount of states and n is *)
(* the amount of classes. So given the current state and a classified character *)
(* the table can be used to look up the next state. *)
(* Each cell is two words long. *)
(* - The first word is the action that should be performed when transitioning, *)
(* a member of the TransitionAction enumeration. *)
(* - The second word is the next state, a member of the TransitionState *)
(* enumeration. 16 is transitionStateEnd. *)
proc _create_transitions();
begin
	(* Start state. *)
	_set_transition(1, 1, 1, 16);
	_set_transition(1, 2, 2, 4);
	_set_transition(1, 3, 2, 3);
	_set_transition(1, 4, 3, 1);
	(* A colon leads to the colon state. It led to the greater state (5) *)
	(* before, which has the same transitions, so the behavior is unchanged. *)
	_set_transition(1, 5, 2, 2);
	_set_transition(1, 6, 4, 16);
	_set_transition(1, 7, 2, 7);
	_set_transition(1, 8, 4, 16);
	_set_transition(1, 9, 4, 16);
	_set_transition(1, 10, 2, 3);
	_set_transition(1, 11, 4, 16);
	_set_transition(1, 12, 2, 3);
	_set_transition(1, 13, 2, 14);
	_set_transition(1, 14, 2, 3);
	_set_transition(1, 15, 5, 16);
	_set_transition(1, 16, 2, 9);
	_set_transition(1, 17, 2, 6);
	_set_transition(1, 18, 2, 12);
	_set_transition(1, 19, 2, 13);
	_set_transition(1, 20, 2, 5);
	_set_transition(1, 21, 2, 8);
	_set_transition(1, 22, 1, 16);

	(* Colon state. *)
	_set_default_transition(2, 6, 16);
	_set_transition(2, 6, 7, 16);

	(* Identifier state. *)
	_set_default_transition(3, 8, 16);
	_set_transition(3, 2, 2, 3);
	_set_transition(3, 3, 2, 3);
	_set_transition(3, 10, 2, 3);
	_set_transition(3, 12, 2, 3);
	_set_transition(3, 13, 2, 3);
	_set_transition(3, 14, 2, 3);

	(* Decimal state. *)
	_set_default_transition(4, 9, 16);
	_set_transition(4, 2, 2, 4);
	_set_transition(4, 3, 2, 15);
	_set_transition(4, 10, 1, 16);
	_set_transition(4, 12, 2, 15);
	_set_transition(4, 13, 2, 4);
	_set_transition(4, 14, 2, 15);

	(* Greater state. *)
	_set_default_transition(5, 6, 16);
	_set_transition(5, 6, 7, 16);

	(* Minus state. *)
	_set_default_transition(6, 6, 16);
	_set_transition(6, 20, 7, 16);

	(* Left paren state. *)
	_set_default_transition(7, 6, 16);
	_set_transition(7, 9, 2, 10);

	(* Less state. *)
	_set_default_transition(8, 6, 16);
	_set_transition(8, 6, 7, 16);
	_set_transition(8, 20, 7, 16);

	(* Dot state. *)
	_set_default_transition(9, 6, 16);
	_set_transition(9, 16, 7, 16);

	(* Comment. *)
	_set_default_transition(10, 2, 10);
	_set_transition(10, 9, 2, 11);
	_set_transition(10, 15, 1, 16);

	(* Closing comment. *)
	_set_default_transition(11, 2, 10);
	_set_transition(11, 1, 1, 16);
	_set_transition(11, 8, 10, 16);
	_set_transition(11, 9, 2, 11);
	_set_transition(11, 15, 1, 16);

	(* Character. *)
	_set_default_transition(12, 2, 12);
	_set_transition(12, 1, 1, 16);
	_set_transition(12, 15, 1, 16);
	_set_transition(12, 18, 10, 16);

	(* String. *)
	_set_default_transition(13, 2, 13);
	_set_transition(13, 1, 1, 16);
	_set_transition(13, 15, 1, 16);
	_set_transition(13, 19, 10, 16);

	(* Leading zero. *)
	_set_default_transition(14, 9, 16);
	_set_transition(14, 2, 1, 16);
	_set_transition(14, 3, 1, 16);
	_set_transition(14, 10, 1, 16);
	_set_transition(14, 12, 1, 16);
	_set_transition(14, 13, 1, 16);
	_set_transition(14, 14, 1, 16);

	(* Digit with a character suffix. *)
	_set_default_transition(15, 9, 16);
	_set_transition(15, 3, 1, 16);
	_set_transition(15, 2, 1, 16);
	_set_transition(15, 12, 1, 16);
	_set_transition(15, 13, 1, 16);
	_set_transition(15, 14, 1, 16);
end;

(* Returns the pointer to the lexer state in a0. *)
proc _lexer_get_state();
var
	offset: Word;
	size: Word;
begin
	(* Lexer state is saved after the transition table. The classification *)
	(* table takes 256 * 4 = 1024 bytes, the transition table has 16 rows *)
	(* (states) of 176 bytes each. *)
	(* NOTE(review): together with the 3 words of the lexer state this needs *)
	(* 3852 bytes in the classification array - confirm its definition. *)
	offset := @classification;
	size := 16 * 176;
	offset := offset + 1024;

	return offset + size
end;

(* Gets pointer to the current source text. It is the second word of the *)
(* lexer state. *)
proc _lexer_get_current();
begin
	return _lexer_get_state() + 4
end;

(* Resets the lexer state for reading the next token. *)
proc _lexer_reset();
var
	state: Word;
	current: Word;
begin
	(* Transition start state is 1. *)
	state := _lexer_get_state();
	_store_word(1, state);

	(* Text pointer to the beginning of the currently read token. *)
	current := _lexer_get_current();
	_store_word(source_code_position, current);

	(* Initial length of the token is 0. It is kept in the lexer state after *)
	(* the text pointer and must not be written into the source text itself. *)
	_store_word(0, current + 4);
end;

(* One time lexer initialization. *)
proc _lexer_initialize();
begin
	_create_classification();
	_create_transitions();
end;

(* Entry point. *)
proc _start();
begin
	_lexer_initialize();
	_symbol_table_build();

	(* Read the source from the standard input. *)
	(* Second argument is buffer size. When modifying it, update the *)
	(* source_code definition as well. *)
	_read_file(@source_code, 81920);
	_compile();

	_exit(0);
end;