(*
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at https://mozilla.org/MPL/2.0/.
 *)

(* Stage 15 compiler. *)

type
  (*
   * Enumeration members are numbered from 1 in declaration order; this is how
   * _compile_enumeration_value translates a "Type.member" expression.
   *)
  LexerAction = (none, accumulate, skip, single, eof, finalize, composite, key_id, integer, delimited);
  (**
   * Classification table assigns each possible character to a group (class). All
   * characters of the same group are handled equivalently.
   *
   * Transition = record
   *   action: TransitionAction;
   *   next_state: TransitionState
   * end;
   *)
  LexerClass = (
    invalid,
    digit,
    alpha,
    space,
    colon,
    equals,
    left_paren,
    right_paren,
    asterisk,
    backslash,
    single,
    hex,
    zero,
    x,
    eof,
    dot,
    minus,
    single_quote,
    double_quote,
    greater,
    less,
    other
  );
  LexerState = (
    start,
    colon,
    identifier,
    decimal,
    leading_zero,
    greater,
    minus,
    left_paren,
    less,
    dot,
    comment,
    closing_comment,
    character,
    character_escape,
    string,
    string_escape,
    finish
  );
  (* Token kinds. The parser compares the kind produced by _lexer_read_token against these members. *)
  LexerTokenKind = (
    identifier,
    _const,
    _var,
    _proc,
    _type,
    _begin,
    _end,
    _if,
    _then,
    _else,
    _elsif,
    _while,
    _do,
    _extern,
    _record,
    _union,
    _true,
    _false,
    null,
    and,
    _or,
    _xor,
    pipe,
    not,
    _return,
    _module,
    _program,
    _import,
    _cast,
    _defer,
    _case,
    _of,
    trait,
    left_paren,
    right_paren,
    left_square,
    right_square,
    shift_left,
    shift_right,
    greater_equal,
    less_equal,
    greater_than,
    less_than,
    not_equal,
    equals,
    semicolon,
    dot,
    comma,
    plus,
    arrow,
    minus,
    multiplication,
    division,
    remainder,
    assignment,
    colon,
    hat,
    at,
    comment,
    string,
    character,
    integer,
    word,
    _goto,
    eof
  );
  (* Syntax tree node kinds. The parser stores one of these in the first word of a node with _node_set_kind. *)
  NodeKind = (
    integer_literal,
    string_literal,
    character_literal,
    variable_expression,
    field_access_expression,
    dereference_expression,
    unary_expression,
    binary_expression,
    call,
    goto_statement,
    label_declaration,
    return_statement,
    assign_statement,
    if_statement,
    procedure_declaration,
    variable_declaration,
    enumeration_type_expression,
    named_type_expression,
    type_declaration,
    module_declaration
  );
  InfoKind = (type_info, parameter_info, temporary_info);
  TypeKind =
(primitive, enumeration);

var
  source_code: Array;
  compiler_strings: Array;
  symbol_table_global: Array;
  symbol_table_local: Array;
  classification: Array;
  (* To reserve memory just add the value of needed bytes to the memory_free_pointer variable. *)
  memory: Array;
  compiler_strings_position: Word;
  compiler_strings_length: Word;
  label_counter: Word;
  (* Points to a segment of free memory. *)
  memory_free_pointer: Word;

(**
 * Calculates and returns the string token length between quotes, including the
 * escaping slash characters. A quote preceded by a backslash does not terminate
 * the string.
 *
 * Parameters:
 * string - String token pointer (pointing at the opening quote).
 *
 * Returns the length in a0.
 *)
proc _string_length(string: Word);
var
  counter: Word;
  current_byte: Word;
begin
  (* Reset the counter. *)
  counter := 0;

  .string_length_loop;
  string := string + 1;
  current_byte := _load_byte(string);

  if current_byte <> '"' then
    counter := counter + 1;

    if current_byte = '\\' then
      (* Count the escaped character as well and step over it, so an escaped quote is not taken for the end. *)
      string := string + 1;
      counter := counter + 1
    end;
    goto string_length_loop
  end;
  return counter
end;

(**
 * Adds a string to the global, read-only string storage. The token text is
 * copied verbatim, including escape sequences; every escape sequence (a
 * backslash and the character after it) counts as one byte of the resulting
 * string length.
 *
 * Parameters:
 * string - String token.
 *
 * Returns the offset from the beginning of the storage to the new string in a0.
 *)
proc _add_string(string: Word);
var
  contents: Word;
  result: Word;
  current_byte: Word;
begin
  (* Skip the opening quote. *)
  contents := string + 1;
  result := compiler_strings_length;

  .add_string_loop;
  current_byte := _load_byte(contents);

  if current_byte <> '"' then
    _store_byte(current_byte, compiler_strings_position);
    compiler_strings_position := compiler_strings_position + 1;
    contents := contents + 1;

    if current_byte = '\\' then
      (* Copy the escaped character together with its backslash, even if it is a quote or another backslash. *)
      current_byte := _load_byte(contents);
      _store_byte(current_byte, compiler_strings_position);
      compiler_strings_position := compiler_strings_position + 1;
      contents := contents + 1
    end;
    (* A plain character and a whole escape sequence both produce exactly one byte. *)
    compiler_strings_length := compiler_strings_length + 1;
    goto add_string_loop
  end;
  return result
end;

(**
 * Reads standard input into a buffer.
 *
 * Parameters:
 * buffer - Buffer pointer.
 * size - Buffer size.
 *
 * Returns the amount of bytes written in a0.
 *)
proc _read_file(buffer: Word, size: Word);
  return _syscall(0, buffer, size, 0, 0, 0, 63)
end;

(**
 * Writes to the standard output.
 *
 * Parameters:
 * buffer - Buffer.
 * size - Buffer length.
*)
proc _write_s(buffer: Word, size: Word);
begin
  _syscall(1, buffer, size, 0, 0, 0, 64)
end;

(**
 * Writes a number to a string buffer.
 *
 * Parameters:
 * number - Whole number.
 * output_buffer - Buffer pointer.
 *
 * Sets a0 to the length of the written number.
 *)
proc _print_i(number: Word, output_buffer: Word);
var
  local_buffer: Word;
  is_negative: Word;
  current_character: Word;
  result: Word;
begin
  (*
   * The digits are produced from the least significant one, so they are
   * written backwards starting at @result + 11.
   * NOTE(review): this appears to rely on the order the locals are laid out
   * in memory; confirm before reordering the declarations above.
   *)
  local_buffer := @result + 11;

  if number >= 0 then
    is_negative := 0
  else
    (* Work with the absolute value; the sign is prepended after the digits. *)
    number := -number;
    is_negative := 1
  end;

  .print_i_digit10;
  current_character := number % 10;
  _store_byte(current_character + '0', local_buffer);

  number := number / 10;
  local_buffer := local_buffer - 1;

  if number <> 0 then
    goto print_i_digit10
  end;
  if is_negative = 1 then
    _store_byte('-', local_buffer);
    local_buffer := local_buffer - 1
  end;
  (* local_buffer points one byte before the first written character. *)
  result := @result + 11;
  result := result - local_buffer;
  _memcpy(output_buffer, local_buffer + 1, result);

  return result
end;

(**
 * Writes a number to the standard output.
 *
 * Parameters:
 * number - Whole number.
 *)
proc _write_i(number: Word);
var
  local_buffer: Word;
  length: Word;
begin
  length := _print_i(number, @local_buffer);
  _write_s(@local_buffer, length)
end;

(**
 * Writes a character from a0 into the standard output.
 *
 * Parameters:
 * character - Character to write.
 *)
proc _write_c(character: Word);
begin
  _write_s(@character, 1)
end;

(**
 * Write null terminated string.
 *
 * Parameters:
 * string - String.
 *)
proc _write_z(string: Word);
var
  next_byte: Word;
begin
  (* Check for 0 character. *)
  next_byte := _load_byte(string);

  if next_byte <> 0 then
    (* Print a character. *)
    _write_c(next_byte);

    (* Advance the input string by one byte. *)
    _write_z(string + 1)
  end
end;

(**
 * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
 *)
proc _is_upper(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := character >= 'A';
  rhs := character <= 'Z';

  return lhs & rhs
end;

(**
 * Detects if a0 is a lowercase character.
Sets a0 to 1 if so, otherwise to 0.
 *)
proc _is_lower(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := character >= 'a';
  rhs := character <= 'z';

  return lhs & rhs
end;

(**
 * Detects if the passed character is a 7-bit alpha character or an underscore.
 *
 * Parameters:
 * character - Tested character.
 *
 * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
 *)
proc _is_alpha(character: Word);
var
  is_upper_result: Word;
  is_lower_result: Word;
  is_alpha_result: Word;
  is_underscore: Word;
begin
  is_upper_result := _is_upper(character);
  is_lower_result := _is_lower(character);
  is_underscore := character = '_';

  is_alpha_result := is_lower_result or is_upper_result;
  return is_alpha_result or is_underscore
end;

(**
 * Detects whether the passed character is a digit (a value between 0 and 9).
 *
 * Parameters:
 * character - Examined value.
 *
 * Sets a0 to 1 if it is a digit, to 0 otherwise.
 *)
proc _is_digit(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := character >= '0';
  rhs := character <= '9';

  return lhs & rhs
end;

(**
 * Detects whether the passed character is an alpha character, an underscore
 * or a digit.
 *
 * Parameters:
 * character - Tested character.
 *
 * Returns the combined result of _is_alpha and _is_digit.
 *)
proc _is_alnum(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := _is_alpha(character);
  rhs := _is_digit(character);

  return lhs or rhs
end;

(**
 * Compares two memory regions byte by byte and stops at the first difference.
 *
 * Parameters:
 * lhs - First pointer.
 * rhs - Second pointer.
 * count - The length to compare.
 *
 * Returns 0 if memory regions are equal (or count is 0), otherwise the
 * difference between the first pair of differing bytes (lhs byte - rhs byte).
 *)
proc _memcmp(lhs: Word, rhs: Word, count: Word);
var
  lhs_byte: Word;
  rhs_byte: Word;
  result: Word;
begin
  result := 0;

  .memcmp_loop;
  if count <> 0 then
    lhs_byte := _load_byte(lhs);
    rhs_byte := _load_byte(rhs);
    result := lhs_byte - rhs_byte;

    lhs := lhs + 1;
    rhs := rhs + 1;
    count := count - 1;

    (* Continue only while the regions are equal so far. *)
    if result = 0 then
      goto memcmp_loop
    end
  end;
  return result
end;

(**
 * Copies memory.
 *
 * Parameters:
 * destination - Destination.
 * source - Source.
 * count - Size.
 *
 * Returns the destination pointer advanced past the last copied byte.
*)
proc _memcpy(destination: Word, source: Word, count: Word);
var
  current_byte: Word;
begin
  .memcpy_loop;
  if count <> 0 then
    current_byte := _load_byte(source);
    _store_byte(current_byte, destination);

    destination := destination + 1;
    source := source + 1;
    count := count - 1;
    goto memcpy_loop
  end;
  (* At this point destination has been advanced by the number of copied bytes. *)
  return destination
end;

(* Returns the node kind, which is stored in the first word of every node. *)
proc _node_get_kind(this: Word);
  return this^
end;

(* Stores the node kind into the first word of the node. *)
proc _node_set_kind(this: Word, kind: Word);
begin
  this^ := kind
end;

(* Integer literal node: kind at offset 0, token pointer at offset 4, token length at offset 8. *)
proc _integer_literal_node_size();
  return 12
end;

proc _integer_literal_node_get_value(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _integer_literal_node_set_value(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc _integer_literal_node_get_length(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _integer_literal_node_set_length(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Creates an integer literal node from the current token and skips the token.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_integer_literal();
var
  integer_token: Word;
  integer_length: Word;
  result: Word;
  memory_size: Word;
begin
  (* Reserve the node through the common allocator instead of a hard-coded size. *)
  memory_size := _integer_literal_node_size();
  result := _allocate(memory_size);

  (* The node refers to the token text in the source; the text is not copied. *)
  integer_token := _lexer_global_get_start();
  integer_length := _lexer_global_get_end();
  integer_length := integer_length - integer_token;
  _lexer_skip_token();

  _node_set_kind(result, NodeKind.integer_literal);
  _integer_literal_node_set_value(result, integer_token);
  _integer_literal_node_set_length(result, integer_length);

  return result
end;

(**
 * Emits "li t0, <literal>" with the literal text taken from the node.
 *
 * Parameters:
 * integer_literal_node - Integer literal node.
 *)
proc _compile_integer_literal(integer_literal_node: Word);
var
  integer_token: Word;
  integer_length: Word;
begin
  _write_z("\tli t0, \0");

  integer_token := _integer_literal_node_get_value(integer_literal_node);
  integer_length := _integer_literal_node_get_length(integer_literal_node);

  _write_s(integer_token, integer_length);
  _write_c('\n')
end;

(* Character literal node: kind at offset 0, token pointer at offset 4, token length at offset 8. *)
proc _character_literal_node_get_value(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _character_literal_node_set_value(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc
_character_literal_node_get_length(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _character_literal_node_set_length(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Creates a character literal node from the current token and skips the token.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_character_literal();
var
  character: Word;
  character_length: Word;
  result: Word;
begin
  (* 12 bytes: node kind, token pointer and token length. *)
  result := _allocate(12);

  character := _lexer_global_get_start();
  character_length := _lexer_global_get_end();
  character_length := character_length - character;
  _lexer_skip_token();

  _node_set_kind(result, NodeKind.character_literal);
  (* Use the accessors of this node type rather than the integer literal ones. *)
  _character_literal_node_set_value(result, character);
  _character_literal_node_set_length(result, character_length);

  return result
end;

(**
 * Emits "li t0, <literal>" with the literal text taken from the node.
 *
 * Parameters:
 * character_literal_node - Character literal node.
 *)
proc _compile_character_literal(character_literal_node: Word);
var
  character: Word;
  character_length: Word;
begin
  character := _character_literal_node_get_value(character_literal_node);
  character_length := _character_literal_node_get_length(character_literal_node);

  _write_z("\tli t0, \0");
  _write_s(character, character_length);
  _write_c('\n')
end;

(* Variable expression node: kind at offset 0, name pointer at offset 4, name length at offset 8. *)
proc _variable_expression_size();
  return 12
end;

proc _variable_expression_get_name(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _variable_expression_set_name(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc _variable_expression_get_length(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _variable_expression_set_length(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Reserves memory by advancing memory_free_pointer.
 *
 * Parameters:
 * size - Number of bytes to reserve.
 *
 * Returns the pointer to the reserved segment.
 *)
proc _allocate(size: Word);
var
  result: Word;
begin
  result := memory_free_pointer;
  memory_free_pointer := memory_free_pointer + size;

  return result
end;

(**
 * Creates a variable expression node from the current token and skips the token.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_variable_expression();
var
  name: Word;
  name_token: Word;
  result: Word;
  memory_size: Word;
begin
  name := _lexer_global_get_start();
  (* name_token holds the token end first and then the name length. *)
  name_token := _lexer_global_get_end();
  name_token := name_token - name;
  _lexer_skip_token();

  memory_size := _variable_expression_size();
  result := _allocate(memory_size);

  _node_set_kind(result,
NodeKind.variable_expression);
  _variable_expression_set_name(result, name);
  _variable_expression_set_length(result, name_token);

  return result
end;

(**
 * Emits code loading the address of a variable into t0. The name is looked
 * up in the local symbol table first; if it is not found there, it is treated
 * as a global symbol.
 *
 * Parameters:
 * variable_expression - Variable expression node.
 *)
proc _compile_variable_expression(variable_expression: Word);
var
  name: Word;
  name_token: Word;
  lookup_result: Word;
begin
  name := _variable_expression_get_name(variable_expression);
  name_token := _variable_expression_get_length(variable_expression);

  lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
  if lookup_result <> 0 then
    _compile_local_designator(lookup_result)
  else
    _compile_global_designator(variable_expression)
  end
end;

(* String literal node: kind at offset 0, token pointer at offset 4, length at offset 8. *)
proc _string_literal_node_size();
  return 12
end;

proc _string_literal_node_get_value(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _string_literal_node_set_value(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc _string_literal_node_get_length(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _string_literal_node_set_length(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Creates a string literal node from the current token and skips the token.
 * The stored length is the one calculated by _string_length.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_string_literal();
var
  length: Word;
  token_start: Word;
  result: Word;
  memory_size: Word;
begin
  memory_size := _string_literal_node_size();
  result := _allocate(memory_size);

  token_start := _lexer_global_get_start();
  length := _string_length(token_start);
  _lexer_skip_token();

  _node_set_kind(result, NodeKind.string_literal);
  _string_literal_node_set_value(result, token_start);
  _string_literal_node_set_length(result, length);

  return result
end;

(**
 * Adds the string to the string storage and emits code that loads its
 * address ("strings" label + offset) into t0.
 *
 * Parameters:
 * string_literal_node - String literal node.
 *)
proc _compile_string_literal(string_literal_node: Word);
var
  token_start: Word;
  length: Word;
  offset: Word;
begin
  token_start := _string_literal_node_get_value(string_literal_node);
  (* The length is read but not used below. *)
  length := _string_literal_node_get_length(string_literal_node);
  offset := _add_string(token_start);

  _write_z("\tla t0, strings\n\0");
  _write_z("\tli t1, \0");
  _write_i(offset);
  _write_c('\n');
  _write_z("\tadd t0, t0, t1\n\0")
end;

proc _parse_simple_expression();
var
  current_character: Word;
  parser_node:
Word;
  token_kind: Word;
begin
  (* 0 is returned if the current token does not start a simple expression. *)
  parser_node := 0;
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind.character then
    parser_node := _parse_character_literal()
  elsif token_kind = LexerTokenKind.integer then
    parser_node := _parse_integer_literal()
  elsif token_kind = LexerTokenKind.string then
    parser_node := _parse_string_literal()
  elsif token_kind = LexerTokenKind.identifier then
    parser_node := _parse_variable_expression()
  end;
  return parser_node
end;

(* Dereference expression node: kind at offset 0, pointer expression at offset 4. *)
proc _dereference_expression_size();
  return 8
end;

proc _dereference_expression_get_pointer(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _dereference_expression_set_pointer(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(**
 * Wraps an already parsed expression into a dereference node and skips the
 * current token.
 *
 * Parameters:
 * simple_expression - The dereferenced expression.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_dereference_expression(simple_expression: Word);
var
  result: Word;
  memory_size: Word;
begin
  memory_size := _dereference_expression_size();
  result := _allocate(memory_size);

  _node_set_kind(result, NodeKind.dereference_expression);
  _dereference_expression_set_pointer(result, simple_expression);
  _lexer_skip_token();

  return result
end;

(**
 * Parses a simple expression optionally followed by one of: "^"
 * (dereference), "." (field access) or "(" (call).
 *
 * Returns the resulting node.
 *)
proc _parse_designator();
var
  simple_expression: Word;
  token_kind: Word;
begin
  simple_expression := _parse_simple_expression();

  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind.hat then
    simple_expression := _parse_dereference_expression(simple_expression)
  elsif token_kind = LexerTokenKind.dot then
    simple_expression := _parse_field_access_expression(simple_expression)
  elsif token_kind = LexerTokenKind.left_paren then
    simple_expression := _parse_call(simple_expression)
  end;
  return simple_expression
end;

(**
 * Compiles a literal or a variable expression.
 *
 * Parameters:
 * parser_node - Expression node.
 *
 * Returns 1 if t0 contains an address (variable expression), 0 for literals.
 *)
proc _compile_simple_expression(parser_node: Word);
var
  is_address: Word;
  node_kind: Word;
begin
  is_address := 0;
  node_kind := _node_get_kind(parser_node);

  if node_kind = NodeKind.character_literal then
    _compile_character_literal(parser_node)
  elsif node_kind = NodeKind.string_literal then
    _compile_string_literal(parser_node)
  elsif node_kind = NodeKind.integer_literal then
_compile_integer_literal(parser_node)
  else
    (* Every other node kind is compiled as a variable expression. *)
    _compile_variable_expression(parser_node);
    is_address := 1
  end;
  return is_address
end;

(* Unary expression node: kind at offset 0, operand at offset 4, operator character at offset 8. *)
proc _unary_expression_size();
  return 12
end;

proc _unary_expression_get_operand(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _unary_expression_set_operand(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc _unary_expression_get_operator(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _unary_expression_set_operator(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Parses a designator with an optional prefix operator. The operator is
 * stored as one of the characters '@', '-' or '~'.
 *
 * Returns a unary expression node if an operator is present, otherwise the
 * designator node itself.
 *)
proc _parse_unary_expression();
var
  token_kind: Word;
  result: Word;
  memory_size: Word;
  operand: Word;
  operator: Word;
begin
  _lexer_read_token(@token_kind);
  operator := 0;

  if token_kind = LexerTokenKind.at then
    operator := '@'
  elsif token_kind = LexerTokenKind.minus then
    operator := '-'
  elsif token_kind = LexerTokenKind.not then
    operator := '~'
  end;
  (* Skip the operator token if there was one. *)
  if operator <> 0 then
    _lexer_skip_token()
  end;
  result := _parse_designator();

  if operator <> 0 then
    operand := result;
    memory_size := _unary_expression_size();
    result := _allocate(memory_size);

    _node_set_kind(result, NodeKind.unary_expression);
    _unary_expression_set_operand(result, operand);
    _unary_expression_set_operator(result, operator)
  end;

  return result
end;

(**
 * Compiles a unary expression or a bare designator. The result is left in t0.
 *
 * Parameters:
 * parser_node - Unary expression node or any designator node.
 *)
proc _compile_unary_expression(parser_node: Word);
var
  current_character: Word;
  token_kind: Word;
  expression_kind: Word;
  operator: Word;
  operand: Word;
begin
  operator := 0;
  operand := 0;
  expression_kind := _node_get_kind(parser_node);

  if expression_kind = NodeKind.unary_expression then
    operator := _unary_expression_get_operator(parser_node);
    operand := _unary_expression_get_operand(parser_node)
  else
    operand := parser_node
  end;

  (* For '@' the address is the result; otherwise an address is replaced by the value it points to. *)
  if operator = '@' then
    _compile_designator(operand)
  elsif _compile_designator(operand) then
    _write_z("\tlw t0, (t0) # Designator is an address.\n\0")
  end;
  if operator = '-' then
    _write_z("\tneg t0, t0\n\0")
  elsif operator = '~' then
    _write_z("\tnot t0, t0\n\0")
  end
end;

proc
_binary_expression_size();
  return 16
end;

(* Binary expression node: kind at offset 0, lhs at offset 4, rhs at offset 8, operator token kind at offset 12. *)
proc _binary_expression_get_lhs(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _binary_expression_set_lhs(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc _binary_expression_get_rhs(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _binary_expression_set_rhs(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

proc _binary_expression_get_operator(this: Word);
begin
  this := this + 12;
  return this^
end;

proc _binary_expression_set_operator(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(**
 * Parses a unary expression optionally followed by one binary operator and
 * a second unary expression. Only a single operator is handled; there is no
 * chaining or precedence.
 *
 * Returns a binary expression node if an operator follows, otherwise the
 * node of the left operand.
 *)
proc _parse_binary_expression();
var
  lhs_node: Word;
  rhs_node: Word;
  token_kind: Word;
  memory_size: Word;
  result: Word;
begin
  lhs_node := _parse_unary_expression();
  (* rhs_node stays 0 if no known binary operator follows. *)
  rhs_node := 0;

  _lexer_read_token(@token_kind);

  (* Every branch skips the operator token and parses the right operand. *)
  if token_kind = LexerTokenKind.plus then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.minus then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.multiplication then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.and then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind._or then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind._xor then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.equals then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.remainder then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.division then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.less_than then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.greater_than then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
elsif token_kind = LexerTokenKind.less_equal then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.not_equal then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  elsif token_kind = LexerTokenKind.greater_equal then
    _lexer_skip_token();
    rhs_node := _parse_unary_expression()
  end;
  if rhs_node <> 0 then
    memory_size := _binary_expression_size();
    result := _allocate(memory_size);

    _node_set_kind(result, NodeKind.binary_expression);
    _binary_expression_set_lhs(result, lhs_node);
    _binary_expression_set_rhs(result, rhs_node);
    (* The operator is stored as the token kind that was read above. *)
    _binary_expression_set_operator(result, token_kind)
  else
    result := lhs_node
  end;
  return result
end;

(**
 * Compiles a binary expression, or delegates to _compile_unary_expression
 * if the node is not a binary expression. The result is left in t0.
 *
 * Parameters:
 * parser_node - Expression node.
 *)
proc _compile_binary_expression(parser_node: Word);
var
  token_kind: Word;
  expression_kind: Word;
  operand_node: Word;
begin
  expression_kind := _node_get_kind(parser_node);

  if expression_kind <> NodeKind.binary_expression then
    _compile_unary_expression(parser_node)
  else
    token_kind := _binary_expression_get_operator(parser_node);

    operand_node := _binary_expression_get_lhs(parser_node);
    _compile_unary_expression(operand_node);

    (* Save the value of the left expression on the stack.
*)
    _write_z("\tsw t0, 64(sp)\n\0");

    operand_node := _binary_expression_get_rhs(parser_node);
    _compile_unary_expression(operand_node);

    (* Load the left expression from the stack; t1 = left operand, t0 = right operand. *)
    _write_z("\tlw t1, 64(sp)\n\0");

    if token_kind = LexerTokenKind.plus then
      _write_z("\tadd t0, t0, t1\n\0")
    elsif token_kind = LexerTokenKind.minus then
      _write_z("\tsub t0, t1, t0\n\0")
    elsif token_kind = LexerTokenKind.multiplication then
      _write_z("\tmul t0, t0, t1\n\0")
    elsif token_kind = LexerTokenKind.and then
      _write_z("\tand t0, t0, t1\n\0")
    elsif token_kind = LexerTokenKind._or then
      _write_z("\tor t0, t0, t1\n\0")
    elsif token_kind = LexerTokenKind._xor then
      _write_z("\txor t0, t0, t1\n\0")
    elsif token_kind = LexerTokenKind.equals then
      _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0")
    elsif token_kind = LexerTokenKind.remainder then
      _write_z("\trem t0, t1, t0\n\0")
    elsif token_kind = LexerTokenKind.division then
      _write_z("\tdiv t0, t1, t0\n\0")
    elsif token_kind = LexerTokenKind.less_than then
      _write_z("\tslt t0, t1, t0\n\0")
    elsif token_kind = LexerTokenKind.greater_than then
      _write_z("\tslt t0, t0, t1\n\0")
    elsif token_kind = LexerTokenKind.less_equal then
      (* left <= right is the negation of right < left. *)
      _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0")
    elsif token_kind = LexerTokenKind.not_equal then
      _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0")
    elsif token_kind = LexerTokenKind.greater_equal then
      (* left >= right is the negation of left < right. *)
      _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0")
    end
  end
end;

(* Parses a binary expression from the input and compiles it immediately. *)
proc _compile_expression();
var
  parser_node: Word;
begin
  parser_node := _parse_binary_expression();
  _compile_binary_expression(parser_node)
end;

(* 4 bytes node kind + 4 bytes next statement pointer + 4 byte pointer to the callee expression + 4 * 8 for up to 7 arguments and the trailing nil.
*)
proc _call_size();
  return 44
end;

(* Returns the callee expression stored at offset 8. *)
proc _call_get_name(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _call_set_name(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Returns the n-th argument. The arguments are stored at offset 8 + 4 * n, so
 * the callers in this file number them from 1; n = 0 addresses the callee slot.
 *)
proc _call_get_argument(this: Word, n: Word);
begin
  n := n * 4;
  this := this + 8;
  this := this + n;
  return this^
end;

(* Stores the n-th argument, see _call_get_argument for the numbering. *)
proc _call_set_argument(this: Word, n: Word, value: Word);
begin
  n := n * 4;
  this := this + 8;
  this := this + n;
  this^ := value
end;

(**
 * Parses the argument list of a call. The current token is expected to be
 * the opening parenthesis.
 *
 * Parameters:
 * callee - Already parsed callee expression.
 *
 * Returns the call node. The argument list is terminated with 0.
 * NOTE(review): the number of arguments is not checked against the node size.
 *)
proc _parse_call(callee: Word);
var
  parsed_expression: Word;
  result: Word;
  argument_number: Word;
  token_kind: Word;
  call_size: Word;
begin
  call_size := _call_size();
  result := _allocate(call_size);
  _node_set_kind(result, NodeKind.call);
  _statement_set_next(result, 0);

  argument_number := 1;
  _call_set_name(result, callee);

  (* Skip the current token (the opening parenthesis, see _parse_designator). *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  (* Empty argument list. *)
  if token_kind = LexerTokenKind.right_paren then
    _lexer_skip_token();
    goto parse_call_end
  end;

  .parse_call_loop;
  parsed_expression := _parse_binary_expression();
  _call_set_argument(result, argument_number, parsed_expression);
  argument_number := argument_number + 1;

  (* The token after an argument is skipped; a comma continues the list. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  if token_kind = LexerTokenKind.comma then
    goto parse_call_loop
  end;

  .parse_call_end;
  (* Set the trailing argument to nil. *)
  _call_set_argument(result, argument_number, 0);

  return result
end;

(**
 * Compiles a procedure call: evaluates the arguments, saves them on the
 * stack, loads them into the registers a0, a1, ... and emits the call.
 *
 * Parameters:
 * parsed_call - Call node. The callee is read as a variable expression.
 *)
proc _compile_call(parsed_call: Word);
var
  name_length: Word;
  name: Word;
  argument_count: Word;
  stack_offset: Word;
  parsed_expression: Word;
begin
  parsed_expression := _call_get_name(parsed_call);
  name := _variable_expression_get_name(parsed_expression);
  name_length := _variable_expression_get_length(parsed_expression);
  argument_count := 0;

  .compile_call_loop;
  (* Arguments are numbered from 1; 0 marks the end of the list. *)
  parsed_expression := _call_get_argument(parsed_call, argument_count + 1);
  if parsed_expression = 0 then
    goto compile_call_finalize
  else
    _compile_binary_expression(parsed_expression);

    (* Save the argument on the stack.
*)
    _write_z("\tsw t0, \0");

    (* The n-th argument (counting from 0) is saved at 116 - 4 * n relative to sp. *)
    stack_offset := argument_count * 4;
    _write_i(116 - stack_offset);

    _write_z("(sp)\n\0");

    argument_count := argument_count + 1;
    goto compile_call_loop
  end;
  .compile_call_finalize;

  (* Load the argument from the stack. *)
  if argument_count <> 0 then
    (* Decrement the argument counter. *)
    argument_count := argument_count - 1;

    _write_z("\tlw a\0");
    _write_i(argument_count);
    _write_z(", \0");

    (* Calculate the stack offset: 116 - (4 * argument_counter) *)
    stack_offset := argument_count * 4;
    _write_i(116 - stack_offset);

    _write_z("(sp)\n\0");

    goto compile_call_finalize
  end;

  _write_z("\tcall \0");
  _write_s(name, name_length);
  _write_c('\n')
end;

(**
 * All statements are chained into a list. Next contains a pointer to the next
 * statement in the statement list. The pointer is stored at offset 4.
 *)
proc _statement_get_next(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _statement_set_next(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(* Goto statement node: kind at offset 0, next at offset 4, label name at offset 8, label length at offset 12. *)
proc _goto_statement_size();
  return 16
end;

proc _goto_statement_get_label(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _goto_statement_set_label(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

proc _goto_statement_get_length(this: Word);
begin
  this := this + 12;
  return this^
end;

proc _goto_statement_set_length(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(**
 * Parses a goto statement. The current token is skipped (presumably the
 * "goto" keyword) and the following token is taken as the label name.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_goto_statement();
var
  token_kind: Word;
  label_name: Word;
  label_length: Word;
  statement_size: Word;
  result: Word;
begin
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  label_name := _lexer_global_get_start();
  label_length := _lexer_global_get_end() - label_name;

  _lexer_skip_token();

  statement_size := _goto_statement_size();
  result := _allocate(statement_size);

  _node_set_kind(result, NodeKind.goto_statement);
  _statement_set_next(result, 0);
  _goto_statement_set_label(result, label_name);
  _goto_statement_set_length(result, label_length);

  return result
end;

proc
_compile_goto_statement(parser_node: Word);
var
  label_name: Word;
  label_length: Word;
begin
  label_name := _goto_statement_get_label(parser_node);
  label_length := _goto_statement_get_length(parser_node);

  (* Labels are emitted with a leading dot, see _compile_label_declaration. *)
  _write_z("\tj .\0");

  _write_s(label_name, label_length);
  _write_c('\n')
end;

(* Label declaration node: kind at offset 0, next at offset 4, label name at offset 8, label length at offset 12. *)
proc _label_declaration_size();
  return 16
end;

proc _label_declaration_get_label(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _label_declaration_set_label(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

proc _label_declaration_get_length(this: Word);
begin
  this := this + 12;
  return this^
end;

proc _label_declaration_set_length(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(**
 * Parses a label declaration. The current token is skipped (presumably the
 * leading dot) and the following token is taken as the label name.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_label_declaration();
var
  token_kind: Word;
  label_name: Word;
  label_length: Word;
  statement_size: Word;
  result: Word;
begin
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  label_name := _lexer_global_get_start();
  label_length := _lexer_global_get_end() - label_name;

  _lexer_skip_token();

  statement_size := _label_declaration_size();
  result := _allocate(statement_size);

  _node_set_kind(result, NodeKind.label_declaration);
  _statement_set_next(result, 0);
  (* Use the accessors of this node type rather than the goto statement ones. *)
  _label_declaration_set_label(result, label_name);
  _label_declaration_set_length(result, label_length);

  return result
end;

(* Emits the label definition ".<name>:". *)
proc _compile_label_declaration(parser_node: Word);
var
  label_name: Word;
  label_length: Word;
begin
  label_name := _label_declaration_get_label(parser_node);
  label_length := _label_declaration_get_length(parser_node);

  _write_c('.');
  _write_s(label_name, label_length);
  _write_z(":\n\0")
end;

(**
 * Emits code loading the address of a local symbol (sp + offset) into t0.
 *
 * Parameters:
 * symbol - Symbol information found in the local symbol table.
 *)
proc _compile_local_designator(symbol: Word);
var
  variable_offset: Word;
begin
  _write_z("\taddi t0, sp, \0");
  variable_offset := _parameter_info_get_offset(symbol);
  _write_i(variable_offset);
  _write_c('\n')
end;

(**
 * Emits code loading the address of a global symbol into t0.
 *
 * Parameters:
 * variable_expression - Variable expression node naming the symbol.
 *)
proc _compile_global_designator(variable_expression: Word);
var
  name: Word;
  token_length: Word;
begin
  _write_z("\tla t0, \0");

  name := _variable_expression_get_name(variable_expression);
token_length := _variable_expression_get_length(variable_expression);
  _write_s(name, token_length);

  _write_c('\n')
end;

(* Field access node: kind at offset 0, aggregate at offset 4, field name at offset 8, field name length at offset 12. *)
proc _field_access_expression_size();
  return 16
end;

proc _field_access_expression_get_aggregate(this: Word);
begin
  this := this + 4;
  return this^
end;

proc _field_access_expression_set_aggregate(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

proc _field_access_expression_get_field(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _field_access_expression_set_field(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

proc _field_access_expression_get_length(this: Word);
begin
  this := this + 12;
  return this^
end;

proc _field_access_expression_set_length(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(**
 * Compiles an enumeration value written as "Type.member". The type name is
 * looked up in the global symbol table and the members are searched for the
 * field name; the 1-based position of the member is loaded into t0.
 *
 * Parameters:
 * field_access_expression - Field access node.
 *
 * NOTE(review): the result of the symbol lookup is used without a check.
 *)
proc _compile_enumeration_value(field_access_expression: Word);
var
  enumeration_type: Word;
  members: Word;
  members_length: Word;
  token_type: Word;
  value_name: Word;
  name_length: Word;
  member_name: Word;
  member_length: Word;
  counter: Word;
  symbol: Word;
begin
  (* symbol holds the aggregate expression first and the looked up type symbol afterwards. *)
  symbol := _field_access_expression_get_aggregate(field_access_expression);

  value_name := _variable_expression_get_name(symbol);
  name_length := _variable_expression_get_length(symbol);

  symbol := _symbol_table_lookup(@symbol_table_global, value_name, name_length);

  enumeration_type := _type_info_get_type(symbol);
  members := _enumeration_type_get_members(enumeration_type);
  members_length := _enumeration_type_get_length(enumeration_type);

  _lexer_read_token(@token_type);
  value_name := _field_access_expression_get_field(field_access_expression);
  name_length := _field_access_expression_get_length(field_access_expression);
  counter := 1;

  .compile_enumeration_value_members;
  if members_length > 0 then
    (* Each member entry is 8 bytes: name pointer followed by the name length. *)
    member_name := members^;
    member_length := members + 4;
    member_length := member_length^;

    (* The empty "then" branch means a match: fall through to emitting the value. *)
    if _lexer_compare_keyword(value_name, name_length, member_name, member_length) then
    else
      members_length := members_length - 1;
      members := members + 8;
counter := counter + 1;
      goto compile_enumeration_value_members
    end;
    (* Reached only for the matching member; nothing is emitted if the members run out. *)
    _write_z("\tli t0, \0");
    _write_i(counter);
    _write_c('\n')
  end
end;

(**
 * Parses the field name after a dot and creates a field access node.
 *
 * Parameters:
 * aggregate - Already parsed expression on the left side of the dot.
 *
 * Returns the pointer to the new node.
 *)
proc _parse_field_access_expression(aggregate: Word);
var
  token_kind: Word;
  name: Word;
  name_token: Word;
  result: Word;
  memory_size: Word;
begin
  (* Skip dot. Read the enumeration value. *)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  name := _lexer_global_get_start();
  (* name_token holds the token end first and then the name length. *)
  name_token := _lexer_global_get_end();
  name_token := name_token - name;
  _lexer_skip_token();

  memory_size := _field_access_expression_size();
  result := _allocate(memory_size);

  _node_set_kind(result, NodeKind.field_access_expression);
  _field_access_expression_set_aggregate(result, aggregate);
  _field_access_expression_set_field(result, name);
  _field_access_expression_set_length(result, name_token);

  return result
end;

(**
 * Compiles a designator: a dereference, an enumeration value, a call or a
 * simple expression. The result is left in t0.
 *
 * Parameters:
 * parser_node - Designator node.
 *
 * Returns 1 if t0 contains an address, 0 if it contains a value.
 *)
proc _compile_designator(parser_node: Word);
var
  is_address: Word;
  node_kind: Word;
begin
  is_address := 1;
  node_kind := _node_get_kind(parser_node);

  if node_kind = NodeKind.dereference_expression then
    (* Load the pointer value; the pointer itself is the address of the designated object. *)
    parser_node := _dereference_expression_get_pointer(parser_node);
    _compile_simple_expression(parser_node);
    _write_z("\tlw t0, (t0)\n\0")
  elsif node_kind = NodeKind.field_access_expression then
    _compile_enumeration_value(parser_node);
    is_address := 0
  elsif node_kind = NodeKind.call then
    _compile_call(parser_node);
    (* The call result is returned in a0. *)
    _write_z("\tmv t0, a0\n\0");
    is_address := 0
  else
    is_address := _compile_simple_expression(parser_node)
  end;
  return is_address
end;

(* Assignment statement node: kind at offset 0, next at offset 4, assignee at offset 8, assigned expression at offset 12. *)
proc _assignment_statement_size();
  return 16
end;

proc _assignment_statement_get_assignee(this: Word);
begin
  this := this + 8;
  return this^
end;

proc _assignment_statement_set_assignee(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

proc _assignment_statement_get_assignment(this: Word);
begin
  this := this + 12;
  return this^
end;

proc _assignment_statement_set_assignment(this: Word, value: Word);
begin
  this := this + 12;
this^ := value end;

(**
 * Parses an assignment whose left side has already been parsed.
 *
 * Parameters:
 * assignee - Node on the left side of the assignment.
 *
 * Returns the assignment statement node in a0.
 *)
proc _parse_assignment_statement(assignee: Word);
var
  statement_size: Word;
  result: Word;
  token_kind: Word;
  assignment_node: Word;
begin
  statement_size := _assignment_statement_size();
  result := _allocate(statement_size);

  (* NOTE(review): the NodeKind enumeration at the top of the file declares
     assign_statement, not assignment_statement. Confirm how the member
     lookup resolves this name. *)
  _node_set_kind(result, NodeKind.assignment_statement);
  _statement_set_next(result, 0);
  _assignment_statement_set_assignee(result, assignee);

  (* Skip the assignment sign (:=) with surrounding whitespaces. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  assignment_node := _parse_binary_expression();
  _assignment_statement_set_assignment(result, assignment_node);

  return result
end;

(**
 * Generates code for an assignment statement.
 *
 * Parameters:
 * parser_tree - Assignment statement node.
 *)
proc _compile_assignment_statement(parser_tree: Word);
var
  current_expression: Word;
begin
  current_expression := _assignment_statement_get_assignee(parser_tree);
  _compile_designator(current_expression);

  (* Save the assignee address on the stack. *)
  _write_z("\tsw t0, 60(sp)\n\0");

  (* Compile the assignment. *)
  current_expression := _assignment_statement_get_assignment(parser_tree);
  _compile_binary_expression(current_expression);

  (* Reload the saved address into t1 and store the value from t0 there. *)
  _write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0")
end;

(* Size of a return statement node in bytes. *)
proc _return_statement_size();
  return 12
end;

(* Reads the returned expression, the word at offset 8 of the node. *)
proc _return_statement_get_returned(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the returned expression into the word at offset 8 of the node. *)
proc _return_statement_set_returned(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Parses a return statement.
 *
 * Returns the return statement node in a0.
 *)
proc _parse_return_statement();
var
  token_kind: Word;
  returned: Word;
  label_length: Word;
  statement_size: Word;
  result: Word;
begin
  (* Skip "return" keyword and whitespace after it.
*)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  (* The returned expression is parsed before the node is allocated. *)
  returned := _parse_binary_expression();

  statement_size := _return_statement_size();
  result := _allocate(statement_size);

  _node_set_kind(result, NodeKind.return_statement);
  _statement_set_next(result, 0);
  _return_statement_set_returned(result, returned);

  return result
end;

(**
 * Generates code for a return statement: evaluates the expression and
 * copies the result from t0 into a0.
 *
 * Parameters:
 * parser_node - Return statement node.
 *)
proc _compile_return_statement(parser_node: Word);
var
  return_expression: Word;
begin
  return_expression := _return_statement_get_returned(parser_node);
  _compile_binary_expression(return_expression);
  _write_z("\tmv a0, t0\n\0")
end;

(**
 * Writes a label, .Ln, where n is a unique number.
 *
 * Parameters:
 * counter - Label counter.
 *)
proc _write_label(counter: Word);
begin
  _write_z(".L\0");
  _write_i(counter)
end;

(**
 * Parses one condition together with the statements guarded by it.
 *
 * Returns the conditional statements record in a0. Its next pointer is
 * set to 0.
 *)
proc _parse_conditional_statements();
var
  conditional_size: Word;
  token_kind: Word;
  current_node: Word;
  result: Word;
begin
  conditional_size := _conditional_statements_size();
  result := _allocate(conditional_size);

  (* Skip "if", "while" or "elsif". *)
  _lexer_skip_token();

  current_node := _parse_binary_expression();
  _conditional_statements_set_condition(result, current_node);

  (* Skip "then" or "do". *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  current_node := _parse_statements();
  _conditional_statements_set_statements(result, current_node);

  _conditional_statements_set_next(result, 0);

  return result
end;

(**
 * Generates code for one condition and the statements guarded by it.
 *
 * Parameters:
 * parser_node - Conditional statements record.
 * after_end_label - Number of the label to jump to after the statements.
 *)
proc _compile_conditional_statements(parser_node: Word, after_end_label: Word);
var
  condition_label: Word;
  current_node: Word;
begin
  (* Compile condition. *)
  current_node := _conditional_statements_get_condition(parser_node);
  _compile_binary_expression(current_node);

  (* condition_label is the label in front of the next elsif condition or end.
*)
  condition_label := label_counter;
  label_counter := label_counter + 1;

  (* Branch over the statements when the condition in t0 is zero. *)
  _write_z("\tbeqz t0, \0");
  _write_label(condition_label);
  _write_c('\n');

  current_node := _conditional_statements_get_statements(parser_node);
  _compile_statements(current_node);

  (* After the statements jump behind the whole construct. *)
  _write_z("\tj \0");
  _write_label(after_end_label);
  _write_c('\n');

  _write_label(condition_label);
  _write_z(":\n\0")
end;

(**
 * Conditional statements is a list of pairs: condition and statements.
 * Used for example to represent if and elsif blocks with belonging statements.
 *)
(* Size of a conditional statements record in bytes. *)
proc _conditional_statements_size();
  return 12
end;

(* Reads the condition, the first word of the record. *)
proc _conditional_statements_get_condition(this: Word);
  return this^
end;

(* Stores the condition into the first word of the record. *)
proc _conditional_statements_set_condition(this: Word, value: Word);
begin
  this^ := value
end;

(* Reads the statements, the word at offset 4. *)
proc _conditional_statements_get_statements(this: Word);
begin
  this := this + 4;
  return this^
end;

(* Stores the statements into the word at offset 4. *)
proc _conditional_statements_set_statements(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(* Reads the pointer to the next record, the word at offset 8. *)
proc _conditional_statements_get_next(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the pointer to the next record into the word at offset 8. *)
proc _conditional_statements_set_next(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(* Size of an if statement node in bytes. *)
proc _if_statement_size();
  return 16
end;

(* Reads the first conditional statements record, the word at offset 8. *)
proc _if_statement_get_conditionals(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the first conditional statements record at offset 8. *)
proc _if_statement_set_conditionals(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(* Reads the else statements, the word at offset 12. *)
proc _if_statement_get_else(this: Word);
begin
  this := this + 12;
  return this^
end;

(* Stores the else statements into the word at offset 12. *)
proc _if_statement_set_else(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(**
 * Parses an if statement with its elsif and else branches.
 *
 * Returns the if statement node in a0.
 *)
proc _parse_if_statement();
var
  current_node: Word;
  result: Word;
  object_size: Word;
  token_kind: Word;
  previous_conditional: Word;
  next_conditional: Word;
begin
  object_size := _if_statement_size();
  result := _allocate(object_size);

  _node_set_kind(result, NodeKind.if_statement);
  _statement_set_next(result, 0);

  (* The first record holds the "if" condition and its statements. *)
  previous_conditional := _parse_conditional_statements();
  _if_statement_set_conditionals(result,
previous_conditional);

  .parse_if_statement_loop;
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind._elsif then
    (* Append the elsif branch to the list and make it the new list tail. *)
    next_conditional := _parse_conditional_statements();
    _conditional_statements_set_next(previous_conditional, next_conditional);
    previous_conditional := next_conditional;

    goto parse_if_statement_loop
  elsif token_kind = LexerTokenKind._else then
    _lexer_skip_token();
    current_node := _parse_statements();
    _if_statement_set_else(result, current_node)
  else
    _if_statement_set_else(result, 0)
  end;
  (* Skip the token that follows the last branch. *)
  _lexer_skip_token();

  return result
end;

(**
 * Parses a single statement. The kind is chosen by the current token.
 *
 * Returns the statement node in a0, or 0 if the current token does not
 * start any of the handled statements.
 *)
proc _parse_statement();
var
  token_kind: Word;
  result: Word;
begin
  result := 0;
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind._goto then
    result := _parse_goto_statement()
  elsif token_kind = LexerTokenKind._if then
    result := _parse_if_statement()
  elsif token_kind = LexerTokenKind._return then
    result := _parse_return_statement()
  elsif token_kind = LexerTokenKind.dot then
    result := _parse_label_declaration()
  elsif token_kind = LexerTokenKind.identifier then
    result := _parse_designator();

    (* A designator that is not a call is the left side of an assignment. *)
    if _node_get_kind(result) <> NodeKind.call then
      result := _parse_assignment_statement(result)
    end
  end;
  return result
end;

(**
 * Parses a list of statements separated by semicolons and links them
 * through their next pointers.
 *
 * Returns the first statement in a0, or 0 if there is none.
 *)
proc _parse_statements();
var
  token_kind: Word;
  previous_statement: Word;
  next_statement: Word;
  first_statement: Word;
begin
  _skip_empty_lines();

  first_statement := _parse_statement();
  previous_statement := first_statement;
  if previous_statement = 0 then
    goto parse_statements_end
  end;

  .parse_statement_loop;
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind.semicolon then
    _lexer_skip_token();
    _skip_empty_lines();
    next_statement := _parse_statement();
    _statement_set_next(previous_statement, next_statement);
    previous_statement := next_statement;

    (* Stop as soon as nothing follows the semicolon. *)
    if previous_statement <> 0 then
      goto parse_statement_loop
    end
  end;
  .parse_statements_end;
  _skip_empty_lines();

  return first_statement
end;

(**
 * Generates code for every statement in a list.
 *
 * Parameters:
 * parser_node - First statement of the list or 0.
 *)
proc _compile_statements(parser_node: Word);
var
  current_statement: Word;
begin
  current_statement := parser_node;
.compile_statements_loop;
  (* Walk the list through the next pointers until it ends with 0. *)
  if current_statement <> 0 then
    _compile_statement(current_statement);
    current_statement := _statement_get_next(current_statement);
    goto compile_statements_loop
  end
end;

(**
 * Generates code for an if statement with all its branches.
 *
 * Parameters:
 * parser_node - If statement node.
 *)
proc _compile_if_statement(parser_node: Word);
var
  current_node: Word;
  after_end_label: Word;
  condition_label: Word;
begin
  (* One label is shared by all branches: the end of the whole statement. *)
  after_end_label := label_counter;
  label_counter := label_counter + 1;

  current_node := _if_statement_get_conditionals(parser_node);
  _compile_conditional_statements(current_node, after_end_label);

  (* Compile the remaining conditional records, if there are any. *)
  .compile_if_statement_loop;
  current_node := _conditional_statements_get_next(current_node);
  if current_node <> 0 then
    _compile_conditional_statements(current_node, after_end_label);

    goto compile_if_statement_loop
  end;
  current_node := _if_statement_get_else(parser_node);

  if current_node <> 0 then
    _compile_statements(current_node)
  end;

  _write_label(after_end_label);
  _write_z(":\n\0")
end;

(**
 * Generates code for a single statement depending on its node kind.
 * Nodes of other kinds are ignored.
 *
 * Parameters:
 * parser_node - Statement node.
 *)
proc _compile_statement(parser_node: Word);
var
  statement_kind: Word;
begin
  statement_kind := _node_get_kind(parser_node);

  if statement_kind = NodeKind.goto_statement then
    _compile_goto_statement(parser_node)
  elsif statement_kind = NodeKind.if_statement then
    _compile_if_statement(parser_node)
  elsif statement_kind = NodeKind.return_statement then
    _compile_return_statement(parser_node)
  elsif statement_kind = NodeKind.label_declaration then
    _compile_label_declaration(parser_node)
  elsif statement_kind = NodeKind.call then
    _compile_call(parser_node)
  elsif statement_kind = NodeKind.assignment_statement then
    _compile_assignment_statement(parser_node)
  end
end;

(**
 * Writes a register name to the standard output.
 *
 * Parameters:
 * register_character - Register character.
 * register_number - Register number.
*)
proc _write_register(register_character: Word, register_number: Word);
begin
  _write_c(register_character);
  (* Adding '0' turns the number into its digit character. *)
  _write_c(register_number + '0')
end;

(* Reads the type kind, the first word of a type record. *)
proc _type_get_kind(this: Word);
  return this^
end;

(* Stores the type kind into the first word of a type record. *)
proc _type_set_kind(this: Word, value: Word);
begin
  this^ := value
end;

(* Reads the type size, the word at offset 4. *)
proc _type_get_size(this: Word);
begin
  this := this + 4;
  return this^
end;

(* Stores the type size into the word at offset 4. *)
proc _type_set_size(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(* Reads the members pointer of an enumeration type, the word at offset 8. *)
proc _enumeration_type_get_members(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the members pointer of an enumeration type at offset 8. *)
proc _enumeration_type_set_members(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(* Reads the member count of an enumeration type, the word at offset 12. *)
proc _enumeration_type_get_length(this: Word);
begin
  this := this + 12;
  return this^
end;

(* Stores the member count of an enumeration type at offset 12. *)
proc _enumeration_type_set_length(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(* Size of an enumeration type expression node in bytes. *)
proc _enumeration_type_expression_size();
  return 12
end;

(* Reads the members pointer of the node, the word at offset 4. *)
proc _enumeration_type_expression_get_members(this: Word);
begin
  this := this + 4;
  return this^
end;

(* Stores the members pointer of the node at offset 4. *)
proc _enumeration_type_expression_set_members(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(* Reads the member count of the node, the word at offset 8. *)
proc _enumeration_type_expression_get_length(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the member count of the node at offset 8. *)
proc _enumeration_type_expression_set_length(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(* Size of a named type expression node in bytes. *)
proc _named_type_expression_size();
  return 12
end;

(* Reads the type name, the word at offset 4. *)
proc _named_type_expression_get_name(this: Word);
begin
  this := this + 4;
  return this^
end;

(* Stores the type name into the word at offset 4. *)
proc _named_type_expression_set_name(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(* Reads the type name length, the word at offset 8. *)
proc _named_type_expression_get_length(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the type name length into the word at offset 8. *)
proc _named_type_expression_set_length(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(**
 * Parses an enumeration type expression, a parenthesized list of member
 * names separated by commas.
 *
 * Returns the enumeration type expression node in a0.
 *)
proc _parse_enumeration_type_expression();
var
  token_kind: Word;
  enumeration_name: Word;
  name_length: Word;
  memory_start: Word;
  member_count: Word;
  result: Word;
  type_expression_size: Word;
begin
  _lexer_skip_token();
  memory_start :=
memory_free_pointer;
  member_count := 0;

  (* An immediately following closing paren means there are no members. *)
  _lexer_read_token(@token_kind);
  if token_kind = LexerTokenKind.right_paren then
    goto parse_enumeration_type_expression_end
  end;
  .parse_enumeration_type_expression_loop;
  member_count := member_count + 1;

  enumeration_name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() - enumeration_name;

  (* Each member takes two words of free memory: the name and its length. *)
  memory_free_pointer^ := enumeration_name;
  memory_free_pointer := memory_free_pointer + 4;

  memory_free_pointer^ := name_length;
  memory_free_pointer := memory_free_pointer + 4;

  (* Skip the identifier. *)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind.comma then
    _lexer_skip_token();
    _lexer_read_token(@token_kind);
    goto parse_enumeration_type_expression_loop
  end;

  .parse_enumeration_type_expression_end;
  _lexer_skip_token();

  (* The node is allocated after the members were written to memory. *)
  type_expression_size := _enumeration_type_expression_size();
  result := _allocate(type_expression_size);

  _node_set_kind(result, NodeKind.enumeration_type_expression);
  _enumeration_type_expression_set_members(result, memory_start);
  _enumeration_type_expression_set_length(result, member_count);

  return result
end;

(**
 * Reads and creates enumeration type representation.
 *
 * record
 *   type_kind: Word;
 *   size: Word;
 *   members: StringArray;
 *   length: Word
 * end;
 *
 * Parameters:
 * parser_node - Enumeration type expression node.
 *
 * Returns enumeration type description.
 *)
proc _read_type_enumeration(parser_node: Word);
var
  result: Word;
  memory_start: Word;
  member_count: Word;
begin
  (* The resulting structure is 16 bytes long.
*)
  result := _allocate(16);

  memory_start := _enumeration_type_expression_get_members(parser_node);
  member_count := _enumeration_type_expression_get_length(parser_node);

  _type_set_kind(result, TypeKind.enumeration);
  _type_set_size(result, 4);
  _enumeration_type_set_members(result, memory_start);
  _enumeration_type_set_length(result, member_count);

  return _type_info_create(result)
end;

(**
 * Parses a type expression consisting of a type name. The current token
 * is taken as the name.
 *
 * Returns the named type expression node in a0.
 *)
proc _parse_named_type_expression();
var
  type_expression_size: Word;
  result: Word;
  type_name: Word;
  name_length: Word;
begin
  type_expression_size := _named_type_expression_size();
  result := _allocate(type_expression_size);

  _node_set_kind(result, NodeKind.named_type_expression);

  type_name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() - type_name;

  _named_type_expression_set_name(result, type_name);
  _named_type_expression_set_length(result, name_length);
  _lexer_skip_token();

  return result
end;

(**
 * Parses a type expression: a type name or an enumeration.
 *
 * Returns the type expression node in a0, or 0 if the current token is
 * neither an identifier nor an opening paren.
 *)
proc _parse_type_expression();
var
  token_kind: Word;
  result: Word;
begin
  result := 0;
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind.identifier then
    result := _parse_named_type_expression()
  elsif token_kind = LexerTokenKind.left_paren then
    result := _parse_enumeration_type_expression()
  end;
  return result
end;

(**
 * Produces the type information for a parsed type expression.
 *
 * A named type is looked up in the global symbol table and the type stored
 * in the found symbol is returned. An enumeration gets a new type
 * information record.
 *
 * Parameters:
 * parser_node - Type expression node.
 *
 * Returns the result in a0, or 0 if the type name is not in the global
 * symbol table or the node is not a type expression.
 *)
proc _read_type_expression(parser_node: Word);
var
  token_kind: Word;
  type_name: Word;
  name_length: Word;
  result: Word;
begin
  (* Have a defined result for node kinds that are not handled below. *)
  result := 0;
  token_kind := _node_get_kind(parser_node);

  if token_kind = NodeKind.named_type_expression then
    type_name := _named_type_expression_get_name(parser_node);
    name_length := _named_type_expression_get_length(parser_node);

    result := _symbol_table_lookup(@symbol_table_global, type_name, name_length);

    (* The lookup gives 0 for unknown names, which must not be dereferenced. *)
    if result <> 0 then
      result := _type_info_get_type(result)
    end
  elsif token_kind = NodeKind.enumeration_type_expression then
    result := _read_type_enumeration(parser_node)
  end;
  return result
end;

(* Reads the type representation, the word at offset 4 of a type info. *)
proc _type_info_get_type(this: Word);
begin
  this := this + 4;
  return this^
end;

(**
 * Parameters:
 * parameter_index - Parameter index.
*)
proc _parameter_info_create(parameter_index: Word);
var
  info: Word;
  offset_field: Word;
  stack_offset: Word;
begin
  (* The record takes two words of free memory: the kind and the offset. *)
  info := memory_free_pointer;
  memory_free_pointer := info + 8;

  info^ := InfoKind.parameter_info;

  (* The stack offset is 88 - (4 * parameter_index). *)
  stack_offset := parameter_index * 4;
  stack_offset := 88 - stack_offset;

  offset_field := info + 4;
  offset_field^ := stack_offset;

  return info
end;

(* Gives the stack offset kept in the second word of a parameter info. *)
proc _parameter_info_get_offset(this: Word);
var
  offset_field: Word;
begin
  offset_field := this + 4;
  return offset_field^
end;

(**
 * Creates a type info record in free memory.
 *
 * Parameters:
 * type_representation - Value for the second word of the record.
 *
 * Returns the record in a0.
 *)
proc _type_info_create(type_representation: Word);
var
  info: Word;
  type_field: Word;
begin
  (* The record takes two words of free memory: the kind and the type. *)
  info := memory_free_pointer;
  memory_free_pointer := info + 8;

  info^ := InfoKind.type_info;

  type_field := info + 4;
  type_field^ := type_representation;

  return info
end;

(**
 * Creates a temporary info record in free memory.
 *
 * Parameters:
 * temporary_index - Temporary index.
 *
 * Returns the record in a0.
 *)
proc _temporary_info_create(temporary_index: Word);
var
  info: Word;
  offset_field: Word;
  stack_offset: Word;
begin
  (* The record takes two words of free memory: the kind and the offset. *)
  info := memory_free_pointer;
  memory_free_pointer := info + 8;

  info^ := InfoKind.temporary_info;

  (* The stack offset is 4 * temporary_index. *)
  stack_offset := temporary_index * 4;

  offset_field := info + 4;
  offset_field^ := stack_offset;

  return info
end;

(**
 * Registers a procedure parameter in the local symbol table and writes the
 * instruction storing its argument register on the stack.
 *
 * Parameters:
 * parser_node - Parameter declaration.
 * parameter_index - Parameter index.
 *)
proc _read_procedure_parameter(parser_node: Word, parameter_index: Word);
var
  parameter_name: Word;
  parameter_name_length: Word;
  parameter_info: Word;
  stack_offset: Word;
begin
  parameter_name := _declaration_get_name(parser_node);
  parameter_name_length := _declaration_get_length(parser_node);

  parameter_info := _parameter_info_create(parameter_index);
  _symbol_table_enter(@symbol_table_local, parameter_name, parameter_name_length, parameter_info);
  stack_offset := _parameter_info_get_offset(parameter_info);

  (* sw a<index>, <offset>(sp) *)
  _write_z("\tsw a\0");
  _write_i(parameter_index);
  _write_z(", \0");
  _write_i(stack_offset);
  _write_z("(sp)\n\0")
end;

(**
 * Parameters:
 * variable_index - Variable index.
*)
proc _read_procedure_temporary(parser_node: Word, variable_index: Word);
var
  temporary_info: Word;
  temporary_name: Word;
  temporary_name_length: Word;
begin
  temporary_info := _temporary_info_create(variable_index);

  temporary_name := _declaration_get_name(parser_node);
  temporary_name_length := _declaration_get_length(parser_node);

  _symbol_table_enter(@symbol_table_local, temporary_name, temporary_name_length, temporary_info)
end;

(**
 * Registers all local variables of a procedure in the local symbol table.
 * The variables are numbered starting with 0.
 *
 * Parameters:
 * parser_node - First variable declaration or 0.
 *)
proc _read_procedure_temporaries(parser_node: Word);
var
  temporary_index: Word;
begin
  temporary_index := 0;

  .read_procedure_temporaries_loop;
  if parser_node <> 0 then
    _read_procedure_temporary(parser_node, temporary_index);

    temporary_index := temporary_index + 1;
    parser_node := _declaration_get_next(parser_node);
    goto read_procedure_temporaries_loop
  end
end;

(* Gives the next declaration, kept in the word at offset 4. *)
proc _declaration_get_next(this: Word);
var
  field: Word;
begin
  field := this + 4;
  return field^
end;

(* Changes the next declaration, kept in the word at offset 4. *)
proc _declaration_set_next(this: Word, value: Word);
var
  field: Word;
begin
  field := this + 4;
  field^ := value
end;

(* Gives the declaration name, kept in the word at offset 8. *)
proc _declaration_get_name(this: Word);
var
  field: Word;
begin
  field := this + 8;
  return field^
end;

(* Changes the declaration name, kept in the word at offset 8. *)
proc _declaration_set_name(this: Word, value: Word);
var
  field: Word;
begin
  field := this + 8;
  field^ := value
end;

(* Gives the name length, kept in the word at offset 12. *)
proc _declaration_get_length(this: Word);
var
  field: Word;
begin
  field := this + 12;
  return field^
end;

(* Changes the name length, kept in the word at offset 12. *)
proc _declaration_set_length(this: Word, value: Word);
var
  field: Word;
begin
  field := this + 12;
  field^ := value
end;

(* Kind + next declaration pointer + argument list + procedure name + statement list pointer + temporary list pointer.
*)
proc _procedure_declaration_size();
  return 28
end;

(* Reads the procedure body, the word at offset 16. *)
proc _procedure_declaration_get_body(this: Word);
begin
  this := this + 16;
  return this^
end;

(* Stores the procedure body into the word at offset 16. *)
proc _procedure_declaration_set_body(this: Word, value: Word);
begin
  this := this + 16;
  this^ := value
end;

(* Reads the list of local variables, the word at offset 20. *)
proc _procedure_declaration_get_temporaries(this: Word);
begin
  this := this + 20;
  return this^
end;

(* Stores the list of local variables into the word at offset 20. *)
proc _procedure_declaration_set_temporaries(this: Word, value: Word);
begin
  this := this + 20;
  this^ := value
end;

(* Reads the parameter list, the word at offset 24. *)
proc _procedure_declaration_get_parameters(this: Word);
begin
  this := this + 24;
  return this^
end;

(* Stores the parameter list into the word at offset 24. *)
proc _procedure_declaration_set_parameters(this: Word, value: Word);
begin
  this := this + 24;
  this^ := value
end;

(**
 * Parses a procedure declaration: name, parameters, local variables and
 * the body.
 *
 * Returns the procedure declaration node in a0.
 *)
proc _parse_procedure_declaration();
var
  name_pointer: Word;
  name_length: Word;
  token_kind: Word;
  result: Word;
  declaration_size: Word;
  parameter_head: Word;
begin
  declaration_size := _procedure_declaration_size();
  result := _allocate(declaration_size);

  _node_set_kind(result, NodeKind.procedure_declaration);
  _declaration_set_next(result, 0);

  (* Skip "proc ". *)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);

  name_pointer := _lexer_global_get_start();
  name_length := _lexer_global_get_end() - name_pointer;

  _declaration_set_name(result, name_pointer);
  _declaration_set_length(result, name_length);

  (* Skip procedure name. *)
  _lexer_skip_token();

  (* Skip open paren. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();
  parameter_head := 0;

  (* From here on name_pointer and name_length are reused: name_pointer is
     the parameter just parsed and name_length the parameter before it. *)
  .parse_procedure_declaration_parameter;
  _lexer_read_token(@token_kind);

  if token_kind <> LexerTokenKind.right_paren then
    name_pointer := _parse_variable_declaration();
    if parameter_head = 0 then
      parameter_head := name_pointer
    else
      _declaration_set_next(name_length, name_pointer)
    end;
    name_length := name_pointer;

    _lexer_read_token(@token_kind);

    if token_kind = LexerTokenKind.comma then
      _lexer_skip_token();
      goto parse_procedure_declaration_parameter
    end
  end;
  (* Skip close paren.
*)
  _lexer_skip_token();
  _procedure_declaration_set_parameters(result, parameter_head);

  (* Skip semicolon and newline. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  (* parameter_head is reused for the local variables and then the body. *)
  parameter_head := _parse_var_part();
  _procedure_declaration_set_temporaries(result, parameter_head);

  (* Skip semicolon, "begin" and newline. *)
  _lexer_read_token(@token_kind);
  if token_kind = LexerTokenKind._begin then
    _lexer_skip_token();
    parameter_head := _parse_statements()
  elsif token_kind = LexerTokenKind._return then
    (* A body may consist of a single return statement without "begin". *)
    parameter_head := _parse_return_statement()
  end;
  _procedure_declaration_set_body(result, parameter_head);

  (* Skip the "end" keyword. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  return result
end;

(**
 * Generates the code of a procedure.
 *
 * Parameters:
 * parser_node - Procedure declaration node.
 *)
proc _compile_procedure_declaration(parser_node: Word);
var
  name_pointer: Word;
  name_length: Word;
  parameter_counter: Word;
  current_parameter: Word;
begin
  (* Clear local symbol table. *)
  symbol_table_local := 0;

  name_pointer := _declaration_get_name(parser_node);
  name_length := _declaration_get_length(parser_node);

  (* Write .type _procedure_name, @function. *)
  _write_z(".type \0");

  _write_s(name_pointer, name_length);
  _write_z(", @function\n\0");

  (* Write procedure label, _procedure_name: *)
  _write_s(name_pointer, name_length);
  _write_z(":\n\0");

  (* Write the prologue.
*)
  _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0");

  (* Register the parameters and store their registers on the stack. *)
  current_parameter := _procedure_declaration_get_parameters(parser_node);
  parameter_counter := 0;
  .compile_procedure_declaration_parameter;
  if current_parameter = 0 then
    goto compile_procedure_declaration_end
  end;
  _read_procedure_parameter(current_parameter, parameter_counter);
  parameter_counter := parameter_counter + 1;
  current_parameter := _declaration_get_next(current_parameter);
  goto compile_procedure_declaration_parameter;

  .compile_procedure_declaration_end;
  (* current_parameter is reused for the local variables and the body. *)
  current_parameter := _procedure_declaration_get_temporaries(parser_node);
  _read_procedure_temporaries(current_parameter);

  current_parameter := _procedure_declaration_get_body(parser_node);
  _compile_statements(current_parameter);

  (* Write the epilogue. *)
  _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0")
end;

(**
 * Parses procedure declarations as long as the current token is "proc" and
 * links them through their next pointers.
 *
 * Returns the first procedure declaration in a0, or 0 if there is none.
 *)
proc _parse_procedures();
var
  parser_node: Word;
  result: Word;
  current_declaration: Word;
  token_kind: Word;
begin
  result := 0;

  .parse_procedures_loop;
  _skip_empty_lines();
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind._proc then
    parser_node := _parse_procedure_declaration();
    (* current_declaration is only read after the first declaration set it. *)
    if result = 0 then
      result := parser_node
    else
      _declaration_set_next(current_declaration, parser_node)
    end;
    current_declaration := parser_node;

    (* Skip semicolon. *)
    _lexer_read_token(@token_kind);
    _lexer_skip_token();

    goto parse_procedures_loop
  end;
  return result
end;

(**
 * Generates the code of every procedure in a list.
 *
 * Parameters:
 * parser_node - First procedure declaration or 0.
 *)
proc _compile_procedures(parser_node: Word);
var
  result: Word;
begin
  .compile_procedures_loop;
  if parser_node = 0 then
    goto compile_procedures_end
  end;

  _compile_procedure_declaration(parser_node);
  parser_node := _declaration_get_next(parser_node);
  goto compile_procedures_loop;

  .compile_procedures_end
end;

(**
 * Skips comments.
*)
proc _skip_empty_lines();
var
  token_kind: Word;
begin
  .skip_empty_lines_rerun;

  (* Keep skipping as long as the current token is a comment. *)
  _lexer_read_token(@token_kind);
  if token_kind = LexerTokenKind.comment then
    _lexer_skip_token();
    goto skip_empty_lines_rerun
  end
end;

(* Size of a type declaration node in bytes. *)
proc _type_declaration_size();
  return 20
end;

(* Reads the type expression, the word at offset 16. *)
proc _type_declaration_get_type(this: Word);
begin
  this := this + 16;
  return this^
end;

(* Stores the type expression into the word at offset 16. *)
proc _type_declaration_set_type(this: Word, value: Word);
begin
  this := this + 16;
  this^ := value
end;

(**
 * Parses a type declaration. The current token is taken as the type name,
 * one token after it is skipped, then the type expression follows.
 *
 * Returns the type declaration node in a0.
 *)
proc _parse_type_declaration();
var
  token_kind: Word;
  type_name: Word;
  name_length: Word;
  parser_node: Word;
  result: Word;
  declaration_size: Word;
begin
  _lexer_read_token(@token_kind);
  type_name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() - type_name;

  (* Skip the name and the token after it. *)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  parser_node := _parse_type_expression();
  declaration_size := _type_declaration_size();
  result := _allocate(declaration_size);

  _node_set_kind(result, NodeKind.type_declaration);
  _declaration_set_next(result, 0);
  _declaration_set_name(result, type_name);
  _declaration_set_length(result, name_length);
  _type_declaration_set_type(result, parser_node);

  (* Skip the token after the type expression. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  return result
end;

(**
 * Enters a declared type into the global symbol table.
 *
 * Parameters:
 * parser_node - Type declaration node.
 *)
proc _read_type_declaration(parser_node: Word);
var
  type_name: Word;
  name_length: Word;
  type_info: Word;
begin
  type_name := _declaration_get_name(parser_node);
  name_length := _declaration_get_length(parser_node);

  (* parser_node is reused for the type expression of the declaration. *)
  parser_node := _type_declaration_get_type(parser_node);
  type_info := _read_type_expression(parser_node);

  _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info)
end;

(**
 * Parses the type part: the "type" keyword followed by type declarations.
 *
 * Returns the first type declaration in a0, or 0 if there is none.
 *)
proc _parse_type_part();
var
  token_kind: Word;
  parser_node: Word;
  result: Word;
  current_declaration: Word;
begin
  result := 0;
  _skip_empty_lines();
  _lexer_read_token(@token_kind);

  if token_kind <> LexerTokenKind._type then
    goto parse_type_part_end
  end;
  _lexer_skip_token();

  .parse_type_part_loop;
  _skip_empty_lines();
  _lexer_read_token(@token_kind);

  if token_kind =
LexerTokenKind.identifier then
    parser_node := _parse_type_declaration();

    (* current_declaration is only read after the first declaration set it. *)
    if result = 0 then
      result := parser_node
    else
      _declaration_set_next(current_declaration, parser_node)
    end;
    current_declaration := parser_node;
    goto parse_type_part_loop
  end;

  .parse_type_part_end;
  return result
end;

(**
 * Enters every declared type of a list into the global symbol table.
 *
 * Parameters:
 * parser_node - First type declaration or 0.
 *)
proc _read_type_part(parser_node: Word);
begin
  .read_type_part_loop;
  if parser_node = 0 then
    goto read_type_part_end
  end;

  _read_type_declaration(parser_node);
  parser_node := _declaration_get_next(parser_node);
  goto read_type_part_loop;

  .read_type_part_end
end;

(* Size of a variable declaration node in bytes. *)
proc _variable_declaration_size();
  return 20
end;

(* Reads the variable type expression, the word at offset 16. *)
proc _variable_declaration_get_type(this: Word);
begin
  this := this + 16;
  return this^
end;

(* Stores the variable type expression into the word at offset 16. *)
proc _variable_declaration_set_type(this: Word, value: Word);
begin
  this := this + 16;
  this^ := value
end;

(**
 * Parses a variable declaration. The current token is taken as the name.
 *
 * Returns the variable declaration node in a0.
 *)
proc _parse_variable_declaration();
var
  token_kind: Word;
  name: Word;
  name_length: Word;
  variable_type: Word;
  result: Word;
  declaration_size: Word;
begin
  _lexer_read_token(@token_kind);
  name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() - name;

  (* Skip the variable name and colon with the type.
*)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  variable_type := _parse_type_expression();

  declaration_size := _variable_declaration_size();
  result := _allocate(declaration_size);

  _node_set_kind(result, NodeKind.variable_declaration);
  _declaration_set_next(result, 0);
  _declaration_set_name(result, name);
  _declaration_set_length(result, name_length);
  _variable_declaration_set_type(result, variable_type);

  return result
end;

(**
 * Writes the definition of a global variable.
 *
 * Parameters:
 * parser_tree - Variable declaration node.
 *)
proc _compile_variable_declaration(parser_tree: Word);
var
  name: Word;
  name_length: Word;
  token_kind: Word;
  variable_type: Word;
begin
  name := _declaration_get_name(parser_tree);
  name_length := _declaration_get_length(parser_tree);
  variable_type := _variable_declaration_get_type(parser_tree);

  _write_z(".type \0");
  _write_s(name, name_length);
  _write_z(", @object\n\0");

  _write_s(name, name_length);
  _write_c(':');

  (* NOTE(review): token_kind is not used afterwards; confirm whether this
     lexer call is still needed during code generation. *)
  _lexer_read_token(@token_kind);

  (* name and name_length are reused for the type name. The accessors used
     here are those of a named type expression; the node kind of
     variable_type is not checked. *)
  name := _named_type_expression_get_name(variable_type);
  name_length := _named_type_expression_get_length(variable_type);

  if _lexer_compare_keyword("Array", 5, name, name_length) then
    (* We assume this is a zeroed 409600 bytes big array. *)
    _write_z(" .zero 409600\0")
  else
    _write_z(" .word 0\n\0")
  end;
  _write_c('\n')
end;

(**
 * Parses the var part: the "var" keyword followed by variable declarations.
 *
 * Returns the first variable declaration in a0, or 0 if there is none.
 *)
proc _parse_var_part();
var
  result: Word;
  token_kind: Word;
  variable_node: Word;
  current_declaration: Word;
begin
  result := 0;
  _lexer_read_token(@token_kind);

  if token_kind <> LexerTokenKind._var then
    goto parse_var_part_end
  end;
  (* Skip "var". *)
  _lexer_skip_token();

  .parse_var_part_loop;
  _skip_empty_lines();
  _lexer_read_token(@token_kind);

  if token_kind = LexerTokenKind.identifier then
    variable_node := _parse_variable_declaration();

    (* Skip semicolon.
*)
    _lexer_read_token(@token_kind);
    _lexer_skip_token();

    (* current_declaration is only read after the first declaration set it. *)
    if result = 0 then
      result := variable_node
    else
      _declaration_set_next(current_declaration, variable_node)
    end;
    current_declaration := variable_node;
    goto parse_var_part_loop
  end;

  .parse_var_part_end;
  return result
end;

(**
 * Writes the data section with all global variables. Nothing is written
 * if there are no variables.
 *
 * Parameters:
 * parser_node - First variable declaration or 0.
 *)
proc _compile_var_part(parser_node: Word);
begin
  if parser_node = 0 then
    goto compile_var_part_end
  end;
  _write_z(".section .data\n\0");

  .compile_var_part_loop;
  _compile_variable_declaration(parser_node);
  parser_node := _declaration_get_next(parser_node);

  if parser_node <> 0 then
    goto compile_var_part_loop
  end;

  .compile_var_part_end
end;

(* Size of a module declaration node in bytes. *)
proc _module_declaration_size();
  return 16
end;

(* Reads the list of type declarations, the word at offset 4. *)
proc _module_declaration_get_types(this: Word);
begin
  this := this + 4;
  return this^
end;

(* Stores the list of type declarations into the word at offset 4. *)
proc _module_declaration_set_types(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(* Reads the list of global variables, the word at offset 8. *)
proc _module_declaration_get_globals(this: Word);
begin
  this := this + 8;
  return this^
end;

(* Stores the list of global variables into the word at offset 8. *)
proc _module_declaration_set_globals(this: Word, value: Word);
begin
  this := this + 8;
  this^ := value
end;

(* Reads the list of procedures, the word at offset 12. *)
proc _module_declaration_get_procedures(this: Word);
begin
  this := this + 12;
  return this^
end;

(* Stores the list of procedures into the word at offset 12. *)
proc _module_declaration_set_procedures(this: Word, value: Word);
begin
  this := this + 12;
  this^ := value
end;

(**
 * Parses a whole module: the type part, the var part and the procedures,
 * in this order.
 *
 * Returns the module declaration node in a0.
 *)
proc _parse_module_declaration();
var
  parser_node: Word;
  declaration_size: Word;
  result: Word;
begin
  declaration_size := _module_declaration_size();
  result := _allocate(declaration_size);

  _node_set_kind(result, NodeKind.module_declaration);

  parser_node := _parse_type_part();
  _module_declaration_set_types(result, parser_node);

  parser_node := _parse_var_part();
  _module_declaration_set_globals(result, parser_node);

  parser_node := _parse_procedures();
  _module_declaration_set_procedures(result, parser_node);

  return result
end;

(**
 * Process the source code and print the generated code.
 *
 * Parameters:
 * parser_node - Module declaration node.
*)
proc _compile_module_declaration(parser_node: Word);
var
  current_part: Word;
  compiler_strings_copy: Word;
  compiler_strings_end: Word;
  current_byte: Word;
begin
  _write_z(".globl _start\n\n\0");

  (* Types produce no output, they are only entered into the symbol table. *)
  current_part := _module_declaration_get_types(parser_node);
  _read_type_part(current_part);

  current_part := _module_declaration_get_globals(parser_node);
  _compile_var_part(current_part);

  _write_z(".section .text\n\n\0");

  (* The three helper routines are written as ready-made assembly. *)
  _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0");
  _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0");
  _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0");

  current_part := _module_declaration_get_procedures(parser_node);
  _compile_procedures(current_part);

  _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
  _write_c('"');

  (* Write the bytes between the start of compiler_strings and
     compiler_strings_position one by one. *)
  compiler_strings_copy := @compiler_strings;
  compiler_strings_end := compiler_strings_position;

  .compile_module_declaration_loop;
  if compiler_strings_copy < compiler_strings_end then
    current_byte := _load_byte(compiler_strings_copy);
    compiler_strings_copy := compiler_strings_copy + 1;
    _write_c(current_byte);

    goto compile_module_declaration_loop
  end;
  _write_c('"');
  _write_c('\n')
end;

(* Parses the module and generates its code. *)
proc _compile();
var
  parser_node: Word;
begin
  parser_node := _parse_module_declaration();
  _compile_module_declaration(parser_node)
end;

(**
 * Terminates the program. a0 contains the return code.
 *
 * Parameters:
 * a0 - Status code.
 *)
proc _exit(status: Word);
begin
  (* 93 is passed as the last argument, which _syscall moves into a7. *)
  _syscall(status, 0, 0, 0, 0, 0, 93)
end;

(**
 * Looks for a symbol in the given symbol table.
 *
 * Parameters:
 * symbol_table - Symbol table.
 * symbol_name - Symbol name pointer.
 * name_length - Symbol name length.
 *
 * Returns the symbol pointer or 0 in a0.
*)
proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word);
var
  result: Word;
  symbol_table_length: Word;
  current_name: Word;
  current_length: Word;
begin
  result := 0;

  (* The first word in the symbol table is its length, get it. *)
  symbol_table_length := symbol_table^;

  (* Go to the first symbol position. *)
  symbol_table := symbol_table + 4;

  (* symbol_table_length counts the entries that are still to be checked. *)
  .symbol_table_lookup_loop;
  if symbol_table_length = 0 then
    goto symbol_table_lookup_end
  end;

  (* Symbol name pointer and length. *)
  current_name := symbol_table^;
  current_length := symbol_table + 4;
  current_length := current_length^;

  (* If lengths don't match, continue with the next entry. *)
  if name_length <> current_length then
    goto symbol_table_lookup_repeat
  end;
  (* If names don't match, continue with the next entry. *)
  if _memcmp(symbol_name, current_name, name_length) then
    goto symbol_table_lookup_repeat
  end;
  (* Otherwise, the symbol is found. It is the third word of the entry. *)
  result := symbol_table + 8;
  result := result^;
  goto symbol_table_lookup_end;

  (* Each entry is 12 bytes long. *)
  .symbol_table_lookup_repeat;
  symbol_table := symbol_table + 12;
  symbol_table_length := symbol_table_length - 1;
  goto symbol_table_lookup_loop;

  .symbol_table_lookup_end;
  return result
end;

(**
 * Inserts a symbol into the table. The symbol is appended, the table is
 * not checked for an existing entry with the same name.
 *
 * Parameters:
 * symbol_table - Symbol table.
 * symbol_name - Symbol name pointer.
 * name_length - Symbol name length.
 * symbol - Symbol pointer.
 *)
proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word);
var
  table_length: Word;
  symbol_pointer: Word;
begin
  (* The first word in the symbol table is its length, get it. *)
  table_length := symbol_table^;

  (* Calculate the offset for the new symbol: 12 bytes for each existing
     entry plus 4 bytes for the length word. *)
  symbol_pointer := table_length * 12;
  symbol_pointer := symbol_pointer + 4;
  symbol_pointer := symbol_table + symbol_pointer;

  (* An entry consists of the name pointer, the name length and the symbol. *)
  symbol_pointer^ := symbol_name;
  symbol_pointer := symbol_pointer + 4;
  symbol_pointer^ := name_length;
  symbol_pointer := symbol_pointer + 4;
  symbol_pointer^ := symbol;

  (* Increment the symbol table length.
*) table_length := table_length + 1; symbol_table^ := table_length end; proc _symbol_table_build(); var current_info: Word; current_type: Word; begin (* Set the table length to 0. *) symbol_table_global := 0; current_type := _allocate(8); _type_set_kind(current_type, TypeKind.primitive); _type_set_size(current_type, 4); (* Enter built-in symbols. *) current_info := _type_info_create(current_type); _symbol_table_enter(@symbol_table_global, "Word", 4, current_info); current_info := _type_info_create(current_type); _symbol_table_enter(@symbol_table_global, "Array", 5, current_info) end; (** * Assigns some value to at array index. * * Parameters: * array - Array pointer. * index - Index (word offset into the array). * data - Data to assign. *) proc _assign_at(array: Word, index: Word, data: Word); var target: Word; begin target := index - 1; target := target * 4; target := array + target; target^ := data end; proc _get_at(array: Word, index: Word); var target: Word; begin target := index - 1; target := target * 4; target := array + target; return target^ end; (** * Initializes the array with character classes. 
*)
proc _lexer_classifications();
begin
  (* The entry with the index i describes the character with the code i - 1. *)

  (* The zero byte is classified as the end of the input. *)
  _assign_at(@classification, 1, LexerClass.eof);

  (* Control characters: only tab, line feed and carriage return count as blanks. *)
  _lexer_classification_fill(2, 9, LexerClass.invalid);
  _lexer_classification_fill(10, 11, LexerClass.space);
  _lexer_classification_fill(12, 13, LexerClass.invalid);
  _assign_at(@classification, 14, LexerClass.space);
  _lexer_classification_fill(15, 32, LexerClass.invalid);

  (* Space and the punctuation up to the slash. *)
  _assign_at(@classification, 33, LexerClass.space);
  _assign_at(@classification, 34, LexerClass.single);
  _assign_at(@classification, 35, LexerClass.double_quote);
  _lexer_classification_fill(36, 37, LexerClass.other);
  _lexer_classification_fill(38, 39, LexerClass.single);
  _assign_at(@classification, 40, LexerClass.single_quote);
  _assign_at(@classification, 41, LexerClass.left_paren);
  _assign_at(@classification, 42, LexerClass.right_paren);
  _assign_at(@classification, 43, LexerClass.asterisk);
  _lexer_classification_fill(44, 45, LexerClass.single);
  _assign_at(@classification, 46, LexerClass.minus);
  _assign_at(@classification, 47, LexerClass.dot);
  _assign_at(@classification, 48, LexerClass.single);

  (* Decimal digits, zero has its own class. *)
  _assign_at(@classification, 49, LexerClass.zero);
  _lexer_classification_fill(50, 58, LexerClass.digit);

  (* Colon up to the at sign. *)
  _assign_at(@classification, 59, LexerClass.colon);
  _assign_at(@classification, 60, LexerClass.single);
  _assign_at(@classification, 61, LexerClass.less);
  _assign_at(@classification, 62, LexerClass.equals);
  _assign_at(@classification, 63, LexerClass.greater);
  _assign_at(@classification, 64, LexerClass.other);
  _assign_at(@classification, 65, LexerClass.single);

  (* Upper case letters. *)
  _lexer_classification_fill(66, 91, LexerClass.alpha);

  (* Square brackets, backslash, caret, underscore and backtick. *)
  _assign_at(@classification, 92, LexerClass.single);
  _assign_at(@classification, 93, LexerClass.backslash);
  _lexer_classification_fill(94, 95, LexerClass.single);
  _assign_at(@classification, 96, LexerClass.alpha);
  _assign_at(@classification, 97, LexerClass.other);

  (* Lower case letters: a to f are hexadecimal digits, x has its own class. *)
  _lexer_classification_fill(98, 103, LexerClass.hex);
  _lexer_classification_fill(104, 120, LexerClass.alpha);
  _assign_at(@classification, 121, LexerClass.x);
  _lexer_classification_fill(122, 123, LexerClass.alpha);

  (* Braces, pipe, tilde and the delete character. *)
  _assign_at(@classification, 124, LexerClass.other);
  _assign_at(@classification, 125, LexerClass.single);
  _assign_at(@classification, 126, LexerClass.other);
  _assign_at(@classification, 127, LexerClass.single);
  _assign_at(@classification, 128, LexerClass.invalid);

  (* The remaining entries 129 - 256 belong to the class other. *)
  _lexer_classification_fill(129, 256, LexerClass.other)
end;

(**
 * Assigns one character class to a run of consecutive classification entries.
 * At least one entry is always written.
 *
 * Parameters:
 * first - Index of the first entry to set.
 * last - Index of the last entry to set, inclusive.
 * character_class - Class to store into each entry.
 *)
proc _lexer_classification_fill(first: Word, last: Word, character_class: Word);
begin
  .lexer_classification_fill_loop;
  _assign_at(@classification, first, character_class);

  if first < last then
    first := first + 1;
    goto lexer_classification_fill_loop
  end
end;

(**
 * Calculates the address of an entry in the transition table.
 *
 * Parameters:
 * current_state - Lexer state, selects the row. The first state has the value 1.
 * character_class - Character class, selects the column. The first class has the value 1.
 *
 * Returns the pointer to the transition entry.
 *)
proc _lexer_get_transition(current_state: Word, character_class: Word);
var
  row_position: Word;
  column_position: Word;
  target: Word;
begin
  (* Each entry is 8 bytes long (2 words: action and next state).
     There are 22 character classes, so a row is 8 times 22 = 176 bytes long. *)
  row_position := current_state - 1;
  row_position := row_position * 176;

  column_position := character_class - 1;
  column_position := column_position * 8;

  target := _lexer_get_transition_table();
  target := target + row_position;

  return target + column_position
end;

(**
 * Parameters:
 * current_state - First index into transitions table.
 * character_class - Second index into transitions table.
 * action - Action to assign.
 * next_state - Next state to assign.
*)
proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word);
var
  entry: Word;
begin
  entry := _lexer_get_transition(current_state, character_class);

  _lexer_transition_set_action(entry, action);
  _lexer_transition_set_state(entry, next_state)
end;

(**
 * Sets the same action and next state for every character class of one
 * transition row. Individual entries can be overwritten afterwards.
 *
 * Parameters:
 * current_state - Current state (Transition state enumeration).
 * default_action - Default action (Callback).
 * next_state - Next state (Transition state enumeration).
 *)
proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word);
begin
  _lexer_set_transition(current_state, LexerClass.invalid, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.digit, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.alpha, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.space, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.colon, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.equals, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.left_paren, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.right_paren, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.asterisk, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.backslash, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.single, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.hex, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.zero, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.x, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.eof, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.dot, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.minus, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.single_quote, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.double_quote, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.greater, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.less, default_action, next_state);
  _lexer_set_transition(current_state, LexerClass.other, default_action, next_state)
end;

(**
 * Fills the transition table.
 *
 * The table has one row per lexer state and one column per character class.
 * An entry tells which action to perform and which state to enter when a
 * character of the given class is seen in the given state.
 *
 * Most rows are first filled with a default and then adjusted entry by entry,
 * so the order of the calls for one state matters.
 *)
proc _lexer_transitions();
begin
  (* Start state: every class is set explicitly. *)
  _lexer_set_transition(LexerState.start, LexerClass.invalid, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.digit, LexerAction.accumulate, LexerState.decimal);
  _lexer_set_transition(LexerState.start, LexerClass.alpha, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.start, LexerClass.space, LexerAction.skip, LexerState.start);
  _lexer_set_transition(LexerState.start, LexerClass.colon, LexerAction.accumulate, LexerState.colon);
  _lexer_set_transition(LexerState.start, LexerClass.equals, LexerAction.single, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.left_paren, LexerAction.accumulate, LexerState.left_paren);
  _lexer_set_transition(LexerState.start, LexerClass.right_paren, LexerAction.single, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.asterisk, LexerAction.single, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.backslash, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.single, LexerAction.single, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.hex, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.start, LexerClass.zero, LexerAction.accumulate, LexerState.leading_zero);
  _lexer_set_transition(LexerState.start, LexerClass.x, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.start, LexerClass.eof, LexerAction.eof, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.dot, LexerAction.single, LexerState.finish);
  _lexer_set_transition(LexerState.start, LexerClass.minus, LexerAction.accumulate, LexerState.minus);
  _lexer_set_transition(LexerState.start, LexerClass.single_quote, LexerAction.accumulate, LexerState.character);
  _lexer_set_transition(LexerState.start, LexerClass.double_quote, LexerAction.accumulate, LexerState.string);
  _lexer_set_transition(LexerState.start, LexerClass.greater, LexerAction.accumulate, LexerState.greater);
  _lexer_set_transition(LexerState.start, LexerClass.less, LexerAction.accumulate, LexerState.less);
  _lexer_set_transition(LexerState.start, LexerClass.other, LexerAction.none, LexerState.finish);

  (* Colon state: a following equals sign makes a composite token. *)
  _lexer_default_transition(LexerState.colon, LexerAction.finalize, LexerState.finish);
  _lexer_set_transition(LexerState.colon, LexerClass.equals, LexerAction.composite, LexerState.finish);

  (* Identifier state: letters and digits of any class extend the identifier. *)
  _lexer_default_transition(LexerState.identifier, LexerAction.key_id, LexerState.finish);
  _lexer_set_transition(LexerState.identifier, LexerClass.digit, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.identifier, LexerClass.alpha, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.identifier, LexerClass.hex, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.identifier, LexerClass.zero, LexerAction.accumulate, LexerState.identifier);
  _lexer_set_transition(LexerState.identifier, LexerClass.x, LexerAction.accumulate, LexerState.identifier);

  (* Decimal state: digits extend the number, a letter finishes without an action. *)
  _lexer_default_transition(LexerState.decimal, LexerAction.integer, LexerState.finish);
  _lexer_set_transition(LexerState.decimal, LexerClass.digit, LexerAction.accumulate, LexerState.decimal);
  _lexer_set_transition(LexerState.decimal, LexerClass.alpha, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.decimal, LexerClass.hex, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.decimal, LexerClass.zero, LexerAction.accumulate, LexerState.decimal);
  _lexer_set_transition(LexerState.decimal, LexerClass.x, LexerAction.none, LexerState.finish);

  (* Leading zero state: a further digit or letter finishes without an action,
     an x switches to the dot state. *)
  _lexer_default_transition(LexerState.leading_zero, LexerAction.integer, LexerState.finish);
  _lexer_set_transition(LexerState.leading_zero, LexerClass.digit, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.leading_zero, LexerClass.alpha, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.leading_zero, LexerClass.hex, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.leading_zero, LexerClass.zero, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.leading_zero, LexerClass.x, LexerAction.none, LexerState.dot);

  (* Greater state: a following equals sign makes a composite token. *)
  _lexer_default_transition(LexerState.greater, LexerAction.finalize, LexerState.finish);
  _lexer_set_transition(LexerState.greater, LexerClass.equals, LexerAction.composite, LexerState.finish);

  (* Minus state: a following greater sign makes a composite token. *)
  _lexer_default_transition(LexerState.minus, LexerAction.finalize, LexerState.finish);
  _lexer_set_transition(LexerState.minus, LexerClass.greater, LexerAction.composite, LexerState.finish);

  (* Left paren state: a following asterisk opens a comment. *)
  _lexer_default_transition(LexerState.left_paren, LexerAction.finalize, LexerState.finish);
  _lexer_set_transition(LexerState.left_paren, LexerClass.asterisk, LexerAction.accumulate, LexerState.comment);

  (* Less state: a following equals or greater sign makes a composite token. *)
  _lexer_default_transition(LexerState.less, LexerAction.finalize, LexerState.finish);
  _lexer_set_transition(LexerState.less, LexerClass.equals, LexerAction.composite, LexerState.finish);
  _lexer_set_transition(LexerState.less, LexerClass.greater, LexerAction.composite, LexerState.finish);

  (* Dot state. In this table it is entered only from the leading zero state after an x. *)
  _lexer_default_transition(LexerState.dot, LexerAction.finalize, LexerState.finish);
  _lexer_set_transition(LexerState.dot, LexerClass.dot, LexerAction.composite, LexerState.finish);

  (* Comment state: everything is accumulated, an asterisk may start the closing sequence. *)
  _lexer_default_transition(LexerState.comment, LexerAction.accumulate, LexerState.comment);
  _lexer_set_transition(LexerState.comment, LexerClass.asterisk, LexerAction.accumulate, LexerState.closing_comment);
  _lexer_set_transition(LexerState.comment, LexerClass.eof, LexerAction.none, LexerState.finish);

  (* Closing comment state: a right paren ends the comment, another asterisk keeps
     the state, anything else returns to the comment state. *)
  _lexer_default_transition(LexerState.closing_comment, LexerAction.accumulate, LexerState.comment);
  _lexer_set_transition(LexerState.closing_comment, LexerClass.invalid, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.closing_comment, LexerClass.right_paren, LexerAction.delimited, LexerState.finish);
  _lexer_set_transition(LexerState.closing_comment, LexerClass.asterisk, LexerAction.accumulate, LexerState.closing_comment);
  _lexer_set_transition(LexerState.closing_comment, LexerClass.eof, LexerAction.none, LexerState.finish);

  (* Character state: ends with a single quote, a backslash starts an escape sequence. *)
  _lexer_default_transition(LexerState.character, LexerAction.accumulate, LexerState.character);
  _lexer_set_transition(LexerState.character, LexerClass.invalid, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.character, LexerClass.eof, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.character, LexerClass.single_quote, LexerAction.delimited, LexerState.finish);
  _lexer_set_transition(LexerState.character, LexerClass.backslash, LexerAction.accumulate, LexerState.character_escape);

  (* Character escape state: the escaped character is accumulated unconditionally. *)
  _lexer_default_transition(LexerState.character_escape, LexerAction.accumulate, LexerState.character);
  _lexer_set_transition(LexerState.character_escape, LexerClass.invalid, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.character_escape, LexerClass.eof, LexerAction.none, LexerState.finish);

  (* String state: ends with a double quote, a backslash starts an escape sequence. *)
  _lexer_default_transition(LexerState.string, LexerAction.accumulate, LexerState.string);
  _lexer_set_transition(LexerState.string, LexerClass.invalid, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.string, LexerClass.eof, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.string, LexerClass.double_quote, LexerAction.delimited, LexerState.finish);
  _lexer_set_transition(LexerState.string, LexerClass.backslash, LexerAction.accumulate, LexerState.string_escape);

  (* String escape state: the escaped character is accumulated unconditionally. *)
  _lexer_default_transition(LexerState.string_escape, LexerAction.accumulate, LexerState.string);
  _lexer_set_transition(LexerState.string_escape, LexerClass.invalid, LexerAction.none, LexerState.finish);
  _lexer_set_transition(LexerState.string_escape, LexerClass.eof, LexerAction.none, LexerState.finish)
end;

(**
 * Returns the address of the transition table.
 *
 * The table is stored directly behind the character classification table,
 * which has 256 entries of 1 word each: 1024 = 256 times 4.
 *)
proc _lexer_get_transition_table();
  return @classification + 1024
end;

(**
 * Returns the address of the lexer state record.
 *
 * The record is stored directly behind the transition table. A transition
 * entry is 8 bytes long and the table has 17 rows (lexer states) and
 * 22 columns (character classes), so 2992 = 8 times 17 times 22.
 *)
proc _lexer_global_state();
var
  table: Word;
begin
  table := _lexer_get_transition_table();

  return table + 2992
end;

(**
 * Gets pointer to the token start. It is the second word of the lexer state record.
 *)
proc _lexer_global_get_start();
var
  field: Word;
begin
  field := _lexer_global_state();
  field := field + 4;

  return field^
end;

(**
 * Sets pointer to the token start.
 *)
proc _lexer_global_set_start(new_start: Word);
var
  field: Word;
begin
  field := _lexer_global_state();
  field := field + 4;

  field^ := new_start
end;

(**
 * Gets pointer to the token end. It is the third word of the lexer state record.
 *)
proc _lexer_global_get_end();
var
  field: Word;
begin
  field := _lexer_global_state();
  field := field + 8;

  return field^
end;

(**
 * Sets pointer to the token end.
*)
proc _lexer_global_set_end(new_start: Word);
var
  target: Word;
begin
  (* The token end is the third word of the lexer state record. *)
  target := _lexer_global_state();
  target := target + 8;
  target^ := new_start
end;

(* Returns the action, the first word of the given transition entry. *)
proc _lexer_transition_get_action(this: Word);
  return this^
end;

(* Stores the action into the first word of the given transition entry. *)
proc _lexer_transition_set_action(this: Word, value: Word);
begin
  this^ := value
end;

(* Returns the next state, the second word of the given transition entry. *)
proc _lexer_transition_get_state(this: Word);
begin
  this := this + 4;
  return this^
end;

(* Stores the next state into the second word of the given transition entry. *)
proc _lexer_transition_set_state(this: Word, value: Word);
begin
  this := this + 4;
  this^ := value
end;

(**
 * Resets the lexer state for reading the next token: the current state
 * becomes the start state and the token end is moved back to the token start.
 *)
proc _lexer_reset();
var
  state: Word;
begin
  (* The current state is the first word of the lexer state record. *)
  state := _lexer_global_state();
  state^ := LexerState.start;

  state := _lexer_global_get_start();
  _lexer_global_set_end(state)
end;

(**
 * One time lexer initialization. Fills both tables and points the token
 * start and end to the beginning of the source code buffer.
 *)
proc _lexer_initialize();
begin
  _lexer_classifications();
  _lexer_transitions();

  _lexer_global_set_start(@source_code);
  _lexer_global_set_end(@source_code)
end;

(**
 * Classifies the character at the token end and returns the pointer to the
 * transition entry for the current state and that character class.
 *)
proc _lexer_next_transition();
var
  current_character: Word;
  character_class: Word;
  current_state: Word;
begin
  current_character := _lexer_global_get_end();
  current_character := _load_byte(current_character);

  (* NOTE(review): the classification table is indexed with the character code
     plus one. If _load_byte sign-extends the byte, codes above 127 would give
     an index outside of the table - confirm. *)
  character_class := _get_at(@classification, current_character + 1);

  current_state := _lexer_global_state();
  current_state := current_state^;

  return _lexer_get_transition(current_state, character_class)
end;

(**
 * Compares two byte sequences given as pointer and length.
 *
 * Returns 1 if both have the same length and the same contents, otherwise 0.
 *)
proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word);
var
  result: Word;
begin
  result := 0;

  if lhs_length = rhs_length then
    result := _memcmp(lhs_pointer, rhs_pointer, lhs_length);
    result := result = 0
  end;
  return result
end;

(**
 * Decides whether the accumulated word is a keyword or an identifier.
 *
 * Parameters:
 * position_start - Pointer to the first character of the word.
 * position_end - Pointer behind the last character of the word.
 *
 * Returns the keyword token kind or LexerTokenKind.identifier.
 *)
proc _lexer_classify_keyword(position_start: Word, position_end: Word);
var
  result: Word;
  token_length: Word;
begin
  result := LexerTokenKind.identifier;
  token_length := position_end - position_start;

  (* The last argument has to be the exact length of the keyword literal. *)
  if _lexer_compare_keyword(position_start, token_length, "const", 5) then
    result := LexerTokenKind._const
  elsif _lexer_compare_keyword(position_start, token_length, "var", 3) then
    result := LexerTokenKind._var
  elsif _lexer_compare_keyword(position_start, token_length, "proc", 4) then
    result := LexerTokenKind._proc
  elsif _lexer_compare_keyword(position_start, token_length, "type", 4) then
    result := LexerTokenKind._type
  elsif _lexer_compare_keyword(position_start, token_length, "begin", 5) then
    result := LexerTokenKind._begin
  elsif _lexer_compare_keyword(position_start, token_length, "end", 3) then
    result := LexerTokenKind._end
  elsif _lexer_compare_keyword(position_start, token_length, "return", 6) then
    result := LexerTokenKind._return
  elsif _lexer_compare_keyword(position_start, token_length, "goto", 4) then
    result := LexerTokenKind._goto
  elsif _lexer_compare_keyword(position_start, token_length, "if", 2) then
    result := LexerTokenKind._if
  elsif _lexer_compare_keyword(position_start, token_length, "while", 5) then
    result := LexerTokenKind._while
  elsif _lexer_compare_keyword(position_start, token_length, "then", 4) then
    result := LexerTokenKind._then
  elsif _lexer_compare_keyword(position_start, token_length, "else", 4) then
    result := LexerTokenKind._else
  elsif _lexer_compare_keyword(position_start, token_length, "elsif", 5) then
    result := LexerTokenKind._elsif
  elsif _lexer_compare_keyword(position_start, token_length, "or", 2) then
    result := LexerTokenKind._or
  elsif _lexer_compare_keyword(position_start, token_length, "xor", 3) then
    result := LexerTokenKind._xor
  end;
  return result
end;

(**
 * Classifies a one character token that was left alone because the
 * following character did not form a composite token with it.
 *
 * Returns the token kind or 0 for an unknown character.
 *)
proc _lexer_classify_finalize(start_position: Word);
var
  character: Word;
  result: Word;
begin
  result := 0;
  character := _load_byte(start_position);

  if character = ':' then
    result := LexerTokenKind.colon
  elsif character = '.' then
    result := LexerTokenKind.dot
  elsif character = '(' then
    result := LexerTokenKind.left_paren
  elsif character = '-' then
    result := LexerTokenKind.minus
  elsif character = '<' then
    result := LexerTokenKind.less_than
  elsif character = '>' then
    result := LexerTokenKind.greater_than
  end;
  return result
end;

(**
 * Classifies a token that always consists of a single character.
 *
 * Returns the token kind or 0 for an unknown character.
 *)
proc _lexer_classify_single(start_position: Word);
var
  character: Word;
  result: Word;
begin
  result := 0;
  character := _load_byte(start_position);

  if character = ';' then
    result := LexerTokenKind.semicolon
  elsif character = ',' then
    result := LexerTokenKind.comma
  elsif character = ')' then
    result := LexerTokenKind.right_paren
  elsif character = '@' then
    result := LexerTokenKind.at
  elsif character = '~' then
    result := LexerTokenKind.not
  elsif character = '&' then
    result := LexerTokenKind.and
  elsif character = '+' then
    result := LexerTokenKind.plus
  elsif character = '*' then
    result := LexerTokenKind.multiplication
  elsif character = '=' then
    result := LexerTokenKind.equals
  elsif character = '%' then
    result := LexerTokenKind.remainder
  elsif character = '/' then
    result := LexerTokenKind.division
  elsif character = '.' then
    result := LexerTokenKind.dot
  elsif character = '^' then
    result := LexerTokenKind.hat
  end;
  return result
end;

(**
 * Classifies a token made of two characters.
 *
 * Parameters:
 * start_position - Pointer to the first character of the token.
 * one_before_last - Pointer to the last character of the token.
 *
 * Returns the token kind or 0 for an unknown combination.
 *)
proc _lexer_classify_composite(start_position: Word, one_before_last: Word);
var
  first_character: Word;
  last_character: Word;
  result: Word;
begin
  (* Unknown combinations give 0 like in the other classification procedures. *)
  result := 0;

  first_character := _load_byte(start_position);
  last_character := _load_byte(one_before_last);

  if first_character = ':' then
    result := LexerTokenKind.assignment
  elsif first_character = '<' then
    if last_character = '=' then
      result := LexerTokenKind.less_equal
    elsif last_character = '>' then
      result := LexerTokenKind.not_equal
    end
  elsif first_character = '>' then
    if last_character = '=' then
      result := LexerTokenKind.greater_equal
    end
  elsif first_character = '-' then
    (* The minus state produces a composite token for a minus followed by a greater sign. *)
    if last_character = '>' then
      result := LexerTokenKind.arrow
    end
  end;
  return result
end;

(**
 * Classifies a token enclosed in delimiters by looking at its first character.
 *
 * Parameters:
 * start_position - Pointer to the opening delimiter.
 * end_position - Pointer behind the closing delimiter (not needed for the decision).
 *
 * Returns comment, character or string token kind, or 0 for an unknown delimiter.
 *)
proc _lexer_classify_delimited(start_position: Word, end_position: Word);
var
  delimiter: Word;
  result: Word;
begin
  result := 0;
  delimiter := _load_byte(start_position);

  if delimiter = '(' then
    result := LexerTokenKind.comment
  elsif delimiter = '\'' then
    result := LexerTokenKind.character
  elsif delimiter = '"' then
    result := LexerTokenKind.string
  end;
  return result
end;

(* Every token finished by the integer action is an integer literal. *)
proc _lexer_classify_integer(start_position: Word, end_position: Word);
begin
  return LexerTokenKind.integer
end;

(**
 * Performs the action of a transition: moves the token boundaries and, for
 * the actions that finish a token, writes the token kind.
 *
 * Parameters:
 * action_to_perform - One of the LexerAction values.
 * kind - Address the token kind is written to.
 *)
proc _lexer_execute_action(action_to_perform: Word, kind: Word);
var
  position_start: Word;
  position_end: Word;
  intermediate: Word;
begin
  position_start := _lexer_global_get_start();
  position_end := _lexer_global_get_end();

  (* LexerAction.none matches no branch: nothing is moved and kind stays untouched. *)
  if action_to_perform = LexerAction.accumulate then
    _lexer_global_set_end(position_end + 1)
  elsif action_to_perform = LexerAction.skip then
    _lexer_global_set_start(position_start + 1);
    _lexer_global_set_end(position_end + 1)
  elsif action_to_perform = LexerAction.single then
    _lexer_global_set_end(position_end + 1);

    intermediate := _lexer_classify_single(position_start);
    kind^ := intermediate
  elsif action_to_perform = LexerAction.eof then
    intermediate := LexerTokenKind.eof;
    kind^ := intermediate
  elsif action_to_perform = LexerAction.finalize then
    intermediate := _lexer_classify_finalize(position_start);
    kind^ := intermediate
  elsif action_to_perform = LexerAction.composite then
    _lexer_global_set_end(position_end + 1);

    intermediate := _lexer_classify_composite(position_start, position_end);
    kind^ := intermediate
  elsif action_to_perform = LexerAction.key_id then
    intermediate := _lexer_classify_keyword(position_start, position_end);
    kind^ := intermediate
  elsif action_to_perform = LexerAction.integer then
    intermediate := _lexer_classify_integer(position_start, position_end);
    kind^ := intermediate
  elsif action_to_perform = LexerAction.delimited then
    _lexer_global_set_end(position_end + 1);

    intermediate := _lexer_classify_delimited(position_start, position_end + 1);
    kind^ := intermediate
  end
end;

(**
 * Performs one step of the state machine: looks up the transition for the
 * current character, saves the next state and executes the action.
 *
 * Returns the next state.
 *)
proc _lexer_execute_transition(kind: Word);
var
  next_transition: Word;
  next_state: Word;
  global_state: Word;
  action_to_perform: Word;
begin
  next_transition := _lexer_next_transition();
  next_state := _lexer_transition_get_state(next_transition);
  action_to_perform := _lexer_transition_get_action(next_transition);

  (* The state is saved before the action runs. *)
  global_state := _lexer_global_state();
  global_state^ := next_state;

  _lexer_execute_action(action_to_perform, kind);

  return next_state
end;

(**
 * Executes transitions until the finish state is reached. A loop is used
 * instead of recursion, so the stack does not grow with the token length.
 *)
proc _lexer_advance_token(kind: Word);
var
  result_state: Word;
begin
  .lexer_advance_token_loop;
  result_state := _lexer_execute_transition(kind);

  if result_state <> LexerState.finish then
    goto lexer_advance_token_loop
  end
end;

(**
 * Reads the next token and writes its type into the address in the kind parameter.
 *)
proc _lexer_read_token(kind: Word);
begin
  _lexer_reset();
  _lexer_advance_token(kind)
end;

(**
 * Advances the token stream past the last read token.
*)
proc _lexer_skip_token();
var
  old_end: Word;
begin
  (* The next token starts where the last one ended. *)
  old_end := _lexer_global_get_end();
  _lexer_global_set_start(old_end)
end;

(* Points the string storage position and the free memory pointer to the beginning of their arrays. *)
proc _initialize_global_state();
begin
  compiler_strings_position := @compiler_strings;
  memory_free_pointer := @memory
end;

(*
 * Entry point. Initializes the compiler, reads the whole standard input into
 * the source code buffer, compiles it and exits with the status 0.
 *)
proc _start();
var
  last_read: Word;
  offset: Word;
  remaining: Word;
begin
  _initialize_global_state();
  _lexer_initialize();
  _symbol_table_build();

  (* Read the source from the standard input. *)
  offset := @source_code;

  (* Buffer size. Modifying it, update the source_code definition.
     NOTE(review): the lexer stops at a zero byte; an input that fills the
     buffer completely leaves no room for it - confirm the buffer is followed
     by zeroed memory. *)
  remaining := 409600;

  .start_read;
  (* Only the free rest of the buffer is offered, so a read in several chunks
     cannot write behind the buffer. With no space left 0 bytes are requested
     and the loop ends. *)
  last_read := _read_file(offset, remaining);
  if last_read > 0 then
    offset := offset + last_read;
    remaining := remaining - last_read;
    goto start_read
  end;
  _compile();
  _exit(0)
end;