(*
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at https://mozilla.org/MPL/2.0/.
 *)

(* Stage 14 compiler. *)

(* - Binary minus. *)
(* - Space independent parsing. *)
(* - Label names in goto statements aren't required to begin with a dot. *)
(* - Dereferencing pointers pointing to word long data. *)
(* - Enumeration type. *)

const
  (* Names of the built-in types. *)
  symbol_builtin_name_int := "Int";
  symbol_builtin_name_word := "Word";
  symbol_builtin_name_pointer := "Pointer";
  symbol_builtin_name_char := "Char";
  symbol_builtin_name_array := "Array";

  (*
    Every type description starts with a word describing what type it is.

    PRIMITIVE_TYPE = 1
    ENUMERATION_TYPE = 2

    Primitive types have only the type size, so each description below is
    S(kind, size).
  *)
  symbol_builtin_type_int := S(1, 4);
  symbol_builtin_type_word := S(1, 4);
  symbol_builtin_type_pointer := S(1, 4);
  symbol_builtin_type_char := S(1, 1);
  symbol_builtin_type_array := S(1, 4);

  (*
    Info objects start with a word describing its type.

    TYPE_INFO = 1
    PARAMETER_INFO = 2
    TEMPORARY_INFO = 3

    Type info has the type it belongs to, so each object below is
    S(TYPE_INFO, address of the type description).
  *)
  symbol_type_info_int := S(1, @symbol_builtin_type_int);
  symbol_type_info_word := S(1, @symbol_builtin_type_word);
  symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer);
  symbol_type_info_char := S(1, @symbol_builtin_type_char);
  symbol_type_info_array := S(1, @symbol_builtin_type_array);

var
  source_code: Array;
  (* Storage the string literals are copied into; see _add_string. *)
  compiler_strings: Array;
  symbol_table_global: Array;
  symbol_table_local: Array;
  classification: Array;
  (* To reserve memory just add the number of needed bytes to the memory_free_pointer variable. *)
  memory: Array;
  (* Next free byte of compiler_strings. *)
  compiler_strings_position: Pointer := @compiler_strings;
  (* Length of the stored strings, not counting backslashes. *)
  compiler_strings_length: Word := 0;
  (* Next unused number for the generated .Ln labels. *)
  label_counter: Word := 0;
  (* Points to a segment of free memory. *)
  memory_free_pointer: Word := @memory;

(**
 * Calculates and returns the string token length between quotes, including the
 * escaping slash characters.
*
 * Parameters:
 * string - String token pointer. It points at the opening quote.
 *
 * Returns the length in a0.
 *)
proc _string_length(string: Word);
var
  counter: Word;
begin
  (* Reset the counter. *)
  counter := 0;

  .string_length_loop;
  (* Advance before testing, so the opening quote is never counted. *)
  string := string + 1;

  if _load_byte(string) <> '"' then
    counter := counter + 1;
    goto .string_length_loop
  end;

  return counter
end;

(**
 * Adds a string to the global, read-only string storage.
 *
 * Parameters:
 * string - String token.
 *
 * Returns the offset from the beginning of the storage to the new string in a0.
 *)
proc _add_string(string: Word);
var
  contents: Word;
  result: Word;
  current_byte: Word;
begin
  (* Skip the opening quote. *)
  contents := string + 1;
  (* The new string starts where the previously stored strings end. *)
  result := compiler_strings_length;

  .add_string_loop;
  if _load_byte(contents) <> '"' then
    current_byte := _load_byte(contents);
    _store_byte(current_byte, compiler_strings_position);
    compiler_strings_position := compiler_strings_position + 1;
    contents := contents + 1;

    (* A backslash is copied to the storage, but it is not counted in the length. *)
    if current_byte <> '\\' then
      compiler_strings_length := compiler_strings_length + 1
    end;
    goto .add_string_loop
  end;

  return result
end;

(**
 * Reads standard input into a buffer.
 *
 * Parameters:
 * buffer - Buffer pointer.
 * size - Buffer size.
 *
 * Returns the amount of bytes written in a0.
 *)
proc _read_file(buffer: Word, size: Word);
  return _syscall(0, buffer, size, 0, 0, 0, 63)
end;

(**
 * Writes to the standard output.
 *
 * Parameters:
 * buffer - Buffer.
 * size - Buffer length.
 *)
proc _write_s(buffer: Word, size: Word);
begin
  _syscall(1, buffer, size, 0, 0, 0, 64)
end;

(**
 * Writes a number to a string buffer.
 *
 * Parameters:
 * number - Whole number.
 * output_buffer - Buffer pointer.
 *
 * Sets a0 to the length of the written number.
*)
proc _print_i(number: Word, output_buffer: Word);
var
  local_buffer: Word;
  is_negative: Word;
  current_character: Word;
  length: Word;
  result: Word;
begin
  (*
    The characters are produced last to first, so they are written backwards
    into the scratch bytes ending at @result + 11. "result" has to stay the
    last declared local and serves only as the anchor of that scratch area.

    Assuming the compiler building this file lays temporaries out at
    4 * index from sp, as _temporary_info_create in this file does, the
    scratch bytes overlap "result" itself. The length is therefore kept in
    "length", which is declared before "result"; storing it into "result"
    would overwrite the first characters of numbers that are 9 or more
    characters long before they are copied out.
  *)
  local_buffer := @result + 11;

  if number >= 0 then
    is_negative := 0
  else
    number := -number;
    is_negative := 1
  end;

  .print_i_digit10;
  current_character := number % 10;
  _store_byte(current_character + '0', local_buffer);

  number := number / 10;
  local_buffer := local_buffer + -1;

  if number <> 0 then
    goto .print_i_digit10
  end;
  if is_negative = 1 then
    _store_byte('-', local_buffer);
    local_buffer := local_buffer + -1
  end;

  (* local_buffer points one byte in front of the first written character. *)
  length := @result + 11;
  length := length + -local_buffer;
  _memcpy(output_buffer, local_buffer + 1, length);

  return length
end;

(**
 * Writes a number to the standard output.
 *
 * Parameters:
 * number - Whole number.
 *)
proc _write_i(number: Word);
var
  local_buffer: Word;
  length: Word;
begin
  (* _print_i copies the characters to the address of local_buffer. *)
  length := _print_i(number, @local_buffer);
  _write_s(@local_buffer, length)
end;

(**
 * Writes a character from a0 into the standard output.
 *
 * Parameters:
 * character - Character to write.
 *)
proc _write_c(character: Word);
begin
  _write_s(@character, 1)
end;

(**
 * Write null terminated string.
 *
 * Parameters:
 * string - String.
 *)
proc _write_z(string: Word);
var
  next_byte: Word;
begin
  (* Check for 0 character. *)
  next_byte := _load_byte(string);

  if next_byte <> 0 then
    (* Print a character. *)
    _write_c(next_byte);

    (* Advance the input string by one byte. *)
    _write_z(string + 1)
  end
end;

(**
 * Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
 *)
proc _is_upper(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := character >= 'A';
  rhs := character <= 'Z';

  return lhs & rhs
end;

(**
 * Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
 *)
proc _is_lower(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := character >= 'a';
  rhs := character <= 'z';

  return lhs & rhs
end;

(**
 * Detects if the passed character is a 7-bit alpha character or an underscore.
*
 * Parameters:
 * character - Tested character.
 *
 * Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
 *)
proc _is_alpha(character: Word);
var
  is_upper_result: Word;
  is_lower_result: Word;
  is_alpha_result: Word;
  is_underscore: Word;
begin
  is_upper_result := _is_upper(character);
  is_lower_result := _is_lower(character);
  is_underscore := character = '_';

  (* An expression takes a single binary operator, so the results are combined in two steps. *)
  is_alpha_result := is_lower_result or is_upper_result;
  return is_alpha_result or is_underscore
end;

(**
 * Detects whether the passed character is a digit (a value between 0 and 9).
 *
 * Parameters:
 * character - Examined value.
 *
 * Sets a0 to 1 if it is a digit, to 0 otherwise.
 *)
proc _is_digit(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := character >= '0';
  rhs := character <= '9';

  return lhs & rhs
end;

(**
 * Detects whether the passed character is accepted by _is_alpha or by
 * _is_digit.
 *
 * Parameters:
 * character - Examined value.
 *
 * Returns the results of both checks combined with "or".
 *)
proc _is_alnum(character: Word);
var
  lhs: Word;
  rhs: Word;
begin
  lhs := _is_alpha(character);
  rhs := _is_digit(character);

  return lhs or rhs
end;

(**
 * Compares two memory regions byte by byte and stops at the first difference.
 *
 * Parameters:
 * lhs - First pointer.
 * rhs - Second pointer.
 * count - The length to compare.
 *
 * Returns 0 if memory regions are equal, otherwise the lhs byte minus the
 * rhs byte at the first position where they differ.
 *)
proc _memcmp(lhs: Word, rhs: Word, count: Word);
var
  lhs_byte: Word;
  rhs_byte: Word;
  result: Word;
begin
  (* A zero count leaves the result at 0. *)
  result := 0;

  .memcmp_loop;
  if count <> 0 then
    lhs_byte := _load_byte(lhs);
    rhs_byte := _load_byte(rhs);
    result := lhs_byte + -rhs_byte;
    lhs := lhs + 1;
    rhs := rhs + 1;
    count := count + -1;

    if result = 0 then
      goto .memcmp_loop
    end
  end;

  return result
end;

(**
 * Copies memory.
 *
 * Parameters:
 * destination - Destination.
 * source - Source.
 * count - Size.
 *
 * Returns the destination pointer advanced by count bytes, that is the
 * address following the last written byte.
*)
proc _memcpy(destination: Word, source: Word, count: Word);
var
  current_byte: Word;
begin
  .memcpy_loop;
  if count <> 0 then
    current_byte := _load_byte(source);
    _store_byte(current_byte, destination);
    destination := destination + 1;
    source := source + 1;
    count := count + -1;
    goto .memcpy_loop
  end;

  (* destination has been advanced once for every copied byte. *)
  return destination
end;

(**
 * Compiles an integer literal: writes "li t0, " followed by the text of the
 * current token and skips the token.
 *)
proc _compile_integer_literal();
var
  integer_token: Word;
  integer_length: Word;
  token_kind: Word;
begin
  _write_z("\tli t0, \0");

  integer_token := _lexer_global_get_start();
  integer_length := _lexer_global_get_end() + -integer_token;

  _write_s(integer_token, integer_length);
  _lexer_skip_token();

  _write_c('\n')
end;

(**
 * Compiles a character literal: writes "li t0, " followed by the text of the
 * current token as it appears in the source and skips the token.
 *)
proc _compile_character_literal();
var
  character: Word;
  token_kind: Word;
  character_length: Word;
begin
  character := _lexer_global_get_start();
  character_length := _lexer_global_get_end() + -character;

  _write_z("\tli t0, \0");
  _write_s(character, character_length);
  _write_c('\n');

  _lexer_skip_token()
end;

(**
 * Compiles an identifier used as a value. If the name is found in the global
 * symbol table it is compiled as an enumeration value, otherwise as a
 * designator whose word is loaded into t0.
 *)
proc _compile_variable_expression();
var
  name: Word;
  lookup_result: Word;
  name_token: Word;
begin
  name := _lexer_global_get_start();
  (* name_token holds the length of the name. *)
  name_token := _lexer_global_get_end() + -name;

  lookup_result := _symbol_table_lookup(@symbol_table_global, name, name_token);
  if lookup_result <> 0 then
    _compile_enumeration_value(lookup_result)
  else
    (* The designator leaves an address in t0, load the word it points to. *)
    _compile_designator();
    _write_z("\tlw t0, (t0)\n\0")
  end
end;

(**
 * Compiles take address expression, starting with an "@" sign.
 *)
proc _compile_address_expression();
begin
  (* Skip the "@"; the designator leaves the address in t0. *)
  _lexer_skip_token();
  _compile_designator()
end;

(**
 * Compile unary negation, "-" sign.
 *)
proc _compile_negate_expression();
begin
  _lexer_skip_token();
  _compile_term();
  _write_z("\tneg t0, t0\n\0")
end;

(* Compile unary bitwise not, "~" sign.
*)
proc _compile_not_expression();
var
  token_kind: Word;
begin
  _lexer_read_token(@token_kind);
  _lexer_skip_token();
  _compile_term();
  _write_z("\tnot t0, t0\n\0")
end;

(**
 * Compiles a string literal. The contents are added to the string storage and
 * the code computing the address "strings + offset" in t0 is written.
 *)
proc _compile_string_literal();
var
  token_kind: Word;
  token_start: Word;
  length: Word;
  offset: Word;
begin
  _lexer_read_token(@token_kind);
  token_start := _lexer_global_get_start();
  length := _string_length(token_start);
  offset := _add_string(token_start);
  _lexer_skip_token();

  _write_z("\tla t0, strings\n\0");
  _write_z("\tli t1, \0");
  _write_i(offset);
  _write_c('\n');
  _write_z("\tadd t0, t0, t1\n\0")
end;

(**
 * Compiles a term, dispatching on the kind of the current token. The
 * generated code leaves the value of the term in t0. Tokens of other kinds
 * produce no code.
 *)
proc _compile_term();
var
  current_character: Word;
  token_kind: Word;
begin
  _lexer_read_token(@token_kind);

  if token_kind = _lexer_token_kind_character() then
    _compile_character_literal()
  elsif token_kind = _lexer_token_kind_string() then
    _compile_string_literal()
  elsif token_kind = _lexer_token_kind_integer() then
    _compile_integer_literal()
  elsif token_kind = _lexer_token_kind_at() then
    _compile_address_expression()
  elsif token_kind = _lexer_token_kind_minus() then
    _compile_negate_expression()
  elsif token_kind = _lexer_token_kind_not() then
    _compile_not_expression()
  elsif token_kind = _lexer_token_kind_identifier() then
    current_character := _lexer_global_get_start();
    current_character := _load_byte(current_character);

    (* This is a call if the statement starts with an underscore. *)
    if current_character = '_' then
      _compile_call();
      _write_z("\tmv t0, a0\n\0")
    else
      _compile_variable_expression()
    end
  end
end;

(**
 * Compiles the right operand of a binary operation. Afterwards the generated
 * code has the left operand in t1 and the right operand in t0.
 *)
proc _compile_binary_rhs();
begin
  (* Save the value of the left expression on the stack. *)
  _write_z("\tsw t0, 64(sp)\n\0");
  _compile_term();

  (* Load the left expression from the stack; *)
  _write_z("\tlw t1, 64(sp)\n\0")
end;

(**
 * Compiles an expression: a term optionally followed by one binary operator
 * and a second term. The result is left in t0.
 *)
proc _compile_expression();
var
  token_kind: Word;
begin
  _compile_term();

  (*
    In every branch below _compile_binary_rhs leaves the left operand in t1
    and the right operand in t0, which determines the operand order of the
    non-commutative instructions.
  *)
  _lexer_read_token(@token_kind);

  if token_kind = _lexer_token_kind_plus() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tadd t0, t0, t1\n\0")
  elsif token_kind = _lexer_token_kind_minus() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tsub t0, t1, t0\n\0")
  elsif token_kind = _lexer_token_kind_multiplication() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tmul t0, t0, t1\n\0")
  elsif token_kind = _lexer_token_kind_and() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tand t0, t0, t1\n\0")
  elsif token_kind = _lexer_token_kind_or() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tor t0, t0, t1\n\0")
  elsif token_kind = _lexer_token_kind_xor() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\txor t0, t0, t1\n\0")
  elsif token_kind = _lexer_token_kind_equals() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0")
  elsif token_kind = _lexer_token_kind_remainder() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\trem t0, t1, t0\n\0")
  elsif token_kind = _lexer_token_kind_division() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tdiv t0, t1, t0\n\0")
  elsif token_kind = _lexer_token_kind_less_than() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tslt t0, t1, t0\n\0")
  elsif token_kind = _lexer_token_kind_greater_than() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tslt t0, t0, t1\n\0")
  elsif token_kind = _lexer_token_kind_less_equal() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0")
  elsif token_kind = _lexer_token_kind_not_equal() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0")
  elsif token_kind = _lexer_token_kind_greater_equal() then
    _lexer_skip_token();
    _compile_binary_rhs();

    (* Execute the operation. *)
    _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0")
  end;

  .compile_expression_end;
end;

(**
 * Compiles a procedure call. Every argument is evaluated and saved on the
 * stack at 116 - 4 * index from sp, then the arguments are loaded into the
 * registers a0, a1 and so on, and the call instruction is written.
 *)
proc _compile_call();
var
  name_length: Word;
  name: Word;
  argument_count: Word;
  stack_offset: Word;
  token_kind: Word;
begin
  _lexer_read_token(@token_kind);
  name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() + -name;
  argument_count := 0;

  (* Skip the identifier and left paren. *)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  _lexer_read_token(@token_kind);
  if token_kind = _lexer_token_kind_right_paren() then
    goto .compile_call_finalize
  end;

  .compile_call_loop;
  _compile_expression();

  (* Save the argument on the stack. *)
  _write_z("\tsw t0, \0");

  (* Calculate the stack offset: 116 - (4 * argument_counter) *)
  stack_offset := argument_count * 4;
  _write_i(116 + -stack_offset);

  _write_z("(sp)\n\0");

  (* Add one to the argument counter. *)
  argument_count := argument_count + 1;

  _lexer_read_token(@token_kind);
  if token_kind <> _lexer_token_kind_comma() then
    goto .compile_call_finalize
  end;
  _lexer_skip_token();
  goto .compile_call_loop;

  .compile_call_finalize;
  (* Load the arguments from the stack, the last one first. *)
  if argument_count <> 0 then
    (* Decrement the argument counter. *)
    argument_count := argument_count + -1;

    _write_z("\tlw a\0");
    _write_i(argument_count);
    _write_z(", \0");

    (* Calculate the stack offset: 116 - (4 * argument_counter) *)
    stack_offset := argument_count * 4;
    _write_i(116 + -stack_offset);

    _write_z("(sp)\n\0");
    goto .compile_call_finalize
  end;

  .compile_call_end;
  _write_z("\tcall \0");
  _write_s(name, name_length);
  _write_c('\n');

  (* Skip the right paren. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token()
end;

(**
 * Compiles a goto statement into a jump to the label. A dot in front of the
 * label name is optional in the source, the written label always has one.
 *)
proc _compile_goto();
var
  next_token: Word;
  next_length: Word;
  token_kind: Word;
begin
  _lexer_skip_token();

  _lexer_read_token(@token_kind);
  if token_kind = _lexer_token_kind_dot() then
    _lexer_skip_token();
    _lexer_read_token(@token_kind)
  end;
  next_token := _lexer_global_get_start();
  next_length := _lexer_global_get_end() + -next_token;

  _write_z("\tj .\0");

  _write_s(next_token, next_length);
  _lexer_skip_token()
end;

(**
 * Writes the code putting the address of a local symbol, sp plus the offset
 * stored in its info object, into t0 and skips the current token.
 *
 * Parameters:
 * symbol - Info object found in the local symbol table.
 *)
proc _compile_local_designator(symbol: Word);
var
  variable_offset: Word;
begin
  _write_z("\taddi t0, sp, \0");
  variable_offset := _parameter_info_get_offset(symbol);
  _write_i(variable_offset);
  _write_c('\n');
  _lexer_skip_token()
end;

(**
 * Writes the code loading the address of the global symbol named by the
 * current token into t0 and skips the token.
 *)
proc _compile_global_designator();
var
  name: Word;
  token_kind: Word;
  token_length: Word;
begin
  _write_z("\tla t0, \0");

  _lexer_read_token(@token_kind);
  name := _lexer_global_get_start();
  token_length := _lexer_global_get_end() + -name;
  _write_s(name, token_length);
  _lexer_skip_token();

  _write_c('\n')
end;

(**
 * Compiles an enumeration value written as the type name, a dot and the
 * member name. Members are numbered starting with 1 in declaration order.
 * No code is written if the member name isn't found.
 *
 * Parameters:
 * symbol - Type info of the enumeration type.
 *)
proc _compile_enumeration_value(symbol: Word);
var
  enumeration_type: Word;
  members: Word;
  members_length: Word;
  token_type: Word;
  value_name: Word;
  name_length: Word;
  member_name: Word;
  member_length: Word;
  counter: Word;
begin
  enumeration_type := _type_info_get_type(symbol);
  members := _enumeration_type_get_members(enumeration_type);
  members_length := _enumeration_type_get_length(enumeration_type);

  (* Skip enumeration type name and dot. Read the enumeration value. *)
  _lexer_skip_token();
  _lexer_read_token(@token_type);
  _lexer_skip_token();
  _lexer_read_token(@token_type);
  value_name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() + -value_name;
  _lexer_skip_token();
  counter := 1;

  .compile_enumeration_value_members;
  if members_length > 0 then
    (* Every member takes two words: the name pointer and the name length. *)
    member_name := _load_word(members);
    member_length := _load_word(members + 4);

    (* A zero comparison result moves on to the next member. *)
    if _lexer_compare_keyword(value_name, name_length, member_name, member_length) = 0 then
      members_length := members_length + -1;
      members := members + 8;
      counter := counter + 1;
      goto .compile_enumeration_value_members
    end;
    _write_z("\tli t0, \0");
    _write_i(counter);
    _write_c('\n')
  end
end;

(**
 * Compiles a designator. The generated code leaves the address of the named
 * local or global symbol in t0. A following "^" replaces the address with the
 * word stored at it.
 *)
proc _compile_designator();
var
  name_token: Word;
  lookup_result: Word;
  token_kind: Word;
  name: Word;
begin
  _lexer_read_token(@token_kind);
  name := _lexer_global_get_start();
  (* name_token holds the length of the name. *)
  name_token := _lexer_global_get_end() + -name;

  lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);

  if lookup_result <> 0 then
    _compile_local_designator(lookup_result)
  else
    _compile_global_designator()
  end;
  _lexer_read_token(@token_kind);
  if token_kind = _lexer_token_kind_hat() then
    _lexer_skip_token();
    _write_z("\tlw t0, (t0)\n\0")
  end
end;

(**
 * Compiles an assignment. The token following the designator is skipped
 * without being checked.
 *)
proc _compile_assignment();
var
  token_kind: Word;
begin
  _compile_designator();

  (* Save the assignee address on the stack. *)
  _write_z("\tsw t0, 60(sp)\n\0");

  (* Skip the assignment sign (:=) with surrounding whitespaces. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  (* Compile the assignment. *)
  _compile_expression();

  _write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0")
end;

(**
 * Compiles a return statement: the expression value is moved into a0. No
 * jump is written.
 *)
proc _compile_return_statement();
var
  token_kind: Word;
begin
  (* Skip "return" keyword and whitespace after it. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  _compile_expression();
  _write_z("\tmv a0, t0\n\0")
end;

(**
 * Writes a label, .Ln, where n is a unique number.
 *
 * Parameters:
 * counter - Label counter.
*)
proc _write_label(counter: Word);
begin
  _write_z(".L\0");
  _write_i(counter)
end;

(**
 * Compiles one condition of an if statement together with the statements
 * guarded by it.
 *
 * Parameters:
 * after_end_label - Number of the label following the whole if statement.
 *)
proc _compile_condition(after_end_label: Word);
var
  condition_label: Word;
  token_kind: Word;
begin
  (* Compile condition. *)
  _compile_expression();

  (* Skip " then" with newline. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  (* condition_label is the label in front of the next elsif condition or end. *)
  condition_label := label_counter;
  label_counter := label_counter + 1;

  (* Jump over the guarded statements if the condition value is 0. *)
  _write_z("\tbeqz t0, \0");
  _write_label(condition_label);
  _write_c('\n');

  _compile_statement_list();

  (* After the guarded statements jump behind the whole if statement. *)
  _write_z("\tj \0");
  _write_label(after_end_label);
  _write_c('\n');

  _write_label(condition_label);
  _write_z(":\n\0")
end;

(**
 * Compiles an if statement with optional elsif branches and an optional else
 * branch.
 *)
proc _compile_if();
var
  after_end_label: Word;
  condition_label: Word;
  token_kind: Word;
begin
  (* Skip "if ". *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  (* All branches share the label written after the statement. *)
  after_end_label := label_counter;
  label_counter := label_counter + 1;

  _compile_condition(after_end_label);

  .compile_if_loop;
  _lexer_read_token(@token_kind);
  if token_kind = _lexer_token_kind_else() then
    _lexer_skip_token();
    _compile_statement_list()
  elsif token_kind = _lexer_token_kind_elsif() then
    _lexer_skip_token();
    _compile_condition(after_end_label);
    goto .compile_if_loop
  end;
  (* Skip the token closing the if statement. *)
  _lexer_skip_token();

  _write_label(after_end_label);
  _write_z(":\n\0")
end;

(**
 * Compiles a label declaration: a dot followed by the label name. The label
 * is written with a dot in front of the name and a colon after it.
 *)
proc _compile_label_declaration();
var
  label_token: Word;
  token_kind: Word;
  name: Word;
begin
  (* Skip the dot. *)
  _lexer_skip_token();
  _lexer_read_token(@token_kind);
  name := _lexer_global_get_start();
  (* label_token holds the length of the name. *)
  label_token := _lexer_global_get_end() + -name;

  _write_c('.');
  _write_s(name, label_token);
  _write_z(":\n\0");
  _lexer_skip_token()
end;

(**
 * Compiles a single statement, dispatching on the kind of the current token,
 * and writes a newline after it. Tokens of other kinds produce only the
 * newline and are not skipped.
 *)
proc _compile_statement();
var
  current_byte: Word;
  token_kind: Word;
begin
  _lexer_read_token(@token_kind);

  if token_kind = _lexer_token_kind_goto() then
    _compile_goto()
  elsif token_kind = _lexer_token_kind_if() then
    _compile_if()
  elsif token_kind = _lexer_token_kind_return() then
    _compile_return_statement()
  elsif token_kind = _lexer_token_kind_dot() then
    _compile_label_declaration()
  elsif token_kind = _lexer_token_kind_identifier() then
    current_byte := _lexer_global_get_start();
    current_byte := _load_byte(current_byte);

    (* This is a call if the statement starts with an underscore. *)
    if current_byte = '_' then
      _compile_call()
    else
      _compile_assignment()
    end
  end;
  _write_c('\n')
end;

(**
 * Compiles statements separated by semicolons. Comments are skipped in front
 * of every statement and after the last one.
 *)
proc _compile_statement_list();
var
  token_kind: Word;
begin
  _skip_empty_lines();
  _compile_statement();

  _lexer_read_token(@token_kind);

  (* A semicolon is followed by the rest of the list, compiled recursively. *)
  if token_kind = _lexer_token_kind_semicolon() then
    _lexer_skip_token();
    _compile_statement_list()
  end;
  _skip_empty_lines()
end;

(**
 * Writes a register name to the standard output.
 *
 * Parameters:
 * register_character - Register character.
 * register_number - Register number.
*)
proc _write_register(register_character: Word, register_number: Word);
begin
  _write_c(register_character);
  (* The number is written as one character, '0' plus the number. *)
  _write_c(register_number + '0')
end;

(**
 * Accessors of the type description fields. A description has the kind at
 * offset 0 and the size at offset 4; an enumeration description additionally
 * has the members pointer at offset 8 and the members count at offset 12.
 *)
proc _type_get_kind(this: Word);
  return _load_word(this)
end;

proc _type_set_kind(this: Word, value: Word);
begin
  _store_word(value, this)
end;

proc _type_get_size(this: Word);
  return _load_word(this + 4)
end;

proc _type_set_size(this: Word, value: Word);
begin
  _store_word(value, this + 4)
end;

proc _enumeration_type_get_members(this: Word);
  return _load_word(this + 8)
end;

proc _enumeration_type_set_members(this: Word, value: Word);
begin
  _store_word(value, this + 8)
end;

proc _enumeration_type_get_length(this: Word);
  return _load_word(this + 12)
end;

proc _enumeration_type_set_length(this: Word, value: Word);
begin
  _store_word(value, this + 12)
end;

(**
 * Reads and creates enumeration type representation.
 *
 * record
 *   type_kind: Word;
 *   size: Word;
 *   members: StringArray;
 *   length: Word
 * end;
 *
 * The members array and the record are allocated at memory_free_pointer.
 *
 * Returns a type info object, created with _type_info_create, that refers to
 * the enumeration type description.
 *)
proc _read_type_enumeration();
var
  token_kind: Word;
  enumeration_name: Word;
  name_length: Word;
  memory_start: Word;
  member_count: Word;
  result: Word;
begin
  (* Skip the current token, the left paren the caller has read. *)
  _lexer_skip_token();
  memory_start := memory_free_pointer;
  member_count := 0;

  _lexer_read_token(@token_kind);
  if token_kind = _lexer_token_kind_right_paren() then
    goto .read_type_enumeration_end
  end;

  .read_type_enumeration_loop;
  member_count := member_count + 1;

  (* Every member is stored as two words: the name pointer and the name length. *)
  enumeration_name := _lexer_global_get_start();
  name_length := _lexer_global_get_end() + -enumeration_name;

  _store_word(enumeration_name, memory_free_pointer);
  memory_free_pointer := memory_free_pointer + 4;

  _store_word(name_length, memory_free_pointer);
  memory_free_pointer := memory_free_pointer + 4;

  (* Skip the identifier. *)
  _lexer_skip_token();

  _lexer_read_token(@token_kind);
  if token_kind = _lexer_token_kind_comma() then
    _lexer_skip_token();
    _lexer_read_token(@token_kind);
    goto .read_type_enumeration_loop
  end;

  .read_type_enumeration_end;
  (* Skip the token following the members, the right paren. *)
  _lexer_skip_token();

  (* The resulting structure is 16 bytes long. *)
  result := memory_free_pointer;
  memory_free_pointer := memory_free_pointer + 16;

  (* ENUMERATION_TYPE is 2. *)
  _type_set_kind(result, 2);
  _type_set_size(result, 4);
  _enumeration_type_set_members(result, memory_start);
  _enumeration_type_set_length(result, member_count);

  return _type_info_create(result)
end;

(**
 * Reads a type expression: a type name or an enumeration in parentheses.
 *
 * Returns 0 if the current token starts neither. For a type name the type
 * stored in the looked up info object is returned; for an enumeration the
 * result of _read_type_enumeration, a type info object, is returned.
 *
 * NOTE(review): the two branches return objects of different kinds, and the
 * lookup result isn't checked for 0 before it is dereferenced - confirm that
 * the callers only rely on what they actually get.
 *)
proc _read_type_expression();
var
  token_kind: Word;
  type_name: Word;
  name_length: Word;
  result: Word;
begin
  result := 0;
  _lexer_read_token(@token_kind);

  if token_kind = _lexer_token_kind_identifier() then
    (* Named type. *)
    type_name := _lexer_global_get_start();
    name_length := _lexer_global_get_end() + -type_name;
    result := _symbol_table_lookup(@symbol_table_global, type_name, name_length);
    result := _type_info_get_type(result);

    _lexer_skip_token()
  elsif token_kind = _lexer_token_kind_left_paren() then
    result := _read_type_enumeration()
  end;
  return result
end;

(**
 * Returns the second word of a type info object, the type it belongs to.
 *)
proc _type_info_get_type(this: Word);
  return _load_word(this + 4)
end;

(**
 * Creates a parameter info object, two words allocated at
 * memory_free_pointer: the info kind and the stack offset of the parameter.
 *
 * Parameters:
 * parameter_index - Parameter index.
 *
 * Returns the created object.
*)
proc _parameter_info_create(parameter_index: Word);
var
  offset: Word;
  current_word: Word;
  result: Word;
begin
  result := memory_free_pointer;
  current_word := result;

  (* 2 is INFO_PARAMETER *)
  _store_word(2, current_word);
  current_word := current_word + 4;

  (* Calculate the stack offset: 88 - (4 * parameter_counter) *)
  offset := parameter_index * 4;
  _store_word(88 + -offset, current_word);

  (* Reserve the two written words. *)
  memory_free_pointer := current_word + 4;

  return result
end;

(**
 * Returns the second word of a parameter info object, the stack offset.
 *)
proc _parameter_info_get_offset(this: Word);
begin
  this := this + 4;
  return _load_word(this)
end;

(**
 * Creates a type info object, two words allocated at memory_free_pointer:
 * the info kind and the type.
 *
 * Parameters:
 * type_representation - Type the info belongs to.
 *
 * Returns the created object.
 *)
proc _type_info_create(type_representation: Word);
var
  result: Word;
  current_word: Word;
begin
  result := memory_free_pointer;
  current_word := result;

  (* 1 is INFO_TYPE *)
  _store_word(1, current_word);
  current_word := current_word + 4;

  _store_word(type_representation, current_word);

  (* Reserve the two written words. *)
  memory_free_pointer := current_word + 4;

  return result
end;

(**
 * Creates a temporary info object, two words allocated at
 * memory_free_pointer: the info kind and the stack offset of the variable.
 *
 * Parameters:
 * temporary_index - Temporary variable index.
 *
 * Returns the created object.
 *)
proc _temporary_info_create(temporary_index: Word);
var
  offset: Word;
  current_word: Word;
  result: Word;
begin
  result := memory_free_pointer;
  current_word := result;

  (* 3 is INFO_TEMPORARY *)
  _store_word(3, current_word);
  current_word := current_word + 4;

  (* Calculate the stack offset: 4 * variable_counter. *)
  _store_word(temporary_index * 4, current_word);

  (* Reserve the two written words. *)
  memory_free_pointer := current_word + 4;

  return result
end;

(**
 * Returns the second word of a temporary info object, the stack offset.
 *)
proc _temporary_info_get_offset(this: Word);
begin
  this := this + 4;
  return _load_word(this)
end;

(**
 * Reads one parameter declaration, enters the parameter into the local
 * symbol table and writes the instruction saving its argument register on
 * the stack.
 *
 * Parameters:
 * parameter_index - Parameter index.
 *)
proc _read_procedure_parameter(parameter_index: Word);
var
  name_length: Word;
  info: Word;
  name_position: Word;
  token_kind: Word;
begin
  (* Read the parameter name. *)
  _lexer_read_token(@token_kind);
  name_position := _lexer_global_get_start();
  name_length := _lexer_global_get_end() + -name_position;
  _lexer_skip_token();

  (* Skip colon and space in front of the type expression. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();

  (* The type is read, but its value isn't used. *)
  _read_type_expression();

  _write_z("\tsw a\0");
  _write_i(parameter_index);
  _write_z(", \0");

  info := _parameter_info_create(parameter_index);
  _symbol_table_enter(@symbol_table_local, name_position, name_length, info);

  (* From here on info holds the stack offset of the parameter. *)
  info := _parameter_info_get_offset(info);
  _write_i(info);

  _write_z("(sp)\n\0")
end;

(**
 * Reads the parameter list of a procedure, from the opening paren to the
 * closing paren inclusive.
 *)
proc _read_procedure_parameters();
var
  parameter_counter: Word;
  token_kind: Word;
begin
  (* Skip open paren. *)
  _lexer_read_token(@token_kind);
  _lexer_skip_token();
  parameter_counter := 0;

  .compile_procedure_prologue_skip;
  _lexer_read_token(@token_kind);

  if token_kind <> _lexer_token_kind_right_paren() then
    _read_procedure_parameter(parameter_counter);
    parameter_counter := parameter_counter + 1;

    _lexer_read_token(@token_kind);

    if token_kind = _lexer_token_kind_comma() then
      _lexer_skip_token();
      goto .compile_procedure_prologue_skip
    end
  end;
  (* Skip close paren. *)
  _lexer_skip_token()
end;

(**
 * Reads one local variable declaration and enters the variable into the
 * local symbol table.
 *
 * Parameters:
 * variable_index - Variable index.
*) proc _read_procedure_temporary(variable_index: Word); var name_length: Word; info: Word; name_position: Word; token_kind: Word; begin _lexer_read_token(@token_kind); name_position := _lexer_global_get_start(); name_length := _lexer_global_get_end() + -name_position; _lexer_skip_token(); (* Read and skip variable name, colon and the space *) _lexer_read_token(@token_kind); _lexer_skip_token(); _read_type_expression(); info := _temporary_info_create(variable_index); _symbol_table_enter(@symbol_table_local, name_position, name_length, info); (* Skip semicolon and newline after the variable declaration *) _lexer_read_token(@token_kind); _lexer_skip_token() end; proc _read_procedure_temporaries(); var temporary_counter: Word; token_kind: Word; begin _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_var() then _lexer_skip_token(); temporary_counter := 0; .read_local_variables_loop; _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_identifier() then _read_procedure_temporary(temporary_counter); temporary_counter := temporary_counter + 1; goto .read_local_variables_loop end end end; proc _compile_procedure(); var name_pointer: Word; name_length: Word; token_kind: Word; begin (* Skip "proc ". *) _lexer_read_token(@token_kind); _lexer_skip_token(); (* Clear local symbol table. *) _store_word(0, @symbol_table_local); _lexer_read_token(@token_kind); name_pointer := _lexer_global_get_start(); name_length := _lexer_global_get_end() + -name_pointer; (* Write .type _procedure_name, @function. *) _write_z(".type \0"); _write_s(name_pointer, name_length); _write_z(", @function\n\0"); (* Write procedure label, _procedure_name: *) _write_s(name_pointer, name_length); _write_z(":\n\0"); (* Skip procedure name. *) _lexer_skip_token(); _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); _read_procedure_parameters(); (* Skip semicolon and newline. 
*) _lexer_read_token(@token_kind); _lexer_skip_token(); _read_procedure_temporaries(); (* Skip semicolon, "begin" and newline. *) _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_begin() then _lexer_skip_token(); _compile_statement_list() elsif token_kind = _lexer_token_kind_return() then _compile_return_statement() end; (* Write the epilogue. *) _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); (* Skip the "end" keyword, semicolon and newline. *) _lexer_read_token(@token_kind); _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token() end; (** * Skips comments. *) proc _skip_empty_lines(); var token_kind: Word; begin .skip_empty_lines_rerun; _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_comment() then _lexer_skip_token(); goto .skip_empty_lines_rerun end end; (** * Compile global variable initializer. *) proc _compile_global_initializer(); var current_byte: Word; length: Word; token_kind: Word; token_start: Word; begin _lexer_read_token(@token_kind); token_start := _lexer_global_get_start(); current_byte := _load_byte(token_start); if token_kind = _lexer_token_kind_string() then _write_z("\n\t.word strings + \0"); length := _string_length(token_start); _add_string(token_start); _write_i(); (* Skip the quoted string. *) _lexer_skip_token(); goto .compile_global_initializer_end elsif current_byte = 'S' then (* Skip "S(". *) _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_right_paren() then goto .compile_global_initializer_closing end; goto .compile_global_initializer_loop elsif token_kind = _lexer_token_kind_at() then (* Skip @. 
*) _lexer_skip_token(); _write_z("\n\t.word \0"); _lexer_read_token(@token_kind); token_start := _lexer_global_get_start(); _write_s(token_start, _lexer_global_get_end() + -token_start); _lexer_skip_token(); goto .compile_global_initializer_end elsif token_kind = _lexer_token_kind_integer() then _write_z("\n\t.word \0"); _write_s(token_start, _lexer_global_get_end() + -token_start); _lexer_skip_token(); goto .compile_global_initializer_end end; .compile_global_initializer_loop; _compile_global_initializer(); _lexer_read_token(@token_kind); if token_kind <> _lexer_token_kind_right_paren() then (* Skip comma and whitespace after it. *) _lexer_skip_token(); goto .compile_global_initializer_loop end; .compile_global_initializer_closing; (* Skip ")" *) _lexer_skip_token(); .compile_global_initializer_end end; proc _compile_constant_declaration(); var name: Word; name_length: Word; token_kind: Word; begin name := _lexer_global_get_start(); name_length := _lexer_global_get_end() + -name; _write_z(".type \0"); _write_s(name, name_length); _write_z(", @object\n\0"); _write_s(name, name_length); _write_c(':'); (* Skip the constant name with assignment sign and surrounding whitespaces. *) _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); _compile_global_initializer(); (* Skip semicolon and newline. 
*) _lexer_read_token(@token_kind); _lexer_skip_token(); _write_c('\n') end; proc _compile_type_declaration(); var token_kind: Word; type_name: Word; name_length: Word; type_info: Word; begin type_name := _lexer_global_get_start(); name_length := _lexer_global_get_end() + -type_name; _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); type_info := _read_type_expression(); _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info); _lexer_read_token(@token_kind); _lexer_skip_token() end; proc _compile_type_part(); var token_kind: Word; begin _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind <> _lexer_token_kind_type() then goto .compile_type_part_end end; _lexer_skip_token(); .compile_type_part_loop; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_identifier() then _compile_type_declaration(); goto .compile_type_part_loop end; .compile_type_part_end end; proc _compile_const_part(); var token_kind: Word; begin _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind <> _lexer_token_kind_const() then goto .compile_const_part_end end; (* Skip "const" with the newline after it. *) _lexer_skip_token(); _write_z(".section .rodata # Compiled from const section.\n\n\0"); .compile_const_part_loop; _skip_empty_lines(); (* If the character at the line beginning is not indentation, it is probably the next code section. 
*)
	_lexer_read_token(@token_kind);
	if token_kind = _lexer_token_kind_identifier() then
		_compile_constant_declaration();
		goto .compile_const_part_loop
	end;

	.compile_const_part_end
end;

(**
 * Compiles a single global variable declaration.
 *
 * Emits ".type NAME, @object" and the "NAME:" label for the variable named
 * by the current token, reads (and discards the result of) its type
 * expression and then emits either the initializer, if an assignment sign
 * follows, or a fixed-size zeroed area.
 *)
proc _compile_variable_declaration();
var
	name: Word;
	name_length: Word;
	token_kind: Word;
begin
	_lexer_read_token(@token_kind);
	name := _lexer_global_get_start();
	name_length := _lexer_global_get_end() + -name;

	_write_z(".type \0");
	_write_s(name, name_length);
	_write_z(", @object\n\0");

	_write_s(name, name_length);
	_write_c(':');

	(* Skip the variable name and colon with space before the type. *)
	_lexer_skip_token();
	_lexer_read_token(@token_kind);
	_lexer_skip_token();
	_read_type_expression();

	_lexer_read_token(@token_kind);
	if token_kind <> _lexer_token_kind_assignment() then
		(* No initializer: assume this is a zeroed 81920 bytes big array,
		   whatever type was declared. *)
		_write_z(" .zero 81920\0")
	else
		(* Skip the assignment sign with surrounding whitespaces. *)
		_lexer_skip_token();
		_compile_global_initializer();
		(* NOTE(review): the token is read again right after the branch, so
		   this read looks redundant - confirm before removing. *)
		_lexer_read_token(@token_kind)
	end;

	(* Skip semicolon and newline. *)
	_lexer_read_token(@token_kind);
	_lexer_skip_token();
	_write_c('\n')
end;

(**
 * Compiles the optional variable part of a module.
 *
 * Does nothing unless the next token is the "var" keyword. Otherwise the
 * keyword is skipped, the .data section header is emitted and variable
 * declarations are compiled for as long as the next token is an identifier.
 * Unlike the type and const parts, leading empty lines are not skipped here.
 *)
proc _compile_var_part();
var
	token_kind: Word;
begin
	_lexer_read_token(@token_kind);

	if token_kind <> _lexer_token_kind_var() then
		goto .compile_var_part_end
	end;
	(* Skip "var" and newline. *)
	_lexer_skip_token();
	_write_z(".section .data\n\0");

	.compile_var_part_loop;
	_skip_empty_lines();
	_lexer_read_token(@token_kind);

	if token_kind = _lexer_token_kind_identifier() then
		_compile_variable_declaration();
		goto .compile_var_part_loop
	end;

	.compile_var_part_end
end;

(**
 * Process the source code and print the generated code.
 *
 * The type, const and var parts are compiled first, in this order. Then the
 * .text section is opened, the built-in helper routines are emitted and
 * procedures are compiled for as long as the next token is "proc".
*)
proc _compile_module();
var
	token_kind: Word;
begin
	_compile_type_part();
	_compile_const_part();
	_skip_empty_lines();
	_compile_var_part();

	_write_z(".section .text\n\n\0");

	(* Built-in routines that are emitted verbatim into every output: the
	   system call wrapper (moves the 7th argument into a7) and byte/word
	   load and store helpers. *)
	_write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0");
	_write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0");
	_write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0");
	_write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0");
	_write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0");

	.compile_module_loop;
	_skip_empty_lines();
	_lexer_read_token(@token_kind);

	if token_kind = _lexer_token_kind_proc() then
		_compile_procedure();
		goto .compile_module_loop
	end
end;

(**
 * Compiles the whole program.
 *
 * Emits the ".globl _start" directive, compiles the module and finally dumps
 * the collected string storage, from the beginning of compiler_strings up to
 * compiler_strings_position, as a single .ascii literal labelled "strings".
 *)
proc _compile();
var
	compiler_strings_copy: Word;
	compiler_strings_end: Word;
	current_byte: Word;
begin
	_write_z(".globl _start\n\n\0");

	_compile_module();

	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
	_write_c('"');

	compiler_strings_copy := @compiler_strings;
	compiler_strings_end := compiler_strings_position;

	(* Copy the string storage byte by byte into the output. *)
	.compile_loop;
	if compiler_strings_copy < compiler_strings_end then
		current_byte := _load_byte(compiler_strings_copy);
		compiler_strings_copy := compiler_strings_copy + 1;
		_write_c(current_byte);

		goto .compile_loop
	end;
	_write_c('"');
	_write_c('\n')
end;

(**
 * Terminates the program. a0 contains the return code.
 *
 * Parameters:
 * a0 - Status code.
 *
 * NOTE(review): the procedure declares no parameters and passes a literal 0
 * as the first system call argument, so as written the exit status appears
 * to be always 0 - confirm against the callers.
 *)
proc _exit();
begin
	(* 93 is passed as the system call number. *)
	_syscall(0, 0, 0, 0, 0, 0, 93)
end;

(**
 * Looks for a symbol in the given symbol table.
 *
 * The table starts with a word holding the number of entries, followed by
 * the entries themselves. Each entry is 12 bytes long: name pointer, name
 * length and symbol pointer. The entries are searched in order.
 *
 * Parameters:
 * symbol_table - Symbol table.
 * symbol_name - Symbol name pointer.
 * name_length - Symbol name length.
 *
 * Returns the symbol pointer or 0 in a0.
 *)
proc _symbol_table_lookup(symbol_table: Word, symbol_name: Word, name_length: Word);
var
	result: Word;
	symbol_table_length: Word;
	current_name: Word;
	current_length: Word;
begin
	result := 0;

	(* The first word in the symbol table is its length, get it.
*)
	symbol_table_length := _load_word(symbol_table);

	(* Go to the first symbol position. *)
	symbol_table := symbol_table + 4;

	.symbol_table_lookup_loop;
	(* No entries left: the symbol was not found, result is still 0. *)
	if symbol_table_length = 0 then
		goto .symbol_table_lookup_end
	end;

	(* Symbol name pointer and length. *)
	current_name := _load_word(symbol_table);
	current_length := _load_word(symbol_table + 4);

	(* If lengths don't match, continue with the next entry. *)
	if name_length <> current_length then
		goto .symbol_table_lookup_repeat
	end;
	(* If names don't match, continue with the next entry. *)
	if _memcmp(symbol_name, current_name, name_length) <> 0 then
		goto .symbol_table_lookup_repeat
	end;
	(* Otherwise, the symbol is found. *)
	result := _load_word(symbol_table + 8);
	goto .symbol_table_lookup_end;

	.symbol_table_lookup_repeat;
	(* Advance to the next 12 bytes long entry. *)
	symbol_table := symbol_table + 12;
	symbol_table_length := symbol_table_length + -1;
	goto .symbol_table_lookup_loop;

	.symbol_table_lookup_end;
	return result
end;

(**
 * Inserts a symbol into the table.
 *
 * The new entry is appended after the existing ones and the entry count in
 * the first word of the table is incremented. Neither duplicates nor the
 * table capacity are checked here.
 *
 * Parameters:
 * symbol_table - Symbol table.
 * symbol_name - Symbol name pointer.
 * name_length - Symbol name length.
 * symbol - Symbol pointer.
 *)
proc _symbol_table_enter(symbol_table: Word, symbol_name: Word, name_length: Word, symbol: Word);
var
	table_length: Word;
	symbol_pointer: Word;
begin
	(* The first word in the symbol table is its length, get it. *)
	table_length := _load_word(symbol_table);

	(* Calculate the offset for the new symbol: 12 bytes per existing entry
	   plus 4 bytes for the length word. *)
	symbol_pointer := table_length * 12;
	symbol_pointer := symbol_pointer + 4;
	symbol_pointer := symbol_table + symbol_pointer;

	(* Entry layout: name pointer, name length, symbol pointer. *)
	_store_word(symbol_name, symbol_pointer);
	symbol_pointer := symbol_pointer + 4;
	_store_word(name_length, symbol_pointer);
	symbol_pointer := symbol_pointer + 4;
	_store_word(symbol, symbol_pointer);

	(* Increment the symbol table length. *)
	table_length := table_length + 1;
	_store_word(table_length, symbol_table)
end;

(**
 * Initializes the global symbol table with the built-in types.
 *)
proc _symbol_table_build();
begin
	(* Set the table length to 0. *)
	_store_word(0, @symbol_table_global);

	(* Enter built-in symbols. The numeric argument is the name length.
*)
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char);
	_symbol_table_enter(@symbol_table_global, symbol_builtin_name_array, 5, @symbol_type_info_array)
end;

(**
 * Classification table assigns each possible character to a group (class). All
 * characters of the same group are handled equivalently.
 *
 * Transition = record
 *   action: TransitionAction;
 *   next_state: TransitionState
 * end;
 *
 * The procedures below serve as enumeration constants: character classes
 * are numbered 1 to 22, lexer states 1 to 17. Each procedure just returns
 * its number.
 *)
proc _lexer_class_invalid();
	return 1
end;

proc _lexer_class_digit();
	return 2
end;

proc _lexer_class_alpha();
	return 3
end;

proc _lexer_class_space();
	return 4
end;

proc _lexer_class_colon();
	return 5
end;

proc _lexer_class_equals();
	return 6
end;

proc _lexer_class_left_paren();
	return 7
end;

proc _lexer_class_right_paren();
	return 8
end;

proc _lexer_class_asterisk();
	return 9
end;

proc _lexer_class_backslash();
	return 10
end;

proc _lexer_class_single();
	return 11
end;

proc _lexer_class_hex();
	return 12
end;

proc _lexer_class_zero();
	return 13
end;

proc _lexer_class_x();
	return 14
end;

proc _lexer_class_eof();
	return 15
end;

proc _lexer_class_dot();
	return 16
end;

proc _lexer_class_minus();
	return 17
end;

proc _lexer_class_single_quote();
	return 18
end;

proc _lexer_class_double_quote();
	return 19
end;

proc _lexer_class_greater();
	return 20
end;

proc _lexer_class_less();
	return 21
end;

proc _lexer_class_other();
	return 22
end;

(* Lexer states. *)
proc _lexer_state_start();
	return 1
end;

proc _lexer_state_colon();
	return 2
end;

proc _lexer_state_identifier();
	return 3
end;

proc _lexer_state_decimal();
	return 4
end;

proc _lexer_state_leading_zero();
	return 5
end;

proc _lexer_state_greater();
	return 6
end;

proc _lexer_state_minus();
	return 7
end;

proc _lexer_state_left_paren();
return 8
end;

proc _lexer_state_less();
	return 9
end;

proc _lexer_state_dot();
	return 10
end;

proc _lexer_state_comment();
	return 11
end;

proc _lexer_state_closing_comment();
	return 12
end;

proc _lexer_state_character();
	return 13
end;

proc _lexer_state_character_escape();
	return 14
end;

proc _lexer_state_string();
	return 15
end;

proc _lexer_state_string_escape();
	return 16
end;

proc _lexer_state_end();
	return 17
end;

(* Lexer actions, numbered 1 to 10. *)
proc _lexer_action_none();
	return 1
end;

proc _lexer_action_accumulate();
	return 2
end;

proc _lexer_action_skip();
	return 3
end;

proc _lexer_action_single();
	return 4
end;

proc _lexer_action_eof();
	return 5
end;

proc _lexer_action_finalize();
	return 6
end;

proc _lexer_action_composite();
	return 7
end;

proc _lexer_action_key_id();
	return 8
end;

proc _lexer_action_integer();
	return 9
end;

proc _lexer_action_delimited();
	return 10
end;

(**
 * Assigns a value to the array element at the given index.
 *
 * Parameters:
 * array - Array pointer.
 * index - One-based element index; elements are one word (4 bytes) long.
 * data - Data to assign.
 *)
proc _assign_at(array: Word, index: Word, data: Word);
var
	target: Word;
begin
	(* Byte offset of the element is (index - 1) * 4. *)
	target := index + -1;
	target := target * 4;
	target := array + target;

	_store_word(data, target)
end;

(**
 * Reads the array element at the given index.
 *
 * Parameters:
 * array - Array pointer.
 * index - One-based element index; elements are one word (4 bytes) long.
 *
 * Returns the word stored at that position.
 *)
proc _get_at(array: Word, index: Word);
var
	target: Word;
begin
	(* Byte offset of the element is (index - 1) * 4. *)
	target := index + -1;
	target := target * 4;
	target := array + target;

	return _load_word(target)
end;

(**
 * Initializes the array with character classes.
 *
 * The one-based index into the classification table is the character code
 * plus one.
*)
proc _lexer_classifications();
begin
	(* Control characters. Code 0 marks the end of the input; tab, line feed
	   and carriage return are white space, everything else is invalid. *)
	_assign_at(@classification, 1, _lexer_class_eof());
	_lexer_classify_range(2, 10, _lexer_class_invalid());
	_lexer_classify_range(10, 12, _lexer_class_space());
	_lexer_classify_range(12, 14, _lexer_class_invalid());
	_assign_at(@classification, 14, _lexer_class_space());
	_lexer_classify_range(15, 33, _lexer_class_invalid());

	(* Space and punctuation up to the slash. *)
	_assign_at(@classification, 33, _lexer_class_space());
	_assign_at(@classification, 34, _lexer_class_single());
	_assign_at(@classification, 35, _lexer_class_double_quote());
	_lexer_classify_range(36, 38, _lexer_class_other());
	_lexer_classify_range(38, 40, _lexer_class_single());
	_assign_at(@classification, 40, _lexer_class_single_quote());
	_assign_at(@classification, 41, _lexer_class_left_paren());
	_assign_at(@classification, 42, _lexer_class_right_paren());
	_assign_at(@classification, 43, _lexer_class_asterisk());
	_lexer_classify_range(44, 46, _lexer_class_single());
	_assign_at(@classification, 46, _lexer_class_minus());
	_assign_at(@classification, 47, _lexer_class_dot());
	_assign_at(@classification, 48, _lexer_class_single());

	(* Digits: zero has its own class. *)
	_assign_at(@classification, 49, _lexer_class_zero());
	_lexer_classify_range(50, 59, _lexer_class_digit());

	(* Punctuation between the digits and the upper case letters. *)
	_assign_at(@classification, 59, _lexer_class_colon());
	_assign_at(@classification, 60, _lexer_class_single());
	_assign_at(@classification, 61, _lexer_class_less());
	_assign_at(@classification, 62, _lexer_class_equals());
	_assign_at(@classification, 63, _lexer_class_greater());
	_assign_at(@classification, 64, _lexer_class_other());
	_assign_at(@classification, 65, _lexer_class_single());

	(* Upper case letters. *)
	_lexer_classify_range(66, 92, _lexer_class_alpha());

	(* Brackets, backslash, hat, underscore and backtick. *)
	_assign_at(@classification, 92, _lexer_class_single());
	_assign_at(@classification, 93, _lexer_class_backslash());
	_lexer_classify_range(94, 96, _lexer_class_single());
	_assign_at(@classification, 96, _lexer_class_alpha());
	_assign_at(@classification, 97, _lexer_class_other());

	(* Lower case letters: "a" to "f" are hexadecimal digits and "x" has
	   its own class. *)
	_lexer_classify_range(98, 104, _lexer_class_hex());
	_lexer_classify_range(104, 121, _lexer_class_alpha());
	_assign_at(@classification, 121, _lexer_class_x());
	_lexer_classify_range(122, 124, _lexer_class_alpha());

	(* Braces, pipe, tilde and the delete character. *)
	_assign_at(@classification, 124, _lexer_class_other());
	_assign_at(@classification, 125, _lexer_class_single());
	_assign_at(@classification, 126, _lexer_class_other());
	_assign_at(@classification, 127, _lexer_class_single());
	_assign_at(@classification, 128, _lexer_class_invalid());

	(* The remaining entries, 129 to 256, all belong to the "other" class. *)
	_lexer_classify_range(129, 257, _lexer_class_other())
end;

(**
 * Assigns the same character class to a run of classification table entries.
 *
 * Parameters:
 * first - One-based index of the first entry to set.
 * limit - One-based index following the last entry to set.
 * character_class - Character class to assign.
 *)
proc _lexer_classify_range(first: Word, limit: Word, character_class: Word);
begin
	.lexer_classify_range_loop;
	if first < limit then
		_assign_at(@classification, first, character_class);
		first := first + 1;

		goto .lexer_classify_range_loop
	end
end;

proc _lexer_get_transition(current_state: Word, character_class: Word);
var
	transition_table: Word;
	row_position: Word;
	column_position: Word;
	target: Word;
begin
	(* Each transition is 8 bytes long (2 words: action and next state).
	   There are 22 character classes, so a transition row is
	   8 * 22 = 176 bytes long.
*)
	(* Both the state and the character class are one-based. *)
	row_position := current_state + -1;
	row_position := row_position * 176;

	column_position := character_class + -1;
	column_position := column_position * 8;

	(* The result is the address of the transition, not its contents. *)
	target := _lexer_get_transition_table() + row_position;

	return target + column_position
end;

(**
 * Sets the action and the next state of a single transition.
 *
 * Parameters:
 * current_state - First index into transitions table.
 * character_class - Second index into transitions table.
 * action - Action to assign.
 * next_state - Next state to assign.
 *)
proc _lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word);
var
	transition: Word;
begin
	transition := _lexer_get_transition(current_state, character_class);

	_lexer_transition_set_action(transition, action);
	_lexer_transition_set_state(transition, next_state)
end;

(* Sets same action and state transition for all character classes in one transition row. *)
(**
 * Parameters:
 * current_state - Current state (Transition state enumeration).
 * default_action - Default action (Transition action enumeration).
 * next_state - Next state (Transition state enumeration).
*)
proc _lexer_default_transition(current_state: Word, default_action: Word, next_state: Word);
var
	character_class: Word;
	class_limit: Word;
begin
	(* Character classes are numbered consecutively, the invalid class being
	   the first one and the "other" class the last one, so the whole row
	   can be filled by counting from one to the other. *)
	character_class := _lexer_class_invalid();
	class_limit := _lexer_class_other() + 1;

	.lexer_default_transition_loop;
	if character_class < class_limit then
		_lexer_set_transition(current_state, character_class, default_action, next_state);
		character_class := character_class + 1;

		goto .lexer_default_transition_loop
	end
end;

(**
 * The transition table describes transitions from one state to another, given
 * a symbol (character class).
 *
 * The table has m rows and n columns, where m is the amount of states and n is
 * the amount of classes. So given the current state and a classified character
 * the table can be used to look up the next state.
 *)
proc _lexer_transitions();
begin
	(* Start state. *)
	_lexer_set_transition(_lexer_state_start(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_colon());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_backslash(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero());
_lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_single(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_greater(), _lexer_action_accumulate(), _lexer_state_greater());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_less(), _lexer_action_accumulate(), _lexer_state_less());
	_lexer_set_transition(_lexer_state_start(), _lexer_class_other(), _lexer_action_none(), _lexer_state_end());

	(* Colon state: a colon followed by the equals sign is a composite token. *)
	_lexer_default_transition(_lexer_state_colon(), _lexer_action_finalize(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_colon(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end());

	(* Identifier state: letters and digits of any class continue the
	   identifier, everything else ends it. *)
	_lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier());
	_lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier());

	(* Decimal state: digits continue the number; a letter directly after
	   the digits goes to the end state with no action. *)
	_lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal());
	_lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal());
	_lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end());

	(* Leading zero: further digits and letters go to the end state with no
	   action, except for "x". *)
	_lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end());
	(* NOTE(review): "x" after a leading zero enters the dot state, which
	   looks like a placeholder for a hexadecimal state - confirm. *)
	_lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_dot());

	(* Greater state. *)
	_lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_greater(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end());

	(* Minus state. *)
	_lexer_default_transition(_lexer_state_minus(), _lexer_action_finalize(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_minus(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end());

	(* Left paren state: an asterisk after the paren opens a comment. *)
	_lexer_default_transition(_lexer_state_left_paren(), _lexer_action_finalize(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_left_paren(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_comment());

	(* Less state. *)
	_lexer_default_transition(_lexer_state_less(), _lexer_action_finalize(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_less(), _lexer_class_equals(), _lexer_action_composite(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_less(), _lexer_class_greater(), _lexer_action_composite(), _lexer_state_end());

	(* Dot state. This row was labelled "Hexadecimal after 0x."; the start
	   state never enters it, only the leading zero state does on "x". *)
	_lexer_default_transition(_lexer_state_dot(), _lexer_action_finalize(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_dot(), _lexer_class_dot(), _lexer_action_composite(), _lexer_state_end());

	(* Comment: everything is accumulated until an asterisk is seen. *)
	_lexer_default_transition(_lexer_state_comment(), _lexer_action_accumulate(), _lexer_state_comment());
	_lexer_set_transition(_lexer_state_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment());
	_lexer_set_transition(_lexer_state_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());

	(* Closing comment: a right paren after the asterisk ends the comment,
	   another asterisk stays in this state, anything else goes back to the
	   comment state. *)
	_lexer_default_transition(_lexer_state_closing_comment(), _lexer_action_accumulate(), _lexer_state_comment());
	_lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_right_paren(), _lexer_action_delimited(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_asterisk(), _lexer_action_accumulate(), _lexer_state_closing_comment());
	_lexer_set_transition(_lexer_state_closing_comment(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());

	(* Character. *)
	_lexer_default_transition(_lexer_state_character(), _lexer_action_accumulate(), _lexer_state_character());
	_lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_character(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_character_escape());

	(* Escape sequence in a character.
*)
	_lexer_default_transition(_lexer_state_character_escape(), _lexer_action_accumulate(), _lexer_state_character());
	_lexer_set_transition(_lexer_state_character_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_character_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());

	(* String. *)
	_lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string());
	_lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_string(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_string_escape());

	(* Escape sequence in a string. *)
	_lexer_default_transition(_lexer_state_string_escape(), _lexer_action_accumulate(), _lexer_state_string());
	_lexer_set_transition(_lexer_state_string_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end());
	_lexer_set_transition(_lexer_state_string_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end())
end;

(**
 * Transition table is saved after character classification table.
 * Each character entry is 1 word long and there are 256 characters.
 * 1024 = 256 * 4
 *
 * Returns the address of the transition table.
 *)
proc _lexer_get_transition_table();
	return @classification + 1024
end;

(**
 * Lexer state is saved after the transition tables.
 * Each transition table entry is 8 bytes long. The table has 17 rows (transition states)
 * and 22 columns (character classes), so 2992 = 8 * 17 * 22.
 *
 * Returns the address of the lexer state.
 *)
proc _lexer_global_state();
	return _lexer_get_transition_table() + 2992
end;

(**
 * Gets pointer to the token start.
 *
 * The pointer is kept in the second word of the lexer state.
*)
proc _lexer_global_get_start();
var
	target: Word;
begin
	target := _lexer_global_state() + 4;
	return _load_word(target)
end;

(**
 * Sets pointer to the token start.
 *
 * Parameters:
 * new_start - New token start pointer.
 *)
proc _lexer_global_set_start(new_start: Word);
var
	target: Word;
begin
	target := _lexer_global_state() + 4;
	_store_word(new_start, target)
end;

(**
 * Gets pointer to the token end.
 *
 * The pointer is kept in the third word of the lexer state.
 *)
proc _lexer_global_get_end();
var
	target: Word;
begin
	target := _lexer_global_state() + 8;
	return _load_word(target)
end;

(**
 * Sets pointer to the token end.
 *
 * Parameters:
 * new_start - New token end pointer (the parameter name is historical).
 *)
proc _lexer_global_set_end(new_start: Word);
var
	target: Word;
begin
	target := _lexer_global_state() + 8;
	_store_word(new_start, target)
end;

(**
 * Accessors for a transition table entry. The action is stored in the first
 * word of the entry and the next state in the second one.
 *)
proc _lexer_transition_get_action(transition: Word);
	return _load_word(transition)
end;

proc _lexer_transition_set_action(transition: Word, action: Word);
begin
	_store_word(action, transition)
end;

proc _lexer_transition_get_state(transition: Word);
	return _load_word(transition + 4)
end;

proc _lexer_transition_set_state(transition: Word, state: Word);
begin
	_store_word(state, transition + 4)
end;

(**
 * Resets the lexer state for reading the next token.
 *
 * The current state, kept in the first word of the lexer state, is set to
 * the start state and the token end is moved back to the token start.
 *)
proc _lexer_reset();
var
	state: Word;
begin
	(* Transition start state is 1. *)
	state := _lexer_global_state();
	_store_word(_lexer_state_start(), state);

	(* The variable is reused to carry the token start pointer. *)
	state := _lexer_global_get_start();
	_lexer_global_set_end(state)
end;

(**
 * One time lexer initialization.
 *
 * Fills the classification and transition tables and points both the token
 * start and the token end to the beginning of the source code.
*)
proc _lexer_initialize();
begin
	_lexer_classifications();
	_lexer_transitions();

	_lexer_global_set_start(@source_code);
	_lexer_global_set_end(@source_code)
end;

(**
 * Looks up the transition for the current lexer state and the character at
 * the token end pointer.
 *
 * Returns the address of the transition table entry.
 *)
proc _lexer_next_transition();
var
	current_character: Word;
	character_class: Word;
	current_state: Word;
begin
	current_character := _lexer_global_get_end();
	current_character := _load_byte(current_character);

	(* The classification table is one-based, hence code + 1.
	   NOTE(review): if _load_byte sign-extends, bytes above 127 produce a
	   negative index here - confirm. *)
	character_class := _get_at(@classification, current_character + 1);

	(* The current state is the first word of the lexer state. *)
	current_state := _lexer_global_state();
	current_state := _load_word(current_state);

	return _lexer_get_transition(current_state, character_class)
end;

(**
 * Token kinds. The procedures serve as enumeration constants, each one
 * returns the number identifying its token kind. The numbers have to be
 * unique, since tokens are told apart by comparing them.
 *)
proc _lexer_token_kind_identifier();
	return 1
end;

proc _lexer_token_kind_const();
	return 2
end;

proc _lexer_token_kind_var();
	return 3
end;

proc _lexer_token_kind_proc();
	return 4
end;

proc _lexer_token_kind_type();
	return 5
end;

proc _lexer_token_kind_begin();
	return 6
end;

proc _lexer_token_kind_end();
	return 7
end;

proc _lexer_token_kind_if();
	return 8
end;

proc _lexer_token_kind_then();
	return 9
end;

proc _lexer_token_kind_else();
	return 10
end;

proc _lexer_token_kind_elsif();
	return 11
end;

proc _lexer_token_kind_while();
	return 12
end;

proc _lexer_token_kind_do();
	return 13
end;

proc _lexer_token_kind_extern();
	return 14
end;

proc _lexer_token_kind_record();
	return 15
end;

proc _lexer_token_kind_union();
	return 16
end;

proc _lexer_token_kind_true();
	return 17
end;

proc _lexer_token_kind_false();
	return 18
end;

proc _lexer_token_kind_nil();
	return 19
end;

proc _lexer_token_kind_and();
	return 20
end;

proc _lexer_token_kind_or();
	return 21
end;

proc _lexer_token_kind_xor();
	return 22
end;

proc _lexer_token_kind_pipe();
	return 23
end;

proc _lexer_token_kind_not();
	return 24
end;

(* This kind used to share the number 24 with the "not" kind, which made the
   two tokens indistinguishable. It gets the first number after the highest
   one in use (64, end of file) so that no other kind has to be renumbered. *)
proc _lexer_token_kind_return();
	return 65
end;

proc _lexer_token_kind_module();
	return 25
end;

proc _lexer_token_kind_program();
	return 26
end;

proc _lexer_token_kind_import();
	return 27
end;

proc _lexer_token_kind_cast();
	return 28
end;

proc _lexer_token_kind_defer();
	return 29
end;

proc
_lexer_token_kind_case(); return 30 end; proc _lexer_token_kind_of(); return 31 end; proc _lexer_token_kind_trait(); return 32 end; proc _lexer_token_kind_left_paren(); return 33 end; proc _lexer_token_kind_right_paren(); return 34 end; proc _lexer_token_kind_left_square(); return 35 end; proc _lexer_token_kind_right_square(); return 36 end; proc _lexer_token_kind_shift_left(); return 37 end; proc _lexer_token_kind_shift_right(); return 38 end; proc _lexer_token_kind_greater_equal(); return 39 end; proc _lexer_token_kind_less_equal(); return 40 end; proc _lexer_token_kind_greater_than(); return 41 end; proc _lexer_token_kind_less_than(); return 42 end; proc _lexer_token_kind_not_equal(); return 43 end; proc _lexer_token_kind_equals(); return 44 end; proc _lexer_token_kind_semicolon(); return 45 end; proc _lexer_token_kind_dot(); return 46 end; proc _lexer_token_kind_comma(); return 47 end; proc _lexer_token_kind_plus(); return 48 end; proc _lexer_token_kind_arrow(); return 49 end; proc _lexer_token_kind_minus(); return 50 end; proc _lexer_token_kind_multiplication(); return 51 end; proc _lexer_token_kind_division(); return 52 end; proc _lexer_token_kind_remainder(); return 53 end; proc _lexer_token_kind_assignment(); return 54 end; proc _lexer_token_kind_colon(); return 55 end; proc _lexer_token_kind_hat(); return 56 end; proc _lexer_token_kind_at(); return 57 end; proc _lexer_token_kind_comment(); return 58 end; proc _lexer_token_kind_string(); return 59 end; proc _lexer_token_kind_character(); return 60 end; proc _lexer_token_kind_integer(); return 61 end; proc _lexer_token_kind_word(); return 62 end; proc _lexer_token_kind_goto(); return 63 end; proc _lexer_token_kind_eof(); return 64 end; proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); var result: Word; begin result := 0; if lhs_length = rhs_length then result := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0 end; return result end; proc 
_lexer_classify_keyword(position_start: Word, position_end: Word); var result: Word; token_length: Word; begin result := _lexer_token_kind_identifier(); token_length := position_end + -position_start; if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then result := _lexer_token_kind_const() elsif _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then result := _lexer_token_kind_var() elsif _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then result := _lexer_token_kind_proc() elsif _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then result := _lexer_token_kind_type() elsif _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then result := _lexer_token_kind_begin() elsif _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then result := _lexer_token_kind_end() elsif _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then result := _lexer_token_kind_return() elsif _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then result := _lexer_token_kind_goto() elsif _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then result := _lexer_token_kind_if() elsif _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then result := _lexer_token_kind_while() elsif _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then result := _lexer_token_kind_then() elsif _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then result := _lexer_token_kind_else() elsif _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then result := _lexer_token_kind_elsif() elsif _lexer_compare_keyword(position_start, token_length, "or", 2) = 1 then result := _lexer_token_kind_or() elsif _lexer_compare_keyword(position_start, token_length, "xor", 2) = 1 then result := _lexer_token_kind_xor() end; return result end; proc _lexer_classify_finalize(start_position: Word); var character: 
Word; result: Word; begin result := 0; character := _load_byte(start_position); if character = ':' then result := _lexer_token_kind_colon() elsif character = '.' then result := _lexer_token_kind_dot() elsif character = '(' then result := _lexer_token_kind_left_paren() elsif character = '-' then result := _lexer_token_kind_minus() elsif character = '<' then result := _lexer_token_kind_less_than() elsif character = '>' then result := _lexer_token_kind_greater_than() end; return result end; proc _lexer_classify_single(start_position: Word); var character: Word; result: Word; begin result := 0; character := _load_byte(start_position); if character = ';' then result := _lexer_token_kind_semicolon() elsif character = ',' then result := _lexer_token_kind_comma() elsif character = ')' then result := _lexer_token_kind_right_paren() elsif character = '@' then result := _lexer_token_kind_at() elsif character = '~' then result := _lexer_token_kind_not() elsif character = '&' then result := _lexer_token_kind_and() elsif character = '+' then result := _lexer_token_kind_plus() elsif character = '*' then result := _lexer_token_kind_multiplication() elsif character = '=' then result := _lexer_token_kind_equals() elsif character = '%' then result := _lexer_token_kind_remainder() elsif character = '/' then result := _lexer_token_kind_division() elsif character = '.' 
then result := _lexer_token_kind_dot() elsif character = '^' then result := _lexer_token_kind_hat() end; return result end; proc _lexer_classify_composite(start_position: Word, one_before_last: Word); var first_character: Word; last_character: Word; result: Word; begin first_character := _load_byte(start_position); last_character := _load_byte(one_before_last); if first_character = ':' then result := _lexer_token_kind_assignment() elsif first_character = '<' then if last_character = '=' then result := _lexer_token_kind_less_equal() elsif last_character = '>' then result := _lexer_token_kind_not_equal() end elsif first_character = '>' then if last_character = '=' then result := _lexer_token_kind_greater_equal() end end; return result end; proc _lexer_classify_delimited(start_position: Word, end_position: Word); var token_length: Word; delimiter: Word; result: Word; begin token_length := end_position + -start_position; delimiter := _load_byte(start_position); if delimiter = '(' then result := _lexer_token_kind_comment() elsif delimiter = '\'' then result := _lexer_token_kind_character() elsif delimiter = '"' then result := _lexer_token_kind_string() end; return result end; proc _lexer_classify_integer(start_position: Word, end_position: Word); begin return _lexer_token_kind_integer() end; proc _lexer_execute_action(action_to_perform: Word, kind: Word); var position_start: Word; position_end: Word; intermediate: Word; begin position_start := _lexer_global_get_start(); position_end := _lexer_global_get_end(); if action_to_perform = _lexer_action_none() then elsif action_to_perform = _lexer_action_accumulate() then _lexer_global_set_end(position_end + 1) elsif action_to_perform = _lexer_action_skip() then _lexer_global_set_start(position_start + 1); _lexer_global_set_end(position_end + 1) elsif action_to_perform = _lexer_action_single() then _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_single(position_start); _store_word(intermediate, kind) 
elsif action_to_perform = _lexer_action_eof() then intermediate := _lexer_token_kind_eof(); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_finalize() then intermediate := _lexer_classify_finalize(position_start); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_composite() then _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_composite(position_start, position_end); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_key_id() then intermediate := _lexer_classify_keyword(position_start, position_end); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_integer() then intermediate := _lexer_classify_integer(position_start, position_end); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_delimited() then _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_delimited(position_start, position_end + 1); _store_word(intermediate, kind) end; end; proc _lexer_execute_transition(kind: Word); var next_transition: Word; next_state: Word; global_state: Word; action_to_perform: Word; begin next_transition := _lexer_next_transition(); next_state := _lexer_transition_get_state(next_transition); action_to_perform := _lexer_transition_get_action(next_transition); global_state := _lexer_global_state(); _store_word(next_state, global_state); _lexer_execute_action(action_to_perform, kind); return next_state end; proc _lexer_advance_token(kind: Word); begin if _lexer_execute_transition(kind) <> _lexer_state_end() then _lexer_advance_token(kind) end end; (** * Reads the next token and writes its type into the address in the kind parameter. *) proc _lexer_read_token(kind: Word); begin _lexer_reset(); _lexer_advance_token(kind) end; (** * Advances the token stream past the last read token. *) proc _lexer_skip_token(); var old_end: Word; begin old_end := _lexer_global_get_end(); _lexer_global_set_start(old_end) end; (* * Entry point. 
*)
proc _start();
var
	last_read: Word;
	offset: Word;
	remaining: Word;
begin
	_lexer_initialize();
	_symbol_table_build();

	(* Read the source from the standard input. *)
	offset := @source_code;

	(* Size of the source_code buffer. When changing it, update the source_code definition as well. *)
	remaining := 81920;

	.start_read;
	(* Request only the space that is still free, so repeated partial reads cannot write past the buffer. *)
	last_read := _read_file(offset, remaining);
	if last_read > 0 then
		offset := offset + last_read;
		remaining := remaining + -last_read;
		goto .start_read
	end;
	_compile();
	_exit(0)
end;