From b20632245560b70f0605c0df1e56d766f22b4938 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Fri, 24 Oct 2025 16:07:57 +0200 Subject: [PATCH] Parse type declarations and variable part --- boot/stage14.elna | 14 +- boot/stage15.elna | 1074 ++++++++++++++++++++++++--------------------- 2 files changed, 594 insertions(+), 494 deletions(-) diff --git a/boot/stage14.elna b/boot/stage14.elna index e7e8811..5566518 100644 --- a/boot/stage14.elna +++ b/boot/stage14.elna @@ -1593,18 +1593,22 @@ begin _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - _read_type_expression(); + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; + _lexer_skip_token(); _lexer_read_token(@token_kind); - if token_kind <> _lexer_token_kind_assignment() then + if _lexer_compare_keyword("Array", 5, name, name_length) = 1 then (* Else we assume this is a zeroed 819200 bytes big array. *) _write_z(" .zero 819200\0") - else + elsif token_kind = _lexer_token_kind_assignment() then (* Skip the assignment sign with surrounding whitespaces. *) _lexer_skip_token(); - _compile_global_initializer(); - _lexer_read_token(@token_kind) + _compile_global_initializer() + else + _write_z(" .word 0\n\0") end; (* Skip semicolon and newline. *) diff --git a/boot/stage15.elna b/boot/stage15.elna index d85d95e..d317c5c 100644 --- a/boot/stage15.elna +++ b/boot/stage15.elna @@ -144,40 +144,14 @@ type assign_statement, if_statement, procedure_declaration, - variable_declaration + variable_declaration, + enumeration_type_expression, + named_type_expression, + type_declaration, + module_declaration ); - -const - symbol_builtin_name_int := "Int"; - symbol_builtin_name_word := "Word"; - symbol_builtin_name_pointer := "Pointer"; - symbol_builtin_name_char := "Char"; - symbol_builtin_name_array := "Array"; - - (* Every type info starts with a word describing what type it is. - - PRIMITIVE_TYPE = 1 - ENUMERATION_TYPE = 2 - - Primitive types have only type size. *) - symbol_builtin_type_int := S(1, 4); - symbol_builtin_type_word := S(1, 4); - symbol_builtin_type_pointer := S(1, 4); - symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_array := S(1, 4); - - (* Info objects start with a word describing its type. - - TYPE_INFO = 1 - PARAMETER_INFO = 2 - TEMPORARY_INFO = 3 - - Type info has the type it belongs to. *) - symbol_type_info_int := S(1, @symbol_builtin_type_int); - symbol_type_info_word := S(1, @symbol_builtin_type_word); - symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); - symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_array := S(1, @symbol_builtin_type_array); + InfoKind = (type_info, parameter_info, temporary_info); + TypeKind = (primitive, enumeration); var source_code: Array; @@ -186,15 +160,15 @@ var symbol_table_local: Array; classification: Array; - (* To reserve memory just add the value of needed bytes to the memory_free_pointer_variable. *) + (* To reserve memory just add the value of needed bytes to the memory_free_pointer variable. *) memory: Array; - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; + compiler_strings_position: Word; + compiler_strings_length: Word; + label_counter: Word; (* Points to a segment of free memory. *) - memory_free_pointer: Word := @memory; + memory_free_pointer: Word; (** * Calculates and returns the string token length between quotes, including the @@ -535,21 +509,27 @@ proc _integer_literal_node_size(); end; proc _integer_literal_node_get_value(this: Word); - return _load_word(this + 4) +begin + this := this + 4; + return this^ end; proc _integer_literal_node_set_value(this: Word, value: Word); begin - _store_word(value, this + 4) + this := this + 4; + this^ := value end; proc _integer_literal_node_get_length(this: Word); - return _load_word(this + 8) +begin + this := this + 8; + return this^ end; proc _integer_literal_node_set_length(this: Word, value: Word); begin - _store_word(value, this + 8) + this := this + 8; + this^ := value end; proc _parse_integer_literal(); @@ -1530,7 +1510,8 @@ begin .compile_enumeration_value_members; if members_length > 0 then member_name := members^; - member_length := _load_word(members + 4); + member_length := members + 4; + member_length := member_length^; if _lexer_compare_keyword(value_name, name_length, member_name, member_length) then else @@ -2077,6 +2058,115 @@ begin this^ := value end; +proc _enumeration_type_expression_size(); + return 12 +end; + +proc _enumeration_type_expression_get_members(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _enumeration_type_expression_set_members(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _enumeration_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _enumeration_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _named_type_expression_size(); + return 12 +end; + +proc _named_type_expression_get_name(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _named_type_expression_set_name(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _named_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _named_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _parse_enumeration_type_expression(); +var + token_kind: Word; + enumeration_name: Word; + name_length: Word; + memory_start: Word; + member_count: Word; + result: Word; + type_expression_size: Word; +begin + _lexer_skip_token(); + memory_start := memory_free_pointer; + member_count := 0; + + _lexer_read_token(@token_kind); + if token_kind = LexerTokenKind.right_paren then + goto parse_enumeration_type_expression_end + end; + .parse_enumeration_type_expression_loop; + member_count := member_count + 1; + + enumeration_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() - enumeration_name; + + memory_free_pointer^ := enumeration_name; + memory_free_pointer := memory_free_pointer + 4; + + memory_free_pointer^ := name_length; + memory_free_pointer := memory_free_pointer + 4; + + (* Skip the identifier. *) + _lexer_skip_token(); + + _lexer_read_token(@token_kind); + if token_kind = LexerTokenKind.comma then + _lexer_skip_token(); + _lexer_read_token(@token_kind); + goto parse_enumeration_type_expression_loop + end; + + .parse_enumeration_type_expression_end; + _lexer_skip_token(); + + type_expression_size := _enumeration_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.enumeration_type_expression); + _enumeration_type_expression_set_members(result, memory_start); + _enumeration_type_expression_set_length(result, member_count); + + return result +end; + (** * Reads and creates enumeration type representation. * @@ -2089,55 +2179,19 @@ end; * * Returns enumeration type description. *) -proc _read_type_enumeration(); +proc _read_type_enumeration(parser_node: Word); var - token_kind: Word; - enumeration_name: Word; - name_length: Word; + result: Word; memory_start: Word; member_count: Word; - result: Word; begin - _lexer_skip_token(); - memory_start := memory_free_pointer; - member_count := 0; - - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.right_paren then - goto read_type_enumeration_end - end; - .read_type_enumeration_loop; - member_count := member_count + 1; - - enumeration_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - enumeration_name; - - _store_word(enumeration_name, memory_free_pointer); - memory_free_pointer := memory_free_pointer + 4; - - _store_word(name_length, memory_free_pointer); - memory_free_pointer := memory_free_pointer + 4; - - (* Skip the identifier. *) - _lexer_skip_token(); - - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.comma then - _lexer_skip_token(); - _lexer_read_token(@token_kind); - goto read_type_enumeration_loop - end; - - .read_type_enumeration_end; - _lexer_skip_token(); - (* The resulting structure is 16 bytes long. *) - result := memory_free_pointer; - memory_free_pointer := memory_free_pointer + 16; + result := _allocate(16); - (* ENUMERATION_TYPE is 2. *) - _type_set_kind(result, 2); + memory_start := _enumeration_type_expression_get_members(parser_node); + member_count := _enumeration_type_expression_get_length(parser_node); + + _type_set_kind(result, TypeKind.enumeration); _type_set_size(result, 4); _enumeration_type_set_members(result, memory_start); _enumeration_type_set_length(result, member_count); @@ -2145,34 +2199,68 @@ begin return _type_info_create(result) end; -proc _read_type_expression(); +proc _parse_named_type_expression(); var - token_kind: Word; + type_expression_size: Word; + result: Word; type_name: Word; name_length: Word; +begin + type_expression_size := _named_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.named_type_expression); + type_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() - type_name; + _named_type_expression_set_name(result, type_name); + _named_type_expression_set_length(result, name_length); + _lexer_skip_token(); + + return result +end; + +proc _parse_type_expression(); +var + token_kind: Word; result: Word; begin result := 0; _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.identifier then - (* Named type. *) - type_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - type_name; - result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); - result := _type_info_get_type(result); - - _lexer_skip_token() + result := _parse_named_type_expression() elsif token_kind = LexerTokenKind.left_paren then - result := _read_type_enumeration() + result := _parse_enumeration_type_expression() + end; + return result +end; + +proc _read_type_expression(parser_node: Word); +var + token_kind: Word; + type_name: Word; + name_length: Word; + result: Word; +begin + token_kind := _node_get_kind(parser_node); + + if token_kind = NodeKind.named_type_expression then + type_name := _named_type_expression_get_name(parser_node); + name_length := _named_type_expression_get_length(parser_node); + + result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); + result := _type_info_get_type(result) + elsif token_kind = NodeKind.enumeration_type_expression then + result := _read_type_enumeration(parser_node) end; return result end; proc _type_info_get_type(this: Word); - return _load_word(this + 4) +begin + this := this + 4; + return this^ end; (** @@ -2187,14 +2275,13 @@ var begin result := memory_free_pointer; current_word := result; - (* 2 is INFO_PARAMETER *) - _store_word(2, current_word); + current_word^ := InfoKind.parameter_info; current_word := current_word + 4; (* Calculate the stack offset: 88 - (4 * parameter_counter) *) offset := parameter_index * 4; - _store_word(88 - offset, current_word); + current_word^ := 88 - offset; memory_free_pointer := current_word + 4; @@ -2214,11 +2301,10 @@ var begin result := memory_free_pointer; current_word := result; - (* 1 is INFO_TYPE *) - _store_word(1, current_word); + current_word^ := InfoKind.type_info; current_word := current_word + 4; - _store_word(type_representation, current_word); + current_word^ := type_representation; memory_free_pointer := current_word + 4; @@ -2237,48 +2323,29 @@ var begin result := memory_free_pointer; current_word := result; - (* 3 is INFO_TEMPORARY *) - _store_word(3, current_word); - + current_word^ := InfoKind.temporary_info; current_word := current_word + 4; (* Calculate the stack offset: 4 * variable_counter. *) - _store_word(temporary_index * 4, current_word); + current_word^ := temporary_index * 4; memory_free_pointer := current_word + 4; return result end; -proc _temporary_info_get_offset(this: Word); -begin - this := this + 4; - return this^ -end; - (** * Parameters: * parameter_index - Parameter index. *) -proc _read_procedure_parameter(parameter_index: Word); +proc _read_procedure_parameter(parser_node: Word, parameter_index: Word); var name_length: Word; info: Word; name_position: Word; - token_kind: Word; begin - (* Read the parameter name. *) - _lexer_read_token(@token_kind); - name_position := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - name_position; - _lexer_skip_token(); - - (* Skip colon and space in front of the type expression. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _read_type_expression(); + name_position := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); _write_z("\tsw a\0"); _write_i(parameter_index); @@ -2293,85 +2360,40 @@ begin _write_z("(sp)\n\0") end; -proc _read_procedure_parameters(); -var - parameter_counter: Word; - token_kind: Word; -begin - (* Skip open paren. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - parameter_counter := 0; - - .compile_procedure_prologue_skip; - _lexer_read_token(@token_kind); - - if token_kind <> LexerTokenKind.right_paren then - _read_procedure_parameter(parameter_counter); - parameter_counter := parameter_counter + 1; - _lexer_read_token(@token_kind); - - if token_kind = LexerTokenKind.comma then - _lexer_skip_token(); - goto compile_procedure_prologue_skip - end - end; - (* Skip close paren. *) - _lexer_skip_token() -end; - (** * Parameters: * variable_index - Variable index. *) -proc _read_procedure_temporary(variable_index: Word); +proc _read_procedure_temporary(parser_node: Word, variable_index: Word); var name_length: Word; info: Word; name_position: Word; - token_kind: Word; begin - _lexer_read_token(@token_kind); - name_position := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - name_position; - _lexer_skip_token(); - - (* Read and skip variable name, colon and the space *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _read_type_expression(); + name_position := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); info := _temporary_info_create(variable_index); - _symbol_table_enter(@symbol_table_local, name_position, name_length, info); - - (* Skip semicolon and newline after the variable declaration *) - _lexer_read_token(@token_kind); - _lexer_skip_token() + _symbol_table_enter(@symbol_table_local, name_position, name_length, info) end; -proc _read_procedure_temporaries(); +proc _read_procedure_temporaries(parser_node: Word); var temporary_counter: Word; - token_kind: Word; begin - _lexer_read_token(@token_kind); + temporary_counter := 0; - if token_kind = LexerTokenKind._var then - _lexer_skip_token(); - temporary_counter := 0; + .read_procedure_temporaries_loop; + if parser_node = 0 then + goto read_procedure_temporaries_end + end; + _read_procedure_temporary(parser_node, temporary_counter); - .read_local_variables_loop; - _lexer_read_token(@token_kind); + temporary_counter := temporary_counter + 1; + parser_node := _declaration_get_next(parser_node); + goto read_procedure_temporaries_loop; - if token_kind = LexerTokenKind.identifier then - _read_procedure_temporary(temporary_counter); - - temporary_counter := temporary_counter + 1; - goto read_local_variables_loop - end - end + .read_procedure_temporaries_end end; proc _declaration_get_next(this: Word); @@ -2410,9 +2432,9 @@ begin this^ := value end; -(* Kind + next declaration pointer + 7 * 4 arguments + procedure name + statement list pointer + temporary list pointer. *) +(* Kind + next declaration pointer + argument list + procedure name + statement list pointer + temporary list pointer. *) proc _procedure_declaration_size(); - return 108 + return 28 end; proc _procedure_declaration_get_body(this: Word); @@ -2439,27 +2461,16 @@ begin this^ := value end; -proc _procedure_declaration_get_parameter(this: Word, n: Word, field: Word); +proc _procedure_declaration_get_parameters(this: Word); begin - field := field * 4; - n := n * 12; - this := this + 12; - this := this + n; - this := this + field; + this := this + 24; return this^ end; -proc _procedure_declaration_set_parameter(this: Word, n: Word, name: Word, length: Word, type_expression: Word); +proc _procedure_declaration_set_parameters(this: Word, value: Word); begin - n := n * 12; this := this + 24; - this := this + n; - this := this - 4; - this^ := type_expression; - this := this - 4; - this^ := length; - this := this - 4; - this^ := name + this^ := value end; proc _parse_procedure_declaration(); @@ -2469,7 +2480,7 @@ var token_kind: Word; result: Word; declaration_size: Word; - parameter_counter: Word; + parameter_head: Word; begin declaration_size := _procedure_declaration_size(); result := _allocate(declaration_size); @@ -2492,64 +2503,67 @@ begin (* Skip open paren. *) _lexer_read_token(@token_kind); _lexer_skip_token(); - parameter_counter := 0; + parameter_head := 0; - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.right_paren then - goto parse_procedure_declaration_parameters - end; .parse_procedure_declaration_parameter; - - parameter_counter := parameter_counter + 1; - name_pointer := _lexer_global_get_start(); - name_length := _lexer_global_get_end() - name_pointer; - - _lexer_skip_token(); - (* Skip colon in front of the type expression and the type itself. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - - _procedure_declaration_set_parameter(result, parameter_counter, name_pointer, name_length, 0); _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.comma then - goto parse_procedure_declaration_parameter + if token_kind <> LexerTokenKind.right_paren then + name_pointer := _parse_variable_declaration(); + if parameter_head = 0 then + parameter_head := name_pointer + else + _declaration_set_next(name_length, name_pointer) + end; + name_length := name_pointer; + + _lexer_read_token(@token_kind); + + if token_kind = LexerTokenKind.comma then + _lexer_skip_token(); + goto parse_procedure_declaration_parameter + end end; - - .parse_procedure_declaration_parameters; - (* Skip right paren and semicolon. *) + (* Skip close paren. *) _lexer_skip_token(); + _procedure_declaration_set_parameters(result, parameter_head); + + (* Skip semicolon and newline. *) _lexer_read_token(@token_kind); _lexer_skip_token(); + parameter_head := _parse_var_part(); + _procedure_declaration_set_temporaries(result, parameter_head); + + (* Skip semicolon, "begin" and newline. *) _lexer_read_token(@token_kind); + if token_kind = LexerTokenKind._begin then + _lexer_skip_token(); + parameter_head := _parse_statements() + elsif token_kind = LexerTokenKind._return then + parameter_head := _parse_return_statement() + end; + _procedure_declaration_set_body(result, parameter_head); + + (* Skip the "end" keyword. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); return result end; -proc _compile_procedure_declaration(); -begin -end; - -proc _compile_procedure(); +proc _compile_procedure_declaration(parser_node: Word); var name_pointer: Word; name_length: Word; - token_kind: Word; - parser_node: Word; + parameter_counter: Word; + current_parameter: Word; begin - (* Skip "proc ". *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - (* Clear local symbol table. *) symbol_table_local := 0; - _lexer_read_token(@token_kind); - name_pointer := _lexer_global_get_start(); - name_length := _lexer_global_get_end() - name_pointer; + name_pointer := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); (* Write .type _procedure_name, @function. *) _write_z(".type \0"); @@ -2561,35 +2575,77 @@ begin _write_s(name_pointer, name_length); _write_z(":\n\0"); - (* Skip procedure name. *) - _lexer_skip_token(); + (* Write the prologue. *) _write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0"); - _read_procedure_parameters(); - (* Skip semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _read_procedure_temporaries(); - - (* Skip semicolon, "begin" and newline. *) - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind._begin then - _lexer_skip_token(); - parser_node := _parse_statements(); - _compile_statements(parser_node) - elsif token_kind = LexerTokenKind._return then - parser_node := _parse_return_statement(parser_node); - _compile_return_statement(parser_node) + current_parameter := _procedure_declaration_get_parameters(parser_node); + parameter_counter := 0; + .compile_procedure_declaration_parameter; + if current_parameter = 0 then + goto compile_procedure_declaration_end end; + _read_procedure_parameter(current_parameter, parameter_counter); + parameter_counter := parameter_counter + 1; + + current_parameter := _declaration_get_next(current_parameter); + goto compile_procedure_declaration_parameter; + + .compile_procedure_declaration_end; + + current_parameter := _procedure_declaration_get_temporaries(parser_node); + _read_procedure_temporaries(current_parameter); + + current_parameter := _procedure_declaration_get_body(parser_node); + _compile_statements(current_parameter); (* Write the epilogue. *) - _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); + _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0") +end; - (* Skip the "end" keyword, semicolon and newline. *) +proc _parse_procedures(); +var + parser_node: Word; + result: Word; + current_declaration: Word; + token_kind: Word; +begin + result := 0; + + .parse_procedures_loop; + _skip_empty_lines(); _lexer_read_token(@token_kind); - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token() + + if token_kind = LexerTokenKind._proc then + parser_node := _parse_procedure_declaration(); + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + + (* Skip semicolon. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + goto parse_procedures_loop + end; + return result +end; + +proc _compile_procedures(parser_node: Word); +var + result: Word; +begin + .compile_procedures_loop; + if parser_node = 0 then + goto compile_procedures_end + end; + _compile_procedure_declaration(parser_node); + parser_node := _declaration_get_next(parser_node); + goto compile_procedures_loop; + + .compile_procedures_end end; (** @@ -2609,185 +2665,119 @@ begin end end; -(** - * Compile global variable initializer. - *) -proc _compile_global_initializer(); -var - current_byte: Word; - length: Word; - token_kind: Word; - token_start: Word; -begin - _lexer_read_token(@token_kind); - token_start := _lexer_global_get_start(); - current_byte := _load_byte(token_start); - if token_kind = LexerTokenKind.string then - _write_z("\n\t.word strings + \0"); - length := _string_length(token_start); - - _add_string(token_start); - _write_i(); - - (* Skip the quoted string. *) - _lexer_skip_token(); - - goto compile_global_initializer_end - elsif current_byte = 'S' then - (* Skip "S(". *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _lexer_read_token(@token_kind); - - if token_kind = LexerTokenKind.right_paren then - goto compile_global_initializer_closing - end; - goto compile_global_initializer_loop - elsif token_kind = LexerTokenKind.at then - (* Skip @. *) - _lexer_skip_token(); - _write_z("\n\t.word \0"); - _lexer_read_token(@token_kind); - - token_start := _lexer_global_get_start(); - length := _lexer_global_get_end(); - length := length - token_start; - _write_s(token_start, length); - - _lexer_skip_token(); - goto compile_global_initializer_end - elsif token_kind = LexerTokenKind.integer then - _write_z("\n\t.word \0"); - - length := _lexer_global_get_end(); - length := length - token_start; - _write_s(token_start, length); - _lexer_skip_token(); - - goto compile_global_initializer_end - end; - - .compile_global_initializer_loop; - _compile_global_initializer(); - - _lexer_read_token(@token_kind); - if token_kind <> LexerTokenKind.right_paren then - (* Skip comma and whitespace after it. *) - _lexer_skip_token(); - - goto compile_global_initializer_loop - end; - - .compile_global_initializer_closing; - (* Skip ")" *) - _lexer_skip_token(); - - .compile_global_initializer_end +proc _type_declaration_size(); + return 20 end; -proc _compile_constant_declaration(); -var - name: Word; - name_length: Word; - token_kind: Word; +proc _type_declaration_get_type(this: Word); begin - name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - name; - - _write_z(".type \0"); - _write_s(name, name_length); - _write_z(", @object\n\0"); - - _write_s(name, name_length); - _write_c(':'); - - (* Skip the constant name with assignment sign and surrounding whitespaces. *) - _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _compile_global_initializer(); - - (* Skip semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); - _write_c('\n') + this := this + 16; + return this^ end; -proc _compile_type_declaration(); +proc _type_declaration_set_type(this: Word, value: Word); +begin + this := this + 16; + this^ := value +end; + +proc _parse_type_declaration(); var token_kind: Word; type_name: Word; name_length: Word; - type_info: Word; + parser_node: Word; + result: Word; + declaration_size: Word; begin + _lexer_read_token(@token_kind); type_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - type_name; + name_length := _lexer_global_get_end() - type_name; _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - type_info := _read_type_expression(); - _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info); + parser_node := _parse_type_expression(); + declaration_size := _type_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.type_declaration); + _declaration_set_next(result, 0); + _declaration_set_name(result, type_name); + _declaration_set_length(result, name_length); + _type_declaration_set_type(result, parser_node); _lexer_read_token(@token_kind); - _lexer_skip_token() + _lexer_skip_token(); + + return result end; -proc _compile_type_part(); +proc _read_type_declaration(parser_node: Word); +var + type_name: Word; + name_length: Word; + type_info: Word; +begin + type_name := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + parser_node := _type_declaration_get_type(parser_node); + type_info := _read_type_expression(parser_node); + + _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info) +end; + +proc _parse_type_part(); var token_kind: Word; + parser_node: Word; + result: Word; + current_declaration: Word; begin + result := 0; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind <> LexerTokenKind._type then - goto compile_type_part_end + goto parse_type_part_end end; _lexer_skip_token(); - .compile_type_part_loop; + .parse_type_part_loop; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.identifier then - _compile_type_declaration(); - goto compile_type_part_loop + parser_node := _parse_type_declaration(); + + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + goto parse_type_part_loop end; - .compile_type_part_end + .parse_type_part_end; + return result end; -proc _compile_const_part(); -var - token_kind: Word; +proc _read_type_part(parser_node: Word); begin - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind <> LexerTokenKind._const then - goto compile_const_part_end - end; - (* Skip "const" with the newline after it. *) - _lexer_skip_token(); - _write_z(".section .rodata # Compiled from const section.\n\n\0"); - - .compile_const_part_loop; - _skip_empty_lines(); - - (* If the character at the line beginning is not indentation, - it is probably the next code section. *) - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.identifier then - _compile_constant_declaration(); - goto compile_const_part_loop + .read_type_part_loop; + if parser_node = 0 then + goto read_type_part_end end; - .compile_const_part_end + _read_type_declaration(parser_node); + parser_node := _declaration_get_next(parser_node); + + goto read_type_part_loop; + .read_type_part_end end; proc _variable_declaration_size(); @@ -2811,6 +2801,7 @@ var token_kind: Word; name: Word; name_length: Word; + variable_type: Word; result: Word; declaration_size: Word; begin @@ -2823,8 +2814,7 @@ begin _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); + variable_type := _parse_type_expression(); declaration_size := _variable_declaration_size(); result := _allocate(declaration_size); @@ -2833,21 +2823,21 @@ begin _declaration_set_next(result, 0); _declaration_set_name(result, name); _declaration_set_length(result, name_length); - _variable_declaration_set_type(result, 0); + _variable_declaration_set_type(result, variable_type); return result end; -proc _compile_variable_declaration(); +proc _compile_variable_declaration(parser_tree: Word); var name: Word; name_length: Word; token_kind: Word; - parser_tree: Word; + variable_type: Word; begin - parser_tree := _parse_variable_declaration(); name := _declaration_get_name(parser_tree); name_length := _declaration_get_length(parser_tree); + variable_type := _variable_declaration_get_type(parser_tree); _write_z(".type \0"); _write_s(name, name_length); @@ -2857,85 +2847,164 @@ begin _write_c(':'); _lexer_read_token(@token_kind); + name := _named_type_expression_get_name(variable_type); + name_length := _named_type_expression_get_length(variable_type); - if token_kind <> LexerTokenKind.assignment then - (* Else we assume this is a zeroed 819200 bytes big array. *) - _write_z(" .zero 819200\0") + if _lexer_compare_keyword("Array", 5, name, name_length) then + (* Else we assume this is a zeroed 409600 bytes big array. *) + _write_z(" .zero 409600\0") else - (* Skip the assignment sign with surrounding whitespaces. *) - _lexer_skip_token(); - _compile_global_initializer(); - _lexer_read_token(@token_kind) + _write_z(" .word 0\n\0") end; - - (* Skip semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); _write_c('\n') end; -proc _compile_var_part(); +proc _parse_var_part(); var + result: Word; token_kind: Word; + variable_node: Word; + current_declaration: Word; begin + result := 0; _lexer_read_token(@token_kind); if token_kind <> LexerTokenKind._var then - goto compile_var_part_end + goto parse_var_part_end end; - (* Skip "var" and newline. *) + (* Skip "var". *) _lexer_skip_token(); - _write_z(".section .data\n\0"); - .compile_var_part_loop; + .parse_var_part_loop; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.identifier then - _compile_variable_declaration(); + variable_node := _parse_variable_declaration(); + + (* Skip semicolon. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); + + if result = 0 then + result := variable_node + else + _declaration_set_next(current_declaration, variable_node) + end; + current_declaration := variable_node; + goto parse_var_part_loop + end; + + .parse_var_part_end; + return result +end; + +proc _compile_var_part(parser_node: Word); +begin + if parser_node = 0 then + goto compile_var_part_end + end; + _write_z(".section .data\n\0"); + + .compile_var_part_loop; + _compile_variable_declaration(parser_node); + + parser_node := _declaration_get_next(parser_node); + if parser_node <> 0 then goto compile_var_part_loop end; .compile_var_part_end end; +proc _module_declaration_size(); + return 16 +end; + +proc _module_declaration_get_types(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _module_declaration_set_types(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _module_declaration_get_globals(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _module_declaration_set_globals(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _module_declaration_get_procedures(this: Word); +begin + this := this + 12; + return this^ +end; + +proc _module_declaration_set_procedures(this: Word, value: Word); +begin + this := this + 12; + this^ := value +end; + +proc _parse_module_declaration(); +var + parser_node: Word; + declaration_size: Word; + result: Word; +begin + declaration_size := _module_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.module_declaration); + + parser_node := _parse_type_part(); + _module_declaration_set_types(result, parser_node); + + parser_node := _parse_var_part(); + _module_declaration_set_globals(result, parser_node); + + parser_node := _parse_procedures(); + _module_declaration_set_procedures(result, parser_node); + + return result +end; + (** * Process the source code and print the generated code. *) -proc _compile_module(); -var - token_kind: Word; -begin - _compile_type_part(); - _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); - - _write_z(".section .text\n\n\0"); - _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); - _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); - _write_z(".type _load_word, @function\n_load_word:\n\tlw a0, (a0)\nret\n\n\0"); - _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); - _write_z(".type _store_word, @function\n_store_word:\n\tsw a0, (a1)\nret\n\n\0"); - - .compile_module_loop; - _skip_empty_lines(); - _lexer_read_token(@token_kind); - - if token_kind = LexerTokenKind._proc then - _compile_procedure(); - goto compile_module_loop - end -end; - -proc _compile(); +proc _compile_module_declaration(parser_node: Word); var + current_part: Word; compiler_strings_copy: Word; compiler_strings_end: Word; current_byte: Word; begin _write_z(".globl _start\n\n\0"); - _compile_module(); + + current_part := _module_declaration_get_types(parser_node); + _read_type_part(current_part); + + current_part := _module_declaration_get_globals(parser_node); + _compile_var_part(current_part); + + _write_z(".section .text\n\n\0"); + _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); + _write_z(".type _load_byte, @function\n_load_byte:\n\tlb a0, (a0)\nret\n\n\0"); + _write_z(".type _store_byte, @function\n_store_byte:\n\tsb a0, (a1)\nret\n\n\0"); + + current_part := _module_declaration_get_procedures(parser_node); + _compile_procedures(current_part); _write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0"); _write_c('"'); @@ -2943,27 +3012,35 @@ begin compiler_strings_copy := @compiler_strings; compiler_strings_end := compiler_strings_position; - .compile_loop; + .compile_module_declaration_loop; if compiler_strings_copy < compiler_strings_end then current_byte := _load_byte(compiler_strings_copy); compiler_strings_copy := compiler_strings_copy + 1; _write_c(current_byte); - goto compile_loop + goto compile_module_declaration_loop end; _write_c('"'); _write_c('\n') end; +proc _compile(); +var + parser_node: Word; +begin + parser_node := _parse_module_declaration(); + _compile_module_declaration(parser_node) +end; + (** * Terminates the program. a0 contains the return code. * * Parameters: * a0 - Status code. *) -proc _exit(); +proc _exit(status: Word); begin - _syscall(0, 0, 0, 0, 0, 0, 93) + _syscall(status, 0, 0, 0, 0, 0, 93) end; (** @@ -2998,7 +3075,8 @@ begin (* Symbol name pointer and length. *) current_name := symbol_table^; - current_length := _load_word(symbol_table + 4); + current_length := symbol_table + 4; + current_length := current_length^; (* If lengths don't match, exit and return nil. *) if name_length <> current_length then @@ -3009,7 +3087,8 @@ begin goto symbol_table_lookup_repeat end; (* Otherwise, the symbol is found. *) - result := _load_word(symbol_table + 8); + result := symbol_table + 8; + result := result^; goto symbol_table_lookup_end; .symbol_table_lookup_repeat; @@ -3043,28 +3122,35 @@ begin symbol_pointer := symbol_pointer + 4; symbol_pointer := symbol_table + symbol_pointer; - _store_word(symbol_name, symbol_pointer); + symbol_pointer^ := symbol_name; symbol_pointer := symbol_pointer + 4; - _store_word(name_length, symbol_pointer); + symbol_pointer^ := name_length; symbol_pointer := symbol_pointer + 4; - _store_word(symbol, symbol_pointer); + symbol_pointer^ := symbol; (* Increment the symbol table length. *) table_length := table_length + 1; - _store_word(table_length, symbol_table) + symbol_table^ := table_length end; proc _symbol_table_build(); +var + current_info: Word; + current_type: Word; begin (* Set the table length to 0. *) - _store_word(0, @symbol_table_global); + symbol_table_global := 0; + + current_type := _allocate(8); + _type_set_kind(current_type, TypeKind.primitive); + _type_set_size(current_type, 4); (* Enter built-in symbols. *) - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_int, 3, @symbol_type_info_int); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_array, 5, @symbol_type_info_array) + current_info := _type_info_create(current_type); + _symbol_table_enter(@symbol_table_global, "Word", 4, current_info); + + current_info := _type_info_create(current_type); + _symbol_table_enter(@symbol_table_global, "Array", 5, current_info) end; (** @@ -3507,22 +3593,25 @@ begin target^ := new_start end; -proc _lexer_transition_get_action(transition: Word); - return transition^ +proc _lexer_transition_get_action(this: Word); + return this^ end; -proc _lexer_transition_set_action(transition: Word, action: Word); +proc _lexer_transition_set_action(this: Word, value: Word); begin - _store_word(action, transition) + this^ := value end; -proc _lexer_transition_get_state(transition: Word); - return _load_word(transition + 4) -end; - -proc _lexer_transition_set_state(transition: Word, state: Word); +proc _lexer_transition_get_state(this: Word); begin - _store_word(state, transition + 4) + this := this + 4; + return this^ +end; + +proc _lexer_transition_set_state(this: Word, value: Word); +begin + this := this + 4; + this^ := value end; (** @@ -3534,7 +3623,7 @@ var begin (* Transition start state is 1. *) state := _lexer_global_state(); - _store_word(LexerState.start, state); + state^ := LexerState.start; state := _lexer_global_get_start(); _lexer_global_set_end(state) @@ -3755,29 +3844,29 @@ begin _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_single(position_start); - _store_word(intermediate, kind) + kind^ := intermediate elsif action_to_perform = LexerAction.eof then intermediate := LexerTokenKind.eof; - _store_word(intermediate, kind) + kind^ := intermediate elsif action_to_perform = LexerAction.finalize then intermediate := _lexer_classify_finalize(position_start); - _store_word(intermediate, kind) + kind^ := intermediate elsif action_to_perform = LexerAction.composite then _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_composite(position_start, position_end); - _store_word(intermediate, kind) + kind^ := intermediate elsif action_to_perform = LexerAction.key_id then intermediate := _lexer_classify_keyword(position_start, position_end); - _store_word(intermediate, kind) + kind^ := intermediate elsif action_to_perform = LexerAction.integer then intermediate := _lexer_classify_integer(position_start, position_end); - _store_word(intermediate, kind) + kind^ := intermediate elsif action_to_perform = LexerAction.delimited then _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_delimited(position_start, position_end + 1); - _store_word(intermediate, kind) + kind^ := intermediate end; end; @@ -3794,7 +3883,7 @@ begin global_state := _lexer_global_state(); - _store_word(next_state, global_state); + global_state^ := next_state; _lexer_execute_action(action_to_perform, kind); return next_state @@ -3830,6 +3919,12 @@ begin _lexer_global_set_start(old_end) end; +proc _initialize_global_state(); +begin + compiler_strings_position := @compiler_strings; + memory_free_pointer := @memory +end; + (* * Entry point. *) @@ -3838,6 +3933,7 @@ var last_read: Word; offset: Word; begin + _initialize_global_state(); _lexer_initialize(); _symbol_table_build(); @@ -3846,7 +3942,7 @@ begin .start_read; (* Second argument is buffer size. Modifying update the source_code definition. *) - last_read := _read_file(offset, 819200); + last_read := _read_file(offset, 409600); if last_read > 0 then offset := offset + last_read; goto start_read