From 2214cd33ae5713f3cea6c28b00bbe2e991452068 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 23 Oct 2025 09:06:24 +0200 Subject: [PATCH] Parse type declarations and variable part --- boot/stage14.elna | 14 +- boot/stage15.elna | 416 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 325 insertions(+), 105 deletions(-) diff --git a/boot/stage14.elna b/boot/stage14.elna index e7e8811..5566518 100644 --- a/boot/stage14.elna +++ b/boot/stage14.elna @@ -1593,18 +1593,22 @@ begin _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - _read_type_expression(); + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; + _lexer_skip_token(); _lexer_read_token(@token_kind); - if token_kind <> _lexer_token_kind_assignment() then + if _lexer_compare_keyword("Array", 5, name, name_length) = 1 then (* Else we assume this is a zeroed 819200 bytes big array. *) _write_z(" .zero 819200\0") - else + elsif token_kind = _lexer_token_kind_assignment() then (* Skip the assignment sign with surrounding whitespaces. *) _lexer_skip_token(); - _compile_global_initializer(); - _lexer_read_token(@token_kind) + _compile_global_initializer() + else + _write_z(" .word 0\n\0") end; (* Skip semicolon and newline. *) diff --git a/boot/stage15.elna b/boot/stage15.elna index d85d95e..174f466 100644 --- a/boot/stage15.elna +++ b/boot/stage15.elna @@ -144,7 +144,10 @@ type assign_statement, if_statement, procedure_declaration, - variable_declaration + variable_declaration, + enumeration_type_expression, + named_type_expression, + type_declaration ); const @@ -189,12 +192,12 @@ var (* To reserve memory just add the value of needed bytes to the memory_free_pointer_variable. *) memory: Array; - compiler_strings_position: Pointer := @compiler_strings; - compiler_strings_length: Word := 0; - label_counter: Word := 0; + compiler_strings_position: Pointer; + compiler_strings_length: Word; + label_counter: Word; (* Points to a segment of free memory. *) - memory_free_pointer: Word := @memory; + memory_free_pointer: Word; (** * Calculates and returns the string token length between quotes, including the @@ -2077,6 +2080,115 @@ begin this^ := value end; +proc _enumeration_type_expression_size(); + return 12 +end; + +proc _enumeration_type_expression_get_members(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _enumeration_type_expression_set_members(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _enumeration_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _enumeration_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _named_type_expression_size(); + return 12 +end; + +proc _named_type_expression_get_name(this: Word); +begin + this := this + 4; + return this^ +end; + +proc _named_type_expression_set_name(this: Word, value: Word); +begin + this := this + 4; + this^ := value +end; + +proc _named_type_expression_get_length(this: Word); +begin + this := this + 8; + return this^ +end; + +proc _named_type_expression_set_length(this: Word, value: Word); +begin + this := this + 8; + this^ := value +end; + +proc _parse_enumeration_type_expression(); +var + token_kind: Word; + enumeration_name: Word; + name_length: Word; + memory_start: Word; + member_count: Word; + result: Word; + type_expression_size: Word; +begin + _lexer_skip_token(); + memory_start := memory_free_pointer; + member_count := 0; + + _lexer_read_token(@token_kind); + if token_kind = LexerTokenKind.right_paren then + goto parse_enumeration_type_expression_end + end; + .parse_enumeration_type_expression_loop; + member_count := member_count + 1; + + enumeration_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() - enumeration_name; + + memory_free_pointer^ := enumeration_name; + memory_free_pointer := memory_free_pointer + 4; + + memory_free_pointer^ := name_length; + memory_free_pointer := memory_free_pointer + 4; + + (* Skip the identifier. *) + _lexer_skip_token(); + + _lexer_read_token(@token_kind); + if token_kind = LexerTokenKind.comma then + _lexer_skip_token(); + _lexer_read_token(@token_kind); + goto parse_enumeration_type_expression_loop + end; + + .parse_enumeration_type_expression_end; + _lexer_skip_token(); + + type_expression_size := _enumeration_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.enumeration_type_expression); + _enumeration_type_expression_set_members(result, memory_start); + _enumeration_type_expression_set_length(result, member_count); + + return result +end; + (** * Reads and creates enumeration type representation. * @@ -2089,52 +2201,17 @@ end; * * Returns enumeration type description. *) -proc _read_type_enumeration(); +proc _read_type_enumeration(parser_node: Word); var - token_kind: Word; - enumeration_name: Word; - name_length: Word; + result: Word; memory_start: Word; member_count: Word; - result: Word; begin - _lexer_skip_token(); - memory_start := memory_free_pointer; - member_count := 0; - - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.right_paren then - goto read_type_enumeration_end - end; - .read_type_enumeration_loop; - member_count := member_count + 1; - - enumeration_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - enumeration_name; - - _store_word(enumeration_name, memory_free_pointer); - memory_free_pointer := memory_free_pointer + 4; - - _store_word(name_length, memory_free_pointer); - memory_free_pointer := memory_free_pointer + 4; - - (* Skip the identifier. *) - _lexer_skip_token(); - - _lexer_read_token(@token_kind); - if token_kind = LexerTokenKind.comma then - _lexer_skip_token(); - _lexer_read_token(@token_kind); - goto read_type_enumeration_loop - end; - - .read_type_enumeration_end; - _lexer_skip_token(); - (* The resulting structure is 16 bytes long. *) - result := memory_free_pointer; - memory_free_pointer := memory_free_pointer + 16; + result := _allocate(16); + + memory_start := _enumeration_type_expression_get_members(parser_node); + member_count := _enumeration_type_expression_get_length(parser_node); (* ENUMERATION_TYPE is 2. *) _type_set_kind(result, 2); @@ -2145,27 +2222,59 @@ begin return _type_info_create(result) end; -proc _read_type_expression(); +proc _parse_named_type_expression(); var - token_kind: Word; + type_expression_size: Word; + result: Word; type_name: Word; name_length: Word; +begin + type_expression_size := _named_type_expression_size(); + result := _allocate(type_expression_size); + + _node_set_kind(result, NodeKind.named_type_expression); + type_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() - type_name; + _named_type_expression_set_name(result, type_name); + _named_type_expression_set_length(result, name_length); + _lexer_skip_token(); + + return result +end; + +proc _parse_type_expression(); +var + token_kind: Word; result: Word; begin result := 0; _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.identifier then - (* Named type. *) - type_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - type_name; - result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); - result := _type_info_get_type(result); - - _lexer_skip_token() + result := _parse_named_type_expression() elsif token_kind = LexerTokenKind.left_paren then - result := _read_type_enumeration() + result := _parse_enumeration_type_expression() + end; + return result +end; + +proc _read_type_expression(parser_node: Word); +var + token_kind: Word; + type_name: Word; + name_length: Word; + result: Word; +begin + token_kind := _node_get_kind(parser_node); + + if token_kind = NodeKind.named_type_expression then + type_name := _named_type_expression_get_name(parser_node); + name_length := _named_type_expression_get_length(parser_node); + + result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); + result := _type_info_get_type(result) + elsif token_kind = NodeKind.enumeration_type_expression then + result := _read_type_enumeration(parser_node) end; return result @@ -2274,11 +2383,11 @@ begin name_length := name_length - name_position; _lexer_skip_token(); - (* Skip colon and space in front of the type expression. *) + (* Skip colon with the type expression. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - - _read_type_expression(); _write_z("\tsw a\0"); _write_i(parameter_index); @@ -2337,11 +2446,11 @@ begin name_length := name_length - name_position; _lexer_skip_token(); - (* Read and skip variable name, colon and the space *) + (* Read and skip colon with the type expression. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - - _read_type_expression(); info := _temporary_info_create(variable_index); _symbol_table_enter(@symbol_table_local, name_position, name_length, info); @@ -2716,50 +2825,118 @@ begin _write_c('\n') end; -proc _compile_type_declaration(); +proc _type_declaration_size(); + return 20 +end; + +proc _type_declaration_get_type(this: Word); +begin + this := this + 16; + return this^ +end; + +proc _type_declaration_set_type(this: Word, value: Word); +begin + this := this + 16; + this^ := value +end; + +proc _parse_type_declaration(); var token_kind: Word; type_name: Word; name_length: Word; - type_info: Word; + parser_node: Word; + result: Word; + declaration_size: Word; begin + _lexer_read_token(@token_kind); type_name := _lexer_global_get_start(); - name_length := _lexer_global_get_end(); - name_length := name_length - type_name; + name_length := _lexer_global_get_end() - type_name; _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - type_info := _read_type_expression(); - _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info); + parser_node := _parse_type_expression(); + declaration_size := _type_declaration_size(); + result := _allocate(declaration_size); + + _node_set_kind(result, NodeKind.type_declaration); + _declaration_set_next(result, 0); + _declaration_set_name(result, type_name); + _declaration_set_length(result, name_length); + _type_declaration_set_type(result, parser_node); _lexer_read_token(@token_kind); - _lexer_skip_token() + _lexer_skip_token(); + + return result end; -proc _compile_type_part(); +proc _read_type_declaration(parser_node: Word); +var + type_name: Word; + name_length: Word; + type_info: Word; +begin + type_name := _declaration_get_name(parser_node); + name_length := _declaration_get_length(parser_node); + + parser_node := _type_declaration_get_type(parser_node); + type_info := _read_type_expression(parser_node); + + _symbol_table_enter(@symbol_table_global, type_name, name_length, type_info) +end; + +proc _parse_type_part(); var token_kind: Word; + parser_node: Word; + result: Word; + current_declaration: Word; begin + result := 0; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind <> LexerTokenKind._type then - goto compile_type_part_end + goto parse_type_part_end end; _lexer_skip_token(); - .compile_type_part_loop; + .parse_type_part_loop; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.identifier then - _compile_type_declaration(); - goto compile_type_part_loop + parser_node := _parse_type_declaration(); + + if result = 0 then + result := parser_node + else + _declaration_set_next(current_declaration, parser_node) + end; + current_declaration := parser_node; + goto parse_type_part_loop end; - .compile_type_part_end + .parse_type_part_end; + return result +end; + +proc _read_type_part(parser_node: Word); +begin + .read_type_part_loop; + if parser_node = 0 then + goto read_type_part_end + end; + + _read_type_declaration(parser_node); + parser_node := _declaration_get_next(parser_node); + + goto read_type_part_loop; + .read_type_part_end end; proc _compile_const_part(); @@ -2811,6 +2988,7 @@ var token_kind: Word; name: Word; name_length: Word; + variable_type: Word; result: Word; declaration_size: Word; begin @@ -2823,8 +3001,7 @@ begin _lexer_skip_token(); _lexer_read_token(@token_kind); _lexer_skip_token(); - _lexer_read_token(@token_kind); - _lexer_skip_token(); + variable_type := _parse_type_expression(); declaration_size := _variable_declaration_size(); result := _allocate(declaration_size); @@ -2833,21 +3010,25 @@ begin _declaration_set_next(result, 0); _declaration_set_name(result, name); _declaration_set_length(result, name_length); - _variable_declaration_set_type(result, 0); + _variable_declaration_set_type(result, variable_type); + + (* Skip semicolon and newline. *) + _lexer_read_token(@token_kind); + _lexer_skip_token(); return result end; -proc _compile_variable_declaration(); +proc _compile_variable_declaration(parser_tree: Word); var name: Word; name_length: Word; token_kind: Word; - parser_tree: Word; + variable_type: Word; begin - parser_tree := _parse_variable_declaration(); name := _declaration_get_name(parser_tree); name_length := _declaration_get_length(parser_tree); + variable_type := _variable_declaration_get_type(parser_tree); _write_z(".type \0"); _write_s(name, name_length); @@ -2857,42 +3038,66 @@ begin _write_c(':'); _lexer_read_token(@token_kind); + name := _named_type_expression_get_name(variable_type); + name_length := _named_type_expression_get_length(variable_type); - if token_kind <> LexerTokenKind.assignment then + if _lexer_compare_keyword("Array", 5, name, name_length) then (* Else we assume this is a zeroed 819200 bytes big array. *) _write_z(" .zero 819200\0") else - (* Skip the assignment sign with surrounding whitespaces. *) - _lexer_skip_token(); - _compile_global_initializer(); - _lexer_read_token(@token_kind) + _write_z(" .word 0\n\0") end; - - (* Skip semicolon and newline. *) - _lexer_read_token(@token_kind); - _lexer_skip_token(); _write_c('\n') end; -proc _compile_var_part(); +proc _parse_var_part(); var + result: Word; token_kind: Word; + variable_node: Word; + current_declaration: Word; begin + result := 0; _lexer_read_token(@token_kind); if token_kind <> LexerTokenKind._var then - goto compile_var_part_end + goto parse_var_part_end end; - (* Skip "var" and newline. *) + (* Skip "var". *) _lexer_skip_token(); - _write_z(".section .data\n\0"); - .compile_var_part_loop; + .parse_var_part_loop; _skip_empty_lines(); _lexer_read_token(@token_kind); if token_kind = LexerTokenKind.identifier then - _compile_variable_declaration(); + variable_node := _parse_variable_declaration(); + + if result = 0 then + result := variable_node + else + _declaration_set_next(current_declaration, variable_node) + end; + current_declaration := variable_node; + goto parse_var_part_loop + end; + + .parse_var_part_end; + return result +end; + +proc _compile_var_part(parser_node: Word); +begin + if parser_node = 0 then + goto compile_var_part_end + end; + _write_z(".section .data\n\0"); + + .compile_var_part_loop; + _compile_variable_declaration(parser_node); + + parser_node := _declaration_get_next(parser_node); + if parser_node <> 0 then goto compile_var_part_loop end; @@ -2905,11 +3110,15 @@ end; proc _compile_module(); var token_kind: Word; + parser_node: Word; begin - _compile_type_part(); + parser_node := _parse_type_part(); + _read_type_part(parser_node); + _compile_const_part(); - _skip_empty_lines(); - _compile_var_part(); + + parser_node := _parse_var_part(); + _compile_var_part(parser_node); _write_z(".section .text\n\n\0"); _write_z(".type _syscall, @function\n_syscall:\n\tmv a7, a6\n\tecall\n\tret\n\n\0"); @@ -3830,6 +4039,12 @@ begin _lexer_global_set_start(old_end) end; +proc _initialize_global_state(); +begin + compiler_strings_position := @compiler_strings; + memory_free_pointer := @memory +end; + (* * Entry point. *) @@ -3838,6 +4053,7 @@ var last_read: Word; offset: Word; begin + _initialize_global_state(); _lexer_initialize(); _symbol_table_build();