From 65fb54486651b49bb97baf2e64e5e861f38f8b3a Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Tue, 3 Mar 2026 22:33:59 +0100 Subject: [PATCH] Allow peeking and reading tokens --- boot/stage20/cl.elna | 628 ++++++++++++++++++++----------------------- 1 file changed, 290 insertions(+), 338 deletions(-) diff --git a/boot/stage20/cl.elna b/boot/stage20/cl.elna index d47c418..8764fcf 100644 --- a/boot/stage20/cl.elna +++ b/boot/stage20/cl.elna @@ -438,16 +438,17 @@ type _goto, eof ); - ElnaLexerCursor = record - state: ElnaLexerState; - start: Word; - finish: Word - end; ElnaLexerToken = record kind: ElnaLexerKind; start: Word; length: Word end; + ElnaLexerCursor = record + state: ElnaLexerState; + start: Word; + finish: Word; + token: ^ElnaLexerToken + end; ElnaTacOperator = ( get_address, @@ -1660,17 +1661,16 @@ end; proc elna_parser_integer_literal(cursor: ^ElnaLexerCursor); var - integer_length: Word; + token: ^ElnaLexerToken; result: ^ElnaTreeIntegerLiteral; buffer: Word; begin result := malloc(#size(ElnaTreeIntegerLiteral)); - integer_length := cursor^.finish - cursor^.start; + token := elna_lexer_read(cursor); - buffer := malloc(integer_length + 1); - bzero(buffer, integer_length + 1); - memcpy(buffer, cursor^.start, integer_length); - elna_lexer_skip_token(cursor); + buffer := malloc(token^.length + 1); + bzero(buffer, token^.length + 1); + memcpy(buffer, token^.start, token^.length); result^.kind := ElnaTreeKind.integer_literal; result^.value := atoi(buffer); @@ -1689,7 +1689,7 @@ begin result^.value := string_compare(cursor^.start, 4, "true", 4); result^.type_decoration := nil; - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -1698,7 +1698,7 @@ proc elna_parser_nil_literal(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeNilLiteral; begin - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result := malloc(#size(ElnaTreeNilLiteral)); result^.kind := ElnaTreeKind.null; @@ -1731,14 +1731,15 @@ end; proc 
elna_parser_character_literal(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeCharacterLiteral; + token: ^ElnaLexerToken; begin result := malloc(#size(ElnaTreeCharacterLiteral)); + token := elna_lexer_read(cursor); result^.kind := ElnaTreeKind.character_literal; - result^.value := cursor^.start; - result^.length := cursor^.finish - cursor^.start; + result^.value := token^.start; + result^.length := token^.length; result^.type_decoration := nil; - elna_lexer_skip_token(cursor); return result end; @@ -1753,16 +1754,16 @@ end; proc elna_parser_variable_expression(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeVariableExpression; + token: ^ElnaLexerToken; begin result := malloc(#size(ElnaTreeVariableExpression)); + token := elna_lexer_read(cursor); result^.kind := ElnaTreeKind.variable_expression; - result^.name := cursor^.start; - result^.length := cursor^.finish - cursor^.start; + result^.name := token^.start; + result^.length := token^.length; result^.type_decoration := nil; - elna_lexer_skip_token(cursor); - return result end; @@ -1787,16 +1788,16 @@ end; proc elna_parser_string_literal(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeStringLiteral; + token: ^ElnaLexerToken; begin result := malloc(#size(ElnaTreeStringLiteral)); + token := elna_lexer_read(cursor); result^.kind := ElnaTreeKind.string_literal; - result^.value := cursor^.start; - result^.length := cursor^.finish - cursor^.start; + result^.value := token^.start; + result^.length := token^.length; result^.type_decoration := nil; - elna_lexer_skip_token(cursor); - return result end; @@ -1826,21 +1827,20 @@ end; proc elna_parser_trait_expression(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeTraitExpression; + token: ^ElnaLexerToken; begin result := malloc(#size(ElnaTreeTraitExpression)); result^.kind := ElnaTreeKind.trait_expression; - result^.name := cursor^.start; - result^.length := cursor^.finish - cursor^.start; + token := elna_lexer_read(cursor); + result^.name := token^.start; + result^.length := 
token^.length; - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result^.argument := elna_parser_type_expression(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -1852,7 +1852,7 @@ var token: ^ElnaLexerToken; begin parser_node := 0; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.character then parser_node := elna_parser_character_literal(cursor) @@ -1881,7 +1881,7 @@ begin result^.kind := ElnaTreeKind.dereference_expression; result^.pointer := simple_expression; result^.type_decoration := nil; - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -1894,7 +1894,7 @@ begin simple_expression := elna_parser_simple_expression(cursor); .elna_parser_designator_loop; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.hat then simple_expression := elna_parser_dereference_expression(cursor, simple_expression); @@ -1954,7 +1954,7 @@ var operator: Word; token: ^ElnaLexerToken; begin - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); operator := 0; if token^.kind = ElnaLexerKind.at then @@ -1965,7 +1965,7 @@ begin operator := '~' end; if operator <> 0 then - elna_lexer_skip_token(cursor) + elna_lexer_read(cursor) end; result := elna_parser_designator(cursor); @@ -2055,49 +2055,49 @@ var begin lhs_node := elna_parser_unary_expression(cursor); rhs_node := 0; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.plus then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.minus then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = 
ElnaLexerKind.multiplication then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.and then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind._or then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind._xor then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.equals then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.remainder then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.division then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.less_than then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.greater_than then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.less_equal then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.not_equal then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) elsif token^.kind = ElnaLexerKind.greater_equal then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); rhs_node := elna_parser_unary_expression(cursor) end; if rhs_node <> 0 then @@ -2181,12 +2181,11 @@ begin argument_number := 1; result^.callee := callee; - 
elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); - token := elna_lexer_read_token(cursor); + elna_lexer_read(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.right_paren then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); goto elna_parser_call_end end; @@ -2202,8 +2201,7 @@ begin argument_entry^.expression := elna_parser_binary_expression(cursor); argument_number := argument_number + 1; - token := elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + token := elna_lexer_read(cursor); if token^.kind = ElnaLexerKind.comma then goto elna_parser_call_loop @@ -2249,17 +2247,16 @@ end; proc elna_parser_goto_statement(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeGotoStatement; + token: ^ElnaLexerToken; begin - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); + elna_lexer_read(cursor); + token := elna_lexer_read(cursor); result := malloc(#size(ElnaTreeGotoStatement)); result^.kind := ElnaTreeKind.goto_statement; result^.next := nil; - result^.label := cursor^.start; - result^.length := cursor^.finish - cursor^.start; - - elna_lexer_skip_token(cursor); + result^.label := token^.start; + result^.length := token^.length; return result end; @@ -2284,18 +2281,17 @@ end; proc elna_parser_label_declaration(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeGotoStatement; + token: ^ElnaLexerToken; begin - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); + elna_lexer_read(cursor); + token := elna_lexer_read(cursor); result := malloc(#size(ElnaTreeLabelDeclaration)); result^.kind := ElnaTreeKind.label_declaration; result^.next := nil; - result^.label := cursor^.start; - result^.length := cursor^.finish - cursor^.start; - - elna_lexer_skip_token(cursor); + result^.label := token^.start; + result^.length := token^.length; return result end; @@ -2348,20 +2344,19 @@ end; proc elna_parser_field_access_expression(cursor: ^ElnaLexerCursor, aggregate: Word); var result: 
^ElnaTreeFieldAccessExpression; + token: ^ElnaLexerToken; begin (* Skip dot. Read the enumeration value. *) - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); + elna_lexer_read(cursor); + token := elna_lexer_read(cursor); result := malloc(#size(ElnaTreeFieldAccessExpression)); result^.kind := ElnaTreeKind.field_access_expression; result^.type_decoration := nil; result^.aggregate := aggregate; - result^.field := cursor^.start; - result^.length := cursor^.finish - cursor^.start; - - elna_lexer_skip_token(cursor); + result^.field := token^.start; + result^.length := token^.length; return result end; @@ -2370,7 +2365,7 @@ proc elna_parser_array_access_expression(cursor: ^ElnaLexerCursor, array: Word); var result: ^ElnaTreeArrayAccessExpression; begin - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result := malloc(#size(ElnaTreeArrayAccessExpression)); @@ -2379,8 +2374,7 @@ begin result^.array := array; result^.index := elna_parser_binary_expression(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -2530,9 +2524,7 @@ begin result^.assignee := assignee; (* Skip the assignment sign (:=) with surrounding whitespaces. *) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); - + elna_lexer_read(cursor); result^.assignment := elna_parser_binary_expression(cursor); return result @@ -2601,8 +2593,7 @@ var result: ^ElnaTreeReturnStatement; begin (* Skip "return" keyword and whitespace after it. *) - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); + elna_lexer_read(cursor); returned := elna_parser_binary_expression(cursor); result := malloc(#size(ElnaTreeReturnStatement)); @@ -2651,14 +2642,13 @@ begin result := malloc(#size(ElnaTreeConditionalStatements)); (* Skip "if", "while" or "elsif". *) - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result^.condition := elna_parser_binary_expression(cursor); (* Skip "then" or "do". 
*) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); - result^.statements := elna_parser_statements(); + result^.statements := elna_parser_statements(cursor); result^.next := nil; return result @@ -2708,7 +2698,7 @@ begin result^.conditionals := previous_conditional; .elna_parser_if_statement_loop; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind._elsif then next_conditional := elna_parser_conditional_statements(cursor); @@ -2717,12 +2707,12 @@ begin goto elna_parser_if_statement_loop elsif token^.kind = ElnaLexerKind._else then - elna_lexer_skip_token(cursor); - result^._else := elna_parser_statements() + elna_lexer_read(cursor); + result^._else := elna_parser_statements(cursor) else result^._else := nil end; - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -2733,7 +2723,7 @@ var token: ^ElnaLexerToken; begin result := nil; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind._goto then result := elna_parser_goto_statement(cursor) @@ -2769,10 +2759,10 @@ begin end; .elna_parser_statement_loop; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.semicolon then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); elna_lexer_skip_empty_lines(cursor); next_statement := elna_parser_statement(cursor); previous_statement^.next := next_statement; @@ -2870,11 +2860,11 @@ var previous_entry: Word; token: ^ElnaLexerToken; begin - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); member_count := 0; memory_start := 0; - token := elna_lexer_read_token(cursor); + token := elna_lexer_read(cursor); if token^.kind = ElnaLexerKind._end then goto elna_parser_record_type_expression_end end; @@ -2882,16 +2872,14 @@ begin entry := malloc(16); member_count := member_count + 1; - entry^ := cursor^.start; + entry^ := token^.start; 
entry := entry + 4; - entry^ := cursor^.finish - cursor^.start; + entry^ := token^.length; entry := entry + 4; - (* Skip the identifier. *) - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + (* Skip the colon. *) + elna_lexer_read(cursor); field_type := elna_parser_type_expression(cursor); @@ -2906,15 +2894,12 @@ begin end; previous_entry := entry; - token := elna_lexer_read_token(cursor); + token := elna_lexer_read(cursor); if token^.kind = ElnaLexerKind.semicolon then - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); + token := elna_lexer_read(cursor); goto elna_parser_record_type_expression_loop end; - .elna_parser_record_type_expression_end; - elna_lexer_skip_token(cursor); result := malloc(#size(ElnaTreeEnumerationTypeExpression)); @@ -2934,22 +2919,19 @@ var previous_entry: Word; token: ^ElnaLexerToken; begin - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); memory_start := 0; member_count := 0; - token := elna_lexer_read_token(cursor); - if token^.kind = ElnaLexerKind.right_paren then - goto elna_parser_enumeration_type_expression_end - end; .elna_parser_enumeration_type_expression_loop; + token := elna_lexer_read(cursor); entry := malloc(12); member_count := member_count + 1; - entry^ := cursor^.start; + entry^ := token^.start; entry := entry + 4; - entry^ := cursor^.finish - cursor^.start; + entry^ := token^.length; entry := entry + 4; entry^ := 0; @@ -2961,18 +2943,10 @@ begin previous_entry := entry; (* Skip the identifier. 
*) - elna_lexer_skip_token(cursor); - - token := elna_lexer_read_token(cursor); + token := elna_lexer_read(cursor); if token^.kind = ElnaLexerKind.comma then - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); goto elna_parser_enumeration_type_expression_loop end; - - .elna_parser_enumeration_type_expression_end; - elna_lexer_skip_token(cursor); - result := malloc(#size(ElnaTreeEnumerationTypeExpression)); result^.kind := ElnaTreeKind.enumeration_type_expression; @@ -3117,13 +3091,14 @@ end; proc elna_parser_named_type_expression(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeNamedTypeExpression; + token: ^ElnaLexerToken; begin result := malloc(#size(ElnaTreeNamedTypeExpression)); + token := elna_lexer_read(cursor); result^.kind := ElnaTreeKind.named_type_expression; - result^.name := cursor^.start; - result^.length := cursor^.finish - cursor^.start; - elna_lexer_skip_token(cursor); + result^.name := token^.start; + result^.length := token^.length; return result end; @@ -3132,7 +3107,7 @@ proc elna_parser_pointer_type_expression(cursor: ^ElnaLexerCursor); var result: ^ElnaTreePointerTypeExpression; begin - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result := malloc(#size(ElnaTreePointerTypeExpression)); result^.kind := ElnaTreeKind.pointer_type_expression; @@ -3145,15 +3120,14 @@ proc elna_parser_array_type_expression(cursor: ^ElnaLexerCursor); var result: ^ElnaTreeArrayTypeExpression; begin - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result := malloc(#size(ElnaTreeArrayTypeExpression)); result^.kind := ElnaTreeKind.array_type_expression; result^.length := elna_parser_binary_expression(cursor); (* Read and skip square bracket. 
*) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result^.base := elna_parser_type_expression(cursor); @@ -3166,7 +3140,7 @@ var token: ^ElnaLexerToken; begin result := nil; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.identifier then result := elna_parser_named_type_expression(cursor) @@ -3292,22 +3266,19 @@ begin result^.next := nil; (* Skip "proc ". *) - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); - elna_lexer_read_token(cursor); - - result^.name := cursor^.start; - result^.length := cursor^.finish - cursor^.start; (* Skip procedure name. *) - elna_lexer_skip_token(cursor); + token := elna_lexer_read(cursor); + result^.name := token^.start; + result^.length := token^.length; (* Skip open paren. *) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); parameter_head := nil; .elna_parser_procedure_declaration_parameter; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind <> ElnaLexerKind.right_paren then next_declaration := elna_parser_variable_declaration(cursor); @@ -3318,25 +3289,23 @@ begin end; current_declaration := next_declaration; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.comma then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); goto elna_parser_procedure_declaration_parameter end end; (* Skip close paren. *) - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result^.parameters := parameter_head; (* Skip semicolon or arrow. 
*) - token := elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + token := elna_lexer_read(cursor); if token^.kind = ElnaLexerKind.arrow then result^.return_type := elna_parser_type_expression(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor) + elna_lexer_read(cursor) else result^.return_type := nil end; @@ -3345,9 +3314,9 @@ begin result^.temporaries := parameter_head; (* Skip semicolon, "begin" and newline. *) - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind._begin then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); parameter_head := elna_parser_statements(cursor) elsif token^.kind = ElnaLexerKind._return then parameter_head := elna_parser_return_statement(cursor) @@ -3355,8 +3324,7 @@ begin result^.body := parameter_head; (* Skip the "end" keyword. *) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -3559,7 +3527,7 @@ begin .elna_parser_procedures_loop; elna_lexer_skip_empty_lines(cursor); - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind._proc then parser_node := elna_parser_procedure_declaration(cursor); @@ -3571,8 +3539,7 @@ begin current_declaration := parser_node; (* Skip semicolon. 
*) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); goto elna_parser_procedures_loop end; @@ -3671,38 +3638,31 @@ var begin .skip_empty_lines_rerun; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.comment then - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); goto skip_empty_lines_rerun end end; proc elna_parser_type_declaration(cursor: ^ElnaLexerCursor); var - type_name: Word; - name_length: Word; result: ^ElnaTreeTypeDeclaration; + token: ^ElnaLexerToken; begin - elna_lexer_read_token(cursor); - type_name := cursor^.start; - name_length := cursor^.finish - cursor^.start; - - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); - result := malloc(#size(ElnaTreeTypeDeclaration)); + token := elna_lexer_read(cursor); result^.kind := ElnaTreeKind.type_declaration; result^.next := nil; - result^.name := type_name; - result^.length := name_length; + result^.name := token^.start; + result^.length := token^.length; + + elna_lexer_read(cursor); result^._type := elna_parser_type_expression(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); return result end; @@ -3736,17 +3696,17 @@ var begin result := nil; elna_lexer_skip_empty_lines(cursor); - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind <> ElnaLexerKind._type then goto elna_parser_type_part_end end; - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); .elna_parser_type_part_loop; elna_lexer_skip_empty_lines(cursor); - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.identifier then parser_node := elna_parser_type_declaration(cursor); @@ -3765,28 +3725,22 @@ end; proc elna_parser_variable_declaration(cursor: ^ElnaLexerCursor); var - name: Word; - name_length: Word; variable_type: Word; result: 
^ElnaTreeVariableDeclaration; + token: ^ElnaLexerToken; begin - elna_lexer_read_token(cursor); - - name := cursor^.start; - name_length := cursor^.finish - cursor^.start; + token := elna_lexer_read(cursor); (* Skip the variable name and colon with the type. *) - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); variable_type := elna_parser_type_expression(cursor); result := malloc(#size(ElnaTreeVariableDeclaration)); result^.kind := ElnaTreeKind.variable_declaration; result^.next := nil; - result^.name := name; - result^.length := name_length; + result^.name := token^.start; + result^.length := token^.length; result^._type := variable_type; return result @@ -3816,24 +3770,23 @@ var token: ^ElnaLexerToken; begin result := 0; - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind <> ElnaLexerKind._var then goto elna_parser_var_part_end end; (* Skip "var". *) - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); .elna_parser_var_part_loop; elna_lexer_skip_empty_lines(cursor); - token := elna_lexer_read_token(cursor); + token := elna_lexer_peek(cursor); if token^.kind = ElnaLexerKind.identifier then variable_node := elna_parser_variable_declaration(cursor); (* Skip semicolon. *) - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); if result = 0 then result := variable_node @@ -3889,7 +3842,7 @@ begin (* Skip "program;". 
*) elna_lexer_skip_empty_lines(cursor); - token := elna_lexer_read_token(cursor); + token := elna_lexer_read(cursor); if token^.kind <> ElnaLexerKind._program then parser_error := malloc(#size(ElnaError)); @@ -3898,9 +3851,7 @@ begin error_list^.first := parser_error; error_list^.last := parser_error else - elna_lexer_skip_token(cursor); - elna_lexer_read_token(cursor); - elna_lexer_skip_token(cursor); + elna_lexer_read(cursor); result^.types := elna_parser_type_part(cursor); result^.globals := elna_parser_var_part(cursor); @@ -4779,16 +4730,8 @@ begin elna_lexer_transitions(); cursor^.start := code_pointer; - cursor^.finish := code_pointer -end; - -proc elna_lexer_next_transition(cursor: ^ElnaLexerCursor); -var - current_character: Word; -begin - current_character := _load_byte(cursor^.finish); - - return elna_lexer_get_transition(cursor^.state, classification[current_character + 1]) + cursor^.finish := code_pointer; + cursor^.token := nil end; proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); @@ -4804,183 +4747,198 @@ begin return result end; -proc elna_lexer_classify_keyword(position_start: Word, position_end: Word); +proc elna_lexer_classify_keyword(position_start: Word, position_end: Word) -> ^ElnaLexerToken; var - result: Word; - token_length: Word; + result: ^ElnaLexerToken; begin - result := ElnaLexerKind.identifier; - token_length := position_end - position_start; + result := malloc(#size(ElnaLexerToken)); + result^.start := position_start; + result^.length := position_end - position_start; if _load_byte(position_start) = '#' then - result := ElnaLexerKind.trait - elsif string_compare(position_start, token_length, "const", 5) then - result := ElnaLexerKind._const - elsif string_compare(position_start, token_length, "var", 3) then - result := ElnaLexerKind._var - elsif string_compare(position_start, token_length, "proc", 4) then - result := ElnaLexerKind._proc - elsif string_compare(position_start, token_length, 
"type", 4) then - result := ElnaLexerKind._type - elsif string_compare(position_start, token_length, "begin", 5) then - result := ElnaLexerKind._begin - elsif string_compare(position_start, token_length, "end", 3) then - result := ElnaLexerKind._end - elsif string_compare(position_start, token_length, "return", 6) then - result := ElnaLexerKind._return - elsif string_compare(position_start, token_length, "goto", 4) then - result := ElnaLexerKind._goto - elsif string_compare(position_start, token_length, "if", 2) then - result := ElnaLexerKind._if - elsif string_compare(position_start, token_length, "while", 5) then - result := ElnaLexerKind._while - elsif string_compare(position_start, token_length, "then", 4) then - result := ElnaLexerKind._then - elsif string_compare(position_start, token_length, "else", 4) then - result := ElnaLexerKind._else - elsif string_compare(position_start, token_length, "elsif", 5) then - result := ElnaLexerKind._elsif - elsif string_compare(position_start, token_length, "record", 6) then - result := ElnaLexerKind._record - elsif string_compare(position_start, token_length, "or", 2) then - result := ElnaLexerKind._or - elsif string_compare(position_start, token_length, "xor", 3) then - result := ElnaLexerKind._xor - elsif string_compare(position_start, token_length, "program", 7) then - result := ElnaLexerKind._program - elsif string_compare(position_start, token_length, "module", 6) then - result := ElnaLexerKind._module - elsif string_compare(position_start, token_length, "nil", 3) then - result := ElnaLexerKind.null - elsif string_compare(position_start, token_length, "true", 4) then - result := ElnaLexerKind.boolean - elsif string_compare(position_start, token_length, "false", 5) then - result := ElnaLexerKind.boolean + result^.kind := ElnaLexerKind.trait + elsif string_compare(position_start, result^.length, "const", 5) then + result^.kind := ElnaLexerKind._const + elsif string_compare(position_start, result^.length, "var", 3) then 
+ result^.kind := ElnaLexerKind._var + elsif string_compare(position_start, result^.length, "proc", 4) then + result^.kind := ElnaLexerKind._proc + elsif string_compare(position_start, result^.length, "type", 4) then + result^.kind := ElnaLexerKind._type + elsif string_compare(position_start, result^.length, "begin", 5) then + result^.kind := ElnaLexerKind._begin + elsif string_compare(position_start, result^.length, "end", 3) then + result^.kind := ElnaLexerKind._end + elsif string_compare(position_start, result^.length, "return", 6) then + result^.kind := ElnaLexerKind._return + elsif string_compare(position_start, result^.length, "goto", 4) then + result^.kind := ElnaLexerKind._goto + elsif string_compare(position_start, result^.length, "if", 2) then + result^.kind := ElnaLexerKind._if + elsif string_compare(position_start, result^.length, "while", 5) then + result^.kind := ElnaLexerKind._while + elsif string_compare(position_start, result^.length, "then", 4) then + result^.kind := ElnaLexerKind._then + elsif string_compare(position_start, result^.length, "else", 4) then + result^.kind := ElnaLexerKind._else + elsif string_compare(position_start, result^.length, "elsif", 5) then + result^.kind := ElnaLexerKind._elsif + elsif string_compare(position_start, result^.length, "record", 6) then + result^.kind := ElnaLexerKind._record + elsif string_compare(position_start, result^.length, "or", 2) then + result^.kind := ElnaLexerKind._or + elsif string_compare(position_start, result^.length, "xor", 3) then + result^.kind := ElnaLexerKind._xor + elsif string_compare(position_start, result^.length, "program", 7) then + result^.kind := ElnaLexerKind._program + elsif string_compare(position_start, result^.length, "module", 6) then + result^.kind := ElnaLexerKind._module + elsif string_compare(position_start, result^.length, "nil", 3) then + result^.kind := ElnaLexerKind.null + elsif string_compare(position_start, result^.length, "true", 4) then + result^.kind := 
ElnaLexerKind.boolean + elsif string_compare(position_start, result^.length, "false", 5) then + result^.kind := ElnaLexerKind.boolean + else + result^.kind := ElnaLexerKind.identifier end; return result end; -proc elna_lexer_classify_finalize(start_position: Word); +proc elna_lexer_classify_finalize(start_position: Word) -> ^ElnaLexerToken; var character: Word; - result: Word; + result: ^ElnaLexerToken; begin - result := 0; + result := malloc(#size(ElnaLexerToken)); character := _load_byte(start_position); if character = ':' then - result := ElnaLexerKind.colon + result^.kind := ElnaLexerKind.colon elsif character = '.' then - result := ElnaLexerKind.dot + result^.kind := ElnaLexerKind.dot elsif character = '(' then - result := ElnaLexerKind.left_paren + result^.kind := ElnaLexerKind.left_paren elsif character = '-' then - result := ElnaLexerKind.minus + result^.kind := ElnaLexerKind.minus elsif character = '<' then - result := ElnaLexerKind.less_than + result^.kind := ElnaLexerKind.less_than elsif character = '>' then - result := ElnaLexerKind.greater_than + result^.kind := ElnaLexerKind.greater_than end; return result end; -proc elna_lexer_classify_single(start_position: Word); +proc elna_lexer_classify_single(start_position: Word) -> ^ElnaLexerToken; var character: Word; - result: Word; + result: ^ElnaLexerToken; begin - result := 0; + result := malloc(#size(ElnaLexerToken)); character := _load_byte(start_position); if character = ';' then - result := ElnaLexerKind.semicolon + result^.kind := ElnaLexerKind.semicolon elsif character = ',' then - result := ElnaLexerKind.comma + result^.kind := ElnaLexerKind.comma elsif character = ')' then - result := ElnaLexerKind.right_paren + result^.kind := ElnaLexerKind.right_paren elsif character = '@' then - result := ElnaLexerKind.at + result^.kind := ElnaLexerKind.at elsif character = '~' then - result := ElnaLexerKind.not + result^.kind := ElnaLexerKind.not elsif character = '&' then - result := ElnaLexerKind.and + 
result^.kind := ElnaLexerKind.and elsif character = '+' then - result := ElnaLexerKind.plus + result^.kind := ElnaLexerKind.plus elsif character = '*' then - result := ElnaLexerKind.multiplication + result^.kind := ElnaLexerKind.multiplication elsif character = '=' then - result := ElnaLexerKind.equals + result^.kind := ElnaLexerKind.equals elsif character = '%' then - result := ElnaLexerKind.remainder + result^.kind := ElnaLexerKind.remainder elsif character = '/' then - result := ElnaLexerKind.division + result^.kind := ElnaLexerKind.division elsif character = '.' then - result := ElnaLexerKind.dot + result^.kind := ElnaLexerKind.dot elsif character = '^' then - result := ElnaLexerKind.hat + result^.kind := ElnaLexerKind.hat elsif character = '[' then - result := ElnaLexerKind.left_square + result^.kind := ElnaLexerKind.left_square elsif character = ']' then - result := ElnaLexerKind.right_square + result^.kind := ElnaLexerKind.right_square end; return result end; -proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word); +proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word) -> ^ElnaLexerToken; var first_character: Word; last_character: Word; - result: Word; + result: ^ElnaLexerToken; begin first_character := _load_byte(start_position); last_character := _load_byte(one_before_last); + result := malloc(#size(ElnaLexerToken)); if first_character = ':' then - result := ElnaLexerKind.assignment + result^.kind := ElnaLexerKind.assignment elsif first_character = '<' then if last_character = '=' then - result := ElnaLexerKind.less_equal + result^.kind := ElnaLexerKind.less_equal elsif last_character = '>' then - result := ElnaLexerKind.not_equal + result^.kind := ElnaLexerKind.not_equal end elsif first_character = '>' then if last_character = '=' then - result := ElnaLexerKind.greater_equal + result^.kind := ElnaLexerKind.greater_equal end elsif first_character = '-' then - result := ElnaLexerKind.arrow + result^.kind := 
ElnaLexerKind.arrow end; return result end; -proc elna_lexer_classify_delimited(start_position: Word, end_position: Word); +proc elna_lexer_classify_delimited(start_position: Word, end_position: Word) -> ^ElnaLexerToken; var - token_length: Word; delimiter: Word; - result: Word; + result: ^ElnaLexerToken; begin - token_length := end_position - start_position; delimiter := _load_byte(start_position); + result := malloc(#size(ElnaLexerToken)); + + result^.start := start_position; + result^.length := end_position - start_position; if delimiter = '(' then - result := ElnaLexerKind.comment + result^.kind := ElnaLexerKind.comment elsif delimiter = '\'' then - result := ElnaLexerKind.character + result^.kind:= ElnaLexerKind.character elsif delimiter = '"' then - result := ElnaLexerKind.string + result^.kind := ElnaLexerKind.string end; return result end; -proc elna_lexer_classify_integer(start_position: Word, end_position: Word); - return ElnaLexerKind.integer +proc elna_lexer_classify_integer(start_position: Word, end_position: Word) -> ^ElnaLexerToken; +var + result: ^ElnaLexerToken; +begin + result := malloc(#size(ElnaLexerToken)); + result^.kind := ElnaLexerKind.integer; + result^.start := start_position; + result^.length := end_position - start_position; + + return result end; -proc elna_lexer_execute_action(cursor: ^ElnaLexerCursor, action_to_perform: Word, kind: ^ElnaLexerKind); +proc elna_lexer_execute_action(cursor: ^ElnaLexerCursor, action_to_perform: Word) -> ^ElnaLexerToken; var - intermediate: Word; + token: ^ElnaLexerToken; begin + token := nil; + if action_to_perform = ElnaLexerAction.none then elsif action_to_perform = ElnaLexerAction.accumulate then cursor^.finish := cursor^.finish + 1 @@ -4990,82 +4948,76 @@ begin elsif action_to_perform = ElnaLexerAction.single then cursor^.finish := cursor^.finish + 1; - intermediate := elna_lexer_classify_single(cursor^.start); - kind^ := intermediate + token := elna_lexer_classify_single(cursor^.start) elsif 
action_to_perform = ElnaLexerAction.eof then - intermediate := ElnaLexerKind.eof; - kind^ := intermediate + token := malloc(#size(ElnaLexerToken)); + token^.kind := ElnaLexerKind.eof elsif action_to_perform = ElnaLexerAction.finalize then - intermediate := elna_lexer_classify_finalize(cursor^.start); - kind^ := intermediate + token := elna_lexer_classify_finalize(cursor^.start) elsif action_to_perform = ElnaLexerAction.composite then - intermediate := elna_lexer_classify_composite(cursor^.start, cursor^.finish); - kind^ := intermediate; + token := elna_lexer_classify_composite(cursor^.start, cursor^.finish); cursor^.finish := cursor^.finish + 1 elsif action_to_perform = ElnaLexerAction.key_id then - intermediate := elna_lexer_classify_keyword(cursor^.start, cursor^.finish); - kind^ := intermediate + token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish) elsif action_to_perform = ElnaLexerAction.integer then - intermediate := elna_lexer_classify_integer(cursor^.start, cursor^.finish); - kind^ := intermediate + token := elna_lexer_classify_integer(cursor^.start, cursor^.finish) elsif action_to_perform = ElnaLexerAction.delimited then cursor^.finish := cursor^.finish + 1; - intermediate := elna_lexer_classify_delimited(cursor^.start, cursor^.finish); - kind^ := intermediate - end + token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish) + end; + return token end; -proc elna_lexer_execute_transition(cursor: ^ElnaLexerCursor, kind: ^ElnaLexerKind) -> ElnaLexerState; +proc elna_lexer_execute_transition(cursor: ^ElnaLexerCursor, kind: ^ElnaLexerKind) -> ^ElnaLexerToken; var next_transition: ^ElnaLexerTransition; + current_character: Word; begin - next_transition := elna_lexer_next_transition(cursor); + current_character := _load_byte(cursor^.finish); + next_transition := elna_lexer_get_transition(cursor^.state, classification[current_character + 1]); cursor^.state := next_transition^.next_state; - elna_lexer_execute_action(cursor, 
next_transition^.action, kind); - - return next_transition^.next_state + return elna_lexer_execute_action(cursor, next_transition^.action, kind) end; -proc elna_lexer_advance_token(cursor: ^ElnaLexerCursor); +proc elna_lexer_advance_token(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; var - result_state: ElnaLexerState; - kind: ElnaLexerKind; + token: ^ElnaLexerToken; begin - result_state := elna_lexer_execute_transition(cursor, @kind); - if result_state <> ElnaLexerState.finish then - kind := elna_lexer_advance_token(cursor) + token := elna_lexer_execute_transition(cursor); + if cursor^.state <> ElnaLexerState.finish then + token := elna_lexer_advance_token(cursor) end; - return kind + return token end; (** - * Reads the next token and writes its type into the address in the kind parameter. - * Resets the lexer state for reading the next token. + * Returns the next token without consuming it. The token is lexed on the first + * call and cached in the cursor until elna_lexer_read takes it. *) -proc elna_lexer_read_token(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; -var - result: ^ElnaLexerToken; +proc elna_lexer_peek(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; begin - cursor^.state := ElnaLexerState.start; - cursor^.finish := cursor^.start; - - result := malloc(#size(ElnaLexerToken)); - result^.kind := elna_lexer_advance_token(cursor); - result^.start := cursor^.start; - result^.length := cursor^.finish - cursor^.start; - - return result + if cursor^.token = nil then + cursor^.state := ElnaLexerState.start; + cursor^.token := elna_lexer_advance_token(cursor) + end; + return cursor^.token end; (** - * Advances the token stream past the last read token. + * Reads the next token and advances the lexer past it. *) -proc elna_lexer_skip_token(cursor: ^ElnaLexerCursor); +proc elna_lexer_read(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; +var + token: ^ElnaLexerToken; begin - cursor^.start := cursor^.finish + token := elna_lexer_peek(cursor); + cursor^.token := nil; + cursor^.start := cursor^.finish; + + return token end; proc _initialize_global_state();