From e1971b469e131083bfb11425f9ba5c4e9e650bc6 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sun, 8 Mar 2026 18:16:06 +0100 Subject: [PATCH] Track token source position --- boot/stage20/cl.elna | 174 ++++++++++++++++++++++++++----------------- 1 file changed, 106 insertions(+), 68 deletions(-) diff --git a/boot/stage20/cl.elna b/boot/stage20/cl.elna index 5da101e..a4adfc5 100644 --- a/boot/stage20/cl.elna +++ b/boot/stage20/cl.elna @@ -15,6 +15,14 @@ type first: ^ElnaListNode; last: ^ElnaListNode end; + ElnaLocation = record + line: Word; + column: Word + end; + ElnaPosition = record + start_location: ElnaLocation; + end_location: ElnaLocation + end; (** * List of intermediate representation items. @@ -441,13 +449,15 @@ type ElnaLexerToken = record kind: ElnaLexerKind; start: Word; - length: Word + length: Word; + position: ElnaPosition end; ElnaLexerCursor = record state: ElnaLexerState; start: Word; finish: Word; - token: ^ElnaLexerToken + token: ^ElnaLexerToken; + position: ElnaPosition end; ElnaTacOperator = ( @@ -4844,7 +4854,11 @@ begin cursor^.start := code_pointer; cursor^.finish := code_pointer; - cursor^.token := nil + cursor^.token := nil; + cursor^.position.start_location.line := 1; + cursor^.position.start_location.column := 1; + cursor^.position.end_location.line := 1; + cursor^.position.end_location.column := 1 end; proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); @@ -4860,11 +4874,22 @@ begin return result end; -proc elna_lexer_classify_keyword(position_start: Word, position_end: Word) -> ^ElnaLexerToken; +proc elna_lexer_token_create(kind: ElnaLexerKind, position: ^ElnaPosition) -> ^ElnaLexerToken; var result: ^ElnaLexerToken; begin result := malloc(#size(ElnaLexerToken)); + result^.kind := kind; + memcpy(@result^.position, position, #size(ElnaPosition)); + + return result +end; + +proc elna_lexer_classify_keyword(position_start: Word, position_end: Word, position: ^ElnaPosition) -> ^ElnaLexerToken; +var + result: ^ElnaLexerToken; +begin + result := elna_lexer_token_create(ElnaLexerKind.identifier, position); result^.start := position_start; result^.length := position_end - position_start; @@ -4912,37 +4937,34 @@ begin result^.kind := ElnaLexerKind.boolean elsif string_compare(position_start, result^.length, "false", 5) then result^.kind := ElnaLexerKind.boolean - else - result^.kind := ElnaLexerKind.identifier end; return result end; -proc elna_lexer_classify_finalize(start_position: Word) -> ^ElnaLexerToken; +proc elna_lexer_classify_finalize(start_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken; var character: Word; result: ^ElnaLexerToken; begin - result := malloc(#size(ElnaLexerToken)); character := _load_byte(start_position); if character = ':' then - result^.kind := ElnaLexerKind.colon + result := elna_lexer_token_create(ElnaLexerKind.colon, position) elsif character = '.' then - result^.kind := ElnaLexerKind.dot + result := elna_lexer_token_create(ElnaLexerKind.dot, position) elsif character = '(' then - result^.kind := ElnaLexerKind.left_paren + result := elna_lexer_token_create(ElnaLexerKind.left_paren, position) elsif character = '-' then - result^.kind := ElnaLexerKind.minus + result := elna_lexer_token_create(ElnaLexerKind.minus, position) elsif character = '<' then - result^.kind := ElnaLexerKind.less_than + result := elna_lexer_token_create(ElnaLexerKind.less_than, position) elsif character = '>' then - result^.kind := ElnaLexerKind.greater_than + result := elna_lexer_token_create(ElnaLexerKind.greater_than, position) end; return result end; -proc elna_lexer_classify_single(start_position: Word) -> ^ElnaLexerToken; +proc elna_lexer_classify_single(start_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken; var character: Word; result: ^ElnaLexerToken; @@ -4951,40 +4973,40 @@ begin character := _load_byte(start_position); if character = ';' then - result^.kind := ElnaLexerKind.semicolon + result := elna_lexer_token_create(ElnaLexerKind.semicolon, position) elsif character = ',' then - result^.kind := ElnaLexerKind.comma + result := elna_lexer_token_create(ElnaLexerKind.comma, position) elsif character = ')' then - result^.kind := ElnaLexerKind.right_paren + result := elna_lexer_token_create(ElnaLexerKind.right_paren, position) elsif character = '@' then - result^.kind := ElnaLexerKind.at + result := elna_lexer_token_create(ElnaLexerKind.at, position) elsif character = '~' then - result^.kind := ElnaLexerKind.not + result := elna_lexer_token_create(ElnaLexerKind.not, position) elsif character = '&' then - result^.kind := ElnaLexerKind.and + result := elna_lexer_token_create(ElnaLexerKind.and, position) elsif character = '+' then - result^.kind := ElnaLexerKind.plus + result := elna_lexer_token_create(ElnaLexerKind.plus, position) elsif character = '*' then - result^.kind := ElnaLexerKind.multiplication + result := elna_lexer_token_create(ElnaLexerKind.multiplication, position) elsif character = '=' then - result^.kind := ElnaLexerKind.equals + result := elna_lexer_token_create(ElnaLexerKind.equals, position) elsif character = '%' then - result^.kind := ElnaLexerKind.remainder + result := elna_lexer_token_create(ElnaLexerKind.remainder, position) elsif character = '/' then - result^.kind := ElnaLexerKind.division + result := elna_lexer_token_create(ElnaLexerKind.division, position) elsif character = '.' then - result^.kind := ElnaLexerKind.dot + result := elna_lexer_token_create(ElnaLexerKind.dot, position) elsif character = '^' then - result^.kind := ElnaLexerKind.hat + result := elna_lexer_token_create(ElnaLexerKind.hat, position) elsif character = '[' then - result^.kind := ElnaLexerKind.left_square + result := elna_lexer_token_create(ElnaLexerKind.left_square, position) elsif character = ']' then - result^.kind := ElnaLexerKind.right_square + result := elna_lexer_token_create(ElnaLexerKind.right_square, position) end; return result end; -proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word) -> ^ElnaLexerToken; +proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word, position: ^ElnaPosition) -> ^ElnaLexerToken; var first_character: Word; last_character: Word; @@ -4992,54 +5014,50 @@ var begin first_character := _load_byte(start_position); last_character := _load_byte(one_before_last); - result := malloc(#size(ElnaLexerToken)); if first_character = ':' then - result^.kind := ElnaLexerKind.assignment + result := elna_lexer_token_create(ElnaLexerKind.assignment, position) elsif first_character = '<' then if last_character = '=' then - result^.kind := ElnaLexerKind.less_equal + result := elna_lexer_token_create(ElnaLexerKind.less_equal, position) elsif last_character = '>' then - result^.kind := ElnaLexerKind.not_equal + result := elna_lexer_token_create(ElnaLexerKind.not_equal, position) end elsif first_character = '>' then if last_character = '=' then - result^.kind := ElnaLexerKind.greater_equal + result := elna_lexer_token_create(ElnaLexerKind.greater_equal, position) end elsif first_character = '-' then - result^.kind := ElnaLexerKind.arrow + result := elna_lexer_token_create(ElnaLexerKind.arrow, position) end; - return result end; -proc elna_lexer_classify_delimited(start_position: Word, end_position: Word) -> ^ElnaLexerToken; +proc elna_lexer_classify_delimited(start_position: Word, end_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken; var delimiter: Word; result: ^ElnaLexerToken; begin delimiter := _load_byte(start_position); - result := malloc(#size(ElnaLexerToken)); + if delimiter = '(' then + result := elna_lexer_token_create(ElnaLexerKind.comment, position) + elsif delimiter = '\'' then + result := elna_lexer_token_create(ElnaLexerKind.character, position) + elsif delimiter = '"' then + result := elna_lexer_token_create(ElnaLexerKind.string, position) + end; result^.start := start_position; result^.length := end_position - start_position; - if delimiter = '(' then - result^.kind := ElnaLexerKind.comment - elsif delimiter = '\'' then - result^.kind:= ElnaLexerKind.character - elsif delimiter = '"' then - result^.kind := ElnaLexerKind.string - end; return result end; -proc elna_lexer_classify_integer(start_position: Word, end_position: Word) -> ^ElnaLexerToken; +proc elna_lexer_classify_integer(start_position: Word, end_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken; var result: ^ElnaLexerToken; begin - result := malloc(#size(ElnaLexerToken)); - result^.kind := ElnaLexerKind.integer; + result := elna_lexer_token_create(ElnaLexerKind.integer, position); result^.start := start_position; result^.length := end_position - start_position; @@ -5054,31 +5072,32 @@ begin if action_to_perform = ElnaLexerAction.none then elsif action_to_perform = ElnaLexerAction.accumulate then - cursor^.finish := cursor^.finish + 1 + elna_lexer_advance(cursor) elsif action_to_perform = ElnaLexerAction.skip then + elna_lexer_classify_space(cursor^.start, @cursor^.position.end_location); cursor^.start := cursor^.start + 1; cursor^.finish := cursor^.finish + 1 elsif action_to_perform = ElnaLexerAction.single then - cursor^.finish := cursor^.finish + 1; + elna_lexer_advance(cursor); - token := elna_lexer_classify_single(cursor^.start) + token := elna_lexer_classify_single(cursor^.start, @cursor^.position) elsif action_to_perform = ElnaLexerAction.eof then token := malloc(#size(ElnaLexerToken)); token^.kind := ElnaLexerKind.eof elsif action_to_perform = ElnaLexerAction.finalize then - token := elna_lexer_classify_finalize(cursor^.start) + token := elna_lexer_classify_finalize(cursor^.start, @cursor^.position) elsif action_to_perform = ElnaLexerAction.composite then - token := elna_lexer_classify_composite(cursor^.start, cursor^.finish); + token := elna_lexer_classify_composite(cursor^.start, cursor^.finish, @cursor^.position); - cursor^.finish := cursor^.finish + 1 + elna_lexer_advance(cursor) elsif action_to_perform = ElnaLexerAction.key_id then - token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish) + token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish, @cursor^.position) elsif action_to_perform = ElnaLexerAction.integer then - token := elna_lexer_classify_integer(cursor^.start, cursor^.finish) + token := elna_lexer_classify_integer(cursor^.start, cursor^.finish, @cursor^.position) elsif action_to_perform = ElnaLexerAction.delimited then - cursor^.finish := cursor^.finish + 1; + elna_lexer_advance(cursor); - token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish) + token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish, @cursor^.position) end; return token end; @@ -5095,15 +5114,24 @@ begin return elna_lexer_execute_action(cursor, next_transition^.action, kind) end; -proc elna_lexer_advance_token(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; +proc elna_lexer_classify_space(start_position: Word, location: ^ElnaLocation); var - token: ^ElnaLexerToken; + character: Word; begin - token := elna_lexer_execute_transition(cursor); - if cursor^.state <> ElnaLexerState.finish then - token := elna_lexer_advance_token(cursor) - end; - return token + character := _load_byte(start_position); + + if character = '\n' then + location^.line := location^.line + 1; + location^.column := 1 + else + location^.column := location^.column + 1 + end +end; + +proc elna_lexer_advance(cursor: ^ElnaLexerCursor); +begin + cursor^.finish := cursor^.finish + 1; + cursor^.position.end_location.column := cursor^.position.end_location.column + 1 end; (** @@ -5111,10 +5139,19 @@ end; * Resets the lexer state for reading the next token. *) proc elna_lexer_peek(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; +var + token: ^ElnaLexerToken; begin if cursor^.token = nil then cursor^.state := ElnaLexerState.start; - cursor^.token := elna_lexer_advance_token(cursor) + + .elna_lexer_peek_loop; + token := elna_lexer_execute_transition(cursor); + + if cursor^.state <> ElnaLexerState.finish then + goto elna_lexer_peek_loop + end; + cursor^.token := token end; return cursor^.token end; @@ -5129,6 +5166,7 @@ begin token := elna_lexer_peek(cursor); cursor^.token := nil; cursor^.start := cursor^.finish; + memcpy(@cursor^.position.start_location, @cursor^.position.end_location, #size(ElnaLocation)); return token end;