Track token source position

This commit is contained in:
2026-03-08 18:16:06 +01:00
parent aab7e42260
commit e1971b469e

View File

@@ -15,6 +15,14 @@ type
first: ^ElnaListNode; first: ^ElnaListNode;
last: ^ElnaListNode last: ^ElnaListNode
end; end;
(**
 * A single point in the source text, expressed as a line and a column.
 * Both fields are Word counters; the lexer cursor setup shown in this
 * change initializes line and column to 1.
 *)
ElnaLocation = record
line: Word;
column: Word
end;
(**
 * A span of source text described by two locations.
 * start_location is where the span begins, end_location where it ends.
 * NOTE(review): whether end_location is inclusive or one past the last
 * character is not stated by this declaration; confirm against the lexer.
 *)
ElnaPosition = record
start_location: ElnaLocation;
end_location: ElnaLocation
end;
(** (**
* List of intermediate representation items. * List of intermediate representation items.
@@ -441,13 +449,15 @@ type
(**
 * A token produced by the lexer.
 * kind     - the token category.
 * start    - Word set by the classifiers that keep the token text
 *            (keyword/identifier, integer, delimited) to the start of it.
 * length   - Word set by the same classifiers to end minus start.
 * position - added in the newer revision: the line/column span of the token.
 *)
ElnaLexerToken = record ElnaLexerToken = record
kind: ElnaLexerKind; kind: ElnaLexerKind;
start: Word; start: Word;
length: Word length: Word;
position: ElnaPosition
end; end;
(**
 * Lexer state carried between calls.
 * state    - current state of the lexer state machine.
 * start    - Word passed to the classifiers as the beginning of the token text.
 * finish   - Word passed to the classifiers as the end of the token text.
 * token    - token cached by elna_lexer_peek; nil when nothing is pending.
 * position - added in the newer revision: line/column span tracked
 *            alongside start and finish.
 *)
ElnaLexerCursor = record ElnaLexerCursor = record
state: ElnaLexerState; state: ElnaLexerState;
start: Word; start: Word;
finish: Word; finish: Word;
token: ^ElnaLexerToken token: ^ElnaLexerToken;
position: ElnaPosition
end; end;
ElnaTacOperator = ( ElnaTacOperator = (
@@ -4844,7 +4854,11 @@ begin
cursor^.start := code_pointer; cursor^.start := code_pointer;
cursor^.finish := code_pointer; cursor^.finish := code_pointer;
cursor^.token := nil cursor^.token := nil;
cursor^.position.start_location.line := 1;
cursor^.position.start_location.column := 1;
cursor^.position.end_location.line := 1;
cursor^.position.end_location.column := 1
end; end;
proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word);
@@ -4860,11 +4874,22 @@ begin
return result return result
end; end;
(**
 * Newer revision: elna_lexer_token_create allocates a token with malloc,
 * stores the given kind and copies the ElnaPosition that position points to
 * into the token. The start and length fields are not written here; callers
 * that need them assign them afterwards.
 * (The older revision shown alongside is the beginning of
 * elna_lexer_classify_keyword, which allocated the token itself.)
 * NOTE(review): the malloc result is used without a nil check.
 *)
proc elna_lexer_classify_keyword(position_start: Word, position_end: Word) -> ^ElnaLexerToken; proc elna_lexer_token_create(kind: ElnaLexerKind, position: ^ElnaPosition) -> ^ElnaLexerToken;
var var
result: ^ElnaLexerToken; result: ^ElnaLexerToken;
begin begin
result := malloc(#size(ElnaLexerToken)); result := malloc(#size(ElnaLexerToken));
result^.kind := kind;
memcpy(@result^.position, position, #size(ElnaPosition));
return result
end;
proc elna_lexer_classify_keyword(position_start: Word, position_end: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
var
result: ^ElnaLexerToken;
begin
result := elna_lexer_token_create(ElnaLexerKind.identifier, position);
result^.start := position_start; result^.start := position_start;
result^.length := position_end - position_start; result^.length := position_end - position_start;
@@ -4912,37 +4937,34 @@ begin
result^.kind := ElnaLexerKind.boolean result^.kind := ElnaLexerKind.boolean
elsif string_compare(position_start, result^.length, "false", 5) then elsif string_compare(position_start, result^.length, "false", 5) then
result^.kind := ElnaLexerKind.boolean result^.kind := ElnaLexerKind.boolean
else
result^.kind := ElnaLexerKind.identifier
end; end;
return result return result
end; end;
(**
 * Builds a token for the single character found at start_position, choosing
 * the kind from that character: ':' colon, '.' dot, '(' left_paren,
 * '-' minus, '<' less_than, '>' greater_than.
 * The newer revision takes the source position and creates the token through
 * elna_lexer_token_create instead of allocating it up front.
 * NOTE(review): in the newer revision result is assigned only inside a
 * matching branch, so any other character returns result unassigned. The
 * older revision always returned an allocated token (with kind left unset).
 *)
proc elna_lexer_classify_finalize(start_position: Word) -> ^ElnaLexerToken; proc elna_lexer_classify_finalize(start_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
var var
character: Word; character: Word;
result: ^ElnaLexerToken; result: ^ElnaLexerToken;
begin begin
result := malloc(#size(ElnaLexerToken));
character := _load_byte(start_position); character := _load_byte(start_position);
if character = ':' then if character = ':' then
result^.kind := ElnaLexerKind.colon result := elna_lexer_token_create(ElnaLexerKind.colon, position)
elsif character = '.' then elsif character = '.' then
result^.kind := ElnaLexerKind.dot result := elna_lexer_token_create(ElnaLexerKind.dot, position)
elsif character = '(' then elsif character = '(' then
result^.kind := ElnaLexerKind.left_paren result := elna_lexer_token_create(ElnaLexerKind.left_paren, position)
elsif character = '-' then elsif character = '-' then
result^.kind := ElnaLexerKind.minus result := elna_lexer_token_create(ElnaLexerKind.minus, position)
elsif character = '<' then elsif character = '<' then
result^.kind := ElnaLexerKind.less_than result := elna_lexer_token_create(ElnaLexerKind.less_than, position)
elsif character = '>' then elsif character = '>' then
result^.kind := ElnaLexerKind.greater_than result := elna_lexer_token_create(ElnaLexerKind.greater_than, position)
end; end;
return result return result
end; end;
proc elna_lexer_classify_single(start_position: Word) -> ^ElnaLexerToken; proc elna_lexer_classify_single(start_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
var var
character: Word; character: Word;
result: ^ElnaLexerToken; result: ^ElnaLexerToken;
@@ -4951,40 +4973,40 @@ begin
character := _load_byte(start_position); character := _load_byte(start_position);
if character = ';' then if character = ';' then
result^.kind := ElnaLexerKind.semicolon result := elna_lexer_token_create(ElnaLexerKind.semicolon, position)
elsif character = ',' then elsif character = ',' then
result^.kind := ElnaLexerKind.comma result := elna_lexer_token_create(ElnaLexerKind.comma, position)
elsif character = ')' then elsif character = ')' then
result^.kind := ElnaLexerKind.right_paren result := elna_lexer_token_create(ElnaLexerKind.right_paren, position)
elsif character = '@' then elsif character = '@' then
result^.kind := ElnaLexerKind.at result := elna_lexer_token_create(ElnaLexerKind.at, position)
elsif character = '~' then elsif character = '~' then
result^.kind := ElnaLexerKind.not result := elna_lexer_token_create(ElnaLexerKind.not, position)
elsif character = '&' then elsif character = '&' then
result^.kind := ElnaLexerKind.and result := elna_lexer_token_create(ElnaLexerKind.and, position)
elsif character = '+' then elsif character = '+' then
result^.kind := ElnaLexerKind.plus result := elna_lexer_token_create(ElnaLexerKind.plus, position)
elsif character = '*' then elsif character = '*' then
result^.kind := ElnaLexerKind.multiplication result := elna_lexer_token_create(ElnaLexerKind.multiplication, position)
elsif character = '=' then elsif character = '=' then
result^.kind := ElnaLexerKind.equals result := elna_lexer_token_create(ElnaLexerKind.equals, position)
elsif character = '%' then elsif character = '%' then
result^.kind := ElnaLexerKind.remainder result := elna_lexer_token_create(ElnaLexerKind.remainder, position)
elsif character = '/' then elsif character = '/' then
result^.kind := ElnaLexerKind.division result := elna_lexer_token_create(ElnaLexerKind.division, position)
elsif character = '.' then elsif character = '.' then
result^.kind := ElnaLexerKind.dot result := elna_lexer_token_create(ElnaLexerKind.dot, position)
elsif character = '^' then elsif character = '^' then
result^.kind := ElnaLexerKind.hat result := elna_lexer_token_create(ElnaLexerKind.hat, position)
elsif character = '[' then elsif character = '[' then
result^.kind := ElnaLexerKind.left_square result := elna_lexer_token_create(ElnaLexerKind.left_square, position)
elsif character = ']' then elsif character = ']' then
result^.kind := ElnaLexerKind.right_square result := elna_lexer_token_create(ElnaLexerKind.right_square, position)
end; end;
return result return result
end; end;
proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word) -> ^ElnaLexerToken; proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
var var
first_character: Word; first_character: Word;
last_character: Word; last_character: Word;
@@ -4992,54 +5014,50 @@ var
begin begin
first_character := _load_byte(start_position); first_character := _load_byte(start_position);
last_character := _load_byte(one_before_last); last_character := _load_byte(one_before_last);
result := malloc(#size(ElnaLexerToken));
if first_character = ':' then if first_character = ':' then
result^.kind := ElnaLexerKind.assignment result := elna_lexer_token_create(ElnaLexerKind.assignment, position)
elsif first_character = '<' then elsif first_character = '<' then
if last_character = '=' then if last_character = '=' then
result^.kind := ElnaLexerKind.less_equal result := elna_lexer_token_create(ElnaLexerKind.less_equal, position)
elsif last_character = '>' then elsif last_character = '>' then
result^.kind := ElnaLexerKind.not_equal result := elna_lexer_token_create(ElnaLexerKind.not_equal, position)
end end
elsif first_character = '>' then elsif first_character = '>' then
if last_character = '=' then if last_character = '=' then
result^.kind := ElnaLexerKind.greater_equal result := elna_lexer_token_create(ElnaLexerKind.greater_equal, position)
end end
elsif first_character = '-' then elsif first_character = '-' then
result^.kind := ElnaLexerKind.arrow result := elna_lexer_token_create(ElnaLexerKind.arrow, position)
end; end;
return result return result
end; end;
(**
 * Builds a token for delimited text running from start_position to
 * end_position. The kind is chosen from the first byte: '(' comment,
 * '\'' character, '"' string. start and length (end_position minus
 * start_position) are stored in the token.
 * The newer revision creates the token through elna_lexer_token_create
 * before filling in start and length; the older revision allocated first
 * and set the kind last.
 * NOTE(review): in the newer revision result is assigned only when the
 * delimiter matches one of the three branches; for any other byte the
 * following writes to result^.start and result^.length go through an
 * unassigned pointer.
 *)
proc elna_lexer_classify_delimited(start_position: Word, end_position: Word) -> ^ElnaLexerToken; proc elna_lexer_classify_delimited(start_position: Word, end_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
var var
delimiter: Word; delimiter: Word;
result: ^ElnaLexerToken; result: ^ElnaLexerToken;
begin begin
delimiter := _load_byte(start_position); delimiter := _load_byte(start_position);
result := malloc(#size(ElnaLexerToken));
if delimiter = '(' then
result := elna_lexer_token_create(ElnaLexerKind.comment, position)
elsif delimiter = '\'' then
result := elna_lexer_token_create(ElnaLexerKind.character, position)
elsif delimiter = '"' then
result := elna_lexer_token_create(ElnaLexerKind.string, position)
end;
result^.start := start_position; result^.start := start_position;
result^.length := end_position - start_position; result^.length := end_position - start_position;
if delimiter = '(' then
result^.kind := ElnaLexerKind.comment
elsif delimiter = '\'' then
result^.kind:= ElnaLexerKind.character
elsif delimiter = '"' then
result^.kind := ElnaLexerKind.string
end;
return result return result
end; end;
proc elna_lexer_classify_integer(start_position: Word, end_position: Word) -> ^ElnaLexerToken; proc elna_lexer_classify_integer(start_position: Word, end_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
var var
result: ^ElnaLexerToken; result: ^ElnaLexerToken;
begin begin
result := malloc(#size(ElnaLexerToken)); result := elna_lexer_token_create(ElnaLexerKind.integer, position);
result^.kind := ElnaLexerKind.integer;
result^.start := start_position; result^.start := start_position;
result^.length := end_position - start_position; result^.length := end_position - start_position;
@@ -5054,31 +5072,32 @@ begin
if action_to_perform = ElnaLexerAction.none then if action_to_perform = ElnaLexerAction.none then
elsif action_to_perform = ElnaLexerAction.accumulate then elsif action_to_perform = ElnaLexerAction.accumulate then
cursor^.finish := cursor^.finish + 1 elna_lexer_advance(cursor)
elsif action_to_perform = ElnaLexerAction.skip then elsif action_to_perform = ElnaLexerAction.skip then
elna_lexer_classify_space(cursor^.start, @cursor^.position.end_location);
cursor^.start := cursor^.start + 1; cursor^.start := cursor^.start + 1;
cursor^.finish := cursor^.finish + 1 cursor^.finish := cursor^.finish + 1
elsif action_to_perform = ElnaLexerAction.single then elsif action_to_perform = ElnaLexerAction.single then
cursor^.finish := cursor^.finish + 1; elna_lexer_advance(cursor);
token := elna_lexer_classify_single(cursor^.start) token := elna_lexer_classify_single(cursor^.start, @cursor^.position)
elsif action_to_perform = ElnaLexerAction.eof then elsif action_to_perform = ElnaLexerAction.eof then
token := malloc(#size(ElnaLexerToken)); token := malloc(#size(ElnaLexerToken));
token^.kind := ElnaLexerKind.eof token^.kind := ElnaLexerKind.eof
elsif action_to_perform = ElnaLexerAction.finalize then elsif action_to_perform = ElnaLexerAction.finalize then
token := elna_lexer_classify_finalize(cursor^.start) token := elna_lexer_classify_finalize(cursor^.start, @cursor^.position)
elsif action_to_perform = ElnaLexerAction.composite then elsif action_to_perform = ElnaLexerAction.composite then
token := elna_lexer_classify_composite(cursor^.start, cursor^.finish); token := elna_lexer_classify_composite(cursor^.start, cursor^.finish, @cursor^.position);
cursor^.finish := cursor^.finish + 1 elna_lexer_advance(cursor)
elsif action_to_perform = ElnaLexerAction.key_id then elsif action_to_perform = ElnaLexerAction.key_id then
token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish) token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish, @cursor^.position)
elsif action_to_perform = ElnaLexerAction.integer then elsif action_to_perform = ElnaLexerAction.integer then
token := elna_lexer_classify_integer(cursor^.start, cursor^.finish) token := elna_lexer_classify_integer(cursor^.start, cursor^.finish, @cursor^.position)
elsif action_to_perform = ElnaLexerAction.delimited then elsif action_to_perform = ElnaLexerAction.delimited then
cursor^.finish := cursor^.finish + 1; elna_lexer_advance(cursor);
token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish) token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish, @cursor^.position)
end; end;
return token return token
end; end;
@@ -5095,15 +5114,24 @@ begin
return elna_lexer_execute_action(cursor, next_transition^.action, kind) return elna_lexer_execute_action(cursor, next_transition^.action, kind)
end; end;
(**
 * Newer revision: elna_lexer_classify_space updates the given location for
 * the byte at start_position. A '\n' moves to the next line and resets the
 * column to 1; any other byte advances the column by one.
 * (The older revision shown alongside is the removed
 * elna_lexer_advance_token, which called elna_lexer_execute_transition and
 * recursed until the cursor state became ElnaLexerState.finish.)
 *)
proc elna_lexer_advance_token(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; proc elna_lexer_classify_space(start_position: Word, location: ^ElnaLocation);
var var
token: ^ElnaLexerToken; character: Word;
begin begin
token := elna_lexer_execute_transition(cursor); character := _load_byte(start_position);
if cursor^.state <> ElnaLexerState.finish then
token := elna_lexer_advance_token(cursor) if character = '\n' then
location^.line := location^.line + 1;
location^.column := 1
else
location^.column := location^.column + 1
end
end; end;
return token
(**
 * Moves the cursor one step forward: increments finish and the column of
 * position.end_location together.
 * NOTE(review): the column is incremented unconditionally and the consumed
 * byte is not inspected, so a newline consumed through this path does not
 * update the line number. Confirm whether multi-line tokens can reach here.
 *)
proc elna_lexer_advance(cursor: ^ElnaLexerCursor);
begin
cursor^.finish := cursor^.finish + 1;
cursor^.position.end_location.column := cursor^.position.end_location.column + 1
end; end;
(** (**
@@ -5111,10 +5139,19 @@ end;
* Resets the lexer state for reading the next token. * Resets the lexer state for reading the next token.
*) *)
proc elna_lexer_peek(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken; proc elna_lexer_peek(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken;
var
token: ^ElnaLexerToken;
begin begin
(* Lex only when no token is cached; otherwise the cached one is returned. *)
if cursor^.token = nil then if cursor^.token = nil then
cursor^.state := ElnaLexerState.start; cursor^.state := ElnaLexerState.start;
cursor^.token := elna_lexer_advance_token(cursor)
(* Newer revision: the recursive elna_lexer_advance_token call is replaced
   by a label/goto loop that repeats the transition until the state machine
   reaches ElnaLexerState.finish, then caches the last token returned. *)
.elna_lexer_peek_loop;
token := elna_lexer_execute_transition(cursor);
if cursor^.state <> ElnaLexerState.finish then
goto elna_lexer_peek_loop
end;
cursor^.token := token
end; end;
return cursor^.token return cursor^.token
end; end;
@@ -5129,6 +5166,7 @@ begin
token := elna_lexer_peek(cursor); token := elna_lexer_peek(cursor);
cursor^.token := nil; cursor^.token := nil;
cursor^.start := cursor^.finish; cursor^.start := cursor^.finish;
memcpy(@cursor^.position.start_location, @cursor^.position.end_location, #size(ElnaLocation));
return token return token
end; end;