Track token source position
This commit is contained in:
@@ -15,6 +15,14 @@ type
|
|||||||
first: ^ElnaListNode;
|
first: ^ElnaListNode;
|
||||||
last: ^ElnaListNode
|
last: ^ElnaListNode
|
||||||
end;
|
end;
|
||||||
|
ElnaLocation = record
|
||||||
|
line: Word;
|
||||||
|
column: Word
|
||||||
|
end;
|
||||||
|
ElnaPosition = record
|
||||||
|
start_location: ElnaLocation;
|
||||||
|
end_location: ElnaLocation
|
||||||
|
end;
|
||||||
|
|
||||||
(**
|
(**
|
||||||
* List of intermediate representation items.
|
* List of intermediate representation items.
|
||||||
@@ -441,13 +449,15 @@ type
|
|||||||
ElnaLexerToken = record
|
ElnaLexerToken = record
|
||||||
kind: ElnaLexerKind;
|
kind: ElnaLexerKind;
|
||||||
start: Word;
|
start: Word;
|
||||||
length: Word
|
length: Word;
|
||||||
|
position: ElnaPosition
|
||||||
end;
|
end;
|
||||||
ElnaLexerCursor = record
|
ElnaLexerCursor = record
|
||||||
state: ElnaLexerState;
|
state: ElnaLexerState;
|
||||||
start: Word;
|
start: Word;
|
||||||
finish: Word;
|
finish: Word;
|
||||||
token: ^ElnaLexerToken
|
token: ^ElnaLexerToken;
|
||||||
|
position: ElnaPosition
|
||||||
end;
|
end;
|
||||||
|
|
||||||
ElnaTacOperator = (
|
ElnaTacOperator = (
|
||||||
@@ -4844,7 +4854,11 @@ begin
|
|||||||
|
|
||||||
cursor^.start := code_pointer;
|
cursor^.start := code_pointer;
|
||||||
cursor^.finish := code_pointer;
|
cursor^.finish := code_pointer;
|
||||||
cursor^.token := nil
|
cursor^.token := nil;
|
||||||
|
cursor^.position.start_location.line := 1;
|
||||||
|
cursor^.position.start_location.column := 1;
|
||||||
|
cursor^.position.end_location.line := 1;
|
||||||
|
cursor^.position.end_location.column := 1
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word);
|
proc string_compare(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word);
|
||||||
@@ -4860,11 +4874,22 @@ begin
|
|||||||
return result
|
return result
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_classify_keyword(position_start: Word, position_end: Word) -> ^ElnaLexerToken;
|
proc elna_lexer_token_create(kind: ElnaLexerKind, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
var
|
var
|
||||||
result: ^ElnaLexerToken;
|
result: ^ElnaLexerToken;
|
||||||
begin
|
begin
|
||||||
result := malloc(#size(ElnaLexerToken));
|
result := malloc(#size(ElnaLexerToken));
|
||||||
|
result^.kind := kind;
|
||||||
|
memcpy(@result^.position, position, #size(ElnaPosition));
|
||||||
|
|
||||||
|
return result
|
||||||
|
end;
|
||||||
|
|
||||||
|
proc elna_lexer_classify_keyword(position_start: Word, position_end: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
|
var
|
||||||
|
result: ^ElnaLexerToken;
|
||||||
|
begin
|
||||||
|
result := elna_lexer_token_create(ElnaLexerKind.identifier, position);
|
||||||
result^.start := position_start;
|
result^.start := position_start;
|
||||||
result^.length := position_end - position_start;
|
result^.length := position_end - position_start;
|
||||||
|
|
||||||
@@ -4912,37 +4937,34 @@ begin
|
|||||||
result^.kind := ElnaLexerKind.boolean
|
result^.kind := ElnaLexerKind.boolean
|
||||||
elsif string_compare(position_start, result^.length, "false", 5) then
|
elsif string_compare(position_start, result^.length, "false", 5) then
|
||||||
result^.kind := ElnaLexerKind.boolean
|
result^.kind := ElnaLexerKind.boolean
|
||||||
else
|
|
||||||
result^.kind := ElnaLexerKind.identifier
|
|
||||||
end;
|
end;
|
||||||
return result
|
return result
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_classify_finalize(start_position: Word) -> ^ElnaLexerToken;
|
proc elna_lexer_classify_finalize(start_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
var
|
var
|
||||||
character: Word;
|
character: Word;
|
||||||
result: ^ElnaLexerToken;
|
result: ^ElnaLexerToken;
|
||||||
begin
|
begin
|
||||||
result := malloc(#size(ElnaLexerToken));
|
|
||||||
character := _load_byte(start_position);
|
character := _load_byte(start_position);
|
||||||
|
|
||||||
if character = ':' then
|
if character = ':' then
|
||||||
result^.kind := ElnaLexerKind.colon
|
result := elna_lexer_token_create(ElnaLexerKind.colon, position)
|
||||||
elsif character = '.' then
|
elsif character = '.' then
|
||||||
result^.kind := ElnaLexerKind.dot
|
result := elna_lexer_token_create(ElnaLexerKind.dot, position)
|
||||||
elsif character = '(' then
|
elsif character = '(' then
|
||||||
result^.kind := ElnaLexerKind.left_paren
|
result := elna_lexer_token_create(ElnaLexerKind.left_paren, position)
|
||||||
elsif character = '-' then
|
elsif character = '-' then
|
||||||
result^.kind := ElnaLexerKind.minus
|
result := elna_lexer_token_create(ElnaLexerKind.minus, position)
|
||||||
elsif character = '<' then
|
elsif character = '<' then
|
||||||
result^.kind := ElnaLexerKind.less_than
|
result := elna_lexer_token_create(ElnaLexerKind.less_than, position)
|
||||||
elsif character = '>' then
|
elsif character = '>' then
|
||||||
result^.kind := ElnaLexerKind.greater_than
|
result := elna_lexer_token_create(ElnaLexerKind.greater_than, position)
|
||||||
end;
|
end;
|
||||||
return result
|
return result
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_classify_single(start_position: Word) -> ^ElnaLexerToken;
|
proc elna_lexer_classify_single(start_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
var
|
var
|
||||||
character: Word;
|
character: Word;
|
||||||
result: ^ElnaLexerToken;
|
result: ^ElnaLexerToken;
|
||||||
@@ -4951,40 +4973,40 @@ begin
|
|||||||
character := _load_byte(start_position);
|
character := _load_byte(start_position);
|
||||||
|
|
||||||
if character = ';' then
|
if character = ';' then
|
||||||
result^.kind := ElnaLexerKind.semicolon
|
result := elna_lexer_token_create(ElnaLexerKind.semicolon, position)
|
||||||
elsif character = ',' then
|
elsif character = ',' then
|
||||||
result^.kind := ElnaLexerKind.comma
|
result := elna_lexer_token_create(ElnaLexerKind.comma, position)
|
||||||
elsif character = ')' then
|
elsif character = ')' then
|
||||||
result^.kind := ElnaLexerKind.right_paren
|
result := elna_lexer_token_create(ElnaLexerKind.right_paren, position)
|
||||||
elsif character = '@' then
|
elsif character = '@' then
|
||||||
result^.kind := ElnaLexerKind.at
|
result := elna_lexer_token_create(ElnaLexerKind.at, position)
|
||||||
elsif character = '~' then
|
elsif character = '~' then
|
||||||
result^.kind := ElnaLexerKind.not
|
result := elna_lexer_token_create(ElnaLexerKind.not, position)
|
||||||
elsif character = '&' then
|
elsif character = '&' then
|
||||||
result^.kind := ElnaLexerKind.and
|
result := elna_lexer_token_create(ElnaLexerKind.and, position)
|
||||||
elsif character = '+' then
|
elsif character = '+' then
|
||||||
result^.kind := ElnaLexerKind.plus
|
result := elna_lexer_token_create(ElnaLexerKind.plus, position)
|
||||||
elsif character = '*' then
|
elsif character = '*' then
|
||||||
result^.kind := ElnaLexerKind.multiplication
|
result := elna_lexer_token_create(ElnaLexerKind.multiplication, position)
|
||||||
elsif character = '=' then
|
elsif character = '=' then
|
||||||
result^.kind := ElnaLexerKind.equals
|
result := elna_lexer_token_create(ElnaLexerKind.equals, position)
|
||||||
elsif character = '%' then
|
elsif character = '%' then
|
||||||
result^.kind := ElnaLexerKind.remainder
|
result := elna_lexer_token_create(ElnaLexerKind.remainder, position)
|
||||||
elsif character = '/' then
|
elsif character = '/' then
|
||||||
result^.kind := ElnaLexerKind.division
|
result := elna_lexer_token_create(ElnaLexerKind.division, position)
|
||||||
elsif character = '.' then
|
elsif character = '.' then
|
||||||
result^.kind := ElnaLexerKind.dot
|
result := elna_lexer_token_create(ElnaLexerKind.dot, position)
|
||||||
elsif character = '^' then
|
elsif character = '^' then
|
||||||
result^.kind := ElnaLexerKind.hat
|
result := elna_lexer_token_create(ElnaLexerKind.hat, position)
|
||||||
elsif character = '[' then
|
elsif character = '[' then
|
||||||
result^.kind := ElnaLexerKind.left_square
|
result := elna_lexer_token_create(ElnaLexerKind.left_square, position)
|
||||||
elsif character = ']' then
|
elsif character = ']' then
|
||||||
result^.kind := ElnaLexerKind.right_square
|
result := elna_lexer_token_create(ElnaLexerKind.right_square, position)
|
||||||
end;
|
end;
|
||||||
return result
|
return result
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word) -> ^ElnaLexerToken;
|
proc elna_lexer_classify_composite(start_position: Word, one_before_last: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
var
|
var
|
||||||
first_character: Word;
|
first_character: Word;
|
||||||
last_character: Word;
|
last_character: Word;
|
||||||
@@ -4992,54 +5014,50 @@ var
|
|||||||
begin
|
begin
|
||||||
first_character := _load_byte(start_position);
|
first_character := _load_byte(start_position);
|
||||||
last_character := _load_byte(one_before_last);
|
last_character := _load_byte(one_before_last);
|
||||||
result := malloc(#size(ElnaLexerToken));
|
|
||||||
|
|
||||||
if first_character = ':' then
|
if first_character = ':' then
|
||||||
result^.kind := ElnaLexerKind.assignment
|
result := elna_lexer_token_create(ElnaLexerKind.assignment, position)
|
||||||
elsif first_character = '<' then
|
elsif first_character = '<' then
|
||||||
if last_character = '=' then
|
if last_character = '=' then
|
||||||
result^.kind := ElnaLexerKind.less_equal
|
result := elna_lexer_token_create(ElnaLexerKind.less_equal, position)
|
||||||
elsif last_character = '>' then
|
elsif last_character = '>' then
|
||||||
result^.kind := ElnaLexerKind.not_equal
|
result := elna_lexer_token_create(ElnaLexerKind.not_equal, position)
|
||||||
end
|
end
|
||||||
elsif first_character = '>' then
|
elsif first_character = '>' then
|
||||||
if last_character = '=' then
|
if last_character = '=' then
|
||||||
result^.kind := ElnaLexerKind.greater_equal
|
result := elna_lexer_token_create(ElnaLexerKind.greater_equal, position)
|
||||||
end
|
end
|
||||||
elsif first_character = '-' then
|
elsif first_character = '-' then
|
||||||
result^.kind := ElnaLexerKind.arrow
|
result := elna_lexer_token_create(ElnaLexerKind.arrow, position)
|
||||||
end;
|
end;
|
||||||
|
|
||||||
return result
|
return result
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_classify_delimited(start_position: Word, end_position: Word) -> ^ElnaLexerToken;
|
proc elna_lexer_classify_delimited(start_position: Word, end_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
var
|
var
|
||||||
delimiter: Word;
|
delimiter: Word;
|
||||||
result: ^ElnaLexerToken;
|
result: ^ElnaLexerToken;
|
||||||
begin
|
begin
|
||||||
delimiter := _load_byte(start_position);
|
delimiter := _load_byte(start_position);
|
||||||
result := malloc(#size(ElnaLexerToken));
|
|
||||||
|
|
||||||
|
if delimiter = '(' then
|
||||||
|
result := elna_lexer_token_create(ElnaLexerKind.comment, position)
|
||||||
|
elsif delimiter = '\'' then
|
||||||
|
result := elna_lexer_token_create(ElnaLexerKind.character, position)
|
||||||
|
elsif delimiter = '"' then
|
||||||
|
result := elna_lexer_token_create(ElnaLexerKind.string, position)
|
||||||
|
end;
|
||||||
result^.start := start_position;
|
result^.start := start_position;
|
||||||
result^.length := end_position - start_position;
|
result^.length := end_position - start_position;
|
||||||
|
|
||||||
if delimiter = '(' then
|
|
||||||
result^.kind := ElnaLexerKind.comment
|
|
||||||
elsif delimiter = '\'' then
|
|
||||||
result^.kind:= ElnaLexerKind.character
|
|
||||||
elsif delimiter = '"' then
|
|
||||||
result^.kind := ElnaLexerKind.string
|
|
||||||
end;
|
|
||||||
return result
|
return result
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_classify_integer(start_position: Word, end_position: Word) -> ^ElnaLexerToken;
|
proc elna_lexer_classify_integer(start_position: Word, end_position: Word, position: ^ElnaPosition) -> ^ElnaLexerToken;
|
||||||
var
|
var
|
||||||
result: ^ElnaLexerToken;
|
result: ^ElnaLexerToken;
|
||||||
begin
|
begin
|
||||||
result := malloc(#size(ElnaLexerToken));
|
result := elna_lexer_token_create(ElnaLexerKind.integer, position);
|
||||||
result^.kind := ElnaLexerKind.integer;
|
|
||||||
result^.start := start_position;
|
result^.start := start_position;
|
||||||
result^.length := end_position - start_position;
|
result^.length := end_position - start_position;
|
||||||
|
|
||||||
@@ -5054,31 +5072,32 @@ begin
|
|||||||
|
|
||||||
if action_to_perform = ElnaLexerAction.none then
|
if action_to_perform = ElnaLexerAction.none then
|
||||||
elsif action_to_perform = ElnaLexerAction.accumulate then
|
elsif action_to_perform = ElnaLexerAction.accumulate then
|
||||||
cursor^.finish := cursor^.finish + 1
|
elna_lexer_advance(cursor)
|
||||||
elsif action_to_perform = ElnaLexerAction.skip then
|
elsif action_to_perform = ElnaLexerAction.skip then
|
||||||
|
elna_lexer_classify_space(cursor^.start, @cursor^.position.end_location);
|
||||||
cursor^.start := cursor^.start + 1;
|
cursor^.start := cursor^.start + 1;
|
||||||
cursor^.finish := cursor^.finish + 1
|
cursor^.finish := cursor^.finish + 1
|
||||||
elsif action_to_perform = ElnaLexerAction.single then
|
elsif action_to_perform = ElnaLexerAction.single then
|
||||||
cursor^.finish := cursor^.finish + 1;
|
elna_lexer_advance(cursor);
|
||||||
|
|
||||||
token := elna_lexer_classify_single(cursor^.start)
|
token := elna_lexer_classify_single(cursor^.start, @cursor^.position)
|
||||||
elsif action_to_perform = ElnaLexerAction.eof then
|
elsif action_to_perform = ElnaLexerAction.eof then
|
||||||
token := malloc(#size(ElnaLexerToken));
|
token := malloc(#size(ElnaLexerToken));
|
||||||
token^.kind := ElnaLexerKind.eof
|
token^.kind := ElnaLexerKind.eof
|
||||||
elsif action_to_perform = ElnaLexerAction.finalize then
|
elsif action_to_perform = ElnaLexerAction.finalize then
|
||||||
token := elna_lexer_classify_finalize(cursor^.start)
|
token := elna_lexer_classify_finalize(cursor^.start, @cursor^.position)
|
||||||
elsif action_to_perform = ElnaLexerAction.composite then
|
elsif action_to_perform = ElnaLexerAction.composite then
|
||||||
token := elna_lexer_classify_composite(cursor^.start, cursor^.finish);
|
token := elna_lexer_classify_composite(cursor^.start, cursor^.finish, @cursor^.position);
|
||||||
|
|
||||||
cursor^.finish := cursor^.finish + 1
|
elna_lexer_advance(cursor)
|
||||||
elsif action_to_perform = ElnaLexerAction.key_id then
|
elsif action_to_perform = ElnaLexerAction.key_id then
|
||||||
token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish)
|
token := elna_lexer_classify_keyword(cursor^.start, cursor^.finish, @cursor^.position)
|
||||||
elsif action_to_perform = ElnaLexerAction.integer then
|
elsif action_to_perform = ElnaLexerAction.integer then
|
||||||
token := elna_lexer_classify_integer(cursor^.start, cursor^.finish)
|
token := elna_lexer_classify_integer(cursor^.start, cursor^.finish, @cursor^.position)
|
||||||
elsif action_to_perform = ElnaLexerAction.delimited then
|
elsif action_to_perform = ElnaLexerAction.delimited then
|
||||||
cursor^.finish := cursor^.finish + 1;
|
elna_lexer_advance(cursor);
|
||||||
|
|
||||||
token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish)
|
token := elna_lexer_classify_delimited(cursor^.start, cursor^.finish, @cursor^.position)
|
||||||
end;
|
end;
|
||||||
return token
|
return token
|
||||||
end;
|
end;
|
||||||
@@ -5095,15 +5114,24 @@ begin
|
|||||||
return elna_lexer_execute_action(cursor, next_transition^.action, kind)
|
return elna_lexer_execute_action(cursor, next_transition^.action, kind)
|
||||||
end;
|
end;
|
||||||
|
|
||||||
proc elna_lexer_advance_token(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken;
|
proc elna_lexer_classify_space(start_position: Word, location: ^ElnaLocation);
|
||||||
var
|
var
|
||||||
token: ^ElnaLexerToken;
|
character: Word;
|
||||||
begin
|
begin
|
||||||
token := elna_lexer_execute_transition(cursor);
|
character := _load_byte(start_position);
|
||||||
if cursor^.state <> ElnaLexerState.finish then
|
|
||||||
token := elna_lexer_advance_token(cursor)
|
if character = '\n' then
|
||||||
|
location^.line := location^.line + 1;
|
||||||
|
location^.column := 1
|
||||||
|
else
|
||||||
|
location^.column := location^.column + 1
|
||||||
|
end
|
||||||
end;
|
end;
|
||||||
return token
|
|
||||||
|
proc elna_lexer_advance(cursor: ^ElnaLexerCursor);
|
||||||
|
begin
|
||||||
|
cursor^.finish := cursor^.finish + 1;
|
||||||
|
cursor^.position.end_location.column := cursor^.position.end_location.column + 1
|
||||||
end;
|
end;
|
||||||
|
|
||||||
(**
|
(**
|
||||||
@@ -5111,10 +5139,19 @@ end;
|
|||||||
* Resets the lexer state for reading the next token.
|
* Resets the lexer state for reading the next token.
|
||||||
*)
|
*)
|
||||||
proc elna_lexer_peek(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken;
|
proc elna_lexer_peek(cursor: ^ElnaLexerCursor) -> ^ElnaLexerToken;
|
||||||
|
var
|
||||||
|
token: ^ElnaLexerToken;
|
||||||
begin
|
begin
|
||||||
if cursor^.token = nil then
|
if cursor^.token = nil then
|
||||||
cursor^.state := ElnaLexerState.start;
|
cursor^.state := ElnaLexerState.start;
|
||||||
cursor^.token := elna_lexer_advance_token(cursor)
|
|
||||||
|
.elna_lexer_peek_loop;
|
||||||
|
token := elna_lexer_execute_transition(cursor);
|
||||||
|
|
||||||
|
if cursor^.state <> ElnaLexerState.finish then
|
||||||
|
goto elna_lexer_peek_loop
|
||||||
|
end;
|
||||||
|
cursor^.token := token
|
||||||
end;
|
end;
|
||||||
return cursor^.token
|
return cursor^.token
|
||||||
end;
|
end;
|
||||||
@@ -5129,6 +5166,7 @@ begin
|
|||||||
token := elna_lexer_peek(cursor);
|
token := elna_lexer_peek(cursor);
|
||||||
cursor^.token := nil;
|
cursor^.token := nil;
|
||||||
cursor^.start := cursor^.finish;
|
cursor^.start := cursor^.finish;
|
||||||
|
memcpy(@cursor^.position.start_location, @cursor^.position.end_location, #size(ElnaLocation));
|
||||||
|
|
||||||
return token
|
return token
|
||||||
end;
|
end;
|
||||||
|
|||||||
Reference in New Issue
Block a user