Trace the source code position in the lexer

This commit is contained in:
2025-06-12 18:45:17 +02:00
parent e3f094c8a5
commit 9341017103
7 changed files with 251 additions and 231 deletions

View File

@ -213,7 +213,7 @@ begin
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: PLexerBuffer, token_end: PLexerBuffer) -> BOOLEAN;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
var
result: BOOLEAN;
index: CARDINAL;
@ -223,17 +223,17 @@ begin
index := 0;
result := true;
keyword_length := Length(keyword);
continue := (index < keyword_length) & (token_start <> token_end);
continue := (index < keyword_length) & (token_start.iterator <> token_end);
while continue & result do
result := (keyword[index] = token_start^) or (Lower(keyword[index]) = token_start^);
INC(token_start);
result := (keyword[index] = token_start.iterator^) or (Lower(keyword[index]) = token_start.iterator^);
INC(token_start.iterator);
INC(index);
continue := (index < keyword_length) & (token_start <> token_end)
continue := (index < keyword_length) & (token_start.iterator <> token_end)
end;
result := result & (index = Length(keyword));
return result & (token_start = token_end)
return result & (token_start.iterator = token_end)
end;
(* Reached the end of file. *)
@ -242,32 +242,37 @@ begin
token^.kind := lexerKindEof
end;
(* Advance the iterator of the buffer position referenced by `position` by one
 * element. Only the iterator field is modified here; any line or column
 * bookkeeping is left to the caller. *)
proc increment(position: PBufferPosition);
begin
INC(position^.iterator)
end;
(* Add the character to the token currently read and advance to the next character.
 * Only lexer^.current is moved forward by one; lexer^.start is left where it
 * is and the `token` parameter is not used by this action. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.current)
increment(ADR(lexer^.current))
end;
(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is chosen from the character at the start of the token:
 * ':' colon, '>' greater than, '<' less than, '(' left paren, '-' minus,
 * '.' dot. Any other start character leaves token^.kind as it was on entry.
 * Neither the start nor the current position is changed by this action. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
begin
if lexer^.start^ = ':' then
if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon
end;
if lexer^.start^ = '>' then
if lexer^.start.iterator^ = '>' then
token^.kind := lexerKindGreaterThan
end;
if lexer^.start^ = '<' then
if lexer^.start.iterator^ = '<' then
token^.kind := lexerKindLessThan
end;
if lexer^.start^ = '(' then
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindLeftParen
end;
if lexer^.start^ = '-' then
if lexer^.start.iterator^ = '-' then
token^.kind := lexerKindMinus
end;
if lexer^.start^ = '.' then
if lexer^.start.iterator^ = '.' then
token^.kind := lexerKindDot
end
end;
@ -275,34 +280,39 @@ end;
(* An action for tokens containing multiple characters.
 *
 * The kind is decided by the pair (character at start, character at current):
 * "<>" not equal, "<=" less or equal, ">=" greater or equal, ".." range,
 * ":=" assignment, "->" arrow. A pair that matches none of these leaves
 * token^.kind unchanged. In every case the current position is advanced by
 * one afterwards, so the second character is consumed. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
begin
if lexer^.start^ = '<' then
if lexer^.current^ = '>' then
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
token^.kind := lexerKindNotEqual
end;
if lexer^.current^ = '=' then
if lexer^.current.iterator^ = '=' then
token^.kind := lexerKindLessEqual
end
end;
if (lexer^.start^ = '>') & (lexer^.current^ = '=') then
if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
token^.kind := lexerKindGreaterEqual
end;
if (lexer^.start^ = '.') & (lexer^.current^ = '.') then
if (lexer^.start.iterator^ = '.') & (lexer^.current.iterator^ = '.') then
token^.kind := lexerKindRange
end;
if (lexer^.start^ = ':') & (lexer^.current^ = '=') then
if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
token^.kind := lexerKindAssignment
end;
if (lexer^.start^ = '-') & (lexer^.current^ = '>') then
if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then
token^.kind := lexerKindArrow
end;
INC(lexer^.current)
increment(ADR(lexer^.current))
end;
(* Skip a space.
 *
 * The pointer-based form advances both current and start by one. The
 * BufferPosition-based form advances start by one, updates the source
 * location when a line feed (character code 10) is seen, and then copies
 * start into current so both positions agree. The `token` parameter is not
 * used.
 *
 * NOTE(review): the line feed test runs after start has been advanced, so it
 * inspects the character following the skipped one rather than the skipped
 * character itself. Confirm this is the intended place to count lines.
 * NOTE(review): the column is reset to 1 on a line feed but is not
 * incremented anywhere in this action. Verify it is advanced elsewhere. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.current);
INC(lexer^.start)
increment(ADR(lexer^.start));
if ORD(lexer^.start.iterator^) = 10 then
INC(lexer^.start.location.line);
lexer^.start.location.column := 1
end;
lexer^.current := lexer^.start
end;
(* Delimited string action. *)
@ -310,30 +320,30 @@ proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
var
text_length: CARDINAL;
begin
if lexer^.start^ = '(' then
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment
end;
if lexer^.start^ = '"' then
text_length := lexer^.current;
DEC(text_length, lexer^.start);
if lexer^.start.iterator^ = '"' then
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter
end;
if lexer^.start^ = "'" then
text_length := lexer^.current;
DEC(text_length, lexer^.start);
if lexer^.start.iterator^ = "'" then
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString
end;
INC(lexer^.current)
increment(ADR(lexer^.current))
end;
(* Finalize keyword or identifier. *)
@ -341,102 +351,102 @@ proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current;
DEC(token^.identifierKind[1], lexer^.start);
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
token^.identifierKind[1] := lexer^.current.iterator;
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('PROGRAM', lexer^.start, lexer^.current) then
if compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram
end;
if compare_keyword('IMPORT', lexer^.start, lexer^.current) then
if compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport
end;
if compare_keyword('CONST', lexer^.start, lexer^.current) then
if compare_keyword('CONST', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst
end;
if compare_keyword('VAR', lexer^.start, lexer^.current) then
if compare_keyword('VAR', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar
end;
if compare_keyword('IF', lexer^.start, lexer^.current) then
if compare_keyword('IF', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf
end;
if compare_keyword('THEN', lexer^.start, lexer^.current) then
if compare_keyword('THEN', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen
end;
if compare_keyword('ELSIF', lexer^.start, lexer^.current) then
if compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif
end;
if compare_keyword('ELSE', lexer^.start, lexer^.current) then
if compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse
end;
if compare_keyword('WHILE', lexer^.start, lexer^.current) then
if compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile
end;
if compare_keyword('DO', lexer^.start, lexer^.current) then
if compare_keyword('DO', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current) then
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc
end;
if compare_keyword('BEGIN', lexer^.start, lexer^.current) then
if compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin
end;
if compare_keyword('END', lexer^.start, lexer^.current) then
if compare_keyword('END', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd
end;
if compare_keyword('TYPE', lexer^.start, lexer^.current) then
if compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType
end;
if compare_keyword('RECORD', lexer^.start, lexer^.current) then
if compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord
end;
if compare_keyword('UNION', lexer^.start, lexer^.current) then
if compare_keyword('UNION', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current) then
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull
end;
if compare_keyword('AND', lexer^.start, lexer^.current) then
if compare_keyword('AND', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindAnd
end;
if compare_keyword('OR', lexer^.start, lexer^.current) then
if compare_keyword('OR', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr
end;
if compare_keyword('RETURN', lexer^.start, lexer^.current) then
if compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn
end;
if compare_keyword('DEFINITION', lexer^.start, lexer^.current) then
if compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefinition
end;
if compare_keyword('TO', lexer^.start, lexer^.current) then
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current) then
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current) then
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current) then
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom
end;
if compare_keyword('MODULE', lexer^.start, lexer^.current) then
if compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule
end;
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) then
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImplementation
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current) then
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current) then
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current) then
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current) then
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
@ -446,52 +456,52 @@ end;
* followed by other characters forming a composite token. *)
(* Classify the character at the current position as a one-character token
 * and then step past it. Recognised characters: & ; , ~ ) [ ] ^ = + / % @ |
 * and the asterisk. A character outside this set leaves token^.kind
 * unchanged; the position is advanced by one either way. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
begin
if lexer^.current^ = '&' then
if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd
end;
if lexer^.current^ = ';' then
if lexer^.current.iterator^ = ';' then
token^.kind := lexerKindSemicolon
end;
if lexer^.current^ = ',' then
if lexer^.current.iterator^ = ',' then
token^.kind := lexerKindComma
end;
if lexer^.current^ = '~' then
if lexer^.current.iterator^ = '~' then
token^.kind := lexerKindTilde
end;
if lexer^.current^ = ')' then
if lexer^.current.iterator^ = ')' then
token^.kind := lexerKindRightParen
end;
if lexer^.current^ = '[' then
if lexer^.current.iterator^ = '[' then
token^.kind := lexerKindLeftSquare
end;
if lexer^.current^ = ']' then
if lexer^.current.iterator^ = ']' then
token^.kind := lexerKindRightSquare
end;
if lexer^.current^ = '^' then
if lexer^.current.iterator^ = '^' then
token^.kind := lexerKindHat
end;
if lexer^.current^ = '=' then
if lexer^.current.iterator^ = '=' then
token^.kind := lexerKindEqual
end;
if lexer^.current^ = '+' then
if lexer^.current.iterator^ = '+' then
token^.kind := lexerKindPlus
end;
if lexer^.current^ = '*' then
if lexer^.current.iterator^ = '*' then
token^.kind := lexerKindAsterisk
end;
if lexer^.current^ = '/' then
if lexer^.current.iterator^ = '/' then
token^.kind := lexerKindDivision
end;
if lexer^.current^ = '%' then
if lexer^.current.iterator^ = '%' then
token^.kind := lexerKindRemainder
end;
if lexer^.current^ = '@' then
if lexer^.current.iterator^ = '@' then
token^.kind := lexerKindAt
end;
if lexer^.current^ = '|' then
if lexer^.current.iterator^ = '|' then
token^.kind := lexerKindPipe
end;
(* NOTE(review): increment is declared with a PBufferPosition parameter and
 * the other visible actions pass the address of the whole position, e.g.
 * ADR(lexer^.current). The call below passes the address of the iterator
 * field instead, which only refers to the same storage if iterator is the
 * first field of the position record. Confirm, or pass ADR(lexer^.current). *)
INC(lexer^.current)
increment(ADR(lexer^.current.iterator))
end;
(* Handle an integer literal. *)
@ -503,21 +513,21 @@ var
begin
token^.kind := lexerKindInteger;
integer_length := lexer^.current;
DEC(integer_length, lexer^.start);
integer_length := lexer^.current.iterator;
DEC(integer_length, lexer^.start.iterator);
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer)
end;
proc set_default_transition(current_state: TransitionState, DefaultAction: TransitionAction, next_state: TransitionState);
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState);
var
default_transition: Transition;
begin
default_transition.action := DefaultAction;
default_transition.action := default_action;
default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
@ -821,7 +831,7 @@ begin
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
index1 := ORD(lexer^.current^);
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@ -836,6 +846,9 @@ begin
end;
current_state := current_transition.next_state
end;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
return result
end;
@ -845,7 +858,9 @@ var
begin
if lexer^.length = 0 then
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current := lexer^.buffer
lexer^.current.location.column := 1;
lexer^.current.location.line := 1;
lexer^.current.iterator := lexer^.buffer
end;
lexer^.start := lexer^.current;