Trace the source code position in the lexer

This commit is contained in:
2025-06-12 18:44:06 +02:00
parent 90aa5a0030
commit fdaeb25f73
7 changed files with 252 additions and 232 deletions

View File

@ -212,7 +212,7 @@ BEGIN
INC(i)
END
END initialize_classification;
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: PLexerBuffer; token_end: PLexerBuffer): BOOLEAN;
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: BufferPosition; token_end: PLexerBuffer): BOOLEAN;
VAR
result: BOOLEAN;
index: CARDINAL;
@ -222,213 +222,222 @@ BEGIN
index := 0;
result := TRUE;
keyword_length := Length(keyword);
continue := (index < keyword_length) AND (token_start <> token_end);
continue := (index < keyword_length) AND (token_start.iterator <> token_end);
WHILE continue AND result DO
result := (keyword[index] = token_start^) OR (Lower(keyword[index]) = token_start^);
INC(token_start);
result := (keyword[index] = token_start.iterator^) OR (Lower(keyword[index]) = token_start.iterator^);
INC(token_start.iterator);
INC(index);
continue := (index < keyword_length) AND (token_start <> token_end)
continue := (index < keyword_length) AND (token_start.iterator <> token_end)
END;
result := result AND (index = Length(keyword));
RETURN result AND (token_start = token_end)
RETURN result AND (token_start.iterator = token_end)
END compare_keyword;
(* End of input: the produced token is marked as the end-of-file token.
 * The lexer state itself is not modified. *)
PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken);
BEGIN
  WITH token^ DO
    kind := lexerKindEof
  END
END transition_action_eof;
(* Advance the iterator of the given buffer position by one step.
 * Only the iterator field is changed; nothing else in the position record
 * is touched by this procedure.
 * NOTE(review): the position also carries a location (line and column are
 * written elsewhere in this file); confirm whether the column is meant to
 * be advanced here as well. *)
PROCEDURE increment(position: PBufferPosition);
BEGIN
INC(position^.iterator)
END increment;
(* Add the character to the token currently read and advance to the next character.
 * The token itself is not modified here; only lexer^.current moves.
 * Diff listing: the INC line is the pre-commit statement, the increment
 * call below it is its replacement. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken);
BEGIN
INC(lexer^.current)
increment(ADR(lexer^.current))
END transition_action_accumulate;
(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The kind is chosen from the first character of the token (lexer^.start):
 * ':' colon, '>' greater than, '<' less than, '(' left paren, '-' minus,
 * '.' dot. If none of these match, token^.kind is left as it was.
 * Diff listing: every test appears twice, the pre-commit form first and the
 * form reading through the iterator field second. *)
PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.start^ = ':' THEN
IF lexer^.start.iterator^ = ':' THEN
token^.kind := lexerKindColon
END;
IF lexer^.start^ = '>' THEN
IF lexer^.start.iterator^ = '>' THEN
token^.kind := lexerKindGreaterThan
END;
IF lexer^.start^ = '<' THEN
IF lexer^.start.iterator^ = '<' THEN
token^.kind := lexerKindLessThan
END;
IF lexer^.start^ = '(' THEN
IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindLeftParen
END;
IF lexer^.start^ = '-' THEN
IF lexer^.start.iterator^ = '-' THEN
token^.kind := lexerKindMinus
END;
IF lexer^.start^ = '.' THEN
IF lexer^.start.iterator^ = '.' THEN
token^.kind := lexerKindDot
END
END transition_action_finalize;
(* An action for tokens containing multiple characters.
 *
 * The first character is read at lexer^.start and the second at
 * lexer^.current. Recognized pairs: "<>" not equal, "<=" less or equal,
 * ">=" greater or equal, ".." range, ":=" assignment, "->" arrow.
 * If no pair matches, token^.kind is left unchanged. In every case the
 * current position is advanced once at the end, past the second character.
 * Diff listing: each changed statement appears twice, pre-commit form first. *)
PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.start^ = '<' THEN
IF lexer^.current^ = '>' THEN
IF lexer^.start.iterator^ = '<' THEN
IF lexer^.current.iterator^ = '>' THEN
token^.kind := lexerKindNotEqual
END;
IF lexer^.current^ = '=' THEN
IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindLessEqual
END
END;
IF (lexer^.start^ = '>') AND (lexer^.current^ = '=') THEN
IF (lexer^.start.iterator^ = '>') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindGreaterEqual
END;
IF (lexer^.start^ = '.') AND (lexer^.current^ = '.') THEN
IF (lexer^.start.iterator^ = '.') AND (lexer^.current.iterator^ = '.') THEN
token^.kind := lexerKindRange
END;
IF (lexer^.start^ = ':') AND (lexer^.current^ = '=') THEN
IF (lexer^.start.iterator^ = ':') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindAssignment
END;
IF (lexer^.start^ = '-') AND (lexer^.current^ = '>') THEN
IF (lexer^.start.iterator^ = '-') AND (lexer^.current.iterator^ = '>') THEN
token^.kind := lexerKindArrow
END;
INC(lexer^.current)
increment(ADR(lexer^.current))
END transition_action_composite;
(* Skip a space.
 *
 * The character under lexer^.start is the one being skipped. If it is a
 * line feed (code 10) the line counter of the start position is advanced
 * and its column is reset to 1; this is decided BEFORE moving on, so that
 * a line feed directly following a token is counted and the new line
 * number first applies to the character after the line feed.
 * Afterwards the start position is advanced and copied to lexer^.current,
 * so both positions (iterator and location) stay identical.
 * The token is not modified.
 * Diff listing: the two INC statements are the pre-commit body, kept for
 * reference; the statements after them are the replacement. *)
PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken);
BEGIN
INC(lexer^.current);
INC(lexer^.start)
(* Inspect the skipped character itself, not the one that follows it. *)
IF ORD(lexer^.start.iterator^) = 10 THEN
INC(lexer^.start.location.line);
lexer^.start.location.column := 1
END;
increment(ADR(lexer^.start));
lexer^.current := lexer^.start
END transition_action_skip;
(* Delimited string action.
 *
 * The opening delimiter at lexer^.start selects the token kind:
 *   left paren   - comment, no text is copied;
 *   double quote - lexerKindCharacter;
 *   single quote - lexerKindString.
 * For the two quoted forms the text from start up to and including the
 * character at current (current - start + 1 units) is copied into
 * token^.stringKind after that field has been zeroed.
 * Finally the current position is advanced past the closing delimiter.
 * NOTE(review): text_length is not checked against the size of ShortString
 * before MemCopy - confirm that longer literals cannot reach this point.
 * NOTE(review): double quote yields Character and single quote yields
 * String - confirm this mapping is intended.
 * Diff listing: each changed statement appears twice, pre-commit form first. *)
PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken);
VAR
text_length: CARDINAL;
BEGIN
IF lexer^.start^ = '(' THEN
IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindComment
END;
IF lexer^.start^ = '"' THEN
text_length := lexer^.current;
DEC(text_length, lexer^.start);
IF lexer^.start.iterator^ = '"' THEN
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter
END;
IF lexer^.start^ = "'" THEN
text_length := lexer^.current;
DEC(text_length, lexer^.start);
IF lexer^.start.iterator^ = "'" THEN
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString
END;
INC(lexer^.current)
increment(ADR(lexer^.current))
END transition_action_delimited;
(* Finalize keyword OR identifier. *)
PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken);
BEGIN
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current;
DEC(token^.identifierKind[1], lexer^.start);
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
token^.identifierKind[1] := lexer^.current.iterator;
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current) THEN
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProgram
END;
IF compare_keyword('IMPORT', lexer^.start, lexer^.current) THEN
IF compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImport
END;
IF compare_keyword('CONST', lexer^.start, lexer^.current) THEN
IF compare_keyword('CONST', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindConst
END;
IF compare_keyword('VAR', lexer^.start, lexer^.current) THEN
IF compare_keyword('VAR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindVar
END;
IF compare_keyword('IF', lexer^.start, lexer^.current) THEN
IF compare_keyword('IF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindIf
END;
IF compare_keyword('THEN', lexer^.start, lexer^.current) THEN
IF compare_keyword('THEN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindThen
END;
IF compare_keyword('ELSIF', lexer^.start, lexer^.current) THEN
IF compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElsif
END;
IF compare_keyword('ELSE', lexer^.start, lexer^.current) THEN
IF compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElse
END;
IF compare_keyword('WHILE', lexer^.start, lexer^.current) THEN
IF compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindWhile
END;
IF compare_keyword('DO', lexer^.start, lexer^.current) THEN
IF compare_keyword('DO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDo
END;
IF compare_keyword('proc', lexer^.start, lexer^.current) THEN
IF compare_keyword('proc', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProc
END;
IF compare_keyword('BEGIN', lexer^.start, lexer^.current) THEN
IF compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBegin
END;
IF compare_keyword('END', lexer^.start, lexer^.current) THEN
IF compare_keyword('END', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindEnd
END;
IF compare_keyword('TYPE', lexer^.start, lexer^.current) THEN
IF compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindType
END;
IF compare_keyword('RECORD', lexer^.start, lexer^.current) THEN
IF compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindRecord
END;
IF compare_keyword('UNION', lexer^.start, lexer^.current) THEN
IF compare_keyword('UNION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindUnion
END;
IF compare_keyword('NIL', lexer^.start, lexer^.current) THEN
IF compare_keyword('NIL', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindNull
END;
IF compare_keyword('AND', lexer^.start, lexer^.current) THEN
IF compare_keyword('AND', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindAnd
END;
IF compare_keyword('OR', lexer^.start, lexer^.current) THEN
IF compare_keyword('OR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOr
END;
IF compare_keyword('RETURN', lexer^.start, lexer^.current) THEN
IF compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindReturn
END;
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current) THEN
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDefinition
END;
IF compare_keyword('TO', lexer^.start, lexer^.current) THEN
IF compare_keyword('TO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindTo
END;
IF compare_keyword('CASE', lexer^.start, lexer^.current) THEN
IF compare_keyword('CASE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindCase
END;
IF compare_keyword('OF', lexer^.start, lexer^.current) THEN
IF compare_keyword('OF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOf
END;
IF compare_keyword('FROM', lexer^.start, lexer^.current) THEN
IF compare_keyword('FROM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindFrom
END;
IF compare_keyword('MODULE', lexer^.start, lexer^.current) THEN
IF compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindModule
END;
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) THEN
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImplementation
END;
IF compare_keyword('POINTER', lexer^.start, lexer^.current) THEN
IF compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindPointer
END;
IF compare_keyword('ARRAY', lexer^.start, lexer^.current) THEN
IF compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindArray
END;
IF compare_keyword('TRUE', lexer^.start, lexer^.current) THEN
IF compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean;
token^.booleanKind := TRUE
END;
IF compare_keyword('FALSE', lexer^.start, lexer^.current) THEN
IF compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean;
token^.booleanKind := FALSE
END
@ -437,52 +446,52 @@ END transition_action_key_id;
* followed by other characters forming a composite token. *)
PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken);
BEGIN
(* The character under lexer^.current selects the token kind; if it is none
 * of the characters tested below, token^.kind is left unchanged. The
 * current position is then advanced past that character.
 * Diff listing: every test appears twice, the pre-commit form first and the
 * form reading through the iterator field second. *)
IF lexer^.current^ = '&' THEN
IF lexer^.current.iterator^ = '&' THEN
token^.kind := lexerKindAnd
END;
IF lexer^.current^ = ';' THEN
IF lexer^.current.iterator^ = ';' THEN
token^.kind := lexerKindSemicolon
END;
IF lexer^.current^ = ',' THEN
IF lexer^.current.iterator^ = ',' THEN
token^.kind := lexerKindComma
END;
IF lexer^.current^ = '~' THEN
IF lexer^.current.iterator^ = '~' THEN
token^.kind := lexerKindTilde
END;
IF lexer^.current^ = ')' THEN
IF lexer^.current.iterator^ = ')' THEN
token^.kind := lexerKindRightParen
END;
IF lexer^.current^ = '[' THEN
IF lexer^.current.iterator^ = '[' THEN
token^.kind := lexerKindLeftSquare
END;
IF lexer^.current^ = ']' THEN
IF lexer^.current.iterator^ = ']' THEN
token^.kind := lexerKindRightSquare
END;
IF lexer^.current^ = '^' THEN
IF lexer^.current.iterator^ = '^' THEN
token^.kind := lexerKindHat
END;
IF lexer^.current^ = '=' THEN
IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindEqual
END;
IF lexer^.current^ = '+' THEN
IF lexer^.current.iterator^ = '+' THEN
token^.kind := lexerKindPlus
END;
IF lexer^.current^ = '*' THEN
IF lexer^.current.iterator^ = '*' THEN
token^.kind := lexerKindAsterisk
END;
IF lexer^.current^ = '/' THEN
IF lexer^.current.iterator^ = '/' THEN
token^.kind := lexerKindDivision
END;
IF lexer^.current^ = '%' THEN
IF lexer^.current.iterator^ = '%' THEN
token^.kind := lexerKindRemainder
END;
IF lexer^.current^ = '@' THEN
IF lexer^.current.iterator^ = '@' THEN
token^.kind := lexerKindAt
END;
IF lexer^.current^ = '|' THEN
IF lexer^.current.iterator^ = '|' THEN
token^.kind := lexerKindPipe
END;
INC(lexer^.current)
(* increment expects the address of the whole position record, as at the
 * other call sites, not the address of its iterator field. *)
increment(ADR(lexer^.current))
END transition_action_single;
(* Handle an integer literal. *)
PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken);
@ -493,20 +502,20 @@ VAR
BEGIN
token^.kind := lexerKindInteger;
integer_length := lexer^.current;
DEC(integer_length, lexer^.start);
integer_length := lexer^.current.iterator;
DEC(integer_length, lexer^.start.iterator);
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer)
END transition_action_integer;
PROCEDURE set_default_transition(current_state: TransitionState; DefaultAction: TransitionAction; next_state: TransitionState);
PROCEDURE set_default_transition(current_state: TransitionState; default_action: TransitionAction; next_state: TransitionState);
VAR
default_transition: Transition;
BEGIN
default_transition.action := DefaultAction;
default_transition.action := default_action;
default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
@ -807,7 +816,7 @@ BEGIN
current_state := transitionStateStart;
WHILE current_state <> transitionStateEnd DO
index1 := ORD(lexer^.current^);
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@ -822,6 +831,9 @@ BEGIN
END;
current_state := current_transition.next_state
END;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
RETURN result
END lexer_current;
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
@ -830,7 +842,9 @@ VAR
BEGIN
IF lexer^.length = 0 THEN
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current := lexer^.buffer
lexer^.current.location.column := 1;
lexer^.current.location.line := 1;
lexer^.current.iterator := lexer^.buffer
END;
lexer^.start := lexer^.current;