Trace the source code position in the lexer
This commit is contained in:
@ -213,7 +213,7 @@ begin
|
||||
end
|
||||
end;
|
||||
|
||||
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: PLexerBuffer, token_end: PLexerBuffer) -> BOOLEAN;
|
||||
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
|
||||
var
|
||||
result: BOOLEAN;
|
||||
index: CARDINAL;
|
||||
@ -223,17 +223,17 @@ begin
|
||||
index := 0;
|
||||
result := true;
|
||||
keyword_length := Length(keyword);
|
||||
continue := (index < keyword_length) & (token_start <> token_end);
|
||||
continue := (index < keyword_length) & (token_start.iterator <> token_end);
|
||||
|
||||
while continue & result do
|
||||
result := (keyword[index] = token_start^) or (Lower(keyword[index]) = token_start^);
|
||||
INC(token_start);
|
||||
result := (keyword[index] = token_start.iterator^) or (Lower(keyword[index]) = token_start.iterator^);
|
||||
INC(token_start.iterator);
|
||||
INC(index);
|
||||
continue := (index < keyword_length) & (token_start <> token_end)
|
||||
continue := (index < keyword_length) & (token_start.iterator <> token_end)
|
||||
end;
|
||||
result := result & (index = Length(keyword));
|
||||
|
||||
return result & (token_start = token_end)
|
||||
return result & (token_start.iterator = token_end)
|
||||
end;
|
||||
|
||||
(* Reached the end of file. *)
|
||||
@ -242,32 +242,37 @@ begin
|
||||
token^.kind := lexerKindEof
|
||||
end;
|
||||
|
||||
(* Advance a buffer position by one character.
 *
 * Only the iterator of the position is moved; the line and column stored
 * alongside it are left untouched by this procedure. *)
proc increment(position: PBufferPosition);
begin
  INC(position^.iterator)
end;
|
||||
|
||||
(* Accumulate action: the character under the cursor belongs to the token
 * being read, so the current position simply steps over it. The token
 * itself is not modified here. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
var
  cursor: PBufferPosition;
begin
  cursor := ADR(lexer^.current);
  increment(cursor)
end;
|
||||
|
||||
(* Finalize action: the character under the cursor is not part of the token,
 * so the token read so far is closed without advancing.
 *
 * The token kind is chosen from the first character of the token (the one
 * the start position points at). When that character is none of the ones
 * listed below, the kind is left as it was. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
var
  first: CHAR;
begin
  first := lexer^.start.iterator^;

  if first = ':' then
    token^.kind := lexerKindColon
  elsif first = '>' then
    token^.kind := lexerKindGreaterThan
  elsif first = '<' then
    token^.kind := lexerKindLessThan
  elsif first = '(' then
    token^.kind := lexerKindLeftParen
  elsif first = '-' then
    token^.kind := lexerKindMinus
  elsif first = '.' then
    token^.kind := lexerKindDot
  end
end;
|
||||
@ -275,34 +280,39 @@ end;
|
||||
(* Composite action: recognizes the two-character tokens.
 *
 * The first character is the one at the start position and the second is
 * the one under the cursor. The pair selects the token kind; a pair that is
 * not listed leaves the kind unchanged. In every case the cursor then moves
 * past the second character. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
var
  first: CHAR;
  second: CHAR;
begin
  (* Both characters are read before the cursor is moved. *)
  first := lexer^.start.iterator^;
  second := lexer^.current.iterator^;

  if first = '<' then
    if second = '>' then
      token^.kind := lexerKindNotEqual
    elsif second = '=' then
      token^.kind := lexerKindLessEqual
    end
  elsif (first = '>') & (second = '=') then
    token^.kind := lexerKindGreaterEqual
  elsif (first = '.') & (second = '.') then
    token^.kind := lexerKindRange
  elsif (first = ':') & (second = '=') then
    token^.kind := lexerKindAssignment
  elsif (first = '-') & (second = '>') then
    token^.kind := lexerKindArrow
  end;
  increment(ADR(lexer^.current))
end;
|
||||
|
||||
(* Skip action: drop one whitespace character that precedes a token.
 *
 * The start position is moved past the character and the current position
 * is then reset to it, so the next token begins right after the skipped
 * character. When the skipped character is a line feed (code 10), the line
 * number is incremented and the column is reset to 1. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
begin
  (* The character being skipped must be examined before the position is
   * advanced. Testing after the advance looks at the following character
   * instead, so a line feed directly after a token would never be counted. *)
  if ORD(lexer^.start.iterator^) = 10 then
    INC(lexer^.start.location.line);
    lexer^.start.location.column := 1
  end;
  increment(ADR(lexer^.start));

  lexer^.current := lexer^.start
end;
|
||||
|
||||
(* Delimited string action. *)
|
||||
@ -310,30 +320,30 @@ proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
|
||||
var
|
||||
text_length: CARDINAL;
|
||||
begin
|
||||
if lexer^.start^ = '(' then
|
||||
if lexer^.start.iterator^ = '(' then
|
||||
token^.kind := lexerKindComment
|
||||
end;
|
||||
if lexer^.start^ = '"' then
|
||||
text_length := lexer^.current;
|
||||
DEC(text_length, lexer^.start);
|
||||
if lexer^.start.iterator^ = '"' then
|
||||
text_length := lexer^.current.iterator;
|
||||
DEC(text_length, lexer^.start.iterator);
|
||||
INC(text_length);
|
||||
|
||||
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
|
||||
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
|
||||
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
|
||||
|
||||
token^.kind := lexerKindCharacter
|
||||
end;
|
||||
if lexer^.start^ = "'" then
|
||||
text_length := lexer^.current;
|
||||
DEC(text_length, lexer^.start);
|
||||
if lexer^.start.iterator^ = "'" then
|
||||
text_length := lexer^.current.iterator;
|
||||
DEC(text_length, lexer^.start.iterator);
|
||||
INC(text_length);
|
||||
|
||||
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
|
||||
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
|
||||
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
|
||||
|
||||
token^.kind := lexerKindString
|
||||
end;
|
||||
INC(lexer^.current)
|
||||
increment(ADR(lexer^.current))
|
||||
end;
|
||||
|
||||
(* Finalize keyword or identifier. *)
|
||||
@ -341,102 +351,102 @@ proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
|
||||
begin
|
||||
token^.kind := lexerKindIdentifier;
|
||||
|
||||
token^.identifierKind[1] := lexer^.current;
|
||||
DEC(token^.identifierKind[1], lexer^.start);
|
||||
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
|
||||
token^.identifierKind[1] := lexer^.current.iterator;
|
||||
DEC(token^.identifierKind[1], lexer^.start.iterator);
|
||||
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
|
||||
|
||||
if compare_keyword('PROGRAM', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindProgram
|
||||
end;
|
||||
if compare_keyword('IMPORT', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindImport
|
||||
end;
|
||||
if compare_keyword('CONST', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('CONST', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindConst
|
||||
end;
|
||||
if compare_keyword('VAR', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('VAR', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindVar
|
||||
end;
|
||||
if compare_keyword('IF', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('IF', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindIf
|
||||
end;
|
||||
if compare_keyword('THEN', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('THEN', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindThen
|
||||
end;
|
||||
if compare_keyword('ELSIF', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindElsif
|
||||
end;
|
||||
if compare_keyword('ELSE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindElse
|
||||
end;
|
||||
if compare_keyword('WHILE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindWhile
|
||||
end;
|
||||
if compare_keyword('DO', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('DO', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindDo
|
||||
end;
|
||||
if compare_keyword('proc', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindProc
|
||||
end;
|
||||
if compare_keyword('BEGIN', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindBegin
|
||||
end;
|
||||
if compare_keyword('END', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('END', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindEnd
|
||||
end;
|
||||
if compare_keyword('TYPE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindType
|
||||
end;
|
||||
if compare_keyword('RECORD', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindRecord
|
||||
end;
|
||||
if compare_keyword('UNION', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('UNION', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindUnion
|
||||
end;
|
||||
if compare_keyword('NIL', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindNull
|
||||
end;
|
||||
if compare_keyword('AND', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('AND', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindAnd
|
||||
end;
|
||||
if compare_keyword('OR', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('OR', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindOr
|
||||
end;
|
||||
if compare_keyword('RETURN', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindReturn
|
||||
end;
|
||||
if compare_keyword('DEFINITION', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindDefinition
|
||||
end;
|
||||
if compare_keyword('TO', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindTo
|
||||
end;
|
||||
if compare_keyword('CASE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindCase
|
||||
end;
|
||||
if compare_keyword('OF', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindOf
|
||||
end;
|
||||
if compare_keyword('FROM', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindFrom
|
||||
end;
|
||||
if compare_keyword('MODULE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindModule
|
||||
end;
|
||||
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindImplementation
|
||||
end;
|
||||
if compare_keyword('POINTER', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindPointer
|
||||
end;
|
||||
if compare_keyword('ARRAY', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindArray
|
||||
end;
|
||||
if compare_keyword('TRUE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindBoolean;
|
||||
token^.booleanKind := true
|
||||
end;
|
||||
if compare_keyword('FALSE', lexer^.start, lexer^.current) then
|
||||
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := lexerKindBoolean;
|
||||
token^.booleanKind := false
|
||||
end
|
||||
@ -446,52 +456,52 @@ end;
|
||||
* followed by other characters forming a composite token. *)
|
||||
proc transition_action_single(lexer: PLexer, token: PLexerToken);
begin
  (* The token kind is selected by the character under the cursor. A
   * character that is not listed leaves the kind unchanged. The cursor is
   * moved past the character in every case. *)
  if lexer^.current.iterator^ = '&' then
    token^.kind := lexerKindAnd
  end;
  if lexer^.current.iterator^ = ';' then
    token^.kind := lexerKindSemicolon
  end;
  if lexer^.current.iterator^ = ',' then
    token^.kind := lexerKindComma
  end;
  if lexer^.current.iterator^ = '~' then
    token^.kind := lexerKindTilde
  end;
  if lexer^.current.iterator^ = ')' then
    token^.kind := lexerKindRightParen
  end;
  if lexer^.current.iterator^ = '[' then
    token^.kind := lexerKindLeftSquare
  end;
  if lexer^.current.iterator^ = ']' then
    token^.kind := lexerKindRightSquare
  end;
  if lexer^.current.iterator^ = '^' then
    token^.kind := lexerKindHat
  end;
  if lexer^.current.iterator^ = '=' then
    token^.kind := lexerKindEqual
  end;
  if lexer^.current.iterator^ = '+' then
    token^.kind := lexerKindPlus
  end;
  if lexer^.current.iterator^ = '*' then
    token^.kind := lexerKindAsterisk
  end;
  if lexer^.current.iterator^ = '/' then
    token^.kind := lexerKindDivision
  end;
  if lexer^.current.iterator^ = '%' then
    token^.kind := lexerKindRemainder
  end;
  if lexer^.current.iterator^ = '@' then
    token^.kind := lexerKindAt
  end;
  if lexer^.current.iterator^ = '|' then
    token^.kind := lexerKindPipe
  end;

  (* increment takes a pointer to the whole buffer position, so the address
   * of the position record is passed here rather than the address of its
   * iterator field. *)
  increment(ADR(lexer^.current))
end;
|
||||
|
||||
(* Handle an integer literal. *)
|
||||
@ -503,21 +513,21 @@ var
|
||||
begin
|
||||
token^.kind := lexerKindInteger;
|
||||
|
||||
integer_length := lexer^.current;
|
||||
DEC(integer_length, lexer^.start);
|
||||
integer_length := lexer^.current.iterator;
|
||||
DEC(integer_length, lexer^.start.iterator);
|
||||
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
|
||||
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
|
||||
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
|
||||
|
||||
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
|
||||
token^.integerKind := StringToInteger(buffer, 10, found);
|
||||
buffer := KillString(buffer)
|
||||
end;
|
||||
|
||||
proc set_default_transition(current_state: TransitionState, DefaultAction: TransitionAction, next_state: TransitionState);
|
||||
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState);
|
||||
var
|
||||
default_transition: Transition;
|
||||
begin
|
||||
default_transition.action := DefaultAction;
|
||||
default_transition.action := default_action;
|
||||
default_transition.next_state := next_state;
|
||||
|
||||
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
|
||||
@ -821,7 +831,7 @@ begin
|
||||
current_state := transitionStateStart;
|
||||
|
||||
while current_state <> transitionStateEnd DO
|
||||
index1 := ORD(lexer^.current^);
|
||||
index1 := ORD(lexer^.current.iterator^);
|
||||
INC(index1);
|
||||
current_class := classification[index1];
|
||||
|
||||
@ -836,6 +846,9 @@ begin
|
||||
end;
|
||||
current_state := current_transition.next_state
|
||||
end;
|
||||
result.start_location := lexer^.start.location;
|
||||
result.end_location := lexer^.current.location;
|
||||
|
||||
return result
|
||||
end;
|
||||
|
||||
@ -845,7 +858,9 @@ var
|
||||
begin
|
||||
if lexer^.length = 0 then
|
||||
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
|
||||
lexer^.current := lexer^.buffer
|
||||
lexer^.current.location.column := 1;
|
||||
lexer^.current.location.line := 1;
|
||||
lexer^.current.iterator := lexer^.buffer
|
||||
end;
|
||||
lexer^.start := lexer^.current;
|
||||
|
||||
|
Reference in New Issue
Block a user