Trace the source code position in the lexer
This commit is contained in:
194
source/Lexer.mod
194
source/Lexer.mod
@ -212,7 +212,7 @@ BEGIN
|
||||
INC(i)
|
||||
END
|
||||
END initialize_classification;
|
||||
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: PLexerBuffer; token_end: PLexerBuffer): BOOLEAN;
|
||||
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: BufferPosition; token_end: PLexerBuffer): BOOLEAN;
|
||||
VAR
|
||||
result: BOOLEAN;
|
||||
index: CARDINAL;
|
||||
@ -222,213 +222,222 @@ BEGIN
|
||||
index := 0;
|
||||
result := TRUE;
|
||||
keyword_length := Length(keyword);
|
||||
continue := (index < keyword_length) AND (token_start <> token_end);
|
||||
continue := (index < keyword_length) AND (token_start.iterator <> token_end);
|
||||
|
||||
WHILE continue AND result DO
|
||||
result := (keyword[index] = token_start^) OR (Lower(keyword[index]) = token_start^);
|
||||
INC(token_start);
|
||||
result := (keyword[index] = token_start.iterator^) OR (Lower(keyword[index]) = token_start.iterator^);
|
||||
INC(token_start.iterator);
|
||||
INC(index);
|
||||
continue := (index < keyword_length) AND (token_start <> token_end)
|
||||
continue := (index < keyword_length) AND (token_start.iterator <> token_end)
|
||||
END;
|
||||
result := result AND (index = Length(keyword));
|
||||
|
||||
RETURN result AND (token_start = token_end)
|
||||
RETURN result AND (token_start.iterator = token_end)
|
||||
END compare_keyword;
|
||||
(* Reached the end of file.
 *
 * The only effect is to tag the token as the end-of-file token; the lexer
 * argument is not read or modified by this action. *)
PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken);
BEGIN
  token^.kind := lexerKindEof
END transition_action_eof;
|
||||
(* Move a buffer position one step forward by advancing its iterator.
 *
 * NOTE(review): only the iterator field is changed here; the line and
 * column stored in the position's location are left as they were. Confirm
 * that callers are expected to maintain them. *)
PROCEDURE increment(position: PBufferPosition);
BEGIN
  INC(position^.iterator)
END increment;
|
||||
(* Add the character to the token currently read and advance to the next
 * character.
 *
 * The token itself is not modified; only the lexer's current position is
 * moved forward through increment. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken);
BEGIN
  increment(ADR(lexer^.current))
END transition_action_accumulate;
|
||||
(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is selected from the character at the token start. If that
 * character is none of the ones listed below, token^.kind is left unchanged.
 * The branches are mutually exclusive because the same character is compared
 * against distinct literals. *)
PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken);
BEGIN
  IF lexer^.start.iterator^ = ':' THEN
    token^.kind := lexerKindColon
  ELSIF lexer^.start.iterator^ = '>' THEN
    token^.kind := lexerKindGreaterThan
  ELSIF lexer^.start.iterator^ = '<' THEN
    token^.kind := lexerKindLessThan
  ELSIF lexer^.start.iterator^ = '(' THEN
    token^.kind := lexerKindLeftParen
  ELSIF lexer^.start.iterator^ = '-' THEN
    token^.kind := lexerKindMinus
  ELSIF lexer^.start.iterator^ = '.' THEN
    token^.kind := lexerKindDot
  END
END transition_action_finalize;
|
||||
(* An action for tokens containing multiple characters.
 *
 * The kind is decided from the pair formed by the character at the token
 * start and the character at the current position. Pairs that match none of
 * the cases below leave token^.kind unchanged. In every case the current
 * position is advanced past the second character afterwards. *)
PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken);
BEGIN
  IF lexer^.start.iterator^ = '<' THEN
    (* Two composite tokens begin with a less-than sign. *)
    IF lexer^.current.iterator^ = '>' THEN
      token^.kind := lexerKindNotEqual
    ELSIF lexer^.current.iterator^ = '=' THEN
      token^.kind := lexerKindLessEqual
    END
  ELSIF (lexer^.start.iterator^ = '>') AND (lexer^.current.iterator^ = '=') THEN
    token^.kind := lexerKindGreaterEqual
  ELSIF (lexer^.start.iterator^ = '.') AND (lexer^.current.iterator^ = '.') THEN
    token^.kind := lexerKindRange
  ELSIF (lexer^.start.iterator^ = ':') AND (lexer^.current.iterator^ = '=') THEN
    token^.kind := lexerKindAssignment
  ELSIF (lexer^.start.iterator^ = '-') AND (lexer^.current.iterator^ = '>') THEN
    token^.kind := lexerKindArrow
  END;

  increment(ADR(lexer^.current))
END transition_action_composite;
|
||||
(* Skip a space.
 *
 * Moves the token start one character forward and keeps the source location
 * in step: when the character being skipped is a line feed (code 10), the
 * line number is incremented and the column is reset to 1. The current
 * position is then made equal to the new start.
 *
 * The line-feed test is taken on the character that is skipped, before the
 * position is advanced. Testing after the advance would inspect the next
 * character instead, so a line feed directly following a token would never
 * be counted. *)
PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken);
VAR
  skipped_newline: BOOLEAN;
BEGIN
  skipped_newline := (ORD(lexer^.start.iterator^) = 10);

  increment(ADR(lexer^.start));

  (* Applied after the advance so the location describes the new position. *)
  IF skipped_newline THEN
    INC(lexer^.start.location.line);
    lexer^.start.location.column := 1
  END;
  lexer^.current := lexer^.start
END transition_action_skip;
|
||||
(* Delimited string action.
 *
 * The token kind is chosen from the character at the token start:
 *   - an opening parenthesis marks a comment;
 *   - a double quote yields lexerKindCharacter;
 *   - a single quote yields lexerKindString.
 * For the two quoted forms the text from the token start up to and including
 * the current character is copied into token^.stringKind, which is zeroed
 * first. Afterwards the current position is advanced by one character.
 *
 * NOTE(review): the copied length is not checked against
 * TSIZE(ShortString); confirm that literals longer than the buffer cannot
 * reach this action.
 * NOTE(review): double quotes map to lexerKindCharacter and single quotes to
 * lexerKindString; confirm this mapping is intended. *)
PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken);
VAR
  text_length: CARDINAL;
BEGIN
  IF lexer^.start.iterator^ = '(' THEN
    token^.kind := lexerKindComment
  ELSIF lexer^.start.iterator^ = '"' THEN
    (* Length is the distance between the two positions plus one. *)
    text_length := lexer^.current.iterator;
    DEC(text_length, lexer^.start.iterator);
    INC(text_length);

    MemZero(ADR(token^.stringKind), TSIZE(ShortString));
    MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));

    token^.kind := lexerKindCharacter
  ELSIF lexer^.start.iterator^ = "'" THEN
    (* Length is the distance between the two positions plus one. *)
    text_length := lexer^.current.iterator;
    DEC(text_length, lexer^.start.iterator);
    INC(text_length);

    MemZero(ADR(token^.stringKind), TSIZE(ShortString));
    MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));

    token^.kind := lexerKindString
  END;

  increment(ADR(lexer^.current))
END transition_action_delimited;
|
||||
(* Finalize keyword OR identifier. *)
|
||||
PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken);
|
||||
BEGIN
|
||||
token^.kind := lexerKindIdentifier;
|
||||
|
||||
token^.identifierKind[1] := lexer^.current;
|
||||
DEC(token^.identifierKind[1], lexer^.start);
|
||||
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
|
||||
token^.identifierKind[1] := lexer^.current.iterator;
|
||||
DEC(token^.identifierKind[1], lexer^.start.iterator);
|
||||
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
|
||||
|
||||
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindProgram
|
||||
END;
|
||||
IF compare_keyword('IMPORT', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindImport
|
||||
END;
|
||||
IF compare_keyword('CONST', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('CONST', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindConst
|
||||
END;
|
||||
IF compare_keyword('VAR', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('VAR', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindVar
|
||||
END;
|
||||
IF compare_keyword('IF', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('IF', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindIf
|
||||
END;
|
||||
IF compare_keyword('THEN', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('THEN', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindThen
|
||||
END;
|
||||
IF compare_keyword('ELSIF', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindElsif
|
||||
END;
|
||||
IF compare_keyword('ELSE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindElse
|
||||
END;
|
||||
IF compare_keyword('WHILE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindWhile
|
||||
END;
|
||||
IF compare_keyword('DO', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('DO', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindDo
|
||||
END;
|
||||
IF compare_keyword('proc', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('proc', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindProc
|
||||
END;
|
||||
IF compare_keyword('BEGIN', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindBegin
|
||||
END;
|
||||
IF compare_keyword('END', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('END', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindEnd
|
||||
END;
|
||||
IF compare_keyword('TYPE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindType
|
||||
END;
|
||||
IF compare_keyword('RECORD', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindRecord
|
||||
END;
|
||||
IF compare_keyword('UNION', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('UNION', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindUnion
|
||||
END;
|
||||
IF compare_keyword('NIL', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('NIL', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindNull
|
||||
END;
|
||||
IF compare_keyword('AND', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('AND', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindAnd
|
||||
END;
|
||||
IF compare_keyword('OR', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('OR', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindOr
|
||||
END;
|
||||
IF compare_keyword('RETURN', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindReturn
|
||||
END;
|
||||
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindDefinition
|
||||
END;
|
||||
IF compare_keyword('TO', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('TO', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindTo
|
||||
END;
|
||||
IF compare_keyword('CASE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('CASE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindCase
|
||||
END;
|
||||
IF compare_keyword('OF', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('OF', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindOf
|
||||
END;
|
||||
IF compare_keyword('FROM', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('FROM', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindFrom
|
||||
END;
|
||||
IF compare_keyword('MODULE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindModule
|
||||
END;
|
||||
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindImplementation
|
||||
END;
|
||||
IF compare_keyword('POINTER', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindPointer
|
||||
END;
|
||||
IF compare_keyword('ARRAY', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindArray
|
||||
END;
|
||||
IF compare_keyword('TRUE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindBoolean;
|
||||
token^.booleanKind := TRUE
|
||||
END;
|
||||
IF compare_keyword('FALSE', lexer^.start, lexer^.current) THEN
|
||||
IF compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) THEN
|
||||
token^.kind := lexerKindBoolean;
|
||||
token^.booleanKind := FALSE
|
||||
END
|
||||
@ -437,52 +446,52 @@ END transition_action_key_id;
|
||||
* followed by other characters forming a composite token. *)
|
||||
PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken);
(* Classifies a one-character token from the character at the current
 * position and then advances the current position by one character.
 * A character that matches none of the cases leaves token^.kind unchanged.
 *
 * The whole position record is passed to increment, as in the other
 * actions; increment expects a pointer to a buffer position, not a pointer
 * to its iterator field. *)
BEGIN
  IF lexer^.current.iterator^ = '&' THEN
    token^.kind := lexerKindAnd
  ELSIF lexer^.current.iterator^ = ';' THEN
    token^.kind := lexerKindSemicolon
  ELSIF lexer^.current.iterator^ = ',' THEN
    token^.kind := lexerKindComma
  ELSIF lexer^.current.iterator^ = '~' THEN
    token^.kind := lexerKindTilde
  ELSIF lexer^.current.iterator^ = ')' THEN
    token^.kind := lexerKindRightParen
  ELSIF lexer^.current.iterator^ = '[' THEN
    token^.kind := lexerKindLeftSquare
  ELSIF lexer^.current.iterator^ = ']' THEN
    token^.kind := lexerKindRightSquare
  ELSIF lexer^.current.iterator^ = '^' THEN
    token^.kind := lexerKindHat
  ELSIF lexer^.current.iterator^ = '=' THEN
    token^.kind := lexerKindEqual
  ELSIF lexer^.current.iterator^ = '+' THEN
    token^.kind := lexerKindPlus
  ELSIF lexer^.current.iterator^ = '*' THEN
    token^.kind := lexerKindAsterisk
  ELSIF lexer^.current.iterator^ = '/' THEN
    token^.kind := lexerKindDivision
  ELSIF lexer^.current.iterator^ = '%' THEN
    token^.kind := lexerKindRemainder
  ELSIF lexer^.current.iterator^ = '@' THEN
    token^.kind := lexerKindAt
  ELSIF lexer^.current.iterator^ = '|' THEN
    token^.kind := lexerKindPipe
  END;

  increment(ADR(lexer^.current))
END transition_action_single;
|
||||
(* Handle an integer literal. *)
|
||||
PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken);
|
||||
@ -493,20 +502,20 @@ VAR
|
||||
BEGIN
|
||||
token^.kind := lexerKindInteger;
|
||||
|
||||
integer_length := lexer^.current;
|
||||
DEC(integer_length, lexer^.start);
|
||||
integer_length := lexer^.current.iterator;
|
||||
DEC(integer_length, lexer^.start.iterator);
|
||||
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
|
||||
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
|
||||
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
|
||||
|
||||
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
|
||||
token^.integerKind := StringToInteger(buffer, 10, found);
|
||||
buffer := KillString(buffer)
|
||||
END transition_action_integer;
|
||||
PROCEDURE set_default_transition(current_state: TransitionState; DefaultAction: TransitionAction; next_state: TransitionState);
|
||||
PROCEDURE set_default_transition(current_state: TransitionState; default_action: TransitionAction; next_state: TransitionState);
|
||||
VAR
|
||||
default_transition: Transition;
|
||||
BEGIN
|
||||
default_transition.action := DefaultAction;
|
||||
default_transition.action := default_action;
|
||||
default_transition.next_state := next_state;
|
||||
|
||||
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
|
||||
@ -807,7 +816,7 @@ BEGIN
|
||||
current_state := transitionStateStart;
|
||||
|
||||
WHILE current_state <> transitionStateEnd DO
|
||||
index1 := ORD(lexer^.current^);
|
||||
index1 := ORD(lexer^.current.iterator^);
|
||||
INC(index1);
|
||||
current_class := classification[index1];
|
||||
|
||||
@ -822,6 +831,9 @@ BEGIN
|
||||
END;
|
||||
current_state := current_transition.next_state
|
||||
END;
|
||||
result.start_location := lexer^.start.location;
|
||||
result.end_location := lexer^.current.location;
|
||||
|
||||
RETURN result
|
||||
END lexer_current;
|
||||
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
|
||||
@ -830,7 +842,9 @@ VAR
|
||||
BEGIN
|
||||
IF lexer^.length = 0 THEN
|
||||
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
|
||||
lexer^.current := lexer^.buffer
|
||||
lexer^.current.location.column := 1;
|
||||
lexer^.current.location.line := 1;
|
||||
lexer^.current.iterator := lexer^.buffer
|
||||
END;
|
||||
lexer^.start := lexer^.current;
|
||||
|
||||
|
Reference in New Issue
Block a user