Trace the source code position in the lexer

This commit is contained in:
2025-06-12 18:44:06 +02:00
parent 90aa5a0030
commit fdaeb25f73
7 changed files with 252 additions and 232 deletions

View File

@@ -4,5 +4,9 @@ TYPE
ShortString = ARRAY[1..256] OF CHAR; ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR; Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier; PIdentifier = POINTER TO Identifier;
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common. END Common.

View File

@@ -2,17 +2,22 @@ DEFINITION MODULE Lexer;
FROM FIO IMPORT File; FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString; FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE TYPE
PLexerBuffer = POINTER TO CHAR; PLexerBuffer = POINTER TO CHAR;
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
Lexer = RECORD Lexer = RECORD
input: File; input: File;
buffer: PLexerBuffer; buffer: PLexerBuffer;
size: CARDINAL; size: CARDINAL;
length: CARDINAL; length: CARDINAL;
start: PLexerBuffer; start: BufferPosition;
current: PLexerBuffer current: BufferPosition
END; END;
PLexer = POINTER TO Lexer; PLexer = POINTER TO Lexer;
LexerKind = ( LexerKind = (
@@ -86,7 +91,9 @@ TYPE
lexerKindIdentifier: identifierKind: Identifier | lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER | lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString lexerKindString: stringKind: ShortString
END END;
start_location: TextLocation;
end_location: TextLocation
END; END;
PLexerToken = POINTER TO LexerToken; PLexerToken = POINTER TO LexerToken;

View File

@@ -212,7 +212,7 @@ BEGIN
INC(i) INC(i)
END END
END initialize_classification; END initialize_classification;
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: PLexerBuffer; token_end: PLexerBuffer): BOOLEAN; PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: BufferPosition; token_end: PLexerBuffer): BOOLEAN;
VAR VAR
result: BOOLEAN; result: BOOLEAN;
index: CARDINAL; index: CARDINAL;
@@ -222,213 +222,222 @@ BEGIN
index := 0; index := 0;
result := TRUE; result := TRUE;
keyword_length := Length(keyword); keyword_length := Length(keyword);
continue := (index < keyword_length) AND (token_start <> token_end); continue := (index < keyword_length) AND (token_start.iterator <> token_end);
WHILE continue AND result DO WHILE continue AND result DO
result := (keyword[index] = token_start^) OR (Lower(keyword[index]) = token_start^); result := (keyword[index] = token_start.iterator^) OR (Lower(keyword[index]) = token_start.iterator^);
INC(token_start); INC(token_start.iterator);
INC(index); INC(index);
continue := (index < keyword_length) AND (token_start <> token_end) continue := (index < keyword_length) AND (token_start.iterator <> token_end)
END; END;
result := result AND (index = Length(keyword)); result := result AND (index = Length(keyword));
RETURN result AND (token_start = token_end) RETURN result AND (token_start.iterator = token_end)
END compare_keyword; END compare_keyword;
(* Reached the end of file. *) (* Reached the end of file. *)
PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
token^.kind := lexerKindEof token^.kind := lexerKindEof
END transition_action_eof; END transition_action_eof;
PROCEDURE increment(position: PBufferPosition);
BEGIN
INC(position^.iterator)
END increment;
(* Add the character to the token currently read and advance to the next character. *) (* Add the character to the token currently read and advance to the next character. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
INC(lexer^.current) increment(ADR(lexer^.current))
END transition_action_accumulate; END transition_action_accumulate;
(* The current character is not a part of the token. Finish the token already (* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *) * read. Don't advance to the next character. *)
PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
IF lexer^.start^ = ':' THEN IF lexer^.start.iterator^ = ':' THEN
token^.kind := lexerKindColon token^.kind := lexerKindColon
END; END;
IF lexer^.start^ = '>' THEN IF lexer^.start.iterator^ = '>' THEN
token^.kind := lexerKindGreaterThan token^.kind := lexerKindGreaterThan
END; END;
IF lexer^.start^ = '<' THEN IF lexer^.start.iterator^ = '<' THEN
token^.kind := lexerKindLessThan token^.kind := lexerKindLessThan
END; END;
IF lexer^.start^ = '(' THEN IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindLeftParen token^.kind := lexerKindLeftParen
END; END;
IF lexer^.start^ = '-' THEN IF lexer^.start.iterator^ = '-' THEN
token^.kind := lexerKindMinus token^.kind := lexerKindMinus
END; END;
IF lexer^.start^ = '.' THEN IF lexer^.start.iterator^ = '.' THEN
token^.kind := lexerKindDot token^.kind := lexerKindDot
END END
END transition_action_finalize; END transition_action_finalize;
(* An action for tokens containing multiple characters. *) (* An action for tokens containing multiple characters. *)
PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
IF lexer^.start^ = '<' THEN IF lexer^.start.iterator^ = '<' THEN
IF lexer^.current^ = '>' THEN IF lexer^.current.iterator^ = '>' THEN
token^.kind := lexerKindNotEqual token^.kind := lexerKindNotEqual
END; END;
IF lexer^.current^ = '=' THEN IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindLessEqual token^.kind := lexerKindLessEqual
END END
END; END;
IF (lexer^.start^ = '>') AND (lexer^.current^ = '=') THEN IF (lexer^.start.iterator^ = '>') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindGreaterEqual token^.kind := lexerKindGreaterEqual
END; END;
IF (lexer^.start^ = '.') AND (lexer^.current^ = '.') THEN IF (lexer^.start.iterator^ = '.') AND (lexer^.current.iterator^ = '.') THEN
token^.kind := lexerKindRange token^.kind := lexerKindRange
END; END;
IF (lexer^.start^ = ':') AND (lexer^.current^ = '=') THEN IF (lexer^.start.iterator^ = ':') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindAssignment token^.kind := lexerKindAssignment
END; END;
IF (lexer^.start^ = '-') AND (lexer^.current^ = '>') THEN IF (lexer^.start.iterator^ = '-') AND (lexer^.current.iterator^ = '>') THEN
token^.kind := lexerKindArrow token^.kind := lexerKindArrow
END; END;
INC(lexer^.current) increment(ADR(lexer^.current))
END transition_action_composite; END transition_action_composite;
(* Skip a space. *) (* Skip a space. *)
PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
INC(lexer^.current); increment(ADR(lexer^.start));
INC(lexer^.start)
IF ORD(lexer^.start.iterator^) = 10 THEN
INC(lexer^.start.location.line);
lexer^.start.location.column := 1
END;
lexer^.current := lexer^.start
END transition_action_skip; END transition_action_skip;
(* Delimited string action. *) (* Delimited string action. *)
PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken);
VAR VAR
text_length: CARDINAL; text_length: CARDINAL;
BEGIN BEGIN
IF lexer^.start^ = '(' THEN IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindComment token^.kind := lexerKindComment
END; END;
IF lexer^.start^ = '"' THEN IF lexer^.start.iterator^ = '"' THEN
text_length := lexer^.current; text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start); DEC(text_length, lexer^.start.iterator);
INC(text_length); INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString)); MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind)); MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter token^.kind := lexerKindCharacter
END; END;
IF lexer^.start^ = "'" THEN IF lexer^.start.iterator^ = "'" THEN
text_length := lexer^.current; text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start); DEC(text_length, lexer^.start.iterator);
INC(text_length); INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString)); MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind)); MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString token^.kind := lexerKindString
END; END;
INC(lexer^.current) increment(ADR(lexer^.current))
END transition_action_delimited; END transition_action_delimited;
(* Finalize keyword OR identifier. *) (* Finalize keyword OR identifier. *)
PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
token^.kind := lexerKindIdentifier; token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current; token^.identifierKind[1] := lexer^.current.iterator;
DEC(token^.identifierKind[1], lexer^.start); DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current) THEN IF compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProgram token^.kind := lexerKindProgram
END; END;
IF compare_keyword('IMPORT', lexer^.start, lexer^.current) THEN IF compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImport token^.kind := lexerKindImport
END; END;
IF compare_keyword('CONST', lexer^.start, lexer^.current) THEN IF compare_keyword('CONST', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindConst token^.kind := lexerKindConst
END; END;
IF compare_keyword('VAR', lexer^.start, lexer^.current) THEN IF compare_keyword('VAR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindVar token^.kind := lexerKindVar
END; END;
IF compare_keyword('IF', lexer^.start, lexer^.current) THEN IF compare_keyword('IF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindIf token^.kind := lexerKindIf
END; END;
IF compare_keyword('THEN', lexer^.start, lexer^.current) THEN IF compare_keyword('THEN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindThen token^.kind := lexerKindThen
END; END;
IF compare_keyword('ELSIF', lexer^.start, lexer^.current) THEN IF compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElsif token^.kind := lexerKindElsif
END; END;
IF compare_keyword('ELSE', lexer^.start, lexer^.current) THEN IF compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElse token^.kind := lexerKindElse
END; END;
IF compare_keyword('WHILE', lexer^.start, lexer^.current) THEN IF compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindWhile token^.kind := lexerKindWhile
END; END;
IF compare_keyword('DO', lexer^.start, lexer^.current) THEN IF compare_keyword('DO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDo token^.kind := lexerKindDo
END; END;
IF compare_keyword('proc', lexer^.start, lexer^.current) THEN IF compare_keyword('proc', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProc token^.kind := lexerKindProc
END; END;
IF compare_keyword('BEGIN', lexer^.start, lexer^.current) THEN IF compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBegin token^.kind := lexerKindBegin
END; END;
IF compare_keyword('END', lexer^.start, lexer^.current) THEN IF compare_keyword('END', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindEnd token^.kind := lexerKindEnd
END; END;
IF compare_keyword('TYPE', lexer^.start, lexer^.current) THEN IF compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindType token^.kind := lexerKindType
END; END;
IF compare_keyword('RECORD', lexer^.start, lexer^.current) THEN IF compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindRecord token^.kind := lexerKindRecord
END; END;
IF compare_keyword('UNION', lexer^.start, lexer^.current) THEN IF compare_keyword('UNION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindUnion token^.kind := lexerKindUnion
END; END;
IF compare_keyword('NIL', lexer^.start, lexer^.current) THEN IF compare_keyword('NIL', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindNull token^.kind := lexerKindNull
END; END;
IF compare_keyword('AND', lexer^.start, lexer^.current) THEN IF compare_keyword('AND', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindAnd token^.kind := lexerKindAnd
END; END;
IF compare_keyword('OR', lexer^.start, lexer^.current) THEN IF compare_keyword('OR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOr token^.kind := lexerKindOr
END; END;
IF compare_keyword('RETURN', lexer^.start, lexer^.current) THEN IF compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindReturn token^.kind := lexerKindReturn
END; END;
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current) THEN IF compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDefinition token^.kind := lexerKindDefinition
END; END;
IF compare_keyword('TO', lexer^.start, lexer^.current) THEN IF compare_keyword('TO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindTo token^.kind := lexerKindTo
END; END;
IF compare_keyword('CASE', lexer^.start, lexer^.current) THEN IF compare_keyword('CASE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindCase token^.kind := lexerKindCase
END; END;
IF compare_keyword('OF', lexer^.start, lexer^.current) THEN IF compare_keyword('OF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOf token^.kind := lexerKindOf
END; END;
IF compare_keyword('FROM', lexer^.start, lexer^.current) THEN IF compare_keyword('FROM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindFrom token^.kind := lexerKindFrom
END; END;
IF compare_keyword('MODULE', lexer^.start, lexer^.current) THEN IF compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindModule token^.kind := lexerKindModule
END; END;
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) THEN IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImplementation token^.kind := lexerKindImplementation
END; END;
IF compare_keyword('POINTER', lexer^.start, lexer^.current) THEN IF compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindPointer token^.kind := lexerKindPointer
END; END;
IF compare_keyword('ARRAY', lexer^.start, lexer^.current) THEN IF compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindArray token^.kind := lexerKindArray
END; END;
IF compare_keyword('TRUE', lexer^.start, lexer^.current) THEN IF compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean; token^.kind := lexerKindBoolean;
token^.booleanKind := TRUE token^.booleanKind := TRUE
END; END;
IF compare_keyword('FALSE', lexer^.start, lexer^.current) THEN IF compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean; token^.kind := lexerKindBoolean;
token^.booleanKind := FALSE token^.booleanKind := FALSE
END END
@@ -437,52 +446,52 @@ END transition_action_key_id;
* followed by other characters forming a composite token. *) * followed by other characters forming a composite token. *)
PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken);
BEGIN BEGIN
IF lexer^.current^ = '&' THEN IF lexer^.current.iterator^ = '&' THEN
token^.kind := lexerKindAnd token^.kind := lexerKindAnd
END; END;
IF lexer^.current^ = ';' THEN IF lexer^.current.iterator^ = ';' THEN
token^.kind := lexerKindSemicolon token^.kind := lexerKindSemicolon
END; END;
IF lexer^.current^ = ',' THEN IF lexer^.current.iterator^ = ',' THEN
token^.kind := lexerKindComma token^.kind := lexerKindComma
END; END;
IF lexer^.current^ = '~' THEN IF lexer^.current.iterator^ = '~' THEN
token^.kind := lexerKindTilde token^.kind := lexerKindTilde
END; END;
IF lexer^.current^ = ')' THEN IF lexer^.current.iterator^ = ')' THEN
token^.kind := lexerKindRightParen token^.kind := lexerKindRightParen
END; END;
IF lexer^.current^ = '[' THEN IF lexer^.current.iterator^ = '[' THEN
token^.kind := lexerKindLeftSquare token^.kind := lexerKindLeftSquare
END; END;
IF lexer^.current^ = ']' THEN IF lexer^.current.iterator^ = ']' THEN
token^.kind := lexerKindRightSquare token^.kind := lexerKindRightSquare
END; END;
IF lexer^.current^ = '^' THEN IF lexer^.current.iterator^ = '^' THEN
token^.kind := lexerKindHat token^.kind := lexerKindHat
END; END;
IF lexer^.current^ = '=' THEN IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindEqual token^.kind := lexerKindEqual
END; END;
IF lexer^.current^ = '+' THEN IF lexer^.current.iterator^ = '+' THEN
token^.kind := lexerKindPlus token^.kind := lexerKindPlus
END; END;
IF lexer^.current^ = '*' THEN IF lexer^.current.iterator^ = '*' THEN
token^.kind := lexerKindAsterisk token^.kind := lexerKindAsterisk
END; END;
IF lexer^.current^ = '/' THEN IF lexer^.current.iterator^ = '/' THEN
token^.kind := lexerKindDivision token^.kind := lexerKindDivision
END; END;
IF lexer^.current^ = '%' THEN IF lexer^.current.iterator^ = '%' THEN
token^.kind := lexerKindRemainder token^.kind := lexerKindRemainder
END; END;
IF lexer^.current^ = '@' THEN IF lexer^.current.iterator^ = '@' THEN
token^.kind := lexerKindAt token^.kind := lexerKindAt
END; END;
IF lexer^.current^ = '|' THEN IF lexer^.current.iterator^ = '|' THEN
token^.kind := lexerKindPipe token^.kind := lexerKindPipe
END; END;
INC(lexer^.current) increment(ADR(lexer^.current.iterator))
END transition_action_single; END transition_action_single;
(* Handle an integer literal. *) (* Handle an integer literal. *)
PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken); PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken);
@@ -493,20 +502,20 @@ VAR
BEGIN BEGIN
token^.kind := lexerKindInteger; token^.kind := lexerKindInteger;
integer_length := lexer^.current; integer_length := lexer^.current.iterator;
DEC(integer_length, lexer^.start); DEC(integer_length, lexer^.start.iterator);
MemZero(ADR(token^.identifierKind), TSIZE(Identifier)); MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1])); MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1])); buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found); token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer) buffer := KillString(buffer)
END transition_action_integer; END transition_action_integer;
PROCEDURE set_default_transition(current_state: TransitionState; DefaultAction: TransitionAction; next_state: TransitionState); PROCEDURE set_default_transition(current_state: TransitionState; default_action: TransitionAction; next_state: TransitionState);
VAR VAR
default_transition: Transition; default_transition: Transition;
BEGIN BEGIN
default_transition.action := DefaultAction; default_transition.action := default_action;
default_transition.next_state := next_state; default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition; transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
@@ -807,7 +816,7 @@ BEGIN
current_state := transitionStateStart; current_state := transitionStateStart;
WHILE current_state <> transitionStateEnd DO WHILE current_state <> transitionStateEnd DO
index1 := ORD(lexer^.current^); index1 := ORD(lexer^.current.iterator^);
INC(index1); INC(index1);
current_class := classification[index1]; current_class := classification[index1];
@@ -822,6 +831,9 @@ BEGIN
END; END;
current_state := current_transition.next_state current_state := current_transition.next_state
END; END;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
RETURN result RETURN result
END lexer_current; END lexer_current;
PROCEDURE lexer_lex(lexer: PLexer): LexerToken; PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
@@ -830,7 +842,9 @@ VAR
BEGIN BEGIN
IF lexer^.length = 0 THEN IF lexer^.length = 0 THEN
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer); lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current := lexer^.buffer lexer^.current.location.column := 1;
lexer^.current.location.line := 1;
lexer^.current.iterator := lexer^.buffer
END; END;
lexer^.start := lexer^.current; lexer^.start := lexer^.current;

View File

@@ -4,6 +4,11 @@ FROM Common IMPORT Identifier, PIdentifier, ShortString;
FROM Lexer IMPORT PLexer; FROM Lexer IMPORT PLexer;
TYPE TYPE
Parser = RECORD
lexer: PLexer
END;
PParser = POINTER TO Parser;
AstLiteralKind = ( AstLiteralKind = (
astLiteralKindInteger, astLiteralKindInteger,
astLiteralKindString, astLiteralKindString,

View File

@@ -9,7 +9,7 @@ FROM Storage IMPORT ALLOCATE, REALLOCATE;
FROM Lexer IMPORT Lexer, LexerKind, LexerToken, lexer_current, lexer_lex; FROM Lexer IMPORT Lexer, LexerKind, LexerToken, lexer_current, lexer_lex;
(* Calls lexer_lex() but skips the comments. *) (* Calls lexer_lex() but skips the comments. *)
PROCEDURE transpiler_lex(lexer: PLexer): LexerToken; PROCEDURE parser_lex(lexer: PLexer): LexerToken;
VAR VAR
result: LexerToken; result: LexerToken;
BEGIN BEGIN
@@ -20,7 +20,7 @@ BEGIN
END; END;
RETURN result RETURN result
END transpiler_lex; END parser_lex;
PROCEDURE parse_type_fields(lexer: PLexer): PAstFieldDeclaration; PROCEDURE parse_type_fields(lexer: PLexer): PAstFieldDeclaration;
VAR VAR
token: LexerToken; token: LexerToken;
@@ -29,7 +29,7 @@ VAR
current_field: PAstFieldDeclaration; current_field: PAstFieldDeclaration;
BEGIN BEGIN
ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration)); ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration));
token := transpiler_lex(lexer); token := parser_lex(lexer);
field_count := 0; field_count := 0;
WHILE token.kind <> lexerKindEnd DO WHILE token.kind <> lexerKindEnd DO
@@ -40,16 +40,16 @@ BEGIN
current_field := field_declarations; current_field := field_declarations;
INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1)); INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1));
token := transpiler_lex(lexer); token := parser_lex(lexer);
current_field^.field_name := token.identifierKind; current_field^.field_name := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
current_field^.field_type := parse_type_expression(lexer); current_field^.field_type := parse_type_expression(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
IF token.kind = lexerKindSemicolon THEN IF token.kind = lexerKindSemicolon THEN
token := transpiler_lex(lexer) token := parser_lex(lexer)
END END
END; END;
INC(current_field, TSIZE(AstFieldDeclaration)); INC(current_field, TSIZE(AstFieldDeclaration));
@@ -78,7 +78,7 @@ BEGIN
token := lexer_current(lexer); token := lexer_current(lexer);
IF token.kind = lexerKindPointer THEN IF token.kind = lexerKindPointer THEN
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
token := lexer_current(lexer); token := lexer_current(lexer);
result^.target := parse_type_expression(lexer); result^.target := parse_type_expression(lexer);
@@ -98,16 +98,16 @@ BEGIN
token := lexer_current(lexer); token := lexer_current(lexer);
IF token.kind = lexerKindArray THEN IF token.kind = lexerKindArray THEN
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
IF token.kind <> lexerKindOf THEN IF token.kind <> lexerKindOf THEN
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.length := token.integerKind; result^.length := token.integerKind;
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.base := parse_type_expression(lexer); result^.base := parse_type_expression(lexer);
RETURN result RETURN result
@@ -124,14 +124,14 @@ BEGIN
case_count := 1; case_count := 1;
ALLOCATE(result^.cases, TSIZE(Identifier) * 2); ALLOCATE(result^.cases, TSIZE(Identifier) * 2);
token := transpiler_lex(lexer); token := parser_lex(lexer);
current_case := result^.cases; current_case := result^.cases;
current_case^ := token.identifierKind; current_case^ := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind = lexerKindComma DO WHILE token.kind = lexerKindComma DO
token := transpiler_lex(lexer); token := parser_lex(lexer);
INC(case_count); INC(case_count);
INC(case_count); INC(case_count);
@@ -141,7 +141,7 @@ BEGIN
INC(current_case, TSIZE(Identifier) * (case_count - 1)); INC(current_case, TSIZE(Identifier) * (case_count - 1));
current_case^ := token.identifierKind; current_case^ := token.identifierKind;
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
INC(current_case, TSIZE(Identifier)); INC(current_case, TSIZE(Identifier));
MemZero(current_case, TSIZE(Identifier)); MemZero(current_case, TSIZE(Identifier));
@@ -174,8 +174,8 @@ BEGIN
ALLOCATE(result^.parameters, 1); ALLOCATE(result^.parameters, 1);
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind <> lexerKindRightParen DO WHILE token.kind <> lexerKindRightParen DO
INC(parameter_count); INC(parameter_count);
@@ -187,9 +187,9 @@ BEGIN
current_parameter^ := parse_type_expression(lexer); current_parameter^ := parse_type_expression(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
IF token.kind = lexerKindComma THEN IF token.kind = lexerKindComma THEN
token := transpiler_lex(lexer) token := parser_lex(lexer)
END END
END; END;
current_parameter := result^.parameters; current_parameter := result^.parameters;
@@ -236,11 +236,11 @@ BEGIN
NEW(result); NEW(result);
result^.identifier := token.identifierKind; result^.identifier := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.type_expression := parse_type_expression(lexer); result^.type_expression := parse_type_expression(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_type_declaration; END parse_type_declaration;
@@ -258,7 +258,7 @@ BEGIN
declaration_count := 0; declaration_count := 0;
IF token.kind = lexerKindType THEN IF token.kind = lexerKindType THEN
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind = lexerKindIdentifier DO WHILE token.kind = lexerKindIdentifier DO
INC(declaration_count); INC(declaration_count);
@@ -268,7 +268,7 @@ BEGIN
INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1)); INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1));
current_declaration^ := parse_type_declaration(lexer); current_declaration^ := parse_type_declaration(lexer);
token := transpiler_lex(lexer) token := parser_lex(lexer)
END END
END; END;
IF declaration_count <> 0 THEN IF declaration_count <> 0 THEN
@@ -288,12 +288,12 @@ BEGIN
token := lexer_current(lexer); token := lexer_current(lexer);
result^.variable_name := token.identifierKind; result^.variable_name := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.variable_type := parse_type_expression(lexer); result^.variable_type := parse_type_expression(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_variable_declaration; END parse_variable_declaration;
PROCEDURE parse_variable_part(lexer: PLexer): PPAstVariableDeclaration; PROCEDURE parse_variable_part(lexer: PLexer): PPAstVariableDeclaration;
@@ -310,7 +310,7 @@ BEGIN
declaration_count := 0; declaration_count := 0;
IF token.kind = lexerKindVar THEN IF token.kind = lexerKindVar THEN
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind = lexerKindIdentifier DO WHILE token.kind = lexerKindIdentifier DO
INC(declaration_count); INC(declaration_count);
@@ -320,7 +320,7 @@ BEGIN
INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1)); INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1));
current_declaration^ := parse_variable_declaration(lexer); current_declaration^ := parse_variable_declaration(lexer);
token := transpiler_lex(lexer) token := parser_lex(lexer)
END END
END; END;
IF declaration_count <> 0 THEN IF declaration_count <> 0 THEN
@@ -340,12 +340,12 @@ BEGIN
token := lexer_current(lexer); token := lexer_current(lexer);
result^.constant_name := token.identifierKind; result^.constant_name := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.constant_value := token.integerKind; result^.constant_value := token.integerKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_constant_declaration; END parse_constant_declaration;
@@ -363,7 +363,7 @@ BEGIN
declaration_count := 0; declaration_count := 0;
IF token.kind = lexerKindConst THEN IF token.kind = lexerKindConst THEN
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind = lexerKindIdentifier DO WHILE token.kind = lexerKindIdentifier DO
INC(declaration_count); INC(declaration_count);
@@ -373,7 +373,7 @@ BEGIN
INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1)); INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1));
current_declaration^ := parse_constant_declaration(lexer); current_declaration^ := parse_constant_declaration(lexer);
token := transpiler_lex(lexer) token := parser_lex(lexer)
END END
END; END;
IF declaration_count <> 0 THEN IF declaration_count <> 0 THEN
@@ -393,20 +393,20 @@ BEGIN
NEW(result); NEW(result);
symbol_count := 1; symbol_count := 1;
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.package := token.identifierKind; result^.package := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
ALLOCATE(result^.symbols, TSIZE(Identifier) * 2); ALLOCATE(result^.symbols, TSIZE(Identifier) * 2);
current_symbol := result^.symbols; current_symbol := result^.symbols;
token := transpiler_lex(lexer); token := parser_lex(lexer);
current_symbol^ := token.identifierKind; current_symbol^ := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind <> lexerKindSemicolon DO WHILE token.kind <> lexerKindSemicolon DO
token := transpiler_lex(lexer); token := parser_lex(lexer);
INC(symbol_count); INC(symbol_count);
REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1)); REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1));
@@ -414,12 +414,12 @@ BEGIN
INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1)); INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1));
current_symbol^ := token.identifierKind; current_symbol^ := token.identifierKind;
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
INC(current_symbol, TSIZE(Identifier)); INC(current_symbol, TSIZE(Identifier));
MemZero(current_symbol, TSIZE(Identifier)); MemZero(current_symbol, TSIZE(Identifier));
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_import_statement; END parse_import_statement;
@@ -484,7 +484,7 @@ BEGIN
literal^.boolean := token.booleanKind literal^.boolean := token.booleanKind
END; END;
IF literal <> NIL THEN IF literal <> NIL THEN
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
RETURN literal RETURN literal
@@ -508,7 +508,7 @@ BEGIN
END; END;
IF (result = NIL) AND (next_token.kind = lexerKindMinus) THEN IF (result = NIL) AND (next_token.kind = lexerKindMinus) THEN
NEW(result); NEW(result);
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
result^.kind := astExpressionKindUnary; result^.kind := astExpressionKindUnary;
result^.unary_operator := astUnaryOperatorMinus; result^.unary_operator := astUnaryOperatorMinus;
@@ -516,17 +516,17 @@ BEGIN
END; END;
IF (result = NIL) AND (next_token.kind = lexerKindTilde) THEN IF (result = NIL) AND (next_token.kind = lexerKindTilde) THEN
NEW(result); NEW(result);
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
result^.kind := astExpressionKindUnary; result^.kind := astExpressionKindUnary;
result^.unary_operator := astUnaryOperatorNot; result^.unary_operator := astUnaryOperatorNot;
result^.unary_operand := parse_factor(lexer) result^.unary_operand := parse_factor(lexer)
END; END;
IF (result = NIL) AND (next_token.kind = lexerKindLeftParen) THEN IF (result = NIL) AND (next_token.kind = lexerKindLeftParen) THEN
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
result := parse_expression(lexer); result := parse_expression(lexer);
IF result <> NIL THEN IF result <> NIL THEN
next_token := transpiler_lex(lexer) next_token := parser_lex(lexer)
END END
END; END;
IF (result = NIL) AND (next_token.kind = lexerKindIdentifier) THEN IF (result = NIL) AND (next_token.kind = lexerKindIdentifier) THEN
@@ -535,7 +535,7 @@ BEGIN
result^.kind := astExpressionKindIdentifier; result^.kind := astExpressionKindIdentifier;
result^.identifier := next_token.identifierKind; result^.identifier := next_token.identifierKind;
next_token := transpiler_lex(lexer) next_token := parser_lex(lexer)
END; END;
RETURN result RETURN result
@@ -562,34 +562,34 @@ BEGIN
designator^.kind := astExpressionKindDereference; designator^.kind := astExpressionKindDereference;
designator^.reference := inner_expression; designator^.reference := inner_expression;
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
handled := TRUE handled := TRUE
END; END;
IF ~handled AND (next_token.kind = lexerKindLeftSquare) THEN IF ~handled AND (next_token.kind = lexerKindLeftSquare) THEN
NEW(designator); NEW(designator);
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
designator^.kind := astExpressionKindArrayAccess; designator^.kind := astExpressionKindArrayAccess;
designator^.array := inner_expression; designator^.array := inner_expression;
designator^.index := parse_expression(lexer); designator^.index := parse_expression(lexer);
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
handled := TRUE handled := TRUE
END; END;
IF ~handled AND (next_token.kind = lexerKindDot) THEN IF ~handled AND (next_token.kind = lexerKindDot) THEN
NEW(designator); NEW(designator);
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
designator^.kind := astExpressionKindFieldAccess; designator^.kind := astExpressionKindFieldAccess;
designator^.aggregate := inner_expression; designator^.aggregate := inner_expression;
designator^.field := next_token.identifierKind; designator^.field := next_token.identifierKind;
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
handled := TRUE handled := TRUE
END; END;
IF ~handled AND (next_token.kind = lexerKindLeftParen) THEN IF ~handled AND (next_token.kind = lexerKindLeftParen) THEN
NEW(designator); NEW(designator);
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
designator^.kind := astExpressionKindCall; designator^.kind := astExpressionKindCall;
designator^.callable := inner_expression; designator^.callable := inner_expression;
@@ -604,7 +604,7 @@ BEGIN
next_token := lexer_current(lexer); next_token := lexer_current(lexer);
WHILE next_token.kind = lexerKindComma DO WHILE next_token.kind = lexerKindComma DO
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
designator^.argument_count := designator^.argument_count + 1; designator^.argument_count := designator^.argument_count + 1;
REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count); REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count);
@@ -616,7 +616,7 @@ BEGIN
END END
END; END;
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
handled := TRUE handled := TRUE
END END
END; END;
@@ -629,7 +629,7 @@ VAR
result: PAstExpression; result: PAstExpression;
right: PAstExpression; right: PAstExpression;
BEGIN BEGIN
next_token := transpiler_lex(lexer); next_token := parser_lex(lexer);
right := parse_designator(lexer); right := parse_designator(lexer);
result := NIL; result := NIL;
@@ -703,7 +703,7 @@ BEGIN
NEW(result); NEW(result);
result^.kind := astStatementKindReturn; result^.kind := astStatementKindReturn;
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.returned := parse_expression(lexer); result^.returned := parse_expression(lexer);
RETURN result RETURN result
@@ -717,7 +717,7 @@ BEGIN
result^.kind := astStatementKindAssignment; result^.kind := astStatementKindAssignment;
result^.assignee := assignee; result^.assignee := assignee;
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.assignment := parse_expression(lexer); result^.assignment := parse_expression(lexer);
RETURN result RETURN result
@@ -766,7 +766,7 @@ VAR
designator: PAstExpression; designator: PAstExpression;
BEGIN BEGIN
statement := NIL; statement := NIL;
token := transpiler_lex(lexer); token := parser_lex(lexer);
IF token.kind = lexerKindIf THEN IF token.kind = lexerKindIf THEN
statement := parse_if_statement(lexer) statement := parse_if_statement(lexer)
@@ -798,11 +798,11 @@ BEGIN
NEW(result); NEW(result);
result^.kind := astStatementKindIf; result^.kind := astStatementKindIf;
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.if_condition := parse_expression(lexer); result^.if_condition := parse_expression(lexer);
result^.if_branch := parse_compound_statement(lexer); result^.if_branch := parse_compound_statement(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_if_statement; END parse_if_statement;
PROCEDURE parse_while_statement(lexer: PLexer): PAstStatement; PROCEDURE parse_while_statement(lexer: PLexer): PAstStatement;
@@ -813,11 +813,11 @@ BEGIN
NEW(result); NEW(result);
result^.kind := astStatementKindWhile; result^.kind := astStatementKindWhile;
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.while_condition := parse_expression(lexer); result^.while_condition := parse_expression(lexer);
result^.while_body := parse_compound_statement(lexer); result^.while_body := parse_compound_statement(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_while_statement; END parse_while_statement;
PROCEDURE parse_statement_part(lexer: PLexer): AstCompoundStatement; PROCEDURE parse_statement_part(lexer: PLexer): AstCompoundStatement;
@@ -844,15 +844,15 @@ VAR
BEGIN BEGIN
NEW(declaration); NEW(declaration);
token := transpiler_lex(lexer); token := parser_lex(lexer);
declaration^.name := token.identifierKind; declaration^.name := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
declaration^.parameters := NIL; declaration^.parameters := NIL;
declaration^.parameter_count := 0; declaration^.parameter_count := 0;
token := transpiler_lex(lexer); token := parser_lex(lexer);
WHILE token.kind <> lexerKindRightParen DO WHILE token.kind <> lexerKindRightParen DO
parameter_index := declaration^.parameter_count; parameter_index := declaration^.parameter_count;
INC(declaration^.parameter_count); INC(declaration^.parameter_count);
@@ -863,26 +863,26 @@ BEGIN
current_parameter^.identifier := token.identifierKind; current_parameter^.identifier := token.identifierKind;
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
current_parameter^.type_expression := parse_type_expression(lexer); current_parameter^.type_expression := parse_type_expression(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
IF token.kind = lexerKindComma THEN IF token.kind = lexerKindComma THEN
token := transpiler_lex(lexer) token := parser_lex(lexer)
END END
END; END;
token := transpiler_lex(lexer); token := parser_lex(lexer);
declaration^.return_type := NIL; declaration^.return_type := NIL;
(* Check for the return type and write it. *) (* Check for the return type and write it. *)
IF token.kind = lexerKindArrow THEN IF token.kind = lexerKindArrow THEN
token := transpiler_lex(lexer); token := parser_lex(lexer);
declaration^.return_type := parse_type_expression(lexer); declaration^.return_type := parse_type_expression(lexer);
token := transpiler_lex(lexer) token := parser_lex(lexer)
END; END;
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN declaration RETURN declaration
END parse_procedure_heading; END parse_procedure_heading;
@@ -897,8 +897,8 @@ BEGIN
declaration^.variables := parse_variable_part(lexer); declaration^.variables := parse_variable_part(lexer);
declaration^.statements := parse_statement_part(lexer); declaration^.statements := parse_statement_part(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN declaration RETURN declaration
END parse_procedure_declaration; END parse_procedure_declaration;
@@ -938,16 +938,16 @@ VAR
result: PAstModule; result: PAstModule;
BEGIN BEGIN
NEW(result); NEW(result);
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.main := TRUE; result^.main := TRUE;
IF token.kind = lexerKindModule THEN IF token.kind = lexerKindModule THEN
result^.main := FALSE result^.main := FALSE
END; END;
token := transpiler_lex(lexer); token := parser_lex(lexer);
(* Write the module body. *) (* Write the module body. *)
token := transpiler_lex(lexer); token := parser_lex(lexer);
result^.imports := parse_import_part(lexer); result^.imports := parse_import_part(lexer);
result^.constants := parse_constant_part(lexer); result^.constants := parse_constant_part(lexer);
@@ -957,8 +957,8 @@ BEGIN
result^.procedures := parse_procedure_part(lexer); result^.procedures := parse_procedure_part(lexer);
result^.statements := parse_statement_part(lexer); result^.statements := parse_statement_part(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
token := transpiler_lex(lexer); token := parser_lex(lexer);
RETURN result RETURN result
END parse_module; END parse_module;

View File

@@ -9,7 +9,8 @@ FROM Parser IMPORT PAstModule;
TYPE TYPE
TranspilerContext = RECORD TranspilerContext = RECORD
input_name: ShortString; input_name: ShortString;
output: File output: File;
indentation: CARDINAL
END; END;
PTranspilerContext = POINTER TO TranspilerContext; PTranspilerContext = POINTER TO TranspilerContext;

View File

@@ -1,53 +1,37 @@
IMPLEMENTATION MODULE Transpiler; IMPLEMENTATION MODULE Transpiler;
FROM FIO IMPORT WriteNBytes, WriteLine, WriteChar, WriteString; FROM FIO IMPORT WriteNBytes, WriteLine, WriteChar, WriteString;
FROM SYSTEM IMPORT ADR, ADDRESS, TSIZE; FROM SYSTEM IMPORT ADR, TSIZE;
FROM NumberIO IMPORT IntToStr; FROM NumberIO IMPORT IntToStr;
FROM Storage IMPORT ALLOCATE, REALLOCATE;
FROM MemUtils IMPORT MemCopy, MemZero;
FROM Common IMPORT Identifier, PIdentifier, ShortString; FROM Common IMPORT Identifier, PIdentifier, ShortString;
FROM Lexer IMPORT Lexer, LexerToken, lexer_current, lexer_lex, LexerKind;
FROM Parser IMPORT AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator, FROM Parser IMPORT AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator,
AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration, PAstModule, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration,
PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind, PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind,
AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration, AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration,
PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement, PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement,
PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration; PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration;
(* Calls lexer_lex() but skips the comments. *) PROCEDURE indent(context: PTranspilerContext);
PROCEDURE transpiler_lex(lexer: PLexer): LexerToken;
VAR VAR
result: LexerToken; count: CARDINAL;
BEGIN BEGIN
result := lexer_lex(lexer); count := 0;
WHILE result.kind = lexerKindComment DO WHILE count < context^.indentation DO
result := lexer_lex(lexer) WriteString(context^.output, ' ');
END; INC(count)
END
RETURN result END indent;
END transpiler_lex;
(* Write a semicolon followed by a newline. *) (* Write a semicolon followed by a newline. *)
PROCEDURE write_semicolon(output: File); PROCEDURE write_semicolon(output: File);
BEGIN BEGIN
WriteChar(output, ';'); WriteChar(output, ';');
WriteLine(output) WriteLine(output)
END write_semicolon; END write_semicolon;
PROCEDURE write_current(lexer: PLexer; output: File);
VAR
written_bytes: CARDINAL;
count: CARDINAL;
BEGIN
count := lexer^.current;
DEC(count, lexer^.start);
written_bytes := WriteNBytes(output, count, lexer^.start)
END write_current;
PROCEDURE transpile_import_statement(context: PTranspilerContext; import_statement: PAstImportStatement); PROCEDURE transpile_import_statement(context: PTranspilerContext; import_statement: PAstImportStatement);
VAR VAR
token: LexerToken;
written_bytes: CARDINAL; written_bytes: CARDINAL;
current_symbol: PIdentifier; current_symbol: PIdentifier;
BEGIN BEGIN
@@ -92,7 +76,7 @@ BEGIN
write_semicolon(context^.output) write_semicolon(context^.output)
END transpile_constant_declaration; END transpile_constant_declaration;
PROCEDURE transpile_constant_part(context: PTranspilerContext; declarations: PPAstConstantDeclaration); PROCEDURE transpile_constant_part(context: PTranspilerContext; declarations: PPAstConstantDeclaration; extra_newline: BOOLEAN);
VAR VAR
current_declaration: PPAstConstantDeclaration; current_declaration: PPAstConstantDeclaration;
BEGIN BEGIN
@@ -106,12 +90,12 @@ BEGIN
INC(current_declaration, TSIZE(PAstConstantDeclaration)) INC(current_declaration, TSIZE(PAstConstantDeclaration))
END; END;
IF extra_newline THEN
WriteLine(context^.output) WriteLine(context^.output)
END END
END
END transpile_constant_part; END transpile_constant_part;
PROCEDURE transpile_module(context: PTranspilerContext; result: PAstModule); PROCEDURE transpile_module(context: PTranspilerContext; result: PAstModule);
VAR
token: LexerToken;
BEGIN BEGIN
IF result^.main = FALSE THEN IF result^.main = FALSE THEN
WriteString(context^.output, 'IMPLEMENTATION ') WriteString(context^.output, 'IMPLEMENTATION ')
@@ -127,9 +111,9 @@ BEGIN
(* Write the module body. *) (* Write the module body. *)
transpile_import_part(context, result^.imports); transpile_import_part(context, result^.imports);
transpile_constant_part(context, result^.constants); transpile_constant_part(context, result^.constants, TRUE);
transpile_type_part(context, result^.types); transpile_type_part(context, result^.types);
transpile_variable_part(context, result^.variables); transpile_variable_part(context, result^.variables, TRUE);
transpile_procedure_part(context, result^.procedures); transpile_procedure_part(context, result^.procedures);
transpile_statement_part(context, result^.statements); transpile_statement_part(context, result^.statements);
@@ -169,8 +153,6 @@ BEGIN
WriteString(context^.output, ' END') WriteString(context^.output, ' END')
END transpile_record_type; END transpile_record_type;
PROCEDURE transpile_pointer_type(context: PTranspilerContext; type_expression: PAstTypeExpression); PROCEDURE transpile_pointer_type(context: PTranspilerContext; type_expression: PAstTypeExpression);
VAR
token: LexerToken;
BEGIN BEGIN
WriteString(context^.output, 'POINTER TO '); WriteString(context^.output, 'POINTER TO ');
@@ -306,7 +288,7 @@ BEGIN
transpile_type_expression(context, declaration^.variable_type); transpile_type_expression(context, declaration^.variable_type);
write_semicolon(context^.output) write_semicolon(context^.output)
END transpile_variable_declaration; END transpile_variable_declaration;
PROCEDURE transpile_variable_part(context: PTranspilerContext; declarations: PPAstVariableDeclaration); PROCEDURE transpile_variable_part(context: PTranspilerContext; declarations: PPAstVariableDeclaration; extra_newline: BOOLEAN);
VAR VAR
current_declaration: PPAstVariableDeclaration; current_declaration: PPAstVariableDeclaration;
BEGIN BEGIN
@@ -320,12 +302,13 @@ BEGIN
INC(current_declaration, TSIZE(PAstVariableDeclaration)) INC(current_declaration, TSIZE(PAstVariableDeclaration))
END; END;
IF extra_newline THEN
WriteLine(context^.output) WriteLine(context^.output)
END END
END
END transpile_variable_part; END transpile_variable_part;
PROCEDURE transpile_procedure_heading(context: PTranspilerContext; declaration: PAstProcedureDeclaration); PROCEDURE transpile_procedure_heading(context: PTranspilerContext; declaration: PAstProcedureDeclaration);
VAR VAR
token: LexerToken;
written_bytes: CARDINAL; written_bytes: CARDINAL;
parameter_index: CARDINAL; parameter_index: CARDINAL;
current_parameter: PAstTypedDeclaration; current_parameter: PAstTypedDeclaration;
@@ -487,32 +470,32 @@ BEGIN
END END
END transpile_expression; END transpile_expression;
PROCEDURE transpile_if_statement(context: PTranspilerContext; statement: PAstStatement); PROCEDURE transpile_if_statement(context: PTranspilerContext; statement: PAstStatement);
VAR
token: LexerToken;
BEGIN BEGIN
IF statement <> NIL THEN WriteString(context^.output, 'IF ');
WriteString(context^.output, ' IF ');
transpile_expression(context, statement^.if_condition); transpile_expression(context, statement^.if_condition);
WriteString(context^.output, ' THEN'); WriteString(context^.output, ' THEN');
WriteLine(context^.output); WriteLine(context^.output);
INC(context^.indentation);
transpile_compound_statement(context, statement^.if_branch); transpile_compound_statement(context, statement^.if_branch);
WriteString(context^.output, ' END') DEC(context^.indentation);
END indent(context);
WriteString(context^.output, 'END')
END transpile_if_statement; END transpile_if_statement;
PROCEDURE transpile_while_statement(context: PTranspilerContext; statement: PAstStatement); PROCEDURE transpile_while_statement(context: PTranspilerContext; statement: PAstStatement);
VAR
token: LexerToken;
BEGIN BEGIN
WriteString(context^.output, ' WHILE '); WriteString(context^.output, 'WHILE ');
transpile_expression(context, statement^.while_condition); transpile_expression(context, statement^.while_condition);
WriteString(context^.output, ' DO'); WriteString(context^.output, ' DO');
WriteLine(context^.output); WriteLine(context^.output);
INC(context^.indentation);
transpile_compound_statement(context, statement^.while_body); transpile_compound_statement(context, statement^.while_body);
WriteString(context^.output, ' END') DEC(context^.indentation);
indent(context);
WriteString(context^.output, 'END')
END transpile_while_statement; END transpile_while_statement;
PROCEDURE transpile_assignment_statement(context: PTranspilerContext; statement: PAstStatement); PROCEDURE transpile_assignment_statement(context: PTranspilerContext; statement: PAstStatement);
BEGIN BEGIN
@@ -522,7 +505,7 @@ BEGIN
END transpile_assignment_statement; END transpile_assignment_statement;
PROCEDURE transpile_return_statement(context: PTranspilerContext; statement: PAstStatement); PROCEDURE transpile_return_statement(context: PTranspilerContext; statement: PAstStatement);
BEGIN BEGIN
WriteString(context^.output, ' RETURN '); WriteString(context^.output, 'RETURN ');
transpile_expression(context, statement^.returned) transpile_expression(context, statement^.returned)
END transpile_return_statement; END transpile_return_statement;
@@ -548,6 +531,8 @@ BEGIN
END transpile_compound_statement; END transpile_compound_statement;
PROCEDURE transpile_statement(context: PTranspilerContext; statement: PAstStatement); PROCEDURE transpile_statement(context: PTranspilerContext; statement: PAstStatement);
BEGIN BEGIN
indent(context);
IF statement^.kind = astStatementKindIf THEN IF statement^.kind = astStatementKindIf THEN
transpile_if_statement(context, statement) transpile_if_statement(context, statement)
END; END;
@@ -569,7 +554,10 @@ BEGIN
IF compound.count > 0 THEN IF compound.count > 0 THEN
WriteString(context^.output, 'BEGIN'); WriteString(context^.output, 'BEGIN');
WriteLine(context^.output); WriteLine(context^.output);
transpile_compound_statement(context, compound)
INC(context^.indentation);
transpile_compound_statement(context, compound);
DEC(context^.indentation)
END END
END transpile_statement_part; END transpile_statement_part;
PROCEDURE transpile_procedure_declaration(context: PTranspilerContext; declaration: PAstProcedureDeclaration); PROCEDURE transpile_procedure_declaration(context: PTranspilerContext; declaration: PAstProcedureDeclaration);
@@ -578,8 +566,8 @@ VAR
BEGIN BEGIN
transpile_procedure_heading(context, declaration); transpile_procedure_heading(context, declaration);
transpile_constant_part(context, declaration^.constants); transpile_constant_part(context, declaration^.constants, FALSE);
transpile_variable_part(context, declaration^.variables); transpile_variable_part(context, declaration^.variables, FALSE);
transpile_statement_part(context, declaration^.statements); transpile_statement_part(context, declaration^.statements);
WriteString(context^.output, 'END '); WriteString(context^.output, 'END ');
@@ -628,6 +616,7 @@ VAR
BEGIN BEGIN
context.input_name := input_name; context.input_name := input_name;
context.output := output; context.output := output;
context.indentation := 0;
transpile_module(ADR(context), ast_module) transpile_module(ADR(context), ast_module)
END transpile; END transpile;