Trace the source code position in the lexer

This commit is contained in:
2025-06-12 18:44:06 +02:00
parent 90aa5a0030
commit fdaeb25f73
7 changed files with 252 additions and 232 deletions

View File

@ -4,5 +4,9 @@ TYPE
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common.

View File

@ -2,17 +2,22 @@ DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
Lexer = RECORD
input: File;
buffer: PLexerBuffer;
size: CARDINAL;
length: CARDINAL;
start: PLexerBuffer;
current: PLexerBuffer
start: BufferPosition;
current: BufferPosition
END;
PLexer = POINTER TO Lexer;
LexerKind = (
@ -86,7 +91,9 @@ TYPE
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;

View File

@ -212,7 +212,7 @@ BEGIN
INC(i)
END
END initialize_classification;
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: PLexerBuffer; token_end: PLexerBuffer): BOOLEAN;
PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: BufferPosition; token_end: PLexerBuffer): BOOLEAN;
VAR
result: BOOLEAN;
index: CARDINAL;
@ -222,213 +222,222 @@ BEGIN
index := 0;
result := TRUE;
keyword_length := Length(keyword);
continue := (index < keyword_length) AND (token_start <> token_end);
continue := (index < keyword_length) AND (token_start.iterator <> token_end);
WHILE continue AND result DO
result := (keyword[index] = token_start^) OR (Lower(keyword[index]) = token_start^);
INC(token_start);
result := (keyword[index] = token_start.iterator^) OR (Lower(keyword[index]) = token_start.iterator^);
INC(token_start.iterator);
INC(index);
continue := (index < keyword_length) AND (token_start <> token_end)
continue := (index < keyword_length) AND (token_start.iterator <> token_end)
END;
result := result AND (index = Length(keyword));
RETURN result AND (token_start = token_end)
RETURN result AND (token_start.iterator = token_end)
END compare_keyword;
(* Reached the end of file. *)
PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken);
BEGIN
token^.kind := lexerKindEof
END transition_action_eof;
PROCEDURE increment(position: PBufferPosition);
BEGIN
INC(position^.iterator)
END increment;
(* Add the character to the token currently read and advance to the next character. *)
PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken);
BEGIN
INC(lexer^.current)
increment(ADR(lexer^.current))
END transition_action_accumulate;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.start^ = ':' THEN
IF lexer^.start.iterator^ = ':' THEN
token^.kind := lexerKindColon
END;
IF lexer^.start^ = '>' THEN
IF lexer^.start.iterator^ = '>' THEN
token^.kind := lexerKindGreaterThan
END;
IF lexer^.start^ = '<' THEN
IF lexer^.start.iterator^ = '<' THEN
token^.kind := lexerKindLessThan
END;
IF lexer^.start^ = '(' THEN
IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindLeftParen
END;
IF lexer^.start^ = '-' THEN
IF lexer^.start.iterator^ = '-' THEN
token^.kind := lexerKindMinus
END;
IF lexer^.start^ = '.' THEN
IF lexer^.start.iterator^ = '.' THEN
token^.kind := lexerKindDot
END
END transition_action_finalize;
(* An action for tokens containing multiple characters. *)
PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.start^ = '<' THEN
IF lexer^.current^ = '>' THEN
IF lexer^.start.iterator^ = '<' THEN
IF lexer^.current.iterator^ = '>' THEN
token^.kind := lexerKindNotEqual
END;
IF lexer^.current^ = '=' THEN
IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindLessEqual
END
END;
IF (lexer^.start^ = '>') AND (lexer^.current^ = '=') THEN
IF (lexer^.start.iterator^ = '>') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindGreaterEqual
END;
IF (lexer^.start^ = '.') AND (lexer^.current^ = '.') THEN
IF (lexer^.start.iterator^ = '.') AND (lexer^.current.iterator^ = '.') THEN
token^.kind := lexerKindRange
END;
IF (lexer^.start^ = ':') AND (lexer^.current^ = '=') THEN
IF (lexer^.start.iterator^ = ':') AND (lexer^.current.iterator^ = '=') THEN
token^.kind := lexerKindAssignment
END;
IF (lexer^.start^ = '-') AND (lexer^.current^ = '>') THEN
IF (lexer^.start.iterator^ = '-') AND (lexer^.current.iterator^ = '>') THEN
token^.kind := lexerKindArrow
END;
INC(lexer^.current)
increment(ADR(lexer^.current))
END transition_action_composite;
(* Skip a space. *)
PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken);
BEGIN
INC(lexer^.current);
INC(lexer^.start)
increment(ADR(lexer^.start));
IF ORD(lexer^.start.iterator^) = 10 THEN
INC(lexer^.start.location.line);
lexer^.start.location.column := 1
END;
lexer^.current := lexer^.start
END transition_action_skip;
(* Delimited string action. *)
PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken);
VAR
text_length: CARDINAL;
BEGIN
IF lexer^.start^ = '(' THEN
IF lexer^.start.iterator^ = '(' THEN
token^.kind := lexerKindComment
END;
IF lexer^.start^ = '"' THEN
text_length := lexer^.current;
DEC(text_length, lexer^.start);
IF lexer^.start.iterator^ = '"' THEN
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter
END;
IF lexer^.start^ = "'" THEN
text_length := lexer^.current;
DEC(text_length, lexer^.start);
IF lexer^.start.iterator^ = "'" THEN
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString
END;
INC(lexer^.current)
increment(ADR(lexer^.current))
END transition_action_delimited;
(* Finalize keyword OR identifier. *)
PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken);
BEGIN
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current;
DEC(token^.identifierKind[1], lexer^.start);
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
token^.identifierKind[1] := lexer^.current.iterator;
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current) THEN
IF compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProgram
END;
IF compare_keyword('IMPORT', lexer^.start, lexer^.current) THEN
IF compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImport
END;
IF compare_keyword('CONST', lexer^.start, lexer^.current) THEN
IF compare_keyword('CONST', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindConst
END;
IF compare_keyword('VAR', lexer^.start, lexer^.current) THEN
IF compare_keyword('VAR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindVar
END;
IF compare_keyword('IF', lexer^.start, lexer^.current) THEN
IF compare_keyword('IF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindIf
END;
IF compare_keyword('THEN', lexer^.start, lexer^.current) THEN
IF compare_keyword('THEN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindThen
END;
IF compare_keyword('ELSIF', lexer^.start, lexer^.current) THEN
IF compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElsif
END;
IF compare_keyword('ELSE', lexer^.start, lexer^.current) THEN
IF compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindElse
END;
IF compare_keyword('WHILE', lexer^.start, lexer^.current) THEN
IF compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindWhile
END;
IF compare_keyword('DO', lexer^.start, lexer^.current) THEN
IF compare_keyword('DO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDo
END;
IF compare_keyword('proc', lexer^.start, lexer^.current) THEN
IF compare_keyword('proc', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindProc
END;
IF compare_keyword('BEGIN', lexer^.start, lexer^.current) THEN
IF compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBegin
END;
IF compare_keyword('END', lexer^.start, lexer^.current) THEN
IF compare_keyword('END', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindEnd
END;
IF compare_keyword('TYPE', lexer^.start, lexer^.current) THEN
IF compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindType
END;
IF compare_keyword('RECORD', lexer^.start, lexer^.current) THEN
IF compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindRecord
END;
IF compare_keyword('UNION', lexer^.start, lexer^.current) THEN
IF compare_keyword('UNION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindUnion
END;
IF compare_keyword('NIL', lexer^.start, lexer^.current) THEN
IF compare_keyword('NIL', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindNull
END;
IF compare_keyword('AND', lexer^.start, lexer^.current) THEN
IF compare_keyword('AND', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindAnd
END;
IF compare_keyword('OR', lexer^.start, lexer^.current) THEN
IF compare_keyword('OR', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOr
END;
IF compare_keyword('RETURN', lexer^.start, lexer^.current) THEN
IF compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindReturn
END;
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current) THEN
IF compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindDefinition
END;
IF compare_keyword('TO', lexer^.start, lexer^.current) THEN
IF compare_keyword('TO', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindTo
END;
IF compare_keyword('CASE', lexer^.start, lexer^.current) THEN
IF compare_keyword('CASE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindCase
END;
IF compare_keyword('OF', lexer^.start, lexer^.current) THEN
IF compare_keyword('OF', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindOf
END;
IF compare_keyword('FROM', lexer^.start, lexer^.current) THEN
IF compare_keyword('FROM', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindFrom
END;
IF compare_keyword('MODULE', lexer^.start, lexer^.current) THEN
IF compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindModule
END;
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) THEN
IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindImplementation
END;
IF compare_keyword('POINTER', lexer^.start, lexer^.current) THEN
IF compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindPointer
END;
IF compare_keyword('ARRAY', lexer^.start, lexer^.current) THEN
IF compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindArray
END;
IF compare_keyword('TRUE', lexer^.start, lexer^.current) THEN
IF compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean;
token^.booleanKind := TRUE
END;
IF compare_keyword('FALSE', lexer^.start, lexer^.current) THEN
IF compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) THEN
token^.kind := lexerKindBoolean;
token^.booleanKind := FALSE
END
@ -437,52 +446,52 @@ END transition_action_key_id;
* followed by other characters forming a composite token. *)
PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken);
BEGIN
IF lexer^.current^ = '&' THEN
IF lexer^.current.iterator^ = '&' THEN
token^.kind := lexerKindAnd
END;
IF lexer^.current^ = ';' THEN
IF lexer^.current.iterator^ = ';' THEN
token^.kind := lexerKindSemicolon
END;
IF lexer^.current^ = ',' THEN
IF lexer^.current.iterator^ = ',' THEN
token^.kind := lexerKindComma
END;
IF lexer^.current^ = '~' THEN
IF lexer^.current.iterator^ = '~' THEN
token^.kind := lexerKindTilde
END;
IF lexer^.current^ = ')' THEN
IF lexer^.current.iterator^ = ')' THEN
token^.kind := lexerKindRightParen
END;
IF lexer^.current^ = '[' THEN
IF lexer^.current.iterator^ = '[' THEN
token^.kind := lexerKindLeftSquare
END;
IF lexer^.current^ = ']' THEN
IF lexer^.current.iterator^ = ']' THEN
token^.kind := lexerKindRightSquare
END;
IF lexer^.current^ = '^' THEN
IF lexer^.current.iterator^ = '^' THEN
token^.kind := lexerKindHat
END;
IF lexer^.current^ = '=' THEN
IF lexer^.current.iterator^ = '=' THEN
token^.kind := lexerKindEqual
END;
IF lexer^.current^ = '+' THEN
IF lexer^.current.iterator^ = '+' THEN
token^.kind := lexerKindPlus
END;
IF lexer^.current^ = '*' THEN
IF lexer^.current.iterator^ = '*' THEN
token^.kind := lexerKindAsterisk
END;
IF lexer^.current^ = '/' THEN
IF lexer^.current.iterator^ = '/' THEN
token^.kind := lexerKindDivision
END;
IF lexer^.current^ = '%' THEN
IF lexer^.current.iterator^ = '%' THEN
token^.kind := lexerKindRemainder
END;
IF lexer^.current^ = '@' THEN
IF lexer^.current.iterator^ = '@' THEN
token^.kind := lexerKindAt
END;
IF lexer^.current^ = '|' THEN
IF lexer^.current.iterator^ = '|' THEN
token^.kind := lexerKindPipe
END;
INC(lexer^.current)
increment(ADR(lexer^.current.iterator))
END transition_action_single;
(* Handle an integer literal. *)
PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken);
@ -493,20 +502,20 @@ VAR
BEGIN
token^.kind := lexerKindInteger;
integer_length := lexer^.current;
DEC(integer_length, lexer^.start);
integer_length := lexer^.current.iterator;
DEC(integer_length, lexer^.start.iterator);
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer)
END transition_action_integer;
PROCEDURE set_default_transition(current_state: TransitionState; DefaultAction: TransitionAction; next_state: TransitionState);
PROCEDURE set_default_transition(current_state: TransitionState; default_action: TransitionAction; next_state: TransitionState);
VAR
default_transition: Transition;
BEGIN
default_transition.action := DefaultAction;
default_transition.action := default_action;
default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
@ -807,7 +816,7 @@ BEGIN
current_state := transitionStateStart;
WHILE current_state <> transitionStateEnd DO
index1 := ORD(lexer^.current^);
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@ -822,6 +831,9 @@ BEGIN
END;
current_state := current_transition.next_state
END;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
RETURN result
END lexer_current;
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
@ -830,7 +842,9 @@ VAR
BEGIN
IF lexer^.length = 0 THEN
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current := lexer^.buffer
lexer^.current.location.column := 1;
lexer^.current.location.line := 1;
lexer^.current.iterator := lexer^.buffer
END;
lexer^.start := lexer^.current;

View File

@ -4,6 +4,11 @@ FROM Common IMPORT Identifier, PIdentifier, ShortString;
FROM Lexer IMPORT PLexer;
TYPE
Parser = RECORD
lexer: PLexer
END;
PParser = POINTER TO Parser;
AstLiteralKind = (
astLiteralKindInteger,
astLiteralKindString,

View File

@ -9,7 +9,7 @@ FROM Storage IMPORT ALLOCATE, REALLOCATE;
FROM Lexer IMPORT Lexer, LexerKind, LexerToken, lexer_current, lexer_lex;
(* Calls lexer_lex() but skips the comments. *)
PROCEDURE transpiler_lex(lexer: PLexer): LexerToken;
PROCEDURE parser_lex(lexer: PLexer): LexerToken;
VAR
result: LexerToken;
BEGIN
@ -20,7 +20,7 @@ BEGIN
END;
RETURN result
END transpiler_lex;
END parser_lex;
PROCEDURE parse_type_fields(lexer: PLexer): PAstFieldDeclaration;
VAR
token: LexerToken;
@ -29,7 +29,7 @@ VAR
current_field: PAstFieldDeclaration;
BEGIN
ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration));
token := transpiler_lex(lexer);
token := parser_lex(lexer);
field_count := 0;
WHILE token.kind <> lexerKindEnd DO
@ -40,16 +40,16 @@ BEGIN
current_field := field_declarations;
INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1));
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_field^.field_name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_field^.field_type := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
IF token.kind = lexerKindSemicolon THEN
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END
END;
INC(current_field, TSIZE(AstFieldDeclaration));
@ -78,7 +78,7 @@ BEGIN
token := lexer_current(lexer);
IF token.kind = lexerKindPointer THEN
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
token := lexer_current(lexer);
result^.target := parse_type_expression(lexer);
@ -98,16 +98,16 @@ BEGIN
token := lexer_current(lexer);
IF token.kind = lexerKindArray THEN
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
IF token.kind <> lexerKindOf THEN
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.length := token.integerKind;
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.base := parse_type_expression(lexer);
RETURN result
@ -124,14 +124,14 @@ BEGIN
case_count := 1;
ALLOCATE(result^.cases, TSIZE(Identifier) * 2);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_case := result^.cases;
current_case^ := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind = lexerKindComma DO
token := transpiler_lex(lexer);
token := parser_lex(lexer);
INC(case_count);
INC(case_count);
@ -141,7 +141,7 @@ BEGIN
INC(current_case, TSIZE(Identifier) * (case_count - 1));
current_case^ := token.identifierKind;
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
INC(current_case, TSIZE(Identifier));
MemZero(current_case, TSIZE(Identifier));
@ -174,8 +174,8 @@ BEGIN
ALLOCATE(result^.parameters, 1);
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind <> lexerKindRightParen DO
INC(parameter_count);
@ -187,9 +187,9 @@ BEGIN
current_parameter^ := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
IF token.kind = lexerKindComma THEN
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END
END;
current_parameter := result^.parameters;
@ -236,11 +236,11 @@ BEGIN
NEW(result);
result^.identifier := token.identifierKind;
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
result^.type_expression := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_type_declaration;
@ -258,7 +258,7 @@ BEGIN
declaration_count := 0;
IF token.kind = lexerKindType THEN
token := transpiler_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind = lexerKindIdentifier DO
INC(declaration_count);
@ -268,7 +268,7 @@ BEGIN
INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1));
current_declaration^ := parse_type_declaration(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END
END;
IF declaration_count <> 0 THEN
@ -288,12 +288,12 @@ BEGIN
token := lexer_current(lexer);
result^.variable_name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.variable_type := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_variable_declaration;
PROCEDURE parse_variable_part(lexer: PLexer): PPAstVariableDeclaration;
@ -310,7 +310,7 @@ BEGIN
declaration_count := 0;
IF token.kind = lexerKindVar THEN
token := transpiler_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind = lexerKindIdentifier DO
INC(declaration_count);
@ -320,7 +320,7 @@ BEGIN
INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1));
current_declaration^ := parse_variable_declaration(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END
END;
IF declaration_count <> 0 THEN
@ -340,12 +340,12 @@ BEGIN
token := lexer_current(lexer);
result^.constant_name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.constant_value := token.integerKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_constant_declaration;
@ -363,7 +363,7 @@ BEGIN
declaration_count := 0;
IF token.kind = lexerKindConst THEN
token := transpiler_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind = lexerKindIdentifier DO
INC(declaration_count);
@ -373,7 +373,7 @@ BEGIN
INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1));
current_declaration^ := parse_constant_declaration(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END
END;
IF declaration_count <> 0 THEN
@ -393,20 +393,20 @@ BEGIN
NEW(result);
symbol_count := 1;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.package := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
ALLOCATE(result^.symbols, TSIZE(Identifier) * 2);
current_symbol := result^.symbols;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_symbol^ := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind <> lexerKindSemicolon DO
token := transpiler_lex(lexer);
token := parser_lex(lexer);
INC(symbol_count);
REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1));
@ -414,12 +414,12 @@ BEGIN
INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1));
current_symbol^ := token.identifierKind;
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
INC(current_symbol, TSIZE(Identifier));
MemZero(current_symbol, TSIZE(Identifier));
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_import_statement;
@ -484,7 +484,7 @@ BEGIN
literal^.boolean := token.booleanKind
END;
IF literal <> NIL THEN
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
RETURN literal
@ -508,7 +508,7 @@ BEGIN
END;
IF (result = NIL) AND (next_token.kind = lexerKindMinus) THEN
NEW(result);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
result^.kind := astExpressionKindUnary;
result^.unary_operator := astUnaryOperatorMinus;
@ -516,17 +516,17 @@ BEGIN
END;
IF (result = NIL) AND (next_token.kind = lexerKindTilde) THEN
NEW(result);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
result^.kind := astExpressionKindUnary;
result^.unary_operator := astUnaryOperatorNot;
result^.unary_operand := parse_factor(lexer)
END;
IF (result = NIL) AND (next_token.kind = lexerKindLeftParen) THEN
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
result := parse_expression(lexer);
IF result <> NIL THEN
next_token := transpiler_lex(lexer)
next_token := parser_lex(lexer)
END
END;
IF (result = NIL) AND (next_token.kind = lexerKindIdentifier) THEN
@ -535,7 +535,7 @@ BEGIN
result^.kind := astExpressionKindIdentifier;
result^.identifier := next_token.identifierKind;
next_token := transpiler_lex(lexer)
next_token := parser_lex(lexer)
END;
RETURN result
@ -562,34 +562,34 @@ BEGIN
designator^.kind := astExpressionKindDereference;
designator^.reference := inner_expression;
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := TRUE
END;
IF ~handled AND (next_token.kind = lexerKindLeftSquare) THEN
NEW(designator);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.kind := astExpressionKindArrayAccess;
designator^.array := inner_expression;
designator^.index := parse_expression(lexer);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := TRUE
END;
IF ~handled AND (next_token.kind = lexerKindDot) THEN
NEW(designator);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.kind := astExpressionKindFieldAccess;
designator^.aggregate := inner_expression;
designator^.field := next_token.identifierKind;
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := TRUE
END;
IF ~handled AND (next_token.kind = lexerKindLeftParen) THEN
NEW(designator);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.kind := astExpressionKindCall;
designator^.callable := inner_expression;
@ -604,7 +604,7 @@ BEGIN
next_token := lexer_current(lexer);
WHILE next_token.kind = lexerKindComma DO
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.argument_count := designator^.argument_count + 1;
REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count);
@ -616,7 +616,7 @@ BEGIN
END
END;
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := TRUE
END
END;
@ -629,7 +629,7 @@ VAR
result: PAstExpression;
right: PAstExpression;
BEGIN
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
right := parse_designator(lexer);
result := NIL;
@ -703,7 +703,7 @@ BEGIN
NEW(result);
result^.kind := astStatementKindReturn;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.returned := parse_expression(lexer);
RETURN result
@ -717,7 +717,7 @@ BEGIN
result^.kind := astStatementKindAssignment;
result^.assignee := assignee;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.assignment := parse_expression(lexer);
RETURN result
@ -766,7 +766,7 @@ VAR
designator: PAstExpression;
BEGIN
statement := NIL;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
IF token.kind = lexerKindIf THEN
statement := parse_if_statement(lexer)
@ -798,11 +798,11 @@ BEGIN
NEW(result);
result^.kind := astStatementKindIf;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.if_condition := parse_expression(lexer);
result^.if_branch := parse_compound_statement(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_if_statement;
PROCEDURE parse_while_statement(lexer: PLexer): PAstStatement;
@ -813,11 +813,11 @@ BEGIN
NEW(result);
result^.kind := astStatementKindWhile;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.while_condition := parse_expression(lexer);
result^.while_body := parse_compound_statement(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_while_statement;
PROCEDURE parse_statement_part(lexer: PLexer): AstCompoundStatement;
@ -844,15 +844,15 @@ VAR
BEGIN
NEW(declaration);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.parameters := NIL;
declaration^.parameter_count := 0;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
WHILE token.kind <> lexerKindRightParen DO
parameter_index := declaration^.parameter_count;
INC(declaration^.parameter_count);
@ -863,26 +863,26 @@ BEGIN
current_parameter^.identifier := token.identifierKind;
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
current_parameter^.type_expression := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
IF token.kind = lexerKindComma THEN
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END
END;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.return_type := NIL;
(* Check for the return type and write it. *)
IF token.kind = lexerKindArrow THEN
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.return_type := parse_type_expression(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
END;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
RETURN declaration
END parse_procedure_heading;
@ -897,8 +897,8 @@ BEGIN
declaration^.variables := parse_variable_part(lexer);
declaration^.statements := parse_statement_part(lexer);
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
RETURN declaration
END parse_procedure_declaration;
@ -938,16 +938,16 @@ VAR
result: PAstModule;
BEGIN
NEW(result);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.main := TRUE;
IF token.kind = lexerKindModule THEN
result^.main := FALSE
END;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
(* Write the module body. *)
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.imports := parse_import_part(lexer);
result^.constants := parse_constant_part(lexer);
@ -957,8 +957,8 @@ BEGIN
result^.procedures := parse_procedure_part(lexer);
result^.statements := parse_statement_part(lexer);
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
RETURN result
END parse_module;

View File

@ -9,7 +9,8 @@ FROM Parser IMPORT PAstModule;
TYPE
TranspilerContext = RECORD
input_name: ShortString;
output: File
output: File;
indentation: CARDINAL
END;
PTranspilerContext = POINTER TO TranspilerContext;

View File

@ -1,53 +1,37 @@
IMPLEMENTATION MODULE Transpiler;
FROM FIO IMPORT WriteNBytes, WriteLine, WriteChar, WriteString;
FROM SYSTEM IMPORT ADR, ADDRESS, TSIZE;
FROM SYSTEM IMPORT ADR, TSIZE;
FROM NumberIO IMPORT IntToStr;
FROM Storage IMPORT ALLOCATE, REALLOCATE;
FROM MemUtils IMPORT MemCopy, MemZero;
FROM Common IMPORT Identifier, PIdentifier, ShortString;
FROM Lexer IMPORT Lexer, LexerToken, lexer_current, lexer_lex, LexerKind;
FROM Parser IMPORT AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator,
AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration,
PAstModule, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration,
PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind,
AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration,
PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement,
PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration;
(* Calls lexer_lex() but skips the comments. *)
PROCEDURE transpiler_lex(lexer: PLexer): LexerToken;
PROCEDURE indent(context: PTranspilerContext);
VAR
result: LexerToken;
count: CARDINAL;
BEGIN
result := lexer_lex(lexer);
count := 0;
WHILE result.kind = lexerKindComment DO
result := lexer_lex(lexer)
END;
RETURN result
END transpiler_lex;
WHILE count < context^.indentation DO
WriteString(context^.output, ' ');
INC(count)
END
END indent;
(* Write a semicolon followed by a newline. *)
PROCEDURE write_semicolon(output: File);
BEGIN
WriteChar(output, ';');
WriteLine(output)
END write_semicolon;
PROCEDURE write_current(lexer: PLexer; output: File);
VAR
written_bytes: CARDINAL;
count: CARDINAL;
BEGIN
count := lexer^.current;
DEC(count, lexer^.start);
written_bytes := WriteNBytes(output, count, lexer^.start)
END write_current;
PROCEDURE transpile_import_statement(context: PTranspilerContext; import_statement: PAstImportStatement);
VAR
token: LexerToken;
written_bytes: CARDINAL;
current_symbol: PIdentifier;
BEGIN
@ -92,7 +76,7 @@ BEGIN
write_semicolon(context^.output)
END transpile_constant_declaration;
PROCEDURE transpile_constant_part(context: PTranspilerContext; declarations: PPAstConstantDeclaration);
PROCEDURE transpile_constant_part(context: PTranspilerContext; declarations: PPAstConstantDeclaration; extra_newline: BOOLEAN);
VAR
current_declaration: PPAstConstantDeclaration;
BEGIN
@ -106,12 +90,12 @@ BEGIN
INC(current_declaration, TSIZE(PAstConstantDeclaration))
END;
WriteLine(context^.output)
IF extra_newline THEN
WriteLine(context^.output)
END
END
END transpile_constant_part;
PROCEDURE transpile_module(context: PTranspilerContext; result: PAstModule);
VAR
token: LexerToken;
BEGIN
IF result^.main = FALSE THEN
WriteString(context^.output, 'IMPLEMENTATION ')
@ -127,9 +111,9 @@ BEGIN
(* Write the module body. *)
transpile_import_part(context, result^.imports);
transpile_constant_part(context, result^.constants);
transpile_constant_part(context, result^.constants, TRUE);
transpile_type_part(context, result^.types);
transpile_variable_part(context, result^.variables);
transpile_variable_part(context, result^.variables, TRUE);
transpile_procedure_part(context, result^.procedures);
transpile_statement_part(context, result^.statements);
@ -169,8 +153,6 @@ BEGIN
WriteString(context^.output, ' END')
END transpile_record_type;
PROCEDURE transpile_pointer_type(context: PTranspilerContext; type_expression: PAstTypeExpression);
VAR
token: LexerToken;
BEGIN
WriteString(context^.output, 'POINTER TO ');
@ -306,7 +288,7 @@ BEGIN
transpile_type_expression(context, declaration^.variable_type);
write_semicolon(context^.output)
END transpile_variable_declaration;
PROCEDURE transpile_variable_part(context: PTranspilerContext; declarations: PPAstVariableDeclaration);
PROCEDURE transpile_variable_part(context: PTranspilerContext; declarations: PPAstVariableDeclaration; extra_newline: BOOLEAN);
VAR
current_declaration: PPAstVariableDeclaration;
BEGIN
@ -320,12 +302,13 @@ BEGIN
INC(current_declaration, TSIZE(PAstVariableDeclaration))
END;
WriteLine(context^.output)
IF extra_newline THEN
WriteLine(context^.output)
END
END
END transpile_variable_part;
PROCEDURE transpile_procedure_heading(context: PTranspilerContext; declaration: PAstProcedureDeclaration);
VAR
token: LexerToken;
written_bytes: CARDINAL;
parameter_index: CARDINAL;
current_parameter: PAstTypedDeclaration;
@ -487,32 +470,32 @@ BEGIN
END
END transpile_expression;
PROCEDURE transpile_if_statement(context: PTranspilerContext; statement: PAstStatement);
VAR
token: LexerToken;
BEGIN
IF statement <> NIL THEN
WriteString(context^.output, ' IF ');
transpile_expression(context, statement^.if_condition);
WriteString(context^.output, 'IF ');
transpile_expression(context, statement^.if_condition);
WriteString(context^.output, ' THEN');
WriteLine(context^.output);
WriteString(context^.output, ' THEN');
WriteLine(context^.output);
INC(context^.indentation);
transpile_compound_statement(context, statement^.if_branch);
WriteString(context^.output, ' END')
END
transpile_compound_statement(context, statement^.if_branch);
DEC(context^.indentation);
indent(context);
WriteString(context^.output, 'END')
END transpile_if_statement;
PROCEDURE transpile_while_statement(context: PTranspilerContext; statement: PAstStatement);
VAR
token: LexerToken;
BEGIN
WriteString(context^.output, ' WHILE ');
WriteString(context^.output, 'WHILE ');
transpile_expression(context, statement^.while_condition);
WriteString(context^.output, ' DO');
WriteLine(context^.output);
INC(context^.indentation);
transpile_compound_statement(context, statement^.while_body);
WriteString(context^.output, ' END')
DEC(context^.indentation);
indent(context);
WriteString(context^.output, 'END')
END transpile_while_statement;
PROCEDURE transpile_assignment_statement(context: PTranspilerContext; statement: PAstStatement);
BEGIN
@ -522,7 +505,7 @@ BEGIN
END transpile_assignment_statement;
PROCEDURE transpile_return_statement(context: PTranspilerContext; statement: PAstStatement);
BEGIN
WriteString(context^.output, ' RETURN ');
WriteString(context^.output, 'RETURN ');
transpile_expression(context, statement^.returned)
END transpile_return_statement;
@ -548,6 +531,8 @@ BEGIN
END transpile_compound_statement;
PROCEDURE transpile_statement(context: PTranspilerContext; statement: PAstStatement);
BEGIN
indent(context);
IF statement^.kind = astStatementKindIf THEN
transpile_if_statement(context, statement)
END;
@ -569,7 +554,10 @@ BEGIN
IF compound.count > 0 THEN
WriteString(context^.output, 'BEGIN');
WriteLine(context^.output);
transpile_compound_statement(context, compound)
INC(context^.indentation);
transpile_compound_statement(context, compound);
DEC(context^.indentation)
END
END transpile_statement_part;
PROCEDURE transpile_procedure_declaration(context: PTranspilerContext; declaration: PAstProcedureDeclaration);
@ -578,8 +566,8 @@ VAR
BEGIN
transpile_procedure_heading(context, declaration);
transpile_constant_part(context, declaration^.constants);
transpile_variable_part(context, declaration^.variables);
transpile_constant_part(context, declaration^.constants, FALSE);
transpile_variable_part(context, declaration^.variables, FALSE);
transpile_statement_part(context, declaration^.statements);
WriteString(context^.output, 'END ');
@ -628,6 +616,7 @@ VAR
BEGIN
context.input_name := input_name;
context.output := output;
context.indentation := 0;
transpile_module(ADR(context), ast_module)
END transpile;