Trace the source code position in the lexer

This commit is contained in:
2025-06-12 18:45:17 +02:00
parent e3f094c8a5
commit 9341017103
7 changed files with 251 additions and 231 deletions

View File

@ -4,5 +4,9 @@ TYPE
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
(* A position in the source text, expressed as a line and a column.
 * The lexer sets both fields to 1 when it reads the first chunk of input. *)
TextLocation = RECORD
line: CARDINAL; (* Line number. *)
column: CARDINAL (* Column number within the line. *)
END;
END Common.

View File

@ -2,17 +2,22 @@ DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
(* A cursor into the lexer buffer paired with the text location recorded for it. *)
BufferPosition = RECORD
iterator: PLexerBuffer; (* Pointer to a character inside the buffer. *)
location: TextLocation (* Line and column stored alongside the pointer. *)
END;
(* Pointer to a BufferPosition, used to pass a position by reference. *)
PBufferPosition = POINTER TO BufferPosition;
(* Lexer state: the input file, the buffer the input is read into and the
 * two positions delimiting the token being read. *)
Lexer = RECORD
input: File; (* File the source text is read from. *)
buffer: PLexerBuffer; (* Buffer that receives the bytes read from input. *)
size: CARDINAL; (* Presumably the capacity of buffer - TODO confirm. *)
length: CARDINAL; (* Number of bytes read into buffer; 0 means nothing has been read yet. *)
start: PLexerBuffer;
current: PLexerBuffer
start: BufferPosition; (* Position where the token being read begins. *)
current: BufferPosition (* Position of the character being classified. *)
END;
PLexer = POINTER TO Lexer;
LexerKind = (
@ -86,7 +91,9 @@ TYPE
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;

View File

@ -213,7 +213,7 @@ begin
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: PLexerBuffer, token_end: PLexerBuffer) -> BOOLEAN;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
var
result: BOOLEAN;
index: CARDINAL;
@ -223,17 +223,17 @@ begin
index := 0;
result := true;
keyword_length := Length(keyword);
continue := (index < keyword_length) & (token_start <> token_end);
continue := (index < keyword_length) & (token_start.iterator <> token_end);
while continue & result do
result := (keyword[index] = token_start^) or (Lower(keyword[index]) = token_start^);
INC(token_start);
result := (keyword[index] = token_start.iterator^) or (Lower(keyword[index]) = token_start.iterator^);
INC(token_start.iterator);
INC(index);
continue := (index < keyword_length) & (token_start <> token_end)
continue := (index < keyword_length) & (token_start.iterator <> token_end)
end;
result := result & (index = Length(keyword));
return result & (token_start = token_end)
return result & (token_start.iterator = token_end)
end;
(* Reached the end of file. *)
@ -242,32 +242,37 @@ begin
token^.kind := lexerKindEof
end;
(* Advance the buffer pointer of the given position by one character.
 * Only position^.iterator is changed; the line and column stored in
 * position^.location are left as they are. *)
proc increment(position: PBufferPosition);
begin
INC(position^.iterator)
end;
(* Add the character to the token currently read and advance to the next character.
 * Only lexer^.current is moved; the token parameter is not used by this action. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.current)
increment(ADR(lexer^.current))
end;
(* The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is chosen from the first character of the token (the
 * character at lexer^.start). A character that is not listed below leaves
 * token^.kind unchanged. The lexer itself is not modified. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
begin
if lexer^.start^ = ':' then
if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon
end;
if lexer^.start^ = '>' then
if lexer^.start.iterator^ = '>' then
token^.kind := lexerKindGreaterThan
end;
if lexer^.start^ = '<' then
if lexer^.start.iterator^ = '<' then
token^.kind := lexerKindLessThan
end;
if lexer^.start^ = '(' then
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindLeftParen
end;
if lexer^.start^ = '-' then
if lexer^.start.iterator^ = '-' then
token^.kind := lexerKindMinus
end;
if lexer^.start^ = '.' then
if lexer^.start.iterator^ = '.' then
token^.kind := lexerKindDot
end
end;
@ -275,34 +280,39 @@ end;
(* An action for tokens containing multiple characters.
 *
 * The kind is chosen from the pair formed by the first character of the
 * token (at lexer^.start) and the current character (at lexer^.current):
 * "<>", "<=", ">=", "..", ":=" and "->". Any other pair leaves token^.kind
 * unchanged. Afterwards the current position is advanced by one character. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
begin
if lexer^.start^ = '<' then
if lexer^.current^ = '>' then
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
token^.kind := lexerKindNotEqual
end;
if lexer^.current^ = '=' then
if lexer^.current.iterator^ = '=' then
token^.kind := lexerKindLessEqual
end
end;
if (lexer^.start^ = '>') & (lexer^.current^ = '=') then
if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
token^.kind := lexerKindGreaterEqual
end;
if (lexer^.start^ = '.') & (lexer^.current^ = '.') then
if (lexer^.start.iterator^ = '.') & (lexer^.current.iterator^ = '.') then
token^.kind := lexerKindRange
end;
if (lexer^.start^ = ':') & (lexer^.current^ = '=') then
if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
token^.kind := lexerKindAssignment
end;
if (lexer^.start^ = '-') & (lexer^.current^ = '>') then
if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then
token^.kind := lexerKindArrow
end;
INC(lexer^.current)
increment(ADR(lexer^.current))
end;
(* Skip a space.
 *
 * Moves lexer^.start one character forward and makes lexer^.current equal
 * to it, so the skipped character does not become part of any token. When
 * the skipped character is a line feed (code 10) the line counter is
 * incremented and the column is reset to 1, so the position after the line
 * feed is reported as the beginning of the next line. The token parameter
 * is not used. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.current);
INC(lexer^.start)
(* Look at the character being skipped, before the pointer moves. Testing
 * after the increment would inspect the following character instead, so a
 * line feed directly after a token or at the very beginning of the input
 * would be skipped without being counted. *)
if ORD(lexer^.start.iterator^) = 10 then
INC(lexer^.start.location.line);
lexer^.start.location.column := 1
end;
increment(ADR(lexer^.start));
lexer^.current := lexer^.start
end;
(* Delimited string action. *)
@ -310,30 +320,30 @@ proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
var
text_length: CARDINAL;
begin
if lexer^.start^ = '(' then
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment
end;
if lexer^.start^ = '"' then
text_length := lexer^.current;
DEC(text_length, lexer^.start);
if lexer^.start.iterator^ = '"' then
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindCharacter
end;
if lexer^.start^ = "'" then
text_length := lexer^.current;
DEC(text_length, lexer^.start);
if lexer^.start.iterator^ = "'" then
text_length := lexer^.current.iterator;
DEC(text_length, lexer^.start.iterator);
INC(text_length);
MemZero(ADR(token^.stringKind), TSIZE(ShortString));
MemCopy(lexer^.start, text_length, ADR(token^.stringKind));
MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind));
token^.kind := lexerKindString
end;
INC(lexer^.current)
increment(ADR(lexer^.current))
end;
(* Finalize keyword or identifier. *)
@ -341,102 +351,102 @@ proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindIdentifier;
token^.identifierKind[1] := lexer^.current;
DEC(token^.identifierKind[1], lexer^.start);
MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
token^.identifierKind[1] := lexer^.current.iterator;
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('PROGRAM', lexer^.start, lexer^.current) then
if compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram
end;
if compare_keyword('IMPORT', lexer^.start, lexer^.current) then
if compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport
end;
if compare_keyword('CONST', lexer^.start, lexer^.current) then
if compare_keyword('CONST', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst
end;
if compare_keyword('VAR', lexer^.start, lexer^.current) then
if compare_keyword('VAR', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar
end;
if compare_keyword('IF', lexer^.start, lexer^.current) then
if compare_keyword('IF', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf
end;
if compare_keyword('THEN', lexer^.start, lexer^.current) then
if compare_keyword('THEN', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen
end;
if compare_keyword('ELSIF', lexer^.start, lexer^.current) then
if compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif
end;
if compare_keyword('ELSE', lexer^.start, lexer^.current) then
if compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse
end;
if compare_keyword('WHILE', lexer^.start, lexer^.current) then
if compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile
end;
if compare_keyword('DO', lexer^.start, lexer^.current) then
if compare_keyword('DO', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current) then
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc
end;
if compare_keyword('BEGIN', lexer^.start, lexer^.current) then
if compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin
end;
if compare_keyword('END', lexer^.start, lexer^.current) then
if compare_keyword('END', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd
end;
if compare_keyword('TYPE', lexer^.start, lexer^.current) then
if compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType
end;
if compare_keyword('RECORD', lexer^.start, lexer^.current) then
if compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord
end;
if compare_keyword('UNION', lexer^.start, lexer^.current) then
if compare_keyword('UNION', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current) then
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull
end;
if compare_keyword('AND', lexer^.start, lexer^.current) then
if compare_keyword('AND', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindAnd
end;
if compare_keyword('OR', lexer^.start, lexer^.current) then
if compare_keyword('OR', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr
end;
if compare_keyword('RETURN', lexer^.start, lexer^.current) then
if compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn
end;
if compare_keyword('DEFINITION', lexer^.start, lexer^.current) then
if compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefinition
end;
if compare_keyword('TO', lexer^.start, lexer^.current) then
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current) then
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current) then
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current) then
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom
end;
if compare_keyword('MODULE', lexer^.start, lexer^.current) then
if compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule
end;
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) then
if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImplementation
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current) then
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current) then
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current) then
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current) then
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
@ -446,52 +456,52 @@ end;
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
begin
(* The kind is chosen from the current character alone. A character that is
 * not listed below leaves token^.kind unchanged. *)
if lexer^.current^ = '&' then
if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd
end;
if lexer^.current^ = ';' then
if lexer^.current.iterator^ = ';' then
token^.kind := lexerKindSemicolon
end;
if lexer^.current^ = ',' then
if lexer^.current.iterator^ = ',' then
token^.kind := lexerKindComma
end;
if lexer^.current^ = '~' then
if lexer^.current.iterator^ = '~' then
token^.kind := lexerKindTilde
end;
if lexer^.current^ = ')' then
if lexer^.current.iterator^ = ')' then
token^.kind := lexerKindRightParen
end;
if lexer^.current^ = '[' then
if lexer^.current.iterator^ = '[' then
token^.kind := lexerKindLeftSquare
end;
if lexer^.current^ = ']' then
if lexer^.current.iterator^ = ']' then
token^.kind := lexerKindRightSquare
end;
if lexer^.current^ = '^' then
if lexer^.current.iterator^ = '^' then
token^.kind := lexerKindHat
end;
if lexer^.current^ = '=' then
if lexer^.current.iterator^ = '=' then
token^.kind := lexerKindEqual
end;
if lexer^.current^ = '+' then
if lexer^.current.iterator^ = '+' then
token^.kind := lexerKindPlus
end;
if lexer^.current^ = '*' then
if lexer^.current.iterator^ = '*' then
token^.kind := lexerKindAsterisk
end;
if lexer^.current^ = '/' then
if lexer^.current.iterator^ = '/' then
token^.kind := lexerKindDivision
end;
if lexer^.current^ = '%' then
if lexer^.current.iterator^ = '%' then
token^.kind := lexerKindRemainder
end;
if lexer^.current^ = '@' then
if lexer^.current.iterator^ = '@' then
token^.kind := lexerKindAt
end;
if lexer^.current^ = '|' then
if lexer^.current.iterator^ = '|' then
token^.kind := lexerKindPipe
end;
INC(lexer^.current)
(* Pass the address of the whole position record, as the other actions do.
 * increment() expects a PBufferPosition; the address of the iterator field
 * only coincides with it while iterator stays the first field of the record. *)
increment(ADR(lexer^.current))
end;
(* Handle an integer literal. *)
@ -503,21 +513,21 @@ var
begin
token^.kind := lexerKindInteger;
integer_length := lexer^.current;
DEC(integer_length, lexer^.start);
integer_length := lexer^.current.iterator;
DEC(integer_length, lexer^.start.iterator);
MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1]));
MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1]));
buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
token^.integerKind := StringToInteger(buffer, 10, found);
buffer := KillString(buffer)
end;
proc set_default_transition(current_state: TransitionState, DefaultAction: TransitionAction, next_state: TransitionState);
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState);
var
default_transition: Transition;
begin
default_transition.action := DefaultAction;
default_transition.action := default_action;
default_transition.next_state := next_state;
transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition;
@ -821,7 +831,7 @@ begin
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
index1 := ORD(lexer^.current^);
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@ -836,6 +846,9 @@ begin
end;
current_state := current_transition.next_state
end;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
return result
end;
@ -845,7 +858,9 @@ var
begin
if lexer^.length = 0 then
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
lexer^.current := lexer^.buffer
lexer^.current.location.column := 1;
lexer^.current.location.line := 1;
lexer^.current.iterator := lexer^.buffer
end;
lexer^.start := lexer^.current;

View File

@ -4,6 +4,11 @@ FROM Common IMPORT Identifier, PIdentifier, ShortString;
FROM Lexer IMPORT PLexer;
TYPE
(* Parser state. It currently holds only a pointer to a lexer, presumably
 * the token source for the parse procedures - TODO confirm against the
 * implementation module. *)
Parser = RECORD
lexer: PLexer
END;
PParser = POINTER TO Parser;
AstLiteralKind = (
astLiteralKindInteger,
astLiteralKindString,

View File

@ -9,7 +9,7 @@ from Storage import ALLOCATE, REALLOCATE;
from Lexer import Lexer, LexerKind, LexerToken, lexer_current, lexer_lex;
(* Calls lexer_lex() but skips the comments. *)
proc transpiler_lex(lexer: PLexer) -> LexerToken;
proc parser_lex(lexer: PLexer) -> LexerToken;
var
result: LexerToken;
begin
@ -30,7 +30,7 @@ var
current_field: PAstFieldDeclaration;
begin
ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration));
token := transpiler_lex(lexer);
token := parser_lex(lexer);
field_count := 0;
while token.kind <> lexerKindEnd do
@ -41,16 +41,16 @@ begin
current_field := field_declarations;
INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1));
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_field^.field_name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_field^.field_type := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
if token.kind = lexerKindSemicolon then
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end
end;
INC(current_field, TSIZE(AstFieldDeclaration));
@ -81,7 +81,7 @@ begin
token := lexer_current(lexer);
if token.kind = lexerKindPointer then
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
token := lexer_current(lexer);
result^.target := parse_type_expression(lexer);
@ -102,16 +102,16 @@ begin
token := lexer_current(lexer);
if token.kind = lexerKindArray then
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
if token.kind <> lexerKindOf then
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.length := token.integerKind;
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.base := parse_type_expression(lexer);
return result
@ -129,14 +129,14 @@ begin
case_count := 1;
ALLOCATE(result^.cases, TSIZE(Identifier) * 2);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_case := result^.cases;
current_case^ := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
while token.kind = lexerKindComma do
token := transpiler_lex(lexer);
token := parser_lex(lexer);
INC(case_count);
INC(case_count);
@ -146,7 +146,7 @@ begin
INC(current_case, TSIZE(Identifier) * (case_count - 1));
current_case^ := token.identifierKind;
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
INC(current_case, TSIZE(Identifier));
MemZero(current_case, TSIZE(Identifier));
@ -181,8 +181,8 @@ begin
ALLOCATE(result^.parameters, 1);
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
while token.kind <> lexerKindRightParen do
INC(parameter_count);
@ -194,9 +194,9 @@ begin
current_parameter^ := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
if token.kind = lexerKindComma then
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end
end;
current_parameter := result^.parameters;
@ -245,11 +245,11 @@ begin
NEW(result);
result^.identifier := token.identifierKind;
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
result^.type_expression := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return result
end;
@ -268,7 +268,7 @@ begin
declaration_count := 0;
if token.kind = lexerKindType then
token := transpiler_lex(lexer);
token := parser_lex(lexer);
while token.kind = lexerKindIdentifier do
INC(declaration_count);
@ -278,7 +278,7 @@ begin
INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1));
current_declaration^ := parse_type_declaration(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end
end;
if declaration_count <> 0 then
@ -299,12 +299,12 @@ begin
token := lexer_current(lexer);
result^.variable_name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.variable_type := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return result
end;
@ -322,7 +322,7 @@ begin
declaration_count := 0;
if token.kind = lexerKindVar then
token := transpiler_lex(lexer);
token := parser_lex(lexer);
while token.kind = lexerKindIdentifier do
INC(declaration_count);
@ -332,7 +332,7 @@ begin
INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1));
current_declaration^ := parse_variable_declaration(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end
end;
if declaration_count <> 0 then
@ -353,12 +353,12 @@ begin
token := lexer_current(lexer);
result^.constant_name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.constant_value := token.integerKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return result
end;
@ -377,7 +377,7 @@ begin
declaration_count := 0;
if token.kind = lexerKindConst then
token := transpiler_lex(lexer);
token := parser_lex(lexer);
while token.kind = lexerKindIdentifier do
INC(declaration_count);
@ -387,7 +387,7 @@ begin
INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1));
current_declaration^ := parse_constant_declaration(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end
end;
if declaration_count <> 0 then
@ -408,20 +408,20 @@ begin
NEW(result);
symbol_count := 1;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.package := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
ALLOCATE(result^.symbols, TSIZE(Identifier) * 2);
current_symbol := result^.symbols;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
current_symbol^ := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
while token.kind <> lexerKindSemicolon do
token := transpiler_lex(lexer);
token := parser_lex(lexer);
INC(symbol_count);
REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1));
@ -429,12 +429,12 @@ begin
INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1));
current_symbol^ := token.identifierKind;
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
INC(current_symbol, TSIZE(Identifier));
MemZero(current_symbol, TSIZE(Identifier));
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return result
end;
@ -501,7 +501,7 @@ begin
literal^.boolean := token.booleanKind
end;
if literal <> nil then
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
return literal
@ -526,7 +526,7 @@ begin
end;
if (result = nil) & (next_token.kind = lexerKindMinus) then
NEW(result);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
result^.kind := astExpressionKindUnary;
result^.unary_operator := astUnaryOperatorMinus;
@ -534,17 +534,17 @@ begin
end;
if (result = nil) & (next_token.kind = lexerKindTilde) then
NEW(result);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
result^.kind := astExpressionKindUnary;
result^.unary_operator := astUnaryOperatorNot;
result^.unary_operand := parse_factor(lexer)
end;
if (result = nil) & (next_token.kind = lexerKindLeftParen) then
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
result := parse_expression(lexer);
if result <> nil then
next_token := transpiler_lex(lexer)
next_token := parser_lex(lexer)
end
end;
if (result = nil) & (next_token.kind = lexerKindIdentifier) then
@ -553,7 +553,7 @@ begin
result^.kind := astExpressionKindIdentifier;
result^.identifier := next_token.identifierKind;
next_token := transpiler_lex(lexer)
next_token := parser_lex(lexer)
end;
return result
@ -581,34 +581,34 @@ begin
designator^.kind := astExpressionKindDereference;
designator^.reference := inner_expression;
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := true
end;
if ~handled & (next_token.kind = lexerKindLeftSquare) then
NEW(designator);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.kind := astExpressionKindArrayAccess;
designator^.array := inner_expression;
designator^.index := parse_expression(lexer);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := true
end;
if ~handled & (next_token.kind = lexerKindDot) then
NEW(designator);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.kind := astExpressionKindFieldAccess;
designator^.aggregate := inner_expression;
designator^.field := next_token.identifierKind;
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := true
end;
if ~handled & (next_token.kind = lexerKindLeftParen) then
NEW(designator);
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.kind := astExpressionKindCall;
designator^.callable := inner_expression;
@ -623,7 +623,7 @@ begin
next_token := lexer_current(lexer);
while next_token.kind = lexerKindComma do
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
designator^.argument_count := designator^.argument_count + 1;
REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count);
@ -635,7 +635,7 @@ begin
end
end;
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
handled := true
end
end;
@ -649,7 +649,7 @@ var
result: PAstExpression;
right: PAstExpression;
begin
next_token := transpiler_lex(lexer);
next_token := parser_lex(lexer);
right := parse_designator(lexer);
result := nil;
@ -725,7 +725,7 @@ begin
NEW(result);
result^.kind := astStatementKindReturn;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.returned := parse_expression(lexer);
return result
@ -740,7 +740,7 @@ begin
result^.kind := astStatementKindAssignment;
result^.assignee := assignee;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.assignment := parse_expression(lexer);
return result
@ -792,7 +792,7 @@ var
designator: PAstExpression;
begin
statement := nil;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
if token.kind = lexerKindIf then
statement := parse_if_statement(lexer)
@ -825,11 +825,11 @@ begin
NEW(result);
result^.kind := astStatementKindIf;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.if_condition := parse_expression(lexer);
result^.if_branch := parse_compound_statement(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return result
end;
@ -841,11 +841,11 @@ begin
NEW(result);
result^.kind := astStatementKindWhile;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.while_condition := parse_expression(lexer);
result^.while_body := parse_compound_statement(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return result
end;
@ -874,15 +874,15 @@ var
begin
NEW(declaration);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.name := token.identifierKind;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.parameters := nil;
declaration^.parameter_count := 0;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
while token.kind <> lexerKindRightParen do
parameter_index := declaration^.parameter_count;
INC(declaration^.parameter_count);
@ -893,26 +893,26 @@ begin
current_parameter^.identifier := token.identifierKind;
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
current_parameter^.type_expression := parse_type_expression(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
if token.kind = lexerKindComma then
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end
end;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.return_type := nil;
(* Check for the return type and write it. *)
if token.kind = lexerKindArrow then
token := transpiler_lex(lexer);
token := parser_lex(lexer);
declaration^.return_type := parse_type_expression(lexer);
token := transpiler_lex(lexer)
token := parser_lex(lexer)
end;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
return declaration
end;
@ -928,8 +928,8 @@ begin
declaration^.variables := parse_variable_part(lexer);
declaration^.statements := parse_statement_part(lexer);
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
return declaration
end;
@ -971,16 +971,16 @@ var
result: PAstModule;
begin
NEW(result);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.main := true;
if token.kind = lexerKindModule then
result^.main := false
end;
token := transpiler_lex(lexer);
token := parser_lex(lexer);
(* Write the module body. *)
token := transpiler_lex(lexer);
token := parser_lex(lexer);
result^.imports := parse_import_part(lexer);
result^.constants := parse_constant_part(lexer);
@ -990,8 +990,8 @@ begin
result^.procedures := parse_procedure_part(lexer);
result^.statements := parse_statement_part(lexer);
token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
token := parser_lex(lexer);
token := parser_lex(lexer);
return result
end;

View File

@ -9,7 +9,8 @@ FROM Parser IMPORT PAstModule;
TYPE
(* State shared by the transpile procedures. *)
TranspilerContext = RECORD
input_name: ShortString; (* Presumably the name of the file being transpiled - TODO confirm. *)
output: File
output: File; (* File the generated text is written to. *)
indentation: CARDINAL (* Current nesting depth; indent() writes its indentation string this many times. *)
END;
PTranspilerContext = POINTER TO TranspilerContext;

View File

@ -1,33 +1,28 @@
module;
from FIO import WriteNBytes, WriteLine, WriteChar, WriteString;
from SYSTEM import ADR, ADDRESS, TSIZE;
from SYSTEM import ADR, TSIZE;
from NumberIO import IntToStr;
from Storage import ALLOCATE, REALLOCATE;
from MemUtils import MemCopy, MemZero;
from Common import Identifier, PIdentifier, ShortString;
from Lexer import Lexer, LexerToken, lexer_current, lexer_lex, LexerKind;
from Parser import AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator,
AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration,
PAstModule, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration,
PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind,
AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration,
PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement,
PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration;
(* Calls lexer_lex() but skips the comments. *)
proc transpiler_lex(lexer: PLexer) -> LexerToken;
proc indent(context: PTranspilerContext);
var
result: LexerToken;
count: CARDINAL;
begin
result := lexer_lex(lexer);
count := 0;
while result.kind = lexerKindComment do
result := lexer_lex(lexer)
end;
return result
while count < context^.indentation do
WriteString(context^.output, ' ');
INC(count)
end
end;
(* Write a semicolon followed by a newline. *)
@ -37,20 +32,8 @@ begin
WriteLine(output)
end;
proc write_current(lexer: PLexer, output: File);
var
written_bytes: CARDINAL;
count: CARDINAL;
begin
count := lexer^.current;
DEC(count, lexer^.start);
written_bytes := WriteNBytes(output, count, lexer^.start)
end;
proc transpile_import_statement(context: PTranspilerContext, import_statement: PAstImportStatement);
var
token: LexerToken;
written_bytes: CARDINAL;
current_symbol: PIdentifier;
begin
@ -98,7 +81,7 @@ begin
write_semicolon(context^.output)
end;
proc transpile_constant_part(context: PTranspilerContext, declarations: PPAstConstantDeclaration);
proc transpile_constant_part(context: PTranspilerContext, declarations: PPAstConstantDeclaration, extra_newline: BOOLEAN);
var
current_declaration: PPAstConstantDeclaration;
begin
@ -112,13 +95,13 @@ begin
INC(current_declaration, TSIZE(PAstConstantDeclaration))
end;
WriteLine(context^.output)
if extra_newline then
WriteLine(context^.output)
end
end
end;
proc transpile_module(context: PTranspilerContext, result: PAstModule);
var
token: LexerToken;
begin
if result^.main = false then
WriteString(context^.output, 'IMPLEMENTATION ')
@ -134,9 +117,9 @@ begin
(* Write the module body. *)
transpile_import_part(context, result^.imports);
transpile_constant_part(context, result^.constants);
transpile_constant_part(context, result^.constants, true);
transpile_type_part(context, result^.types);
transpile_variable_part(context, result^.variables);
transpile_variable_part(context, result^.variables, true);
transpile_procedure_part(context, result^.procedures);
transpile_statement_part(context, result^.statements);
@ -179,8 +162,6 @@ begin
end;
proc transpile_pointer_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
var
token: LexerToken;
begin
WriteString(context^.output, 'POINTER TO ');
@ -325,7 +306,7 @@ begin
write_semicolon(context^.output)
end;
proc transpile_variable_part(context: PTranspilerContext, declarations: PPAstVariableDeclaration);
proc transpile_variable_part(context: PTranspilerContext, declarations: PPAstVariableDeclaration, extra_newline: BOOLEAN);
var
current_declaration: PPAstVariableDeclaration;
begin
@ -339,13 +320,14 @@ begin
INC(current_declaration, TSIZE(PAstVariableDeclaration))
end;
WriteLine(context^.output)
if extra_newline then
WriteLine(context^.output)
end
end
end;
proc transpile_procedure_heading(context: PTranspilerContext, declaration: PAstProcedureDeclaration);
var
token: LexerToken;
written_bytes: CARDINAL;
parameter_index: CARDINAL;
current_parameter: PAstTypedDeclaration;
@ -511,33 +493,33 @@ begin
end;
proc transpile_if_statement(context: PTranspilerContext, statement: PAstStatement);
var
token: LexerToken;
begin
if statement <> nil then
WriteString(context^.output, ' IF ');
transpile_expression(context, statement^.if_condition);
WriteString(context^.output, 'IF ');
transpile_expression(context, statement^.if_condition);
WriteString(context^.output, ' THEN');
WriteLine(context^.output);
WriteString(context^.output, ' THEN');
WriteLine(context^.output);
INC(context^.indentation);
transpile_compound_statement(context, statement^.if_branch);
WriteString(context^.output, ' END')
end
transpile_compound_statement(context, statement^.if_branch);
DEC(context^.indentation);
indent(context);
WriteString(context^.output, 'END')
end;
proc transpile_while_statement(context: PTranspilerContext, statement: PAstStatement);
var
token: LexerToken;
begin
WriteString(context^.output, ' WHILE ');
WriteString(context^.output, 'WHILE ');
transpile_expression(context, statement^.while_condition);
WriteString(context^.output, ' DO');
WriteLine(context^.output);
INC(context^.indentation);
transpile_compound_statement(context, statement^.while_body);
WriteString(context^.output, ' END')
DEC(context^.indentation);
indent(context);
WriteString(context^.output, 'END')
end;
proc transpile_assignment_statement(context: PTranspilerContext, statement: PAstStatement);
@ -549,7 +531,7 @@ end;
proc transpile_return_statement(context: PTranspilerContext, statement: PAstStatement);
begin
WriteString(context^.output, ' RETURN ');
WriteString(context^.output, 'RETURN ');
transpile_expression(context, statement^.returned)
end;
@ -577,6 +559,8 @@ end;
proc transpile_statement(context: PTranspilerContext, statement: PAstStatement);
begin
indent(context);
if statement^.kind = astStatementKindIf then
transpile_if_statement(context, statement)
end;
@ -599,7 +583,10 @@ begin
if compound.count > 0 then
WriteString(context^.output, 'BEGIN');
WriteLine(context^.output);
transpile_compound_statement(context, compound)
INC(context^.indentation);
transpile_compound_statement(context, compound);
DEC(context^.indentation)
end
end;
@ -609,8 +596,8 @@ var
begin
transpile_procedure_heading(context, declaration);
transpile_constant_part(context, declaration^.constants);
transpile_variable_part(context, declaration^.variables);
transpile_constant_part(context, declaration^.constants, false);
transpile_variable_part(context, declaration^.variables, false);
transpile_statement_part(context, declaration^.statements);
WriteString(context^.output, 'END ');
@ -662,6 +649,7 @@ var
begin
context.input_name := input_name;
context.output := output;
context.indentation := 0;
transpile_module(ADR(context), ast_module)
end;