Split the parser from the code generator

This commit is contained in:
2025-06-11 22:36:35 +02:00
parent 00e557686b
commit e3f094c8a5
7 changed files with 409 additions and 230 deletions

View File

@ -1,6 +1,6 @@
module;
from FIO import StdErr, WriteNBytes, WriteLine, WriteChar, WriteString;
from FIO import WriteNBytes, WriteLine, WriteChar, WriteString;
from SYSTEM import ADR, ADDRESS, TSIZE;
from NumberIO import IntToStr;
@ -10,13 +10,11 @@ from MemUtils import MemCopy, MemZero;
from Common import Identifier, PIdentifier, ShortString;
from Lexer import Lexer, LexerToken, lexer_current, lexer_lex, LexerKind;
from Parser import AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator,
AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral,
PAstConstantDeclaration, PPAstConstantDeclaration, PAstStatement, AstStatementKind,
AstTypeDeclaration, PAstTypeDeclaration, PPAstTypeDeclaration,
AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration,
PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind,
AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration,
PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement,
PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration,
parse_type_expression, parse_variable_part, parse_type_part, parse_constant_part, parse_import_part,
parse_designator, parse_expression, parse_return_statement, parse_assignment_statement, parse_call_statement;
PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration;
(* Calls lexer_lex() but skips the comments. *)
proc transpiler_lex(lexer: PLexer) -> LexerToken;
@ -118,15 +116,11 @@ begin
end
end;
(* Writes a whole module to context^.output: the module header, the import, *)
(* constant, type, variable, procedure and statement parts, and finally *)
(* 'END ', the module name and a '.'. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. Both the *)
(* signature returning PAstModule and the one taking the module as the *)
(* `result` parameter appear below, together with parse_* calls and lexer *)
(* reads - confirm which lines belong to the current version. *)
proc transpile_module(context: PTranspilerContext) -> PAstModule;
proc transpile_module(context: PTranspilerContext, result: PAstModule);
var
token: LexerToken;
result: PAstModule;
begin
NEW(result);
token := transpiler_lex(context^.lexer);
if token.kind = lexerKindModule then
(* 'IMPLEMENTATION ' is written only when the module is not flagged as main. *)
if result^.main = false then
WriteString(context^.output, 'IMPLEMENTATION ')
end;
WriteString(context^.output, 'MODULE ');
@ -134,37 +128,23 @@ begin
(* Write the module name and end the line with a semicolon and newline. *)
transpile_module_name(context);
token := transpiler_lex(context^.lexer);
write_semicolon(context^.output);
WriteLine(context^.output);
(* Write the module body. *)
token := transpiler_lex(context^.lexer);
result^.imports := parse_import_part(context^.lexer);
transpile_import_part(context, result^.imports);
result^.constants := parse_constant_part(context^.lexer);
transpile_constant_part(context, result^.constants);
result^.types := parse_type_part(context^.lexer);
transpile_type_part(context, result^.types);
result^.variables := parse_variable_part(context^.lexer);
transpile_variable_part(context, result^.variables);
transpile_procedure_part(context);
transpile_statement_part(context);
transpile_procedure_part(context, result^.procedures);
transpile_statement_part(context, result^.statements);
(* Close the module with 'END ', the module name and a terminating dot. *)
WriteString(context^.output, 'END ');
transpile_module_name(context);
token := transpiler_lex(context^.lexer);
WriteChar(context^.output, '.');
token := transpiler_lex(context^.lexer);
WriteLine(context^.output);
return result
WriteLine(context^.output)
end;
proc transpile_type_fields(context: PTranspilerContext, fields: PAstFieldDeclaration);
@ -217,7 +197,7 @@ begin
WriteString(context^.output, '[1..');
IntToStr(type_expression^.length, 0, buffer);
WriteString(context^.output, buffer);
WriteString(context^.output, buffer);
WriteChar(context^.output, ']')
end;
@ -270,7 +250,7 @@ begin
while current_parameter^ <> nil do
transpile_type_expression(context, current_parameter^);
INC(current_parameter, TSIZE(PAstTypeExpression));
INC(current_parameter, TSIZE(PAstTypeExpression));
if current_parameter^ <> nil then
WriteString(context^.output, ', ')
@ -301,7 +281,7 @@ begin
end
end;
proc transpile_type_declaration(context: PTranspilerContext, declaration: PAstTypeDeclaration);
proc transpile_type_declaration(context: PTranspilerContext, declaration: PAstTypedDeclaration);
var
written_bytes: CARDINAL;
begin
@ -314,9 +294,9 @@ begin
write_semicolon(context^.output)
end;
proc transpile_type_part(context: PTranspilerContext, declarations: PPAstTypeDeclaration);
proc transpile_type_part(context: PTranspilerContext, declarations: PPAstTypedDeclaration);
var
current_declaration: PPAstTypeDeclaration;
current_declaration: PPAstTypedDeclaration;
begin
if declarations^ <> nil then
WriteString(context^.output, 'TYPE');
@ -326,7 +306,7 @@ begin
while current_declaration^ <> nil do
transpile_type_declaration(context, current_declaration^);
INC(current_declaration, TSIZE(PAstTypeDeclaration))
INC(current_declaration, TSIZE(PAstTypedDeclaration))
end;
WriteLine(context^.output)
end
@ -363,51 +343,41 @@ begin
end
end;
(* Writes a procedure heading: 'PROCEDURE ', the procedure name, the *)
(* parenthesised parameter list, an optional ': ' plus return type, and a *)
(* semicolon written by write_semicolon. *)
(* Each parameter is written as its identifier, ': ' and its type expression; *)
(* '; ' is written between parameters but not after the last one. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* token-driven lines (transpiler_lex, write_current, parse_type_expression) *)
(* and the declaration-driven lines are interleaved - confirm which survive. *)
proc transpile_procedure_heading(context: PTranspilerContext) -> LexerToken;
proc transpile_procedure_heading(context: PTranspilerContext, declaration: PAstProcedureDeclaration);
var
token: LexerToken;
result: LexerToken;
type_expression: PAstTypeExpression;
written_bytes: CARDINAL;
parameter_index: CARDINAL;
current_parameter: PAstTypedDeclaration;
begin
WriteString(context^.output, 'PROCEDURE ');
result := transpiler_lex(context^.lexer);
write_current(context^.lexer, context^.output);
token := transpiler_lex(context^.lexer);
(* Presumably name[1] holds the length and the characters start at name[2] - *)
(* TODO confirm against the declaration of the name field. *)
written_bytes := WriteNBytes(context^.output, ORD(declaration^.name[1]), ADR(declaration^.name[2]));
WriteChar(context^.output, '(');
token := transpiler_lex(context^.lexer);
while token.kind <> lexerKindRightParen do
write_current(context^.lexer, context^.output);
parameter_index := 0;
current_parameter := declaration^.parameters;
token := transpiler_lex(context^.lexer);
while parameter_index < declaration^.parameter_count do
written_bytes := WriteNBytes(context^.output, ORD(current_parameter^.identifier[1]), ADR(current_parameter^.identifier[2]));
WriteString(context^.output, ': ');
token := transpiler_lex(context^.lexer);
transpile_type_expression(context, current_parameter^.type_expression);
type_expression := parse_type_expression(context^.lexer);
transpile_type_expression(context, type_expression);
(* Advance the counter and the parameter pointer together. *)
INC(parameter_index);
INC(current_parameter, TSIZE(AstTypedDeclaration));
token := transpiler_lex(context^.lexer);
if (token.kind = lexerKindSemicolon) or (token.kind = lexerKindComma) then
WriteString(context^.output, '; ');
token := transpiler_lex(context^.lexer)
if parameter_index <> declaration^.parameter_count then
WriteString(context^.output, '; ')
end
end;
WriteString(context^.output, ')');
token := transpiler_lex(context^.lexer);
(* Check for the return type and write it. *)
if token.kind = lexerKindArrow then
if declaration^.return_type <> nil then
WriteString(context^.output, ': ');
token := transpiler_lex(context^.lexer);
write_current(context^.lexer, context^.output);
token := transpiler_lex(context^.lexer)
transpile_type_expression(context, declaration^.return_type)
end;
token := transpiler_lex(context^.lexer);
write_semicolon(context^.output);
return result
write_semicolon(context^.output)
end;
proc transpile_unary_operator(context: PTranspilerContext, operator: AstUnaryOperator);
@ -470,20 +440,20 @@ begin
if literal^.kind = astLiteralKindInteger then
IntToStr(literal^.integer, 0, buffer);
WriteString(context^.output, buffer);
WriteString(context^.output, buffer)
end;
if literal^.kind = astLiteralKindString then
WriteString(context^.output, literal^.string)
end;
if literal^.kind = astLiteralKindNull then
if literal^.kind = astLiteralKindNull then
WriteString(context^.output, 'NIL')
end;
if (literal^.kind = astLiteralKindBoolean) & literal^.boolean then
WriteString(context^.output, 'TRUE')
end;
end;
if (literal^.kind = astLiteralKindBoolean) & (literal^.boolean = false) then
WriteString(context^.output, 'FALSE')
end
end
end;
if expression^.kind = astExpressionKindIdentifier then
written_bytes := WriteNBytes(context^.output, ORD(expression^.identifier[1]), ADR(expression^.identifier[2]))
@ -501,7 +471,7 @@ begin
if expression^.kind = astExpressionKindFieldAccess then
transpile_expression(context, expression^.aggregate);
WriteChar(context^.output, '.');
written_bytes := WriteNBytes(context^.output, ORD(expression^.field[1]), ADR(expression^.field[2]));
written_bytes := WriteNBytes(context^.output, ORD(expression^.field[1]), ADR(expression^.field[2]))
end;
if expression^.kind = astExpressionKindUnary then
transpile_unary_operator(context, expression^.unary_operator);
@ -540,52 +510,34 @@ begin
end
end;
(* Writes an if statement: the IF keyword, the condition expression, *)
(* ' THEN', a line break, the statements of the if branch and the END keyword. *)
(* In the variant taking `statement`, nothing is written when it is nil. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* variant that allocates and returns a PAstStatement while reading the lexer *)
(* is interleaved with the variant that only prints an existing statement. *)
proc transpile_if_statement(context: PTranspilerContext) -> PAstStatement;
proc transpile_if_statement(context: PTranspilerContext, statement: PAstStatement);
var
token: LexerToken;
result: PAstStatement;
begin
NEW(result);
result^.kind := astStatementKindIf;
WriteString(context^.output, ' IF ');
if statement <> nil then
WriteString(context^.output, ' IF ');
transpile_expression(context, statement^.if_condition);
token := transpiler_lex(context^.lexer);
result^.if_condition := parse_expression(context^.lexer);
WriteString(context^.output, ' THEN');
WriteLine(context^.output);
transpile_expression(context, result^.if_condition);
token := lexer_current(context^.lexer);
WriteString(context^.output, ' THEN');
WriteLine(context^.output);
transpile_statements(context);
WriteString(context^.output, ' END');
token := transpiler_lex(context^.lexer);
return result
transpile_compound_statement(context, statement^.if_branch);
WriteString(context^.output, ' END')
end
end;
(* Writes a while loop: the WHILE keyword, the loop condition, ' DO', a line *)
(* break, the statements of the loop body and the END keyword. *)
(* NOTE(review): unlike transpile_if_statement, the variant taking `statement` *)
(* dereferences it without a nil check - confirm callers never pass nil. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* variant that allocates and returns a PAstStatement while reading the lexer *)
(* is interleaved with the variant that only prints an existing statement. *)
proc transpile_while_statement(context: PTranspilerContext) -> PAstStatement;
proc transpile_while_statement(context: PTranspilerContext, statement: PAstStatement);
var
token: LexerToken;
result: PAstStatement;
begin
NEW(result);
result^.kind := astStatementKindWhile;
WriteString(context^.output, ' WHILE ');
token := transpiler_lex(context^.lexer);
result^.while_condition := parse_expression(context^.lexer);
transpile_expression(context, result^.while_condition);
token := lexer_current(context^.lexer);
transpile_expression(context, statement^.while_condition);
WriteString(context^.output, ' DO');
WriteLine(context^.output);
transpile_statements(context);
WriteString(context^.output, ' END');
token := transpiler_lex(context^.lexer);
return result
transpile_compound_statement(context, statement^.while_body);
WriteString(context^.output, ' END')
end;
proc transpile_assignment_statement(context: PTranspilerContext, statement: PAstStatement);
@ -599,108 +551,81 @@ proc transpile_return_statement(context: PTranspilerContext, statement: PAstStat
begin
WriteString(context^.output, ' RETURN ');
transpile_expression(context, statement^.returned);
transpile_expression(context, statement^.returned)
end;
(* transpile_compound_statement writes statement.count statements, starting *)
(* at statement.statements. Every statement is followed by a line break, and *)
(* a ';' is written after each statement except the last one. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* token-driven transpile_statement and transpile_statements procedures are *)
(* interleaved with transpile_compound_statement - confirm which lines survive. *)
proc transpile_statement(context: PTranspilerContext);
proc transpile_compound_statement(context: PTranspilerContext, statement: AstCompoundStatement);
var
token: LexerToken;
written_bytes: CARDINAL;
statement: PAstStatement;
designator: PAstExpression;
current_statement: PPAstStatement;
index: CARDINAL;
begin
token := transpiler_lex(context^.lexer);
index := 0;
current_statement := statement.statements;
if token.kind = lexerKindIf then
statement := transpile_if_statement(context)
end;
if token.kind = lexerKindWhile then
statement := transpile_while_statement(context)
end;
if token.kind = lexerKindReturn then
statement := parse_return_statement(context^.lexer);
transpile_return_statement(context, statement)
end;
if token.kind = lexerKindIdentifier then
designator := parse_designator(context^.lexer);
token := lexer_current(context^.lexer);
while index < statement.count do
transpile_statement(context, current_statement^);
if token.kind = lexerKindAssignment then
statement := parse_assignment_statement(context^.lexer, designator);
transpile_assignment_statement(context, statement)
end;
if token.kind <> lexerKindAssignment then
statement := parse_call_statement(context^.lexer, designator);
transpile_expression(context, designator);
(* Step to the next statement pointer and count it before the separator check. *)
INC(current_statement, TSIZE(PAstStatement));
INC(index);
written_bytes := WriteNBytes(StdErr, 5, context^.lexer^.start);
WriteLine(StdErr);
end
end
end;
proc transpile_statements(context: PTranspilerContext);
var
token: LexerToken;
begin
token := lexer_current(context^.lexer);
while token.kind <> lexerKindEnd do
transpile_statement(context);
token := lexer_current(context^.lexer);
if token.kind = lexerKindSemicolon then
if index <> statement.count then
WriteChar(context^.output, ';')
end;
WriteLine(context^.output)
end
end;
(* transpile_statement selects the writer for one statement by *)
(* statement^.kind: if, while, return and assignment statements go to their *)
(* own transpile_* procedure, and a call statement is written by passing *)
(* statement^.call to transpile_expression. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* token-driven transpile_statement_part (heading, `token` variable, BEGIN *)
(* check) is interleaved with transpile_statement - confirm which lines survive. *)
proc transpile_statement_part(context: PTranspilerContext);
var
token: LexerToken;
proc transpile_statement(context: PTranspilerContext, statement: PAstStatement);
begin
token := lexer_current(context^.lexer);
if token.kind = lexerKindBegin then
WriteString(context^.output, 'BEGIN');
WriteLine(context^.output);
transpile_statements(context)
if statement^.kind = astStatementKindIf then
transpile_if_statement(context, statement)
end;
if statement^.kind = astStatementKindWhile then
transpile_while_statement(context, statement)
end;
if statement^.kind = astStatementKindReturn then
transpile_return_statement(context, statement)
end;
if statement^.kind = astStatementKindAssignment then
transpile_assignment_statement(context, statement)
end;
if statement^.kind = astStatementKindCall then
transpile_expression(context, statement^.call)
end
end;
(* transpile_statement_part writes 'BEGIN', a line break and the statements *)
(* of `compound`, but only when compound.count is greater than zero. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* token-driven transpile_procedure_declaration (heading, local variables, *)
(* parse_* calls) is interleaved with transpile_statement_part - confirm *)
(* which lines survive. *)
proc transpile_procedure_declaration(context: PTranspilerContext);
var
token: LexerToken;
seen_variables: PPAstVariableDeclaration;
written_bytes: CARDINAL;
seen_constants: PPAstConstantDeclaration;
proc transpile_statement_part(context: PTranspilerContext, compound: AstCompoundStatement);
begin
token := transpile_procedure_heading(context);
seen_constants := parse_constant_part(context^.lexer);
transpile_constant_part(context, seen_constants);
seen_variables := parse_variable_part(context^.lexer);
transpile_variable_part(context, seen_variables);
transpile_statement_part(context);
WriteString(context^.output, 'END ');
written_bytes := WriteNBytes(context^.output, ORD(token.identifierKind[1]), ADR(token.identifierKind[2]));
token := transpiler_lex(context^.lexer);
write_semicolon(context^.output);
token := transpiler_lex(context^.lexer)
if compound.count > 0 then
WriteString(context^.output, 'BEGIN');
WriteLine(context^.output);
transpile_compound_statement(context, compound)
end
end;
(* transpile_procedure_declaration writes one procedure: its heading, its *)
(* constant, variable and statement parts, then 'END ', the procedure name *)
(* and a semicolon written by write_semicolon. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* token-driven transpile_procedure_part (heading, `token` variable, loop on *)
(* lexerKindProc) is interleaved with transpile_procedure_declaration - *)
(* confirm which lines survive. *)
proc transpile_procedure_part(context: PTranspilerContext);
proc transpile_procedure_declaration(context: PTranspilerContext, declaration: PAstProcedureDeclaration);
var
token: LexerToken;
written_bytes: CARDINAL;
begin
token := lexer_current(context^.lexer);
transpile_procedure_heading(context, declaration);
while token.kind = lexerKindProc do
transpile_procedure_declaration(context);
token := lexer_current(context^.lexer);
WriteLine(context^.output)
transpile_constant_part(context, declaration^.constants);
transpile_variable_part(context, declaration^.variables);
transpile_statement_part(context, declaration^.statements);
WriteString(context^.output, 'END ');
(* Presumably name[1] holds the length and the characters start at name[2] - *)
(* TODO confirm against the declaration of the name field. *)
written_bytes := WriteNBytes(context^.output, ORD(declaration^.name[1]), ADR(declaration^.name[2]));
write_semicolon(context^.output)
end;
(* Writes every procedure declaration of the list in order, each followed by *)
(* a line break. The walk stops at the first nil entry of the list. *)
proc transpile_procedure_part(context: PTranspilerContext, declaration: PPAstProcedureDeclaration);
var
  current_declaration: PPAstProcedureDeclaration;
begin
  (* Walk the list through a local cursor and leave the parameter untouched. *)
  current_declaration := declaration;

  while current_declaration^ <> nil do
    transpile_procedure_declaration(context, current_declaration^);
    WriteLine(context^.output);

    INC(current_declaration, TSIZE(PAstProcedureDeclaration))
  end
end;
@ -716,7 +641,7 @@ begin
if context^.input_name[counter] = '/' then
last_slash := counter
end;
INC(counter)
INC(counter)
end;
if last_slash = 0 then
@ -727,21 +652,18 @@ begin
end;
while (context^.input_name[counter] <> '.') & (ORD(context^.input_name[counter]) <> 0) do
WriteChar(context^.output, context^.input_name[counter]);
INC(counter)
end;
INC(counter)
end
end;
(* Entry point of the transpiler: stores the input name and the output file *)
(* in a local TranspilerContext and passes its address to transpile_module. *)
(* NOTE(review): this listing is a diff rendering without +/- markers. The *)
(* variant taking a lexer and receiving the module from transpile_module is *)
(* interleaved with the variant taking `ast_module` as a parameter - confirm *)
(* which lines survive. *)
proc transpile(lexer: PLexer, output: File, input_name: ShortString);
proc transpile(ast_module: PAstModule, output: File, input_name: ShortString);
var
token: LexerToken;
context: TranspilerContext;
ast_module: PAstModule;
begin
context.input_name := input_name;
context.output := output;
context.lexer := lexer;
ast_module := transpile_module(ADR(context))
transpile_module(ADR(context), ast_module)
end;
end.