From c9c9b217a292236f8b537ee02167d0e918d46fb0 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Fri, 6 Jun 2025 18:25:29 +0200 Subject: [PATCH] Parse and transpile unary operations --- source/CommandLineInterface.mod | 2 +- source/Lexer.def | 7 +- source/Lexer.mod | 10 ++- source/Parser.def | 49 ++++++++++- source/Parser.mod | 146 +++++++++++++++++++++++++++++--- source/Transpiler.mod | 106 ++++++++++++++++++++--- 6 files changed, 291 insertions(+), 29 deletions(-) diff --git a/source/CommandLineInterface.mod b/source/CommandLineInterface.mod index f22cf2b..10eac9b 100644 --- a/source/CommandLineInterface.mod +++ b/source/CommandLineInterface.mod @@ -18,7 +18,7 @@ VAR parsed: BOOLEAN; BEGIN i := 1; - ALLOCATE(result, TSIZE(CommandLine)); + NEW(result); result^.lex := FALSE; result^.parse := FALSE; MemZero(ADR(result^.input), 256); diff --git a/source/Lexer.def b/source/Lexer.def index 7dcf06b..ce6fd01 100644 --- a/source/Lexer.def +++ b/source/Lexer.def @@ -2,7 +2,7 @@ DEFINITION MODULE Lexer; FROM FIO IMPORT File; -FROM Common IMPORT Identifier; +FROM Common IMPORT Identifier, ShortString; TYPE PLexerBuffer = POINTER TO CHAR; @@ -41,7 +41,7 @@ TYPE lexerKindNull, lexerKindAnd, lexerKindOr, - lexerKindNot, + lexerKindTilde, lexerKindReturn, lexerKindDefinition, lexerKindRange, @@ -84,7 +84,8 @@ TYPE CASE kind: LexerKind OF lexerKindBoolean: booleanKind: BOOLEAN | lexerKindIdentifier: identifierKind: Identifier | - lexerKindInteger: integerKind: INTEGER + lexerKindInteger: integerKind: INTEGER | + lexerKindString: stringKind: ShortString END END; PLexerToken = POINTER TO LexerToken; diff --git a/source/Lexer.mod b/source/Lexer.mod index 99fcc4e..4d8ca20 100644 --- a/source/Lexer.mod +++ b/source/Lexer.mod @@ -294,6 +294,8 @@ BEGIN END transition_action_skip; (* Delimited string action. *) PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken); +VAR + text_length: CARDINAL; BEGIN IF lexer^.Start^ = '(' THEN token^.kind := lexerKindComment @@ -302,6 +304,10 @@ BEGIN token^.kind := lexerKindCharacter END; IF lexer^.Start^ = "'" THEN + text_length := lexer^.Current - lexer^.Start; + MemZero(ADR(token^.stringKind), TSIZE(ShortString)); + MemCopy(lexer^.Start, text_length, ADR(token^.stringKind)); + token^.kind := lexerKindString END; INC(lexer^.Current) @@ -423,8 +429,8 @@ BEGIN IF lexer^.Current^ = ',' THEN token^.kind := lexerKindComma END; - IF lexer^.Current^ = ',' THEN - token^.kind := lexerKindComma + IF lexer^.Current^ = '~' THEN + token^.kind := lexerKindTilde END; IF lexer^.Current^ = ')' THEN token^.kind := lexerKindRightParen diff --git a/source/Parser.def b/source/Parser.def index 21526bd..65e58cd 100644 --- a/source/Parser.def +++ b/source/Parser.def @@ -1,9 +1,55 @@ DEFINITION MODULE Parser; -FROM Common IMPORT Identifier, PIdentifier; +FROM Common IMPORT Identifier, PIdentifier, ShortString; FROM Lexer IMPORT PLexer; TYPE + AstLiteralKind = ( + astLiteralKindInteger, + astLiteralKindString, + astLiteralKindNull + ); + AstLiteral = RECORD + CASE kind: AstLiteralKind OF + astLiteralKindInteger: integer: INTEGER | + astLiteralKindString: string: ShortString | + astLiteralKindNull: + END + END; + PAstLiteral = POINTER TO AstLiteral; + + AstUnaryOperator = ( + astUnaryOperatorNot, + astUnaryOperatorMinus + ); + + AstExpressionKind = ( + astExpressionKindLiteral, + astExpressionKindIdentifier, + astExpressionKindArrayAccess, + astExpressionKindDereference, + astExpressionKindFieldAccess, + astExpressionKindUnary + ); + AstExpression = RECORD + CASE kind: AstExpressionKind OF + astExpressionKindLiteral: literal: PAstLiteral | + astExpressionKindIdentifier: identifier: Identifier | + astExpressionKindDereference: reference: PAstExpression | + astExpressionKindArrayAccess: + array: PAstExpression; + index: PAstExpression | + astExpressionKindFieldAccess: + aggregate: PAstExpression; + field: Identifier | + astExpressionKindUnary: + unary_operator: AstUnaryOperator; + unary_operand: PAstExpression + END + END; + PAstExpression = POINTER TO AstExpression; + PPAstExpression = POINTER TO PAstExpression; + AstImportStatement = RECORD package: Identifier; symbols: PIdentifier @@ -74,5 +120,6 @@ PROCEDURE parse_type_part(lexer: PLexer): PPAstTypeDeclaration; PROCEDURE parse_variable_part(lexer: PLexer): PPAstVariableDeclaration; PROCEDURE parse_constant_part(lexer: PLexer): PPAstConstantDeclaration; PROCEDURE parse_import_part(lexer: PLexer): PPAstImportStatement; +PROCEDURE parse_designator(lexer: PLexer): PAstExpression; END Parser. diff --git a/source/Parser.mod b/source/Parser.mod index 514dca1..22c5f3b 100644 --- a/source/Parser.mod +++ b/source/Parser.mod @@ -58,7 +58,7 @@ PROCEDURE parse_record_type(lexer: PLexer): PAstTypeExpression; VAR result: PAstTypeExpression; BEGIN - ALLOCATE(result, TSIZE(AstTypeExpression)); + NEW(result); result^.kind := astTypeExpressionKindRecord; result^.fields := parse_type_fields(lexer); @@ -69,7 +69,7 @@ VAR token: LexerToken; result: PAstTypeExpression; BEGIN - ALLOCATE(result, TSIZE(AstTypeExpression)); + NEW(result); result^.kind := astTypeExpressionKindPointer; token := lexer_current(lexer); @@ -88,7 +88,7 @@ VAR buffer: ARRAY[1..20] OF CHAR; result: PAstTypeExpression; BEGIN - ALLOCATE(result, TSIZE(AstTypeExpression)); + NEW(result); result^.kind := astTypeExpressionKindArray; result^.length := 0; @@ -116,7 +116,7 @@ VAR current_case: PIdentifier; case_count: CARDINAL; BEGIN - ALLOCATE(result, TSIZE(AstTypeExpression)); + NEW(result); result^.kind := astTypeExpressionKindEnumeration; case_count := 1; @@ -147,10 +147,9 @@ PROCEDURE parse_named_type(lexer: PLexer): PAstTypeExpression; VAR token: LexerToken; result: PAstTypeExpression; - written_bytes: CARDINAL; BEGIN token := lexer_current(lexer); - ALLOCATE(result, TSIZE(AstTypeExpression)); + NEW(result); result^.kind := astTypeExpressionKindNamed; result^.name := token.identifierKind; @@ -165,7 +164,7 @@ VAR parameter_count: CARDINAL; BEGIN parameter_count := 0; - ALLOCATE(result, TSIZE(AstTypeExpression)); + NEW(result); result^.kind := astTypeExpressionKindProcedure; ALLOCATE(result^.parameters, 1); @@ -227,7 +226,7 @@ VAR BEGIN token := lexer_current(lexer); - ALLOCATE(result, TSIZE(AstTypeDeclaration)); + NEW(result); result^.identifier := token.identifierKind; token := transpiler_lex(lexer); @@ -277,7 +276,7 @@ VAR token: LexerToken; result: PAstVariableDeclaration; BEGIN - ALLOCATE(result, TSIZE(AstVariableDeclaration)); + NEW(result); token := lexer_current(lexer); result^.variable_name := token.identifierKind; @@ -329,7 +328,7 @@ VAR token: LexerToken; result: PAstConstantDeclaration; BEGIN - ALLOCATE(result, TSIZE(AstConstantDeclaration)); + NEW(result); token := lexer_current(lexer); result^.constant_name := token.identifierKind; @@ -384,7 +383,7 @@ VAR symbol_count: CARDINAL; current_symbol: PIdentifier; BEGIN - ALLOCATE(result, TSIZE(AstImportStatement)); + NEW(result); symbol_count := 1; token := transpiler_lex(lexer); @@ -446,4 +445,129 @@ BEGIN RETURN result END parse_import_part; +PROCEDURE parse_literal(lexer: PLexer): PAstLiteral; +VAR + literal: PAstLiteral; + token: LexerToken; +BEGIN + literal := NIL; + token := lexer_current(lexer); + + IF token.kind = lexerKindInteger THEN + NEW(literal); + + literal^.kind := astLiteralKindInteger; + literal^.integer := token.integerKind; + END; + IF token.kind = lexerKindCharacter THEN + NEW(literal); + + literal^.kind := astLiteralKindString; + literal^.string := token.stringKind; + END; + IF token.kind = lexerKindNull THEN + NEW(literal); + + literal^.kind := astLiteralKindNull; + END; + IF literal <> NIL THEN + token := transpiler_lex(lexer) + END; + + RETURN literal +END parse_literal; +PROCEDURE parse_factor(lexer: PLexer): PAstExpression; +VAR + next_token: LexerToken; + result: PAstExpression; + literal: PAstLiteral; +BEGIN + result := NIL; + next_token := lexer_current(lexer); + + literal := parse_literal(lexer); + + IF (result = NIL) AND (literal <> NIL) THEN + NEW(result); + + result^.kind := astExpressionKindLiteral; + result^.literal := literal; + END; + IF (result = NIL) AND (next_token.kind = lexerKindMinus) THEN + NEW(result); + next_token := transpiler_lex(lexer); + + result^.kind := astExpressionKindUnary; + result^.unary_operator := astUnaryOperatorMinus; + result^.unary_operand := parse_factor(lexer) + END; + IF (result = NIL) AND (next_token.kind = lexerKindTilde) THEN + NEW(result); + next_token := transpiler_lex(lexer); + + result^.kind := astExpressionKindUnary; + result^.unary_operator := astUnaryOperatorNot; + result^.unary_operand := parse_factor(lexer) + END; + IF (result = NIL) AND (next_token.kind = lexerKindIdentifier) THEN + NEW(result); + + result^.kind := astExpressionKindIdentifier; + result^.identifier := next_token.identifierKind; + + next_token := transpiler_lex(lexer) + END; + + RETURN result +END parse_factor; +PROCEDURE parse_designator(lexer: PLexer): PAstExpression; +VAR + next_token: LexerToken; + inner_expression: PAstExpression; + designator: PAstExpression; + handled: BOOLEAN; +BEGIN + designator := parse_factor(lexer); + handled := designator <> NIL; + next_token := lexer_current(lexer); + + WHILE handled DO + inner_expression := designator; + handled := FALSE; + + IF ~handled AND (next_token.kind = lexerKindHat) THEN + NEW(designator); + + designator^.kind := astExpressionKindDereference; + designator^.reference := inner_expression; + + next_token := transpiler_lex(lexer); + handled := TRUE + END; + IF ~handled AND (next_token.kind = lexerKindLeftSquare) THEN + NEW(designator); + next_token := transpiler_lex(lexer); + + designator^.kind := astExpressionKindArrayAccess; + designator^.array := inner_expression; + designator^.index := parse_designator(lexer); + + next_token := transpiler_lex(lexer); + handled := TRUE + END; + IF ~handled AND (next_token.kind = lexerKindDot) THEN + NEW(designator); + next_token := transpiler_lex(lexer); + + designator^.kind := astExpressionKindFieldAccess; + designator^.aggregate := inner_expression; + designator^.field := next_token.identifierKind; + + next_token := transpiler_lex(lexer); + handled := TRUE + END + END; + + RETURN designator +END parse_designator; END Parser. diff --git a/source/Transpiler.mod b/source/Transpiler.mod index dae35fa..f81fdc0 100644 --- a/source/Transpiler.mod +++ b/source/Transpiler.mod @@ -1,6 +1,6 @@ IMPLEMENTATION MODULE Transpiler; -FROM FIO IMPORT WriteNBytes, WriteLine, WriteChar, WriteString; +FROM FIO IMPORT StdErr, WriteNBytes, WriteLine, WriteChar, WriteString; FROM SYSTEM IMPORT ADR, ADDRESS, TSIZE; FROM NumberIO IMPORT IntToStr; @@ -9,12 +9,14 @@ FROM MemUtils IMPORT MemCopy, MemZero; FROM Common IMPORT Identifier, PIdentifier, ShortString; FROM Lexer IMPORT Lexer, LexerToken, lexer_current, lexer_lex, LexerKind; -FROM Parser IMPORT AstModule, PAstModule, AstTypeExpressionKind, +FROM Parser IMPORT AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, + AstModule, PAstModule, AstExpression, PAstExpression, PAstLiteral, PAstConstantDeclaration, PPAstConstantDeclaration, AstTypeDeclaration, PAstTypeDeclaration, PPAstTypeDeclaration, PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement, PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration, - parse_type_expression, parse_variable_part, parse_type_part, parse_constant_part, parse_import_part; + parse_type_expression, parse_variable_part, parse_type_part, parse_constant_part, parse_import_part, + parse_designator; (* Calls lexer_lex() but skips the comments. *) PROCEDURE transpiler_lex(lexer: PLexer): LexerToken; @@ -110,7 +112,7 @@ VAR token: LexerToken; result: PAstModule; BEGIN - ALLOCATE(result, TSIZE(AstModule)); + NEW(result); token := transpiler_lex(context^.lexer); IF token.kind = lexerKindModule THEN @@ -383,12 +385,12 @@ BEGIN RETURN result END transpile_procedure_heading; -PROCEDURE transpile_expression(context: PTranspilerContext; trailing_token: LexerKind); +PROCEDURE transpile_unchanged(context: PTranspilerContext; trailing_token: LexerKind); VAR token: LexerToken; written_bytes: CARDINAL; BEGIN - token := transpiler_lex(context^.lexer); + token := lexer_current(context^.lexer); WHILE (token.kind <> trailing_token) AND (token.kind <> lexerKindEnd) DO written_bytes := 0; @@ -412,7 +414,7 @@ BEGIN WriteString(context^.output, 'AND '); written_bytes := 1 END; - IF token.kind = lexerKindNot THEN + IF token.kind = lexerKindTilde THEN WriteString(context^.output, 'NOT '); written_bytes := 1 END; @@ -422,13 +424,90 @@ BEGIN END; token := transpiler_lex(context^.lexer) END +END transpile_unchanged; +PROCEDURE parse_expression(lexer: PLexer): PAstExpression; +VAR + next_token: LexerToken; + result: PAstExpression; + written_bytes: CARDINAL; +BEGIN + result := parse_designator(lexer); + + written_bytes := WriteNBytes(StdErr, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); + WriteLine(StdErr); + + RETURN result +END parse_expression; +PROCEDURE transpile_unary_operator(context: PTranspilerContext; operator: AstUnaryOperator); +BEGIN + IF operator = astUnaryOperatorMinus THEN + WriteChar(context^.output, '-') + END; + IF operator = astUnaryOperatorNot THEN + WriteChar(context^.output, '~') + END +END transpile_unary_operator; +PROCEDURE transpile_expression(context: PTranspilerContext; expression: PAstExpression); +VAR + literal: PAstLiteral; + buffer: ARRAY[1..20] OF CHAR; + written_bytes: CARDINAL; +BEGIN + IF expression^.kind = astExpressionKindLiteral THEN + literal := expression^.literal; + + IF literal^.kind = astLiteralKindInteger THEN + IntToStr(literal^.integer, 0, buffer); + WriteString(context^.output, buffer); + END; + IF literal^.kind = astLiteralKindString THEN + WriteString(context^.output, literal^.string) + END + END; + IF expression^.kind = astExpressionKindIdentifier THEN + written_bytes := WriteNBytes(context^.output, ORD(expression^.identifier[1]), ADR(expression^.identifier[2])) + END; + IF expression^.kind = astExpressionKindDereference THEN + transpile_expression(context, expression^.reference); + WriteChar(context^.output, '^') + END; + IF expression^.kind = astExpressionKindArrayAccess THEN + transpile_expression(context, expression^.array); + WriteChar(context^.output, '['); + transpile_expression(context, expression^.index); + WriteChar(context^.output, ']') + END; + IF expression^.kind = astExpressionKindFieldAccess THEN + transpile_expression(context, expression^.aggregate); + WriteChar(context^.output, '.'); + written_bytes := WriteNBytes(context^.output, ORD(expression^.field[1]), ADR(expression^.field[2])); + END; + IF expression^.kind = astExpressionKindUnary THEN + transpile_unary_operator(context, expression^.unary_operator); + transpile_expression(context, expression^.unary_operand) + END END transpile_expression; PROCEDURE transpile_if_statement(context: PTranspilerContext); VAR token: LexerToken; + expression: PAstExpression; + lexer: Lexer; BEGIN WriteString(context^.output, ' IF '); - transpile_expression(context, lexerKindThen); + + lexer := context^.lexer^; + token := transpiler_lex(ADR(lexer)); + expression := parse_expression(ADR(lexer)); + + IF expression <> NIL THEN + context^.lexer^ := lexer; + transpile_expression(context, expression); + WriteChar(context^.output, ' ') + END; + IF expression = NIL THEN + token := transpiler_lex(context^.lexer) + END; + transpile_unchanged(context, lexerKindThen); WriteString(context^.output, 'THEN'); WriteLine(context^.output); @@ -441,7 +520,8 @@ VAR token: LexerToken; BEGIN WriteString(context^.output, ' WHILE '); - transpile_expression(context, lexerKindDo); + token := transpiler_lex(context^.lexer); + transpile_unchanged(context, lexerKindDo); WriteString(context^.output, 'DO'); WriteLine(context^.output); @@ -450,9 +530,12 @@ BEGIN token := transpiler_lex(context^.lexer) END transpile_while_statement; PROCEDURE transpile_assignment_statement(context: PTranspilerContext); +VAR + token: LexerToken; BEGIN WriteString(context^.output, ' := '); - transpile_expression(context, lexerKindSemicolon); + token := transpiler_lex(context^.lexer); + transpile_unchanged(context, lexerKindSemicolon); END transpile_assignment_statement; PROCEDURE transpile_call_statement(context: PTranspilerContext); VAR @@ -514,7 +597,8 @@ VAR token: LexerToken; BEGIN WriteString(context^.output, ' RETURN '); - transpile_expression(context, lexerKindSemicolon) + token := transpiler_lex(context^.lexer); + transpile_unchanged(context, lexerKindSemicolon) END transpile_return_statement; PROCEDURE transpile_statement(context: PTranspilerContext); VAR