From fdaeb25f73e536046b592907504b93722df42682 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 12 Jun 2025 18:44:06 +0200 Subject: [PATCH] Trace the source code position in the lexer --- source/Common.def | 4 + source/Lexer.def | 15 +++- source/Lexer.mod | 194 ++++++++++++++++++++++-------------------- source/Parser.def | 5 ++ source/Parser.mod | 164 +++++++++++++++++------------------ source/Transpiler.def | 3 +- source/Transpiler.mod | 99 ++++++++++----------- 7 files changed, 252 insertions(+), 232 deletions(-) diff --git a/source/Common.def b/source/Common.def index 996a971..9520230 100644 --- a/source/Common.def +++ b/source/Common.def @@ -4,5 +4,9 @@ TYPE ShortString = ARRAY[1..256] OF CHAR; Identifier = ARRAY[1..256] OF CHAR; PIdentifier = POINTER TO Identifier; + TextLocation = RECORD + line: CARDINAL; + column: CARDINAL + END; END Common. diff --git a/source/Lexer.def b/source/Lexer.def index faa22c2..b707b50 100644 --- a/source/Lexer.def +++ b/source/Lexer.def @@ -2,17 +2,22 @@ DEFINITION MODULE Lexer; FROM FIO IMPORT File; -FROM Common IMPORT Identifier, ShortString; +FROM Common IMPORT Identifier, ShortString, TextLocation; TYPE PLexerBuffer = POINTER TO CHAR; + BufferPosition = RECORD + iterator: PLexerBuffer; + location: TextLocation + END; + PBufferPosition = POINTER TO BufferPosition; Lexer = RECORD input: File; buffer: PLexerBuffer; size: CARDINAL; length: CARDINAL; - start: PLexerBuffer; - current: PLexerBuffer + start: BufferPosition; + current: BufferPosition END; PLexer = POINTER TO Lexer; LexerKind = ( @@ -86,7 +91,9 @@ TYPE lexerKindIdentifier: identifierKind: Identifier | lexerKindInteger: integerKind: INTEGER | lexerKindString: stringKind: ShortString - END + END; + start_location: TextLocation; + end_location: TextLocation END; PLexerToken = POINTER TO LexerToken; diff --git a/source/Lexer.mod b/source/Lexer.mod index a9ef958..8b4a3fe 100644 --- a/source/Lexer.mod +++ b/source/Lexer.mod @@ -212,7 +212,7 @@ BEGIN INC(i) END END initialize_classification; -PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: PLexerBuffer; token_end: PLexerBuffer): BOOLEAN; +PROCEDURE compare_keyword(keyword: ARRAY OF CHAR; token_start: BufferPosition; token_end: PLexerBuffer): BOOLEAN; VAR result: BOOLEAN; index: CARDINAL; @@ -222,213 +222,222 @@ BEGIN index := 0; result := TRUE; keyword_length := Length(keyword); - continue := (index < keyword_length) AND (token_start <> token_end); + continue := (index < keyword_length) AND (token_start.iterator <> token_end); WHILE continue AND result DO - result := (keyword[index] = token_start^) OR (Lower(keyword[index]) = token_start^); - INC(token_start); + result := (keyword[index] = token_start.iterator^) OR (Lower(keyword[index]) = token_start.iterator^); + INC(token_start.iterator); INC(index); - continue := (index < keyword_length) AND (token_start <> token_end) + continue := (index < keyword_length) AND (token_start.iterator <> token_end) END; result := result AND (index = Length(keyword)); - RETURN result AND (token_start = token_end) + RETURN result AND (token_start.iterator = token_end) END compare_keyword; (* Reached the end of file. *) PROCEDURE transition_action_eof(lexer: PLexer; token: PLexerToken); BEGIN token^.kind := lexerKindEof END transition_action_eof; +PROCEDURE increment(position: PBufferPosition); +BEGIN + INC(position^.iterator) +END increment; (* Add the character to the token currently read and advance to the next character. *) PROCEDURE transition_action_accumulate(lexer: PLexer; token: PLexerToken); BEGIN - INC(lexer^.current) + increment(ADR(lexer^.current)) END transition_action_accumulate; (* The current character is not a part of the token. Finish the token already * read. Don't advance to the next character. *) PROCEDURE transition_action_finalize(lexer: PLexer; token: PLexerToken); BEGIN - IF lexer^.start^ = ':' THEN + IF lexer^.start.iterator^ = ':' THEN token^.kind := lexerKindColon END; - IF lexer^.start^ = '>' THEN + IF lexer^.start.iterator^ = '>' THEN token^.kind := lexerKindGreaterThan END; - IF lexer^.start^ = '<' THEN + IF lexer^.start.iterator^ = '<' THEN token^.kind := lexerKindLessThan END; - IF lexer^.start^ = '(' THEN + IF lexer^.start.iterator^ = '(' THEN token^.kind := lexerKindLeftParen END; - IF lexer^.start^ = '-' THEN + IF lexer^.start.iterator^ = '-' THEN token^.kind := lexerKindMinus END; - IF lexer^.start^ = '.' THEN + IF lexer^.start.iterator^ = '.' THEN token^.kind := lexerKindDot END END transition_action_finalize; (* An action for tokens containing multiple characters. *) PROCEDURE transition_action_composite(lexer: PLexer; token: PLexerToken); BEGIN - IF lexer^.start^ = '<' THEN - IF lexer^.current^ = '>' THEN + IF lexer^.start.iterator^ = '<' THEN + IF lexer^.current.iterator^ = '>' THEN token^.kind := lexerKindNotEqual END; - IF lexer^.current^ = '=' THEN + IF lexer^.current.iterator^ = '=' THEN token^.kind := lexerKindLessEqual END END; - IF (lexer^.start^ = '>') AND (lexer^.current^ = '=') THEN + IF (lexer^.start.iterator^ = '>') AND (lexer^.current.iterator^ = '=') THEN token^.kind := lexerKindGreaterEqual END; - IF (lexer^.start^ = '.') AND (lexer^.current^ = '.') THEN + IF (lexer^.start.iterator^ = '.') AND (lexer^.current.iterator^ = '.') THEN token^.kind := lexerKindRange END; - IF (lexer^.start^ = ':') AND (lexer^.current^ = '=') THEN + IF (lexer^.start.iterator^ = ':') AND (lexer^.current.iterator^ = '=') THEN token^.kind := lexerKindAssignment END; - IF (lexer^.start^ = '-') AND (lexer^.current^ = '>') THEN + IF (lexer^.start.iterator^ = '-') AND (lexer^.current.iterator^ = '>') THEN token^.kind := lexerKindArrow END; - INC(lexer^.current) + increment(ADR(lexer^.current)) END transition_action_composite; (* Skip a space. *) PROCEDURE transition_action_skip(lexer: PLexer; token: PLexerToken); BEGIN - INC(lexer^.current); - INC(lexer^.start) + increment(ADR(lexer^.start)); + + IF ORD(lexer^.start.iterator^) = 10 THEN + INC(lexer^.start.location.line); + lexer^.start.location.column := 1 + END; + lexer^.current := lexer^.start END transition_action_skip; (* Delimited string action. *) PROCEDURE transition_action_delimited(lexer: PLexer; token: PLexerToken); VAR text_length: CARDINAL; BEGIN - IF lexer^.start^ = '(' THEN + IF lexer^.start.iterator^ = '(' THEN token^.kind := lexerKindComment END; - IF lexer^.start^ = '"' THEN - text_length := lexer^.current; - DEC(text_length, lexer^.start); + IF lexer^.start.iterator^ = '"' THEN + text_length := lexer^.current.iterator; + DEC(text_length, lexer^.start.iterator); INC(text_length); MemZero(ADR(token^.stringKind), TSIZE(ShortString)); - MemCopy(lexer^.start, text_length, ADR(token^.stringKind)); + MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind)); token^.kind := lexerKindCharacter END; - IF lexer^.start^ = "'" THEN - text_length := lexer^.current; - DEC(text_length, lexer^.start); + IF lexer^.start.iterator^ = "'" THEN + text_length := lexer^.current.iterator; + DEC(text_length, lexer^.start.iterator); INC(text_length); MemZero(ADR(token^.stringKind), TSIZE(ShortString)); - MemCopy(lexer^.start, text_length, ADR(token^.stringKind)); + MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind)); token^.kind := lexerKindString END; - INC(lexer^.current) + increment(ADR(lexer^.current)) END transition_action_delimited; (* Finalize keyword OR identifier. *) PROCEDURE transition_action_key_id(lexer: PLexer; token: PLexerToken); BEGIN token^.kind := lexerKindIdentifier; - token^.identifierKind[1] := lexer^.current; - DEC(token^.identifierKind[1], lexer^.start); - MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); + token^.identifierKind[1] := lexer^.current.iterator; + DEC(token^.identifierKind[1], lexer^.start.iterator); + MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); - IF compare_keyword('PROGRAM', lexer^.start, lexer^.current) THEN + IF compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindProgram END; - IF compare_keyword('IMPORT', lexer^.start, lexer^.current) THEN + IF compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindImport END; - IF compare_keyword('CONST', lexer^.start, lexer^.current) THEN + IF compare_keyword('CONST', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindConst END; - IF compare_keyword('VAR', lexer^.start, lexer^.current) THEN + IF compare_keyword('VAR', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindVar END; - IF compare_keyword('IF', lexer^.start, lexer^.current) THEN + IF compare_keyword('IF', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindIf END; - IF compare_keyword('THEN', lexer^.start, lexer^.current) THEN + IF compare_keyword('THEN', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindThen END; - IF compare_keyword('ELSIF', lexer^.start, lexer^.current) THEN + IF compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindElsif END; - IF compare_keyword('ELSE', lexer^.start, lexer^.current) THEN + IF compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindElse END; - IF compare_keyword('WHILE', lexer^.start, lexer^.current) THEN + IF compare_keyword('WHILE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindWhile END; - IF compare_keyword('DO', lexer^.start, lexer^.current) THEN + IF compare_keyword('DO', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindDo END; - IF compare_keyword('proc', lexer^.start, lexer^.current) THEN + IF compare_keyword('proc', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindProc END; - IF compare_keyword('BEGIN', lexer^.start, lexer^.current) THEN + IF compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindBegin END; - IF compare_keyword('END', lexer^.start, lexer^.current) THEN + IF compare_keyword('END', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindEnd END; - IF compare_keyword('TYPE', lexer^.start, lexer^.current) THEN + IF compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindType END; - IF compare_keyword('RECORD', lexer^.start, lexer^.current) THEN + IF compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindRecord END; - IF compare_keyword('UNION', lexer^.start, lexer^.current) THEN + IF compare_keyword('UNION', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindUnion END; - IF compare_keyword('NIL', lexer^.start, lexer^.current) THEN + IF compare_keyword('NIL', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindNull END; - IF compare_keyword('AND', lexer^.start, lexer^.current) THEN + IF compare_keyword('AND', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindAnd END; - IF compare_keyword('OR', lexer^.start, lexer^.current) THEN + IF compare_keyword('OR', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindOr END; - IF compare_keyword('RETURN', lexer^.start, lexer^.current) THEN + IF compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindReturn END; - IF compare_keyword('DEFINITION', lexer^.start, lexer^.current) THEN + IF compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindDefinition END; - IF compare_keyword('TO', lexer^.start, lexer^.current) THEN + IF compare_keyword('TO', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindTo END; - IF compare_keyword('CASE', lexer^.start, lexer^.current) THEN + IF compare_keyword('CASE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindCase END; - IF compare_keyword('OF', lexer^.start, lexer^.current) THEN + IF compare_keyword('OF', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindOf END; - IF compare_keyword('FROM', lexer^.start, lexer^.current) THEN + IF compare_keyword('FROM', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindFrom END; - IF compare_keyword('MODULE', lexer^.start, lexer^.current) THEN + IF compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindModule END; - IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) THEN + IF compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindImplementation END; - IF compare_keyword('POINTER', lexer^.start, lexer^.current) THEN + IF compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindPointer END; - IF compare_keyword('ARRAY', lexer^.start, lexer^.current) THEN + IF compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindArray END; - IF compare_keyword('TRUE', lexer^.start, lexer^.current) THEN + IF compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindBoolean; token^.booleanKind := TRUE END; - IF compare_keyword('FALSE', lexer^.start, lexer^.current) THEN + IF compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) THEN token^.kind := lexerKindBoolean; token^.booleanKind := FALSE END @@ -437,52 +446,52 @@ END transition_action_key_id; * followed by other characters forming a composite token. *) PROCEDURE transition_action_single(lexer: PLexer; token: PLexerToken); BEGIN - IF lexer^.current^ = '&' THEN + IF lexer^.current.iterator^ = '&' THEN token^.kind := lexerKindAnd END; - IF lexer^.current^ = ';' THEN + IF lexer^.current.iterator^ = ';' THEN token^.kind := lexerKindSemicolon END; - IF lexer^.current^ = ',' THEN + IF lexer^.current.iterator^ = ',' THEN token^.kind := lexerKindComma END; - IF lexer^.current^ = '~' THEN + IF lexer^.current.iterator^ = '~' THEN token^.kind := lexerKindTilde END; - IF lexer^.current^ = ')' THEN + IF lexer^.current.iterator^ = ')' THEN token^.kind := lexerKindRightParen END; - IF lexer^.current^ = '[' THEN + IF lexer^.current.iterator^ = '[' THEN token^.kind := lexerKindLeftSquare END; - IF lexer^.current^ = ']' THEN + IF lexer^.current.iterator^ = ']' THEN token^.kind := lexerKindRightSquare END; - IF lexer^.current^ = '^' THEN + IF lexer^.current.iterator^ = '^' THEN token^.kind := lexerKindHat END; - IF lexer^.current^ = '=' THEN + IF lexer^.current.iterator^ = '=' THEN token^.kind := lexerKindEqual END; - IF lexer^.current^ = '+' THEN + IF lexer^.current.iterator^ = '+' THEN token^.kind := lexerKindPlus END; - IF lexer^.current^ = '*' THEN + IF lexer^.current.iterator^ = '*' THEN token^.kind := lexerKindAsterisk END; - IF lexer^.current^ = '/' THEN + IF lexer^.current.iterator^ = '/' THEN token^.kind := lexerKindDivision END; - IF lexer^.current^ = '%' THEN + IF lexer^.current.iterator^ = '%' THEN token^.kind := lexerKindRemainder END; - IF lexer^.current^ = '@' THEN + IF lexer^.current.iterator^ = '@' THEN token^.kind := lexerKindAt END; - IF lexer^.current^ = '|' THEN + IF lexer^.current.iterator^ = '|' THEN token^.kind := lexerKindPipe END; - INC(lexer^.current) + increment(ADR(lexer^.current.iterator)) END transition_action_single; (* Handle an integer literal. *) PROCEDURE transition_action_integer(lexer: PLexer; token: PLexerToken); @@ -493,20 +502,20 @@ VAR BEGIN token^.kind := lexerKindInteger; - integer_length := lexer^.current; - DEC(integer_length, lexer^.start); + integer_length := lexer^.current.iterator; + DEC(integer_length, lexer^.start.iterator); MemZero(ADR(token^.identifierKind), TSIZE(Identifier)); - MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1])); + MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1])); buffer := InitStringCharStar(ADR(token^.identifierKind[1])); token^.integerKind := StringToInteger(buffer, 10, found); buffer := KillString(buffer) END transition_action_integer; -PROCEDURE set_default_transition(current_state: TransitionState; DefaultAction: TransitionAction; next_state: TransitionState); +PROCEDURE set_default_transition(current_state: TransitionState; default_action: TransitionAction; next_state: TransitionState); VAR default_transition: Transition; BEGIN - default_transition.action := DefaultAction; + default_transition.action := default_action; default_transition.next_state := next_state; transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition; @@ -807,7 +816,7 @@ BEGIN current_state := transitionStateStart; WHILE current_state <> transitionStateEnd DO - index1 := ORD(lexer^.current^); + index1 := ORD(lexer^.current.iterator^); INC(index1); current_class := classification[index1]; @@ -822,6 +831,9 @@ BEGIN END; current_state := current_transition.next_state END; + result.start_location := lexer^.start.location; + result.end_location := lexer^.current.location; + RETURN result END lexer_current; PROCEDURE lexer_lex(lexer: PLexer): LexerToken; @@ -830,7 +842,9 @@ VAR BEGIN IF lexer^.length = 0 THEN lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer); - lexer^.current := lexer^.buffer + lexer^.current.location.column := 1; + lexer^.current.location.line := 1; + lexer^.current.iterator := lexer^.buffer END; lexer^.start := lexer^.current; diff --git a/source/Parser.def b/source/Parser.def index 02c02d0..7df325c 100644 --- a/source/Parser.def +++ b/source/Parser.def @@ -4,6 +4,11 @@ FROM Common IMPORT Identifier, PIdentifier, ShortString; FROM Lexer IMPORT PLexer; TYPE + Parser = RECORD + lexer: PLexer + END; + PParser = POINTER TO Parser; + AstLiteralKind = ( astLiteralKindInteger, astLiteralKindString, diff --git a/source/Parser.mod b/source/Parser.mod index 34d2298..afba319 100644 --- a/source/Parser.mod +++ b/source/Parser.mod @@ -9,7 +9,7 @@ FROM Storage IMPORT ALLOCATE, REALLOCATE; FROM Lexer IMPORT Lexer, LexerKind, LexerToken, lexer_current, lexer_lex; (* Calls lexer_lex() but skips the comments. *) -PROCEDURE transpiler_lex(lexer: PLexer): LexerToken; +PROCEDURE parser_lex(lexer: PLexer): LexerToken; VAR result: LexerToken; BEGIN @@ -20,7 +20,7 @@ BEGIN END; RETURN result -END transpiler_lex; +END parser_lex; PROCEDURE parse_type_fields(lexer: PLexer): PAstFieldDeclaration; VAR token: LexerToken; @@ -29,7 +29,7 @@ VAR current_field: PAstFieldDeclaration; BEGIN ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration)); - token := transpiler_lex(lexer); + token := parser_lex(lexer); field_count := 0; WHILE token.kind <> lexerKindEnd DO @@ -40,16 +40,16 @@ BEGIN current_field := field_declarations; INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1)); - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_field^.field_name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_field^.field_type := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); IF token.kind = lexerKindSemicolon THEN - token := transpiler_lex(lexer) + token := parser_lex(lexer) END END; INC(current_field, TSIZE(AstFieldDeclaration)); @@ -78,7 +78,7 @@ BEGIN token := lexer_current(lexer); IF token.kind = lexerKindPointer THEN - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; token := lexer_current(lexer); result^.target := parse_type_expression(lexer); @@ -98,16 +98,16 @@ BEGIN token := lexer_current(lexer); IF token.kind = lexerKindArray THEN - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; IF token.kind <> lexerKindOf THEN - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.length := token.integerKind; - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.base := parse_type_expression(lexer); RETURN result @@ -124,14 +124,14 @@ BEGIN case_count := 1; ALLOCATE(result^.cases, TSIZE(Identifier) * 2); - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_case := result^.cases; current_case^ := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); WHILE token.kind = lexerKindComma DO - token := transpiler_lex(lexer); + token := parser_lex(lexer); INC(case_count); INC(case_count); @@ -141,7 +141,7 @@ BEGIN INC(current_case, TSIZE(Identifier) * (case_count - 1)); current_case^ := token.identifierKind; - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; INC(current_case, TSIZE(Identifier)); MemZero(current_case, TSIZE(Identifier)); @@ -174,8 +174,8 @@ BEGIN ALLOCATE(result^.parameters, 1); - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); WHILE token.kind <> lexerKindRightParen DO INC(parameter_count); @@ -187,9 +187,9 @@ BEGIN current_parameter^ := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); IF token.kind = lexerKindComma THEN - token := transpiler_lex(lexer) + token := parser_lex(lexer) END END; current_parameter := result^.parameters; @@ -236,11 +236,11 @@ BEGIN NEW(result); result^.identifier := token.identifierKind; - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); result^.type_expression := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_type_declaration; @@ -258,7 +258,7 @@ BEGIN declaration_count := 0; IF token.kind = lexerKindType THEN - token := transpiler_lex(lexer); + token := parser_lex(lexer); WHILE token.kind = lexerKindIdentifier DO INC(declaration_count); @@ -268,7 +268,7 @@ BEGIN INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1)); current_declaration^ := parse_type_declaration(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) END END; IF declaration_count <> 0 THEN @@ -288,12 +288,12 @@ BEGIN token := lexer_current(lexer); result^.variable_name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.variable_type := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_variable_declaration; PROCEDURE parse_variable_part(lexer: PLexer): PPAstVariableDeclaration; @@ -310,7 +310,7 @@ BEGIN declaration_count := 0; IF token.kind = lexerKindVar THEN - token := transpiler_lex(lexer); + token := parser_lex(lexer); WHILE token.kind = lexerKindIdentifier DO INC(declaration_count); @@ -320,7 +320,7 @@ BEGIN INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1)); current_declaration^ := parse_variable_declaration(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) END END; IF declaration_count <> 0 THEN @@ -340,12 +340,12 @@ BEGIN token := lexer_current(lexer); result^.constant_name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.constant_value := token.integerKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_constant_declaration; @@ -363,7 +363,7 @@ BEGIN declaration_count := 0; IF token.kind = lexerKindConst THEN - token := transpiler_lex(lexer); + token := parser_lex(lexer); WHILE token.kind = lexerKindIdentifier DO INC(declaration_count); @@ -373,7 +373,7 @@ BEGIN INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1)); current_declaration^ := parse_constant_declaration(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) END END; IF declaration_count <> 0 THEN @@ -393,20 +393,20 @@ BEGIN NEW(result); symbol_count := 1; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.package := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); ALLOCATE(result^.symbols, TSIZE(Identifier) * 2); current_symbol := result^.symbols; - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_symbol^ := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); WHILE token.kind <> lexerKindSemicolon DO - token := transpiler_lex(lexer); + token := parser_lex(lexer); INC(symbol_count); REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1)); @@ -414,12 +414,12 @@ BEGIN INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1)); current_symbol^ := token.identifierKind; - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; INC(current_symbol, TSIZE(Identifier)); MemZero(current_symbol, TSIZE(Identifier)); - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_import_statement; @@ -484,7 +484,7 @@ BEGIN literal^.boolean := token.booleanKind END; IF literal <> NIL THEN - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; RETURN literal @@ -508,7 +508,7 @@ BEGIN END; IF (result = NIL) AND (next_token.kind = lexerKindMinus) THEN NEW(result); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); result^.kind := astExpressionKindUnary; result^.unary_operator := astUnaryOperatorMinus; @@ -516,17 +516,17 @@ BEGIN END; IF (result = NIL) AND (next_token.kind = lexerKindTilde) THEN NEW(result); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); result^.kind := astExpressionKindUnary; result^.unary_operator := astUnaryOperatorNot; result^.unary_operand := parse_factor(lexer) END; IF (result = NIL) AND (next_token.kind = lexerKindLeftParen) THEN - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); result := parse_expression(lexer); IF result <> NIL THEN - next_token := transpiler_lex(lexer) + next_token := parser_lex(lexer) END END; IF (result = NIL) AND (next_token.kind = lexerKindIdentifier) THEN @@ -535,7 +535,7 @@ BEGIN result^.kind := astExpressionKindIdentifier; result^.identifier := next_token.identifierKind; - next_token := transpiler_lex(lexer) + next_token := parser_lex(lexer) END; RETURN result @@ -562,34 +562,34 @@ BEGIN designator^.kind := astExpressionKindDereference; designator^.reference := inner_expression; - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := TRUE END; IF ~handled AND (next_token.kind = lexerKindLeftSquare) THEN NEW(designator); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.kind := astExpressionKindArrayAccess; designator^.array := inner_expression; designator^.index := parse_expression(lexer); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := TRUE END; IF ~handled AND (next_token.kind = lexerKindDot) THEN NEW(designator); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.kind := astExpressionKindFieldAccess; designator^.aggregate := inner_expression; designator^.field := next_token.identifierKind; - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := TRUE END; IF ~handled AND (next_token.kind = lexerKindLeftParen) THEN NEW(designator); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.kind := astExpressionKindCall; designator^.callable := inner_expression; @@ -604,7 +604,7 @@ BEGIN next_token := lexer_current(lexer); WHILE next_token.kind = lexerKindComma DO - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.argument_count := designator^.argument_count + 1; REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count); @@ -616,7 +616,7 @@ BEGIN END END; - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := TRUE END END; @@ -629,7 +629,7 @@ VAR result: PAstExpression; right: PAstExpression; BEGIN - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); right := parse_designator(lexer); result := NIL; @@ -703,7 +703,7 @@ BEGIN NEW(result); result^.kind := astStatementKindReturn; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.returned := parse_expression(lexer); RETURN result @@ -717,7 +717,7 @@ BEGIN result^.kind := astStatementKindAssignment; result^.assignee := assignee; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.assignment := parse_expression(lexer); RETURN result @@ -766,7 +766,7 @@ VAR designator: PAstExpression; BEGIN statement := NIL; - token := transpiler_lex(lexer); + token := parser_lex(lexer); IF token.kind = lexerKindIf THEN statement := parse_if_statement(lexer) @@ -798,11 +798,11 @@ BEGIN NEW(result); result^.kind := astStatementKindIf; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.if_condition := parse_expression(lexer); result^.if_branch := parse_compound_statement(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_if_statement; PROCEDURE parse_while_statement(lexer: PLexer): PAstStatement; @@ -813,11 +813,11 @@ BEGIN NEW(result); result^.kind := astStatementKindWhile; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.while_condition := parse_expression(lexer); result^.while_body := parse_compound_statement(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_while_statement; PROCEDURE parse_statement_part(lexer: PLexer): AstCompoundStatement; @@ -844,15 +844,15 @@ VAR BEGIN NEW(declaration); - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.parameters := NIL; declaration^.parameter_count := 0; - token := transpiler_lex(lexer); + token := parser_lex(lexer); WHILE token.kind <> lexerKindRightParen DO parameter_index := declaration^.parameter_count; INC(declaration^.parameter_count); @@ -863,26 +863,26 @@ BEGIN current_parameter^.identifier := token.identifierKind; - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); current_parameter^.type_expression := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); IF token.kind = lexerKindComma THEN - token := transpiler_lex(lexer) + token := parser_lex(lexer) END END; - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.return_type := NIL; (* Check for the return type and write it. *) IF token.kind = lexerKindArrow THEN - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.return_type := parse_type_expression(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) END; - token := transpiler_lex(lexer); + token := parser_lex(lexer); RETURN declaration END parse_procedure_heading; @@ -897,8 +897,8 @@ BEGIN declaration^.variables := parse_variable_part(lexer); declaration^.statements := parse_statement_part(lexer); - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); RETURN declaration END parse_procedure_declaration; @@ -938,16 +938,16 @@ VAR result: PAstModule; BEGIN NEW(result); - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.main := TRUE; IF token.kind = lexerKindModule THEN result^.main := FALSE END; - token := transpiler_lex(lexer); + token := parser_lex(lexer); (* Write the module body. *) - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.imports := parse_import_part(lexer); result^.constants := parse_constant_part(lexer); @@ -957,8 +957,8 @@ BEGIN result^.procedures := parse_procedure_part(lexer); result^.statements := parse_statement_part(lexer); - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); RETURN result END parse_module; diff --git a/source/Transpiler.def b/source/Transpiler.def index 534b206..98956fc 100644 --- a/source/Transpiler.def +++ b/source/Transpiler.def @@ -9,7 +9,8 @@ FROM Parser IMPORT PAstModule; TYPE TranspilerContext = RECORD input_name: ShortString; - output: File + output: File; + indentation: CARDINAL END; PTranspilerContext = POINTER TO TranspilerContext; diff --git a/source/Transpiler.mod b/source/Transpiler.mod index fd356ee..6173eb0 100644 --- a/source/Transpiler.mod +++ b/source/Transpiler.mod @@ -1,53 +1,37 @@ IMPLEMENTATION MODULE Transpiler; FROM FIO IMPORT WriteNBytes, WriteLine, WriteChar, WriteString; -FROM SYSTEM IMPORT ADR, ADDRESS, TSIZE; +FROM SYSTEM IMPORT ADR, TSIZE; FROM NumberIO IMPORT IntToStr; -FROM Storage IMPORT ALLOCATE, REALLOCATE; -FROM MemUtils IMPORT MemCopy, MemZero; FROM Common IMPORT Identifier, PIdentifier, ShortString; -FROM Lexer IMPORT Lexer, LexerToken, lexer_current, lexer_lex, LexerKind; FROM Parser IMPORT AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator, - AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration, + PAstModule, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration, PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind, AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration, PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement, PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration; -(* Calls lexer_lex() but skips the comments. *) -PROCEDURE transpiler_lex(lexer: PLexer): LexerToken; +PROCEDURE indent(context: PTranspilerContext); VAR - result: LexerToken; + count: CARDINAL; BEGIN - result := lexer_lex(lexer); + count := 0; - WHILE result.kind = lexerKindComment DO - result := lexer_lex(lexer) - END; - - RETURN result -END transpiler_lex; + WHILE count < context^.indentation DO + WriteString(context^.output, ' '); + INC(count) + END +END indent; (* Write a semicolon followed by a newline. *) PROCEDURE write_semicolon(output: File); BEGIN WriteChar(output, ';'); WriteLine(output) END write_semicolon; -PROCEDURE write_current(lexer: PLexer; output: File); -VAR - written_bytes: CARDINAL; - count: CARDINAL; -BEGIN - count := lexer^.current; - DEC(count, lexer^.start); - - written_bytes := WriteNBytes(output, count, lexer^.start) -END write_current; PROCEDURE transpile_import_statement(context: PTranspilerContext; import_statement: PAstImportStatement); VAR - token: LexerToken; written_bytes: CARDINAL; current_symbol: PIdentifier; BEGIN @@ -92,7 +76,7 @@ BEGIN write_semicolon(context^.output) END transpile_constant_declaration; -PROCEDURE transpile_constant_part(context: PTranspilerContext; declarations: PPAstConstantDeclaration); +PROCEDURE transpile_constant_part(context: PTranspilerContext; declarations: PPAstConstantDeclaration; extra_newline: BOOLEAN); VAR current_declaration: PPAstConstantDeclaration; BEGIN @@ -106,12 +90,12 @@ BEGIN INC(current_declaration, TSIZE(PAstConstantDeclaration)) END; - WriteLine(context^.output) + IF extra_newline THEN + WriteLine(context^.output) + END END END transpile_constant_part; PROCEDURE transpile_module(context: PTranspilerContext; result: PAstModule); -VAR - token: LexerToken; BEGIN IF result^.main = FALSE THEN WriteString(context^.output, 'IMPLEMENTATION ') @@ -127,9 +111,9 @@ BEGIN (* Write the module body. *) transpile_import_part(context, result^.imports); - transpile_constant_part(context, result^.constants); + transpile_constant_part(context, result^.constants, TRUE); transpile_type_part(context, result^.types); - transpile_variable_part(context, result^.variables); + transpile_variable_part(context, result^.variables, TRUE); transpile_procedure_part(context, result^.procedures); transpile_statement_part(context, result^.statements); @@ -169,8 +153,6 @@ BEGIN WriteString(context^.output, ' END') END transpile_record_type; PROCEDURE transpile_pointer_type(context: PTranspilerContext; type_expression: PAstTypeExpression); -VAR - token: LexerToken; BEGIN WriteString(context^.output, 'POINTER TO '); @@ -306,7 +288,7 @@ BEGIN transpile_type_expression(context, declaration^.variable_type); write_semicolon(context^.output) END transpile_variable_declaration; -PROCEDURE transpile_variable_part(context: PTranspilerContext; declarations: PPAstVariableDeclaration); +PROCEDURE transpile_variable_part(context: PTranspilerContext; declarations: PPAstVariableDeclaration; extra_newline: BOOLEAN); VAR current_declaration: PPAstVariableDeclaration; BEGIN @@ -320,12 +302,13 @@ BEGIN INC(current_declaration, TSIZE(PAstVariableDeclaration)) END; - WriteLine(context^.output) + IF extra_newline THEN + WriteLine(context^.output) + END END END transpile_variable_part; PROCEDURE transpile_procedure_heading(context: PTranspilerContext; declaration: PAstProcedureDeclaration); VAR - token: LexerToken; written_bytes: CARDINAL; parameter_index: CARDINAL; current_parameter: PAstTypedDeclaration; @@ -487,32 +470,32 @@ BEGIN END END transpile_expression; PROCEDURE transpile_if_statement(context: PTranspilerContext; statement: PAstStatement); -VAR - token: LexerToken; BEGIN - IF statement <> NIL THEN - WriteString(context^.output, ' IF '); - transpile_expression(context, statement^.if_condition); + WriteString(context^.output, 'IF '); + transpile_expression(context, statement^.if_condition); - WriteString(context^.output, ' THEN'); - WriteLine(context^.output); + WriteString(context^.output, ' THEN'); + WriteLine(context^.output); + INC(context^.indentation); - transpile_compound_statement(context, statement^.if_branch); - WriteString(context^.output, ' END') - END + transpile_compound_statement(context, statement^.if_branch); + DEC(context^.indentation); + indent(context); + WriteString(context^.output, 'END') END transpile_if_statement; PROCEDURE transpile_while_statement(context: PTranspilerContext; statement: PAstStatement); -VAR - token: LexerToken; BEGIN - WriteString(context^.output, ' WHILE '); + WriteString(context^.output, 'WHILE '); transpile_expression(context, statement^.while_condition); WriteString(context^.output, ' DO'); WriteLine(context^.output); + INC(context^.indentation); transpile_compound_statement(context, statement^.while_body); - WriteString(context^.output, ' END') + DEC(context^.indentation); + indent(context); + WriteString(context^.output, 'END') END transpile_while_statement; PROCEDURE transpile_assignment_statement(context: PTranspilerContext; statement: PAstStatement); BEGIN @@ -522,7 +505,7 @@ BEGIN END transpile_assignment_statement; PROCEDURE transpile_return_statement(context: PTranspilerContext; statement: PAstStatement); BEGIN - WriteString(context^.output, ' RETURN '); + WriteString(context^.output, 'RETURN '); transpile_expression(context, statement^.returned) END transpile_return_statement; @@ -548,6 +531,8 @@ BEGIN END transpile_compound_statement; PROCEDURE transpile_statement(context: PTranspilerContext; statement: PAstStatement); BEGIN + indent(context); + IF statement^.kind = astStatementKindIf THEN transpile_if_statement(context, statement) END; @@ -569,7 +554,10 @@ BEGIN IF compound.count > 0 THEN WriteString(context^.output, 'BEGIN'); WriteLine(context^.output); - transpile_compound_statement(context, compound) + + INC(context^.indentation); + transpile_compound_statement(context, compound); + DEC(context^.indentation) END END transpile_statement_part; PROCEDURE transpile_procedure_declaration(context: PTranspilerContext; declaration: PAstProcedureDeclaration); @@ -578,8 +566,8 @@ VAR BEGIN transpile_procedure_heading(context, declaration); - transpile_constant_part(context, declaration^.constants); - transpile_variable_part(context, declaration^.variables); + transpile_constant_part(context, declaration^.constants, FALSE); + transpile_variable_part(context, declaration^.variables, FALSE); transpile_statement_part(context, declaration^.statements); WriteString(context^.output, 'END '); @@ -628,6 +616,7 @@ VAR BEGIN context.input_name := input_name; context.output := output; + context.indentation := 0; transpile_module(ADR(context), ast_module) END transpile;