From 9341017103ff8ceec7a61eac1d5f2c843d8ef052 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 12 Jun 2025 18:45:17 +0200 Subject: [PATCH] Trace the source code position in the lexer --- source/Common.def | 4 + source/Lexer.def | 15 +++- source/Lexer.elna | 195 ++++++++++++++++++++++------------------- source/Parser.def | 5 ++ source/Parser.elna | 162 +++++++++++++++++----------------- source/Transpiler.def | 3 +- source/Transpiler.elna | 98 +++++++++------------ 7 files changed, 251 insertions(+), 231 deletions(-) diff --git a/source/Common.def b/source/Common.def index 996a971..9520230 100644 --- a/source/Common.def +++ b/source/Common.def @@ -4,5 +4,9 @@ TYPE ShortString = ARRAY[1..256] OF CHAR; Identifier = ARRAY[1..256] OF CHAR; PIdentifier = POINTER TO Identifier; + TextLocation = RECORD + line: CARDINAL; + column: CARDINAL + END; END Common. diff --git a/source/Lexer.def b/source/Lexer.def index faa22c2..b707b50 100644 --- a/source/Lexer.def +++ b/source/Lexer.def @@ -2,17 +2,22 @@ DEFINITION MODULE Lexer; FROM FIO IMPORT File; -FROM Common IMPORT Identifier, ShortString; +FROM Common IMPORT Identifier, ShortString, TextLocation; TYPE PLexerBuffer = POINTER TO CHAR; + BufferPosition = RECORD + iterator: PLexerBuffer; + location: TextLocation + END; + PBufferPosition = POINTER TO BufferPosition; Lexer = RECORD input: File; buffer: PLexerBuffer; size: CARDINAL; length: CARDINAL; - start: PLexerBuffer; - current: PLexerBuffer + start: BufferPosition; + current: BufferPosition END; PLexer = POINTER TO Lexer; LexerKind = ( @@ -86,7 +91,9 @@ TYPE lexerKindIdentifier: identifierKind: Identifier | lexerKindInteger: integerKind: INTEGER | lexerKindString: stringKind: ShortString - END + END; + start_location: TextLocation; + end_location: TextLocation END; PLexerToken = POINTER TO LexerToken; diff --git a/source/Lexer.elna b/source/Lexer.elna index 4cf6ea7..8408604 100644 --- a/source/Lexer.elna +++ b/source/Lexer.elna @@ -213,7 +213,7 @@ begin end end; -proc 
compare_keyword(keyword: ARRAY OF CHAR, token_start: PLexerBuffer, token_end: PLexerBuffer) -> BOOLEAN; +proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN; var result: BOOLEAN; index: CARDINAL; @@ -223,17 +223,17 @@ begin index := 0; result := true; keyword_length := Length(keyword); - continue := (index < keyword_length) & (token_start <> token_end); + continue := (index < keyword_length) & (token_start.iterator <> token_end); while continue & result do - result := (keyword[index] = token_start^) or (Lower(keyword[index]) = token_start^); - INC(token_start); + result := (keyword[index] = token_start.iterator^) or (Lower(keyword[index]) = token_start.iterator^); + INC(token_start.iterator); INC(index); - continue := (index < keyword_length) & (token_start <> token_end) + continue := (index < keyword_length) & (token_start.iterator <> token_end) end; result := result & (index = Length(keyword)); - return result & (token_start = token_end) + return result & (token_start.iterator = token_end) end; (* Reached the end of file. *) @@ -242,32 +242,37 @@ begin token^.kind := lexerKindEof end; +proc increment(position: PBufferPosition); +begin + INC(position^.iterator) +end; + (* Add the character to the token currently read and advance to the next character. *) proc transition_action_accumulate(lexer: PLexer, token: PLexerToken); begin - INC(lexer^.current) + increment(ADR(lexer^.current)) end; (* The current character is not a part of the token. Finish the token already * read. Don't advance to the next character. 
*) proc transition_action_finalize(lexer: PLexer, token: PLexerToken); begin - if lexer^.start^ = ':' then + if lexer^.start.iterator^ = ':' then token^.kind := lexerKindColon end; - if lexer^.start^ = '>' then + if lexer^.start.iterator^ = '>' then token^.kind := lexerKindGreaterThan end; - if lexer^.start^ = '<' then + if lexer^.start.iterator^ = '<' then token^.kind := lexerKindLessThan end; - if lexer^.start^ = '(' then + if lexer^.start.iterator^ = '(' then token^.kind := lexerKindLeftParen end; - if lexer^.start^ = '-' then + if lexer^.start.iterator^ = '-' then token^.kind := lexerKindMinus end; - if lexer^.start^ = '.' then + if lexer^.start.iterator^ = '.' then token^.kind := lexerKindDot end end; @@ -275,34 +280,39 @@ end; (* An action for tokens containing multiple characters. *) proc transition_action_composite(lexer: PLexer, token: PLexerToken); begin - if lexer^.start^ = '<' then - if lexer^.current^ = '>' then + if lexer^.start.iterator^ = '<' then + if lexer^.current.iterator^ = '>' then token^.kind := lexerKindNotEqual end; - if lexer^.current^ = '=' then + if lexer^.current.iterator^ = '=' then token^.kind := lexerKindLessEqual end end; - if (lexer^.start^ = '>') & (lexer^.current^ = '=') then + if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then token^.kind := lexerKindGreaterEqual end; - if (lexer^.start^ = '.') & (lexer^.current^ = '.') then + if (lexer^.start.iterator^ = '.') & (lexer^.current.iterator^ = '.') then token^.kind := lexerKindRange end; - if (lexer^.start^ = ':') & (lexer^.current^ = '=') then + if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then token^.kind := lexerKindAssignment end; - if (lexer^.start^ = '-') & (lexer^.current^ = '>') then + if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then token^.kind := lexerKindArrow end; - INC(lexer^.current) + increment(ADR(lexer^.current)) end; (* Skip a space. 
*) proc transition_action_skip(lexer: PLexer, token: PLexerToken); begin - INC(lexer^.current); - INC(lexer^.start) + if ORD(lexer^.start.iterator^) = 10 then (* the character being skipped is a line feed: count it before advancing *) + INC(lexer^.start.location.line); + lexer^.start.location.column := 1 + end; + + increment(ADR(lexer^.start)); (* step over the skipped character *) + lexer^.current := lexer^.start end; (* Delimited string action. *) @@ -310,30 +320,30 @@ proc transition_action_delimited(lexer: PLexer, token: PLexerToken); var text_length: CARDINAL; begin - if lexer^.start^ = '(' then + if lexer^.start.iterator^ = '(' then token^.kind := lexerKindComment end; - if lexer^.start^ = '"' then - text_length := lexer^.current; - DEC(text_length, lexer^.start); + if lexer^.start.iterator^ = '"' then + text_length := lexer^.current.iterator; + DEC(text_length, lexer^.start.iterator); INC(text_length); MemZero(ADR(token^.stringKind), TSIZE(ShortString)); - MemCopy(lexer^.start, text_length, ADR(token^.stringKind)); + MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind)); token^.kind := lexerKindCharacter end; - if lexer^.start^ = "'" then - text_length := lexer^.current; - DEC(text_length, lexer^.start); + if lexer^.start.iterator^ = "'" then + text_length := lexer^.current.iterator; + DEC(text_length, lexer^.start.iterator); INC(text_length); MemZero(ADR(token^.stringKind), TSIZE(ShortString)); - MemCopy(lexer^.start, text_length, ADR(token^.stringKind)); + MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind)); token^.kind := lexerKindString end; - INC(lexer^.current) + increment(ADR(lexer^.current)) end; (* Finalize keyword or identifier. 
*) @@ -341,102 +351,102 @@ proc transition_action_key_id(lexer: PLexer, token: PLexerToken); begin token^.kind := lexerKindIdentifier; - token^.identifierKind[1] := lexer^.current; - DEC(token^.identifierKind[1], lexer^.start); - MemCopy(lexer^.start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); + token^.identifierKind[1] := lexer^.current.iterator; + DEC(token^.identifierKind[1], lexer^.start.iterator); + MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); - if compare_keyword('PROGRAM', lexer^.start, lexer^.current) then + if compare_keyword('PROGRAM', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindProgram end; - if compare_keyword('IMPORT', lexer^.start, lexer^.current) then + if compare_keyword('IMPORT', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindImport end; - if compare_keyword('CONST', lexer^.start, lexer^.current) then + if compare_keyword('CONST', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindConst end; - if compare_keyword('VAR', lexer^.start, lexer^.current) then + if compare_keyword('VAR', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindVar end; - if compare_keyword('IF', lexer^.start, lexer^.current) then + if compare_keyword('IF', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindIf end; - if compare_keyword('THEN', lexer^.start, lexer^.current) then + if compare_keyword('THEN', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindThen end; - if compare_keyword('ELSIF', lexer^.start, lexer^.current) then + if compare_keyword('ELSIF', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindElsif end; - if compare_keyword('ELSE', lexer^.start, lexer^.current) then + if compare_keyword('ELSE', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindElse end; - if compare_keyword('WHILE', lexer^.start, lexer^.current) then + if compare_keyword('WHILE', 
lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindWhile end; - if compare_keyword('DO', lexer^.start, lexer^.current) then + if compare_keyword('DO', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindDo end; - if compare_keyword('proc', lexer^.start, lexer^.current) then + if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindProc end; - if compare_keyword('BEGIN', lexer^.start, lexer^.current) then + if compare_keyword('BEGIN', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindBegin end; - if compare_keyword('END', lexer^.start, lexer^.current) then + if compare_keyword('END', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindEnd end; - if compare_keyword('TYPE', lexer^.start, lexer^.current) then + if compare_keyword('TYPE', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindType end; - if compare_keyword('RECORD', lexer^.start, lexer^.current) then + if compare_keyword('RECORD', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindRecord end; - if compare_keyword('UNION', lexer^.start, lexer^.current) then + if compare_keyword('UNION', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindUnion end; - if compare_keyword('NIL', lexer^.start, lexer^.current) then + if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindNull end; - if compare_keyword('AND', lexer^.start, lexer^.current) then + if compare_keyword('AND', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindAnd end; - if compare_keyword('OR', lexer^.start, lexer^.current) then + if compare_keyword('OR', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindOr end; - if compare_keyword('RETURN', lexer^.start, lexer^.current) then + if compare_keyword('RETURN', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindReturn end; - if compare_keyword('DEFINITION', 
lexer^.start, lexer^.current) then + if compare_keyword('DEFINITION', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindDefinition end; - if compare_keyword('TO', lexer^.start, lexer^.current) then + if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindTo end; - if compare_keyword('CASE', lexer^.start, lexer^.current) then + if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindCase end; - if compare_keyword('OF', lexer^.start, lexer^.current) then + if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindOf end; - if compare_keyword('FROM', lexer^.start, lexer^.current) then + if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindFrom end; - if compare_keyword('MODULE', lexer^.start, lexer^.current) then + if compare_keyword('MODULE', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindModule end; - if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current) then + if compare_keyword('IMPLEMENTATION', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindImplementation end; - if compare_keyword('POINTER', lexer^.start, lexer^.current) then + if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindPointer end; - if compare_keyword('ARRAY', lexer^.start, lexer^.current) then + if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindArray end; - if compare_keyword('TRUE', lexer^.start, lexer^.current) then + if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindBoolean; token^.booleanKind := true end; - if compare_keyword('FALSE', lexer^.start, lexer^.current) then + if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindBoolean; token^.booleanKind := false end @@ -446,52 +456,52 @@ end; * 
followed by other characters forming a composite token. *) proc transition_action_single(lexer: PLexer, token: PLexerToken); begin - if lexer^.current^ = '&' then + if lexer^.current.iterator^ = '&' then token^.kind := lexerKindAnd end; - if lexer^.current^ = ';' then + if lexer^.current.iterator^ = ';' then token^.kind := lexerKindSemicolon end; - if lexer^.current^ = ',' then + if lexer^.current.iterator^ = ',' then token^.kind := lexerKindComma end; - if lexer^.current^ = '~' then + if lexer^.current.iterator^ = '~' then token^.kind := lexerKindTilde end; - if lexer^.current^ = ')' then + if lexer^.current.iterator^ = ')' then token^.kind := lexerKindRightParen end; - if lexer^.current^ = '[' then + if lexer^.current.iterator^ = '[' then token^.kind := lexerKindLeftSquare end; - if lexer^.current^ = ']' then + if lexer^.current.iterator^ = ']' then token^.kind := lexerKindRightSquare end; - if lexer^.current^ = '^' then + if lexer^.current.iterator^ = '^' then token^.kind := lexerKindHat end; - if lexer^.current^ = '=' then + if lexer^.current.iterator^ = '=' then token^.kind := lexerKindEqual end; - if lexer^.current^ = '+' then + if lexer^.current.iterator^ = '+' then token^.kind := lexerKindPlus end; - if lexer^.current^ = '*' then + if lexer^.current.iterator^ = '*' then token^.kind := lexerKindAsterisk end; - if lexer^.current^ = '/' then + if lexer^.current.iterator^ = '/' then token^.kind := lexerKindDivision end; - if lexer^.current^ = '%' then + if lexer^.current.iterator^ = '%' then token^.kind := lexerKindRemainder end; - if lexer^.current^ = '@' then + if lexer^.current.iterator^ = '@' then token^.kind := lexerKindAt end; - if lexer^.current^ = '|' then + if lexer^.current.iterator^ = '|' then token^.kind := lexerKindPipe end; - INC(lexer^.current) + increment(ADR(lexer^.current)) (* increment() takes a PBufferPosition: pass the position record, not its iterator field *) end; (* Handle an integer literal. 
*) @@ -503,21 +513,21 @@ var begin token^.kind := lexerKindInteger; - integer_length := lexer^.current; - DEC(integer_length, lexer^.start); + integer_length := lexer^.current.iterator; + DEC(integer_length, lexer^.start.iterator); MemZero(ADR(token^.identifierKind), TSIZE(Identifier)); - MemCopy(lexer^.start, integer_length, ADR(token^.identifierKind[1])); + MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1])); buffer := InitStringCharStar(ADR(token^.identifierKind[1])); token^.integerKind := StringToInteger(buffer, 10, found); buffer := KillString(buffer) end; -proc set_default_transition(current_state: TransitionState, DefaultAction: TransitionAction, next_state: TransitionState); +proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState); var default_transition: Transition; begin - default_transition.action := DefaultAction; + default_transition.action := default_action; default_transition.next_state := next_state; transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition; @@ -821,7 +831,7 @@ begin current_state := transitionStateStart; while current_state <> transitionStateEnd DO - index1 := ORD(lexer^.current^); + index1 := ORD(lexer^.current.iterator^); INC(index1); current_class := classification[index1]; @@ -836,6 +846,9 @@ begin end; current_state := current_transition.next_state end; + result.start_location := lexer^.start.location; + result.end_location := lexer^.current.location; + return result end; @@ -845,7 +858,9 @@ var begin if lexer^.length = 0 then lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer); - lexer^.current := lexer^.buffer + lexer^.current.location.column := 1; + lexer^.current.location.line := 1; + lexer^.current.iterator := lexer^.buffer end; lexer^.start := lexer^.current; diff --git a/source/Parser.def b/source/Parser.def index 02c02d0..7df325c 100644 --- a/source/Parser.def +++ 
b/source/Parser.def @@ -4,6 +4,11 @@ FROM Common IMPORT Identifier, PIdentifier, ShortString; FROM Lexer IMPORT PLexer; TYPE + Parser = RECORD + lexer: PLexer + END; + PParser = POINTER TO Parser; + AstLiteralKind = ( astLiteralKindInteger, astLiteralKindString, diff --git a/source/Parser.elna b/source/Parser.elna index 03e5d0c..2f48819 100644 --- a/source/Parser.elna +++ b/source/Parser.elna @@ -9,7 +9,7 @@ from Storage import ALLOCATE, REALLOCATE; from Lexer import Lexer, LexerKind, LexerToken, lexer_current, lexer_lex; (* Calls lexer_lex() but skips the comments. *) -proc transpiler_lex(lexer: PLexer) -> LexerToken; +proc parser_lex(lexer: PLexer) -> LexerToken; var result: LexerToken; begin @@ -30,7 +30,7 @@ var current_field: PAstFieldDeclaration; begin ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration)); - token := transpiler_lex(lexer); + token := parser_lex(lexer); field_count := 0; while token.kind <> lexerKindEnd do @@ -41,16 +41,16 @@ begin current_field := field_declarations; INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1)); - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_field^.field_name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_field^.field_type := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); if token.kind = lexerKindSemicolon then - token := transpiler_lex(lexer) + token := parser_lex(lexer) end end; INC(current_field, TSIZE(AstFieldDeclaration)); @@ -81,7 +81,7 @@ begin token := lexer_current(lexer); if token.kind = lexerKindPointer then - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; token := lexer_current(lexer); result^.target := parse_type_expression(lexer); @@ -102,16 +102,16 @@ begin token := lexer_current(lexer); if token.kind = lexerKindArray then - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; if token.kind <> lexerKindOf then - token := 
transpiler_lex(lexer); + token := parser_lex(lexer); result^.length := token.integerKind; - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.base := parse_type_expression(lexer); return result @@ -129,14 +129,14 @@ begin case_count := 1; ALLOCATE(result^.cases, TSIZE(Identifier) * 2); - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_case := result^.cases; current_case^ := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); while token.kind = lexerKindComma do - token := transpiler_lex(lexer); + token := parser_lex(lexer); INC(case_count); INC(case_count); @@ -146,7 +146,7 @@ begin INC(current_case, TSIZE(Identifier) * (case_count - 1)); current_case^ := token.identifierKind; - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; INC(current_case, TSIZE(Identifier)); MemZero(current_case, TSIZE(Identifier)); @@ -181,8 +181,8 @@ begin ALLOCATE(result^.parameters, 1); - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); while token.kind <> lexerKindRightParen do INC(parameter_count); @@ -194,9 +194,9 @@ begin current_parameter^ := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); if token.kind = lexerKindComma then - token := transpiler_lex(lexer) + token := parser_lex(lexer) end end; current_parameter := result^.parameters; @@ -245,11 +245,11 @@ begin NEW(result); result^.identifier := token.identifierKind; - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); result^.type_expression := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); return result end; @@ -268,7 +268,7 @@ begin declaration_count := 0; if token.kind = lexerKindType then - token := transpiler_lex(lexer); + token 
:= parser_lex(lexer); while token.kind = lexerKindIdentifier do INC(declaration_count); @@ -278,7 +278,7 @@ begin INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1)); current_declaration^ := parse_type_declaration(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) end end; if declaration_count <> 0 then @@ -299,12 +299,12 @@ begin token := lexer_current(lexer); result^.variable_name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.variable_type := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); return result end; @@ -322,7 +322,7 @@ begin declaration_count := 0; if token.kind = lexerKindVar then - token := transpiler_lex(lexer); + token := parser_lex(lexer); while token.kind = lexerKindIdentifier do INC(declaration_count); @@ -332,7 +332,7 @@ begin INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1)); current_declaration^ := parse_variable_declaration(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) end end; if declaration_count <> 0 then @@ -353,12 +353,12 @@ begin token := lexer_current(lexer); result^.constant_name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.constant_value := token.integerKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); return result end; @@ -377,7 +377,7 @@ begin declaration_count := 0; if token.kind = lexerKindConst then - token := transpiler_lex(lexer); + token := parser_lex(lexer); while token.kind = lexerKindIdentifier do INC(declaration_count); @@ -387,7 +387,7 @@ begin INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1)); current_declaration^ := parse_constant_declaration(lexer); - token := transpiler_lex(lexer) + token := 
parser_lex(lexer) end end; if declaration_count <> 0 then @@ -408,20 +408,20 @@ begin NEW(result); symbol_count := 1; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.package := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); ALLOCATE(result^.symbols, TSIZE(Identifier) * 2); current_symbol := result^.symbols; - token := transpiler_lex(lexer); + token := parser_lex(lexer); current_symbol^ := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); while token.kind <> lexerKindSemicolon do - token := transpiler_lex(lexer); + token := parser_lex(lexer); INC(symbol_count); REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1)); @@ -429,12 +429,12 @@ begin INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1)); current_symbol^ := token.identifierKind; - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; INC(current_symbol, TSIZE(Identifier)); MemZero(current_symbol, TSIZE(Identifier)); - token := transpiler_lex(lexer); + token := parser_lex(lexer); return result end; @@ -501,7 +501,7 @@ begin literal^.boolean := token.booleanKind end; if literal <> nil then - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; return literal @@ -526,7 +526,7 @@ begin end; if (result = nil) & (next_token.kind = lexerKindMinus) then NEW(result); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); result^.kind := astExpressionKindUnary; result^.unary_operator := astUnaryOperatorMinus; @@ -534,17 +534,17 @@ begin end; if (result = nil) & (next_token.kind = lexerKindTilde) then NEW(result); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); result^.kind := astExpressionKindUnary; result^.unary_operator := astUnaryOperatorNot; result^.unary_operand := parse_factor(lexer) end; if (result = nil) & (next_token.kind = lexerKindLeftParen) then - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); 
result := parse_expression(lexer); if result <> nil then - next_token := transpiler_lex(lexer) + next_token := parser_lex(lexer) end end; if (result = nil) & (next_token.kind = lexerKindIdentifier) then @@ -553,7 +553,7 @@ begin result^.kind := astExpressionKindIdentifier; result^.identifier := next_token.identifierKind; - next_token := transpiler_lex(lexer) + next_token := parser_lex(lexer) end; return result @@ -581,34 +581,34 @@ begin designator^.kind := astExpressionKindDereference; designator^.reference := inner_expression; - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := true end; if ~handled & (next_token.kind = lexerKindLeftSquare) then NEW(designator); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.kind := astExpressionKindArrayAccess; designator^.array := inner_expression; designator^.index := parse_expression(lexer); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := true end; if ~handled & (next_token.kind = lexerKindDot) then NEW(designator); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.kind := astExpressionKindFieldAccess; designator^.aggregate := inner_expression; designator^.field := next_token.identifierKind; - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := true end; if ~handled & (next_token.kind = lexerKindLeftParen) then NEW(designator); - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.kind := astExpressionKindCall; designator^.callable := inner_expression; @@ -623,7 +623,7 @@ begin next_token := lexer_current(lexer); while next_token.kind = lexerKindComma do - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); designator^.argument_count := designator^.argument_count + 1; REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count); @@ -635,7 +635,7 @@ begin end end; - next_token 
:= transpiler_lex(lexer); + next_token := parser_lex(lexer); handled := true end end; @@ -649,7 +649,7 @@ var result: PAstExpression; right: PAstExpression; begin - next_token := transpiler_lex(lexer); + next_token := parser_lex(lexer); right := parse_designator(lexer); result := nil; @@ -725,7 +725,7 @@ begin NEW(result); result^.kind := astStatementKindReturn; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.returned := parse_expression(lexer); return result @@ -740,7 +740,7 @@ begin result^.kind := astStatementKindAssignment; result^.assignee := assignee; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.assignment := parse_expression(lexer); return result @@ -792,7 +792,7 @@ var designator: PAstExpression; begin statement := nil; - token := transpiler_lex(lexer); + token := parser_lex(lexer); if token.kind = lexerKindIf then statement := parse_if_statement(lexer) @@ -825,11 +825,11 @@ begin NEW(result); result^.kind := astStatementKindIf; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.if_condition := parse_expression(lexer); result^.if_branch := parse_compound_statement(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); return result end; @@ -841,11 +841,11 @@ begin NEW(result); result^.kind := astStatementKindWhile; - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.while_condition := parse_expression(lexer); result^.while_body := parse_compound_statement(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); return result end; @@ -874,15 +874,15 @@ var begin NEW(declaration); - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.name := token.identifierKind; - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.parameters := nil; declaration^.parameter_count := 0; - token := transpiler_lex(lexer); + token := parser_lex(lexer); while token.kind <> lexerKindRightParen do parameter_index := 
declaration^.parameter_count; INC(declaration^.parameter_count); @@ -893,26 +893,26 @@ begin current_parameter^.identifier := token.identifierKind; - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); current_parameter^.type_expression := parse_type_expression(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); if token.kind = lexerKindComma then - token := transpiler_lex(lexer) + token := parser_lex(lexer) end end; - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.return_type := nil; (* Check for the return type and write it. *) if token.kind = lexerKindArrow then - token := transpiler_lex(lexer); + token := parser_lex(lexer); declaration^.return_type := parse_type_expression(lexer); - token := transpiler_lex(lexer) + token := parser_lex(lexer) end; - token := transpiler_lex(lexer); + token := parser_lex(lexer); return declaration end; @@ -928,8 +928,8 @@ begin declaration^.variables := parse_variable_part(lexer); declaration^.statements := parse_statement_part(lexer); - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); return declaration end; @@ -971,16 +971,16 @@ var result: PAstModule; begin NEW(result); - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.main := true; if token.kind = lexerKindModule then result^.main := false end; - token := transpiler_lex(lexer); + token := parser_lex(lexer); (* Write the module body. 
*) - token := transpiler_lex(lexer); + token := parser_lex(lexer); result^.imports := parse_import_part(lexer); result^.constants := parse_constant_part(lexer); @@ -990,8 +990,8 @@ begin result^.procedures := parse_procedure_part(lexer); result^.statements := parse_statement_part(lexer); - token := transpiler_lex(lexer); - token := transpiler_lex(lexer); + token := parser_lex(lexer); + token := parser_lex(lexer); return result end; diff --git a/source/Transpiler.def b/source/Transpiler.def index 534b206..98956fc 100644 --- a/source/Transpiler.def +++ b/source/Transpiler.def @@ -9,7 +9,8 @@ FROM Parser IMPORT PAstModule; TYPE TranspilerContext = RECORD input_name: ShortString; - output: File + output: File; + indentation: CARDINAL END; PTranspilerContext = POINTER TO TranspilerContext; diff --git a/source/Transpiler.elna b/source/Transpiler.elna index ac3e987..3b3d2d7 100644 --- a/source/Transpiler.elna +++ b/source/Transpiler.elna @@ -1,33 +1,28 @@ module; from FIO import WriteNBytes, WriteLine, WriteChar, WriteString; -from SYSTEM import ADR, ADDRESS, TSIZE; +from SYSTEM import ADR, TSIZE; from NumberIO import IntToStr; -from Storage import ALLOCATE, REALLOCATE; -from MemUtils import MemCopy, MemZero; from Common import Identifier, PIdentifier, ShortString; -from Lexer import Lexer, LexerToken, lexer_current, lexer_lex, LexerKind; from Parser import AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator, - AstModule, PAstModule, AstExpression, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration, + PAstModule, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration, PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind, AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration, PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement, PAstTypeExpression, 
PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration; -(* Calls lexer_lex() but skips the comments. *) -proc transpiler_lex(lexer: PLexer) -> LexerToken; +proc indent(context: PTranspilerContext); var - result: LexerToken; + count: CARDINAL; begin - result := lexer_lex(lexer); + count := 0; - while result.kind = lexerKindComment do - result := lexer_lex(lexer) - end; - - return result + while count < context^.indentation do + WriteString(context^.output, ' '); + INC(count) + end end; (* Write a semicolon followed by a newline. *) @@ -37,20 +32,8 @@ begin WriteLine(output) end; -proc write_current(lexer: PLexer, output: File); -var - written_bytes: CARDINAL; - count: CARDINAL; -begin - count := lexer^.current; - DEC(count, lexer^.start); - - written_bytes := WriteNBytes(output, count, lexer^.start) -end; - proc transpile_import_statement(context: PTranspilerContext, import_statement: PAstImportStatement); var - token: LexerToken; written_bytes: CARDINAL; current_symbol: PIdentifier; begin @@ -98,7 +81,7 @@ begin write_semicolon(context^.output) end; -proc transpile_constant_part(context: PTranspilerContext, declarations: PPAstConstantDeclaration); +proc transpile_constant_part(context: PTranspilerContext, declarations: PPAstConstantDeclaration, extra_newline: BOOLEAN); var current_declaration: PPAstConstantDeclaration; begin @@ -112,13 +95,13 @@ begin INC(current_declaration, TSIZE(PAstConstantDeclaration)) end; - WriteLine(context^.output) + if extra_newline then + WriteLine(context^.output) + end end end; proc transpile_module(context: PTranspilerContext, result: PAstModule); -var - token: LexerToken; begin if result^.main = false then WriteString(context^.output, 'IMPLEMENTATION ') @@ -134,9 +117,9 @@ begin (* Write the module body. 
*) transpile_import_part(context, result^.imports); - transpile_constant_part(context, result^.constants); + transpile_constant_part(context, result^.constants, true); transpile_type_part(context, result^.types); - transpile_variable_part(context, result^.variables); + transpile_variable_part(context, result^.variables, true); transpile_procedure_part(context, result^.procedures); transpile_statement_part(context, result^.statements); @@ -179,8 +162,6 @@ begin end; proc transpile_pointer_type(context: PTranspilerContext, type_expression: PAstTypeExpression); -var - token: LexerToken; begin WriteString(context^.output, 'POINTER TO '); @@ -325,7 +306,7 @@ begin write_semicolon(context^.output) end; -proc transpile_variable_part(context: PTranspilerContext, declarations: PPAstVariableDeclaration); +proc transpile_variable_part(context: PTranspilerContext, declarations: PPAstVariableDeclaration, extra_newline: BOOLEAN); var current_declaration: PPAstVariableDeclaration; begin @@ -339,13 +320,14 @@ begin INC(current_declaration, TSIZE(PAstVariableDeclaration)) end; - WriteLine(context^.output) + if extra_newline then + WriteLine(context^.output) + end end end; proc transpile_procedure_heading(context: PTranspilerContext, declaration: PAstProcedureDeclaration); var - token: LexerToken; written_bytes: CARDINAL; parameter_index: CARDINAL; current_parameter: PAstTypedDeclaration; @@ -511,33 +493,33 @@ begin end; proc transpile_if_statement(context: PTranspilerContext, statement: PAstStatement); -var - token: LexerToken; begin - if statement <> nil then - WriteString(context^.output, ' IF '); - transpile_expression(context, statement^.if_condition); + WriteString(context^.output, 'IF '); + transpile_expression(context, statement^.if_condition); - WriteString(context^.output, ' THEN'); - WriteLine(context^.output); + WriteString(context^.output, ' THEN'); + WriteLine(context^.output); + INC(context^.indentation); - transpile_compound_statement(context, statement^.if_branch); 
- WriteString(context^.output, ' END') - end + transpile_compound_statement(context, statement^.if_branch); + DEC(context^.indentation); + indent(context); + WriteString(context^.output, 'END') end; proc transpile_while_statement(context: PTranspilerContext, statement: PAstStatement); -var - token: LexerToken; begin - WriteString(context^.output, ' WHILE '); + WriteString(context^.output, 'WHILE '); transpile_expression(context, statement^.while_condition); WriteString(context^.output, ' DO'); WriteLine(context^.output); + INC(context^.indentation); transpile_compound_statement(context, statement^.while_body); - WriteString(context^.output, ' END') + DEC(context^.indentation); + indent(context); + WriteString(context^.output, 'END') end; proc transpile_assignment_statement(context: PTranspilerContext, statement: PAstStatement); @@ -549,7 +531,7 @@ end; proc transpile_return_statement(context: PTranspilerContext, statement: PAstStatement); begin - WriteString(context^.output, ' RETURN '); + WriteString(context^.output, 'RETURN '); transpile_expression(context, statement^.returned) end; @@ -577,6 +559,8 @@ end; proc transpile_statement(context: PTranspilerContext, statement: PAstStatement); begin + indent(context); + if statement^.kind = astStatementKindIf then transpile_if_statement(context, statement) end; @@ -599,7 +583,10 @@ begin if compound.count > 0 then WriteString(context^.output, 'BEGIN'); WriteLine(context^.output); - transpile_compound_statement(context, compound) + + INC(context^.indentation); + transpile_compound_statement(context, compound); + DEC(context^.indentation) end end; @@ -609,8 +596,8 @@ var begin transpile_procedure_heading(context, declaration); - transpile_constant_part(context, declaration^.constants); - transpile_variable_part(context, declaration^.variables); + transpile_constant_part(context, declaration^.constants, false); + transpile_variable_part(context, declaration^.variables, false); transpile_statement_part(context, 
declaration^.statements); WriteString(context^.output, 'END '); @@ -662,6 +649,7 @@ var begin context.input_name := input_name; context.output := output; + context.indentation := 0; transpile_module(ADR(context), ast_module) end;