From 5875fb28dbfd2d9abd768ef5e185cf1cb1ad2ab1 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Fri, 30 May 2025 19:51:18 +0200 Subject: [PATCH] Handle ASCII codes > 128 in the tokenizer --- source/Lexer.mod | 34 ++++--- source/Transpiler.mod | 214 +++++++++++++++++++++--------------------- 2 files changed, 128 insertions(+), 120 deletions(-) diff --git a/source/Lexer.mod b/source/Lexer.mod index b4f54ca..6d8003a 100644 --- a/source/Lexer.mod +++ b/source/Lexer.mod @@ -72,6 +72,8 @@ VAR transitions: ARRAY[1..16] OF TransitionClasses; PROCEDURE initialize_classification(); +VAR + i: CARDINAL; BEGIN classification[1] := transitionClassEof; (* NUL *) classification[2] := transitionClassInvalid; (* SOH *) @@ -200,23 +202,29 @@ BEGIN classification[125] := transitionClassSingle; (* | *) classification[126] := transitionClassOther; (* } *) classification[127] := transitionClassSingle; (* ~ *) - classification[128] := transitionClassInvalid (* DEL *) + classification[128] := transitionClassInvalid; (* DEL *) + + i := 129; + WHILE i <= 256 DO + classification[i] := transitionClassOther; + i := i + 1 + END END initialize_classification; PROCEDURE compare_keyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN; VAR - Result: BOOLEAN; + result: BOOLEAN; Index: CARDINAL; BEGIN Index := 0; - Result := TRUE; + result := TRUE; - WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND Result DO - Result := (Keyword[Index] = TokenStart^) OR (Lower(Keyword[Index]) = TokenStart^); + WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND result DO + result := (Keyword[Index] = TokenStart^) OR (Lower(Keyword[Index]) = TokenStart^); INC(TokenStart); INC(Index) END; - Result := (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND Result; - RETURN Result + result := (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND result; + RETURN result END compare_keyword; (* Reached the end of file. *) PROCEDURE transition_action_eof(lexer: PLexer; AToken: PLexerToken); @@ -747,7 +755,7 @@ VAR CurrentClass: TransitionClass; CurrentState: TransitionState; CurrentTransition: Transition; - Result: LexerToken; + result: LexerToken; BEGIN lexer^.Current := lexer^.Start; CurrentState := transitionStateStart; @@ -757,15 +765,15 @@ BEGIN CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1]; IF CurrentTransition.Action <> NIL THEN - CurrentTransition.Action(lexer, ADR(Result)) + CurrentTransition.Action(lexer, ADR(result)) END; CurrentState := CurrentTransition.NextState END; - RETURN Result + RETURN result END lexer_current; PROCEDURE lexer_lex(lexer: PLexer): LexerToken; VAR - Result: LexerToken; + result: LexerToken; BEGIN IF lexer^.Length = 0 THEN lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer); @@ -773,8 +781,8 @@ BEGIN END; lexer^.Start := lexer^.Current; - Result := lexer_current(lexer); - RETURN Result + result := lexer_current(lexer); + RETURN result END lexer_lex; PROCEDURE lexer_destroy(lexer: PLexer); BEGIN diff --git a/source/Transpiler.mod b/source/Transpiler.mod index 97a806c..55d8926 100644 --- a/source/Transpiler.mod +++ b/source/Transpiler.mod @@ -71,69 +71,69 @@ BEGIN END transpile_import_part; PROCEDURE transpile_constant(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString(' '); - Token := lexer_current(lexer); + token := lexer_current(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); WriteString(' = '); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); write_semicolon() END transpile_constant; PROCEDURE transpile_constant_part(context: PTranspilerContext; lexer: PLexer): BOOLEAN; VAR - Token: LexerToken; + token: LexerToken; result: BOOLEAN; BEGIN - Token := lexer_current(lexer); - result := Token.Kind = lexerKindConst; + token := lexer_current(lexer); + result := token.Kind = lexerKindConst; IF result THEN WriteString('CONST'); WriteLn(); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - WHILE Token.Kind = lexerKindIdentifier DO + WHILE token.Kind = lexerKindIdentifier DO transpile_constant(context, lexer); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END END; RETURN result END transpile_constant_part; PROCEDURE transpile_module(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - IF Token.Kind = lexerKindDefinition THEN + IF token.Kind = lexerKindDefinition THEN WriteString('DEFINITION '); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END; - IF Token.Kind = lexerKindImplementation THEN + IF token.Kind = lexerKindImplementation THEN WriteString('IMPLEMENTATION '); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END; WriteString('MODULE '); (* Write the module name and end the line with a semicolon and newline. *) - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); write_semicolon(); WriteLn(); (* Write the module body. *) - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); transpile_import_part(context, lexer); IF transpile_constant_part(context, lexer) THEN WriteLn() @@ -147,32 +147,32 @@ BEGIN WriteString('END '); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); Write('.'); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); WriteLn() END transpile_module; PROCEDURE transpile_type_fields(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - WHILE Token.Kind <> lexerKindEnd DO + WHILE token.Kind <> lexerKindEnd DO WriteString(' '); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); WriteString(': '); transpile_type_expression(context, lexer); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - IF Token.Kind = lexerKindSemicolon THEN - Token := transpiler_lex(lexer); + IF token.Kind = lexerKindSemicolon THEN + token := transpiler_lex(lexer); Write(';') END; WriteLn() @@ -180,7 +180,7 @@ BEGIN END transpile_type_fields; PROCEDURE transpile_record_type(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; BEGIN WriteString('RECORD'); WriteLn(); @@ -189,32 +189,32 @@ BEGIN END transpile_record_type; PROCEDURE transpile_pointer_type(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN - Token := lexer_current(lexer); + token := lexer_current(lexer); WriteString('POINTER TO '); - IF Token.Kind = lexerKindPointer THEN - Token := transpiler_lex(lexer) + IF token.Kind = lexerKindPointer THEN + token := transpiler_lex(lexer) END; transpile_type_expression(context, lexer) END transpile_pointer_type; PROCEDURE transpile_array_type(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString('ARRAY'); - Token := lexer_current(lexer); + token := lexer_current(lexer); - IF Token.Kind = lexerKindArray THEN - Token := transpiler_lex(lexer) + IF token.Kind = lexerKindArray THEN + token := transpiler_lex(lexer) END; - IF Token.Kind <> lexerKindOf THEN + IF token.Kind <> lexerKindOf THEN WriteString('[1..'); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); Write(']') END; WriteString(' OF '); @@ -222,50 +222,50 @@ BEGIN END transpile_array_type; PROCEDURE transpile_enumeration_type(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString('('); WriteLn(); WriteString(' '); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - WHILE Token.Kind = lexerKindComma DO + WHILE token.Kind = lexerKindComma DO Write(','); WriteLn(); WriteString(' '); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END; WriteLn(); WriteString(' )') END transpile_enumeration_type; PROCEDURE transpile_union_type(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; END transpile_union_type; PROCEDURE transpile_procedure_type(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); WriteString('PROCEDURE('); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - WHILE Token.Kind <> lexerKindRightParen DO + WHILE token.Kind <> lexerKindRightParen DO written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); - IF Token.Kind = lexerKindComma THEN - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); + IF token.Kind = lexerKindComma THEN + token := transpiler_lex(lexer); WriteString(', ') END END; @@ -273,42 +273,42 @@ BEGIN END transpile_procedure_type; PROCEDURE transpile_type_expression(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN - Token := transpiler_lex(lexer); - IF Token.Kind = lexerKindRecord THEN + token := transpiler_lex(lexer); + IF token.Kind = lexerKindRecord THEN transpile_record_type(context, lexer) END; - IF Token.Kind = lexerKindLeftParen THEN + IF token.Kind = lexerKindLeftParen THEN transpile_enumeration_type(context, lexer) END; - IF (Token.Kind = lexerKindArray) OR (Token.Kind = lexerKindLeftSquare) THEN + IF (token.Kind = lexerKindArray) OR (token.Kind = lexerKindLeftSquare) THEN transpile_array_type(context, lexer) END; - IF Token.Kind = lexerKindHat THEN + IF token.Kind = lexerKindHat THEN transpile_pointer_type(context, lexer) END; - IF Token.Kind = lexerKindProc THEN + IF token.Kind = lexerKindProc THEN transpile_procedure_type(context, lexer) END; - IF Token.Kind = lexerKindIdentifier THEN + IF token.Kind = lexerKindIdentifier THEN written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start) END END transpile_type_expression; PROCEDURE transpile_type_declaration(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString(' '); - Token := lexer_current(lexer); + token := lexer_current(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); WriteString(' = '); transpile_type_expression(context, lexer); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); write_semicolon(); END transpile_type_declaration; PROCEDURE transpile_type_part(context: PTranspilerContext; lexer: PLexer); @@ -331,35 +331,35 @@ BEGIN END transpile_type_part; PROCEDURE transpile_variable_declaration(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString(' '); - Token := lexer_current(lexer); + token := lexer_current(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); WriteString(': '); transpile_type_expression(context, lexer); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); write_semicolon() END transpile_variable_declaration; PROCEDURE transpile_variable_part(context: PTranspilerContext; lexer: PLexer): BOOLEAN; VAR - Token: LexerToken; + token: LexerToken; result: BOOLEAN; BEGIN - Token := lexer_current(lexer); - result := Token.Kind = lexerKindVar; + token := lexer_current(lexer); + result := token.Kind = lexerKindVar; IF result THEN WriteString('VAR'); WriteLn(); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - WHILE Token.Kind = lexerKindIdentifier DO + WHILE token.Kind = lexerKindIdentifier DO transpile_variable_declaration(context, lexer); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END END; RETURN result @@ -408,14 +408,14 @@ BEGIN RETURN result END transpile_procedure_heading; -PROCEDURE transpile_expression(context: PTranspilerContext; lexer: PLexer; TrailingToken: LexerKind); +PROCEDURE transpile_expression(context: PTranspilerContext; lexer: PLexer; trailing_token: LexerKind); VAR token: LexerToken; written_bytes: CARDINAL; BEGIN token := transpiler_lex(lexer); - WHILE (token.Kind <> TrailingToken) AND (token.Kind <> lexerKindEnd) DO + WHILE (token.Kind <> trailing_token) AND (token.Kind <> lexerKindEnd) DO written_bytes := 0; IF token.Kind = lexerKindNull THEN WriteString('NIL '); @@ -456,7 +456,7 @@ BEGIN END transpile_if_statement; PROCEDURE transpile_while_statement(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString(' WHILE '); @@ -466,7 +466,7 @@ BEGIN WriteLn(); transpile_statements(context, lexer); WriteString(' END'); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END transpile_while_statement; PROCEDURE transpile_assignment_statement(context: PTranspilerContext; lexer: PLexer); BEGIN @@ -475,26 +475,26 @@ BEGIN END transpile_assignment_statement; PROCEDURE transpile_call_statement(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString('('); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); - WHILE (Token.Kind <> lexerKindSemicolon) AND (Token.Kind <> lexerKindEnd) DO + WHILE (token.Kind <> lexerKindSemicolon) AND (token.Kind <> lexerKindEnd) DO written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END END transpile_call_statement; PROCEDURE transpile_return_statement(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; BEGIN WriteString(' RETURN '); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END transpile_return_statement; PROCEDURE transpile_statement(context: PTranspilerContext; lexer: PLexer); VAR @@ -562,15 +562,15 @@ BEGIN END transpile_statement; PROCEDURE transpile_statements(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; BEGIN - Token := lexer_current(lexer); + token := lexer_current(lexer); - WHILE Token.Kind <> lexerKindEnd DO + WHILE token.Kind <> lexerKindEnd DO transpile_statement(context, lexer); - Token := lexer_current(lexer); + token := lexer_current(lexer); - IF Token.Kind = lexerKindSemicolon THEN + IF token.Kind = lexerKindSemicolon THEN Write(';') END; WriteLn() @@ -578,10 +578,10 @@ BEGIN END transpile_statements; PROCEDURE transpile_statement_part(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; BEGIN - Token := lexer_current(lexer); - IF Token.Kind = lexerKindBegin THEN + token := lexer_current(lexer); + IF token.Kind = lexerKindBegin THEN WriteString('BEGIN'); WriteLn(); transpile_statements(context, lexer) @@ -589,37 +589,37 @@ BEGIN END transpile_statement_part; PROCEDURE transpile_procedure_declaration(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; seen_part: BOOLEAN; written_bytes: CARDINAL; BEGIN - Token := transpile_procedure_heading(context, lexer); + token := transpile_procedure_heading(context, lexer); seen_part := transpile_constant_part(context, lexer); seen_part := transpile_variable_part(context, lexer); transpile_statement_part(context, lexer); WriteString('END '); - written_bytes := WriteNBytes(StdOut, ORD(Token.identifierKind[1]), ADR(Token.identifierKind[2])); + written_bytes := WriteNBytes(StdOut, ORD(token.identifierKind[1]), ADR(token.identifierKind[2])); - Token := transpiler_lex(lexer); + token := transpiler_lex(lexer); write_semicolon(); - Token := transpiler_lex(lexer) + token := transpiler_lex(lexer) END transpile_procedure_declaration; PROCEDURE transpile_procedure_part(context: PTranspilerContext; lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; BEGIN - Token := lexer_current(lexer); + token := lexer_current(lexer); - WHILE Token.Kind = lexerKindProc DO + WHILE token.Kind = lexerKindProc DO transpile_procedure_declaration(context, lexer); - Token := lexer_current(lexer); + token := lexer_current(lexer); WriteLn() END END transpile_procedure_part; PROCEDURE transpile(lexer: PLexer); VAR - Token: LexerToken; + token: LexerToken; written_bytes: CARDINAL; Context: TranspilerContext; BEGIN