From 8ad1259ee8b6cf97e03530b7181666754ceffc32 Mon Sep 17 00:00:00 2001
From: Eugen Wissner
Date: Fri, 30 May 2025 19:51:40 +0200
Subject: [PATCH] Handle ASCII codes > 128 in the tokenizer

---
 source/Lexer.elna      |  34 ++++---
 source/Transpiler.elna | 214 ++++++++++++++++++++---------------------
 2 files changed, 128 insertions(+), 120 deletions(-)

diff --git a/source/Lexer.elna b/source/Lexer.elna
index 264f895..4ff2225 100644
--- a/source/Lexer.elna
+++ b/source/Lexer.elna
@@ -72,6 +72,8 @@ var
   transitions: [16]TransitionClasses;
 
 proc initialize_classification();
+var
+  i: CARDINAL;
 begin
   classification[1] := transitionClassEof; (* NUL *)
   classification[2] := transitionClassInvalid; (* SOH *)
@@ -200,24 +202,30 @@ begin
   classification[125] := transitionClassSingle; (* | *)
   classification[126] := transitionClassOther; (* } *)
   classification[127] := transitionClassSingle; (* ~ *)
-  classification[128] := transitionClassInvalid (* DEL *)
+  classification[128] := transitionClassInvalid; (* DEL *)
+
+  i := 129;
+  while i <= 256 do
+    classification[i] := transitionClassOther;
+    i := i + 1
+  end
 end;
 
 proc compare_keyword(Keyword: ARRAY OF CHAR, TokenStart: PLexerBuffer, TokenEnd: PLexerBuffer): BOOLEAN;
 var
-  Result: BOOLEAN;
+  result: BOOLEAN;
   Index: CARDINAL;
 begin
   Index := 0;
-  Result := TRUE;
+  result := TRUE;
 
-  while (Index < Length(Keyword)) & (TokenStart <> TokenEnd) & Result DO
-    Result := (Keyword[Index] = TokenStart^) or (Lower(Keyword[Index]) = TokenStart^);
+  while (Index < Length(Keyword)) & (TokenStart <> TokenEnd) & result DO
+    result := (Keyword[Index] = TokenStart^) or (Lower(Keyword[Index]) = TokenStart^);
     INC(TokenStart);
     INC(Index)
   end;
-  Result := (Index = Length(Keyword)) & (TokenStart = TokenEnd) & Result;
-  return Result
+  result := (Index = Length(Keyword)) & (TokenStart = TokenEnd) & result;
+  return result
 end;
 
 (* Reached the end of file. *)
@@ -761,7 +769,7 @@ var
   CurrentClass: TransitionClass;
   CurrentState: TransitionState;
   CurrentTransition: Transition;
-  Result: LexerToken;
+  result: LexerToken;
 begin
   lexer^.Current := lexer^.Start;
   CurrentState := transitionStateStart;
@@ -771,16 +779,16 @@ begin
     CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
 
     if CurrentTransition.Action <> nil then
-      CurrentTransition.Action(lexer, ADR(Result))
+      CurrentTransition.Action(lexer, ADR(result))
     end;
     CurrentState := CurrentTransition.NextState
   end;
-  return Result
+  return result
 end;
 
 proc lexer_lex(lexer: PLexer): LexerToken;
 var
-  Result: LexerToken;
+  result: LexerToken;
 begin
   if lexer^.Length = 0 then
     lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
@@ -788,8 +796,8 @@ begin
   end;
 
   lexer^.Start := lexer^.Current;
-  Result := lexer_current(lexer);
-  return Result
+  result := lexer_current(lexer);
+  return result
 end;
 
 proc lexer_destroy(lexer: PLexer);
diff --git a/source/Transpiler.elna b/source/Transpiler.elna
index bdb6c5a..4808203 100644
--- a/source/Transpiler.elna
+++ b/source/Transpiler.elna
@@ -75,39 +75,39 @@ end;
 
 proc transpile_constant(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString(' ');
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
 
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   WriteString(' = ');
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   write_semicolon()
 end;
 
 proc transpile_constant_part(context: PTranspilerContext, lexer: PLexer): BOOLEAN;
 var
-  Token: LexerToken;
+  token: LexerToken;
   result: BOOLEAN;
 begin
-  Token := lexer_current(lexer);
-  result := Token.Kind = lexerKindConst;
+  token := lexer_current(lexer);
+  result := token.Kind = lexerKindConst;
 
   if result then
     WriteString('CONST');
     WriteLn();
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
 
-    while Token.Kind = lexerKindIdentifier do
+    while token.Kind = lexerKindIdentifier do
       transpile_constant(context, lexer);
-      Token := transpiler_lex(lexer)
+      token := transpiler_lex(lexer)
     end
   end;
   return result
@@ -115,31 +115,31 @@ end;
 
 proc transpile_module(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
 
-  if Token.Kind = lexerKindDefinition then
+  if token.Kind = lexerKindDefinition then
     WriteString('DEFINITION ');
-    Token := transpiler_lex(lexer)
+    token := transpiler_lex(lexer)
   end;
 
-  if Token.Kind = lexerKindImplementation then
+  if token.Kind = lexerKindImplementation then
     WriteString('IMPLEMENTATION ');
-    Token := transpiler_lex(lexer)
+    token := transpiler_lex(lexer)
   end;
 
   WriteString('MODULE ');
   (* Write the module name and end the line with a semicolon and newline. *)
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   write_semicolon();
   WriteLn();
 
   (* Write the module body. *)
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   transpile_import_part(context, lexer);
   if transpile_constant_part(context, lexer) then
     WriteLn()
@@ -153,33 +153,33 @@ begin
 
   WriteString('END ');
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   Write('.');
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   WriteLn()
 end;
 
 proc transpile_type_fields(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
 
-  while Token.Kind <> lexerKindEnd do
+  while token.Kind <> lexerKindEnd do
     WriteString(' ');
     written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
     WriteString(': ');
     transpile_type_expression(context, lexer);
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
 
-    if Token.Kind = lexerKindSemicolon then
-      Token := transpiler_lex(lexer);
+    if token.Kind = lexerKindSemicolon then
+      token := transpiler_lex(lexer);
       Write(';')
     end;
     WriteLn()
@@ -188,7 +188,7 @@ end;
 
 proc transpile_record_type(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
 begin
   WriteString('RECORD');
   WriteLn();
@@ -198,33 +198,33 @@ end;
 
 proc transpile_pointer_type(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
   WriteString('POINTER TO ');
-  if Token.Kind = lexerKindPointer then
-    Token := transpiler_lex(lexer)
+  if token.Kind = lexerKindPointer then
+    token := transpiler_lex(lexer)
   end;
   transpile_type_expression(context, lexer)
 end;
 
 proc transpile_array_type(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString('ARRAY');
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
 
-  if Token.Kind = lexerKindArray then
-    Token := transpiler_lex(lexer)
+  if token.Kind = lexerKindArray then
+    token := transpiler_lex(lexer)
  end;
-  if Token.Kind <> lexerKindOf then
+  if token.Kind <> lexerKindOf then
     WriteString('[1..');
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
     written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
     Write(']')
   end;
   WriteString(' OF ');
@@ -233,26 +233,26 @@ end;
 
 proc transpile_enumeration_type(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString('(');
   WriteLn();
 
   WriteString(' ');
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
 
-  while Token.Kind = lexerKindComma do
+  while token.Kind = lexerKindComma do
     Write(',');
     WriteLn();
 
     WriteString(' ');
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
     written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-    Token := transpiler_lex(lexer)
+    token := transpiler_lex(lexer)
   end;
   WriteLn();
   WriteString(' )')
@@ -260,25 +260,25 @@ end;
 
 proc transpile_union_type(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
 
 end;
 
 proc transpile_procedure_type(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
 
   WriteString('PROCEDURE(');
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
 
-  while Token.Kind <> lexerKindRightParen do
+  while token.Kind <> lexerKindRightParen do
     written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-    Token := transpiler_lex(lexer);
-    if Token.Kind = lexerKindComma then
-      Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
+    if token.Kind = lexerKindComma then
+      token := transpiler_lex(lexer);
       WriteString(', ')
    end
  end;
@@ -287,43 +287,43 @@ end;
 
 proc transpile_type_expression(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
-  Token := transpiler_lex(lexer);
-  if Token.Kind = lexerKindRecord then
+  token := transpiler_lex(lexer);
+  if token.Kind = lexerKindRecord then
     transpile_record_type(context, lexer)
   end;
-  if Token.Kind = lexerKindLeftParen then
+  if token.Kind = lexerKindLeftParen then
     transpile_enumeration_type(context, lexer)
   end;
-  if (Token.Kind = lexerKindArray) or (Token.Kind = lexerKindLeftSquare) then
+  if (token.Kind = lexerKindArray) or (token.Kind = lexerKindLeftSquare) then
     transpile_array_type(context, lexer)
   end;
-  if Token.Kind = lexerKindHat then
+  if token.Kind = lexerKindHat then
     transpile_pointer_type(context, lexer)
   end;
-  if Token.Kind = lexerKindProc then
+  if token.Kind = lexerKindProc then
     transpile_procedure_type(context, lexer)
   end;
-  if Token.Kind = lexerKindIdentifier then
+  if token.Kind = lexerKindIdentifier then
     written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start)
   end
 end;
 
 proc transpile_type_declaration(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString(' ');
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
 
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   WriteString(' = ');
   transpile_type_expression(context, lexer);
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   write_semicolon();
 end;
 
@@ -348,36 +348,36 @@ end;
 
 proc transpile_variable_declaration(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString(' ');
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
 
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   WriteString(': ');
   transpile_type_expression(context, lexer);
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   write_semicolon()
 end;
 
 proc transpile_variable_part(context: PTranspilerContext, lexer: PLexer): BOOLEAN;
 var
-  Token: LexerToken;
+  token: LexerToken;
   result: BOOLEAN;
 begin
-  Token := lexer_current(lexer);
-  result := Token.Kind = lexerKindVar;
+  token := lexer_current(lexer);
+  result := token.Kind = lexerKindVar;
 
   if result then
     WriteString('VAR');
     WriteLn();
-    Token := transpiler_lex(lexer);
+    token := transpiler_lex(lexer);
 
-    while Token.Kind = lexerKindIdentifier do
+    while token.Kind = lexerKindIdentifier do
       transpile_variable_declaration(context, lexer);
-      Token := transpiler_lex(lexer)
+      token := transpiler_lex(lexer)
    end
  end;
  return result
@@ -428,14 +428,14 @@ begin
   return result
 end;
 
-proc transpile_expression(context: PTranspilerContext, lexer: PLexer, TrailingToken: LexerKind);
+proc transpile_expression(context: PTranspilerContext, lexer: PLexer, trailing_token: LexerKind);
 var
   token: LexerToken;
   written_bytes: CARDINAL;
 begin
   token := transpiler_lex(lexer);
 
-  while (token.Kind <> TrailingToken) & (token.Kind <> lexerKindEnd) do
+  while (token.Kind <> trailing_token) & (token.Kind <> lexerKindEnd) do
     written_bytes := 0;
     if token.Kind = lexerKindNull then
       WriteString('NIL ');
@@ -478,7 +478,7 @@ end;
 
 proc transpile_while_statement(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString(' WHILE ');
@@ -488,7 +488,7 @@ begin
   WriteLn();
   transpile_statements(context, lexer);
   WriteString(' END');
-  Token := transpiler_lex(lexer)
+  token := transpiler_lex(lexer)
 end;
 
 proc transpile_assignment_statement(context: PTranspilerContext, lexer: PLexer);
@@ -499,27 +499,27 @@ end;
 
 proc transpile_call_statement(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString('(');
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
 
-  while (Token.Kind <> lexerKindSemicolon) & (Token.Kind <> lexerKindEnd) do
+  while (token.Kind <> lexerKindSemicolon) & (token.Kind <> lexerKindEnd) do
     written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-    Token := transpiler_lex(lexer)
+    token := transpiler_lex(lexer)
   end
 end;
 
 proc transpile_return_statement(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
 begin
   WriteString(' RETURN ');
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
-  Token := transpiler_lex(lexer)
+  token := transpiler_lex(lexer)
 end;
 
 proc transpile_statement(context: PTranspilerContext, lexer: PLexer);
@@ -589,15 +589,15 @@ end;
 
 proc transpile_statements(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
 begin
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
 
-  while Token.Kind <> lexerKindEnd do
+  while token.Kind <> lexerKindEnd do
     transpile_statement(context, lexer);
-    Token := lexer_current(lexer);
+    token := lexer_current(lexer);
 
-    if Token.Kind = lexerKindSemicolon then
+    if token.Kind = lexerKindSemicolon then
       Write(';')
     end;
     WriteLn()
@@ -606,10 +606,10 @@ end;
 
 proc transpile_statement_part(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
 begin
-  Token := lexer_current(lexer);
-  if Token.Kind = lexerKindBegin then
+  token := lexer_current(lexer);
+  if token.Kind = lexerKindBegin then
     WriteString('BEGIN');
     WriteLn();
     transpile_statements(context, lexer)
@@ -618,39 +618,39 @@ end;
 
 proc transpile_procedure_declaration(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   seen_part: BOOLEAN;
   written_bytes: CARDINAL;
 begin
-  Token := transpile_procedure_heading(context, lexer);
+  token := transpile_procedure_heading(context, lexer);
 
   seen_part := transpile_constant_part(context, lexer);
   seen_part := transpile_variable_part(context, lexer);
   transpile_statement_part(context, lexer);
   WriteString('END ');
-  written_bytes := WriteNBytes(StdOut, ORD(Token.identifierKind[1]), ADR(Token.identifierKind[2]));
+  written_bytes := WriteNBytes(StdOut, ORD(token.identifierKind[1]), ADR(token.identifierKind[2]));
 
-  Token := transpiler_lex(lexer);
+  token := transpiler_lex(lexer);
   write_semicolon();
-  Token := transpiler_lex(lexer)
+  token := transpiler_lex(lexer)
 end;
 
 proc transpile_procedure_part(context: PTranspilerContext, lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
 begin
-  Token := lexer_current(lexer);
+  token := lexer_current(lexer);
 
-  while Token.Kind = lexerKindProc do
+  while token.Kind = lexerKindProc do
     transpile_procedure_declaration(context, lexer);
-    Token := lexer_current(lexer);
+    token := lexer_current(lexer);
     WriteLn()
   end
 end;
 
 proc transpile(lexer: PLexer);
 var
-  Token: LexerToken;
+  token: LexerToken;
   written_bytes: CARDINAL;
   Context: TranspilerContext;
 begin