Handle character codes >= 128 (non-ASCII bytes) in the tokenizer

This commit is contained in:
2025-05-30 19:51:18 +02:00
parent 15135f14d8
commit 5875fb28db
2 changed files with 128 additions and 120 deletions

View File

@ -72,6 +72,8 @@ VAR
transitions: ARRAY[1..16] OF TransitionClasses;
PROCEDURE initialize_classification();
VAR
i: CARDINAL;
BEGIN
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
@ -200,23 +202,29 @@ BEGIN
classification[125] := transitionClassSingle; (* | *)
classification[126] := transitionClassOther; (* } *)
classification[127] := transitionClassSingle; (* ~ *)
classification[128] := transitionClassInvalid (* DEL *)
classification[128] := transitionClassInvalid; (* DEL *)
i := 129;
WHILE i <= 256 DO
classification[i] := transitionClassOther;
i := i + 1
END
END initialize_classification;
PROCEDURE compare_keyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
VAR
Result: BOOLEAN;
result: BOOLEAN;
Index: CARDINAL;
BEGIN
Index := 0;
Result := TRUE;
result := TRUE;
WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND Result DO
Result := (Keyword[Index] = TokenStart^) OR (Lower(Keyword[Index]) = TokenStart^);
WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND result DO
result := (Keyword[Index] = TokenStart^) OR (Lower(Keyword[Index]) = TokenStart^);
INC(TokenStart);
INC(Index)
END;
Result := (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND Result;
RETURN Result
result := (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND result;
RETURN result
END compare_keyword;
(* Reached the end of file. *)
PROCEDURE transition_action_eof(lexer: PLexer; AToken: PLexerToken);
@ -747,7 +755,7 @@ VAR
CurrentClass: TransitionClass;
CurrentState: TransitionState;
CurrentTransition: Transition;
Result: LexerToken;
result: LexerToken;
BEGIN
lexer^.Current := lexer^.Start;
CurrentState := transitionStateStart;
@ -757,15 +765,15 @@ BEGIN
CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
IF CurrentTransition.Action <> NIL THEN
CurrentTransition.Action(lexer, ADR(Result))
CurrentTransition.Action(lexer, ADR(result))
END;
CurrentState := CurrentTransition.NextState
END;
RETURN Result
RETURN result
END lexer_current;
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
VAR
Result: LexerToken;
result: LexerToken;
BEGIN
IF lexer^.Length = 0 THEN
lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
@ -773,8 +781,8 @@ BEGIN
END;
lexer^.Start := lexer^.Current;
Result := lexer_current(lexer);
RETURN Result
result := lexer_current(lexer);
RETURN result
END lexer_lex;
PROCEDURE lexer_destroy(lexer: PLexer);
BEGIN

View File

@ -71,69 +71,69 @@ BEGIN
END transpile_import_part;
PROCEDURE transpile_constant(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString(' ');
Token := lexer_current(lexer);
token := lexer_current(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(' = ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon()
END transpile_constant;
PROCEDURE transpile_constant_part(context: PTranspilerContext; lexer: PLexer): BOOLEAN;
VAR
Token: LexerToken;
token: LexerToken;
result: BOOLEAN;
BEGIN
Token := lexer_current(lexer);
result := Token.Kind = lexerKindConst;
token := lexer_current(lexer);
result := token.Kind = lexerKindConst;
IF result THEN
WriteString('CONST');
WriteLn();
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WHILE Token.Kind = lexerKindIdentifier DO
WHILE token.Kind = lexerKindIdentifier DO
transpile_constant(context, lexer);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END
END;
RETURN result
END transpile_constant_part;
PROCEDURE transpile_module(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
IF Token.Kind = lexerKindDefinition THEN
IF token.Kind = lexerKindDefinition THEN
WriteString('DEFINITION ');
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END;
IF Token.Kind = lexerKindImplementation THEN
IF token.Kind = lexerKindImplementation THEN
WriteString('IMPLEMENTATION ');
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END;
WriteString('MODULE ');
(* Write the module name and end the line with a semicolon and newline. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon();
WriteLn();
(* Write the module body. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
transpile_import_part(context, lexer);
IF transpile_constant_part(context, lexer) THEN
WriteLn()
@ -147,32 +147,32 @@ BEGIN
WriteString('END ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
Write('.');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteLn()
END transpile_module;
PROCEDURE transpile_type_fields(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WHILE Token.Kind <> lexerKindEnd DO
WHILE token.Kind <> lexerKindEnd DO
WriteString(' ');
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(': ');
transpile_type_expression(context, lexer);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
IF Token.Kind = lexerKindSemicolon THEN
Token := transpiler_lex(lexer);
IF token.Kind = lexerKindSemicolon THEN
token := transpiler_lex(lexer);
Write(';')
END;
WriteLn()
@ -180,7 +180,7 @@ BEGIN
END transpile_type_fields;
PROCEDURE transpile_record_type(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
BEGIN
WriteString('RECORD');
WriteLn();
@ -189,32 +189,32 @@ BEGIN
END transpile_record_type;
PROCEDURE transpile_pointer_type(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
Token := lexer_current(lexer);
token := lexer_current(lexer);
WriteString('POINTER TO ');
IF Token.Kind = lexerKindPointer THEN
Token := transpiler_lex(lexer)
IF token.Kind = lexerKindPointer THEN
token := transpiler_lex(lexer)
END;
transpile_type_expression(context, lexer)
END transpile_pointer_type;
PROCEDURE transpile_array_type(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString('ARRAY');
Token := lexer_current(lexer);
token := lexer_current(lexer);
IF Token.Kind = lexerKindArray THEN
Token := transpiler_lex(lexer)
IF token.Kind = lexerKindArray THEN
token := transpiler_lex(lexer)
END;
IF Token.Kind <> lexerKindOf THEN
IF token.Kind <> lexerKindOf THEN
WriteString('[1..');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
Write(']')
END;
WriteString(' OF ');
@ -222,50 +222,50 @@ BEGIN
END transpile_array_type;
PROCEDURE transpile_enumeration_type(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString('(');
WriteLn();
WriteString(' ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WHILE Token.Kind = lexerKindComma DO
WHILE token.Kind = lexerKindComma DO
Write(',');
WriteLn();
WriteString(' ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END;
WriteLn();
WriteString(' )')
END transpile_enumeration_type;
PROCEDURE transpile_union_type(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
END transpile_union_type;
PROCEDURE transpile_procedure_type(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString('PROCEDURE(');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WHILE Token.Kind <> lexerKindRightParen DO
WHILE token.Kind <> lexerKindRightParen DO
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
IF Token.Kind = lexerKindComma THEN
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
IF token.Kind = lexerKindComma THEN
token := transpiler_lex(lexer);
WriteString(', ')
END
END;
@ -273,42 +273,42 @@ BEGIN
END transpile_procedure_type;
PROCEDURE transpile_type_expression(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
Token := transpiler_lex(lexer);
IF Token.Kind = lexerKindRecord THEN
token := transpiler_lex(lexer);
IF token.Kind = lexerKindRecord THEN
transpile_record_type(context, lexer)
END;
IF Token.Kind = lexerKindLeftParen THEN
IF token.Kind = lexerKindLeftParen THEN
transpile_enumeration_type(context, lexer)
END;
IF (Token.Kind = lexerKindArray) OR (Token.Kind = lexerKindLeftSquare) THEN
IF (token.Kind = lexerKindArray) OR (token.Kind = lexerKindLeftSquare) THEN
transpile_array_type(context, lexer)
END;
IF Token.Kind = lexerKindHat THEN
IF token.Kind = lexerKindHat THEN
transpile_pointer_type(context, lexer)
END;
IF Token.Kind = lexerKindProc THEN
IF token.Kind = lexerKindProc THEN
transpile_procedure_type(context, lexer)
END;
IF Token.Kind = lexerKindIdentifier THEN
IF token.Kind = lexerKindIdentifier THEN
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start)
END
END transpile_type_expression;
PROCEDURE transpile_type_declaration(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString(' ');
Token := lexer_current(lexer);
token := lexer_current(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(' = ');
transpile_type_expression(context, lexer);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon();
END transpile_type_declaration;
PROCEDURE transpile_type_part(context: PTranspilerContext; lexer: PLexer);
@ -331,35 +331,35 @@ BEGIN
END transpile_type_part;
PROCEDURE transpile_variable_declaration(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString(' ');
Token := lexer_current(lexer);
token := lexer_current(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(': ');
transpile_type_expression(context, lexer);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon()
END transpile_variable_declaration;
PROCEDURE transpile_variable_part(context: PTranspilerContext; lexer: PLexer): BOOLEAN;
VAR
Token: LexerToken;
token: LexerToken;
result: BOOLEAN;
BEGIN
Token := lexer_current(lexer);
result := Token.Kind = lexerKindVar;
token := lexer_current(lexer);
result := token.Kind = lexerKindVar;
IF result THEN
WriteString('VAR');
WriteLn();
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WHILE Token.Kind = lexerKindIdentifier DO
WHILE token.Kind = lexerKindIdentifier DO
transpile_variable_declaration(context, lexer);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END
END;
RETURN result
@ -408,14 +408,14 @@ BEGIN
RETURN result
END transpile_procedure_heading;
PROCEDURE transpile_expression(context: PTranspilerContext; lexer: PLexer; TrailingToken: LexerKind);
PROCEDURE transpile_expression(context: PTranspilerContext; lexer: PLexer; trailing_token: LexerKind);
VAR
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
token := transpiler_lex(lexer);
WHILE (token.Kind <> TrailingToken) AND (token.Kind <> lexerKindEnd) DO
WHILE (token.Kind <> trailing_token) AND (token.Kind <> lexerKindEnd) DO
written_bytes := 0;
IF token.Kind = lexerKindNull THEN
WriteString('NIL ');
@ -456,7 +456,7 @@ BEGIN
END transpile_if_statement;
PROCEDURE transpile_while_statement(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString(' WHILE ');
@ -466,7 +466,7 @@ BEGIN
WriteLn();
transpile_statements(context, lexer);
WriteString(' END');
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END transpile_while_statement;
PROCEDURE transpile_assignment_statement(context: PTranspilerContext; lexer: PLexer);
BEGIN
@ -475,26 +475,26 @@ BEGIN
END transpile_assignment_statement;
PROCEDURE transpile_call_statement(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString('(');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WHILE (Token.Kind <> lexerKindSemicolon) AND (Token.Kind <> lexerKindEnd) DO
WHILE (token.Kind <> lexerKindSemicolon) AND (token.Kind <> lexerKindEnd) DO
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END
END transpile_call_statement;
PROCEDURE transpile_return_statement(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
BEGIN
WriteString(' RETURN ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END transpile_return_statement;
PROCEDURE transpile_statement(context: PTranspilerContext; lexer: PLexer);
VAR
@ -562,15 +562,15 @@ BEGIN
END transpile_statement;
PROCEDURE transpile_statements(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
BEGIN
Token := lexer_current(lexer);
token := lexer_current(lexer);
WHILE Token.Kind <> lexerKindEnd DO
WHILE token.Kind <> lexerKindEnd DO
transpile_statement(context, lexer);
Token := lexer_current(lexer);
token := lexer_current(lexer);
IF Token.Kind = lexerKindSemicolon THEN
IF token.Kind = lexerKindSemicolon THEN
Write(';')
END;
WriteLn()
@ -578,10 +578,10 @@ BEGIN
END transpile_statements;
PROCEDURE transpile_statement_part(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
BEGIN
Token := lexer_current(lexer);
IF Token.Kind = lexerKindBegin THEN
token := lexer_current(lexer);
IF token.Kind = lexerKindBegin THEN
WriteString('BEGIN');
WriteLn();
transpile_statements(context, lexer)
@ -589,37 +589,37 @@ BEGIN
END transpile_statement_part;
PROCEDURE transpile_procedure_declaration(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
seen_part: BOOLEAN;
written_bytes: CARDINAL;
BEGIN
Token := transpile_procedure_heading(context, lexer);
token := transpile_procedure_heading(context, lexer);
seen_part := transpile_constant_part(context, lexer);
seen_part := transpile_variable_part(context, lexer);
transpile_statement_part(context, lexer);
WriteString('END ');
written_bytes := WriteNBytes(StdOut, ORD(Token.identifierKind[1]), ADR(Token.identifierKind[2]));
written_bytes := WriteNBytes(StdOut, ORD(token.identifierKind[1]), ADR(token.identifierKind[2]));
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon();
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
END transpile_procedure_declaration;
PROCEDURE transpile_procedure_part(context: PTranspilerContext; lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
BEGIN
Token := lexer_current(lexer);
token := lexer_current(lexer);
WHILE Token.Kind = lexerKindProc DO
WHILE token.Kind = lexerKindProc DO
transpile_procedure_declaration(context, lexer);
Token := lexer_current(lexer);
token := lexer_current(lexer);
WriteLn()
END
END transpile_procedure_part;
PROCEDURE transpile(lexer: PLexer);
VAR
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
Context: TranspilerContext;
BEGIN