Handle character codes above 127 (non-ASCII bytes) in the tokenizer

This commit is contained in:
2025-05-30 19:51:40 +02:00
parent 4eccc147ba
commit 6e415e474f
4 changed files with 287 additions and 266 deletions

View File

@ -13,13 +13,13 @@ type
end;
(* Calls lexer_lex() but skips the comments. *)
proc transpiler_lex(lexer: PLexer): LexerToken;
proc transpiler_lex(lexer: PLexer) -> LexerToken;
var
result: LexerToken;
begin
result := lexer_lex(lexer);
while result.Kind = lexerKindComment do
while result.kind = lexerKindComment do
result := lexer_lex(lexer)
end;
@ -50,7 +50,7 @@ begin
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer);
while token.Kind <> lexerKindSemicolon do
while token.kind <> lexerKindSemicolon do
WriteString(', ');
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
@ -66,7 +66,7 @@ var
begin
token := lexer_current(lexer);
while token.Kind = lexerKindFrom do
while token.kind = lexerKindFrom do
transpile_import(context, lexer);
token := lexer_current(lexer)
end;
@ -75,39 +75,39 @@ end;
(* Emits a single constant declaration.
   Output order: a leading whitespace string, the bytes of the current lexeme,
   ' = ', the bytes of the lexeme read two tokens later, then write_semicolon().
   `context` is accepted but not referenced in this body. *)
proc transpile_constant(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString(' ');
Token := lexer_current(lexer);
token := lexer_current(lexer);
(* Copy lexer^.Current - lexer^.Start bytes, starting at lexer^.Start, to StdOut.
   Presumably that span is the constant's name - confirm against the lexer. *)
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
(* Skip one token without inspecting it; presumably the source-side
   assignment symbol - TODO confirm. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(' = ');
(* Read the next token and copy its bytes verbatim; only a single token is
   copied, not a whole expression. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
(* Consume one more token (its kind is not checked) before emitting the
   terminator. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon()
end;
proc transpile_constant_part(context: PTranspilerContext, lexer: PLexer): BOOLEAN;
proc transpile_constant_part(context: PTranspilerContext, lexer: PLexer) -> BOOLEAN;
var
Token: LexerToken;
token: LexerToken;
result: BOOLEAN;
begin
Token := lexer_current(lexer);
result := Token.Kind = lexerKindConst;
token := lexer_current(lexer);
result := token.kind = lexerKindConst;
if result then
WriteString('CONST');
WriteLn();
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
while Token.Kind = lexerKindIdentifier do
while token.kind = lexerKindIdentifier do
transpile_constant(context, lexer);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end
end;
return result
@ -115,31 +115,31 @@ end;
proc transpile_module(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
if Token.Kind = lexerKindDefinition then
if token.kind = lexerKindDefinition then
WriteString('DEFINITION ');
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end;
if Token.Kind = lexerKindImplementation then
if token.kind = lexerKindImplementation then
WriteString('IMPLEMENTATION ');
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end;
WriteString('MODULE ');
(* Write the module name and end the line with a semicolon and newline. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon();
WriteLn();
(* Write the module body. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
transpile_import_part(context, lexer);
if transpile_constant_part(context, lexer) then
WriteLn()
@ -153,33 +153,33 @@ begin
WriteString('END ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
Write('.');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteLn()
end;
proc transpile_type_fields(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
while Token.Kind <> lexerKindEnd do
while token.kind <> lexerKindEnd do
WriteString(' ');
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(': ');
transpile_type_expression(context, lexer);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
if Token.Kind = lexerKindSemicolon then
Token := transpiler_lex(lexer);
if token.kind = lexerKindSemicolon then
token := transpiler_lex(lexer);
Write(';')
end;
WriteLn()
@ -188,7 +188,7 @@ end;
proc transpile_record_type(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
begin
WriteString('RECORD');
WriteLn();
@ -198,33 +198,33 @@ end;
(* Emits a pointer type: writes 'POINTER TO ' and then delegates the target
   type to transpile_type_expression().
   `written_bytes` is declared but not used in this body. *)
proc transpile_pointer_type(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
(* Inspect the token the lexer is currently positioned on without advancing. *)
Token := lexer_current(lexer);
token := lexer_current(lexer);
WriteString('POINTER TO ');
(* If the current token is lexerKindPointer, one additional token is consumed
   before the target type is parsed. Presumably this skips the second word of
   a spelled-out pointer form - verify against the lexer's token kinds. *)
if Token.Kind = lexerKindPointer then
Token := transpiler_lex(lexer)
if token.kind = lexerKindPointer then
token := transpiler_lex(lexer)
end;
transpile_type_expression(context, lexer)
end;
proc transpile_array_type(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString('ARRAY');
Token := lexer_current(lexer);
token := lexer_current(lexer);
if Token.Kind = lexerKindArray then
Token := transpiler_lex(lexer)
if token.kind = lexerKindArray then
token := transpiler_lex(lexer)
end;
if Token.Kind <> lexerKindOf then
if token.kind <> lexerKindOf then
WriteString('[1..');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
Write(']')
end;
WriteString(' OF ');
@ -233,26 +233,26 @@ end;
proc transpile_enumeration_type(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString('(');
WriteLn();
WriteString(' ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
while Token.Kind = lexerKindComma do
while token.kind = lexerKindComma do
Write(',');
WriteLn();
WriteString(' ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end;
WriteLn();
WriteString(' )')
@ -260,25 +260,25 @@ end;
(* Union type handler. In this view the procedure only declares a local token
   and contains no statements, so it emits nothing.
   NOTE(review): looks like a placeholder - confirm whether union types are
   meant to be transpiled. *)
proc transpile_union_type(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
end;
proc transpile_procedure_type(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString('PROCEDURE(');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
while Token.Kind <> lexerKindRightParen do
while token.kind <> lexerKindRightParen do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer);
if Token.Kind = lexerKindComma then
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
if token.kind = lexerKindComma then
token := transpiler_lex(lexer);
WriteString(', ')
end
end;
@ -287,43 +287,43 @@ end;
(* Reads the next non-comment token and emits the type expression it starts.
   Dispatch by token kind:
     lexerKindRecord                     -> transpile_record_type
     lexerKindLeftParen                  -> transpile_enumeration_type
     lexerKindArray, lexerKindLeftSquare -> transpile_array_type
     lexerKindHat                        -> transpile_pointer_type
     lexerKindProc                       -> transpile_procedure_type
     lexerKindIdentifier                 -> the lexeme bytes are copied as is
   Any other kind produces no output. *)
proc transpile_type_expression(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
(* The token is read once here and never reassigned below; the checks that
   follow are separate if statements rather than an else chain. *)
Token := transpiler_lex(lexer);
if Token.Kind = lexerKindRecord then
token := transpiler_lex(lexer);
if token.kind = lexerKindRecord then
transpile_record_type(context, lexer)
end;
if Token.Kind = lexerKindLeftParen then
if token.kind = lexerKindLeftParen then
transpile_enumeration_type(context, lexer)
end;
(* Both the array keyword and an opening square bracket start an array type. *)
if (Token.Kind = lexerKindArray) or (Token.Kind = lexerKindLeftSquare) then
if (token.kind = lexerKindArray) or (token.kind = lexerKindLeftSquare) then
transpile_array_type(context, lexer)
end;
if Token.Kind = lexerKindHat then
if token.kind = lexerKindHat then
transpile_pointer_type(context, lexer)
end;
if Token.Kind = lexerKindProc then
if token.kind = lexerKindProc then
transpile_procedure_type(context, lexer)
end;
(* A plain type name: copy the bytes between lexer^.Start and lexer^.Current. *)
if Token.Kind = lexerKindIdentifier then
if token.kind = lexerKindIdentifier then
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start)
end
end;
(* Emits one type declaration: a leading whitespace string, the bytes of the
   current lexeme, ' = ', the type expression produced by
   transpile_type_expression(), and a terminator via write_semicolon(). *)
proc transpile_type_declaration(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString(' ');
Token := lexer_current(lexer);
token := lexer_current(lexer);
(* Copy the current lexeme; presumably the declared type's name - confirm
   against the caller. *)
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
(* Skip one token without checking its kind, then write the separator. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(' = ');
transpile_type_expression(context, lexer);
(* Consume the token following the type expression; its kind is not checked. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon();
end;
@ -333,12 +333,12 @@ var
begin
token := lexer_current(lexer);
if token.Kind = lexerKindType then
if token.kind = lexerKindType then
WriteString('TYPE');
WriteLn();
token := transpiler_lex(lexer);
while token.Kind = lexerKindIdentifier do
while token.kind = lexerKindIdentifier do
transpile_type_declaration(context, lexer);
token := transpiler_lex(lexer)
end;
@ -348,42 +348,42 @@ end;
(* Emits one variable declaration: a leading whitespace string, the bytes of
   the current lexeme, ': ', the type expression produced by
   transpile_type_expression(), and a terminator via write_semicolon(). *)
proc transpile_variable_declaration(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString(' ');
Token := lexer_current(lexer);
token := lexer_current(lexer);
(* Copy the current lexeme; presumably the variable's name - confirm against
   the caller. *)
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
(* Skip one token without checking its kind, then write the separator. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
WriteString(': ');
transpile_type_expression(context, lexer);
(* Consume the token following the type expression; its kind is not checked. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon()
end;
(* Emits the variable section if one starts at the current token.
   Returns TRUE when the current token is lexerKindVar, FALSE otherwise; in the
   FALSE case nothing is written and the lexer is not advanced.
   This commit changes the return type marker in the heading from ':' to '->'. *)
proc transpile_variable_part(context: PTranspilerContext, lexer: PLexer): BOOLEAN;
proc transpile_variable_part(context: PTranspilerContext, lexer: PLexer) -> BOOLEAN;
var
Token: LexerToken;
token: LexerToken;
result: BOOLEAN;
begin
Token := lexer_current(lexer);
result := Token.Kind = lexerKindVar;
token := lexer_current(lexer);
result := token.kind = lexerKindVar;
if result then
WriteString('VAR');
WriteLn();
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
(* One declaration per identifier; after each declaration the next token is
   read to decide whether the section continues. *)
while Token.Kind = lexerKindIdentifier do
while token.kind = lexerKindIdentifier do
transpile_variable_declaration(context, lexer);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end
end;
return result
end;
proc transpile_procedure_heading(context: PTranspilerContext, lexer: PLexer): LexerToken;
proc transpile_procedure_heading(context: PTranspilerContext, lexer: PLexer) -> LexerToken;
var
token: LexerToken;
result: LexerToken;
@ -398,7 +398,7 @@ begin
Write('(');
token := transpiler_lex(lexer);
while token.Kind <> lexerKindRightParen do
while token.kind <> lexerKindRightParen do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer);
@ -407,7 +407,7 @@ begin
transpile_type_expression(context, lexer);
token := transpiler_lex(lexer);
if (token.Kind = lexerKindSemicolon) or (token.Kind = lexerKindComma) then
if (token.kind = lexerKindSemicolon) or (token.kind = lexerKindComma) then
WriteString('; ');
token := transpiler_lex(lexer)
end
@ -416,7 +416,7 @@ begin
token := transpiler_lex(lexer);
(* Check for the return type and write it. *)
if token.Kind = lexerKindColon then
if token.kind = lexerKindArrow then
WriteString(': ');
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
@ -428,28 +428,28 @@ begin
return result
end;
proc transpile_expression(context: PTranspilerContext, lexer: PLexer, TrailingToken: LexerKind);
proc transpile_expression(context: PTranspilerContext, lexer: PLexer, trailing_token: LexerKind);
var
token: LexerToken;
written_bytes: CARDINAL;
begin
token := transpiler_lex(lexer);
while (token.Kind <> TrailingToken) & (token.Kind <> lexerKindEnd) do
while (token.kind <> trailing_token) & (token.kind <> lexerKindEnd) do
written_bytes := 0;
if token.Kind = lexerKindNull then
if token.kind = lexerKindNull then
WriteString('NIL ');
written_bytes := 1
end;
if token.Kind = lexerKindOr then
if token.kind = lexerKindOr then
WriteString('OR ');
written_bytes := 1
end;
if token.Kind = lexerKindAnd then
if token.kind = lexerKindAnd then
WriteString('AND ');
written_bytes := 1
end;
if token.Kind = lexerKindNot then
if token.kind = lexerKindNot then
WriteString('NOT ');
written_bytes := 1
end;
@ -478,7 +478,7 @@ end;
proc transpile_while_statement(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString(' WHILE ');
@ -488,7 +488,7 @@ begin
WriteLn();
transpile_statements(context, lexer);
WriteString(' END');
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end;
proc transpile_assignment_statement(context: PTranspilerContext, lexer: PLexer);
@ -499,27 +499,72 @@ end;
(* Emits the argument part of a call statement: writes '(' and then copies the
   bytes of every following token until a lexerKindSemicolon or lexerKindEnd
   token is reached. That terminating token is not written.
   No separator is written between the copied tokens, and no closing
   parenthesis is written explicitly; presumably it arrives as one of the
   copied tokens - TODO confirm. *)
proc transpile_call_statement(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString('(');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
while (Token.Kind <> lexerKindSemicolon) & (Token.Kind <> lexerKindEnd) do
while (token.kind <> lexerKindSemicolon) & (token.kind <> lexerKindEnd) do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end
end;
proc transpile_return_statement(context: PTranspilerContext, lexer: PLexer);
(* Emits a designator starting at the current lexeme. Introduced by this
   commit; the same statements previously sat inline in transpile_statement.
   Accepted shape, in order, every part after the base being optional:
     base lexeme, index groups '[...]', '^', '.' plus one lexeme, '^',
     index groups '[...]'.
   The field selector is handled by an if statement, so at most one '.'
   selector is processed per call. On return the lexer is positioned on the
   first token that did not match. *)
proc transpile_designator_expression(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString(' ');
(* Copy the bytes between lexer^.Start and lexer^.Current; presumably the
   identifier the caller just read - confirm against transpile_statement. *)
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer);
(* Index groups directly after the base: every token up to the closing
   bracket is copied verbatim with no separator. *)
while token.kind = lexerKindLeftSquare do
Write('[');
token := transpiler_lex(lexer);
while token.kind <> lexerKindRightSquare do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer)
end;
Write(']');
token := transpiler_lex(lexer)
end;
(* Optional dereference before the field selector. *)
if token.kind = lexerKindHat then
Write('^');
token := transpiler_lex(lexer)
end;
(* Optional field selector: the token after the dot is copied verbatim. *)
if token.kind = lexerKindDot then
Write('.');
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer)
end;
(* Optional dereference after the field selector. *)
if token.kind = lexerKindHat then
Write('^');
token := transpiler_lex(lexer)
end;
(* Index groups after the selector, handled the same way as the first loop. *)
while token.kind = lexerKindLeftSquare do
Write('[');
token := transpiler_lex(lexer);
while token.kind <> lexerKindRightSquare do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer)
end;
Write(']');
token := transpiler_lex(lexer)
end
end;
(* Emits a return statement: writes ' RETURN ', reads the next token, copies
   its bytes to StdOut and then advances the lexer by one more token.
   Only a single token is copied as the returned value. *)
proc transpile_return_statement(context: PTranspilerContext, lexer: PLexer);
var
token: LexerToken;
written_bytes: CARDINAL;
begin
WriteString(' RETURN ');
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
(* Step past the copied token so the caller sees what follows it. *)
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end;
proc transpile_statement(context: PTranspilerContext, lexer: PLexer);
@ -529,59 +574,23 @@ var
begin
token := transpiler_lex(lexer);
if token.Kind = lexerKindIf then
if token.kind = lexerKindIf then
transpile_if_statement(context, lexer)
end;
if token.Kind = lexerKindWhile then
if token.kind = lexerKindWhile then
transpile_while_statement(context, lexer)
end;
if token.Kind = lexerKindReturn then
if token.kind = lexerKindReturn then
transpile_return_statement(context, lexer)
end;
if token.Kind = lexerKindIdentifier then
WriteString(' ');
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer);
if token.kind = lexerKindIdentifier then
transpile_designator_expression(context, lexer);
token := lexer_current(lexer);
while token.Kind = lexerKindLeftSquare do
Write('[');
token := transpiler_lex(lexer);
while token.Kind <> lexerKindRightSquare do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer)
end;
Write(']');
token := transpiler_lex(lexer);
end;
if token.Kind = lexerKindHat then
Write('^');
token := transpiler_lex(lexer)
end;
if token.Kind = lexerKindDot then
Write('.');
token := transpiler_lex(lexer);
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer);
end;
if token.Kind = lexerKindHat then
Write('^');
token := transpiler_lex(lexer)
end;
while token.Kind = lexerKindLeftSquare do
Write('[');
token := transpiler_lex(lexer);
while token.Kind <> lexerKindRightSquare do
written_bytes := WriteNBytes(StdOut, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start);
token := transpiler_lex(lexer)
end;
Write(']');
token := transpiler_lex(lexer);
end;
if token.Kind = lexerKindAssignment then
if token.kind = lexerKindAssignment then
transpile_assignment_statement(context, lexer)
end;
if token.Kind = lexerKindLeftParen then
if token.kind = lexerKindLeftParen then
transpile_call_statement(context, lexer)
end
end
@ -589,15 +598,15 @@ end;
proc transpile_statements(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
begin
Token := lexer_current(lexer);
token := lexer_current(lexer);
while Token.Kind <> lexerKindEnd do
while token.kind <> lexerKindEnd do
transpile_statement(context, lexer);
Token := lexer_current(lexer);
token := lexer_current(lexer);
if Token.Kind = lexerKindSemicolon then
if token.kind = lexerKindSemicolon then
Write(';')
end;
WriteLn()
@ -606,10 +615,10 @@ end;
proc transpile_statement_part(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
begin
Token := lexer_current(lexer);
if Token.Kind = lexerKindBegin then
token := lexer_current(lexer);
if token.kind = lexerKindBegin then
WriteString('BEGIN');
WriteLn();
transpile_statements(context, lexer)
@ -618,43 +627,43 @@ end;
(* Emits a complete procedure declaration: the heading, then the constant
   part, the variable part and the statement part, followed by 'END ', the
   procedure name and a terminator via write_semicolon(). *)
proc transpile_procedure_declaration(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
seen_part: BOOLEAN;
written_bytes: CARDINAL;
begin
(* The token returned by the heading is kept because its identifierKind
   field is written out again after 'END ' below. *)
Token := transpile_procedure_heading(context, lexer);
token := transpile_procedure_heading(context, lexer);
(* seen_part is assigned twice and never read in this body; the calls are
   made for their output and their effect on the lexer. *)
seen_part := transpile_constant_part(context, lexer);
seen_part := transpile_variable_part(context, lexer);
transpile_statement_part(context, lexer);
WriteString('END ');
(* Element 1 of identifierKind supplies the byte count and the data starts at
   element 2; presumably a length-prefixed string - confirm against the
   LexerToken declaration. *)
written_bytes := WriteNBytes(StdOut, ORD(Token.identifierKind[1]), ADR(Token.identifierKind[2]));
written_bytes := WriteNBytes(StdOut, ORD(token.identifierKind[1]), ADR(token.identifierKind[2]));
(* Two tokens are consumed around the terminator; their kinds are not checked. *)
Token := transpiler_lex(lexer);
token := transpiler_lex(lexer);
write_semicolon();
Token := transpiler_lex(lexer)
token := transpiler_lex(lexer)
end;
(* Emits every consecutive procedure declaration: as long as the lexer's
   current token is lexerKindProc, one declaration is transpiled and a
   newline is written after it. *)
proc transpile_procedure_part(context: PTranspilerContext, lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
begin
Token := lexer_current(lexer);
token := lexer_current(lexer);
while Token.Kind = lexerKindProc do
while token.kind = lexerKindProc do
transpile_procedure_declaration(context, lexer);
(* Re-read the current token; transpile_procedure_declaration has already
   advanced the lexer. *)
Token := lexer_current(lexer);
token := lexer_current(lexer);
WriteLn()
end
end;
(* Entry point of the transpiler: declares a local TranspilerContext and
   passes its address, together with the lexer, to transpile_module().
   The context is not initialised here before its address is taken, and the
   locals `token` and `written_bytes` are not used in this body. *)
proc transpile(lexer: PLexer);
var
Token: LexerToken;
token: LexerToken;
written_bytes: CARDINAL;
Context: TranspilerContext;
context: TranspilerContext;
begin
transpile_module(ADR(Context), lexer)
transpile_module(ADR(context), lexer)
end;
end Transpiler.