Handle ASCII codes > 128 in the tokenizer

2025-05-30 19:51:40 +02:00
parent 4eccc147ba
commit 8ad1259ee8
2 changed files with 128 additions and 120 deletions


@@ -72,6 +72,8 @@ var
   transitions: [16]TransitionClasses;
 proc initialize_classification();
+var
+  i: CARDINAL;
 begin
   classification[1] := transitionClassEof; (* NUL *)
   classification[2] := transitionClassInvalid; (* SOH *)
@@ -200,24 +202,30 @@ begin
   classification[125] := transitionClassSingle; (* | *)
   classification[126] := transitionClassOther; (* } *)
   classification[127] := transitionClassSingle; (* ~ *)
-  classification[128] := transitionClassInvalid (* DEL *)
+  classification[128] := transitionClassInvalid; (* DEL *)
+  i := 129;
+  while i <= 256 do
+    classification[i] := transitionClassOther;
+    i := i + 1
+  end
 end;
 proc compare_keyword(Keyword: ARRAY OF CHAR, TokenStart: PLexerBuffer, TokenEnd: PLexerBuffer): BOOLEAN;
 var
-  Result: BOOLEAN;
+  result: BOOLEAN;
   Index: CARDINAL;
 begin
   Index := 0;
-  Result := TRUE;
+  result := TRUE;
-  while (Index < Length(Keyword)) & (TokenStart <> TokenEnd) & Result DO
-    Result := (Keyword[Index] = TokenStart^) or (Lower(Keyword[Index]) = TokenStart^);
+  while (Index < Length(Keyword)) & (TokenStart <> TokenEnd) & result DO
+    result := (Keyword[Index] = TokenStart^) or (Lower(Keyword[Index]) = TokenStart^);
     INC(TokenStart);
     INC(Index)
   end;
-  Result := (Index = Length(Keyword)) & (TokenStart = TokenEnd) & Result;
-  return Result
+  result := (Index = Length(Keyword)) & (TokenStart = TokenEnd) & result;
+  return result
 end;
 (* Reached the end of file. *)
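
Note on the new loop above: the classification table is 1-based, so the entry for a raw byte value b sits at classification[b + 1]. After this change, every byte from 128 through 255 is explicitly classified as transitionClassOther instead of being left uninitialized, presumably so that non-ASCII bytes (for example in UTF-8 encoded input) no longer hit an undefined class. A minimal sketch of how such a lookup could be expressed, assuming a hypothetical classify helper that is not part of this commit:

(* Hypothetical helper, not part of this commit: map one raw input byte to
   its transition class through the 1-based classification table. *)
proc classify(ch: CHAR): TransitionClass;
begin
  return classification[ORD(ch) + 1]
end;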
@@ -761,7 +769,7 @@ var
   CurrentClass: TransitionClass;
   CurrentState: TransitionState;
   CurrentTransition: Transition;
-  Result: LexerToken;
+  result: LexerToken;
 begin
   lexer^.Current := lexer^.Start;
   CurrentState := transitionStateStart;
@@ -771,16 +779,16 @@ begin
     CurrentTransition := transitions[ORD(CurrentState) + 1][ORD(CurrentClass) + 1];
     if CurrentTransition.Action <> nil then
-      CurrentTransition.Action(lexer, ADR(Result))
+      CurrentTransition.Action(lexer, ADR(result))
     end;
     CurrentState := CurrentTransition.NextState
   end;
-  return Result
+  return result
 end;
 proc lexer_lex(lexer: PLexer): LexerToken;
 var
-  Result: LexerToken;
+  result: LexerToken;
 begin
   if lexer^.Length = 0 then
     lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
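
The lexer_current loop above drives a table-based state machine, and the transitions table uses the same 1-based indexing as the classification table, hence the ORD(...) + 1 in the lookup. A rough sketch of that lookup as a standalone helper (next_transition is hypothetical and not part of the codebase shown here):

(* Hypothetical helper, for illustration only: fetch the transition for the
   current state and character class from the 1-based transitions table. *)
proc next_transition(state: TransitionState, currentClass: TransitionClass): Transition;
begin
  return transitions[ORD(state) + 1][ORD(currentClass) + 1]
end;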
@@ -788,8 +796,8 @@ begin
   end;
   lexer^.Start := lexer^.Current;
-  Result := lexer_current(lexer);
-  return Result
+  result := lexer_current(lexer);
+  return result
 end;
 proc lexer_destroy(lexer: PLexer);
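
Finally, a rough usage sketch for the renamed result value returned by lexer_lex. The loop below assumes the token record exposes a Kind field and that an end-of-file kind named lexerKindEof exists; both names are assumptions for illustration and are not taken from this diff:

(* Hypothetical caller, for illustration only: pull tokens until end of input.
   token.Kind and lexerKindEof are assumed names, not confirmed by this diff. *)
proc consume_all_tokens(lexer: PLexer);
var
  token: LexerToken;
begin
  token := lexer_lex(lexer);
  while token.Kind <> lexerKindEof do
    token := lexer_lex(lexer)
  end
end;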