Support one hardcoded import

This commit is contained in:
2025-07-10 00:43:17 +02:00
parent 181b19eefe
commit 34abb6b4f5
18 changed files with 396 additions and 312 deletions

View File

@ -1,17 +1,9 @@
module;
from FIO import ReadNBytes;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
import Common;
const
(* Buffer size. Lexer setup code further down assigns it to lexer^.size, and
 * lexer_destroy hands that size to DEALLOCATE together with lexer^.buffer.
 * NOTE(review): 85536 is not a power of two - confirm it is not a typo for 65536. *)
CHUNK_SIZE = 85536;
CHUNK_SIZE := 85536;
type
(*
@ -62,20 +54,109 @@ type
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = proc(PLexer, PLexerToken);
(* A single token. lexer_current and lexer_lex return values of this type. *)
LexerToken = record
(* Token category, one of the LexerKind values. *)
kind: LexerKind;
(* Payload members. NOTE(review): the procedures in this module write them as
 * token^.booleanKind and token^.identifierKind, without going through value;
 * confirm that this access form matches the declaration. *)
value: union
booleanKind: Bool;
identifierKind: Identifier;
integerKind: Int;
stringKind: ShortString
end;
(* NOTE(review): presumably the positions where the token begins and ends;
 * none of the visible code assigns these fields - confirm. *)
start_location: TextLocation;
end_location: TextLocation
end;
(* Signature shared by the transition_action_* procedures: a pointer to the
 * lexer and a pointer to the token. *)
TransitionAction = proc(^Lexer, ^LexerToken);
(* One cell of the transition table: an action procedure paired with a state. *)
Transition = record
action: TransitionAction;
next_state: TransitionState
end;
(* One row of the transition table. The table setup code addresses a cell as
 * transitions[ORD(state) + 1][ORD(class) + 1], so this type is the inner,
 * per-class dimension.
 * NOTE(review): 22 presumably equals the number of TransitionClass values - confirm. *)
TransitionClasses = [22]Transition;
(* A position in the input: a character pointer together with a text location.
 * Exported (marked with an asterisk). *)
BufferPosition* = record
(* Dereferenced by the transition actions to inspect the character at this position. *)
iterator: ^Char;
location: TextLocation
end;
(* Lexer state. Exported (marked with an asterisk). *)
Lexer* = record
(* Input file, stored by lexer_make. *)
input: ^FILE;
(* Buffer released by lexer_destroy. *)
buffer: ^Char;
(* Size passed to DEALLOCATE together with buffer; set to CHUNK_SIZE during setup. *)
size: Word;
(* Reset to 0 by lexer_make.
 * NOTE(review): presumably the number of bytes currently buffered - confirm. *)
length: Word;
(* lexer_current copies start into current before it begins scanning;
 * the transition actions compare characters at both positions. *)
start: BufferPosition;
current: BufferPosition
end;
(* Token categories stored in LexerToken.kind. Exported (marked with an asterisk).
 * The keyword kinds are assigned by transition_action_key_id after a successful
 * compare_keyword call. *)
LexerKind* = (
(* Assigned by transition_action_eof. *)
lexerKindEof,
(* Assigned by transition_action_key_id before it tries the keywords. *)
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
(* Used for both TRUE and FALSE; the value is stored in booleanKind. *)
lexerKindBoolean,
(* Assigned for the keyword NIL. *)
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
(* Assigned by transition_action_delimited when the token starts with '('. *)
lexerKindComment,
(* Assigned by transition_action_integer. *)
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
var
(* Character class lookup. lexer_current indexes it with ORD(character) + 1.
 * NOTE(review): the table has only 128 entries - confirm that bytes above 127
 * cannot reach this lookup. *)
classification: [128]TransitionClass;
(* Transition table, addressed as transitions[ORD(state) + 1][ORD(class) + 1]. *)
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: CARDINAL;
i: Word;
begin
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
@ -213,12 +294,12 @@ begin
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var
result: BOOLEAN;
index: CARDINAL;
keyword_length: CARDINAL;
continue: BOOLEAN;
result: Bool;
index: Word;
keyword_length: Word;
continue: Bool;
begin
index := 0;
result := true;
@ -237,25 +318,25 @@ begin
end;
(* Reached the end of file: sets the token kind to lexerKindEof.
 * The lexer argument is not used. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindEof
end;
(* Advances the iterator of the given buffer position with INC.
 * The location field of the position is not updated here. *)
proc increment(position: PBufferPosition);
proc increment(position: ^BufferPosition);
begin
INC(position^.iterator)
end;
(* Add the character to the token currently read and advance to the next character.
 * Only lexer^.current is moved, via increment; the token argument is not touched. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.current))
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon
@ -278,7 +359,7 @@ begin
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
@ -304,7 +385,7 @@ begin
end;
(* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.start));
@ -316,9 +397,9 @@ begin
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var
text_length: CARDINAL;
text_length: Word;
begin
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment
@ -347,7 +428,7 @@ begin
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindIdentifier;
@ -355,95 +436,95 @@ begin
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('program', lexer^.start, lexer^.current.iterator) then
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram
end;
if compare_keyword('import', lexer^.start, lexer^.current.iterator) then
if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport
end;
if compare_keyword('const', lexer^.start, lexer^.current.iterator) then
if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst
end;
if compare_keyword('var', lexer^.start, lexer^.current.iterator) then
if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar
end;
if compare_keyword('if', lexer^.start, lexer^.current.iterator) then
if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf
end;
if compare_keyword('then', lexer^.start, lexer^.current.iterator) then
if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen
end;
if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then
if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif
end;
if compare_keyword('else', lexer^.start, lexer^.current.iterator) then
if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse
end;
if compare_keyword('while', lexer^.start, lexer^.current.iterator) then
if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile
end;
if compare_keyword('do', lexer^.start, lexer^.current.iterator) then
if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc
end;
if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then
if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin
end;
if compare_keyword('end', lexer^.start, lexer^.current.iterator) then
if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd
end;
if compare_keyword('type', lexer^.start, lexer^.current.iterator) then
if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType
end;
if compare_keyword('record', lexer^.start, lexer^.current.iterator) then
if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord
end;
if compare_keyword('union', lexer^.start, lexer^.current.iterator) then
if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull
end;
if compare_keyword('or', lexer^.start, lexer^.current.iterator) then
if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr
end;
if compare_keyword('return', lexer^.start, lexer^.current.iterator) then
if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn
end;
if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then
if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefer
end;
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom
end;
if compare_keyword('module', lexer^.start, lexer^.current.iterator) then
if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule
end;
if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then
if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindXor
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
@ -451,7 +532,7 @@ end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd
@ -502,11 +583,11 @@ begin
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var
buffer: String;
integer_length: CARDINAL;
found: BOOLEAN;
integer_length: Word;
found: Bool;
begin
token^.kind := lexerKindInteger;
@ -805,7 +886,7 @@ begin
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
end;
proc lexer_initialize(lexer: PLexer, input: File);
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin
lexer^.input := input;
lexer^.length := 0;
@ -815,19 +896,20 @@ begin
lexer^.size := CHUNK_SIZE
end;
proc lexer_current(lexer: PLexer) -> LexerToken;
(* Returns the last read token. *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
index1: CARDINAL;
index2: CARDINAL;
index1: Word;
index2: Word;
begin
lexer^.current := lexer^.start;
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
while current_state <> transitionStateEnd do
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@ -849,7 +931,8 @@ begin
return result
end;
proc lexer_lex(lexer: PLexer) -> LexerToken;
(* Read and return the next token. *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var
result: LexerToken;
begin
@ -865,12 +948,15 @@ begin
return result
end;
(* Releases lexer^.buffer by passing it and lexer^.size to DEALLOCATE.
 * NOTE(review): lexer^.input is not closed here - confirm the caller owns the file. *)
proc lexer_destroy(lexer: PLexer);
proc lexer_destroy*(lexer: ^Lexer);
begin
DEALLOCATE(lexer^.buffer, lexer^.size)
end;
(* Runs both table setup procedures: initialize_classification, then
 * initialize_transitions. Takes no arguments.
 * NOTE(review): this procedure carries no export marker and nothing visible in
 * this module calls it - confirm how it is invoked before the lexer is used. *)
proc lexer_initialize();
begin
initialize_classification();
initialize_transitions()
end;
end.