From 94f7fe3f0e8bc66b020cfef225e8424d2dda4be8 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sun, 5 Jun 2022 15:16:04 +0200 Subject: [PATCH] Start a Modula-2 experiment --- .gitignore | 3 + README.md | 63 ++++ Rakefile | 83 +++++ boot/Compiler.mod | 18 ++ boot/Lexer.def | 95 ++++++ boot/Lexer.mod | 722 ++++++++++++++++++++++++++++++++++++++++++++ boot/Transpiler.def | 7 + boot/Transpiler.mod | 160 ++++++++++ 8 files changed, 1151 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 Rakefile create mode 100644 boot/Compiler.mod create mode 100644 boot/Lexer.def create mode 100644 boot/Lexer.mod create mode 100644 boot/Transpiler.def create mode 100644 boot/Transpiler.mod diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cde7e11 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +a.out +/dub.selections.json +/build/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..211ca55 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# Elna programming language + +Elna is a simple, imperative, low-level programming language. + +It is intended to accompany other languages in areas where a high-level +language doesn't fit well. It is also supposed to be an intermediate +representation for such a hypothetical high-level programming language. + +## File extension + +.elna + +## Current implementation + +This repository contains a GCC frontend for Elna. After finishing the frontend +I'm planning to rewrite the compiler in Elna itself with its own backend and +a hand-written parser. So GCC gives a way to have a simple bootstrap compiler +and a possibility to compile Elna programs for different platforms. + +## Grammar + +program = block "." ; + +block = [ "const" ident "=" number {"," ident "=" number} ";"] + [ "var" ident {"," ident} ";"] + { "procedure" ident ";" block ";" } statement ; + +statement = [ ident ":=" expression | "call" ident + | "?" ident | "!" 
expression + | "begin" statement {";" statement } "end" + | "if" condition "then" statement + | "while" condition "do" statement ]; + +condition = "odd" expression | + expression ("="|"#"|"<"|"<="|">"|">=") expression ; + +expression = [ "+"|"-"] term { ("+"|"-") term}; + +term = factor {("*"|"/") factor}; + +factor = ident | number | "(" expression ")"; + +## Build + +The frontend requires GCC 14.2.0 (not tested with other versions). + +Download the GCC source. Copy the contents of this repository into `gcc/elna` +inside GCC. Finally build GCC enabling the frontend with +`--enable-languages=c,c++,elna`. After the installation the compiler can be +invoked with `$prefix/bin/gelna`. + +There is also a `Rakefile` that downloads, builds and installs GCC into the +`./build/` subdirectory. The `Rakefile` assumes that ruby and rake, as well as +all GCC dependencies are already available in the system. It works under Linux +and Mac OS. In the latter case GCC is patched with the patches used by Homebrew +(official GCC doesn't support Apple silicon targets). Invoke with + +```sh +rake boot +``` + +See `rake -T` for more tasks. The GCC source is under `build/tools`. The +installation path is `build/host/install`. diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..870de81 --- /dev/null +++ b/Rakefile @@ -0,0 +1,83 @@ +require 'pathname' +require 'rake/clean' +require 'open3' + +M2C = 'gm2' # Modula-2 compiler. +BOOT_OBJECTS = FileList['boot/*.mod'] + .map do |source| + Pathname.new(source).basename.sub_ext('.o') + end + +def source_for_object(out_file) + path = Pathname.new(out_file).relative_path_from('build') + result = ['build/boot'] + + definition = File.join('boot', path.basename.sub_ext('.def')) + result << definition if File.exist? definition + + implementation = path.sub_ext('.mod').to_path + implementation = File.join 'build', implementation unless File.exist? 
implementation + + result << implementation +end + +directory 'build/boot' +directory 'build/self' + +CLEAN.include 'build' + +rule(/build\/.+\.o$/ => ->(file) { source_for_object(file) }) do |t| + sources = t.prerequisites.filter { |f| f.end_with? '.mod' } + + sh M2C, '-c', '-I', 'boot', '-o', t.name, *sources +end + +rule(/build\/self\/.+\.mod$/ => [ + 'build/self', 'build/boot/Compiler', + ->(file) { File.join('boot', Pathname.new(file).basename) } +]) do |t| + sources, compiler = t.prerequisites + .reject { |f| File.directory? f } + .partition { |f| f.end_with? '.mod' } + + File.open t.name, 'w' do |output| + puts + puts(compiler * ' ') + + Open3.popen2(*compiler) do |cl_in, cl_out| + cl_in.write File.read(*sources) + cl_in.close + + IO.copy_stream cl_out, output + cl_out.close + end + end +end + +['boot', 'self'].each do |sub| + compiler_binary = Pathname.new('build') + sub + 'Compiler' + + file compiler_binary.to_path => BOOT_OBJECTS.map { |file| File.join('build', sub, file) } do |t| + sh M2C, '-o', t.name, *t.prerequisites + end + + compiler_object = compiler_binary.sub_ext('.o') + file compiler_object.to_path => source_for_object(compiler_object) do |t| + sources = t.prerequisites.filter { |f| f.end_with? 
'.mod' } + + sh M2C, '-fscaffold-main', '-c', '-I', 'boot', '-o', t.name, *sources + end +end + +task default: 'build/self/Compiler' +task default: 'build/self/Compiler.mod' +task default: 'boot/Compiler.mod' +task :default do |t| + exe, previous_output, source = t.prerequisites + + cat_arguments = ['cat', source] + diff_arguments = ['diff', '-Nur', '--text', previous_output, '-'] + + puts [cat_arguments * ' ', exe, diff_arguments * ' '].join(' | ') + Open3.pipeline(cat_arguments, exe, diff_arguments) +end diff --git a/boot/Compiler.mod b/boot/Compiler.mod new file mode 100644 index 0000000..1cd98ab --- /dev/null +++ b/boot/Compiler.mod @@ -0,0 +1,18 @@ +MODULE Compiler; + +FROM FIO IMPORT StdIn; +FROM SYSTEM IMPORT ADR; + +FROM Lexer IMPORT Lexer, LexerDestroy, LexerInitialize; +FROM Transpiler IMPORT Transpile; + +VAR + ALexer: Lexer; + +BEGIN + LexerInitialize(ADR(ALexer), StdIn); + + Transpile(ADR(ALexer)); + + LexerDestroy(ADR(ALexer)) +END Compiler. diff --git a/boot/Lexer.def b/boot/Lexer.def new file mode 100644 index 0000000..23bc7f3 --- /dev/null +++ b/boot/Lexer.def @@ -0,0 +1,95 @@ +DEFINITION MODULE Lexer; + +FROM FIO IMPORT File; + +TYPE + PLexerBuffer = POINTER TO CHAR; + Lexer = RECORD + Input: File; + Buffer: PLexerBuffer; + Size: CARDINAL; + Length: CARDINAL; + Start: PLexerBuffer; + Current: PLexerBuffer + END; + PLexer = POINTER TO Lexer; + LexerKind = ( + lexerKindEof, + lexerKindIdentifier, + lexerKindIf, + lexerKindThen, + lexerKindElse, + lexerKindElsif, + lexerKindWhile, + lexerKindDo, + lexerKindProc, + lexerKindBegin, + lexerKindEnd, + lexerKindImplementation, + lexerKindConst, + lexerKindVar, + lexerKindCase, + lexerKindOf, + lexerKindType, + lexerKindRecord, + lexerKindUnion, + lexerKindPipe, + lexerKindTo, + lexerKindBoolean, + lexerKindNull, + lexerKindAnd, + lexerKindOr, + lexerKindNot, + lexerKindReturn, + lexerKindDefinition, + lexerKindRange, + lexerKindLeftParen, + lexerKindRightParen, + lexerKindLeftSquare, + 
lexerKindRightSquare, + lexerKindGreaterEqual, + lexerKindLessEqual, + lexerKindGreaterThan, + lexerKindLessThan, + lexerKindNotEqual, + lexerKindEqual, + lexerKindSemicolon, + lexerKindDot, + lexerKindComma, + lexerKindPlus, + lexerKindMinus, + lexerKindMultiplication, + lexerKindDivision, + lexerKindRemainder, + lexerKindAssignment, + lexerKindColon, + lexerKindHat, + lexerKindAt, + lexerKindComment, + lexerKindInteger, + lexerKindWord, + lexerKindCharacter, + lexerKindString, + lexerKindFrom, + lexerKindExclamation, + lexerKindArrow, + lexerKindTrait, + lexerKindProgram, + lexerKindModule, + lexerKindImport + ); + LexerToken = RECORD + CASE Kind: LexerKind OF + lexerKindBoolean: booleanKind: BOOLEAN + END + END; + PLexerToken = POINTER TO LexerToken; + +PROCEDURE LexerInitialize(ALexer: PLexer; Input: File); +PROCEDURE LexerDestroy(ALexer: PLexer); +(* Returns the last read token. *) +PROCEDURE LexerCurrent(ALexer: PLexer): LexerToken; +(* Read and return the next token. *) +PROCEDURE LexerLex(ALexer: PLexer): LexerToken; + +END Lexer. diff --git a/boot/Lexer.mod b/boot/Lexer.mod new file mode 100644 index 0000000..4f09fc3 --- /dev/null +++ b/boot/Lexer.mod @@ -0,0 +1,722 @@ +IMPLEMENTATION MODULE Lexer; + +FROM FIO IMPORT ReadNBytes; +FROM SYSTEM IMPORT ADR; + +FROM Storage IMPORT DEALLOCATE, ALLOCATE; +FROM Strings IMPORT Length; +FROM MemUtils IMPORT MemZero; + +CONST + ChunkSize = 65536; + +TYPE + (* + * Classification table assigns each possible character to a group (class). All + * characters of the same group a handled equivalently. 
+ * + * Classification: + *) + TransitionClass = ( + transitionClassInvalid, + transitionClassDigit, + transitionClassAlpha, + transitionClassSpace, + transitionClassColon, + transitionClassEquals, + transitionClassLeftParen, + transitionClassRightParen, + transitionClassAsterisk, + transitionClassUnderscore, + transitionClassSingle, + transitionClassHex, + transitionClassZero, + transitionClassX, + transitionClassEof, + transitionClassDot, + transitionClassMinus, + transitionClassSingleQuote, + transitionClassDoubleQuote, + transitionClassGreater, + transitionClassLess, + transitionClassOther + ); + TransitionState = ( + transitionStateStart, + transitionStateColon, + transitionStateIdentifier, + transitionStateDecimal, + transitionStateGreater, + transitionStateMinus, + transitionStateLeftParen, + transitionStateLess, + transitionStateDot, + transitionStateComment, + transitionStateClosingComment, + transitionStateCharacter, + transitionStateString, + transitionStateLeadingZero, + transitionStateDecimalSuffix, + transitionStateEnd + ); + TransitionAction = PROCEDURE(PLexer, PLexerToken); + Transition = RECORD + Action: TransitionAction; + NextState: TransitionState + END; + +VAR + Classification: ARRAY[1..128] OF TransitionClass; + Transitions: ARRAY[0..MAX(TransitionState)] OF ARRAY[0..MAX(TransitionClass)] OF Transition; + +PROCEDURE InitializeClassification(); +BEGIN + Classification[1] := transitionClassEof; (* NUL *) + Classification[2] := transitionClassInvalid; (* SOH *) + Classification[3] := transitionClassInvalid; (* STX *) + Classification[4] := transitionClassInvalid; (* ETX *) + Classification[5] := transitionClassInvalid; (* EOT *) + Classification[6] := transitionClassInvalid; (* EMQ *) + Classification[7] := transitionClassInvalid; (* ACK *) + Classification[8] := transitionClassInvalid; (* BEL *) + Classification[9] := transitionClassInvalid; (* BS *) + Classification[10] := transitionClassSpace; (* HT *) + Classification[11] := 
transitionClassSpace; (* LF *) + Classification[12] := transitionClassInvalid; (* VT *) + Classification[13] := transitionClassInvalid; (* FF *) + Classification[14] := transitionClassSpace; (* CR *) + Classification[15] := transitionClassInvalid; (* SO *) + Classification[16] := transitionClassInvalid; (* SI *) + Classification[17] := transitionClassInvalid; (* DLE *) + Classification[18] := transitionClassInvalid; (* DC1 *) + Classification[19] := transitionClassInvalid; (* DC2 *) + Classification[20] := transitionClassInvalid; (* DC3 *) + Classification[21] := transitionClassInvalid; (* DC4 *) + Classification[22] := transitionClassInvalid; (* NAK *) + Classification[23] := transitionClassInvalid; (* SYN *) + Classification[24] := transitionClassInvalid; (* ETB *) + Classification[25] := transitionClassInvalid; (* CAN *) + Classification[26] := transitionClassInvalid; (* EM *) + Classification[27] := transitionClassInvalid; (* SUB *) + Classification[28] := transitionClassInvalid; (* ESC *) + Classification[29] := transitionClassInvalid; (* FS *) + Classification[30] := transitionClassInvalid; (* GS *) + Classification[31] := transitionClassInvalid; (* RS *) + Classification[32] := transitionClassInvalid; (* US *) + Classification[33] := transitionClassSpace; (* Space *) + Classification[34] := transitionClassSingle; (* ! 
*) + Classification[35] := transitionClassDoubleQuote; (* " *) + Classification[36] := transitionClassOther; (* # *) + Classification[37] := transitionClassOther; (* $ *) + Classification[38] := transitionClassSingle; (* % *) + Classification[39] := transitionClassSingle; (* & *) + Classification[40] := transitionClassSingleQuote; (* ' *) + Classification[41] := transitionClassLeftParen; (* ( *) + Classification[42] := transitionClassRightParen; (* ) *) + Classification[43] := transitionClassAsterisk; (* * *) + Classification[44] := transitionClassSingle; (* + *) + Classification[45] := transitionClassSingle; (* , *) + Classification[46] := transitionClassMinus; (* - *) + Classification[47] := transitionClassDot; (* . *) + Classification[48] := transitionClassSingle; (* / *) + Classification[49] := transitionClassZero; (* 0 *) + Classification[50] := transitionClassDigit; (* 1 *) + Classification[51] := transitionClassDigit; (* 2 *) + Classification[52] := transitionClassDigit; (* 3 *) + Classification[53] := transitionClassDigit; (* 4 *) + Classification[54] := transitionClassDigit; (* 5 *) + Classification[55] := transitionClassDigit; (* 6 *) + Classification[56] := transitionClassDigit; (* 7 *) + Classification[57] := transitionClassDigit; (* 8 *) + Classification[58] := transitionClassDigit; (* 9 *) + Classification[59] := transitionClassColon; (* : *) + Classification[60] := transitionClassSingle; (* ; *) + Classification[61] := transitionClassLess; (* < *) + Classification[62] := transitionClassEquals; (* = *) + Classification[63] := transitionClassGreater; (* > *) + Classification[64] := transitionClassOther; (* ? 
*) + Classification[65] := transitionClassSingle; (* @ *) + Classification[66] := transitionClassAlpha; (* A *) + Classification[67] := transitionClassAlpha; (* B *) + Classification[68] := transitionClassAlpha; (* C *) + Classification[69] := transitionClassAlpha; (* D *) + Classification[70] := transitionClassAlpha; (* E *) + Classification[71] := transitionClassAlpha; (* F *) + Classification[72] := transitionClassAlpha; (* G *) + Classification[73] := transitionClassAlpha; (* H *) + Classification[74] := transitionClassAlpha; (* I *) + Classification[75] := transitionClassAlpha; (* J *) + Classification[76] := transitionClassAlpha; (* K *) + Classification[77] := transitionClassAlpha; (* L *) + Classification[78] := transitionClassAlpha; (* M *) + Classification[79] := transitionClassAlpha; (* N *) + Classification[80] := transitionClassAlpha; (* O *) + Classification[81] := transitionClassAlpha; (* P *) + Classification[82] := transitionClassAlpha; (* Q *) + Classification[83] := transitionClassAlpha; (* R *) + Classification[84] := transitionClassAlpha; (* S *) + Classification[85] := transitionClassAlpha; (* T *) + Classification[86] := transitionClassAlpha; (* U *) + Classification[87] := transitionClassAlpha; (* V *) + Classification[88] := transitionClassAlpha; (* W *) + Classification[89] := transitionClassAlpha; (* X *) + Classification[90] := transitionClassAlpha; (* Y *) + Classification[91] := transitionClassAlpha; (* Z *) + Classification[92] := transitionClassSingle; (* [ *) + Classification[93] := transitionClassOther; (* \ *) + Classification[94] := transitionClassSingle; (* ] *) + Classification[95] := transitionClassSingle; (* ^ *) + Classification[96] := transitionClassUnderscore; (* _ *) + Classification[97] := transitionClassOther; (* ` *) + Classification[98] := transitionClassHex; (* a *) + Classification[99] := transitionClassHex; (* b *) + Classification[100] := transitionClassHex; (* c *) + Classification[101] := transitionClassHex; (* 
d *) + Classification[102] := transitionClassHex; (* e *) + Classification[103] := transitionClassHex; (* f *) + Classification[104] := transitionClassAlpha; (* g *) + Classification[105] := transitionClassAlpha; (* h *) + Classification[106] := transitionClassAlpha; (* i *) + Classification[107] := transitionClassAlpha; (* j *) + Classification[108] := transitionClassAlpha; (* k *) + Classification[109] := transitionClassAlpha; (* l *) + Classification[110] := transitionClassAlpha; (* m *) + Classification[111] := transitionClassAlpha; (* n *) + Classification[112] := transitionClassAlpha; (* o *) + Classification[113] := transitionClassAlpha; (* p *) + Classification[114] := transitionClassAlpha; (* q *) + Classification[115] := transitionClassAlpha; (* r *) + Classification[116] := transitionClassAlpha; (* s *) + Classification[117] := transitionClassAlpha; (* t *) + Classification[118] := transitionClassAlpha; (* u *) + Classification[119] := transitionClassAlpha; (* v *) + Classification[120] := transitionClassAlpha; (* w *) + Classification[121] := transitionClassX; (* x *) + Classification[122] := transitionClassAlpha; (* y *) + Classification[123] := transitionClassAlpha; (* z *) + Classification[124] := transitionClassOther; (* { *) + Classification[125] := transitionClassSingle; (* | *) + Classification[126] := transitionClassOther; (* } *) + Classification[127] := transitionClassSingle; (* ~ *) + Classification[128] := transitionClassInvalid (* DEL *) +END InitializeClassification; + +PROCEDURE CompareKeyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN; +VAR + Result: BOOLEAN; + Index: CARDINAL; +BEGIN + Index := 0; + Result := TRUE; + + WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND Result DO + Result := Keyword[Index] = TokenStart^; + INC(TokenStart); + INC(Index) + END; + RETURN (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND Result +END CompareKeyword; + +(* Reached the end of file. 
*) +PROCEDURE TransitionActionEof(ALexer: PLexer; AToken: PLexerToken); +BEGIN + AToken^.Kind := lexerKindEof +END TransitionActionEof; + +(* Add the character to the token currently read and advance to the next character. *) +PROCEDURE TransitionActionAccumulate(ALexer: PLexer; AToken: PLexerToken); +BEGIN + INC(ALexer^.Current) +END TransitionActionAccumulate; + +(* The current character is not a part of the token. Finish the token already + * read, taking its kind from the token's first character. Don't advance to the next character. *) +PROCEDURE TransitionActionFinalize(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF ALexer^.Start^ = ':' THEN + AToken^.Kind := lexerKindColon + ELSIF ALexer^.Start^ = '>' THEN + AToken^.Kind := lexerKindGreaterThan + ELSIF ALexer^.Start^ = '<' THEN + AToken^.Kind := lexerKindLessThan + ELSIF ALexer^.Start^ = '(' THEN + AToken^.Kind := lexerKindLeftParen + ELSIF ALexer^.Start^ = '-' THEN + AToken^.Kind := lexerKindMinus (* a lone "-" is the minus operator, not a parenthesis *) + ELSIF ALexer^.Start^ = '.' THEN + AToken^.Kind := lexerKindDot + END +END TransitionActionFinalize; + +(* An action for tokens containing multiple characters. *) +PROCEDURE TransitionActionComposite(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF ALexer^.Start^ = '<' THEN + IF ALexer^.Current^ = '>' THEN + AToken^.Kind := lexerKindNotEqual + ELSIF ALexer^.Current^ = '=' THEN + AToken^.Kind := lexerKindLessEqual + END + ELSIF (ALexer^.Start^ = '>') AND (ALexer^.Current^ = '=') THEN + AToken^.Kind := lexerKindGreaterEqual + ELSIF (ALexer^.Start^ = '.') AND (ALexer^.Current^ = '.') THEN + AToken^.Kind := lexerKindRange + ELSIF (ALexer^.Start^ = ':') AND (ALexer^.Current^ = '=') THEN + AToken^.Kind := lexerKindAssignment + END; + INC(ALexer^.Current) +END TransitionActionComposite; + +(* Skip a space. *) +PROCEDURE TransitionActionSkip(ALexer: PLexer; AToken: PLexerToken); +BEGIN + INC(ALexer^.Current); + INC(ALexer^.Start) +END TransitionActionSkip; + +(* 0x04. Delimited string action. 
*) +PROCEDURE TransitionActionDelimited(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF ALexer^.Start^ = '(' THEN + AToken^.Kind := lexerKindComment + ELSIF ALexer^.Start^ = '"' THEN + AToken^.Kind := lexerKindString (* double quote: the Start state enters transitionStateString for it *) + ELSIF ALexer^.Start^ = "'" THEN + AToken^.Kind := lexerKindCharacter (* single quote: the Start state enters transitionStateCharacter for it *) + END; + INC(ALexer^.Current) +END TransitionActionDelimited; + +(* Finalize keyword or identifier. *) +PROCEDURE TransitionActionKeyId(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF CompareKeyword('PROGRAM', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindProgram + ELSIF CompareKeyword('IMPORT', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindImport + ELSIF CompareKeyword('CONST', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindConst + ELSIF CompareKeyword('VAR', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindVar + ELSIF CompareKeyword('IF', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindIf + ELSIF CompareKeyword('THEN', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindThen + ELSIF CompareKeyword('ELSIF', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindElsif + ELSIF CompareKeyword('ELSE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindElse + ELSIF CompareKeyword('WHILE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindWhile + ELSIF CompareKeyword('DO', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindDo + ELSIF CompareKeyword('PROCEDURE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindProc + ELSIF CompareKeyword('BEGIN', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindBegin + ELSIF CompareKeyword('END', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindEnd + ELSIF CompareKeyword('TYPE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindType + ELSIF CompareKeyword('RECORD', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindRecord + ELSIF 
CompareKeyword('UNION', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindUnion + ELSIF CompareKeyword('NIL', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindNull + ELSIF CompareKeyword('AND', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindAnd + ELSIF CompareKeyword('OR', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindOr + ELSIF CompareKeyword('RETURN', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindReturn + ELSIF CompareKeyword('DEFINITION', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindDefinition + ELSIF CompareKeyword('TO', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindTo + ELSIF CompareKeyword('CASE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindCase + ELSIF CompareKeyword('OF', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindOf + ELSIF CompareKeyword('FROM', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindFrom + ELSIF CompareKeyword('MODULE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindModule + ELSIF CompareKeyword('IMPLEMENTATION', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindImplementation + ELSIF CompareKeyword('TRUE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindBoolean; + AToken^.booleanKind := TRUE + ELSIF CompareKeyword('FALSE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindBoolean; + AToken^.booleanKind := FALSE + ELSE + AToken^.Kind := lexerKindIdentifier + END; +END TransitionActionKeyId; + +(* Action for tokens containing only one character. The character cannot be + * followed by other characters forming a composite token. 
*) +PROCEDURE TransitionActionSingle(ALexer: PLexer; AToken: PLexerToken); +BEGIN + CASE ALexer^.Current^ OF (* no ELSE branch: every character the Start state routes to this action needs its own label *) + '&': AToken^.Kind := lexerKindAnd | '~': AToken^.Kind := lexerKindNot | + ';': AToken^.Kind := lexerKindSemicolon | + ',': AToken^.Kind := lexerKindComma | + ')': AToken^.Kind := lexerKindRightParen | + '[': AToken^.Kind := lexerKindLeftSquare | + ']': AToken^.Kind := lexerKindRightSquare | + '^': AToken^.Kind := lexerKindHat | + '=': AToken^.Kind := lexerKindEqual | + '+': AToken^.Kind := lexerKindPlus | '*': AToken^.Kind := lexerKindMultiplication | + '/': AToken^.Kind := lexerKindDivision | + '%': AToken^.Kind := lexerKindRemainder | + '@': AToken^.Kind := lexerKindAt | '!': AToken^.Kind := lexerKindExclamation | + '|': AToken^.Kind := lexerKindPipe + END; + INC(ALexer^.Current) +END TransitionActionSingle; + +(* Handle an integer literal. *) +PROCEDURE TransitionActionInteger(ALexer: PLexer; AToken: PLexerToken); +BEGIN + AToken^.Kind := lexerKindInteger +END TransitionActionInteger; + +PROCEDURE SetDefaultTransition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState); +VAR DefaultTransition: Transition; +BEGIN + DefaultTransition.Action := DefaultAction; + DefaultTransition.NextState := NextState; + + Transitions[ORD(CurrentState)][ORD(transitionClassInvalid)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassDigit)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassAlpha)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassSpace)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassColon)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassEquals)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassLeftParen)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassRightParen)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassAsterisk)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassUnderscore)] := DefaultTransition; + 
Transitions[ORD(CurrentState)][ORD(transitionClassSingle)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassHex)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassZero)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassX)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassEof)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassDot)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassMinus)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassSingleQuote)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassDoubleQuote)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassGreater)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassLess)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassOther)] := DefaultTransition; +END SetDefaultTransition; + +(* + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + * + * Each cell is a word long. + * - The least significant byte of the word is a row number (beginning with 0). + * It specifies the target state. "ff" means that this is an end state and no + * transition is possible. + * - The next byte is the action that should be performed when transitioning. + * For the meaning of actions see labels in the lex_next function, which + * handles each action. + *) +PROCEDURE InitializeTransitions(); +BEGIN + (* Start state. 
*) + Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].NextState := transitionStateDecimal; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].Action := TransitionActionSkip; + Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].NextState := transitionStateStart; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].NextState := transitionStateColon; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].NextState := transitionStateLeftParen; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].Action := 
TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].NextState := transitionStateLeadingZero; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].Action := TransitionActionEof; + Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].NextState := transitionStateDot; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].NextState := transitionStateMinus; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].NextState := transitionStateCharacter; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].Action := TransitionActionAccumulate; + 
Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].NextState := transitionStateString; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].NextState := transitionStateGreater; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].NextState := transitionStateLess; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].Action := NIL; + Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].NextState := transitionStateEnd; + + (* Colon state. *) + SetDefaultTransition(transitionStateColon, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + (* Identifier state. 
*) + SetDefaultTransition(transitionStateIdentifier, TransitionActionKeyId, transitionStateEnd); + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].NextState := transitionStateIdentifier; + + (* Decimal state. 
*) + SetDefaultTransition(transitionStateDecimal, TransitionActionInteger, transitionStateEnd); + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].NextState := transitionStateDecimal; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].NextState := transitionStateDecimalSuffix; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].Action := NIL; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].NextState := transitionStateDecimalSuffix; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].NextState := transitionStateDecimal; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].NextState := transitionStateDecimalSuffix; + + (* Greater state. *) + SetDefaultTransition(transitionStateGreater, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + (* Minus state. 
*) + SetDefaultTransition(transitionStateMinus, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].NextState := transitionStateEnd; + + (* Left paren state. *) + SetDefaultTransition(transitionStateLeftParen, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].NextState := transitionStateComment; + + (* Less state. *) + SetDefaultTransition(transitionStateLess, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].NextState := transitionStateEnd; + + (* Dot state. *) + SetDefaultTransition(transitionStateDot, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].NextState := transitionStateEnd; + + (* Comment. 
*) + SetDefaultTransition(transitionStateComment, TransitionActionAccumulate, transitionStateComment); + + Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment; + + Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + (* Closing comment. *) + SetDefaultTransition(transitionStateClosingComment, TransitionActionAccumulate, transitionStateComment); + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].Action := TransitionActionDelimited; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment; + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + (* Character. 
*) + SetDefaultTransition(transitionStateCharacter, TransitionActionAccumulate, transitionStateCharacter); + + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].Action := TransitionActionDelimited; + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].NextState := transitionStateEnd; + + (* String. *) + SetDefaultTransition(transitionStateString, TransitionActionAccumulate, transitionStateString); + + Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].Action := TransitionActionDelimited; + Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].NextState := transitionStateEnd; + + (* Leading zero. 
*) + SetDefaultTransition(transitionStateLeadingZero, TransitionActionInteger, transitionStateEnd); + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].NextState := transitionStateEnd; + + (* Digit with a character suffix. 
*) + SetDefaultTransition(transitionStateDecimalSuffix, TransitionActionInteger, transitionStateEnd); + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].NextState := transitionStateEnd +END InitializeTransitions; +(* Prepares ALexer for reading from Input: stores the file, resets Length to 0 and allocates a ChunkSize-byte buffer that is cleared with MemZero; Size records the allocated size. *) +PROCEDURE LexerInitialize(ALexer: PLexer; Input: File); +BEGIN + ALexer^.Input := Input; + ALexer^.Length := 0; + + ALLOCATE(ALexer^.Buffer, ChunkSize); + MemZero(ALexer^.Buffer, ChunkSize); + ALexer^.Size := ChunkSize +END LexerInitialize; +(* Lexes the token that begins at ALexer^.Start without moving Start: Current is rewound to Start and the Transitions table is followed from transitionStateStart until transitionStateEnd. Returns the token filled in by the transition actions; its Kind is preset to lexerKindTrait. *) +PROCEDURE LexerCurrent(ALexer: PLexer): LexerToken; +VAR + CurrentClass: TransitionClass; + CurrentState: TransitionState; + CurrentTransition: Transition; + Result: LexerToken; +BEGIN + ALexer^.Current := ALexer^.Start; + Result.Kind := lexerKindTrait; + CurrentState := transitionStateStart; + (* NOTE(review): this loop never advances Current itself; presumably the transition actions do. The 1-offset suggests Classification is indexed from 1. Confirm both against their declarations. *) + WHILE CurrentState <> transitionStateEnd DO + CurrentClass := Classification[ORD(ALexer^.Current^) + 1]; + (* A NIL action means the transition only changes the state. *) + CurrentTransition := Transitions[ORD(CurrentState)][ORD(CurrentClass)]; + IF CurrentTransition.Action <> NIL THEN + CurrentTransition.Action(ALexer, ADR(Result)) + END; + CurrentState := CurrentTransition.NextState +
END; + RETURN Result +END LexerCurrent; +(* Advances to the next token: Start is moved to Current and the token found there is returned via LexerCurrent. While Length is 0 the buffer is first filled with up to ChunkSize bytes from Input and Current is pointed at its beginning. NOTE(review): no refill after the first chunk is visible here. *) +PROCEDURE LexerLex(ALexer: PLexer): LexerToken; +BEGIN + IF ALexer^.Length = 0 THEN + ALexer^.Length := ReadNBytes(ALexer^.Input, ChunkSize, ALexer^.Buffer); + ALexer^.Current := ALexer^.Buffer + END; + ALexer^.Start := ALexer^.Current; + + RETURN LexerCurrent(ALexer) +END LexerLex; +(* Releases the lexer buffer, passing the size recorded in ALexer^.Size. *) +PROCEDURE LexerDestroy(ALexer: PLexer); +BEGIN + DEALLOCATE(ALexer^.Buffer, ALexer^.Size) +END LexerDestroy; +(* Module body: runs both table initializers when the module is initialized. *) +BEGIN + InitializeClassification(); + InitializeTransitions() +END Lexer. diff --git a/boot/Transpiler.def b/boot/Transpiler.def new file mode 100644 index 0000000..a19ad37 --- /dev/null +++ b/boot/Transpiler.def @@ -0,0 +1,7 @@ +DEFINITION MODULE Transpiler; + +FROM Lexer IMPORT PLexer; +(* Entry point of the transpiler: processes the module tokenized by ALexer. *) +PROCEDURE Transpile(ALexer: PLexer); + +END Transpiler. diff --git a/boot/Transpiler.mod b/boot/Transpiler.mod new file mode 100644 index 0000000..25dfe52 --- /dev/null +++ b/boot/Transpiler.mod @@ -0,0 +1,160 @@ +IMPLEMENTATION MODULE Transpiler; + +FROM FIO IMPORT WriteNBytes, StdOut; +FROM SYSTEM IMPORT ADR, ADDRESS; + +FROM Terminal IMPORT Write, WriteLn, WriteString; +FROM Lexer IMPORT Lexer, LexerToken, LexerCurrent, LexerLex, LexerKind; + +TYPE + TranspilerContext = RECORD + END; + PTranspilerContext = POINTER TO TranspilerContext; + +(* Calls LexerLex() but skips the comments. *) +PROCEDURE TranspilerLex(ALexer: PLexer): LexerToken; +VAR + Result: LexerToken; +BEGIN + Result := LexerLex(ALexer); + + WHILE Result.Kind = lexerKindComment DO + Result := LexerLex(ALexer) + END; + + RETURN Result +END TranspilerLex; + +(* Write a semicolon followed by a newline.
*) +PROCEDURE WriteSemicolon(); +BEGIN + WriteString(';'); + WriteLn() +END WriteSemicolon; +(* Emits one import clause in the form FROM module IMPORT name, name; and then lexes the token that follows it. Token text is copied verbatim from the lexer buffer (Current - Start bytes beginning at Start). The caller is expected to have checked that the current token is lexerKindFrom. AContext is not used. *) +PROCEDURE TranspileImport(AContext: PTranspilerContext; ALexer: PLexer); +VAR + Token: LexerToken; + WrittenBytes: CARDINAL; +BEGIN + WriteString('FROM '); + Token := TranspilerLex(ALexer); + + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + + Token := TranspilerLex(ALexer); + WriteString(' IMPORT '); + + Token := TranspilerLex(ALexer); + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + + Token := TranspilerLex(ALexer); + WHILE Token.Kind <> lexerKindSemicolon DO + WriteString(', '); + Token := TranspilerLex(ALexer); + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + Token := TranspilerLex(ALexer) + END; + WriteSemicolon(); + Token := TranspilerLex(ALexer) +END TranspileImport; +(* Transpiles consecutive import clauses for as long as the current token is lexerKindFrom, then writes a newline. *) +PROCEDURE TranspileImportPart(AContext: PTranspilerContext; ALexer: PLexer); +VAR + Token: LexerToken; +BEGIN + Token := LexerCurrent(ALexer); + + WHILE Token.Kind = lexerKindFrom DO + TranspileImport(AContext, ALexer); + Token := LexerCurrent(ALexer) + END; + WriteLn() +END TranspileImportPart; +(* Emits one constant declaration: leading whitespace, the current token (the name), ' = ', the token after the next one (the value) and a semicolon with newline. The last token lexed here is the one following the value. AContext is not used. *) +PROCEDURE TranspileConstant(AContext: PTranspilerContext; ALexer: PLexer); +VAR + Token: LexerToken; + WrittenBytes: CARDINAL; +BEGIN + WriteString(' '); + Token := LexerCurrent(ALexer); + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + + Token := TranspilerLex(ALexer); + WriteString(' = '); + + Token := TranspilerLex(ALexer); + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + + Token := TranspilerLex(ALexer); + WriteSemicolon() +END TranspileConstant; +(* If the current token is lexerKindConst, writes the CONST heading and transpiles one constant per identifier token that follows, ending with a newline; otherwise writes nothing. *) +PROCEDURE TranspileConstantPart(AContext: PTranspilerContext; ALexer: PLexer); +VAR + Token: LexerToken; +BEGIN + Token := LexerCurrent(ALexer); + + IF Token.Kind = lexerKindConst THEN + WriteString('CONST'); +
WriteLn(); + Token := TranspilerLex(ALexer); + + WHILE Token.Kind = lexerKindIdentifier DO + TranspileConstant(AContext, ALexer); + Token := TranspilerLex(ALexer) + END; + WriteLn() + END +END TranspileConstantPart; +(* Transpiles a whole module: an optional DEFINITION or IMPLEMENTATION prefix, the MODULE heading with its name, the import part and the constant part. Everything after that is not translated yet and is echoed token by token. *) +PROCEDURE TranspileModule(AContext: PTranspilerContext; ALexer: PLexer); +VAR + Token: LexerToken; + WrittenBytes: CARDINAL; +BEGIN + Token := TranspilerLex(ALexer); + + IF Token.Kind = lexerKindDefinition THEN + WriteString('DEFINITION '); + Token := TranspilerLex(ALexer); + ELSIF Token.Kind = lexerKindImplementation THEN + WriteString('IMPLEMENTATION '); + Token := TranspilerLex(ALexer) + END; + WriteString('MODULE '); + + (* Write the module name and end the line with a semicolon and newline. *) + Token := TranspilerLex(ALexer); + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + + Token := TranspilerLex(ALexer); + WriteSemicolon(); + WriteLn(); + + (* Write the module body. *) + Token := TranspilerLex(ALexer); + TranspileImportPart(AContext, ALexer); + TranspileConstantPart(AContext, ALexer); + (* Echo every remaining token on its own line until lexerKindEof. *) + Token := LexerCurrent(ALexer); + WHILE Token.Kind <> lexerKindEof DO + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + WriteLn(); + + Token := TranspilerLex(ALexer) + END +END TranspileModule; +(* Exported entry point: hands the address of a local, field-less context and ALexer to TranspileModule. NOTE(review): Token and WrittenBytes are declared but never used here. *) +PROCEDURE Transpile(ALexer: PLexer); +VAR + Token: LexerToken; + WrittenBytes: CARDINAL; + Context: TranspilerContext; +BEGIN + TranspileModule(ADR(Context), ALexer) +END Transpile; + +END Transpiler.