From f29e68ec93e420de6a6029e542abf2c953d929b3 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sun, 5 Jun 2022 23:43:45 +0200 Subject: [PATCH] Start a Modula-2 experiment --- .gitignore | 2 +- README | 37 -- README.md | 63 +++ Rakefile | 130 +++--- boot/Compiler.mod | 18 + boot/Lexer.def | 92 ++++ boot/Lexer.mod | 717 ++++++++++++++++++++++++++++++ boot/Transpiler.def | 7 + boot/Transpiler.mod | 24 + dub.json | 9 - source/elna/extended.d | 9 - source/elna/generator.d | 660 --------------------------- source/elna/ir.d | 144 ------ source/elna/lexer.d | 252 ----------- source/elna/parser.d | 269 ----------- source/elna/result.d | 84 ---- source/main.d | 72 --- tests/const_list.elna | 3 - tests/expectations/const_list.txt | 1 - tests/expectations/sum.txt | 1 - tests/expectations/sums.txt | 1 - tests/sum.elna | 2 - tests/sums.elna | 2 - 23 files changed, 994 insertions(+), 1605 deletions(-) delete mode 100644 README create mode 100644 README.md create mode 100644 boot/Compiler.mod create mode 100644 boot/Lexer.def create mode 100644 boot/Lexer.mod create mode 100644 boot/Transpiler.def create mode 100644 boot/Transpiler.mod delete mode 100644 dub.json delete mode 100644 source/elna/extended.d delete mode 100644 source/elna/generator.d delete mode 100644 source/elna/ir.d delete mode 100644 source/elna/lexer.d delete mode 100644 source/elna/parser.d delete mode 100644 source/elna/result.d delete mode 100644 source/main.d delete mode 100644 tests/const_list.elna delete mode 100644 tests/expectations/const_list.txt delete mode 100644 tests/expectations/sum.txt delete mode 100644 tests/expectations/sums.txt delete mode 100644 tests/sum.elna delete mode 100644 tests/sums.elna diff --git a/.gitignore b/.gitignore index d0d201a..cde7e11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -/.dub/ +a.out /dub.selections.json /build/ diff --git a/README b/README deleted file mode 100644 index b6e03eb..0000000 --- a/README +++ /dev/null @@ -1,37 +0,0 @@ -# Elna programming 
language - -Elna compiles simple mathematical operations to machine code. -The compiled program returns the result of the operation. - -## File extension - -.elna - -## Grammar PL/0 - -program = block "." ; - -block = [ "const" ident "=" number {"," ident "=" number} ";"] - [ "var" ident {"," ident} ";"] - { "procedure" ident ";" block ";" } statement ; - -statement = [ ident ":=" expression | "call" ident - | "?" ident | "!" expression - | "begin" statement {";" statement } "end" - | "if" condition "then" statement - | "while" condition "do" statement ]; - -condition = "odd" expression | - expression ("="|"#"|"<"|"<="|">"|">=") expression ; - -expression = [ "+"|"-"] term { ("+"|"-") term}; - -term = factor {("*"|"/") factor}; - -factor = ident | number | "(" expression ")"; - -## Operations - -"!" - Write a line. -"?" - Read user input. -"odd" - The only function, returns whether a number is odd. diff --git a/README.md b/README.md new file mode 100644 index 0000000..211ca55 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# Elna programming language + +Elna is a simple, imperative, low-level programming language. + +It is intended to accompany other languages in the areas where a high-level +language doesn't fit well. It is also supposed to be an intermediate +representation for such a hypothetical high-level programming language. + +## File extension + +.elna + +## Current implementation + +This repository contains a GCC frontend for Elna. After finishing the frontend +I'm planning to rewrite the compiler in Elna itself with its own backend and +a hand-written parser. So GCC gives a way to have a simple bootstrap compiler +and a possibility to compile Elna programs for different platforms. + +## Grammar + +program = block "." ; + +block = [ "const" ident "=" number {"," ident "=" number} ";"] + [ "var" ident {"," ident} ";"] + { "procedure" ident ";" block ";" } statement ; + +statement = [ ident ":=" expression | "call" ident + | "?" ident | "!" 
expression + | "begin" statement {";" statement } "end" + | "if" condition "then" statement + | "while" condition "do" statement ]; + +condition = "odd" expression | + expression ("="|"#"|"<"|"<="|">"|">=") expression ; + +expression = [ "+"|"-"] term { ("+"|"-") term}; + +term = factor {("*"|"/") factor}; + +factor = ident | number | "(" expression ")"; + +## Build + +The frontend requires GCC 14.2.0 (not tested with other versions). + +Download the GCC source. Copy the contents of this repository into `gcc/elna` +inside GCC. Finally build GCC enabling the frontend with +`--enable-languages=c,c++,elna`. After the installation the compiler can be +invoked with `$prefix/bin/gelna`. + +There is also a `Rakefile` that downloads, builds and installs GCC into the +`./build/` subdirectory. The `Rakefile` assumes that ruby and rake, as well as +all GCC dependencies are already available in the system. It works under Linux +and Mac OS. In the latter case GCC is patched with the patches used by Homebrew +(official GCC doesn't support Apple silicon targets). Invoke with + +```sh +rake boot +``` + +See `rake -T` for more tasks. The GCC source is under `build/tools`. The +installation path is `build/host/install`. 
diff --git a/Rakefile b/Rakefile index acafe3b..870de81 100644 --- a/Rakefile +++ b/Rakefile @@ -2,68 +2,82 @@ require 'pathname' require 'rake/clean' require 'open3' -DFLAGS = ['--warn-no-deprecated', '-L/usr/lib64/gcc-12'] -BINARY = 'build/bin/elna' -TESTS = FileList['tests/*.elna'] - .map { |test| (Pathname.new('build') + test).sub_ext('').to_path } -SOURCES = FileList['source/**/*.d'] - -directory 'build' - -CLEAN.include 'build' -CLEAN.include '.dub' - -rule(/build\/tests\/.+/ => ->(file) { test_for_out(file) }) do |t| - Pathname.new(t.name).dirname.mkpath - sh BINARY, t.source - sh 'gcc', '-o', t.name, "#{t.name}.o" - # Open3.pipeline [BINARY, t.source], ['gcc', '-x', 'assembler', '-o', t.name, '-'] -end - -file BINARY => SOURCES do |t| - sh({ 'DFLAGS' => (DFLAGS * ' ') }, 'dub', 'build', '--compiler=gdc-12') -end - -file 'build/tests/sample' => BINARY do |t| - sh t.source - sh 'gcc', '-o', t.name, 'build/tests/sample.o' -end - -task default: BINARY - -desc 'Run all tests and check the results' -task test: TESTS -task test: BINARY do - TESTS.each do |test| - expected = Pathname - .new(test) - .sub_ext('.txt') - .sub(/^build\/tests\//, 'tests/expectations/') - .read - .to_i - - puts "Running #{test}" - system test - actual = $?.exitstatus - - fail "#{test}: Expected #{expected}, got #{actual}" unless expected == actual +M2C = 'gm2' # Modula-2 compiler. +BOOT_OBJECTS = FileList['boot/*.mod'] + .map do |source| + Pathname.new(source).basename.sub_ext('.o') end - # system './build/tests/sample' - # actual = $?.exitstatus - # fail "./build/tests/sample: Expected 3, got #{actual}" unless 3 == actual +def source_for_object(out_file) + path = Pathname.new(out_file).relative_path_from('build') + result = ['build/boot'] + + definition = File.join('boot', path.basename.sub_ext('.def')) + result << definition if File.exist? definition + + implementation = path.sub_ext('.mod').to_path + implementation = File.join 'build', implementation unless File.exist? 
implementation + + result << implementation end -desc 'Run unittest blocks' -task unittest: SOURCES do |t| - sh('dub', 'test', '--compiler=gdc-12') +directory 'build/boot' +directory 'build/self' + +CLEAN.include 'build' + +rule(/build\/.+\.o$/ => ->(file) { source_for_object(file) }) do |t| + sources = t.prerequisites.filter { |f| f.end_with? '.mod' } + + sh M2C, '-c', '-I', 'boot', '-o', t.name, *sources end -def test_for_out(out_file) - test_source = Pathname - .new(out_file) - .sub_ext('.elna') - .sub(/^build\//, '') - .to_path - [test_source, BINARY] +rule(/build\/self\/.+\.mod$/ => [ + 'build/self', 'build/boot/Compiler', + ->(file) { File.join('boot', Pathname.new(file).basename) } +]) do |t| + sources, compiler = t.prerequisites + .reject { |f| File.directory? f } + .partition { |f| f.end_with? '.mod' } + + File.open t.name, 'w' do |output| + puts + puts(compiler * ' ') + + Open3.popen2(*compiler) do |cl_in, cl_out| + cl_in.write File.read(*sources) + cl_in.close + + IO.copy_stream cl_out, output + cl_out.close + end + end +end + +['boot', 'self'].each do |sub| + compiler_binary = Pathname.new('build') + sub + 'Compiler' + + file compiler_binary.to_path => BOOT_OBJECTS.map { |file| File.join('build', sub, file) } do |t| + sh M2C, '-o', t.name, *t.prerequisites + end + + compiler_object = compiler_binary.sub_ext('.o') + file compiler_object.to_path => source_for_object(compiler_object) do |t| + sources = t.prerequisites.filter { |f| f.end_with? 
'.mod' } + + sh M2C, '-fscaffold-main', '-c', '-I', 'boot', '-o', t.name, *sources + end +end + +task default: 'build/self/Compiler' +task default: 'build/self/Compiler.mod' +task default: 'boot/Compiler.mod' +task :default do |t| + exe, previous_output, source = t.prerequisites + + cat_arguments = ['cat', source] + diff_arguments = ['diff', '-Nur', '--text', previous_output, '-'] + + puts [cat_arguments * ' ', exe, diff_arguments * ' '].join(' | ') + Open3.pipeline(cat_arguments, exe, diff_arguments) end diff --git a/boot/Compiler.mod b/boot/Compiler.mod new file mode 100644 index 0000000..1cd98ab --- /dev/null +++ b/boot/Compiler.mod @@ -0,0 +1,18 @@ +MODULE Compiler; + +FROM FIO IMPORT StdIn; +FROM SYSTEM IMPORT ADR; + +FROM Lexer IMPORT Lexer, LexerDestroy, LexerInitialize; +FROM Transpiler IMPORT Transpile; + +VAR + ALexer: Lexer; + +BEGIN + LexerInitialize(ADR(ALexer), StdIn); + + Transpile(ADR(ALexer)); + + LexerDestroy(ADR(ALexer)) +END Compiler. diff --git a/boot/Lexer.def b/boot/Lexer.def new file mode 100644 index 0000000..263b809 --- /dev/null +++ b/boot/Lexer.def @@ -0,0 +1,92 @@ +DEFINITION MODULE Lexer; + +FROM FIO IMPORT File; + +TYPE + PLexerBuffer = POINTER TO CHAR; + Lexer = RECORD + Input: File; + Buffer: PLexerBuffer; + Size: CARDINAL; + Length: CARDINAL; + Start: PLexerBuffer; + Current: PLexerBuffer + END; + PLexer = POINTER TO Lexer; + LexerKind = ( + lexerKindEof, + lexerKindIdentifier, + lexerKindIf, + lexerKindThen, + lexerKindElse, + lexerKindElsif, + lexerKindWhile, + lexerKindDo, + lexerKindProc, + lexerKindBegin, + lexerKindEnd, + lexerKindImplementation, + lexerKindConst, + lexerKindVar, + lexerKindCase, + lexerKindOf, + lexerKindType, + lexerKindRecord, + lexerKindUnion, + lexerKindPipe, + lexerKindTo, + lexerKindBoolean, + lexerKindNull, + lexerKindAnd, + lexerKindOr, + lexerKindNot, + lexerKindReturn, + lexerKindDefinition, + lexerKindRange, + lexerKindLeftParen, + lexerKindRightParen, + lexerKindLeftSquare, + 
lexerKindRightSquare, + lexerKindGreaterEqual, + lexerKindLessEqual, + lexerKindGreaterThan, + lexerKindLessThan, + lexerKindNotEqual, + lexerKindEqual, + lexerKindSemicolon, + lexerKindDot, + lexerKindComma, + lexerKindPlus, + lexerKindMinus, + lexerKindMultiplication, + lexerKindDivision, + lexerKindRemainder, + lexerKindAssignment, + lexerKindColon, + lexerKindHat, + lexerKindAt, + lexerKindComment, + lexerKindInteger, + lexerKindWord, + lexerKindCharacter, + lexerKindString, + lexerKindFrom, + lexerKindExclamation, + lexerKindArrow, + lexerKindTrait, + lexerKindProgram, + lexerKindModule, + lexerKindImport + ); + LexerToken = RECORD + CASE Kind: LexerKind OF + lexerKindBoolean: booleanKind: BOOLEAN + END + END; + PLexerToken = POINTER TO LexerToken; + +PROCEDURE LexerInitialize(ALexer: PLexer; Input: File); +PROCEDURE LexerDestroy(ALexer: PLexer); +PROCEDURE LexerLex(ALexer: PLexer): LexerToken; + +END Lexer. diff --git a/boot/Lexer.mod b/boot/Lexer.mod new file mode 100644 index 0000000..6635d9e --- /dev/null +++ b/boot/Lexer.mod @@ -0,0 +1,717 @@ +IMPLEMENTATION MODULE Lexer; + +FROM FIO IMPORT ReadNBytes; +FROM SYSTEM IMPORT ADR; + +FROM Storage IMPORT DEALLOCATE, ALLOCATE; +FROM Strings IMPORT Length; +FROM MemUtils IMPORT MemZero; + +CONST + ChunkSize = 65536; + +TYPE + (* + * Classification table assigns each possible character to a group (class). All + * characters of the same group are handled equivalently. 
+ * + * Classification: + *) + TransitionClass = ( + transitionClassInvalid, + transitionClassDigit, + transitionClassAlpha, + transitionClassSpace, + transitionClassColon, + transitionClassEquals, + transitionClassLeftParen, + transitionClassRightParen, + transitionClassAsterisk, + transitionClassUnderscore, + transitionClassSingle, + transitionClassHex, + transitionClassZero, + transitionClassX, + transitionClassEof, + transitionClassDot, + transitionClassMinus, + transitionClassSingleQuote, + transitionClassDoubleQuote, + transitionClassGreater, + transitionClassLess, + transitionClassOther + ); + TransitionState = ( + transitionStateStart, + transitionStateColon, + transitionStateIdentifier, + transitionStateDecimal, + transitionStateGreater, + transitionStateMinus, + transitionStateLeftParen, + transitionStateLess, + transitionStateDot, + transitionStateComment, + transitionStateClosingComment, + transitionStateCharacter, + transitionStateString, + transitionStateLeadingZero, + transitionStateDecimalSuffix, + transitionStateEnd + ); + TransitionAction = PROCEDURE(PLexer, PLexerToken); + Transition = RECORD + Action: TransitionAction; + NextState: TransitionState + END; + +VAR + Classification: ARRAY[1..128] OF TransitionClass; + Transitions: ARRAY[0..MAX(TransitionState)] OF ARRAY[0..MAX(TransitionClass)] OF Transition; + +PROCEDURE InitializeClassification(); +BEGIN + Classification[1] := transitionClassEof; (* NUL *) + Classification[2] := transitionClassInvalid; (* SOH *) + Classification[3] := transitionClassInvalid; (* STX *) + Classification[4] := transitionClassInvalid; (* ETX *) + Classification[5] := transitionClassInvalid; (* EOT *) + Classification[6] := transitionClassInvalid; (* ENQ *) + Classification[7] := transitionClassInvalid; (* ACK *) + Classification[8] := transitionClassInvalid; (* BEL *) + Classification[9] := transitionClassInvalid; (* BS *) + Classification[10] := transitionClassSpace; (* HT *) + Classification[11] := 
transitionClassSpace; (* LF *) + Classification[12] := transitionClassInvalid; (* VT *) + Classification[13] := transitionClassInvalid; (* FF *) + Classification[14] := transitionClassSpace; (* CR *) + Classification[15] := transitionClassInvalid; (* SO *) + Classification[16] := transitionClassInvalid; (* SI *) + Classification[17] := transitionClassInvalid; (* DLE *) + Classification[18] := transitionClassInvalid; (* DC1 *) + Classification[19] := transitionClassInvalid; (* DC2 *) + Classification[20] := transitionClassInvalid; (* DC3 *) + Classification[21] := transitionClassInvalid; (* DC4 *) + Classification[22] := transitionClassInvalid; (* NAK *) + Classification[23] := transitionClassInvalid; (* SYN *) + Classification[24] := transitionClassInvalid; (* ETB *) + Classification[25] := transitionClassInvalid; (* CAN *) + Classification[26] := transitionClassInvalid; (* EM *) + Classification[27] := transitionClassInvalid; (* SUB *) + Classification[28] := transitionClassInvalid; (* ESC *) + Classification[29] := transitionClassInvalid; (* FS *) + Classification[30] := transitionClassInvalid; (* GS *) + Classification[31] := transitionClassInvalid; (* RS *) + Classification[32] := transitionClassInvalid; (* US *) + Classification[33] := transitionClassSpace; (* Space *) + Classification[34] := transitionClassSingle; (* ! 
*) + Classification[35] := transitionClassDoubleQuote; (* " *) + Classification[36] := transitionClassOther; (* # *) + Classification[37] := transitionClassOther; (* $ *) + Classification[38] := transitionClassSingle; (* % *) + Classification[39] := transitionClassSingle; (* & *) + Classification[40] := transitionClassSingleQuote; (* ' *) + Classification[41] := transitionClassLeftParen; (* ( *) + Classification[42] := transitionClassRightParen; (* ) *) + Classification[43] := transitionClassAsterisk; (* * *) + Classification[44] := transitionClassSingle; (* + *) + Classification[45] := transitionClassSingle; (* , *) + Classification[46] := transitionClassMinus; (* - *) + Classification[47] := transitionClassDot; (* . *) + Classification[48] := transitionClassSingle; (* / *) + Classification[49] := transitionClassZero; (* 0 *) + Classification[50] := transitionClassDigit; (* 1 *) + Classification[51] := transitionClassDigit; (* 2 *) + Classification[52] := transitionClassDigit; (* 3 *) + Classification[53] := transitionClassDigit; (* 4 *) + Classification[54] := transitionClassDigit; (* 5 *) + Classification[55] := transitionClassDigit; (* 6 *) + Classification[56] := transitionClassDigit; (* 7 *) + Classification[57] := transitionClassDigit; (* 8 *) + Classification[58] := transitionClassDigit; (* 9 *) + Classification[59] := transitionClassColon; (* : *) + Classification[60] := transitionClassSingle; (* ; *) + Classification[61] := transitionClassLess; (* < *) + Classification[62] := transitionClassEquals; (* = *) + Classification[63] := transitionClassGreater; (* > *) + Classification[64] := transitionClassOther; (* ? 
*) + Classification[65] := transitionClassSingle; (* @ *) + Classification[66] := transitionClassAlpha; (* A *) + Classification[67] := transitionClassAlpha; (* B *) + Classification[68] := transitionClassAlpha; (* C *) + Classification[69] := transitionClassAlpha; (* D *) + Classification[70] := transitionClassAlpha; (* E *) + Classification[71] := transitionClassAlpha; (* F *) + Classification[72] := transitionClassAlpha; (* G *) + Classification[73] := transitionClassAlpha; (* H *) + Classification[74] := transitionClassAlpha; (* I *) + Classification[75] := transitionClassAlpha; (* J *) + Classification[76] := transitionClassAlpha; (* K *) + Classification[77] := transitionClassAlpha; (* L *) + Classification[78] := transitionClassAlpha; (* M *) + Classification[79] := transitionClassAlpha; (* N *) + Classification[80] := transitionClassAlpha; (* O *) + Classification[81] := transitionClassAlpha; (* P *) + Classification[82] := transitionClassAlpha; (* Q *) + Classification[83] := transitionClassAlpha; (* R *) + Classification[84] := transitionClassAlpha; (* S *) + Classification[85] := transitionClassAlpha; (* T *) + Classification[86] := transitionClassAlpha; (* U *) + Classification[87] := transitionClassAlpha; (* V *) + Classification[88] := transitionClassAlpha; (* W *) + Classification[89] := transitionClassAlpha; (* X *) + Classification[90] := transitionClassAlpha; (* Y *) + Classification[91] := transitionClassAlpha; (* Z *) + Classification[92] := transitionClassSingle; (* [ *) + Classification[93] := transitionClassOther; (* \ *) + Classification[94] := transitionClassSingle; (* ] *) + Classification[95] := transitionClassSingle; (* ^ *) + Classification[96] := transitionClassUnderscore; (* _ *) + Classification[97] := transitionClassOther; (* ` *) + Classification[98] := transitionClassHex; (* a *) + Classification[99] := transitionClassHex; (* b *) + Classification[100] := transitionClassHex; (* c *) + Classification[101] := transitionClassHex; (* 
d *) + Classification[102] := transitionClassHex; (* e *) + Classification[103] := transitionClassHex; (* f *) + Classification[104] := transitionClassAlpha; (* g *) + Classification[105] := transitionClassAlpha; (* h *) + Classification[106] := transitionClassAlpha; (* i *) + Classification[107] := transitionClassAlpha; (* j *) + Classification[108] := transitionClassAlpha; (* k *) + Classification[109] := transitionClassAlpha; (* l *) + Classification[110] := transitionClassAlpha; (* m *) + Classification[111] := transitionClassAlpha; (* n *) + Classification[112] := transitionClassAlpha; (* o *) + Classification[113] := transitionClassAlpha; (* p *) + Classification[114] := transitionClassAlpha; (* q *) + Classification[115] := transitionClassAlpha; (* r *) + Classification[116] := transitionClassAlpha; (* s *) + Classification[117] := transitionClassAlpha; (* t *) + Classification[118] := transitionClassAlpha; (* u *) + Classification[119] := transitionClassAlpha; (* v *) + Classification[120] := transitionClassAlpha; (* w *) + Classification[121] := transitionClassX; (* x *) + Classification[122] := transitionClassAlpha; (* y *) + Classification[123] := transitionClassAlpha; (* z *) + Classification[124] := transitionClassOther; (* { *) + Classification[125] := transitionClassSingle; (* | *) + Classification[126] := transitionClassOther; (* } *) + Classification[127] := transitionClassSingle; (* ~ *) + Classification[128] := transitionClassInvalid (* DEL *) +END InitializeClassification; + +PROCEDURE CompareKeyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN; +VAR + Result: BOOLEAN; + Index: CARDINAL; +BEGIN + Index := 0; + Result := TRUE; + + WHILE (Index < Length(Keyword)) AND (TokenStart <> TokenEnd) AND Result DO + Result := Keyword[Index] = TokenStart^; + INC(TokenStart); + INC(Index) + END; + RETURN (Index = Length(Keyword)) AND (TokenStart = TokenEnd) AND Result +END CompareKeyword; + +(* Reached the end of file. 
*) +PROCEDURE TransitionActionEof(ALexer: PLexer; AToken: PLexerToken); +BEGIN + AToken^.Kind := lexerKindEof +END TransitionActionEof; + +(* Add the character to the token currently read and advance to the next character. *) +PROCEDURE TransitionActionAccumulate(ALexer: PLexer; AToken: PLexerToken); +BEGIN + INC(ALexer^.Current) +END TransitionActionAccumulate; + +(* The current character is not a part of the token. Finish the one-character token + * already read (':', '>', '<', '(', '-' or '.'). Don't advance to the next character. *) +PROCEDURE TransitionActionFinalize(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF ALexer^.Start^ = ':' THEN + AToken^.Kind := lexerKindColon + ELSIF ALexer^.Start^ = '>' THEN + AToken^.Kind := lexerKindGreaterThan + ELSIF ALexer^.Start^ = '<' THEN + AToken^.Kind := lexerKindLessThan + ELSIF ALexer^.Start^ = '(' THEN + AToken^.Kind := lexerKindLeftParen + ELSIF ALexer^.Start^ = '-' THEN + AToken^.Kind := lexerKindMinus + ELSIF ALexer^.Start^ = '.' THEN + AToken^.Kind := lexerKindDot + END +END TransitionActionFinalize; + +(* An action for tokens containing multiple characters. *) +PROCEDURE TransitionActionComposite(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF ALexer^.Start^ = '<' THEN + IF ALexer^.Current^ = '>' THEN + AToken^.Kind := lexerKindNotEqual + ELSIF ALexer^.Current^ = '=' THEN + AToken^.Kind := lexerKindLessEqual + END + ELSIF (ALexer^.Start^ = '>') AND (ALexer^.Current^ = '=') THEN + AToken^.Kind := lexerKindGreaterEqual + ELSIF (ALexer^.Start^ = '.') AND (ALexer^.Current^ = '.') THEN + AToken^.Kind := lexerKindRange + ELSIF (ALexer^.Start^ = ':') AND (ALexer^.Current^ = '=') THEN + AToken^.Kind := lexerKindAssignment + END; + INC(ALexer^.Current) +END TransitionActionComposite; + +(* Skip a space. *) +PROCEDURE TransitionActionSkip(ALexer: PLexer; AToken: PLexerToken); +BEGIN + INC(ALexer^.Current); + INC(ALexer^.Start) +END TransitionActionSkip; + +(* 0x04. Delimited string action. 
*) +PROCEDURE TransitionActionDelimited(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF ALexer^.Start^ = '(' THEN + AToken^.Kind := lexerKindComment + ELSIF ALexer^.Start^ = '"' THEN + AToken^.Kind := lexerKindString (* double quotes delimit a string *) + ELSIF ALexer^.Start^ = "'" THEN + AToken^.Kind := lexerKindCharacter (* single quotes delimit a character *) + END; + INC(ALexer^.Current) +END TransitionActionDelimited; + +(* Finalize keyword or identifier. *) +PROCEDURE TransitionActionKeyId(ALexer: PLexer; AToken: PLexerToken); +BEGIN + IF CompareKeyword('PROGRAM', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindProgram + ELSIF CompareKeyword('IMPORT', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindImport + ELSIF CompareKeyword('CONST', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindConst + ELSIF CompareKeyword('VAR', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindVar + ELSIF CompareKeyword('IF', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindIf + ELSIF CompareKeyword('THEN', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindThen + ELSIF CompareKeyword('ELSIF', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindElsif + ELSIF CompareKeyword('ELSE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindElse + ELSIF CompareKeyword('WHILE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindWhile + ELSIF CompareKeyword('DO', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindDo + ELSIF CompareKeyword('PROCEDURE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindProc + ELSIF CompareKeyword('BEGIN', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindBegin + ELSIF CompareKeyword('END', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindEnd + ELSIF CompareKeyword('TYPE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindType + ELSIF CompareKeyword('RECORD', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindRecord + ELSIF 
CompareKeyword('UNION', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindUnion + ELSIF CompareKeyword('NIL', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindNull + ELSIF CompareKeyword('AND', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindAnd + ELSIF CompareKeyword('OR', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindOr + ELSIF CompareKeyword('RETURN', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindReturn + ELSIF CompareKeyword('DEFINITION', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindDefinition + ELSIF CompareKeyword('TO', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindTo + ELSIF CompareKeyword('CASE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindCase + ELSIF CompareKeyword('OF', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindOf + ELSIF CompareKeyword('FROM', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindFrom + ELSIF CompareKeyword('MODULE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindModule + ELSIF CompareKeyword('IMPLEMENTATION', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindImplementation + ELSIF CompareKeyword('TRUE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindBoolean; + AToken^.booleanKind := TRUE + ELSIF CompareKeyword('FALSE', ALexer^.Start, ALexer^.Current) THEN + AToken^.Kind := lexerKindBoolean; + AToken^.booleanKind := FALSE + ELSE + AToken^.Kind := lexerKindIdentifier + END; +END TransitionActionKeyId; + +(* Action for tokens containing only one character. The character cannot be + * followed by other characters forming a composite token. 
*) +PROCEDURE TransitionActionSingle(ALexer: PLexer; AToken: PLexerToken); +BEGIN + CASE ALexer^.Current^ OF + '&': AToken^.Kind := lexerKindAnd | + ';': AToken^.Kind := lexerKindSemicolon | + ',': AToken^.Kind := lexerKindComma | + ')': AToken^.Kind := lexerKindRightParen | + '[': AToken^.Kind := lexerKindLeftSquare | + ']': AToken^.Kind := lexerKindRightSquare | + '^': AToken^.Kind := lexerKindHat | + '=': AToken^.Kind := lexerKindEqual | + '+': AToken^.Kind := lexerKindPlus | + '/': AToken^.Kind := lexerKindDivision | + '%': AToken^.Kind := lexerKindRemainder | + '@': AToken^.Kind := lexerKindAt | + '|': AToken^.Kind := lexerKindPipe + END; + INC(ALexer^.Current) +END TransitionActionSingle; + +(* Handle an integer literal. *) +PROCEDURE TransitionActionInteger(ALexer: PLexer; AToken: PLexerToken); +BEGIN + AToken^.Kind := lexerKindInteger +END TransitionActionInteger; + +PROCEDURE SetDefaultTransition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState); +VAR DefaultTransition: Transition; +BEGIN + DefaultTransition.Action := DefaultAction; + DefaultTransition.NextState := NextState; + + Transitions[ORD(CurrentState)][ORD(transitionClassInvalid)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassDigit)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassAlpha)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassSpace)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassColon)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassEquals)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassLeftParen)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassRightParen)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassAsterisk)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassUnderscore)] := DefaultTransition; + 
Transitions[ORD(CurrentState)][ORD(transitionClassSingle)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassHex)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassZero)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassX)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassEof)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassDot)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassMinus)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassSingleQuote)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassDoubleQuote)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassGreater)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassLess)] := DefaultTransition; + Transitions[ORD(CurrentState)][ORD(transitionClassOther)] := DefaultTransition; +END SetDefaultTransition; + +(* + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + * + * Each cell is a word long. + * - The least significant byte of the word is a row number (beginning with 0). + * It specifies the target state. "ff" means that this is an end state and no + * transition is possible. + * - The next byte is the action that should be performed when transitioning. + * For the meaning of actions see labels in the lex_next function, which + * handles each action. + *) +PROCEDURE InitializeTransitions(); +BEGIN + (* Start state. 
*) + Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].NextState := transitionStateDecimal; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].Action := TransitionActionSkip; + Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].NextState := transitionStateStart; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].NextState := transitionStateColon; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].NextState := transitionStateLeftParen; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].Action := 
TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].Action := TransitionActionSingle; + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].NextState := transitionStateLeadingZero; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].Action := TransitionActionEof; + Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].NextState := transitionStateDot; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].NextState := transitionStateMinus; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].NextState := transitionStateCharacter; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].Action := TransitionActionAccumulate; + 
Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].NextState := transitionStateString; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].NextState := transitionStateGreater; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].NextState := transitionStateLess; + + Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].Action := NIL; + Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].NextState := transitionStateEnd; + + (* Colon state. *) + SetDefaultTransition(transitionStateColon, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + (* Identifier state. 
*) + SetDefaultTransition(transitionStateIdentifier, TransitionActionKeyId, transitionStateEnd); + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].NextState := transitionStateIdentifier; + + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].NextState := transitionStateIdentifier; + + (* Decimal state. 
*) + SetDefaultTransition(transitionStateDecimal, TransitionActionInteger, transitionStateEnd); + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].NextState := transitionStateDecimal; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].NextState := transitionStateDecimalSuffix; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].Action := NIL; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].NextState := transitionStateDecimalSuffix; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].NextState := transitionStateDecimal; + + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].NextState := transitionStateDecimalSuffix; + + (* Greater state. *) + SetDefaultTransition(transitionStateGreater, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + (* Minus state. 
*) + SetDefaultTransition(transitionStateMinus, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].NextState := transitionStateEnd; + + (* Left paren state. *) + SetDefaultTransition(transitionStateLeftParen, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].NextState := transitionStateComment; + + (* Less state. *) + SetDefaultTransition(transitionStateLess, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].NextState := transitionStateEnd; + + (* Dot state. *) + SetDefaultTransition(transitionStateDot, TransitionActionFinalize, transitionStateEnd); + + Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].Action := TransitionActionComposite; + Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].NextState := transitionStateEnd; + + (* Comment. 
*) + SetDefaultTransition(transitionStateComment, TransitionActionAccumulate, transitionStateComment); + + Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment; + + Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + (* Closing comment. *) + SetDefaultTransition(transitionStateClosingComment, TransitionActionAccumulate, transitionStateComment); + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].Action := TransitionActionDelimited; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment; + + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + (* Character. 
*) + SetDefaultTransition(transitionStateCharacter, TransitionActionAccumulate, transitionStateCharacter); + + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].Action := TransitionActionDelimited; + Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].NextState := transitionStateEnd; + + (* String. *) + SetDefaultTransition(transitionStateString, TransitionActionAccumulate, transitionStateString); + + Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].Action := NIL; + Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].Action := NIL; + Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].Action := TransitionActionDelimited; + Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].NextState := transitionStateEnd; + + (* Leading zero. 
*) + SetDefaultTransition(transitionStateLeadingZero, TransitionActionInteger, transitionStateEnd); + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].Action := NIL; + Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].NextState := transitionStateEnd; + + (* Digit with a character suffix. 
*) + SetDefaultTransition(transitionStateDecimalSuffix, TransitionActionInteger, transitionStateEnd); + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].NextState := transitionStateEnd; + + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].Action := NIL; + Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].NextState := transitionStateEnd +END InitializeTransitions; + +PROCEDURE LexerInitialize(ALexer: PLexer; Input: File); +BEGIN + ALexer^.Input := Input; + ALexer^.Length := 0; + + ALLOCATE(ALexer^.Buffer, ChunkSize); + MemZero(ALexer^.Buffer, ChunkSize); + ALexer^.Size := ChunkSize +END LexerInitialize; + +PROCEDURE LexerLex(ALexer: PLexer): LexerToken; +VAR + WrittenBytes: CARDINAL; + CurrentClass: TransitionClass; + CurrentState: TransitionState; + CurrentTransition: Transition; + Result: LexerToken; +BEGIN + IF ALexer^.Length = 0 THEN + ALexer^.Length := ReadNBytes(ALexer^.Input, ChunkSize, ALexer^.Buffer); + ALexer^.Current := ALexer^.Buffer + END; + + Result.Kind := lexerKindTrait; + ALexer^.Start := ALexer^.Current; + CurrentState := transitionStateStart; + + WHILE CurrentState <> transitionStateEnd DO + CurrentClass := Classification[ORD(ALexer^.Current^) + 1]; + + CurrentTransition := 
Transitions[ORD(CurrentState)][ORD(CurrentClass)]; + IF CurrentTransition.Action <> NIL THEN + CurrentTransition.Action(ALexer, ADR(Result)) + END; + CurrentState := CurrentTransition.NextState + END; + RETURN Result +END LexerLex; + +PROCEDURE LexerDestroy(ALexer: PLexer); +BEGIN + DEALLOCATE(ALexer^.Buffer, ALexer^.Size) +END LexerDestroy; + +BEGIN + InitializeClassification(); + InitializeTransitions() +END Lexer. diff --git a/boot/Transpiler.def b/boot/Transpiler.def new file mode 100644 index 0000000..a19ad37 --- /dev/null +++ b/boot/Transpiler.def @@ -0,0 +1,7 @@ +DEFINITION MODULE Transpiler; + +FROM Lexer IMPORT PLexer; + +PROCEDURE Transpile(ALexer: PLexer); + +END Transpiler. diff --git a/boot/Transpiler.mod b/boot/Transpiler.mod new file mode 100644 index 0000000..61beafb --- /dev/null +++ b/boot/Transpiler.mod @@ -0,0 +1,24 @@ +IMPLEMENTATION MODULE Transpiler; + +FROM FIO IMPORT WriteNBytes, StdOut; +FROM SYSTEM IMPORT ADDRESS; + +FROM Terminal IMPORT WriteLn; +FROM Lexer IMPORT Lexer, LexerToken, LexerLex, LexerKind; + +PROCEDURE Transpile(ALexer: PLexer); +VAR + Token: LexerToken; + WrittenBytes: CARDINAL; +BEGIN + Token := LexerLex(ALexer); + + WHILE Token.Kind <> lexerKindEof DO + WrittenBytes := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start); + WriteLn(); + + Token := LexerLex(ALexer) + END +END Transpile; + +END Transpiler. diff --git a/dub.json b/dub.json deleted file mode 100644 index 8567d3a..0000000 --- a/dub.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dependencies": { - "tanya": "~>0.18.0" - }, - "name": "elna", - "targetType": "executable", - "targetPath": "build/bin", - "mainSourceFile": "source/main.d" -} diff --git a/source/elna/extended.d b/source/elna/extended.d deleted file mode 100644 index 0473cee..0000000 --- a/source/elna/extended.d +++ /dev/null @@ -1,9 +0,0 @@ -/** - * File I/O that can be moved into more generic library when and if finished. 
- */ -module elna.extended; - -struct File -{ - @disable this(this); -} diff --git a/source/elna/generator.d b/source/elna/generator.d deleted file mode 100644 index ce2b3d1..0000000 --- a/source/elna/generator.d +++ /dev/null @@ -1,660 +0,0 @@ -module elna.generator; - -import core.stdc.stdio; -import core.stdc.stdlib; -import core.stdc.string; -import elna.ir; -import tanya.container.array; -import tanya.container.string; -import tanya.memory.mmappool; -import tanya.format; - -/// Unsigned program address. -alias Elf64_Addr = void*; -/// Unsigned file offset. -alias Elf64_Off = ulong; -/// Unsigned medium integer. -alias Elf64_Half = ushort; -/// Unsigned integer. -alias Elf64_Word = uint; -/// Signed integer. -alias Elf64_Sword = int; -/// Unsigned long integer. -alias Elf64_Xword = ulong; -/// Signed long integer. -alias Elf64_Sxword = long; - -enum size_t EI_INDENT = 16; - -/** - * File header. - */ -struct Elf64_Ehdr -{ - /// ELF identification. - ubyte[EI_INDENT] e_ident; - /// Object file type. - Elf64_Half e_type; - /// Machine type. - Elf64_Half e_machine; - /// Object file version - Elf64_Word e_version; - /// Entry point address. - Elf64_Addr e_entry; - /// Program header offset. - Elf64_Off e_phoff; - /// Section header offset. - Elf64_Off e_shoff; - /// Processor-specific flags. - Elf64_Word e_flags; - /// ELF header size. - Elf64_Half e_ehsize; - /// Size of program header entry. - Elf64_Half e_phentsize; - /// Number of program header entries. - Elf64_Half e_phnum; - /// Size of section header entry. - Elf64_Half e_shentsize; - /// Number of section header entries. - Elf64_Half e_shnum; - /// Section name string table index. - Elf64_Half e_shstrndx; -} - -/** - * Section header. - */ -struct Elf64_Shdr -{ - /// Section name. - Elf64_Word sh_name; - /// Section type. - Elf64_Word sh_type; - /// Section attributes. - Elf64_Xword sh_flags; - /// Virtual address in memory. - Elf64_Addr sh_addr; - /// Offset in file. 
- Elf64_Off sh_offset; - /// Size of section. - Elf64_Xword sh_size; - /// Link to other section. - Elf64_Word sh_link; - /// Miscellaneous information. - Elf64_Word sh_info; - /// Address alignment boundary. - Elf64_Xword sh_addralign; - /// Size of entries, if section has table. - Elf64_Xword sh_entsize; -} - -struct Elf64_Sym -{ - /// Symbol name. - Elf64_Word st_name; - /// Type and Binding attributes. - ubyte st_info; - /// Reserved. - ubyte st_other; - /// Section table index. - Elf64_Half st_shndx; - /// Symbol value. - Elf64_Addr st_value; - /// Size of object (e.g., common). - Elf64_Xword st_size; -} - -/// Section Types, sh_type. -enum : Elf64_Word -{ - /// Marks an unused section header. - SHT_NULL = 0, - /// Contains information defined by the program. - SHT_PROGBITS = 1, - /// Contains a linker symbol table. - SHT_SYMTAB = 2, - /// Contains a string table. - SHT_STRTAB = 3, - /// Contains “Rela” type relocation entries. - SHT_RELA = 4, - /// Contains a symbol hash table - SHT_HASH = 5, - /// Contains dynamic linking tables - SHT_DYNAMIC = 6, - /// Contains note information - SHT_NOTE = 7, - /// Contains uninitialized space; does not occupy any space in the file. - SHT_NOBITS = 8, - /// Contains "Rel" type relocation entries. - SHT_REL = 9, - /// Reserved. - SHT_SHLIB = 10, - /// Contains a dynamic loader symbol table. - SHT_DYNSYM = 11, - /// Environment-specific use. - SHT_LOOS = 0x60000000, - SHT_HIOS = 0x6FFFFFFF, - /// Processor-specific use. - SHT_LOPROC = 0x70000000, - SHT_HIPROC = 0x7FFFFFFF, -} - -/** - * Section Attributes, sh_flags. - */ -enum : Elf64_Xword -{ - /// Section contains writable data. - SHF_WRITE = 0x1, - /// Section is allocated in memory image of program. - SHF_ALLOC = 0x2, - /// Section contains executable instructions. - SHF_EXECINSTR = 0x4, - /// Environment-specific use. - SHF_MASKOS = 0x0F000000, - /// Processor-specific use. - SHF_MASKPROC = 0xF0000000, -} - -enum : Elf64_Word -{ - /// Not visible outside the object file. 
- STB_LOCAL = 0, - /// Global symbol, visible to all object files. - STB_GLOBAL = 1, - /// Global scope, but with lower precedence than global symbols. - STB_WEAK = 2, - /// Environment-specific use. - STB_LOOS = 10, - STB_HIOS = 12, - /// Processor-specific use. - STB_LOPROC = 13, - STB_HIPROC = 15, -} - -enum : Elf64_Word -{ - /// No type specified (e.g., an absolute symbol). - STT_NOTYPE = 0, - /// Data object. - STT_OBJECT = 1, - /// Function entry point. - STT_FUNC = 2, - /// Symbol is associated with a section. - STT_SECTION = 3, - /// Source file associated with the object file. - STT_FILE = 4, - /// Environment-specific use. - STT_LOOS = 10, - STT_HIOS = 12, - /// Processor-specific use. - STT_LOPROC = 13, - STT_HIPROC = 15, -} - -Elf64_Ehdr makeFileHeader(Elf64_Off sectionHeaderOffset, - Elf64_Half sectionHeaderCount, - Elf64_Half stringIndex) @nogc -{ - Elf64_Ehdr header; - - // Magic number. - header.e_ident[0] = '\x7f'; - header.e_ident[1] = 'E'; - header.e_ident[2] = 'L'; - header.e_ident[3] = 'F'; - - // File class. - header.e_ident[4] = EI_CLASS.ELFCLASS64; - - // Data encoding. - header.e_ident[5] = EI_DATA.ELFDATA2LSB; - - // Version. - header.e_ident[6] = EV_CURRENT; - - // OS/ABI identification. - header.e_ident[7] = EI_OSABI.ELFOSABI_SYSV; - - // ABI version. - header.e_ident[8] = 0; - - // Size of e_ident[]. 
- header.e_ident[15] = 0; - - header.e_type = ET_REL; - header.e_machine = 0x3e; // EM_X86_64: AMD x86-64 architecture - header.e_version = EV_CURRENT; - header.e_entry = null; - header.e_phoff = 0; - header.e_shoff = sectionHeaderOffset; - header.e_flags = 0; - header.e_ehsize = Elf64_Ehdr.sizeof; - header.e_phentsize = 0; - header.e_phnum = 0; - header.e_shentsize = Elf64_Shdr.sizeof; - header.e_shnum = sectionHeaderCount; - header.e_shstrndx = stringIndex; - - return header; -} - -enum char[33] sectionStringTable = "\0.symtab\0.strtab\0.shstrtab\0.text\0"; - -Elf64_Shdr makeTextHeader(Elf64_Off offset, Elf64_Xword size) @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0x1b; - table.sh_type = SHT_PROGBITS; - table.sh_flags = SHF_EXECINSTR | SHF_ALLOC; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 1; - table.sh_entsize = 0; - - return table; -} - -Elf64_Shdr makeDataHeader(Elf64_Off offset, Elf64_Xword size) @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0x21; - table.sh_type = SHT_PROGBITS; - table.sh_flags = SHF_WRITE | SHF_ALLOC; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 1; - table.sh_entsize = 0; - - return table; -} - -Elf64_Shdr makeSymtableHeader(Elf64_Off offset, Elf64_Xword size, Elf64_Word entriesCount) @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0x01; - table.sh_type = SHT_SYMTAB; - table.sh_flags = 0; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = 0x03; // String table used by entries in this section. 
- table.sh_info = entriesCount; - table.sh_addralign = 8; - table.sh_entsize = Elf64_Sym.sizeof; - - return table; -} - -Elf64_Shdr makeStringHeader(Elf64_Word stringIndex, Elf64_Off offset, Elf64_Xword size) @nogc -{ - Elf64_Shdr table; - - table.sh_name = stringIndex; - table.sh_type = SHT_STRTAB; - table.sh_flags = 0; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 1; - table.sh_entsize = 0; - - return table; -} - -Elf64_Shdr makeInitialHeader() @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0; - table.sh_type = SHT_NULL; - table.sh_flags = 0; - table.sh_addr = null; - table.sh_offset = 0; - table.sh_size = 0; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 0; - table.sh_entsize = 0; - - return table; -} - -Elf64_Sym makeInitialSymTable() @nogc -{ - Elf64_Sym table; - - table.st_name = 0; - table.st_info = 0; - table.st_other = 0; - table.st_shndx = 0; - table.st_value = null; - table.st_size = 0; - - return table; -} - -Elf64_Sym makeMainSymTable(Elf64_Half textIndex) @nogc -{ - Elf64_Sym table; - - table.st_name = 0x01; - table.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); - table.st_other = 0; - table.st_shndx = textIndex; - table.st_value = null; - table.st_size = 0; - - return table; -} - -ubyte ELF32_ST_BIND(ubyte i) @nogc nothrow pure @safe -{ - return i >> 4; -} - -ubyte ELF32_ST_TYPE(ubyte i) @nogc nothrow pure @safe -{ - return i & 0xf; -} - -ubyte ELF32_ST_INFO(ubyte b, ubyte t) @nogc nothrow pure @safe -{ - return cast(ubyte) ((b << 4) + (t & 0xf)); -} - -/// Special Section Indices. -enum : Elf64_Half -{ - /// Used to mark an undefined or meaningless section reference. - SHN_UNDEF = 0, - /// Processor-specific use. - SHN_LOPROC = 0xFF00, - SHN_HIPROC = 0xFF1F, - /// Environment-specific use. - SHN_LOOS = 0xFF20, - SHN_HIOS = 0xFF3F, - /// Indicates that the corresponding reference is an absolute value. 
- SHN_ABS = 0xFFF1, - /** - * Indicates a symbol that has been declared as a common block (Fortran - * COMMON or C tentative declaration). - */ - SHN_COMMON = 0xFFF2, -} - -/** - * Object File Classes, e_ident[EI_CLASS]. - */ -enum EI_CLASS : ubyte -{ - /// 32-bit objects. - ELFCLASS32 = 1, - /// 64-bit objects. - ELFCLASS64 = 2, -} - -enum ubyte EV_CURRENT = 1; - -/** - * Data Encodings, e_ident[EI_DATA]. - */ -enum EI_DATA : ubyte -{ - /// Object file data structures are little-endian. - ELFDATA2LSB = 1, - /// Object file data structures are big-endian. - ELFDATA2MSB = 2, -} - -/** - * Operating System and ABI Identifiers, e_ident[EI_OSABI]. - */ -enum EI_OSABI : ubyte -{ - /// System V ABI. - ELFOSABI_SYSV = 0, - /// HP-UX operating system. - ELFOSABI_HPUX = 1, - /// Standalone (embedded) application. - ELFOSABI_STANDALONE = 255, -} - -enum : Elf64_Half -{ - ET_NONE = 0, /// No file type. - ET_REL = 1, /// Relocatable object file. - ET_EXEC = 2, /// Executable file. - ET_DYN = 3, /// Shared object file. - ET_CORE = 4, /// Core file. - ET_LOOS = 0xFE00, /// Environment-specific use. - ET_HIOS = 0xFEFF, - ET_LOPROC = 0xFF00, /// Processor-specific use. - ET_HIPROC = 0xFFFF, -} - -private size_t pad(size_t value) @nogc -{ - return (value / 8 + 1) * 8; -} - -struct Symbol -{ - String name; - Array!ubyte instructions; -} - -/* -.text - .globl main - .type main, @function -main: - movl $3, %eax - ret -*/ -immutable ubyte[] instructions = [ - // Opcode of pushq is “0x50 + r”, where “r” is the register opcode. - // Register opcode of %rbq is 5. - 0x50 + 5, // push% %rbp - 0x48, 0x89, 0xe5, // movq %rsp, %rbp - - 0xb8, 0x03, 0x00, 0x00, 0x00, // movl $3, %eax - - // Epilogue. 
- 0x48, 0x89, 0xec, // movq %rbp, %rsp - 0x58 + 5, // popq %rbp - 0xc3, // ret -]; - -void writeObject(Definition ast, String outputFilename) @nogc -{ - auto handle = fopen(outputFilename.toStringz, "wb"); - - if (handle is null) - { - perror("writing sample"); - return; - } - scope (exit) - { - fclose(handle); - } - - size_t currentOffset = Elf64_Ehdr.sizeof; - Array!Elf64_Shdr sectionHeaders; - Array!Elf64_Sym symbolEntries; - - // Prologue - Array!ubyte asmTemplate = Array!ubyte(cast(ubyte[]) [ - // Opcode of pushq is “0x50 + r”, where “r” is the register opcode. - // Register opcode of %rbq is 5. - 0x50 + 5, // pushq %rbp - 0x48, 0x89, 0xe5, // movq %rsp, %rbp - ]); - int i = 1; - foreach (statement; ast.statements[]) - { - if ((cast(Number) statement.subroutine.lhs) !is null) - { - // Opcode of mov is “0xb8 + r”, where “r” is the register opcode. - // Register opcode of %eax is 0. - asmTemplate.insertBack(cast(ubyte) 0xb8); // movl $x, %eax; where $x is a number. - asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.lhs).value)[0 .. int.sizeof]); - } - else if ((cast(Variable) statement.subroutine.lhs) !is null) - { - // movl -x(%rbp), %ebx; where x is a number. - asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x45]); - const disposition = (cast(Variable) statement.subroutine.lhs).counter * (-4); - asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); - } - if ((cast(Number) statement.subroutine.rhs) !is null) - { - // Opcode of mov is “0xb8 + r”, where “r” is the register opcode. - // Register opcode of %ebx is 3. - asmTemplate.insertBack(cast(ubyte) 0xbb); // movl $x, %ebx; where $x is a number. - asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.rhs).value)[0 .. int.sizeof]); - } - else if ((cast(Variable) statement.subroutine.rhs) !is null) - { - // movl -x(%rbp), %ebx; where x is a number. 
- asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x5d]); - const disposition = (cast(Variable) statement.subroutine.rhs).counter * (-4); - asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); - } - // Calculate the result and assign it to a variable on the stack. - asmTemplate.insertBack(cast(ubyte[]) [0x01, 0xd8]); // add %ebx, %eax - - asmTemplate.insertBack(cast(ubyte[]) [0x89, 0x45]); // movl %eax, -x(%rbp); where x is a number. - const disposition = i * (-4); - asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); - ++i; - } - // Epilogue. - asmTemplate.insertBack(cast(ubyte[]) [ - 0x48, 0x89, 0xec, // movq %rbp, %rsp - 0x58 + 5, // popq %rbp - 0xc3, // ret - ]); - - Symbol[1] symbols = [Symbol(String("main"), asmTemplate)]; - - sectionHeaders.insertBack(makeInitialHeader()); - symbolEntries.insertBack(makeInitialSymTable()); - - String stringTable = String("\0"); - foreach (symbol; symbols[]) - { - stringTable.insertBack(symbol.name[]); - stringTable.insertBack('\0'); - - sectionHeaders.insertBack(makeTextHeader(currentOffset, symbol.instructions.length)); - currentOffset = pad(currentOffset + symbol.instructions.length); - - symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1))); - } - - const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof; - sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, symbols.length)); - currentOffset += symbolTableSize; - - sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length)); - currentOffset += stringTable.length; - - sectionHeaders.insertBack(makeStringHeader(0x11, currentOffset, sectionStringTable.length)); - currentOffset = pad(currentOffset + sectionStringTable.length); - - auto fileHeader = makeFileHeader(currentOffset, 5, 4); - - version (none) - { - printf("%.2x\n", cast(uint) currentOffset); - } - ubyte[8] padding = 0; - size_t codeLength = stringTable.length + sectionStringTable.length; - - fwrite(&fileHeader, 
8, Elf64_Ehdr.sizeof / 8, handle); - foreach (symbol; symbols[]) - { - immutable size_t instructionsLength = pad(symbol.instructions.length); - fwrite(symbol.instructions.get.ptr, 1, symbol.instructions.length, handle); - fwrite(padding.ptr, 1, instructionsLength - symbol.instructions.length, handle); - codeLength += instructionsLength; - } - fwrite(symbolEntries.get.ptr, Elf64_Sym.sizeof, symbolEntries.length, handle); - fwrite(stringTable.get.ptr, 1, stringTable.length, handle); - fwrite(sectionStringTable.ptr, 1, sectionStringTable.length, handle); - fwrite(padding.ptr, pad(codeLength) - codeLength, 1, handle); - fwrite(sectionHeaders.get.ptr, Elf64_Shdr.sizeof, sectionHeaders.length, handle); -} - -String generate(Definition ast) @nogc -{ - // Prologue - String asmTemplate = ".text - .globl main - .type main, @function -main: - pushq %rbp - movq %rsp, %rbp -"; - - /* Allocate space on the stack for local variables. - asmTemplate.insertBack(" sub $"); - asmTemplate.insertBack(format!"{}"(ast.statements.length)[]); - asmTemplate.insertBack(", $esp\n"); */ - - int i = 1; - foreach (statement; ast.statements[]) - { - if ((cast(Number) statement.subroutine.lhs) !is null) - { - asmTemplate.insertBack(" movl $"); - asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.lhs).value)[]); - asmTemplate.insertBack(", %eax\n"); - } - else if ((cast(Variable) statement.subroutine.lhs) !is null) - { - asmTemplate.insertBack(" movl -"); - asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4)[]); - asmTemplate.insertBack("(%rbp), %eax\n"); - } - if ((cast(Number) statement.subroutine.rhs) !is null) - { - asmTemplate.insertBack(" movl $"); - asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.rhs).value)[]); - asmTemplate.insertBack(", %ebx\n"); - } - else if ((cast(Variable) statement.subroutine.rhs) !is null) - { - asmTemplate.insertBack(" movl -"); - asmTemplate.insertBack(format!"{}"((cast(Variable) 
statement.subroutine.rhs).counter * 4)[]); - asmTemplate.insertBack("(%rbp), %ebx\n"); - } - // Calculate the result and assign it to a variable on the stack. - asmTemplate.insertBack(" add %ebx, %eax\n"); - asmTemplate.insertBack(" movl %eax, -"); - asmTemplate.insertBack(format!"{}"(i * 4)[]); - asmTemplate.insertBack("(%rbp)\n"); - ++i; - } - - // Epilogue. - asmTemplate.insertBack(" movq %rbp, %rsp - popq %rbp - ret -"); - - return asmTemplate; -} diff --git a/source/elna/ir.d b/source/elna/ir.d deleted file mode 100644 index e2a8df4..0000000 --- a/source/elna/ir.d +++ /dev/null @@ -1,144 +0,0 @@ -module elna.ir; - -import parser = elna.parser; -import tanya.container.array; -import tanya.container.hashtable; -import tanya.container.string; -import tanya.memory.allocator; -import tanya.memory.mmappool; - -/** - * Definition. - */ -class Definition -{ - char[] identifier; - Array!Statement statements; - Array!VariableDeclaration variableDeclarations; -} - -class Statement -{ - Subroutine subroutine; -} - -abstract class Expression -{ -} - -class Number : Expression -{ - int value; -} - -class Variable : Expression -{ - size_t counter; -} - -class VariableDeclaration -{ - String identifier; -} - -class Subroutine -{ - Expression lhs, rhs; -} - -private Number transformNumber(parser.Number number) @nogc -{ - return MmapPool.instance.make!Number(number.value); -} - -private Variable transformSubroutine(parser.Subroutine subroutine, - ref Array!Statement statements, - ref HashTable!(String, int) constants) @nogc -{ - auto target = MmapPool.instance.make!Subroutine; - target.lhs = transformExpression(subroutine.lhs, statements, constants); - target.rhs = transformExpression(subroutine.rhs, statements, constants); - - auto newStatement = MmapPool.instance.make!Statement; - newStatement.subroutine = target; - statements.insertBack(newStatement); - - auto newVariable = MmapPool.instance.make!Variable; - newVariable.counter = statements.length; - - return newVariable; -} 
- -private Expression transformExpression(parser.Expression expression, - ref Array!Statement statements, - ref HashTable!(String, int) constants) @nogc -{ - if ((cast(parser.Number) expression) !is null) - { - auto numberExpression = MmapPool.instance.make!Number; - numberExpression.value = (cast(parser.Number) expression).value; - - return numberExpression; - } - if ((cast(parser.Variable) expression) !is null) - { - auto numberExpression = MmapPool.instance.make!Number; - numberExpression.value = constants[(cast(parser.Variable) expression).identifier]; - - return numberExpression; - } - else if ((cast(parser.Subroutine) expression) !is null) - { - return transformSubroutine(cast(parser.Subroutine) expression, statements, constants); - } - return null; -} - -Expression transformStatement(parser.Statement statement, - ref Array!Statement statements, - ref HashTable!(String, int) constants) @nogc -{ - if ((cast(parser.BangStatement) statement) !is null) - { - return transformExpression((cast(parser.BangStatement) statement).expression, statements, constants); - } - return null; -} - -HashTable!(String, int) transformConstants(ref Array!(parser.Definition) definitions) @nogc -{ - typeof(return) constants; - - foreach (definition; definitions[]) - { - constants[definition.identifier] = definition.number.value; - } - - return constants; -} - -Array!VariableDeclaration transformVariableDeclarations(ref Array!(parser.VariableDeclaration) variableDeclarations) -@nogc -{ - typeof(return) variables; - - foreach (ref variableDeclaration; variableDeclarations) - { - auto newDeclaration = MmapPool.instance.make!VariableDeclaration; - newDeclaration.identifier = variableDeclaration.identifier; - variables.insertBack(newDeclaration); - } - - return variables; -} - -Definition transform(parser.Block block) @nogc -{ - auto target = MmapPool.instance.make!Definition; - auto constants = transformConstants(block.definitions); - - transformStatement(block.statement, 
target.statements, constants); - target.variableDeclarations = transformVariableDeclarations(block.variableDeclarations); - - return target; -} diff --git a/source/elna/lexer.d b/source/elna/lexer.d deleted file mode 100644 index c47aae0..0000000 --- a/source/elna/lexer.d +++ /dev/null @@ -1,252 +0,0 @@ -module elna.lexer; - -import core.stdc.stdlib; -import core.stdc.ctype; -import core.stdc.string; -import elna.result; -import std.range; -import tanya.container.array; -import tanya.container.string; -import tanya.memory.mmappool; - -struct Token -{ - enum Type - { - number, - subroutine, // Operator. - let, - identifier, - equals, - var, - semicolon, - leftParen, - rightParen, - bang, - dot, - comma, - } - - union Value - { - int number; - String identifier; - } - - private Type type; - private Value value_; - private Position position_; - - @disable this(); - - this(Type type, Position position) @nogc nothrow pure @safe - { - this.type = type; - this.position_ = position; - } - - this(Type type, int value, Position position) @nogc nothrow pure @trusted - in (type == Type.number) - { - this(type, position); - this.value_.number = value; - } - - this()(Type type, auto ref String value, Position position) - @nogc nothrow pure @trusted - in (type == Type.identifier) - { - this(type, position); - this.value_.identifier = value; - } - - /** - * Params: - * type = Expected type. - * - * Returns: Whether this token is of the expected type. - */ - bool ofType(Type type) const @nogc nothrow pure @safe - { - return this.type == type; - } - - @property auto value(Type type)() @nogc nothrow pure @trusted - in (ofType(type)) - { - static if (type == Type.number) - { - return this.value_.number; - } - else static if (type == Type.identifier) - { - return this.value_.identifier; - } - else - { - static assert(false, "This type doesn't have a value"); - } - } - - /** - * Returns: The token position in the source text. 
- */ - @property const(Position) position() const @nogc nothrow pure @safe - { - return this.position_; - } -} - -/** - * Range over the source text that keeps track of the current position. - */ -struct Source -{ - char[] buffer; - Position position; - - this(char[] buffer) @nogc nothrow pure @safe - { - this.buffer = buffer; - } - - @disable this(); - - bool empty() @nogc nothrow pure @safe - { - return this.length == 0; - } - - char front() @nogc nothrow pure @safe - in (!empty) - { - return this.buffer[0]; - } - - void popFront() @nogc nothrow pure @safe - in (!empty) - { - this.buffer = buffer[1 .. $]; - ++this.position.column; - } - - void breakLine() @nogc nothrow pure @safe - in (!empty) - { - this.buffer = buffer[1 .. $]; - ++this.position.line; - this.position.column = 1; - } - - @property size_t length() const @nogc nothrow pure @safe - { - return this.buffer.length; - } - - char opIndex(size_t index) @nogc nothrow pure @safe - in (index < length) - { - return this.buffer[index]; - } - - char[] opSlice(size_t i, size_t j) @nogc nothrow pure @safe - in - { - assert(i <= j); - assert(j <= length); - } - do - { - return this.buffer[i .. j]; - } -} - -Array!Token lex(char[] buffer) @nogc -{ - Array!Token tokens; - auto source = Source(buffer); - - while (!source.empty) - { - if (source.front == ' ') - { - source.popFront; - } - else if (source.front >= '0' && source.front <= '9') // Multi-digit. 
- { - tokens.insertBack(Token(Token.Type.number, source.front - '0', source.position)); - source.popFront; - } - else if (source.front == '=') - { - tokens.insertBack(Token(Token.Type.equals, source.position)); - source.popFront; - } - else if (source.front == '(') - { - tokens.insertBack(Token(Token.Type.leftParen, source.position)); - source.popFront; - } - else if (source.front == ')') - { - tokens.insertBack(Token(Token.Type.rightParen, source.position)); - source.popFront; - } - else if (source.front == ';') - { - tokens.insertBack(Token(Token.Type.semicolon, source.position)); - source.popFront; - } - else if (source.front == ',') - { - tokens.insertBack(Token(Token.Type.comma, source.position)); - source.popFront; - } - else if (source.front == '!') - { - tokens.insertBack(Token(Token.Type.bang, source.position)); - source.popFront; - } - else if (source.front == '.') - { - tokens.insertBack(Token(Token.Type.dot, source.position)); - source.popFront; - } - else if (isalpha(source.front)) - { - size_t i = 1; - while (i < source.length && isalpha(source[i])) - { - ++i; - } - if (source[0 .. i] == "const") - { - tokens.insertBack(Token(Token.Type.let, source.position)); - } - else if (source[0 .. i] == "var") - { - tokens.insertBack(Token(Token.Type.var, source.position)); - } - else - { - auto identifier = String(source[0 .. i]); - tokens.insertBack(Token(Token.Type.identifier, identifier, source.position)); - } - source.popFrontN(i); - } - else if (source.front == '+') // Multi-character, random special characters. - { - tokens.insertBack(Token(Token.Type.subroutine, source.position)); - source.popFront; - } - else if (source.front == '\n') - { - source.breakLine; - } - else - { - return typeof(tokens)(); // Error. 
- } - } - return tokens; -} diff --git a/source/elna/parser.d b/source/elna/parser.d deleted file mode 100644 index 28a090b..0000000 --- a/source/elna/parser.d +++ /dev/null @@ -1,269 +0,0 @@ -module elna.parser; - -import elna.lexer; -import elna.result; -import tanya.container.array; -import tanya.container.string; -import tanya.memory.allocator; -import tanya.memory.mmappool; - -/** - * Constant definition. - */ -class Definition -{ - Number number; - String identifier; -} - -/** - * Variable declaration. - */ -class VariableDeclaration -{ - String identifier; -} - -abstract class Statement -{ -} - -class BangStatement : Statement -{ - Expression expression; -} - -class Block -{ - Array!Definition definitions; - Array!VariableDeclaration variableDeclarations; - Statement statement; -} - -abstract class Expression -{ -} - -class Number : Expression -{ - int value; -} - -class Variable : Expression -{ - String identifier; -} - -class Subroutine : Expression -{ - Expression lhs, rhs; -} - -private Result!Expression parseExpression(ref Array!(Token).Range tokens) @nogc -in (!tokens.empty, "Expected expression, got end of stream") -{ - if (tokens.front.ofType(Token.Type.number)) - { - auto number = MmapPool.instance.make!Number; - number.value = tokens.front.value!(Token.Type.number); - tokens.popFront; - return Result!Expression(number); - } - else if (tokens.front.ofType(Token.Type.identifier)) - { - auto variable = MmapPool.instance.make!Variable; - variable.identifier = tokens.front.value!(Token.Type.identifier); - tokens.popFront; - return Result!Expression(variable); - } - else if (tokens.front.ofType(Token.Type.subroutine)) - { - auto subroutine = MmapPool.instance.make!Subroutine; - tokens.popFront; - auto expression = parseExpression(tokens); - if (expression.valid) - { - subroutine.lhs = expression.result; - } - else - { - return Result!Expression("Expected left-hand side to be an expression", tokens.front.position); - } - expression = 
parseExpression(tokens); - if (expression.valid) - { - subroutine.rhs = expression.result; - } - else - { - return Result!Expression("Expected left-hand side to be an expression", tokens.front.position); - } - return Result!Expression(subroutine); - } - else if (tokens.front.ofType(Token.Type.leftParen)) - { - tokens.popFront; - - auto expression = parseExpression(tokens); - - tokens.popFront; - return expression; - } - return Result!Expression("Expected an expression", tokens.front.position); -} - -private Result!Definition parseDefinition(ref Array!Token.Range tokens) @nogc -in (!tokens.empty, "Expected definition, got end of stream") -{ - auto definition = MmapPool.instance.make!Definition; - definition.identifier = tokens.front.value!(Token.Type.identifier); // Copy. - - tokens.popFront(); - tokens.popFront(); // Skip the equals sign. - - if (tokens.front.ofType(Token.Type.number)) - { - auto number = MmapPool.instance.make!Number; - number.value = tokens.front.value!(Token.Type.number); - definition.number = number; - tokens.popFront; - return Result!Definition(definition); - } - return Result!Definition("Expected a number", tokens.front.position); -} - -private Result!Statement parseStatement(ref Array!Token.Range tokens) @nogc -in (!tokens.empty, "Expected block, got end of stream") -{ - if (tokens.front.ofType(Token.Type.bang)) - { - tokens.popFront; - auto statement = MmapPool.instance.make!BangStatement; - auto expression = parseExpression(tokens); - if (expression.valid) - { - statement.expression = expression.result; - } - else - { - return Result!Statement(expression.error.get); - } - return Result!Statement(statement); - } - return Result!Statement("Expected ! statement", tokens.front.position); -} - -private Result!(Array!Definition) parseDefinitions(ref Array!Token.Range tokens) @nogc -in (!tokens.empty, "Expected definition, got end of stream") -{ - tokens.popFront; // Skip const. 
- - Array!Definition definitions; - - while (!tokens.empty) - { - auto definition = parseDefinition(tokens); - if (!definition.valid) - { - return typeof(return)(definition.error.get); - } - definitions.insertBack(definition.result); - if (tokens.front.ofType(Token.Type.semicolon)) - { - break; - } - if (tokens.front.ofType(Token.Type.comma)) - { - tokens.popFront; - } - } - - return typeof(return)(definitions); -} - -private Result!(Array!VariableDeclaration) parseVariableDeclarations(ref Array!Token.Range tokens) @nogc -in (!tokens.empty, "Expected variable declarations, got end of stream") -{ - tokens.popFront; // Skip var. - - Array!VariableDeclaration variableDeclarations; - - while (!tokens.empty) - { - auto currentToken = tokens.front; - if (currentToken.ofType(Token.Type.identifier)) - { - auto variableDeclaration = MmapPool.instance.make!VariableDeclaration; - variableDeclaration.identifier = currentToken.value!(Token.Type.identifier); - variableDeclarations.insertBack(variableDeclaration); - tokens.popFront; - } - else - { - return typeof(return)("Expected variable name", tokens.front.position); - } - if (tokens.empty) - { - return typeof(return)("Expected \";\" or \",\" name", currentToken.position); - } - if (tokens.front.ofType(Token.Type.semicolon)) - { - break; - } - if (tokens.front.ofType(Token.Type.comma)) - { - tokens.popFront; - } - } - - return typeof(return)(variableDeclarations); -} - -private Result!Block parseBlock(ref Array!Token.Range tokens) @nogc -in (!tokens.empty, "Expected block, got end of stream") -{ - auto block = MmapPool.instance.make!Block; - if (tokens.front.ofType(Token.Type.let)) - { - auto constDefinitions = parseDefinitions(tokens); - if (constDefinitions.valid) - { - block.definitions = constDefinitions.result; - } - else - { - return Result!Block(constDefinitions.error.get); - } - tokens.popFront; - } - if (tokens.front.ofType(Token.Type.var)) - { - auto variableDeclarations = parseVariableDeclarations(tokens); - if 
(variableDeclarations.valid) - { - block.variableDeclarations = variableDeclarations.result; - } - else - { - return Result!Block(variableDeclarations.error.get); - } - tokens.popFront; - } - auto statement = parseStatement(tokens); - if (statement.valid) - { - block.statement = statement.result; - } - else - { - return Result!Block(statement.error.get); - } - - return Result!Block(block); -} - -Result!Block parse(ref Array!Token tokenStream) @nogc -{ - auto tokens = tokenStream[]; - return parseBlock(tokens); -} diff --git a/source/elna/result.d b/source/elna/result.d deleted file mode 100644 index 049c453..0000000 --- a/source/elna/result.d +++ /dev/null @@ -1,84 +0,0 @@ -module elna.result; - -import std.typecons; - -/** - * Position in the source text. - */ -struct Position -{ - /// Line. - size_t line = 1; - - /// Column. - size_t column = 1; -} - -struct CompileError -{ - private string message_; - - private Position position_; - - @disable this(); - - /** - * Params: - * message = Error text. - * position = Error position in the source text. - */ - this(string message, Position position) @nogc nothrow pure @safe - { - this.message_ = message; - this.position_ = position; - } - - /// Error text. - @property string message() const @nogc nothrow pure @safe - { - return this.message_; - } - - /// Error line in the source text. - @property size_t line() const @nogc nothrow pure @safe - { - return this.position_.line; - } - - /// Error column in the source text. 
- @property size_t column() const @nogc nothrow pure @safe - { - return this.position_.column; - } -} - -struct Result(T) -{ - Nullable!CompileError error; - T result; - - this(T result) - { - this.result = result; - this.error = typeof(this.error).init; - } - - this(string message, Position position) - { - this.result = T.init; - this.error = CompileError(message, position); - } - - this(CompileError compileError) - { - this.result = null; - this.error = compileError; - } - - @disable this(); - - @property bool valid() const - { - return error.isNull; - } -} diff --git a/source/main.d b/source/main.d deleted file mode 100644 index 71d23cd..0000000 --- a/source/main.d +++ /dev/null @@ -1,72 +0,0 @@ -import core.stdc.stdio; -import core.stdc.string; -import core.stdc.stdlib; -import elna.lexer; -import elna.parser; -import elna.generator; -import elna.ir; -import tanya.container.string; -import tanya.memory.allocator; -import tanya.memory.mmappool; - -private char[] readSource(size_t N)(string source, out char[N] buffer) @nogc -{ - memcpy(buffer.ptr, source.ptr, source.length + 1); - buffer[source.length] = '\0'; - auto handle = fopen(buffer.ptr, "r"); - if (handle is null) - { - perror(buffer.ptr); - return null; - } - fseek(handle, 0, SEEK_END); - size_t fsize = ftell(handle); - rewind(handle); - - fread(buffer.ptr, fsize, 1, handle); - fclose(handle); - buffer[fsize] = '\0'; - - return buffer[0 .. 
fsize]; -} - -int main(string[] args) -{ - char[255] buffer; - - defaultAllocator = MmapPool.instance; - - if (args.length < 2) - { - return 4; - } - auto sourceText = readSource(args[1], buffer); - if (sourceText is null) - { - return 3; - } - auto tokens = lex(sourceText); - if (tokens.length == 0) - { - printf("Lexical analysis failed.\n"); - return 1; - } - auto ast = parse(tokens); - if (!ast.valid) - { - auto compileError = ast.error.get; - printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr); - return 2; - } - auto ir = transform(ast.result); - - String outputFilename = String("build/"); - outputFilename.insertBack(args[1][0 .. $ - 4]); - outputFilename.insertBack("o"); - writeObject(ir, outputFilename); - - auto code = generate(ir); - printf("%s", code.toStringz()); - - return 0; -} diff --git a/tests/const_list.elna b/tests/const_list.elna deleted file mode 100644 index 18a6711..0000000 --- a/tests/const_list.elna +++ /dev/null @@ -1,3 +0,0 @@ -const a = 1, b = 2; -! + a b -. diff --git a/tests/expectations/const_list.txt b/tests/expectations/const_list.txt deleted file mode 100644 index 00750ed..0000000 --- a/tests/expectations/const_list.txt +++ /dev/null @@ -1 +0,0 @@ -3 diff --git a/tests/expectations/sum.txt b/tests/expectations/sum.txt deleted file mode 100644 index 45a4fb7..0000000 --- a/tests/expectations/sum.txt +++ /dev/null @@ -1 +0,0 @@ -8 diff --git a/tests/expectations/sums.txt b/tests/expectations/sums.txt deleted file mode 100644 index 45a4fb7..0000000 --- a/tests/expectations/sums.txt +++ /dev/null @@ -1 +0,0 @@ -8 diff --git a/tests/sum.elna b/tests/sum.elna deleted file mode 100644 index 12343f0..0000000 --- a/tests/sum.elna +++ /dev/null @@ -1,2 +0,0 @@ -! + 1 7 -. diff --git a/tests/sums.elna b/tests/sums.elna deleted file mode 100644 index bf80ecc..0000000 --- a/tests/sums.elna +++ /dev/null @@ -1,2 +0,0 @@ -! + 1 (+ 3 4) -.