Start a Modula-2 experiment

This commit is contained in:
2022-06-05 23:43:45 +02:00
parent 5490f6ce1c
commit f29e68ec93
23 changed files with 994 additions and 1605 deletions

2
.gitignore vendored
View File

@@ -1,3 +1,3 @@
/.dub/
a.out
/dub.selections.json
/build/

37
README
View File

@@ -1,37 +0,0 @@
# Elna programming language
Elna compiles simple mathematical operations to machine code.
The compiled program returns the result of the operation.
## File extension
.elna
## Grammar PL/0
program = block "." ;
block = [ "const" ident "=" number {"," ident "=" number} ";"]
[ "var" ident {"," ident} ";"]
{ "procedure" ident ";" block ";" } statement ;
statement = [ ident ":=" expression | "call" ident
| "?" ident | "!" expression
| "begin" statement {";" statement } "end"
| "if" condition "then" statement
| "while" condition "do" statement ];
condition = "odd" expression |
expression ("="|"#"|"<"|"<="|">"|">=") expression ;
expression = [ "+"|"-"] term { ("+"|"-") term};
term = factor {("*"|"/") factor};
factor = ident | number | "(" expression ")";
## Operations
"!" - Write a line.
"?" - Read user input.
"odd" - The only function, returns whether a number is odd.

63
README.md Normal file
View File

@@ -0,0 +1,63 @@
# Elna programming language
Elna is a simple, imperative, low-level programming language.
It is intended to accompany other languages in areas where a high-level
language doesn't fit well. It is also supposed to be an intermediate
representation for such a hypothetical high-level programming language.
## File extension
.elna
## Current implementation
This repository contains a GCC frontend for Elna. After finishing the frontend
I'm planning to rewrite the compiler in Elna itself with its own backend and
a hand-written parser. So GCC gives a way to have a simple bootstrap compiler
and a possibility to compile Elna programs for different platforms.
## Grammar
program = block "." ;
block = [ "const" ident "=" number {"," ident "=" number} ";"]
[ "var" ident {"," ident} ";"]
{ "procedure" ident ";" block ";" } statement ;
statement = [ ident ":=" expression | "call" ident
| "?" ident | "!" expression
| "begin" statement {";" statement } "end"
| "if" condition "then" statement
| "while" condition "do" statement ];
condition = "odd" expression |
expression ("="|"#"|"<"|"<="|">"|">=") expression ;
expression = [ "+"|"-"] term { ("+"|"-") term};
term = factor {("*"|"/") factor};
factor = ident | number | "(" expression ")";
## Build
The frontend requires GCC 14.2.0 (not tested with other versions).
Download the GCC source. Copy the contents of this repository into `gcc/elna`
inside GCC. Finally build GCC enabling the frontend with
`--enable-languages=c,c++,elna`. After the installation the compiler can be
invoked with `$prefix/bin/gelna`.
There is also a `Rakefile` that downloads, builds and installs GCC into the
`./build/` subdirectory. The `Rakefile` assumes that ruby and rake, as well as
all GCC dependencies are already available in the system. It works under Linux
and Mac OS. In the latter case GCC is patched with the patches used by Homebrew
(official GCC doesn't support Apple silicon targets). Invoke with
```sh
rake boot
```
See `rake -T` for more tasks. The GCC source is under `build/tools`. The
installation path is `build/host/install`.

130
Rakefile
View File

@@ -2,68 +2,82 @@ require 'pathname'
require 'rake/clean'
require 'open3'
DFLAGS = ['--warn-no-deprecated', '-L/usr/lib64/gcc-12']
BINARY = 'build/bin/elna'
TESTS = FileList['tests/*.elna']
.map { |test| (Pathname.new('build') + test).sub_ext('').to_path }
SOURCES = FileList['source/**/*.d']
directory 'build'
CLEAN.include 'build'
CLEAN.include '.dub'
rule(/build\/tests\/.+/ => ->(file) { test_for_out(file) }) do |t|
Pathname.new(t.name).dirname.mkpath
sh BINARY, t.source
sh 'gcc', '-o', t.name, "#{t.name}.o"
# Open3.pipeline [BINARY, t.source], ['gcc', '-x', 'assembler', '-o', t.name, '-']
end
file BINARY => SOURCES do |t|
sh({ 'DFLAGS' => (DFLAGS * ' ') }, 'dub', 'build', '--compiler=gdc-12')
end
file 'build/tests/sample' => BINARY do |t|
sh t.source
sh 'gcc', '-o', t.name, 'build/tests/sample.o'
end
task default: BINARY
desc 'Run all tests and check the results'
task test: TESTS
task test: BINARY do
TESTS.each do |test|
expected = Pathname
.new(test)
.sub_ext('.txt')
.sub(/^build\/tests\//, 'tests/expectations/')
.read
.to_i
puts "Running #{test}"
system test
actual = $?.exitstatus
fail "#{test}: Expected #{expected}, got #{actual}" unless expected == actual
M2C = 'gm2' # Modula-2 compiler.
BOOT_OBJECTS = FileList['boot/*.mod']
.map do |source|
Pathname.new(source).basename.sub_ext('.o')
end
# system './build/tests/sample'
# actual = $?.exitstatus
# fail "./build/tests/sample: Expected 3, got #{actual}" unless 3 == actual
def source_for_object(out_file)
path = Pathname.new(out_file).relative_path_from('build')
result = ['build/boot']
definition = File.join('boot', path.basename.sub_ext('.def'))
result << definition if File.exist? definition
implementation = path.sub_ext('.mod').to_path
implementation = File.join 'build', implementation unless File.exist? implementation
result << implementation
end
desc 'Run unittest blocks'
task unittest: SOURCES do |t|
sh('dub', 'test', '--compiler=gdc-12')
directory 'build/boot'
directory 'build/self'
CLEAN.include 'build'
rule(/build\/.+\.o$/ => ->(file) { source_for_object(file) }) do |t|
sources = t.prerequisites.filter { |f| f.end_with? '.mod' }
sh M2C, '-c', '-I', 'boot', '-o', t.name, *sources
end
def test_for_out(out_file)
test_source = Pathname
.new(out_file)
.sub_ext('.elna')
.sub(/^build\//, '')
.to_path
[test_source, BINARY]
rule(/build\/self\/.+\.mod$/ => [
'build/self', 'build/boot/Compiler',
->(file) { File.join('boot', Pathname.new(file).basename) }
]) do |t|
sources, compiler = t.prerequisites
.reject { |f| File.directory? f }
.partition { |f| f.end_with? '.mod' }
File.open t.name, 'w' do |output|
puts
puts(compiler * ' ')
Open3.popen2(*compiler) do |cl_in, cl_out|
cl_in.write File.read(*sources)
cl_in.close
IO.copy_stream cl_out, output
cl_out.close
end
end
end
['boot', 'self'].each do |sub|
compiler_binary = Pathname.new('build') + sub + 'Compiler'
file compiler_binary.to_path => BOOT_OBJECTS.map { |file| File.join('build', sub, file) } do |t|
sh M2C, '-o', t.name, *t.prerequisites
end
compiler_object = compiler_binary.sub_ext('.o')
file compiler_object.to_path => source_for_object(compiler_object) do |t|
sources = t.prerequisites.filter { |f| f.end_with? '.mod' }
sh M2C, '-fscaffold-main', '-c', '-I', 'boot', '-o', t.name, *sources
end
end
task default: 'build/self/Compiler'
task default: 'build/self/Compiler.mod'
task default: 'boot/Compiler.mod'
task :default do |t|
exe, previous_output, source = t.prerequisites
cat_arguments = ['cat', source]
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
puts [cat_arguments * ' ', exe, diff_arguments * ' '].join(' | ')
Open3.pipeline(cat_arguments, exe, diff_arguments)
end

18
boot/Compiler.mod Normal file
View File

@@ -0,0 +1,18 @@
MODULE Compiler;
FROM FIO IMPORT StdIn;
FROM SYSTEM IMPORT ADR;
FROM Lexer IMPORT Lexer, LexerDestroy, LexerInitialize;
FROM Transpiler IMPORT Transpile;
VAR
ALexer: Lexer;
BEGIN
(* Set up the lexer so that it takes its input from standard input. *)
LexerInitialize(ADR(ALexer), StdIn);
(* Hand the initialized lexer over to the transpiler. *)
Transpile(ADR(ALexer));
(* Tear the lexer down after the transpiler has returned. *)
LexerDestroy(ADR(ALexer))
END Compiler.

92
boot/Lexer.def Normal file
View File

@@ -0,0 +1,92 @@
DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
TYPE
(* Pointer to a single character. Used to address positions in the lexer input. *)
PLexerBuffer = POINTER TO CHAR;
(*
 * Lexer state.
 *
 * Input is the file given to LexerInitialize. Start points to the first
 * character of the token being read and Current to the character that is
 * examined next. Buffer, Size and Length presumably describe the input
 * buffer and how much of it is in use - TODO confirm against the
 * implementation of LexerInitialize.
 *)
Lexer = RECORD
Input: File;
Buffer: PLexerBuffer;
Size: CARDINAL;
Length: CARDINAL;
Start: PLexerBuffer;
Current: PLexerBuffer
END;
PLexer = POINTER TO Lexer;
(* All token kinds the lexer can assign to a token. *)
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindImplementation,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindNot,
lexerKindReturn,
lexerKindDefinition,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindMultiplication,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindExclamation,
lexerKindArrow,
lexerKindTrait,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
(*
 * A token. Kind is the tag of the variant record. The booleanKind field exists
 * only for lexerKindBoolean tokens and holds the value of the literal.
 *)
LexerToken = RECORD
CASE Kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN
END
END;
PLexerToken = POINTER TO LexerToken;
(* Prepares the lexer ALexer points to for reading from Input. *)
PROCEDURE LexerInitialize(ALexer: PLexer; Input: File);
(* Counterpart of LexerInitialize, to be called when the lexer is no longer needed. *)
PROCEDURE LexerDestroy(ALexer: PLexer);
(* Returns a token read by the given lexer; presumably the next token of the input - TODO confirm. *)
PROCEDURE LexerLex(ALexer: PLexer): LexerToken;
END Lexer.

717
boot/Lexer.mod Normal file
View File

@@ -0,0 +1,717 @@
IMPLEMENTATION MODULE Lexer;
FROM FIO IMPORT ReadNBytes;
FROM SYSTEM IMPORT ADR;
FROM Storage IMPORT DEALLOCATE, ALLOCATE;
FROM Strings IMPORT Length;
FROM MemUtils IMPORT MemZero;
CONST
ChunkSize = 65536;
TYPE
(*
* Classification table assigns each possible character to a group (class). All
* characters of the same group are handled equivalently.
*
* Classification:
*)
TransitionClass = (
transitionClassInvalid,
transitionClassDigit,
transitionClassAlpha,
transitionClassSpace,
transitionClassColon,
transitionClassEquals,
transitionClassLeftParen,
transitionClassRightParen,
transitionClassAsterisk,
transitionClassUnderscore,
transitionClassSingle,
transitionClassHex,
transitionClassZero,
transitionClassX,
transitionClassEof,
transitionClassDot,
transitionClassMinus,
transitionClassSingleQuote,
transitionClassDoubleQuote,
transitionClassGreater,
transitionClassLess,
transitionClassOther
);
TransitionState = (
transitionStateStart,
transitionStateColon,
transitionStateIdentifier,
transitionStateDecimal,
transitionStateGreater,
transitionStateMinus,
transitionStateLeftParen,
transitionStateLess,
transitionStateDot,
transitionStateComment,
transitionStateClosingComment,
transitionStateCharacter,
transitionStateString,
transitionStateLeadingZero,
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = PROCEDURE(PLexer, PLexerToken);
Transition = RECORD
Action: TransitionAction;
NextState: TransitionState
END;
VAR
Classification: ARRAY[1..128] OF TransitionClass;
Transitions: ARRAY[0..MAX(TransitionState)] OF ARRAY[0..MAX(TransitionClass)] OF Transition;
(*
 * Fills the Classification table with the class of every ASCII character.
 *
 * The table starts at index 1, so a character is stored at the index equal to
 * its ASCII code plus one: Classification[1] describes NUL and
 * Classification[128] describes DEL. The comment after each assignment names
 * the character that is classified by it.
 *)
PROCEDURE InitializeClassification();
BEGIN
Classification[1] := transitionClassEof; (* NUL *)
Classification[2] := transitionClassInvalid; (* SOH *)
Classification[3] := transitionClassInvalid; (* STX *)
Classification[4] := transitionClassInvalid; (* ETX *)
Classification[5] := transitionClassInvalid; (* EOT *)
Classification[6] := transitionClassInvalid; (* ENQ *)
Classification[7] := transitionClassInvalid; (* ACK *)
Classification[8] := transitionClassInvalid; (* BEL *)
Classification[9] := transitionClassInvalid; (* BS *)
Classification[10] := transitionClassSpace; (* HT *)
Classification[11] := transitionClassSpace; (* LF *)
Classification[12] := transitionClassInvalid; (* VT *)
Classification[13] := transitionClassInvalid; (* FF *)
Classification[14] := transitionClassSpace; (* CR *)
Classification[15] := transitionClassInvalid; (* SO *)
Classification[16] := transitionClassInvalid; (* SI *)
Classification[17] := transitionClassInvalid; (* DLE *)
Classification[18] := transitionClassInvalid; (* DC1 *)
Classification[19] := transitionClassInvalid; (* DC2 *)
Classification[20] := transitionClassInvalid; (* DC3 *)
Classification[21] := transitionClassInvalid; (* DC4 *)
Classification[22] := transitionClassInvalid; (* NAK *)
Classification[23] := transitionClassInvalid; (* SYN *)
Classification[24] := transitionClassInvalid; (* ETB *)
Classification[25] := transitionClassInvalid; (* CAN *)
Classification[26] := transitionClassInvalid; (* EM *)
Classification[27] := transitionClassInvalid; (* SUB *)
Classification[28] := transitionClassInvalid; (* ESC *)
Classification[29] := transitionClassInvalid; (* FS *)
Classification[30] := transitionClassInvalid; (* GS *)
Classification[31] := transitionClassInvalid; (* RS *)
Classification[32] := transitionClassInvalid; (* US *)
Classification[33] := transitionClassSpace; (* Space *)
Classification[34] := transitionClassSingle; (* ! *)
Classification[35] := transitionClassDoubleQuote; (* " *)
Classification[36] := transitionClassOther; (* # *)
Classification[37] := transitionClassOther; (* $ *)
Classification[38] := transitionClassSingle; (* % *)
Classification[39] := transitionClassSingle; (* & *)
Classification[40] := transitionClassSingleQuote; (* ' *)
Classification[41] := transitionClassLeftParen; (* ( *)
Classification[42] := transitionClassRightParen; (* ) *)
Classification[43] := transitionClassAsterisk; (* * *)
Classification[44] := transitionClassSingle; (* + *)
Classification[45] := transitionClassSingle; (* , *)
Classification[46] := transitionClassMinus; (* - *)
Classification[47] := transitionClassDot; (* . *)
Classification[48] := transitionClassSingle; (* / *)
Classification[49] := transitionClassZero; (* 0 *)
Classification[50] := transitionClassDigit; (* 1 *)
Classification[51] := transitionClassDigit; (* 2 *)
Classification[52] := transitionClassDigit; (* 3 *)
Classification[53] := transitionClassDigit; (* 4 *)
Classification[54] := transitionClassDigit; (* 5 *)
Classification[55] := transitionClassDigit; (* 6 *)
Classification[56] := transitionClassDigit; (* 7 *)
Classification[57] := transitionClassDigit; (* 8 *)
Classification[58] := transitionClassDigit; (* 9 *)
Classification[59] := transitionClassColon; (* : *)
Classification[60] := transitionClassSingle; (* ; *)
Classification[61] := transitionClassLess; (* < *)
Classification[62] := transitionClassEquals; (* = *)
Classification[63] := transitionClassGreater; (* > *)
Classification[64] := transitionClassOther; (* ? *)
Classification[65] := transitionClassSingle; (* @ *)
Classification[66] := transitionClassAlpha; (* A *)
Classification[67] := transitionClassAlpha; (* B *)
Classification[68] := transitionClassAlpha; (* C *)
Classification[69] := transitionClassAlpha; (* D *)
Classification[70] := transitionClassAlpha; (* E *)
Classification[71] := transitionClassAlpha; (* F *)
Classification[72] := transitionClassAlpha; (* G *)
Classification[73] := transitionClassAlpha; (* H *)
Classification[74] := transitionClassAlpha; (* I *)
Classification[75] := transitionClassAlpha; (* J *)
Classification[76] := transitionClassAlpha; (* K *)
Classification[77] := transitionClassAlpha; (* L *)
Classification[78] := transitionClassAlpha; (* M *)
Classification[79] := transitionClassAlpha; (* N *)
Classification[80] := transitionClassAlpha; (* O *)
Classification[81] := transitionClassAlpha; (* P *)
Classification[82] := transitionClassAlpha; (* Q *)
Classification[83] := transitionClassAlpha; (* R *)
Classification[84] := transitionClassAlpha; (* S *)
Classification[85] := transitionClassAlpha; (* T *)
Classification[86] := transitionClassAlpha; (* U *)
Classification[87] := transitionClassAlpha; (* V *)
Classification[88] := transitionClassAlpha; (* W *)
Classification[89] := transitionClassAlpha; (* X *)
Classification[90] := transitionClassAlpha; (* Y *)
Classification[91] := transitionClassAlpha; (* Z *)
Classification[92] := transitionClassSingle; (* [ *)
Classification[93] := transitionClassOther; (* \ *)
Classification[94] := transitionClassSingle; (* ] *)
Classification[95] := transitionClassSingle; (* ^ *)
Classification[96] := transitionClassUnderscore; (* _ *)
Classification[97] := transitionClassOther; (* ` *)
Classification[98] := transitionClassHex; (* a *)
Classification[99] := transitionClassHex; (* b *)
Classification[100] := transitionClassHex; (* c *)
Classification[101] := transitionClassHex; (* d *)
Classification[102] := transitionClassHex; (* e *)
Classification[103] := transitionClassHex; (* f *)
Classification[104] := transitionClassAlpha; (* g *)
Classification[105] := transitionClassAlpha; (* h *)
Classification[106] := transitionClassAlpha; (* i *)
Classification[107] := transitionClassAlpha; (* j *)
Classification[108] := transitionClassAlpha; (* k *)
Classification[109] := transitionClassAlpha; (* l *)
Classification[110] := transitionClassAlpha; (* m *)
Classification[111] := transitionClassAlpha; (* n *)
Classification[112] := transitionClassAlpha; (* o *)
Classification[113] := transitionClassAlpha; (* p *)
Classification[114] := transitionClassAlpha; (* q *)
Classification[115] := transitionClassAlpha; (* r *)
Classification[116] := transitionClassAlpha; (* s *)
Classification[117] := transitionClassAlpha; (* t *)
Classification[118] := transitionClassAlpha; (* u *)
Classification[119] := transitionClassAlpha; (* v *)
Classification[120] := transitionClassAlpha; (* w *)
Classification[121] := transitionClassX; (* x *)
Classification[122] := transitionClassAlpha; (* y *)
Classification[123] := transitionClassAlpha; (* z *)
Classification[124] := transitionClassOther; (* { *)
Classification[125] := transitionClassSingle; (* | *)
Classification[126] := transitionClassOther; (* } *)
Classification[127] := transitionClassSingle; (* ~ *)
Classification[128] := transitionClassInvalid (* DEL *)
END InitializeClassification;
(*
 * Tells whether the characters between TokenStart (inclusive) and TokenEnd
 * (exclusive) spell exactly the given keyword. The comparison is case
 * sensitive and stops at the first mismatch.
 *)
PROCEDURE CompareKeyword(Keyword: ARRAY OF CHAR; TokenStart: PLexerBuffer; TokenEnd: PLexerBuffer): BOOLEAN;
VAR
  Matches: BOOLEAN;
  Position: CARDINAL;
  KeywordLength: CARDINAL;
BEGIN
  KeywordLength := Length(Keyword);
  Position := 0;
  Matches := TRUE;

  WHILE Matches AND (Position < KeywordLength) AND (TokenStart <> TokenEnd) DO
    Matches := Keyword[Position] = TokenStart^;
    INC(TokenStart);
    INC(Position)
  END;
  RETURN Matches AND (Position = KeywordLength) AND (TokenStart = TokenEnd)
END CompareKeyword;
(* Reached the end of file: marks the token as lexerKindEof. The lexer itself
 * is neither read nor modified by this action. *)
PROCEDURE TransitionActionEof(ALexer: PLexer; AToken: PLexerToken);
BEGIN
AToken^.Kind := lexerKindEof
END TransitionActionEof;
(* Add the character to the token currently read and advance to the next
 * character. Only the Current pointer of the lexer moves; Start keeps pointing
 * to the beginning of the token and the token itself is not touched. *)
PROCEDURE TransitionActionAccumulate(ALexer: PLexer; AToken: PLexerToken);
BEGIN
INC(ALexer^.Current)
END TransitionActionAccumulate;
(*
 * The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The kind of the token is derived from its first character:
 * ':' colon, '>' greater than, '<' less than, '(' left parenthesis,
 * '-' minus and '.' dot.
 *
 * ALexer - lexer whose Start points to the first character of the token.
 * AToken - token whose Kind is set. It is left unchanged if the token starts
 *          with any other character.
 *)
PROCEDURE TransitionActionFinalize(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  IF ALexer^.Start^ = ':' THEN
    AToken^.Kind := lexerKindColon
  ELSIF ALexer^.Start^ = '>' THEN
    AToken^.Kind := lexerKindGreaterThan
  ELSIF ALexer^.Start^ = '<' THEN
    AToken^.Kind := lexerKindLessThan
  ELSIF ALexer^.Start^ = '(' THEN
    AToken^.Kind := lexerKindLeftParen
  ELSIF ALexer^.Start^ = '-' THEN
    AToken^.Kind := lexerKindMinus
  ELSIF ALexer^.Start^ = '.' THEN
    AToken^.Kind := lexerKindDot
  END
END TransitionActionFinalize;
(*
 * An action for tokens containing multiple characters. The first character of
 * the token (Start) and the current character together select the kind:
 * "<>" not equal, "<=" less or equal, ">=" greater or equal, ".." range and
 * ":=" assignment. Any other pair leaves the kind of the token unchanged.
 * The current character is consumed in every case.
 *)
PROCEDURE TransitionActionComposite(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  IF (ALexer^.Start^ = '<') AND (ALexer^.Current^ = '>') THEN
    AToken^.Kind := lexerKindNotEqual
  ELSIF (ALexer^.Start^ = '<') AND (ALexer^.Current^ = '=') THEN
    AToken^.Kind := lexerKindLessEqual
  ELSIF (ALexer^.Start^ = '>') AND (ALexer^.Current^ = '=') THEN
    AToken^.Kind := lexerKindGreaterEqual
  ELSIF (ALexer^.Start^ = '.') AND (ALexer^.Current^ = '.') THEN
    AToken^.Kind := lexerKindRange
  ELSIF (ALexer^.Start^ = ':') AND (ALexer^.Current^ = '=') THEN
    AToken^.Kind := lexerKindAssignment
  END;
  INC(ALexer^.Current)
END TransitionActionComposite;
(* Skip a space: the character belongs to no token, so the beginning of the
 * token and the current position both move one character forward. *)
PROCEDURE TransitionActionSkip(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  INC(ALexer^.Start);
  INC(ALexer^.Current)
END TransitionActionSkip;
(*
 * Delimited token action. Finishes a token enclosed in delimiters and consumes
 * the closing delimiter.
 *
 * The kind of the token is derived from its first character: '(' starts a
 * comment, a double quote starts a string and a single quote starts a
 * character literal. This matches the start state, which sends a single quote
 * to the character state and a double quote to the string state.
 *
 * ALexer - lexer whose Start points to the opening delimiter and whose
 *          Current points to the last character of the token.
 * AToken - token whose Kind is set. It is left unchanged if the token starts
 *          with any other character.
 *)
PROCEDURE TransitionActionDelimited(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  IF ALexer^.Start^ = '(' THEN
    AToken^.Kind := lexerKindComment
  ELSIF ALexer^.Start^ = '"' THEN
    AToken^.Kind := lexerKindString
  ELSIF ALexer^.Start^ = "'" THEN
    AToken^.Kind := lexerKindCharacter
  END;
  INC(ALexer^.Current)
END TransitionActionDelimited;
(*
 * Finalize keyword or identifier.
 *
 * The characters between Start (inclusive) and Current (exclusive) are
 * compared with every known keyword in turn. The comparison is case sensitive,
 * so only upper case spellings are keywords. TRUE and FALSE produce a
 * lexerKindBoolean token and additionally store the value of the literal in
 * booleanKind. A word matching no keyword becomes lexerKindIdentifier.
 *
 * The current character is not consumed by this action.
 *)
PROCEDURE TransitionActionKeyId(ALexer: PLexer; AToken: PLexerToken);
BEGIN
IF CompareKeyword('PROGRAM', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindProgram
ELSIF CompareKeyword('IMPORT', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindImport
ELSIF CompareKeyword('CONST', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindConst
ELSIF CompareKeyword('VAR', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindVar
ELSIF CompareKeyword('IF', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindIf
ELSIF CompareKeyword('THEN', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindThen
ELSIF CompareKeyword('ELSIF', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindElsif
ELSIF CompareKeyword('ELSE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindElse
ELSIF CompareKeyword('WHILE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindWhile
ELSIF CompareKeyword('DO', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindDo
ELSIF CompareKeyword('PROCEDURE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindProc
ELSIF CompareKeyword('BEGIN', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindBegin
ELSIF CompareKeyword('END', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindEnd
ELSIF CompareKeyword('TYPE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindType
ELSIF CompareKeyword('RECORD', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindRecord
ELSIF CompareKeyword('UNION', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindUnion
ELSIF CompareKeyword('NIL', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindNull
ELSIF CompareKeyword('AND', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindAnd
ELSIF CompareKeyword('OR', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindOr
ELSIF CompareKeyword('RETURN', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindReturn
ELSIF CompareKeyword('DEFINITION', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindDefinition
ELSIF CompareKeyword('TO', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindTo
ELSIF CompareKeyword('CASE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindCase
ELSIF CompareKeyword('OF', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindOf
ELSIF CompareKeyword('FROM', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindFrom
ELSIF CompareKeyword('MODULE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindModule
ELSIF CompareKeyword('IMPLEMENTATION', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindImplementation
ELSIF CompareKeyword('TRUE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindBoolean;
AToken^.booleanKind := TRUE
ELSIF CompareKeyword('FALSE', ALexer^.Start, ALexer^.Current) THEN
AToken^.Kind := lexerKindBoolean;
AToken^.booleanKind := FALSE
ELSE
AToken^.Kind := lexerKindIdentifier
END;
END TransitionActionKeyId;
(*
 * Action for tokens containing only one character. The character cannot be
 * followed by other characters forming a composite token.
 *
 * The CASE statement has no ELSE branch, so every character that can reach
 * this action needs a label of its own. That includes '*', '!' and '~', which
 * the classification and start state tables route here as well.
 *
 * ALexer - lexer whose Current points to the character. Current is advanced
 *          past it.
 * AToken - token whose Kind is set according to the character.
 *)
PROCEDURE TransitionActionSingle(ALexer: PLexer; AToken: PLexerToken);
BEGIN
  CASE ALexer^.Current^ OF
    '&': AToken^.Kind := lexerKindAnd |
    ';': AToken^.Kind := lexerKindSemicolon |
    ',': AToken^.Kind := lexerKindComma |
    ')': AToken^.Kind := lexerKindRightParen |
    '[': AToken^.Kind := lexerKindLeftSquare |
    ']': AToken^.Kind := lexerKindRightSquare |
    '^': AToken^.Kind := lexerKindHat |
    '=': AToken^.Kind := lexerKindEqual |
    '+': AToken^.Kind := lexerKindPlus |
    '*': AToken^.Kind := lexerKindMultiplication |
    '/': AToken^.Kind := lexerKindDivision |
    '%': AToken^.Kind := lexerKindRemainder |
    '@': AToken^.Kind := lexerKindAt |
    '|': AToken^.Kind := lexerKindPipe |
    '!': AToken^.Kind := lexerKindExclamation |
    '~': AToken^.Kind := lexerKindNot
  END;
  INC(ALexer^.Current)
END TransitionActionSingle;
(* Handle an integer literal: marks the token as lexerKindInteger. The lexer
 * position is not changed and the value of the literal is not computed here. *)
PROCEDURE TransitionActionInteger(ALexer: PLexer; AToken: PLexerToken);
BEGIN
AToken^.Kind := lexerKindInteger
END TransitionActionInteger;
(*
 * Fills the whole row of the transition table that belongs to CurrentState
 * with the same transition, made of DefaultAction and NextState. Every
 * character class, from transitionClassInvalid up to the last one, receives
 * it. Callers overwrite individual cells of the row afterwards.
 *)
PROCEDURE SetDefaultTransition(CurrentState: TransitionState; DefaultAction: TransitionAction; NextState: TransitionState);
VAR
  RowDefault: Transition;
  ClassIndex: CARDINAL;
BEGIN
  RowDefault.Action := DefaultAction;
  RowDefault.NextState := NextState;

  ClassIndex := ORD(transitionClassInvalid);
  WHILE ClassIndex <= ORD(MAX(TransitionClass)) DO
    Transitions[ORD(CurrentState)][ClassIndex] := RowDefault;
    INC(ClassIndex)
  END
END SetDefaultTransition;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the amount of states and n is
* the amount of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a Transition record.
* - NextState specifies the target state. transitionStateEnd means that this
* is an end state and no further transition is possible.
* - Action is the procedure that should be performed when transitioning. For
* the meaning of the actions see the TransitionAction procedures above, each
* of which handles one action.
*)
PROCEDURE InitializeTransitions();
BEGIN
(* Start state. *)
Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].Action := NIL;
Transitions[ORD(transitionStateStart)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassDigit)].NextState := transitionStateDecimal;
Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].Action := TransitionActionSkip;
Transitions[ORD(transitionStateStart)][ORD(transitionClassSpace)].NextState := transitionStateStart;
Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassColon)].NextState := transitionStateColon;
Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].Action := TransitionActionSingle;
Transitions[ORD(transitionStateStart)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassLeftParen)].NextState := transitionStateLeftParen;
Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].Action := TransitionActionSingle;
Transitions[ORD(transitionStateStart)][ORD(transitionClassRightParen)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].Action := TransitionActionSingle;
Transitions[ORD(transitionStateStart)][ORD(transitionClassAsterisk)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].Action := TransitionActionSingle;
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingle)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassHex)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassZero)].NextState := transitionStateLeadingZero;
Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassX)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].Action := TransitionActionEof;
Transitions[ORD(transitionStateStart)][ORD(transitionClassEof)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassDot)].NextState := transitionStateDot;
Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassMinus)].NextState := transitionStateMinus;
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassSingleQuote)].NextState := transitionStateCharacter;
Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassDoubleQuote)].NextState := transitionStateString;
Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassGreater)].NextState := transitionStateGreater;
Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateStart)][ORD(transitionClassLess)].NextState := transitionStateLess;
Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].Action := NIL;
Transitions[ORD(transitionStateStart)][ORD(transitionClassOther)].NextState := transitionStateEnd;
(* Colon state. *)
SetDefaultTransition(transitionStateColon, TransitionActionFinalize, transitionStateEnd);
Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].Action := TransitionActionComposite;
Transitions[ORD(transitionStateColon)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
(* Identifier state. *)
SetDefaultTransition(transitionStateIdentifier, TransitionActionKeyId, transitionStateEnd);
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassDigit)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassAlpha)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassUnderscore)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassHex)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassZero)].NextState := transitionStateIdentifier;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateIdentifier)][ORD(transitionClassX)].NextState := transitionStateIdentifier;
(* Decimal state. *)
SetDefaultTransition(transitionStateDecimal, TransitionActionInteger, transitionStateEnd);
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassDigit)].NextState := transitionStateDecimal;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassAlpha)].NextState := transitionStateDecimalSuffix;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].Action := NIL;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassHex)].NextState := transitionStateDecimalSuffix;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassZero)].NextState := transitionStateDecimal;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateDecimal)][ORD(transitionClassX)].NextState := transitionStateDecimalSuffix;
(* Greater state. *)
SetDefaultTransition(transitionStateGreater, TransitionActionFinalize, transitionStateEnd);
Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].Action := TransitionActionComposite;
Transitions[ORD(transitionStateGreater)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
(* Minus state. *)
SetDefaultTransition(transitionStateMinus, TransitionActionFinalize, transitionStateEnd);
Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].Action := TransitionActionComposite;
Transitions[ORD(transitionStateMinus)][ORD(transitionClassGreater)].NextState := transitionStateEnd;
(* Left paren state. *)
SetDefaultTransition(transitionStateLeftParen, TransitionActionFinalize, transitionStateEnd);
Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateLeftParen)][ORD(transitionClassAsterisk)].NextState := transitionStateComment;
(* Less state. *)
SetDefaultTransition(transitionStateLess, TransitionActionFinalize, transitionStateEnd);
Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].Action := TransitionActionComposite;
Transitions[ORD(transitionStateLess)][ORD(transitionClassEquals)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].Action := TransitionActionComposite;
Transitions[ORD(transitionStateLess)][ORD(transitionClassGreater)].NextState := transitionStateEnd;
(* Dot state. *)
SetDefaultTransition(transitionStateDot, TransitionActionFinalize, transitionStateEnd);
Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].Action := TransitionActionComposite;
Transitions[ORD(transitionStateDot)][ORD(transitionClassDot)].NextState := transitionStateEnd;
(* Comment. *)
SetDefaultTransition(transitionStateComment, TransitionActionAccumulate, transitionStateComment);
Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment;
Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].Action := NIL;
Transitions[ORD(transitionStateComment)][ORD(transitionClassEof)].NextState := transitionStateEnd;
(* Closing comment. *)
SetDefaultTransition(transitionStateClosingComment, TransitionActionAccumulate, transitionStateComment);
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].Action := NIL;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].Action := TransitionActionDelimited;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassRightParen)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].Action := TransitionActionAccumulate;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassAsterisk)].NextState := transitionStateClosingComment;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].Action := NIL;
Transitions[ORD(transitionStateClosingComment)][ORD(transitionClassEof)].NextState := transitionStateEnd;
(* Character. *)
SetDefaultTransition(transitionStateCharacter, TransitionActionAccumulate, transitionStateCharacter);
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].Action := NIL;
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].Action := NIL;
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassEof)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].Action := TransitionActionDelimited;
Transitions[ORD(transitionStateCharacter)][ORD(transitionClassSingleQuote)].NextState := transitionStateEnd;
(* String. *)
SetDefaultTransition(transitionStateString, TransitionActionAccumulate, transitionStateString);
Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].Action := NIL;
Transitions[ORD(transitionStateString)][ORD(transitionClassInvalid)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].Action := NIL;
Transitions[ORD(transitionStateString)][ORD(transitionClassEof)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].Action := TransitionActionDelimited;
Transitions[ORD(transitionStateString)][ORD(transitionClassDoubleQuote)].NextState := transitionStateEnd;
(* Leading zero. *)
SetDefaultTransition(transitionStateLeadingZero, TransitionActionInteger, transitionStateEnd);
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].Action := NIL;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassDigit)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].Action := NIL;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassAlpha)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].Action := NIL;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassUnderscore)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].Action := NIL;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassHex)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].Action := NIL;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassZero)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].Action := NIL;
Transitions[ORD(transitionStateLeadingZero)][ORD(transitionClassX)].NextState := transitionStateEnd;
(* Digit with a character suffix. *)
SetDefaultTransition(transitionStateDecimalSuffix, TransitionActionInteger, transitionStateEnd);
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].Action := NIL;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassAlpha)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].Action := NIL;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassDigit)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].Action := NIL;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassHex)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].Action := NIL;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassZero)].NextState := transitionStateEnd;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].Action := NIL;
Transitions[ORD(transitionStateDecimalSuffix)][ORD(transitionClassX)].NextState := transitionStateEnd
END InitializeTransitions;
(*
  Prepares a lexer record for reading from the given input file.

  A zero-filled buffer of ChunkSize bytes is allocated and the buffer
  bookkeeping is reset. Length is set to 0, which makes the first call of
  LexerLex read a chunk from the input.
*)
PROCEDURE LexerInitialize(ALexer: PLexer; Input: File);
BEGIN
  (* Reserve and clear the read buffer. *)
  ALLOCATE(ALexer^.Buffer, ChunkSize);
  MemZero(ALexer^.Buffer, ChunkSize);
  ALexer^.Size := ChunkSize;

  (* Nothing has been read from the input yet. *)
  ALexer^.Length := 0;
  ALexer^.Input := Input
END LexerInitialize;
(*
  Reads the next token from the lexer input.

  If nothing has been read yet (Length = 0), one chunk is read from the input
  file into the buffer and Current is set to the start of the buffer. Then the
  transition table is walked from transitionStateStart until
  transitionStateEnd is reached. Every transition that carries an action has
  that action invoked with the lexer and the address of the token being built.

  Start is set to the position at which this token begins, so after the call
  the bytes between Start and Current are the ones consumed by the actions.

  Returns the token. Its Kind is preset to lexerKindTrait; presumably the
  actions overwrite it with the real kind.
*)
PROCEDURE LexerLex(ALexer: PLexer): LexerToken;
VAR
  CurrentClass: TransitionClass;
  CurrentState: TransitionState;
  CurrentTransition: Transition;
  Result: LexerToken;
BEGIN
  IF ALexer^.Length = 0 THEN
    ALexer^.Length := ReadNBytes(ALexer^.Input, ChunkSize, ALexer^.Buffer);
    ALexer^.Current := ALexer^.Buffer
  END;
  Result.Kind := lexerKindTrait;
  ALexer^.Start := ALexer^.Current;
  CurrentState := transitionStateStart;

  WHILE CurrentState <> transitionStateEnd DO
    (* NOTE(review): the "+ 1" presumably accounts for Classification being
       indexed from 1 - confirm against its declaration. *)
    CurrentClass := Classification[ORD(ALexer^.Current^) + 1];
    CurrentTransition := Transitions[ORD(CurrentState)][ORD(CurrentClass)];

    (* A NIL action means the transition only changes the state. *)
    IF CurrentTransition.Action <> NIL THEN
      CurrentTransition.Action(ALexer, ADR(Result))
    END;
    CurrentState := CurrentTransition.NextState
  END;
  RETURN Result
END LexerLex;
(*
  Releases the buffer of the lexer, using the size recorded in the lexer.
  The lexer record itself and the input file are not touched here.
*)
PROCEDURE LexerDestroy(ALexer: PLexer);
BEGIN
DEALLOCATE(ALexer^.Buffer, ALexer^.Size)
END LexerDestroy;
(* Module body: runs both table initializers when the module is initialized. *)
BEGIN
InitializeClassification();
InitializeTransitions()
END Lexer.

7
boot/Transpiler.def Normal file
View File

@@ -0,0 +1,7 @@
(* Public interface of the transpiler. *)
DEFINITION MODULE Transpiler;
FROM Lexer IMPORT PLexer;
(* Reads tokens from ALexer until the end-of-file token is produced. *)
PROCEDURE Transpile(ALexer: PLexer);
END Transpiler.

24
boot/Transpiler.mod Normal file
View File

@@ -0,0 +1,24 @@
(* Transpiler implementation. At this stage it only echoes the lexed text. *)
IMPLEMENTATION MODULE Transpiler;

FROM FIO IMPORT WriteNBytes, StdOut;
FROM SYSTEM IMPORT ADDRESS;
FROM Terminal IMPORT WriteLn;

FROM Lexer IMPORT Lexer, LexerToken, LexerLex, LexerKind;

(*
  Lexes the input token by token. For every token before the end-of-file
  token, the bytes between the lexer's Start and Current positions are
  written to the standard output, followed by a line break.
*)
PROCEDURE Transpile(ALexer: PLexer);
VAR
  CurrentToken: LexerToken;
  BytesWritten: CARDINAL;
BEGIN
  LOOP
    CurrentToken := LexerLex(ALexer);
    IF CurrentToken.Kind = lexerKindEof THEN
      EXIT
    END;
    (* The result of the write is stored but not inspected. *)
    BytesWritten := WriteNBytes(StdOut, ADDRESS(ALexer^.Current - ALexer^.Start), ALexer^.Start);
    WriteLn()
  END
END Transpile;

END Transpiler.

View File

@@ -1,9 +0,0 @@
{
"dependencies": {
"tanya": "~>0.18.0"
},
"name": "elna",
"targetType": "executable",
"targetPath": "build/bin",
"mainSourceFile": "source/main.d"
}

View File

@@ -1,9 +0,0 @@
/**
* File I/O that can be moved into a more generic library if and when it is finished.
*/
module elna.extended;
/// File type without any members yet; instances cannot be copied.
struct File
{
// Disabling the postblit forbids copying of this struct.
@disable this(this);
}

View File

@@ -1,660 +0,0 @@
module elna.generator;
import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;
import elna.ir;
import tanya.container.array;
import tanya.container.string;
import tanya.memory.mmappool;
import tanya.format;
/// Unsigned program address.
alias Elf64_Addr = void*;
/// Unsigned file offset.
alias Elf64_Off = ulong;
/// Unsigned medium integer.
alias Elf64_Half = ushort;
/// Unsigned integer.
alias Elf64_Word = uint;
/// Signed integer.
alias Elf64_Sword = int;
/// Unsigned long integer.
alias Elf64_Xword = ulong;
/// Signed long integer.
alias Elf64_Sxword = long;
/// Size of the e_ident array. The ELF specification calls this constant EI_NIDENT.
enum size_t EI_INDENT = 16;
/**
 * ELF-64 file header. It is the first structure written to the object file.
 */
struct Elf64_Ehdr
{
/// ELF identification (magic number, class, data encoding, version, OS/ABI).
ubyte[EI_INDENT] e_ident;
/// Object file type.
Elf64_Half e_type;
/// Machine type.
Elf64_Half e_machine;
/// Object file version.
Elf64_Word e_version;
/// Entry point address.
Elf64_Addr e_entry;
/// Program header offset.
Elf64_Off e_phoff;
/// Section header offset.
Elf64_Off e_shoff;
/// Processor-specific flags.
Elf64_Word e_flags;
/// ELF header size.
Elf64_Half e_ehsize;
/// Size of program header entry.
Elf64_Half e_phentsize;
/// Number of program header entries.
Elf64_Half e_phnum;
/// Size of section header entry.
Elf64_Half e_shentsize;
/// Number of section header entries.
Elf64_Half e_shnum;
/// Section name string table index.
Elf64_Half e_shstrndx;
}
/**
 * ELF-64 section header. One entry describes one section of the file.
 */
struct Elf64_Shdr
{
/// Section name, as a byte offset into the section name string table.
Elf64_Word sh_name;
/// Section type.
Elf64_Word sh_type;
/// Section attributes.
Elf64_Xword sh_flags;
/// Virtual address in memory.
Elf64_Addr sh_addr;
/// Offset in file.
Elf64_Off sh_offset;
/// Size of section.
Elf64_Xword sh_size;
/// Link to other section.
Elf64_Word sh_link;
/// Miscellaneous information.
Elf64_Word sh_info;
/// Address alignment boundary.
Elf64_Xword sh_addralign;
/// Size of entries, if section has table.
Elf64_Xword sh_entsize;
}
/**
 * ELF-64 symbol table entry.
 */
struct Elf64_Sym
{
/// Symbol name, as a byte offset into the symbol string table.
Elf64_Word st_name;
/// Type and Binding attributes (binding in the upper, type in the lower four bits).
ubyte st_info;
/// Reserved.
ubyte st_other;
/// Section table index.
Elf64_Half st_shndx;
/// Symbol value.
Elf64_Addr st_value;
/// Size of object (e.g., common).
Elf64_Xword st_size;
}
/// Section Types, sh_type.
enum : Elf64_Word
{
/// Marks an unused section header.
SHT_NULL = 0,
/// Contains information defined by the program.
SHT_PROGBITS = 1,
/// Contains a linker symbol table.
SHT_SYMTAB = 2,
/// Contains a string table.
SHT_STRTAB = 3,
/// Contains "Rela" type relocation entries.
SHT_RELA = 4,
/// Contains a symbol hash table.
SHT_HASH = 5,
/// Contains dynamic linking tables.
SHT_DYNAMIC = 6,
/// Contains note information.
SHT_NOTE = 7,
/// Contains uninitialized space; does not occupy any space in the file.
SHT_NOBITS = 8,
/// Contains "Rel" type relocation entries.
SHT_REL = 9,
/// Reserved.
SHT_SHLIB = 10,
/// Contains a dynamic loader symbol table.
SHT_DYNSYM = 11,
/// Environment-specific use (lower bound of the range).
SHT_LOOS = 0x60000000,
/// Environment-specific use (upper bound of the range).
SHT_HIOS = 0x6FFFFFFF,
/// Processor-specific use (lower bound of the range).
SHT_LOPROC = 0x70000000,
/// Processor-specific use (upper bound of the range).
SHT_HIPROC = 0x7FFFFFFF,
}
/**
 * Section Attributes, sh_flags. The values are bit flags and can be combined.
 */
enum : Elf64_Xword
{
/// Section contains writable data.
SHF_WRITE = 0x1,
/// Section is allocated in memory image of program.
SHF_ALLOC = 0x2,
/// Section contains executable instructions.
SHF_EXECINSTR = 0x4,
/// Environment-specific use.
SHF_MASKOS = 0x0F000000,
/// Processor-specific use.
SHF_MASKPROC = 0xF0000000,
}
/**
 * Symbol Bindings, stored in the upper four bits of st_info.
 */
enum : Elf64_Word
{
/// Not visible outside the object file.
STB_LOCAL = 0,
/// Global symbol, visible to all object files.
STB_GLOBAL = 1,
/// Global scope, but with lower precedence than global symbols.
STB_WEAK = 2,
/// Environment-specific use (lower bound of the range).
STB_LOOS = 10,
/// Environment-specific use (upper bound of the range).
STB_HIOS = 12,
/// Processor-specific use (lower bound of the range).
STB_LOPROC = 13,
/// Processor-specific use (upper bound of the range).
STB_HIPROC = 15,
}
/**
 * Symbol Types, stored in the lower four bits of st_info.
 */
enum : Elf64_Word
{
/// No type specified (e.g., an absolute symbol).
STT_NOTYPE = 0,
/// Data object.
STT_OBJECT = 1,
/// Function entry point.
STT_FUNC = 2,
/// Symbol is associated with a section.
STT_SECTION = 3,
/// Source file associated with the object file.
STT_FILE = 4,
/// Environment-specific use (lower bound of the range).
STT_LOOS = 10,
/// Environment-specific use (upper bound of the range).
STT_HIOS = 12,
/// Processor-specific use (lower bound of the range).
STT_LOPROC = 13,
/// Processor-specific use (upper bound of the range).
STT_HIPROC = 15,
}
/**
 * Builds the file header of a 64-bit, little-endian, relocatable object file
 * for x86-64. The header declares no program headers and no entry point.
 *
 * Params:
 *     sectionHeaderOffset = File offset of the section header table.
 *     sectionHeaderCount = Number of section header entries.
 *     stringIndex = Index of the section header of the section name string table.
 *
 * Returns: The filled file header.
 */
Elf64_Ehdr makeFileHeader(Elf64_Off sectionHeaderOffset,
Elf64_Half sectionHeaderCount,
Elf64_Half stringIndex) @nogc
{
Elf64_Ehdr header;
// Magic number.
header.e_ident[0] = '\x7f';
header.e_ident[1] = 'E';
header.e_ident[2] = 'L';
header.e_ident[3] = 'F';
// File class.
header.e_ident[4] = EI_CLASS.ELFCLASS64;
// Data encoding.
header.e_ident[5] = EI_DATA.ELFDATA2LSB;
// Version.
header.e_ident[6] = EV_CURRENT;
// OS/ABI identification.
header.e_ident[7] = EI_OSABI.ELFOSABI_SYSV;
// ABI version.
header.e_ident[8] = 0;
// Last byte of e_ident[]; the bytes 9 to 14 keep their default value.
header.e_ident[15] = 0;
header.e_type = ET_REL;
header.e_machine = 0x3e; // EM_X86_64: AMD x86-64 architecture
header.e_version = EV_CURRENT;
header.e_entry = null;
header.e_phoff = 0;
header.e_shoff = sectionHeaderOffset;
header.e_flags = 0;
header.e_ehsize = Elf64_Ehdr.sizeof;
// No program headers in a relocatable file.
header.e_phentsize = 0;
header.e_phnum = 0;
header.e_shentsize = Elf64_Shdr.sizeof;
header.e_shnum = sectionHeaderCount;
header.e_shstrndx = stringIndex;
return header;
}
/// Section name string table. Name offsets: 0x01 ".symtab", 0x09 ".strtab",
/// 0x11 ".shstrtab", 0x1b ".text".
enum char[33] sectionStringTable = "\0.symtab\0.strtab\0.shstrtab\0.text\0";
/**
 * Creates the section header of the ".text" section: program bits that are
 * allocated in memory and executable.
 *
 * Params:
 *     offset = File offset of the section contents.
 *     size = Size of the section contents in bytes.
 *
 * Returns: The section header.
 */
Elf64_Shdr makeTextHeader(Elf64_Off offset, Elf64_Xword size) @nogc
{
    return Elf64_Shdr(
        0x1b,                      // sh_name: offset of ".text" in sectionStringTable.
        SHT_PROGBITS,              // sh_type
        SHF_EXECINSTR | SHF_ALLOC, // sh_flags
        null,                      // sh_addr
        offset,                    // sh_offset
        size,                      // sh_size
        SHN_UNDEF,                 // sh_link
        0,                         // sh_info
        1,                         // sh_addralign
        0,                         // sh_entsize
    );
}
/**
 * Creates the section header of a writable data section: program bits that
 * are allocated in memory and writable.
 *
 * NOTE(review): the name offset 0x21 (33) lies just past the end of the
 * 33 bytes long sectionStringTable, which contains no data section name.
 * The table has to be extended before this header can be used.
 *
 * Params:
 *     offset = File offset of the section contents.
 *     size = Size of the section contents in bytes.
 *
 * Returns: The section header.
 */
Elf64_Shdr makeDataHeader(Elf64_Off offset, Elf64_Xword size) @nogc
{
    return Elf64_Shdr(
        0x21,                  // sh_name
        SHT_PROGBITS,          // sh_type
        SHF_WRITE | SHF_ALLOC, // sh_flags
        null,                  // sh_addr
        offset,                // sh_offset
        size,                  // sh_size
        SHN_UNDEF,             // sh_link
        0,                     // sh_info
        1,                     // sh_addralign
        0,                     // sh_entsize
    );
}
/**
 * Creates the section header of the symbol table (".symtab").
 *
 * Params:
 *     offset = File offset of the symbol table.
 *     size = Size of the symbol table in bytes.
 *     entriesCount = Value stored in sh_info.
 *
 * Returns: The section header.
 */
Elf64_Shdr makeSymtableHeader(Elf64_Off offset, Elf64_Xword size, Elf64_Word entriesCount) @nogc
{
    return Elf64_Shdr(
        0x01,             // sh_name: offset of ".symtab" in sectionStringTable.
        SHT_SYMTAB,       // sh_type
        0,                // sh_flags
        null,             // sh_addr
        offset,           // sh_offset
        size,             // sh_size
        0x03,             // sh_link: string table used by entries in this section.
        entriesCount,     // sh_info
        8,                // sh_addralign
        Elf64_Sym.sizeof, // sh_entsize
    );
}
/**
 * Creates the section header of a string table section.
 *
 * Params:
 *     stringIndex = Offset of the section name in the section name string table.
 *     offset = File offset of the string table.
 *     size = Size of the string table in bytes.
 *
 * Returns: The section header.
 */
Elf64_Shdr makeStringHeader(Elf64_Word stringIndex, Elf64_Off offset, Elf64_Xword size) @nogc
{
    return Elf64_Shdr(
        stringIndex, // sh_name
        SHT_STRTAB,  // sh_type
        0,           // sh_flags
        null,        // sh_addr
        offset,      // sh_offset
        size,        // sh_size
        SHN_UNDEF,   // sh_link
        0,           // sh_info
        1,           // sh_addralign
        0,           // sh_entsize
    );
}
/**
 * Creates the unused section header (type SHT_NULL, all other fields zero)
 * that is inserted as the first entry of the section header table.
 *
 * Returns: The section header.
 */
Elf64_Shdr makeInitialHeader() @nogc
{
    return Elf64_Shdr(
        0,         // sh_name
        SHT_NULL,  // sh_type
        0,         // sh_flags
        null,      // sh_addr
        0,         // sh_offset
        0,         // sh_size
        SHN_UNDEF, // sh_link
        0,         // sh_info
        0,         // sh_addralign
        0,         // sh_entsize
    );
}
/**
 * Creates the all-zero symbol that is inserted as the first entry of the
 * symbol table.
 *
 * Returns: The symbol table entry.
 */
Elf64_Sym makeInitialSymTable() @nogc
{
    return Elf64_Sym(
        0,    // st_name
        0,    // st_info
        0,    // st_other
        0,    // st_shndx
        null, // st_value
        0,    // st_size
    );
}
/**
 * Creates the symbol table entry of a global function symbol whose name is
 * stored at offset 1 of the symbol string table.
 *
 * Params:
 *     textIndex = Index of the section header of the section containing the symbol.
 *
 * Returns: The symbol table entry.
 */
Elf64_Sym makeMainSymTable(Elf64_Half textIndex) @nogc
{
    return Elf64_Sym(
        0x01,                                // st_name
        ELF32_ST_INFO(STB_GLOBAL, STT_FUNC), // st_info
        0,                                   // st_other
        textIndex,                           // st_shndx
        null,                                // st_value
        0,                                   // st_size
    );
}
/**
 * Extracts the binding from a symbol's st_info value.
 *
 * Params:
 *     i = st_info value.
 *
 * Returns: The upper four bits of $(D_PARAM i).
 */
ubyte ELF32_ST_BIND(ubyte i) @nogc nothrow pure @safe
{
    // Dividing by 16 drops the lower four bits.
    return cast(ubyte) (i / 16);
}
/**
 * Extracts the type from a symbol's st_info value.
 *
 * Params:
 *     i = st_info value.
 *
 * Returns: The lower four bits of $(D_PARAM i).
 */
ubyte ELF32_ST_TYPE(ubyte i) @nogc nothrow pure @safe
{
    // The remainder of the division by 16 is the lower four bits.
    return cast(ubyte) (i % 16);
}
/**
 * Combines a binding and a type into an st_info value.
 *
 * Params:
 *     b = Binding, placed into the upper four bits.
 *     t = Type, of which only the lower four bits are used.
 *
 * Returns: The combined value, truncated to one byte.
 */
ubyte ELF32_ST_INFO(ubyte b, ubyte t) @nogc nothrow pure @safe
{
    const binding = b << 4;
    const type = t & 0xf;

    // The two parts share no bits, so they can be merged with a bitwise or.
    return cast(ubyte) (binding | type);
}
/// Special Section Indices.
enum : Elf64_Half
{
/// Used to mark an undefined or meaningless section reference.
SHN_UNDEF = 0,
/// Processor-specific use (lower bound of the range).
SHN_LOPROC = 0xFF00,
/// Processor-specific use (upper bound of the range).
SHN_HIPROC = 0xFF1F,
/// Environment-specific use (lower bound of the range).
SHN_LOOS = 0xFF20,
/// Environment-specific use (upper bound of the range).
SHN_HIOS = 0xFF3F,
/// Indicates that the corresponding reference is an absolute value.
SHN_ABS = 0xFFF1,
/**
 * Indicates a symbol that has been declared as a common block (Fortran
 * COMMON or C tentative declaration).
 */
SHN_COMMON = 0xFFF2,
}
/**
 * Object File Classes, e_ident[EI_CLASS]. The class is stored in byte 4 of
 * e_ident.
 */
enum EI_CLASS : ubyte
{
/// 32-bit objects.
ELFCLASS32 = 1,
/// 64-bit objects.
ELFCLASS64 = 2,
}
/// Current version of the object file format, used for e_version and e_ident[6].
enum ubyte EV_CURRENT = 1;
/**
 * Data Encodings, e_ident[EI_DATA]. The encoding is stored in byte 5 of
 * e_ident.
 */
enum EI_DATA : ubyte
{
/// Object file data structures are little-endian.
ELFDATA2LSB = 1,
/// Object file data structures are big-endian.
ELFDATA2MSB = 2,
}
/**
 * Operating System and ABI Identifiers, e_ident[EI_OSABI]. The identifier is
 * stored in byte 7 of e_ident.
 */
enum EI_OSABI : ubyte
{
/// System V ABI.
ELFOSABI_SYSV = 0,
/// HP-UX operating system.
ELFOSABI_HPUX = 1,
/// Standalone (embedded) application.
ELFOSABI_STANDALONE = 255,
}
/**
 * Object File Types, e_type.
 */
enum : Elf64_Half
{
ET_NONE = 0, /// No file type.
ET_REL = 1, /// Relocatable object file.
ET_EXEC = 2, /// Executable file.
ET_DYN = 3, /// Shared object file.
ET_CORE = 4, /// Core file.
ET_LOOS = 0xFE00, /// Environment-specific use.
ET_HIOS = 0xFEFF, /// Environment-specific use (upper bound of the range).
ET_LOPROC = 0xFF00, /// Processor-specific use.
ET_HIPROC = 0xFFFF, /// Processor-specific use (upper bound of the range).
}
/**
 * Rounds a size or an offset up to the nearest multiple of 8.
 *
 * A value that is already a multiple of 8 is returned unchanged, so aligned
 * data is not followed by 8 superfluous padding bytes.
 *
 * Params:
 *     value = Size or offset in bytes.
 *
 * Returns: The smallest multiple of 8 that is not less than $(D_PARAM value).
 */
private size_t pad(size_t value) @nogc
{
    enum size_t alignment = 8;

    return (value + alignment - 1) / alignment * alignment;
}
/// A named piece of machine code that is written to the object file.
struct Symbol
{
/// Symbol name, added to the symbol string table.
String name;
/// Machine code bytes of the symbol.
Array!ubyte instructions;
}
/*
Sample machine code of the following function, with a prologue and an
epilogue added:

.text
.globl main
.type main, @function
main:
movl $3, %eax
ret
*/
immutable ubyte[] instructions = [
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Register opcode of %rbp is 5.
0x50 + 5, // pushq %rbp
0x48, 0x89, 0xe5, // movq %rsp, %rbp
0xb8, 0x03, 0x00, 0x00, 0x00, // movl $3, %eax
// Epilogue.
0x48, 0x89, 0xec, // movq %rbp, %rsp
0x58 + 5, // popq %rbp
0xc3, // ret
];
/**
 * Generates machine code for the program and writes it as a relocatable ELF
 * object file that defines the global function symbol "main".
 *
 * For every statement the left operand is loaded into %eax, the right operand
 * into %ebx, both are added and the sum is stored in a stack slot addressed
 * relative to %rbp. The slot of the n-th statement (counting from 1) has the
 * displacement -4 * n.
 *
 * File layout: file header, padded code, symbol table, symbol string table,
 * section name string table, padding, section headers.
 *
 * If the output file cannot be opened, an error is printed with perror and
 * nothing is written.
 *
 * Params:
 *     ast = Program in the intermediate representation.
 *     outputFilename = Path of the object file to create.
 */
void writeObject(Definition ast, String outputFilename) @nogc
{
auto handle = fopen(outputFilename.toStringz, "wb");
if (handle is null)
{
perror("writing sample");
return;
}
scope (exit)
{
fclose(handle);
}
// The first section starts right after the file header.
size_t currentOffset = Elf64_Ehdr.sizeof;
Array!Elf64_Shdr sectionHeaders;
Array!Elf64_Sym symbolEntries;
// Prologue
Array!ubyte asmTemplate = Array!ubyte(cast(ubyte[]) [
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Register opcode of %rbp is 5.
0x50 + 5, // pushq %rbp
0x48, 0x89, 0xe5, // movq %rsp, %rbp
]);
// Number of the stack slot that receives the result of the current statement.
int i = 1;
foreach (statement; ast.statements[])
{
if ((cast(Number) statement.subroutine.lhs) !is null)
{
// Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
// Register opcode of %eax is 0.
asmTemplate.insertBack(cast(ubyte) 0xb8); // movl $x, %eax; where $x is a number.
asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.lhs).value)[0 .. int.sizeof]);
}
else if ((cast(Variable) statement.subroutine.lhs) !is null)
{
// movl -x(%rbp), %eax; where x is a number.
asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x45]);
const disposition = (cast(Variable) statement.subroutine.lhs).counter * (-4);
// Only the lowest byte is emitted, as an 8-bit displacement.
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]);
}
if ((cast(Number) statement.subroutine.rhs) !is null)
{
// Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
// Register opcode of %ebx is 3.
asmTemplate.insertBack(cast(ubyte) 0xbb); // movl $x, %ebx; where $x is a number.
asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.rhs).value)[0 .. int.sizeof]);
}
else if ((cast(Variable) statement.subroutine.rhs) !is null)
{
// movl -x(%rbp), %ebx; where x is a number.
asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x5d]);
const disposition = (cast(Variable) statement.subroutine.rhs).counter * (-4);
// Only the lowest byte is emitted, as an 8-bit displacement.
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]);
}
// Calculate the result and assign it to a variable on the stack.
asmTemplate.insertBack(cast(ubyte[]) [0x01, 0xd8]); // add %ebx, %eax
asmTemplate.insertBack(cast(ubyte[]) [0x89, 0x45]); // movl %eax, -x(%rbp); where x is a number.
const disposition = i * (-4);
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]);
++i;
}
// Epilogue.
asmTemplate.insertBack(cast(ubyte[]) [
0x48, 0x89, 0xec, // movq %rbp, %rsp
0x58 + 5, // popq %rbp
0xc3, // ret
]);
Symbol[1] symbols = [Symbol(String("main"), asmTemplate)];
// Both tables start with an empty entry.
sectionHeaders.insertBack(makeInitialHeader());
symbolEntries.insertBack(makeInitialSymTable());
String stringTable = String("\0");
foreach (symbol; symbols[])
{
stringTable.insertBack(symbol.name[]);
stringTable.insertBack('\0');
sectionHeaders.insertBack(makeTextHeader(currentOffset, symbol.instructions.length));
currentOffset = pad(currentOffset + symbol.instructions.length);
// The symbol refers to the section header that was just added.
symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1)));
}
// One entry per symbol plus the empty entry at the beginning.
const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof;
sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, symbols.length));
currentOffset += symbolTableSize;
// 0x09 and 0x11 are the offsets of ".strtab" and ".shstrtab" in sectionStringTable.
sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length));
currentOffset += stringTable.length;
sectionHeaders.insertBack(makeStringHeader(0x11, currentOffset, sectionStringTable.length));
currentOffset = pad(currentOffset + sectionStringTable.length);
// 5 section headers: empty, .text, .symtab, .strtab, .shstrtab (index 4).
auto fileHeader = makeFileHeader(currentOffset, 5, 4);
version (none)
{
printf("%.2x\n", cast(uint) currentOffset);
}
ubyte[8] padding = 0;
// Used only to compute the padding in front of the section headers. The file
// header and the symbol table are left out; their sizes are multiples of 8.
size_t codeLength = stringTable.length + sectionStringTable.length;
fwrite(&fileHeader, 8, Elf64_Ehdr.sizeof / 8, handle);
foreach (symbol; symbols[])
{
immutable size_t instructionsLength = pad(symbol.instructions.length);
fwrite(symbol.instructions.get.ptr, 1, symbol.instructions.length, handle);
fwrite(padding.ptr, 1, instructionsLength - symbol.instructions.length, handle);
codeLength += instructionsLength;
}
fwrite(symbolEntries.get.ptr, Elf64_Sym.sizeof, symbolEntries.length, handle);
fwrite(stringTable.get.ptr, 1, stringTable.length, handle);
fwrite(sectionStringTable.ptr, 1, sectionStringTable.length, handle);
fwrite(padding.ptr, pad(codeLength) - codeLength, 1, handle);
fwrite(sectionHeaders.get.ptr, Elf64_Shdr.sizeof, sectionHeaders.length, handle);
}
/**
 * Generates the assembly text of a "main" function for the program.
 *
 * For every statement the left operand is loaded into %eax, the right operand
 * into %ebx, both are added and the sum is stored in the stack slot
 * -4 * n(%rbp), where n is the number of the statement, counting from 1.
 * Number operands are emitted as immediate values, variable operands are read
 * from the stack slot given by their counter.
 *
 * Params:
 *     ast = Program in the intermediate representation.
 *
 * Returns: The assembly text, including prologue and epilogue.
 */
String generate(Definition ast) @nogc
{
// Prologue
String asmTemplate = ".text
.globl main
.type main, @function
main:
pushq %rbp
movq %rsp, %rbp
";
/* Allocate space on the stack for local variables.
asmTemplate.insertBack(" sub $");
asmTemplate.insertBack(format!"{}"(ast.statements.length)[]);
asmTemplate.insertBack(", $esp\n"); */
// Number of the stack slot that receives the result of the current statement.
int i = 1;
foreach (statement; ast.statements[])
{
// Left operand goes into %eax.
if ((cast(Number) statement.subroutine.lhs) !is null)
{
asmTemplate.insertBack(" movl $");
asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.lhs).value)[]);
asmTemplate.insertBack(", %eax\n");
}
else if ((cast(Variable) statement.subroutine.lhs) !is null)
{
asmTemplate.insertBack(" movl -");
asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4)[]);
asmTemplate.insertBack("(%rbp), %eax\n");
}
// Right operand goes into %ebx.
if ((cast(Number) statement.subroutine.rhs) !is null)
{
asmTemplate.insertBack(" movl $");
asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.rhs).value)[]);
asmTemplate.insertBack(", %ebx\n");
}
else if ((cast(Variable) statement.subroutine.rhs) !is null)
{
asmTemplate.insertBack(" movl -");
asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4)[]);
asmTemplate.insertBack("(%rbp), %ebx\n");
}
// Calculate the result and assign it to a variable on the stack.
asmTemplate.insertBack(" add %ebx, %eax\n");
asmTemplate.insertBack(" movl %eax, -");
asmTemplate.insertBack(format!"{}"(i * 4)[]);
asmTemplate.insertBack("(%rbp)\n");
++i;
}
// Epilogue.
asmTemplate.insertBack(" movq %rbp, %rsp
popq %rbp
ret
");
return asmTemplate;
}

View File

@@ -1,144 +0,0 @@
module elna.ir;
import parser = elna.parser;
import tanya.container.array;
import tanya.container.hashtable;
import tanya.container.string;
import tanya.memory.allocator;
import tanya.memory.mmappool;
/**
 * Definition: the intermediate representation of a block, consisting of a
 * flat list of statements and the declared variables.
 */
class Definition
{
/// Name of the definition.
char[] identifier;
/// Statements in the order in which they have to be executed.
Array!Statement statements;
/// Variables declared in the block.
Array!VariableDeclaration variableDeclarations;
}
/// A single statement; it wraps one operation on two operands.
class Statement
{
/// The operation performed by this statement.
Subroutine subroutine;
}
/// Base class of the operands of an operation.
abstract class Expression
{
}
/// Integer literal operand.
class Number : Expression
{
/// Value of the literal.
int value;
}
/// Operand that refers to the result of an earlier statement.
class Variable : Expression
{
/// Number of the statement (counting from 1) whose result is referenced.
size_t counter;
}
/// Declaration of a variable.
class VariableDeclaration
{
/// Name of the declared variable.
String identifier;
}
/// Operation on two operands.
class Subroutine
{
/// Left and right operand.
Expression lhs, rhs;
}
/**
 * Converts a number of the syntax tree into a number of the intermediate
 * representation.
 *
 * The node is created first and the value is assigned afterwards, the same
 * way transformExpression does it, because Number declares no constructor
 * that accepts the value.
 *
 * Params:
 *     number = Number node of the syntax tree.
 *
 * Returns: Newly allocated number with the same value.
 */
private Number transformNumber(parser.Number number) @nogc
{
    auto result = MmapPool.instance.make!Number;
    result.value = number.value;

    return result;
}
/**
 * Converts an operation of the syntax tree into a statement and appends it
 * to the statement list. The operands are converted first, left before
 * right, so statements produced by nested operations precede this one.
 *
 * Params:
 *     subroutine = Operation node of the syntax tree.
 *     statements = Statement list the new statement is appended to.
 *     constants = Known constants and their values.
 *
 * Returns: Variable referring to the result of the appended statement; its
 *          counter is the length of the statement list after the insertion.
 */
private Variable transformSubroutine(parser.Subroutine subroutine,
        ref Array!Statement statements,
        ref HashTable!(String, int) constants) @nogc
{
    auto operation = MmapPool.instance.make!Subroutine;
    operation.lhs = transformExpression(subroutine.lhs, statements, constants);
    operation.rhs = transformExpression(subroutine.rhs, statements, constants);

    auto statement = MmapPool.instance.make!Statement;
    statement.subroutine = operation;
    statements.insertBack(statement);

    auto result = MmapPool.instance.make!Variable;
    result.counter = statements.length;

    return result;
}
/**
 * Converts an expression of the syntax tree into an operand of the
 * intermediate representation.
 *
 * Numbers are copied, variables are replaced by the value of the constant
 * with the same name, operations are appended to the statement list and
 * replaced by a reference to their result.
 *
 * Params:
 *     expression = Expression node of the syntax tree.
 *     statements = Statement list that receives the statements of operations.
 *     constants = Known constants and their values.
 *
 * Returns: The operand, or $(D_KEYWORD null) for any other kind of expression.
 */
private Expression transformExpression(parser.Expression expression,
        ref Array!Statement statements,
        ref HashTable!(String, int) constants) @nogc
{
    if (auto literal = cast(parser.Number) expression)
    {
        auto result = MmapPool.instance.make!Number;
        result.value = literal.value;

        return result;
    }
    if (auto reference = cast(parser.Variable) expression)
    {
        auto result = MmapPool.instance.make!Number;
        result.value = constants[reference.identifier];

        return result;
    }
    if (auto operation = cast(parser.Subroutine) expression)
    {
        return transformSubroutine(operation, statements, constants);
    }
    return null;
}
/**
 * Converts a statement of the syntax tree. Only the bang statement is
 * handled: its expression is converted with transformExpression.
 *
 * Params:
 *     statement = Statement node of the syntax tree.
 *     statements = Statement list that receives the generated statements.
 *     constants = Known constants and their values.
 *
 * Returns: Operand holding the value of the statement's expression, or
 *          $(D_KEYWORD null) for any other kind of statement.
 */
Expression transformStatement(parser.Statement statement,
        ref Array!Statement statements,
        ref HashTable!(String, int) constants) @nogc
{
    if (auto bang = cast(parser.BangStatement) statement)
    {
        return transformExpression(bang.expression, statements, constants);
    }
    return null;
}
/**
 * Collects the constant definitions into a table that maps the name of a
 * constant to its value.
 *
 * Params:
 *     definitions = Constant definitions of the syntax tree.
 *
 * Returns: Table with one entry per definition.
 */
HashTable!(String, int) transformConstants(ref Array!(parser.Definition) definitions) @nogc
{
    HashTable!(String, int) table;

    foreach (constant; definitions[])
    {
        table[constant.identifier] = constant.number.value;
    }
    return table;
}
/**
 * Converts the variable declarations of the syntax tree into declarations of
 * the intermediate representation, keeping their order.
 *
 * Params:
 *     variableDeclarations = Variable declarations of the syntax tree.
 *
 * Returns: Newly allocated declarations with the same identifiers.
 */
Array!VariableDeclaration transformVariableDeclarations(ref Array!(parser.VariableDeclaration) variableDeclarations)
@nogc
{
    Array!VariableDeclaration result;

    foreach (ref source; variableDeclarations)
    {
        auto declaration = MmapPool.instance.make!VariableDeclaration;
        declaration.identifier = source.identifier;
        result.insertBack(declaration);
    }
    return result;
}
/**
 * Converts a block of the syntax tree into the intermediate representation.
 *
 * The constants are collected first, because the conversion of the statement
 * replaces references to them by their values. The operand returned for the
 * statement itself is not used.
 *
 * Params:
 *     block = Block node of the syntax tree.
 *
 * Returns: Newly allocated definition with the statements and the variable
 *          declarations of the block.
 */
Definition transform(parser.Block block) @nogc
{
    auto result = MmapPool.instance.make!Definition;
    auto constants = transformConstants(block.definitions);

    transformStatement(block.statement, result.statements, constants);
    result.variableDeclarations = transformVariableDeclarations(block.variableDeclarations);

    return result;
}

View File

@@ -1,252 +0,0 @@
module elna.lexer;
import core.stdc.stdlib;
import core.stdc.ctype;
import core.stdc.string;
import elna.result;
import std.range;
import tanya.container.array;
import tanya.container.string;
import tanya.memory.mmappool;
/**
 * Lexical token: a type, the position in the source text and, for numbers
 * and identifiers, a value.
 */
struct Token
{
/// Kind of the token.
enum Type
{
number,
subroutine, // Operator.
let,
identifier,
equals,
var,
semicolon,
leftParen,
rightParen,
bang,
dot,
comma,
}
/// Token payload; which member is valid depends on the token type.
union Value
{
int number;
String identifier;
}
private Type type;
private Value value_;
private Position position_;
@disable this();
/**
 * Creates a token without a value.
 *
 * Params:
 *     type = Token type.
 *     position = Position of the token in the source text.
 */
this(Type type, Position position) @nogc nothrow pure @safe
{
this.type = type;
this.position_ = position;
}
/**
 * Creates a number token.
 *
 * Params:
 *     type = Token type, has to be Type.number.
 *     value = Value of the number.
 *     position = Position of the token in the source text.
 */
this(Type type, int value, Position position) @nogc nothrow pure @trusted
in (type == Type.number)
{
this(type, position);
this.value_.number = value;
}
/**
 * Creates an identifier token.
 *
 * Params:
 *     type = Token type, has to be Type.identifier.
 *     value = Name of the identifier.
 *     position = Position of the token in the source text.
 */
this()(Type type, auto ref String value, Position position)
@nogc nothrow pure @trusted
in (type == Type.identifier)
{
this(type, position);
this.value_.identifier = value;
}
/**
 * Params:
 *     type = Expected type.
 *
 * Returns: Whether this token is of the expected type.
 */
bool ofType(Type type) const @nogc nothrow pure @safe
{
return this.type == type;
}
/**
 * Params:
 *     type = Type of the token; only Type.number and Type.identifier
 *            have a value, any other type is rejected at compile time.
 *
 * Returns: The number or the identifier stored in this token.
 */
@property auto value(Type type)() @nogc nothrow pure @trusted
in (ofType(type))
{
static if (type == Type.number)
{
return this.value_.number;
}
else static if (type == Type.identifier)
{
return this.value_.identifier;
}
else
{
static assert(false, "This type doesn't have a value");
}
}
/**
 * Returns: The token position in the source text.
 */
@property const(Position) position() const @nogc nothrow pure @safe
{
return this.position_;
}
}
/**
 * Range over the source text that keeps track of the current position.
 */
struct Source
{
    /// Text that has not been consumed yet.
    char[] buffer;
    /// Position of the first character of $(D buffer).
    Position position;

    /**
     * Params:
     *     buffer = Source text to iterate over.
     */
    this(char[] buffer) @nogc nothrow pure @safe
    {
        this.buffer = buffer;
    }

    @disable this();

    /// Returns: Whether the whole text has been consumed.
    bool empty() @nogc nothrow pure @safe
    {
        return this.buffer.length == 0;
    }

    /// Returns: The next character.
    char front() @nogc nothrow pure @safe
    in (!empty)
    {
        return this.buffer[0];
    }

    /// Consumes one character and moves one column forward.
    void popFront() @nogc nothrow pure @safe
    in (!empty)
    {
        this.buffer = this.buffer[1 .. $];
        this.position.column += 1;
    }

    /// Consumes one character, moves to the next line and resets the column to 1.
    void breakLine() @nogc nothrow pure @safe
    in (!empty)
    {
        this.buffer = this.buffer[1 .. $];
        this.position.line += 1;
        this.position.column = 1;
    }

    /// Returns: Number of characters left.
    @property size_t length() const @nogc nothrow pure @safe
    {
        return this.buffer.length;
    }

    /**
     * Params:
     *     index = Offset from the current front.
     *
     * Returns: The character at the given offset.
     */
    char opIndex(size_t index) @nogc nothrow pure @safe
    in (index < length)
    {
        return this.buffer[index];
    }

    /**
     * Params:
     *     i = Start offset, inclusive.
     *     j = End offset, exclusive.
     *
     * Returns: The part of the remaining text between the two offsets.
     */
    char[] opSlice(size_t i, size_t j) @nogc nothrow pure @safe
    in (i <= j)
    in (j <= length)
    {
        return this.buffer[i .. j];
    }
}
/**
 * Splits the source text into tokens.
 *
 * Spaces, tabs and carriage returns are skipped, a line feed starts a new
 * line. A run of decimal digits forms a single number token located at its
 * first digit. A run of letters is either a keyword ("const", "var") or an
 * identifier.
 *
 * Params:
 *     buffer = Source text.
 *
 * Returns: The tokens in source order, or an empty array if the text
 *          contains an unsupported character or a number literal that
 *          doesn't fit into $(D int).
 */
Array!Token lex(char[] buffer) @nogc
{
    Array!Token tokens;
    auto source = Source(buffer);

    while (!source.empty)
    {
        if (source.front == ' ' || source.front == '\t' || source.front == '\r')
        {
            source.popFront;
        }
        else if (source.front >= '0' && source.front <= '9')
        {
            // The position advances while the digits are consumed, so keep
            // the location of the first digit for the token.
            auto start = source.position;
            int number = 0;

            while (!source.empty && source.front >= '0' && source.front <= '9')
            {
                const int digit = source.front - '0';

                if (number > (int.max - digit) / 10)
                {
                    return typeof(tokens)(); // Error: the literal overflows int.
                }
                number = number * 10 + digit;
                source.popFront;
            }
            tokens.insertBack(Token(Token.Type.number, number, start));
        }
        else if (source.front == '=')
        {
            tokens.insertBack(Token(Token.Type.equals, source.position));
            source.popFront;
        }
        else if (source.front == '(')
        {
            tokens.insertBack(Token(Token.Type.leftParen, source.position));
            source.popFront;
        }
        else if (source.front == ')')
        {
            tokens.insertBack(Token(Token.Type.rightParen, source.position));
            source.popFront;
        }
        else if (source.front == ';')
        {
            tokens.insertBack(Token(Token.Type.semicolon, source.position));
            source.popFront;
        }
        else if (source.front == ',')
        {
            tokens.insertBack(Token(Token.Type.comma, source.position));
            source.popFront;
        }
        else if (source.front == '!')
        {
            tokens.insertBack(Token(Token.Type.bang, source.position));
            source.popFront;
        }
        else if (source.front == '.')
        {
            tokens.insertBack(Token(Token.Type.dot, source.position));
            source.popFront;
        }
        else if (isalpha(source.front))
        {
            // Find the end of the word before deciding what it is.
            size_t i = 1;
            while (i < source.length && isalpha(source[i]))
            {
                ++i;
            }
            if (source[0 .. i] == "const")
            {
                tokens.insertBack(Token(Token.Type.let, source.position));
            }
            else if (source[0 .. i] == "var")
            {
                tokens.insertBack(Token(Token.Type.var, source.position));
            }
            else
            {
                auto identifier = String(source[0 .. i]);
                tokens.insertBack(Token(Token.Type.identifier, identifier, source.position));
            }
            source.popFrontN(i);
        }
        else if (source.front == '+') // Multi-character, random special characters.
        {
            tokens.insertBack(Token(Token.Type.subroutine, source.position));
            source.popFront;
        }
        else if (source.front == '\n')
        {
            source.breakLine;
        }
        else
        {
            return typeof(tokens)(); // Error.
        }
    }
    return tokens;
}

View File

@@ -1,269 +0,0 @@
module elna.parser;
import elna.lexer;
import elna.result;
import tanya.container.array;
import tanya.container.string;
import tanya.memory.allocator;
import tanya.memory.mmappool;
/**
 * Constant definition: binds an identifier to a number.
 */
class Definition
{
/// Value of the constant.
Number number;
/// Name of the constant.
String identifier;
}
/**
 * Variable declaration. Only the name of the variable is stored.
 */
class VariableDeclaration
{
/// Name of the variable.
String identifier;
}
/**
 * Common base class of all statement nodes.
 */
abstract class Statement
{
}
/**
 * Statement introduced by the "!" token; it consists of a single expression.
 */
class BangStatement : Statement
{
/// Expression following the "!".
Expression expression;
}
/**
 * Block: optional constant definitions, optional variable declarations and
 * one statement.
 */
class Block
{
/// Constants defined in the "const" section.
Array!Definition definitions;
/// Variables declared in the "var" section.
Array!VariableDeclaration variableDeclarations;
/// Statement of the block.
Statement statement;
}
/**
 * Common base class of all expression nodes.
 */
abstract class Expression
{
}
/**
 * Integer literal.
 */
class Number : Expression
{
/// Value of the literal.
int value;
}
/**
 * Reference to a name (the parser creates it for every identifier token
 * found in an expression).
 */
class Variable : Expression
{
/// Referenced name.
String identifier;
}
/**
 * Operation with two operands. The parser creates it for a subroutine token
 * followed by two expressions (prefix notation).
 */
class Subroutine : Expression
{
/// Operands in source order.
Expression lhs, rhs;
}
/**
 * Parses an expression: a number, an identifier, an operator followed by
 * its two operands (prefix notation) or an expression in parentheses.
 *
 * Params:
 *     tokens = Remaining tokens, must not be empty. Consumed tokens are
 *              removed from the range.
 *
 * Returns: The parsed expression or an error if the tokens don't form one.
 */
private Result!Expression parseExpression(ref Array!(Token).Range tokens) @nogc
in (!tokens.empty, "Expected expression, got end of stream")
{
    // Kept to be able to report an error after the last token was consumed.
    Position position = tokens.front.position;

    if (tokens.front.ofType(Token.Type.number))
    {
        auto number = MmapPool.instance.make!Number;
        number.value = tokens.front.value!(Token.Type.number);
        tokens.popFront;
        return Result!Expression(number);
    }
    else if (tokens.front.ofType(Token.Type.identifier))
    {
        auto variable = MmapPool.instance.make!Variable;
        variable.identifier = tokens.front.value!(Token.Type.identifier);
        tokens.popFront;
        return Result!Expression(variable);
    }
    else if (tokens.front.ofType(Token.Type.subroutine))
    {
        auto subroutine = MmapPool.instance.make!Subroutine;
        tokens.popFront;

        if (tokens.empty)
        {
            return Result!Expression("Expected left-hand side to be an expression", position);
        }
        auto expression = parseExpression(tokens);
        if (!expression.valid)
        {
            return Result!Expression("Expected left-hand side to be an expression",
                    tokens.empty ? position : tokens.front.position);
        }
        subroutine.lhs = expression.result;

        if (tokens.empty)
        {
            return Result!Expression("Expected right-hand side to be an expression", position);
        }
        expression = parseExpression(tokens);
        if (!expression.valid)
        {
            return Result!Expression("Expected right-hand side to be an expression",
                    tokens.empty ? position : tokens.front.position);
        }
        subroutine.rhs = expression.result;

        return Result!Expression(subroutine);
    }
    else if (tokens.front.ofType(Token.Type.leftParen))
    {
        tokens.popFront;
        if (tokens.empty)
        {
            return Result!Expression("Expected an expression", position);
        }
        auto expression = parseExpression(tokens);
        if (!expression.valid)
        {
            return expression;
        }
        // The parenthesis has to be closed before the expression is accepted.
        if (tokens.empty || !tokens.front.ofType(Token.Type.rightParen))
        {
            return Result!Expression("Expected \")\"",
                    tokens.empty ? position : tokens.front.position);
        }
        tokens.popFront;
        return expression;
    }
    return Result!Expression("Expected an expression", tokens.front.position);
}
/**
 * Parses a single constant definition of the form $(D identifier = number).
 *
 * Params:
 *     tokens = Remaining tokens, must not be empty. Consumed tokens are
 *              removed from the range.
 *
 * Returns: The parsed definition or an error describing the first
 *          unexpected or missing token.
 */
private Result!Definition parseDefinition(ref Array!Token.Range tokens) @nogc
in (!tokens.empty, "Expected definition, got end of stream")
{
    // Kept to be able to report an error after the last token was consumed.
    Position position = tokens.front.position;

    if (!tokens.front.ofType(Token.Type.identifier))
    {
        return Result!Definition("Expected a constant name", position);
    }
    auto definition = MmapPool.instance.make!Definition;
    definition.identifier = tokens.front.value!(Token.Type.identifier); // Copy.
    tokens.popFront();

    if (tokens.empty)
    {
        return Result!Definition("Expected \"=\"", position);
    }
    if (!tokens.front.ofType(Token.Type.equals))
    {
        return Result!Definition("Expected \"=\"", tokens.front.position);
    }
    position = tokens.front.position;
    tokens.popFront(); // Skip the equals sign.

    if (tokens.empty)
    {
        return Result!Definition("Expected a number", position);
    }
    if (tokens.front.ofType(Token.Type.number))
    {
        auto number = MmapPool.instance.make!Number;
        number.value = tokens.front.value!(Token.Type.number);
        definition.number = number;
        tokens.popFront;
        return Result!Definition(definition);
    }
    return Result!Definition("Expected a number", tokens.front.position);
}
/**
 * Parses a statement. The only supported statement is "!" followed by an
 * expression.
 *
 * Params:
 *     tokens = Remaining tokens, must not be empty. Consumed tokens are
 *              removed from the range.
 *
 * Returns: The parsed statement or an error.
 */
private Result!Statement parseStatement(ref Array!Token.Range tokens) @nogc
in (!tokens.empty, "Expected block, got end of stream")
{
    if (tokens.front.ofType(Token.Type.bang))
    {
        // Location of the "!", used if nothing follows it.
        Position position = tokens.front.position;
        tokens.popFront;

        if (tokens.empty)
        {
            return Result!Statement("Expected an expression", position);
        }
        auto statement = MmapPool.instance.make!BangStatement;
        auto expression = parseExpression(tokens);
        if (!expression.valid)
        {
            return Result!Statement(expression.error.get);
        }
        statement.expression = expression.result;

        return Result!Statement(statement);
    }
    return Result!Statement("Expected ! statement", tokens.front.position);
}
/**
 * Parses the "const" section: the keyword followed by one or more
 * comma-separated constant definitions. Parsing stops in front of the
 * terminating semicolon, which is left in the range.
 *
 * Params:
 *     tokens = Remaining tokens starting with the "const" keyword. Consumed
 *              tokens are removed from the range.
 *
 * Returns: The parsed definitions or an error.
 */
private Result!(Array!Definition) parseDefinitions(ref Array!Token.Range tokens) @nogc
in (!tokens.empty, "Expected definition, got end of stream")
{
    // Location of the last consumed token, used when the stream ends early.
    Position position = tokens.front.position;
    tokens.popFront; // Skip const.
    Array!Definition definitions;

    while (true)
    {
        if (tokens.empty)
        {
            return typeof(return)("Expected a constant definition", position);
        }
        position = tokens.front.position;

        auto definition = parseDefinition(tokens);
        if (!definition.valid)
        {
            return typeof(return)(definition.error.get);
        }
        definitions.insertBack(definition.result);

        if (tokens.empty)
        {
            return typeof(return)("Expected \";\" or \",\"", position);
        }
        if (tokens.front.ofType(Token.Type.semicolon))
        {
            break;
        }
        if (!tokens.front.ofType(Token.Type.comma))
        {
            return typeof(return)("Expected \";\" or \",\"", tokens.front.position);
        }
        position = tokens.front.position;
        tokens.popFront; // Skip the comma.
    }
    return typeof(return)(definitions);
}
/**
 * Parses the "var" section: the keyword followed by variable names.
 * Parsing stops in front of a semicolon, which is left in the range; a comma
 * after a name is skipped.
 *
 * Params:
 *     tokens = Remaining tokens starting with the "var" keyword. Consumed
 *              tokens are removed from the range.
 *
 * Returns: The parsed declarations, or an error if a name is expected but
 *          another token is found, or if the stream ends right after a name.
 */
private Result!(Array!VariableDeclaration) parseVariableDeclarations(ref Array!Token.Range tokens) @nogc
in (!tokens.empty, "Expected variable declarations, got end of stream")
{
    tokens.popFront; // Skip var.
    Array!VariableDeclaration declarations;

    while (!tokens.empty)
    {
        auto nameToken = tokens.front;

        if (!nameToken.ofType(Token.Type.identifier))
        {
            return typeof(return)("Expected variable name", tokens.front.position);
        }
        auto declaration = MmapPool.instance.make!VariableDeclaration;
        declaration.identifier = nameToken.value!(Token.Type.identifier);
        declarations.insertBack(declaration);
        tokens.popFront;

        if (tokens.empty)
        {
            return typeof(return)("Expected \";\" or \",\" name", nameToken.position);
        }
        if (tokens.front.ofType(Token.Type.semicolon))
        {
            break;
        }
        if (tokens.front.ofType(Token.Type.comma))
        {
            tokens.popFront;
        }
    }
    return typeof(return)(declarations);
}
/**
 * Parses a block: an optional "const" section, an optional "var" section
 * (each terminated by a semicolon) and a statement.
 *
 * Params:
 *     tokens = Remaining tokens, must not be empty. Consumed tokens are
 *              removed from the range.
 *
 * Returns: The parsed block or the first error encountered.
 */
private Result!Block parseBlock(ref Array!Token.Range tokens) @nogc
in (!tokens.empty, "Expected block, got end of stream")
{
    auto block = MmapPool.instance.make!Block;
    // Fallback location for errors detected after the stream has ended.
    Position position = tokens.front.position;

    if (tokens.front.ofType(Token.Type.let))
    {
        auto constDefinitions = parseDefinitions(tokens);
        if (!constDefinitions.valid)
        {
            return Result!Block(constDefinitions.error.get);
        }
        block.definitions = constDefinitions.result;

        if (tokens.empty)
        {
            return Result!Block("Expected \";\" after constant definitions", position);
        }
        if (!tokens.front.ofType(Token.Type.semicolon))
        {
            return Result!Block("Expected \";\" after constant definitions", tokens.front.position);
        }
        position = tokens.front.position;
        tokens.popFront; // Skip the semicolon.
    }
    if (!tokens.empty && tokens.front.ofType(Token.Type.var))
    {
        auto variableDeclarations = parseVariableDeclarations(tokens);
        if (!variableDeclarations.valid)
        {
            return Result!Block(variableDeclarations.error.get);
        }
        block.variableDeclarations = variableDeclarations.result;

        if (tokens.empty)
        {
            return Result!Block("Expected \";\" after variable declarations", position);
        }
        if (!tokens.front.ofType(Token.Type.semicolon))
        {
            return Result!Block("Expected \";\" after variable declarations", tokens.front.position);
        }
        position = tokens.front.position;
        tokens.popFront; // Skip the semicolon.
    }
    if (tokens.empty)
    {
        return Result!Block("Expected a statement", position);
    }
    auto statement = parseStatement(tokens);
    if (!statement.valid)
    {
        return Result!Block(statement.error.get);
    }
    block.statement = statement.result;

    return Result!Block(block);
}
/**
 * Parses a token stream as a block.
 *
 * Params:
 *     tokenStream = Tokens produced by the lexer.
 *
 * Returns: The parsed block or an error.
 */
Result!Block parse(ref Array!Token tokenStream) @nogc
{
    // parseBlock consumes a range, the array itself is left untouched.
    auto remaining = tokenStream[];

    return parseBlock(remaining);
}

View File

@@ -1,84 +0,0 @@
module elna.result;
import std.typecons;
/**
 * Position in the source text. Both coordinates default to 1.
 */
struct Position
{
/// Line.
size_t line = 1;
/// Column.
size_t column = 1;
}
/**
 * Error found in the source text: a message and the position it refers to.
 */
struct CompileError
{
    private string message_;
    private Position position_;

    @disable this();

    /**
     * Params:
     *     message  = Error text.
     *     position = Error position in the source text.
     */
    this(string message, Position position) @nogc nothrow pure @safe
    {
        this.position_ = position;
        this.message_ = message;
    }

    /// Returns: The error text.
    @property string message() const @nogc nothrow pure @safe
    {
        return message_;
    }

    /// Returns: The line of the error in the source text.
    @property size_t line() const @nogc nothrow pure @safe
    {
        return position_.line;
    }

    /// Returns: The column of the error in the source text.
    @property size_t column() const @nogc nothrow pure @safe
    {
        return position_.column;
    }
}
/**
 * Either a successfully computed value or a compile error.
 *
 * Params:
 *     T = Type of the value.
 */
struct Result(T)
{
    /// The error, null if the computation succeeded.
    Nullable!CompileError error;
    /// The value, $(D T.init) if the computation failed.
    T result;

    /**
     * Creates a successful result.
     *
     * Params:
     *     result = Computed value.
     */
    this(T result)
    {
        this.result = result;
        this.error = typeof(this.error).init;
    }

    /**
     * Creates a failed result.
     *
     * Params:
     *     message  = Error text.
     *     position = Error position in the source text.
     */
    this(string message, Position position)
    {
        this.result = T.init;
        this.error = CompileError(message, position);
    }

    /**
     * Creates a failed result out of an existing error.
     *
     * Params:
     *     compileError = The error.
     */
    this(CompileError compileError)
    {
        // T.init instead of null, so that value types can be used as T as
        // well, exactly like in the constructor above.
        this.result = T.init;
        this.error = compileError;
    }

    @disable this();

    /// Returns: Whether the result holds a value and not an error.
    @property bool valid() const
    {
        return error.isNull;
    }
}

View File

@@ -1,72 +0,0 @@
import core.stdc.stdio;
import core.stdc.string;
import core.stdc.stdlib;
import elna.lexer;
import elna.parser;
import elna.generator;
import elna.ir;
import tanya.container.string;
import tanya.memory.allocator;
import tanya.memory.mmappool;
/**
 * Reads a file into a fixed-size buffer.
 *
 * The buffer is used twice: first for the null-terminated file name that is
 * passed to the C library, then for the file contents.
 *
 * Params:
 *     N      = Buffer size.
 *     source = Name of the file to read.
 *     buffer = Storage for the file contents.
 *
 * Returns: The part of the buffer filled with the file contents, or
 *          $(D null) if the file name or the file doesn't fit into the
 *          buffer or if the file cannot be read.
 */
private char[] readSource(size_t N)(string source, out char[N] buffer) @nogc
{
    // One byte is reserved for the terminating null character.
    if (source.length >= N)
    {
        fprintf(stderr, "%.*s: file name is too long\n", cast(int) source.length, source.ptr);
        return null;
    }
    // D strings aren't necessarily null-terminated, so copy only the
    // characters and terminate the copy explicitly.
    memcpy(buffer.ptr, source.ptr, source.length);
    buffer[source.length] = '\0';

    auto handle = fopen(buffer.ptr, "r");
    if (handle is null)
    {
        perror(buffer.ptr);
        return null;
    }
    scope (exit)
    {
        fclose(handle);
    }

    if (fseek(handle, 0, SEEK_END) != 0)
    {
        perror(buffer.ptr);
        return null;
    }
    const position = ftell(handle);
    if (position < 0)
    {
        perror(buffer.ptr);
        return null;
    }
    const size_t fileSize = cast(size_t) position;
    if (fileSize >= N)
    {
        fprintf(stderr, "%s: file is too large\n", buffer.ptr);
        return null;
    }
    rewind(handle);

    // Read single bytes, so that the return value is the number of bytes read.
    const size_t bytesRead = fread(buffer.ptr, 1, fileSize, handle);
    buffer[bytesRead] = '\0';

    return buffer[0 .. bytesRead];
}
/**
 * Compiler entry point: reads the source file given as the first argument,
 * runs the lexer, the parser and the transformation into the intermediate
 * representation, writes an object file and prints the generated code.
 *
 * Params:
 *     args = Command line arguments; args[1] is the source file name.
 *
 * Returns: 0 on success, 4 if the file name is missing, 3 if the file
 *          cannot be read, 1 on a lexical error, 2 on a parse error.
 */
int main(string[] args)
{
    char[255] buffer;

    defaultAllocator = MmapPool.instance;

    if (args.length < 2)
    {
        return 4;
    }

    auto sourceText = readSource(args[1], buffer);
    if (sourceText is null)
    {
        return 3;
    }

    auto tokens = lex(sourceText);
    if (tokens.length == 0)
    {
        printf("Lexical analysis failed.\n");
        return 1;
    }

    auto ast = parse(tokens);
    if (!ast.valid)
    {
        auto failure = ast.error.get;

        printf("%lu:%lu: %s\n", failure.line, failure.column, failure.message.ptr);
        return 2;
    }
    auto ir = transform(ast.result);

    // The last four characters of the file name are replaced with "o"
    // (presumably turning "name.elna" into "name.o" - confirm against the build scripts).
    auto objectName = String("build/");
    objectName.insertBack(args[1][0 .. $ - 4]);
    objectName.insertBack("o");
    writeObject(ir, objectName);

    auto code = generate(ir);
    printf("%s", code.toStringz());

    return 0;
}

View File

@@ -1,3 +0,0 @@
const a = 1, b = 2;
! + a b
.

View File

@@ -1 +0,0 @@
3

View File

@@ -1 +0,0 @@
8

View File

@@ -1 +0,0 @@
8

View File

@@ -1,2 +0,0 @@
! + 1 7
.

View File

@@ -1,2 +0,0 @@
! + 1 (+ 3 4)
.