From 5490f6ce1c05d4177e4b021212e0865300d063a8 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sun, 5 Jun 2022 15:16:04 +0200 Subject: [PATCH] Initial commit --- .gitignore | 3 + README | 37 ++ Rakefile | 69 ++++ dub.json | 9 + source/elna/extended.d | 9 + source/elna/generator.d | 660 ++++++++++++++++++++++++++++++ source/elna/ir.d | 144 +++++++ source/elna/lexer.d | 252 ++++++++++++ source/elna/parser.d | 269 ++++++++++++ source/elna/result.d | 84 ++++ source/main.d | 72 ++++ tests/const_list.elna | 3 + tests/expectations/const_list.txt | 1 + tests/expectations/sum.txt | 1 + tests/expectations/sums.txt | 1 + tests/sum.elna | 2 + tests/sums.elna | 2 + 17 files changed, 1618 insertions(+) create mode 100644 .gitignore create mode 100644 README create mode 100644 Rakefile create mode 100644 dub.json create mode 100644 source/elna/extended.d create mode 100644 source/elna/generator.d create mode 100644 source/elna/ir.d create mode 100644 source/elna/lexer.d create mode 100644 source/elna/parser.d create mode 100644 source/elna/result.d create mode 100644 source/main.d create mode 100644 tests/const_list.elna create mode 100644 tests/expectations/const_list.txt create mode 100644 tests/expectations/sum.txt create mode 100644 tests/expectations/sums.txt create mode 100644 tests/sum.elna create mode 100644 tests/sums.elna diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d0d201a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.dub/ +/dub.selections.json +/build/ diff --git a/README b/README new file mode 100644 index 0000000..b6e03eb --- /dev/null +++ b/README @@ -0,0 +1,37 @@ +# Elna programming language + +Elna compiles simple mathematical operations to machine code. +The compiled program returns the result of the operation. + +## File extension + +.elna + +## Grammar PL/0 + +program = block "." 
; + +block = [ "const" ident "=" number {"," ident "=" number} ";"] + [ "var" ident {"," ident} ";"] + { "procedure" ident ";" block ";" } statement ; + +statement = [ ident ":=" expression | "call" ident + | "?" ident | "!" expression + | "begin" statement {";" statement } "end" + | "if" condition "then" statement + | "while" condition "do" statement ]; + +condition = "odd" expression | + expression ("="|"#"|"<"|"<="|">"|">=") expression ; + +expression = [ "+"|"-"] term { ("+"|"-") term}; + +term = factor {("*"|"/") factor}; + +factor = ident | number | "(" expression ")"; + +## Operations + +"!" - Write a line. +"?" - Read user input. +"odd" - The only function, returns whether a number is odd. diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..acafe3b --- /dev/null +++ b/Rakefile @@ -0,0 +1,69 @@ +require 'pathname' +require 'rake/clean' +require 'open3' + +DFLAGS = ['--warn-no-deprecated', '-L/usr/lib64/gcc-12'] +BINARY = 'build/bin/elna' +TESTS = FileList['tests/*.elna'] + .map { |test| (Pathname.new('build') + test).sub_ext('').to_path } +SOURCES = FileList['source/**/*.d'] + +directory 'build' + +CLEAN.include 'build' +CLEAN.include '.dub' + +rule(/build\/tests\/.+/ => ->(file) { test_for_out(file) }) do |t| + Pathname.new(t.name).dirname.mkpath + sh BINARY, t.source + sh 'gcc', '-o', t.name, "#{t.name}.o" + # Open3.pipeline [BINARY, t.source], ['gcc', '-x', 'assembler', '-o', t.name, '-'] +end + +file BINARY => SOURCES do |t| + sh({ 'DFLAGS' => (DFLAGS * ' ') }, 'dub', 'build', '--compiler=gdc-12') +end + +file 'build/tests/sample' => BINARY do |t| + sh t.source + sh 'gcc', '-o', t.name, 'build/tests/sample.o' +end + +task default: BINARY + +desc 'Run all tests and check the results' +task test: TESTS +task test: BINARY do + TESTS.each do |test| + expected = Pathname + .new(test) + .sub_ext('.txt') + .sub(/^build\/tests\//, 'tests/expectations/') + .read + .to_i + + puts "Running #{test}" + system test + actual = $?.exitstatus + + fail 
"#{test}: Expected #{expected}, got #{actual}" unless expected == actual + end + + # system './build/tests/sample' + # actual = $?.exitstatus + # fail "./build/tests/sample: Expected 3, got #{actual}" unless 3 == actual +end + +desc 'Run unittest blocks' +task unittest: SOURCES do |t| + sh('dub', 'test', '--compiler=gdc-12') +end + +def test_for_out(out_file) + test_source = Pathname + .new(out_file) + .sub_ext('.elna') + .sub(/^build\//, '') + .to_path + [test_source, BINARY] +end diff --git a/dub.json b/dub.json new file mode 100644 index 0000000..8567d3a --- /dev/null +++ b/dub.json @@ -0,0 +1,9 @@ +{ + "dependencies": { + "tanya": "~>0.18.0" + }, + "name": "elna", + "targetType": "executable", + "targetPath": "build/bin", + "mainSourceFile": "source/main.d" +} diff --git a/source/elna/extended.d b/source/elna/extended.d new file mode 100644 index 0000000..0473cee --- /dev/null +++ b/source/elna/extended.d @@ -0,0 +1,9 @@ +/** + * File I/O that can be moved into more generic library when and if finished. + */ +module elna.extended; + +struct File +{ + @disable this(this); +} diff --git a/source/elna/generator.d b/source/elna/generator.d new file mode 100644 index 0000000..ce2b3d1 --- /dev/null +++ b/source/elna/generator.d @@ -0,0 +1,660 @@ +module elna.generator; + +import core.stdc.stdio; +import core.stdc.stdlib; +import core.stdc.string; +import elna.ir; +import tanya.container.array; +import tanya.container.string; +import tanya.memory.mmappool; +import tanya.format; + +/// Unsigned program address. +alias Elf64_Addr = void*; +/// Unsigned file offset. +alias Elf64_Off = ulong; +/// Unsigned medium integer. +alias Elf64_Half = ushort; +/// Unsigned integer. +alias Elf64_Word = uint; +/// Signed integer. +alias Elf64_Sword = int; +/// Unsigned long integer. +alias Elf64_Xword = ulong; +/// Signed long integer. +alias Elf64_Sxword = long; + +enum size_t EI_INDENT = 16; + +/** + * File header. + */ +struct Elf64_Ehdr +{ + /// ELF identification. 
+ ubyte[EI_INDENT] e_ident; + /// Object file type. + Elf64_Half e_type; + /// Machine type. + Elf64_Half e_machine; + /// Object file version + Elf64_Word e_version; + /// Entry point address. + Elf64_Addr e_entry; + /// Program header offset. + Elf64_Off e_phoff; + /// Section header offset. + Elf64_Off e_shoff; + /// Processor-specific flags. + Elf64_Word e_flags; + /// ELF header size. + Elf64_Half e_ehsize; + /// Size of program header entry. + Elf64_Half e_phentsize; + /// Number of program header entries. + Elf64_Half e_phnum; + /// Size of section header entry. + Elf64_Half e_shentsize; + /// Number of section header entries. + Elf64_Half e_shnum; + /// Section name string table index. + Elf64_Half e_shstrndx; +} + +/** + * Section header. + */ +struct Elf64_Shdr +{ + /// Section name. + Elf64_Word sh_name; + /// Section type. + Elf64_Word sh_type; + /// Section attributes. + Elf64_Xword sh_flags; + /// Virtual address in memory. + Elf64_Addr sh_addr; + /// Offset in file. + Elf64_Off sh_offset; + /// Size of section. + Elf64_Xword sh_size; + /// Link to other section. + Elf64_Word sh_link; + /// Miscellaneous information. + Elf64_Word sh_info; + /// Address alignment boundary. + Elf64_Xword sh_addralign; + /// Size of entries, if section has table. + Elf64_Xword sh_entsize; +} + +struct Elf64_Sym +{ + /// Symbol name. + Elf64_Word st_name; + /// Type and Binding attributes. + ubyte st_info; + /// Reserved. + ubyte st_other; + /// Section table index. + Elf64_Half st_shndx; + /// Symbol value. + Elf64_Addr st_value; + /// Size of object (e.g., common). + Elf64_Xword st_size; +} + +/// Section Types, sh_type. +enum : Elf64_Word +{ + /// Marks an unused section header. + SHT_NULL = 0, + /// Contains information defined by the program. + SHT_PROGBITS = 1, + /// Contains a linker symbol table. + SHT_SYMTAB = 2, + /// Contains a string table. + SHT_STRTAB = 3, + /// Contains “Rela” type relocation entries. 
+ SHT_RELA = 4, + /// Contains a symbol hash table + SHT_HASH = 5, + /// Contains dynamic linking tables + SHT_DYNAMIC = 6, + /// Contains note information + SHT_NOTE = 7, + /// Contains uninitialized space; does not occupy any space in the file. + SHT_NOBITS = 8, + /// Contains "Rel" type relocation entries. + SHT_REL = 9, + /// Reserved. + SHT_SHLIB = 10, + /// Contains a dynamic loader symbol table. + SHT_DYNSYM = 11, + /// Environment-specific use. + SHT_LOOS = 0x60000000, + SHT_HIOS = 0x6FFFFFFF, + /// Processor-specific use. + SHT_LOPROC = 0x70000000, + SHT_HIPROC = 0x7FFFFFFF, +} + +/** + * Section Attributes, sh_flags. + */ +enum : Elf64_Xword +{ + /// Section contains writable data. + SHF_WRITE = 0x1, + /// Section is allocated in memory image of program. + SHF_ALLOC = 0x2, + /// Section contains executable instructions. + SHF_EXECINSTR = 0x4, + /// Environment-specific use. + SHF_MASKOS = 0x0F000000, + /// Processor-specific use. + SHF_MASKPROC = 0xF0000000, +} + +enum : Elf64_Word +{ + /// Not visible outside the object file. + STB_LOCAL = 0, + /// Global symbol, visible to all object files. + STB_GLOBAL = 1, + /// Global scope, but with lower precedence than global symbols. + STB_WEAK = 2, + /// Environment-specific use. + STB_LOOS = 10, + STB_HIOS = 12, + /// Processor-specific use. + STB_LOPROC = 13, + STB_HIPROC = 15, +} + +enum : Elf64_Word +{ + /// No type specified (e.g., an absolute symbol). + STT_NOTYPE = 0, + /// Data object. + STT_OBJECT = 1, + /// Function entry point. + STT_FUNC = 2, + /// Symbol is associated with a section. + STT_SECTION = 3, + /// Source file associated with the object file. + STT_FILE = 4, + /// Environment-specific use. + STT_LOOS = 10, + STT_HIOS = 12, + /// Processor-specific use. + STT_LOPROC = 13, + STT_HIPROC = 15, +} + +Elf64_Ehdr makeFileHeader(Elf64_Off sectionHeaderOffset, + Elf64_Half sectionHeaderCount, + Elf64_Half stringIndex) @nogc +{ + Elf64_Ehdr header; + + // Magic number. 
+ header.e_ident[0] = '\x7f'; + header.e_ident[1] = 'E'; + header.e_ident[2] = 'L'; + header.e_ident[3] = 'F'; + + // File class. + header.e_ident[4] = EI_CLASS.ELFCLASS64; + + // Data encoding. + header.e_ident[5] = EI_DATA.ELFDATA2LSB; + + // Version. + header.e_ident[6] = EV_CURRENT; + + // OS/ABI identification. + header.e_ident[7] = EI_OSABI.ELFOSABI_SYSV; + + // ABI version. + header.e_ident[8] = 0; + + // Size of e_ident[]. + header.e_ident[15] = 0; + + header.e_type = ET_REL; + header.e_machine = 0x3e; // EM_X86_64: AMD x86-64 architecture + header.e_version = EV_CURRENT; + header.e_entry = null; + header.e_phoff = 0; + header.e_shoff = sectionHeaderOffset; + header.e_flags = 0; + header.e_ehsize = Elf64_Ehdr.sizeof; + header.e_phentsize = 0; + header.e_phnum = 0; + header.e_shentsize = Elf64_Shdr.sizeof; + header.e_shnum = sectionHeaderCount; + header.e_shstrndx = stringIndex; + + return header; +} + +enum char[33] sectionStringTable = "\0.symtab\0.strtab\0.shstrtab\0.text\0"; + +Elf64_Shdr makeTextHeader(Elf64_Off offset, Elf64_Xword size) @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0x1b; + table.sh_type = SHT_PROGBITS; + table.sh_flags = SHF_EXECINSTR | SHF_ALLOC; + table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 1; + table.sh_entsize = 0; + + return table; +} + +Elf64_Shdr makeDataHeader(Elf64_Off offset, Elf64_Xword size) @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0x21; + table.sh_type = SHT_PROGBITS; + table.sh_flags = SHF_WRITE | SHF_ALLOC; + table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 1; + table.sh_entsize = 0; + + return table; +} + +Elf64_Shdr makeSymtableHeader(Elf64_Off offset, Elf64_Xword size, Elf64_Word entriesCount) @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0x01; + table.sh_type = SHT_SYMTAB; + table.sh_flags = 0; + 
table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = 0x03; // String table used by entries in this section. + table.sh_info = entriesCount; + table.sh_addralign = 8; + table.sh_entsize = Elf64_Sym.sizeof; + + return table; +} + +Elf64_Shdr makeStringHeader(Elf64_Word stringIndex, Elf64_Off offset, Elf64_Xword size) @nogc +{ + Elf64_Shdr table; + + table.sh_name = stringIndex; + table.sh_type = SHT_STRTAB; + table.sh_flags = 0; + table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 1; + table.sh_entsize = 0; + + return table; +} + +Elf64_Shdr makeInitialHeader() @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0; + table.sh_type = SHT_NULL; + table.sh_flags = 0; + table.sh_addr = null; + table.sh_offset = 0; + table.sh_size = 0; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 0; + table.sh_entsize = 0; + + return table; +} + +Elf64_Sym makeInitialSymTable() @nogc +{ + Elf64_Sym table; + + table.st_name = 0; + table.st_info = 0; + table.st_other = 0; + table.st_shndx = 0; + table.st_value = null; + table.st_size = 0; + + return table; +} + +Elf64_Sym makeMainSymTable(Elf64_Half textIndex) @nogc +{ + Elf64_Sym table; + + table.st_name = 0x01; + table.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); + table.st_other = 0; + table.st_shndx = textIndex; + table.st_value = null; + table.st_size = 0; + + return table; +} + +ubyte ELF32_ST_BIND(ubyte i) @nogc nothrow pure @safe +{ + return i >> 4; +} + +ubyte ELF32_ST_TYPE(ubyte i) @nogc nothrow pure @safe +{ + return i & 0xf; +} + +ubyte ELF32_ST_INFO(ubyte b, ubyte t) @nogc nothrow pure @safe +{ + return cast(ubyte) ((b << 4) + (t & 0xf)); +} + +/// Special Section Indices. +enum : Elf64_Half +{ + /// Used to mark an undefined or meaningless section reference. + SHN_UNDEF = 0, + /// Processor-specific use. 
+    SHN_LOPROC = 0xFF00,
+    SHN_HIPROC = 0xFF1F,
+    /// Environment-specific use.
+    SHN_LOOS = 0xFF20,
+    SHN_HIOS = 0xFF3F,
+    /// Indicates that the corresponding reference is an absolute value.
+    SHN_ABS = 0xFFF1,
+    /**
+     * Indicates a symbol that has been declared as a common block (Fortran
+     * COMMON or C tentative declaration).
+     */
+    SHN_COMMON = 0xFFF2,
+}
+
+/**
+ * Object File Classes, e_ident[EI_CLASS].
+ */
+enum EI_CLASS : ubyte
+{
+    /// 32-bit objects.
+    ELFCLASS32 = 1,
+    /// 64-bit objects.
+    ELFCLASS64 = 2,
+}
+
+enum ubyte EV_CURRENT = 1;
+
+/**
+ * Data Encodings, e_ident[EI_DATA].
+ */
+enum EI_DATA : ubyte
+{
+    /// Object file data structures are little-endian.
+    ELFDATA2LSB = 1,
+    /// Object file data structures are big-endian.
+    ELFDATA2MSB = 2,
+}
+
+/**
+ * Operating System and ABI Identifiers, e_ident[EI_OSABI].
+ */
+enum EI_OSABI : ubyte
+{
+    /// System V ABI.
+    ELFOSABI_SYSV = 0,
+    /// HP-UX operating system.
+    ELFOSABI_HPUX = 1,
+    /// Standalone (embedded) application.
+    ELFOSABI_STANDALONE = 255,
+}
+
+enum : Elf64_Half
+{
+    ET_NONE = 0, /// No file type.
+    ET_REL = 1, /// Relocatable object file.
+    ET_EXEC = 2, /// Executable file.
+    ET_DYN = 3, /// Shared object file.
+    ET_CORE = 4, /// Core file.
+    ET_LOOS = 0xFE00, /// Environment-specific use.
+    ET_HIOS = 0xFEFF,
+    ET_LOPROC = 0xFF00, /// Processor-specific use.
+    ET_HIPROC = 0xFFFF,
+}
+
+private size_t pad(size_t value) @nogc
+{
+    return (value / 8 + 1) * 8; // Next multiple of 8 strictly above value; an aligned value still grows by 8.
+}
+
+struct Symbol
+{
+    String name;
+    Array!ubyte instructions;
+}
+
+/*
+.text
+    .globl main
+    .type main, @function
+main:
+    movl $3, %eax
+    ret
+*/
+immutable ubyte[] instructions = [
+    // Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
+    // Register opcode of %rbp is 5.
+    0x50 + 5, // pushq %rbp
+    0x48, 0x89, 0xe5, // movq %rsp, %rbp
+
+    0xb8, 0x03, 0x00, 0x00, 0x00, // movl $3, %eax
+
+    // Epilogue.
+    0x48, 0x89, 0xec, // movq %rbp, %rsp
+    0x58 + 5, // popq %rbp
+    0xc3, // ret
+];
+
+void writeObject(Definition ast, String outputFilename) @nogc
+{
+    auto handle = fopen(outputFilename.toStringz, "wb");
+
+    if (handle is null)
+    {
+        perror("writing sample");
+        return;
+    }
+    scope (exit)
+    {
+        fclose(handle);
+    }
+
+    size_t currentOffset = Elf64_Ehdr.sizeof;
+    Array!Elf64_Shdr sectionHeaders;
+    Array!Elf64_Sym symbolEntries;
+
+    // Prologue
+    Array!ubyte asmTemplate = Array!ubyte(cast(ubyte[]) [
+        // Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
+        // Register opcode of %rbp is 5.
+        0x50 + 5, // pushq %rbp
+        0x48, 0x89, 0xe5, // movq %rsp, %rbp
+    ]);
+    int i = 1;
+    foreach (statement; ast.statements[])
+    {
+        if ((cast(Number) statement.subroutine.lhs) !is null)
+        {
+            // Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
+            // Register opcode of %eax is 0.
+            asmTemplate.insertBack(cast(ubyte) 0xb8); // movl $x, %eax; where $x is a number.
+            asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.lhs).value)[0 .. int.sizeof]);
+        }
+        else if ((cast(Variable) statement.subroutine.lhs) !is null)
+        {
+            // movl -x(%rbp), %eax; where x is a number.
+            asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x45]);
+            const disposition = (cast(Variable) statement.subroutine.lhs).counter * (-4);
+            asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); // Only the first byte of the offset is emitted.
+        }
+        if ((cast(Number) statement.subroutine.rhs) !is null)
+        {
+            // Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
+            // Register opcode of %ebx is 3.
+            asmTemplate.insertBack(cast(ubyte) 0xbb); // movl $x, %ebx; where $x is a number.
+            asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.rhs).value)[0 .. int.sizeof]);
+        }
+        else if ((cast(Variable) statement.subroutine.rhs) !is null)
+        {
+            // movl -x(%rbp), %ebx; where x is a number.
+ asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x5d]); + const disposition = (cast(Variable) statement.subroutine.rhs).counter * (-4); + asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); + } + // Calculate the result and assign it to a variable on the stack. + asmTemplate.insertBack(cast(ubyte[]) [0x01, 0xd8]); // add %ebx, %eax + + asmTemplate.insertBack(cast(ubyte[]) [0x89, 0x45]); // movl %eax, -x(%rbp); where x is a number. + const disposition = i * (-4); + asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); + ++i; + } + // Epilogue. + asmTemplate.insertBack(cast(ubyte[]) [ + 0x48, 0x89, 0xec, // movq %rbp, %rsp + 0x58 + 5, // popq %rbp + 0xc3, // ret + ]); + + Symbol[1] symbols = [Symbol(String("main"), asmTemplate)]; + + sectionHeaders.insertBack(makeInitialHeader()); + symbolEntries.insertBack(makeInitialSymTable()); + + String stringTable = String("\0"); + foreach (symbol; symbols[]) + { + stringTable.insertBack(symbol.name[]); + stringTable.insertBack('\0'); + + sectionHeaders.insertBack(makeTextHeader(currentOffset, symbol.instructions.length)); + currentOffset = pad(currentOffset + symbol.instructions.length); + + symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1))); + } + + const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof; + sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, symbols.length)); + currentOffset += symbolTableSize; + + sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length)); + currentOffset += stringTable.length; + + sectionHeaders.insertBack(makeStringHeader(0x11, currentOffset, sectionStringTable.length)); + currentOffset = pad(currentOffset + sectionStringTable.length); + + auto fileHeader = makeFileHeader(currentOffset, 5, 4); + + version (none) + { + printf("%.2x\n", cast(uint) currentOffset); + } + ubyte[8] padding = 0; + size_t codeLength = stringTable.length + sectionStringTable.length; + + fwrite(&fileHeader, 
8, Elf64_Ehdr.sizeof / 8, handle); + foreach (symbol; symbols[]) + { + immutable size_t instructionsLength = pad(symbol.instructions.length); + fwrite(symbol.instructions.get.ptr, 1, symbol.instructions.length, handle); + fwrite(padding.ptr, 1, instructionsLength - symbol.instructions.length, handle); + codeLength += instructionsLength; + } + fwrite(symbolEntries.get.ptr, Elf64_Sym.sizeof, symbolEntries.length, handle); + fwrite(stringTable.get.ptr, 1, stringTable.length, handle); + fwrite(sectionStringTable.ptr, 1, sectionStringTable.length, handle); + fwrite(padding.ptr, pad(codeLength) - codeLength, 1, handle); + fwrite(sectionHeaders.get.ptr, Elf64_Shdr.sizeof, sectionHeaders.length, handle); +} + +String generate(Definition ast) @nogc +{ + // Prologue + String asmTemplate = ".text + .globl main + .type main, @function +main: + pushq %rbp + movq %rsp, %rbp +"; + + /* Allocate space on the stack for local variables. + asmTemplate.insertBack(" sub $"); + asmTemplate.insertBack(format!"{}"(ast.statements.length)[]); + asmTemplate.insertBack(", $esp\n"); */ + + int i = 1; + foreach (statement; ast.statements[]) + { + if ((cast(Number) statement.subroutine.lhs) !is null) + { + asmTemplate.insertBack(" movl $"); + asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.lhs).value)[]); + asmTemplate.insertBack(", %eax\n"); + } + else if ((cast(Variable) statement.subroutine.lhs) !is null) + { + asmTemplate.insertBack(" movl -"); + asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4)[]); + asmTemplate.insertBack("(%rbp), %eax\n"); + } + if ((cast(Number) statement.subroutine.rhs) !is null) + { + asmTemplate.insertBack(" movl $"); + asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.rhs).value)[]); + asmTemplate.insertBack(", %ebx\n"); + } + else if ((cast(Variable) statement.subroutine.rhs) !is null) + { + asmTemplate.insertBack(" movl -"); + asmTemplate.insertBack(format!"{}"((cast(Variable) 
statement.subroutine.rhs).counter * 4)[]); + asmTemplate.insertBack("(%rbp), %ebx\n"); + } + // Calculate the result and assign it to a variable on the stack. + asmTemplate.insertBack(" add %ebx, %eax\n"); + asmTemplate.insertBack(" movl %eax, -"); + asmTemplate.insertBack(format!"{}"(i * 4)[]); + asmTemplate.insertBack("(%rbp)\n"); + ++i; + } + + // Epilogue. + asmTemplate.insertBack(" movq %rbp, %rsp + popq %rbp + ret +"); + + return asmTemplate; +} diff --git a/source/elna/ir.d b/source/elna/ir.d new file mode 100644 index 0000000..e2a8df4 --- /dev/null +++ b/source/elna/ir.d @@ -0,0 +1,144 @@ +module elna.ir; + +import parser = elna.parser; +import tanya.container.array; +import tanya.container.hashtable; +import tanya.container.string; +import tanya.memory.allocator; +import tanya.memory.mmappool; + +/** + * Definition. + */ +class Definition +{ + char[] identifier; + Array!Statement statements; + Array!VariableDeclaration variableDeclarations; +} + +class Statement +{ + Subroutine subroutine; +} + +abstract class Expression +{ +} + +class Number : Expression +{ + int value; +} + +class Variable : Expression +{ + size_t counter; +} + +class VariableDeclaration +{ + String identifier; +} + +class Subroutine +{ + Expression lhs, rhs; +} + +private Number transformNumber(parser.Number number) @nogc +{ + return MmapPool.instance.make!Number(number.value); +} + +private Variable transformSubroutine(parser.Subroutine subroutine, + ref Array!Statement statements, + ref HashTable!(String, int) constants) @nogc +{ + auto target = MmapPool.instance.make!Subroutine; + target.lhs = transformExpression(subroutine.lhs, statements, constants); + target.rhs = transformExpression(subroutine.rhs, statements, constants); + + auto newStatement = MmapPool.instance.make!Statement; + newStatement.subroutine = target; + statements.insertBack(newStatement); + + auto newVariable = MmapPool.instance.make!Variable; + newVariable.counter = statements.length; + + return newVariable; +} + 
+private Expression transformExpression(parser.Expression expression, + ref Array!Statement statements, + ref HashTable!(String, int) constants) @nogc +{ + if ((cast(parser.Number) expression) !is null) + { + auto numberExpression = MmapPool.instance.make!Number; + numberExpression.value = (cast(parser.Number) expression).value; + + return numberExpression; + } + if ((cast(parser.Variable) expression) !is null) + { + auto numberExpression = MmapPool.instance.make!Number; + numberExpression.value = constants[(cast(parser.Variable) expression).identifier]; + + return numberExpression; + } + else if ((cast(parser.Subroutine) expression) !is null) + { + return transformSubroutine(cast(parser.Subroutine) expression, statements, constants); + } + return null; +} + +Expression transformStatement(parser.Statement statement, + ref Array!Statement statements, + ref HashTable!(String, int) constants) @nogc +{ + if ((cast(parser.BangStatement) statement) !is null) + { + return transformExpression((cast(parser.BangStatement) statement).expression, statements, constants); + } + return null; +} + +HashTable!(String, int) transformConstants(ref Array!(parser.Definition) definitions) @nogc +{ + typeof(return) constants; + + foreach (definition; definitions[]) + { + constants[definition.identifier] = definition.number.value; + } + + return constants; +} + +Array!VariableDeclaration transformVariableDeclarations(ref Array!(parser.VariableDeclaration) variableDeclarations) +@nogc +{ + typeof(return) variables; + + foreach (ref variableDeclaration; variableDeclarations) + { + auto newDeclaration = MmapPool.instance.make!VariableDeclaration; + newDeclaration.identifier = variableDeclaration.identifier; + variables.insertBack(newDeclaration); + } + + return variables; +} + +Definition transform(parser.Block block) @nogc +{ + auto target = MmapPool.instance.make!Definition; + auto constants = transformConstants(block.definitions); + + transformStatement(block.statement, target.statements, 
constants); + target.variableDeclarations = transformVariableDeclarations(block.variableDeclarations); + + return target; +} diff --git a/source/elna/lexer.d b/source/elna/lexer.d new file mode 100644 index 0000000..c47aae0 --- /dev/null +++ b/source/elna/lexer.d @@ -0,0 +1,252 @@ +module elna.lexer; + +import core.stdc.stdlib; +import core.stdc.ctype; +import core.stdc.string; +import elna.result; +import std.range; +import tanya.container.array; +import tanya.container.string; +import tanya.memory.mmappool; + +struct Token +{ + enum Type + { + number, + subroutine, // Operator. + let, + identifier, + equals, + var, + semicolon, + leftParen, + rightParen, + bang, + dot, + comma, + } + + union Value + { + int number; + String identifier; + } + + private Type type; + private Value value_; + private Position position_; + + @disable this(); + + this(Type type, Position position) @nogc nothrow pure @safe + { + this.type = type; + this.position_ = position; + } + + this(Type type, int value, Position position) @nogc nothrow pure @trusted + in (type == Type.number) + { + this(type, position); + this.value_.number = value; + } + + this()(Type type, auto ref String value, Position position) + @nogc nothrow pure @trusted + in (type == Type.identifier) + { + this(type, position); + this.value_.identifier = value; + } + + /** + * Params: + * type = Expected type. + * + * Returns: Whether this token is of the expected type. + */ + bool ofType(Type type) const @nogc nothrow pure @safe + { + return this.type == type; + } + + @property auto value(Type type)() @nogc nothrow pure @trusted + in (ofType(type)) + { + static if (type == Type.number) + { + return this.value_.number; + } + else static if (type == Type.identifier) + { + return this.value_.identifier; + } + else + { + static assert(false, "This type doesn't have a value"); + } + } + + /** + * Returns: The token position in the source text. 
+ */ + @property const(Position) position() const @nogc nothrow pure @safe + { + return this.position_; + } +} + +/** + * Range over the source text that keeps track of the current position. + */ +struct Source +{ + char[] buffer; + Position position; + + this(char[] buffer) @nogc nothrow pure @safe + { + this.buffer = buffer; + } + + @disable this(); + + bool empty() @nogc nothrow pure @safe + { + return this.length == 0; + } + + char front() @nogc nothrow pure @safe + in (!empty) + { + return this.buffer[0]; + } + + void popFront() @nogc nothrow pure @safe + in (!empty) + { + this.buffer = buffer[1 .. $]; + ++this.position.column; + } + + void breakLine() @nogc nothrow pure @safe + in (!empty) + { + this.buffer = buffer[1 .. $]; + ++this.position.line; + this.position.column = 1; + } + + @property size_t length() const @nogc nothrow pure @safe + { + return this.buffer.length; + } + + char opIndex(size_t index) @nogc nothrow pure @safe + in (index < length) + { + return this.buffer[index]; + } + + char[] opSlice(size_t i, size_t j) @nogc nothrow pure @safe + in + { + assert(i <= j); + assert(j <= length); + } + do + { + return this.buffer[i .. j]; + } +} + +Array!Token lex(char[] buffer) @nogc +{ + Array!Token tokens; + auto source = Source(buffer); + + while (!source.empty) + { + if (source.front == ' ') + { + source.popFront; + } + else if (source.front >= '0' && source.front <= '9') // Multi-digit. 
+ { + tokens.insertBack(Token(Token.Type.number, source.front - '0', source.position)); + source.popFront; + } + else if (source.front == '=') + { + tokens.insertBack(Token(Token.Type.equals, source.position)); + source.popFront; + } + else if (source.front == '(') + { + tokens.insertBack(Token(Token.Type.leftParen, source.position)); + source.popFront; + } + else if (source.front == ')') + { + tokens.insertBack(Token(Token.Type.rightParen, source.position)); + source.popFront; + } + else if (source.front == ';') + { + tokens.insertBack(Token(Token.Type.semicolon, source.position)); + source.popFront; + } + else if (source.front == ',') + { + tokens.insertBack(Token(Token.Type.comma, source.position)); + source.popFront; + } + else if (source.front == '!') + { + tokens.insertBack(Token(Token.Type.bang, source.position)); + source.popFront; + } + else if (source.front == '.') + { + tokens.insertBack(Token(Token.Type.dot, source.position)); + source.popFront; + } + else if (isalpha(source.front)) + { + size_t i = 1; + while (i < source.length && isalpha(source[i])) + { + ++i; + } + if (source[0 .. i] == "const") + { + tokens.insertBack(Token(Token.Type.let, source.position)); + } + else if (source[0 .. i] == "var") + { + tokens.insertBack(Token(Token.Type.var, source.position)); + } + else + { + auto identifier = String(source[0 .. i]); + tokens.insertBack(Token(Token.Type.identifier, identifier, source.position)); + } + source.popFrontN(i); + } + else if (source.front == '+') // Multi-character, random special characters. + { + tokens.insertBack(Token(Token.Type.subroutine, source.position)); + source.popFront; + } + else if (source.front == '\n') + { + source.breakLine; + } + else + { + return typeof(tokens)(); // Error. 
+        }
+    }
+    return tokens;
+}
diff --git a/source/elna/parser.d b/source/elna/parser.d
new file mode 100644
index 0000000..28a090b
--- /dev/null
+++ b/source/elna/parser.d
@@ -0,0 +1,426 @@
+module elna.parser;
+
+import elna.lexer;
+import elna.result;
+import tanya.container.array;
+import tanya.container.string;
+import tanya.memory.allocator;
+import tanya.memory.mmappool;
+
+/**
+ * Constant definition.
+ */
+class Definition
+{
+    /// Value of the constant.
+    Number number;
+
+    /// Name of the constant.
+    String identifier;
+}
+
+/**
+ * Variable declaration.
+ */
+class VariableDeclaration
+{
+    /// Name of the variable.
+    String identifier;
+}
+
+/**
+ * Base class of all statements.
+ */
+abstract class Statement
+{
+}
+
+/**
+ * "!" statement.
+ */
+class BangStatement : Statement
+{
+    /// Expression following the bang.
+    Expression expression;
+}
+
+/**
+ * Block: constant definitions and variable declarations, both optional,
+ * followed by a statement.
+ */
+class Block
+{
+    /// Constants defined in the block.
+    Array!Definition definitions;
+
+    /// Variables declared in the block.
+    Array!VariableDeclaration variableDeclarations;
+
+    /// Statement of the block.
+    Statement statement;
+}
+
+/**
+ * Base class of all expressions.
+ */
+abstract class Expression
+{
+}
+
+/**
+ * Number literal.
+ */
+class Number : Expression
+{
+    /// Value of the literal.
+    int value;
+}
+
+/**
+ * Reference to a name.
+ */
+class Variable : Expression
+{
+    /// Referenced name.
+    String identifier;
+}
+
+/**
+ * Operation in prefix notation applied to two operands.
+ */
+class Subroutine : Expression
+{
+    /// Operands.
+    Expression lhs, rhs;
+}
+
+/**
+ * Returns the position of the next token or, if the stream is exhausted,
+ * the given fallback position.
+ */
+private Position positionOr(ref Array!Token.Range tokens, Position fallback) @nogc
+{
+    return tokens.empty ? fallback : tokens.front.position;
+}
+
+/**
+ * Parses an expression: a number, an identifier, an operation followed by
+ * its two operands or an expression in parentheses.
+ *
+ * Params:
+ *     tokens = Remaining tokens; parsed tokens are removed from the front.
+ *
+ * Returns: The expression or a compile error.
+ */
+private Result!Expression parseExpression(ref Array!(Token).Range tokens) @nogc
+in (!tokens.empty, "Expected expression, got end of stream")
+{
+    // Kept for diagnostics issued after the stream has run out of tokens.
+    Position start = tokens.front.position;
+
+    if (tokens.front.ofType(Token.Type.number))
+    {
+        auto number = MmapPool.instance.make!Number;
+        number.value = tokens.front.value!(Token.Type.number);
+        tokens.popFront;
+        return Result!Expression(number);
+    }
+    else if (tokens.front.ofType(Token.Type.identifier))
+    {
+        auto variable = MmapPool.instance.make!Variable;
+        variable.identifier = tokens.front.value!(Token.Type.identifier);
+        tokens.popFront;
+        return Result!Expression(variable);
+    }
+    else if (tokens.front.ofType(Token.Type.subroutine))
+    {
+        auto subroutine = MmapPool.instance.make!Subroutine;
+        tokens.popFront;
+
+        if (tokens.empty)
+        {
+            return Result!Expression("Expected left-hand side to be an expression", start);
+        }
+        auto expression = parseExpression(tokens);
+        if (!expression.valid)
+        {
+            return Result!Expression("Expected left-hand side to be an expression",
+                    positionOr(tokens, start));
+        }
+        subroutine.lhs = expression.result;
+
+        if (tokens.empty)
+        {
+            return Result!Expression("Expected right-hand side to be an expression", start);
+        }
+        expression = parseExpression(tokens);
+        if (!expression.valid)
+        {
+            return Result!Expression("Expected right-hand side to be an expression",
+                    positionOr(tokens, start));
+        }
+        subroutine.rhs = expression.result;
+
+        return Result!Expression(subroutine);
+    }
+    else if (tokens.front.ofType(Token.Type.leftParen))
+    {
+        tokens.popFront;
+        if (tokens.empty)
+        {
+            return Result!Expression("Expected an expression", start);
+        }
+        auto expression = parseExpression(tokens);
+        if (!expression.valid)
+        {
+            return expression;
+        }
+        if (tokens.empty)
+        {
+            return Result!Expression("Expected \")\"", start);
+        }
+        // As before, the token is skipped without checking its type.
+        tokens.popFront;
+        return expression;
+    }
+    return Result!Expression("Expected an expression", tokens.front.position);
+}
+
+/**
+ * Parses a constant definition: an identifier, the equals sign and a number.
+ *
+ * Params:
+ *     tokens = Remaining tokens; parsed tokens are removed from the front.
+ *
+ * Returns: The definition or a compile error.
+ */
+private Result!Definition parseDefinition(ref Array!Token.Range tokens) @nogc
+in (!tokens.empty, "Expected definition, got end of stream")
+{
+    Position last = tokens.front.position;
+
+    if (!tokens.front.ofType(Token.Type.identifier))
+    {
+        return Result!Definition("Expected constant name", last);
+    }
+    auto definition = MmapPool.instance.make!Definition;
+    definition.identifier = tokens.front.value!(Token.Type.identifier); // Copy.
+    tokens.popFront();
+
+    if (tokens.empty)
+    {
+        return Result!Definition("Expected \"=\"", last);
+    }
+    last = tokens.front.position;
+    // Skip the equals sign. As before, the token type is not checked.
+    tokens.popFront();
+
+    if (tokens.empty)
+    {
+        return Result!Definition("Expected a number", last);
+    }
+    if (tokens.front.ofType(Token.Type.number))
+    {
+        auto number = MmapPool.instance.make!Number;
+        number.value = tokens.front.value!(Token.Type.number);
+        definition.number = number;
+        tokens.popFront;
+        return Result!Definition(definition);
+    }
+    return Result!Definition("Expected a number", tokens.front.position);
+}
+
+/**
+ * Parses a "!" statement.
+ *
+ * Params:
+ *     tokens = Remaining tokens; parsed tokens are removed from the front.
+ *
+ * Returns: The statement or a compile error.
+ */
+private Result!Statement parseStatement(ref Array!Token.Range tokens) @nogc
+in (!tokens.empty, "Expected statement, got end of stream")
+{
+    if (tokens.front.ofType(Token.Type.bang))
+    {
+        Position bang = tokens.front.position;
+        tokens.popFront;
+        if (tokens.empty)
+        {
+            return Result!Statement("Expected an expression", bang);
+        }
+        auto expression = parseExpression(tokens);
+        if (!expression.valid)
+        {
+            return Result!Statement(expression.error.get);
+        }
+        auto statement = MmapPool.instance.make!BangStatement;
+        statement.expression = expression.result;
+        return Result!Statement(statement);
+    }
+    return Result!Statement("Expected ! statement", tokens.front.position);
+}
+
+/**
+ * Parses a list of constant definitions separated by commas. The list
+ * starts with the const keyword and ends before the semicolon, which is
+ * left in the stream.
+ *
+ * Params:
+ *     tokens = Remaining tokens; parsed tokens are removed from the front.
+ *
+ * Returns: The definitions or a compile error.
+ */
+private Result!(Array!Definition) parseDefinitions(ref Array!Token.Range tokens) @nogc
+in (!tokens.empty, "Expected definition, got end of stream")
+{
+    Position last = tokens.front.position;
+    tokens.popFront; // Skip const.
+
+    Array!Definition definitions;
+
+    for (;;)
+    {
+        if (tokens.empty)
+        {
+            return typeof(return)("Expected constant name", last);
+        }
+        auto definition = parseDefinition(tokens);
+        if (!definition.valid)
+        {
+            return typeof(return)(definition.error.get);
+        }
+        definitions.insertBack(definition.result);
+
+        if (tokens.empty)
+        {
+            return typeof(return)("Expected \";\" or \",\"", last);
+        }
+        last = tokens.front.position;
+        if (tokens.front.ofType(Token.Type.semicolon))
+        {
+            return typeof(return)(definitions);
+        }
+        if (!tokens.front.ofType(Token.Type.comma))
+        {
+            return typeof(return)("Expected \";\" or \",\"", last);
+        }
+        tokens.popFront; // Skip the comma.
+    }
+}
+
+/**
+ * Parses a list of variable names separated by commas. The list starts
+ * with the var keyword and ends before the semicolon, which is left in the
+ * stream.
+ *
+ * Params:
+ *     tokens = Remaining tokens; parsed tokens are removed from the front.
+ *
+ * Returns: The declarations or a compile error.
+ */
+private Result!(Array!VariableDeclaration) parseVariableDeclarations(ref Array!Token.Range tokens) @nogc
+in (!tokens.empty, "Expected variable declarations, got end of stream")
+{
+    Position last = tokens.front.position;
+    tokens.popFront; // Skip var.
+
+    Array!VariableDeclaration variableDeclarations;
+
+    for (;;)
+    {
+        if (tokens.empty || !tokens.front.ofType(Token.Type.identifier))
+        {
+            return typeof(return)("Expected variable name", positionOr(tokens, last));
+        }
+        auto variableDeclaration = MmapPool.instance.make!VariableDeclaration;
+        variableDeclaration.identifier = tokens.front.value!(Token.Type.identifier);
+        variableDeclarations.insertBack(variableDeclaration);
+        last = tokens.front.position;
+        tokens.popFront;
+
+        if (tokens.empty)
+        {
+            return typeof(return)("Expected \";\" or \",\"", last);
+        }
+        last = tokens.front.position;
+        if (tokens.front.ofType(Token.Type.semicolon))
+        {
+            return typeof(return)(variableDeclarations);
+        }
+        if (!tokens.front.ofType(Token.Type.comma))
+        {
+            return typeof(return)("Expected \";\" or \",\"", last);
+        }
+        tokens.popFront; // Skip the comma.
+    }
+}
+
+/**
+ * Parses a block: constant definitions and variable declarations, both
+ * optional, followed by a statement.
+ *
+ * Params:
+ *     tokens = Remaining tokens; parsed tokens are removed from the front.
+ *
+ * Returns: The block or a compile error.
+ */
+private Result!Block parseBlock(ref Array!Token.Range tokens) @nogc
+in (!tokens.empty, "Expected block, got end of stream")
+{
+    auto block = MmapPool.instance.make!Block;
+    Position last = tokens.front.position;
+
+    if (tokens.front.ofType(Token.Type.let))
+    {
+        auto constDefinitions = parseDefinitions(tokens);
+        if (!constDefinitions.valid)
+        {
+            return Result!Block(constDefinitions.error.get);
+        }
+        block.definitions = constDefinitions.result;
+        last = tokens.front.position;
+        tokens.popFront; // Skip the semicolon the definitions stopped at.
+    }
+    if (!tokens.empty && tokens.front.ofType(Token.Type.var))
+    {
+        auto variableDeclarations = parseVariableDeclarations(tokens);
+        if (!variableDeclarations.valid)
+        {
+            return Result!Block(variableDeclarations.error.get);
+        }
+        block.variableDeclarations = variableDeclarations.result;
+        last = tokens.front.position;
+        tokens.popFront; // Skip the semicolon the declarations stopped at.
+    }
+    if (tokens.empty)
+    {
+        return Result!Block("Expected ! statement", last);
+    }
+    auto statement = parseStatement(tokens);
+    if (!statement.valid)
+    {
+        return Result!Block(statement.error.get);
+    }
+    block.statement = statement.result;
+
+    return Result!Block(block);
+}
+
+/**
+ * Parses the token stream into a block.
+ *
+ * Params:
+ *     tokenStream = Tokens produced by the lexer.
+ *
+ * Returns: The parsed block or a compile error.
+ */
+Result!Block parse(ref Array!Token tokenStream) @nogc
+{
+    auto tokens = tokenStream[];
+    if (tokens.empty)
+    {
+        return Result!Block("Expected block, got end of stream", Position.init);
+    }
+    return parseBlock(tokens);
+}
diff --git a/source/elna/result.d b/source/elna/result.d
new file mode 100644
index 0000000..049c453
--- /dev/null
+++ b/source/elna/result.d
@@ -0,0 +1,118 @@
+module elna.result;
+
+import std.typecons;
+
+/**
+ * Position in the source text.
+ */
+struct Position
+{
+    /// Line.
+    size_t line = 1;
+
+    /// Column.
+    size_t column = 1;
+}
+
+/**
+ * Error message together with the source position it refers to.
+ */
+struct CompileError
+{
+    private string message_;
+
+    private Position position_;
+
+    @disable this();
+
+    /**
+     * Params:
+     *     message = Error text.
+     *     position = Error position in the source text.
+     */
+    this(string message, Position position) @nogc nothrow pure @safe
+    {
+        this.message_ = message;
+        this.position_ = position;
+    }
+
+    /// Error text.
+    @property string message() const @nogc nothrow pure @safe
+    {
+        return this.message_;
+    }
+
+    /// Error line in the source text.
+    @property size_t line() const @nogc nothrow pure @safe
+    {
+        return this.position_.line;
+    }
+
+    /// Error column in the source text.
+    @property size_t column() const @nogc nothrow pure @safe
+    {
+        return this.position_.column;
+    }
+}
+
+/**
+ * Either a value of type T or a compile error.
+ *
+ * Params:
+ *     T = Type of the value.
+ */
+struct Result(T)
+{
+    /// The error; null if the result is valid.
+    Nullable!CompileError error;
+
+    /// The value; T.init if the result is an error.
+    T result;
+
+    /**
+     * Constructs a valid result.
+     *
+     * Params:
+     *     result = The value.
+     */
+    this(T result)
+    {
+        this.result = result;
+        this.error = typeof(this.error).init;
+    }
+
+    /**
+     * Constructs a failed result.
+     *
+     * Params:
+     *     message = Error text.
+     *     position = Error position in the source text.
+     */
+    this(string message, Position position)
+    {
+        this.result = T.init;
+        this.error = CompileError(message, position);
+    }
+
+    /**
+     * Constructs a failed result.
+     *
+     * Params:
+     *     compileError = The error.
+     */
+    this(CompileError compileError)
+    {
+        // T.init instead of null, so that T is not required to be a
+        // reference type.
+        this.result = T.init;
+        this.error = compileError;
+    }
+
+    @disable this();
+
+    /// Whether the result holds a value rather than an error.
+    @property bool valid() const
+    {
+        return error.isNull;
+    }
+}
diff --git a/source/main.d b/source/main.d
new file mode 100644
index 0000000..71d23cd
--- /dev/null
+++ b/source/main.d
@@ -0,0 +1,129 @@
+import core.stdc.stdio;
+import core.stdc.string;
+import core.stdc.stdlib;
+import elna.lexer;
+import elna.parser;
+import elna.generator;
+import elna.ir;
+import tanya.container.string;
+import tanya.memory.allocator;
+import tanya.memory.mmappool;
+
+/**
+ * Reads a file into the buffer. The buffer is also used for the
+ * null-terminated file name, so both the name and the contents have to be
+ * shorter than N bytes.
+ *
+ * Params:
+ *     N = Buffer size.
+ *     source = File name.
+ *     buffer = Storage for the file name and then for the file contents.
+ *
+ * Returns: The part of the buffer holding the file contents or null on
+ *          failure; failures are reported on the standard error stream.
+ */
+private char[] readSource(size_t N)(string source, out char[N] buffer) @nogc
+{
+    if (source.length >= N)
+    {
+        fprintf(stderr, "%.*s: file name is too long\n", cast(int) source.length, source.ptr);
+        return null;
+    }
+    // Copy only the name itself: the slice is not guaranteed to be followed
+    // by a null terminator.
+    memcpy(buffer.ptr, source.ptr, source.length);
+    buffer[source.length] = '\0';
+
+    auto handle = fopen(buffer.ptr, "r");
+    if (handle is null)
+    {
+        perror(buffer.ptr);
+        return null;
+    }
+    // ftell reports failures with a negative value.
+    const end = fseek(handle, 0, SEEK_END) == 0 ? ftell(handle) : -1;
+    if (end < 0)
+    {
+        perror(buffer.ptr);
+        fclose(handle);
+        return null;
+    }
+    if (end >= N)
+    {
+        fprintf(stderr, "%s: file is too large\n", buffer.ptr);
+        fclose(handle);
+        return null;
+    }
+    rewind(handle);
+
+    // From here on the file name in the buffer is overwritten.
+    const size_t length = fread(buffer.ptr, 1, end, handle);
+    const failed = ferror(handle) != 0;
+    fclose(handle);
+    if (failed)
+    {
+        fprintf(stderr, "%.*s: read error\n", cast(int) source.length, source.ptr);
+        return null;
+    }
+    buffer[length] = '\0';
+
+    return buffer[0 .. length];
+}
+
+/**
+ * Compiles the source file given as the first command line argument.
+ *
+ * Returns: 0 on success, 1 if the lexical analysis failed, 2 on a parse
+ *          error, 3 if the source could not be read, 4 if no source file was
+ *          given.
+ */
+int main(string[] args)
+{
+    char[255] buffer;
+
+    defaultAllocator = MmapPool.instance;
+
+    if (args.length < 2)
+    {
+        fprintf(stderr, "Usage: elna FILE.elna\n");
+        return 4;
+    }
+    auto sourceText = readSource(args[1], buffer);
+    if (sourceText is null)
+    {
+        return 3;
+    }
+    auto tokens = lex(sourceText);
+    if (tokens.length == 0)
+    {
+        printf("Lexical analysis failed.\n");
+        return 1;
+    }
+    auto ast = parse(tokens);
+    if (!ast.valid)
+    {
+        auto compileError = ast.error.get;
+        // The message is a D string, so its length is passed explicitly.
+        printf("%zu:%zu: %.*s\n", compileError.line, compileError.column,
+                cast(int) compileError.message.length, compileError.message.ptr);
+        return 2;
+    }
+    auto ir = transform(ast.result);
+
+    // Replace the ".elna" extension with ".o". Other names get ".o" appended.
+    enum extension = ".elna";
+    string name = args[1];
+    if (name.length > extension.length && name[$ - extension.length .. $] == extension)
+    {
+        name = name[0 .. $ - extension.length];
+    }
+    String outputFilename = String("build/");
+    outputFilename.insertBack(name);
+    outputFilename.insertBack(".o");
+    writeObject(ir, outputFilename);
+
+    auto code = generate(ir);
+    printf("%s", code.toStringz());
+
+    return 0;
+}
diff --git a/tests/const_list.elna b/tests/const_list.elna
new file mode 100644
index 0000000..18a6711
--- /dev/null
+++ b/tests/const_list.elna
@@ -0,0 +1,3 @@
+const a = 1, b = 2;
+! + a b
+.
diff --git a/tests/expectations/const_list.txt b/tests/expectations/const_list.txt
new file mode 100644
index 0000000..00750ed
--- /dev/null
+++ b/tests/expectations/const_list.txt
@@ -0,0 +1 @@
+3
diff --git a/tests/expectations/sum.txt b/tests/expectations/sum.txt
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/tests/expectations/sum.txt
@@ -0,0 +1 @@
+8
diff --git a/tests/expectations/sums.txt b/tests/expectations/sums.txt
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/tests/expectations/sums.txt
@@ -0,0 +1 @@
+8
diff --git a/tests/sum.elna b/tests/sum.elna
new file mode 100644
index 0000000..12343f0
--- /dev/null
+++ b/tests/sum.elna
@@ -0,0 +1,2 @@
+! + 1 7
+.
diff --git a/tests/sums.elna b/tests/sums.elna
new file mode 100644
index 0000000..bf80ecc
--- /dev/null
+++ b/tests/sums.elna
@@ -0,0 +1,2 @@
+! + 1 (+ 3 4)
+.