From f5c4a27a6dbc179a7350fef61a17619de705f5b7 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sat, 11 Jun 2022 00:38:03 +0200 Subject: [PATCH] Implement a RISC-V backend --- Rakefile | 43 +- source/elna/backend.d | 65 ++ source/elna/elf.d | 859 ++++++++++++++++++++++ source/elna/generator.d | 770 ------------------- source/elna/riscv.d | 254 +++++++ source/main.d | 66 +- tests/{const_list.elna => const_list.eln} | 0 tests/expectations/left_nested_sum.txt | 1 + tests/left_nested_sum.eln | 2 + tests/{sum.elna => sum.eln} | 0 tests/{sums.elna => sums.eln} | 0 11 files changed, 1211 insertions(+), 849 deletions(-) create mode 100644 source/elna/backend.d create mode 100644 source/elna/elf.d delete mode 100644 source/elna/generator.d create mode 100644 source/elna/riscv.d rename tests/{const_list.elna => const_list.eln} (100%) create mode 100644 tests/expectations/left_nested_sum.txt create mode 100644 tests/left_nested_sum.eln rename tests/{sum.elna => sum.eln} (100%) rename tests/{sums.elna => sums.eln} (100%) diff --git a/Rakefile b/Rakefile index ca2ae0b..4dfb642 100644 --- a/Rakefile +++ b/Rakefile @@ -4,12 +4,13 @@ require 'open3' DFLAGS = ['--warn-no-deprecated', '-L/usr/lib64/gcc-12'] BINARY = 'build/bin/elna' -TESTS = FileList['tests/*.elna'].flat_map do |test| +TESTS = FileList['tests/*.eln'].flat_map do |test| build = Pathname.new 'build' - asm_test = build + 'asm' + Pathname.new(test).basename('') + test_basename = Pathname.new(test).basename('') - [build + test, asm_test].map { |path| path.sub_ext('').to_path } + [build + 'riscv' + test_basename].map { |path| path.sub_ext('').to_path } end + SOURCES = FileList['source/**/*.d'] directory 'build' @@ -17,27 +18,23 @@ directory 'build' CLEAN.include 'build' CLEAN.include '.dub' -rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.o') }) do |t| - sh 'ld.gold', '-L/usr/lib64', - '--dynamic-linker', '/lib64/ld-linux-x86-64.so.2', +rule(/build\/riscv\/[^\/\.]+$/ => ->(file) { test_for_out(file, 
'.o') }) do |t| + sh '/opt/riscv/bin/riscv32-unknown-elf-ld', '-o', t.name, - '/usr/lib64/crt1.o', '/usr/lib64/crti.o', '-lc', t.source, '/usr/lib64/crtn.o' + '-L/opt/riscv/lib/gcc/riscv32-unknown-elf/11.1.0', + '-L/opt/riscv/riscv32-unknown-elf/lib', + '/opt/riscv/riscv32-unknown-elf/lib/crt0.o', + '/opt/riscv/lib/gcc/riscv32-unknown-elf/11.1.0/crtbegin.o', + t.source, + '--start-group', '-lc', '-lgloss', '--end-group', + '/opt/riscv/lib/gcc/riscv32-unknown-elf/11.1.0/crtend.o' end -rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.s') }) do |t| - sh 'gcc', '-x', 'assembler', '-o', t.name, t.source -end - -rule(/build\/tests\/.+\.o$/ => ->(file) { test_for_object(file) }) do |t| +rule(/build\/riscv\/.+\.o$/ => ->(file) { test_for_object(file, '.eln') }) do |t| Pathname.new(t.name).dirname.mkpath sh BINARY, '-o', t.name, t.source end -rule(/build\/asm\/.+\.s$/ => ->(file) { test_for_object(file) }) do |t| - Pathname.new(t.name).dirname.mkpath - sh BINARY, '-s', '-o', t.name, t.source -end - file BINARY => SOURCES do |t| sh({ 'DFLAGS' => (DFLAGS * ' ') }, 'dub', 'build', '--compiler=gdc-12') end @@ -56,7 +53,13 @@ task test: BINARY do .to_i puts "Running #{test}" - system test + if test.include? 
'/riscv/' + system('/opt/riscv/bin/spike', + '/opt/riscv/riscv32-unknown-elf/bin/pk', test, + { out: '/dev/null' }) + else + raise 'Unsupported test platform' + end actual = $?.exitstatus fail "#{test}: Expected #{expected}, got #{actual}" unless expected == actual @@ -68,10 +71,10 @@ task unittest: SOURCES do |t| sh('dub', 'test', '--compiler=gdc-12') end -def test_for_object(out_file) +def test_for_object(out_file, extension) test_source = Pathname .new(out_file) - .sub_ext('.elna') + .sub_ext(extension) .sub(/^build\/[[:alpha:]]+\//, 'tests/') .to_path [test_source, BINARY] diff --git a/source/elna/backend.d b/source/elna/backend.d new file mode 100644 index 0000000..c7cb2ca --- /dev/null +++ b/source/elna/backend.d @@ -0,0 +1,65 @@ +module elna.backend; + +import core.stdc.stdio; +import elna.elf; +import elna.ir; +import elna.extended; +import elna.riscv; +import elna.lexer; +import elna.parser; +import std.algorithm; +import std.sumtype; +import std.typecons; +import tanya.os.error; +import tanya.container.array; +import tanya.container.string; + +private Nullable!String readSource(string source) @nogc +{ + enum size_t bufferSize = 255; + auto sourceFilename = String(source); + + return readFile(sourceFilename).match!( + (ErrorCode errorCode) { + perror(sourceFilename.toStringz); + return Nullable!String(); + }, + (Array!ubyte contents) => nullable(String(cast(char[]) contents.get)) + ); +} + +int generate(string inFile, ref String outputFilename) @nogc +{ + auto sourceText = readSource(inFile); + if (sourceText.isNull) + { + return 3; + } + auto tokens = lex(sourceText.get.get); + if (tokens.length == 0) + { + printf("Lexical analysis failed.\n"); + return 1; + } + auto ast = parse(tokens); + if (!ast.valid) + { + auto compileError = ast.error.get; + printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr); + return 2; + } + auto ir = transform(ast.result); + + auto handle = File.open(outputFilename.toStringz, 
BitFlags!(File.Mode)(File.Mode.truncate)); + if (!handle.valid) + { + return 1; + } + auto programText = writeNext(ir); + auto elf = Elf(move(handle)); + elf.addCode("main", programText); + + elf.finish(); + + return 0; +} diff --git a/source/elna/elf.d b/source/elna/elf.d new file mode 100644 index 0000000..b0f6d2c --- /dev/null +++ b/source/elna/elf.d @@ -0,0 +1,859 @@ +module elna.elf; + +import elna.extended; +import std.algorithm; +import tanya.container.array; +import tanya.container.string; + +/// Unsigned program address. +alias Elf64_Addr = ulong*; +/// Unsigned file offset. +alias Elf64_Off = ulong; +/// Unsigned medium integer. +alias Elf64_Half = ushort; +/// Unsigned integer. +alias Elf64_Word = uint; +/// Signed integer. +alias Elf64_Sword = int; +/// Unsigned long integer. +alias Elf64_Xword = ulong; +/// Signed long integer. +alias Elf64_Sxword = long; + +/// Unsigned program address. +alias Elf32_Addr = uint; +/// Unsigned file offset. +alias Elf32_Off = uint; +/// Unsigned medium integer. +alias Elf32_Half = ushort; +/// Unsigned integer. +alias Elf32_Word = uint; +/// Signed integer. +alias Elf32_Sword = int; + +enum : size_t +{ + /// File identification. + EI_MAG0 = 0, + /// File identification. + EI_MAG1 = 1, + /// File identification. + EI_MAG2 = 2, + /// File identification. + EI_MAG3 = 3, + /// File class. + EI_CLASS = 4, + /// Data encoding. + EI_DATA = 5, + /// File version. + EI_VERSION = 6, + /// Start of padding bytes. + EI_PAD = 7, + /// Size of e_ident[] + EI_NIDENT = 16 +} + +enum : ubyte +{ + /// e_ident[EI_MAG0]. + ELFMAG0 = 0x7f, + /// e_ident[EI_MAG1]. + ELFMAG1 = 'E', + /// e_ident[EI_MAG2]. + ELFMAG2 = 'L', + /// e_ident[EI_MAG3]. + ELFMAG3 = 'F' +} + +/** + * File header. + */ +struct Elf64_Ehdr +{ + /// ELF identification. + ubyte[EI_NIDENT] e_ident; + /// Object file type. + Elf64_Half e_type; + /// Machine type. + Elf64_Half e_machine; + /// Object file version + Elf64_Word e_version; + /// Entry point address. 
+ Elf64_Addr e_entry; + /// Program header offset. + Elf64_Off e_phoff; + /// Section header offset. + Elf64_Off e_shoff; + /// Processor-specific flags. + Elf64_Word e_flags; + /// ELF header size. + Elf64_Half e_ehsize; + /// Size of program header entry. + Elf64_Half e_phentsize; + /// Number of program header entries. + Elf64_Half e_phnum; + /// Size of section header entry. + Elf64_Half e_shentsize; + /// Number of section header entries. + Elf64_Half e_shnum; + /// Section name string table index. + Elf64_Half e_shstrndx; +} + +/** + * File header. + */ +struct Elf32_Ehdr { + /// ELF identification. + ubyte[EI_NIDENT] e_ident; + /// Object file type. + Elf32_Half e_type; + /// Machine type. + Elf32_Half e_machine; + /// Object file version + Elf32_Word e_version; + /// Entry point address. + Elf32_Addr e_entry; + /// Program header offset. + Elf32_Off e_phoff; + /// Section header offset. + Elf32_Off e_shoff; + /// Processor-specific flags. + Elf32_Word e_flags; + /// ELF header size. + Elf32_Half e_ehsize; + /// Size of program header entry. + Elf32_Half e_phentsize; + /// Number of program header entries. + Elf32_Half e_phnum; + /// Size of section header entry. + Elf32_Half e_shentsize; + /// Number of section header entries. + Elf32_Half e_shnum; + /// Section name string table index. + Elf32_Half e_shstrndx; +} + +/** + * Section header. + */ +struct Elf64_Shdr +{ + /// Section name. + Elf64_Word sh_name; + /// Section type. + Elf64_Word sh_type; + /// Section attributes. + Elf64_Xword sh_flags; + /// Virtual address in memory. + Elf64_Addr sh_addr; + /// Offset in file. + Elf64_Off sh_offset; + /// Size of section. + Elf64_Xword sh_size; + /// Link to other section. + Elf64_Word sh_link; + /// Miscellaneous information. + Elf64_Word sh_info; + /// Address alignment boundary. + Elf64_Xword sh_addralign; + /// Size of entries, if section has table. + Elf64_Xword sh_entsize; +} + +/** + * Section header. + */ +struct Elf32_Shdr +{ + /// Section name. 
+ Elf32_Word sh_name; + /// Section type. + Elf32_Word sh_type; + /// Section attributes. + Elf32_Word sh_flags; + /// Virtual address in memory. + Elf32_Addr sh_addr; + /// Offset in file. + Elf32_Off sh_offset; + /// Size of section. + Elf32_Word sh_size; + /// Link to other section. + Elf32_Word sh_link; + /// Miscellaneous information. + Elf32_Word sh_info; + /// Address alignment boundary. + Elf32_Word sh_addralign; + /// Size of entries, if section has table. + Elf32_Word sh_entsize; +} + +/** + * Symbol table entry. + */ +struct Elf64_Sym +{ + /// Symbol name. + Elf64_Word st_name; + /// Type and Binding attributes. + ubyte st_info; + /// Reserved. + ubyte st_other; + /// Section table index. + Elf64_Half st_shndx; + /// Symbol value. + Elf64_Addr st_value; + /// Size of object (e.g., common). + Elf64_Xword st_size; +} + +/** + * Relocation entry. + */ +struct Elf64_Rel +{ + /// Address of reference. + Elf64_Addr r_offset; + /// Symbol index and type of relocation. + Elf64_Xword r_info; +} + +/** + * Relocation entry with explicit addend. + */ +struct Elf64_Rela +{ + /// Address of reference. + Elf64_Addr r_offset; + /// Symbol index and type of relocation. + Elf64_Xword r_info; + /// Constant part of expression. + Elf64_Sxword r_addend; +} + +/** + * Symbol table entry. + */ +struct Elf32_Sym +{ + /// Symbol name. + Elf32_Word st_name; + /// Symbol value. + Elf32_Addr st_value; + /// Size of object (e.g., common). + Elf32_Word st_size; + /// Type and Binding attributes. + ubyte st_info; + /// Reserved. + ubyte st_other; + /// Section table index. + Elf32_Half st_shndx; +} + +/** + * Relocation entry. + */ +struct Elf32_Rel +{ + /// Address of reference. + Elf32_Addr r_offset; + /// Symbol index and type of relocation. + Elf32_Word r_info; +} + +/** + * Relocation entry with explicit addend. + */ +struct Elf32_Rela +{ + /// Address of reference. + Elf32_Addr r_offset; + /// Symbol index and type of relocation. 
+ Elf32_Word r_info; + /// Constant part of expression. + Elf32_Sword r_addend; +} + +/// Section Types, sh_type. +enum : Elf64_Word +{ + /// Marks an unused section header. + SHT_NULL = 0, + /// Contains information defined by the program. + SHT_PROGBITS = 1, + /// Contains a linker symbol table. + SHT_SYMTAB = 2, + /// Contains a string table. + SHT_STRTAB = 3, + /// Contains “Rela” type relocation entries. + SHT_RELA = 4, + /// Contains a symbol hash table + SHT_HASH = 5, + /// Contains dynamic linking tables + SHT_DYNAMIC = 6, + /// Contains note information + SHT_NOTE = 7, + /// Contains uninitialized space; does not occupy any space in the file. + SHT_NOBITS = 8, + /// Contains "Rel" type relocation entries. + SHT_REL = 9, + /// Reserved. + SHT_SHLIB = 10, + /// Contains a dynamic loader symbol table. + SHT_DYNSYM = 11, + /// Environment-specific use. + SHT_LOOS = 0x60000000, + SHT_HIOS = 0x6FFFFFFF, + /// Processor-specific use. + SHT_LOPROC = 0x70000000, + SHT_HIPROC = 0x7FFFFFFF, +} + +/** + * Section Attributes, sh_flags. + */ +enum : Elf64_Xword +{ + /// Section contains writable data. + SHF_WRITE = 0x1, + /// Section is allocated in memory image of program. + SHF_ALLOC = 0x2, + /// Section contains executable instructions. + SHF_EXECINSTR = 0x4, + /// Environment-specific use. + SHF_MASKOS = 0x0F000000, + /// Processor-specific use. 
+    SHF_MASKPROC = 0xF0000000,
+}
+
+/// Extracts the symbol table index (the upper 32 bits) from a 64-bit r_info.
+auto ELF64_R_SYM(I)(I i)
+{
+    return i >> 32;
+}
+
+/// Extracts the relocation type (the lower 32 bits) from a 64-bit r_info.
+auto ELF64_R_TYPE(I)(I i)
+{
+    return i & 0xffffffffL;
+}
+
+/**
+ * Combines a symbol table index and a relocation type into a 64-bit r_info.
+ *
+ * Params:
+ *     s = Symbol table index, stored in the upper 32 bits.
+ *     t = Relocation type, stored in the lower 32 bits.
+ *
+ * Returns: The packed r_info value.
+ */
+auto ELF64_R_INFO(S, T)(S s, T t)
+{
+    // Widen first: shifting a 32-bit operand by 32 bits is not valid.
+    return (cast(Elf64_Xword) s << 32) + (t & 0xffffffffL);
+}
+
+/// Extracts the symbol binding (the upper 4 bits) from st_info.
+ubyte ELF32_ST_BIND(ubyte i) @nogc nothrow pure @safe
+{
+    return i >> 4;
+}
+
+/// Extracts the symbol type (the lower 4 bits) from st_info.
+ubyte ELF32_ST_TYPE(ubyte i) @nogc nothrow pure @safe
+{
+    return i & 0xf;
+}
+
+/**
+ * Packs a symbol binding and a symbol type into st_info.
+ *
+ * Params:
+ *     b = Binding, stored in the upper 4 bits.
+ *     t = Type, only its lower 4 bits are used.
+ */
+ubyte ELF32_ST_INFO(ubyte b, ubyte t) @nogc nothrow pure @safe
+{
+    return cast(ubyte) ((b << 4) + (t & 0xf));
+}
+
+/// Extracts the symbol table index (everything above the lowest byte) from a 32-bit r_info.
+auto ELF32_R_SYM(I)(I i)
+{
+    return i >> 8;
+}
+
+/// Original, misspelled name of ELF32_R_SYM, kept so that existing references keep resolving.
+alias ELF32_R_SYMT = ELF32_R_SYM;
+
+/// Extracts the relocation type (the lowest byte) from a 32-bit r_info.
+ubyte ELF32_R_TYPE(I)(I i)
+{
+    return cast(ubyte) i;
+}
+
+/**
+ * Combines a symbol table index and a relocation type into a 32-bit r_info.
+ *
+ * Params:
+ *     s = Symbol table index, shifted above the lowest byte.
+ *     t = Relocation type, truncated to one byte.
+ */
+auto ELF32_R_INFO(S, T)(S s, T t)
+{
+    return (s << 8) + cast(ubyte) t;
+}
+
+enum : uint
+{
+    /// Not visible outside the object file.
+    STB_LOCAL = 0,
+    /// Global symbol, visible to all object files.
+    STB_GLOBAL = 1,
+    /// Global scope, but with lower precedence than global symbols.
+    STB_WEAK = 2,
+    /// Environment-specific use.
+    STB_LOOS = 10,
+    STB_HIOS = 12,
+    /// Processor-specific use.
+    STB_LOPROC = 13,
+    STB_HIPROC = 15,
+}
+
+enum : uint
+{
+    /// No type specified (e.g., an absolute symbol).
+    STT_NOTYPE = 0,
+    /// Data object.
+    STT_OBJECT = 1,
+    /// Function entry point.
+    STT_FUNC = 2,
+    /// Symbol is associated with a section.
+    STT_SECTION = 3,
+    /// Source file associated with the object file.
+    STT_FILE = 4,
+    /// Environment-specific use.
+    STT_LOOS = 10,
+    STT_HIOS = 12,
+    /// Processor-specific use.
+    STT_LOPROC = 13,
+    STT_HIPROC = 15,
+}
+
+Elf64_Ehdr makeFileHeader(Elf64_Off sectionHeaderOffset,
+        Elf64_Half sectionHeaderCount,
+        Elf64_Half stringIndex) @nogc
+{
+    Elf64_Ehdr header;
+
+    // Magic number.
+    header.e_ident[0] = '\x7f';
+    header.e_ident[1] = 'E';
+    header.e_ident[2] = 'L';
+    header.e_ident[3] = 'F';
+
+    // File class.
+    header.e_ident[4] = ELFCLASS64;
+
+    // Data encoding.
+    header.e_ident[5] = ELFDATA2LSB;
+
+    // Version.
+    header.e_ident[6] = EV_CURRENT;
+
+    // OS/ABI identification.
+ header.e_ident[7] = EI_OSABI.ELFOSABI_SYSV; + + // ABI version. + header.e_ident[8] = 0; + + // Size of e_ident[]. + header.e_ident[15] = 0; + + header.e_type = ET_REL; + header.e_machine = 0x3e; // EM_X86_64: AMD x86-64 architecture + header.e_version = EV_CURRENT; + header.e_entry = null; + header.e_phoff = 0; + header.e_shoff = sectionHeaderOffset; + header.e_flags = 0; + header.e_ehsize = Elf64_Ehdr.sizeof; + header.e_phentsize = 0; + header.e_phnum = 0; + header.e_shentsize = Elf64_Shdr.sizeof; + header.e_shnum = sectionHeaderCount; + header.e_shstrndx = stringIndex; + + return header; +} + +Elf64_Shdr makeTextHeader(Elf64_Off offset, Elf64_Xword size) @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0x1b; + table.sh_type = SHT_PROGBITS; + table.sh_flags = SHF_EXECINSTR | SHF_ALLOC; + table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 1; + table.sh_entsize = 0; + + return table; +} + +Elf64_Shdr makeSymtableHeader(Elf64_Off offset, Elf64_Xword size, Elf64_Word entriesCount) @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0x01; + table.sh_type = SHT_SYMTAB; + table.sh_flags = 0; + table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = 0x03; // String table used by entries in this section. 
+ table.sh_info = entriesCount; + table.sh_addralign = 8; + table.sh_entsize = Elf64_Sym.sizeof; + + return table; +} + +Elf64_Shdr makeStringHeader(Elf64_Word stringIndex, Elf64_Off offset, Elf64_Xword size) @nogc +{ + Elf64_Shdr table; + + table.sh_name = stringIndex; + table.sh_type = SHT_STRTAB; + table.sh_flags = 0; + table.sh_addr = null; + table.sh_offset = offset; + table.sh_size = size; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 1; + table.sh_entsize = 0; + + return table; +} + +Elf64_Shdr makeInitialHeader() @nogc +{ + Elf64_Shdr table; + + table.sh_name = 0; + table.sh_type = SHT_NULL; + table.sh_flags = 0; + table.sh_addr = null; + table.sh_offset = 0; + table.sh_size = 0; + table.sh_link = SHN_UNDEF; + table.sh_info = 0; + table.sh_addralign = 0; + table.sh_entsize = 0; + + return table; +} + +Elf64_Sym makeInitialSymTable() @nogc +{ + Elf64_Sym table; + + table.st_name = 0; + table.st_info = 0; + table.st_other = 0; + table.st_shndx = 0; + table.st_value = null; + table.st_size = 0; + + return table; +} + +Elf64_Sym makeMainSymTable(Elf64_Half textIndex) @nogc +{ + Elf64_Sym table; + + table.st_name = 0x01; + table.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); + table.st_other = 0; + table.st_shndx = textIndex; + table.st_value = null; + table.st_size = 0; + + return table; +} + +/// Special Section Indices. +enum : ushort +{ + /// Used to mark an undefined or meaningless section reference. + SHN_UNDEF = 0, + /// This value specifies the lower bound of the range of reserved indexes. + SHN_LORESERVE = 0xff00, + /// Processor-specific use. + SHN_LOPROC = 0xFF00, + SHN_HIPROC = 0xFF1F, + /// Environment-specific use. + SHN_LOOS = 0xFF20, + SHN_HIOS = 0xFF3F, + /// Indicates that the corresponding reference is an absolute value. + SHN_ABS = 0xFFF1, + /** + * Indicates a symbol that has been declared as a common block (Fortran + * COMMON or C tentative declaration). 
+ */ + SHN_COMMON = 0xFFF2, +} + +/** + * Object File Classes, e_ident[EI_CLASS]. + */ +enum : ubyte +{ + /// Invalid class. + ELFCLASSNONE = 0, + /// 32-bit objects. + ELFCLASS32 = 1, + /// 64-bit objects. + ELFCLASS64 = 2 +} + +enum : ubyte { + /// Invalid version. + EV_NONE = 0, + /// Current version. + EV_CURRENT = 1 +} + +/** + * Data Encodings, e_ident[EI_DATA]. + */ +enum : ubyte +{ + /// Object file data structures are little-endian. + ELFDATA2LSB = 1, + /// Object file data structures are big-endian. + ELFDATA2MSB = 2, +} + +/** + * Operating System and ABI Identifiers, e_ident[EI_OSABI]. + */ +enum EI_OSABI : ubyte +{ + /// System V ABI. + ELFOSABI_SYSV = 0, + /// HP-UX operating system. + ELFOSABI_HPUX = 1, + /// Standalone (embedded) application. + ELFOSABI_STANDALONE = 255, +} + +enum : Elf64_Half +{ + ET_NONE = 0, /// No file type. + ET_REL = 1, /// Relocatable object file. + ET_EXEC = 2, /// Executable file. + ET_DYN = 3, /// Shared object file. + ET_CORE = 4, /// Core file. + ET_LOOS = 0xFE00, /// Environment-specific use. + ET_HIOS = 0xFEFF, + ET_LOPROC = 0xFF00, /// Processor-specific use. 
+    ET_HIPROC = 0xFFFF,
+}
+
+/**
+ * Rounds a size up to a multiple of the word size of the given ELF class
+ * (4 bytes for ELFCLASS32, 8 bytes for ELFCLASS64).
+ *
+ * The result is always strictly greater than value: a value that is
+ * already a multiple of the word size still grows by one full word.
+ *
+ * Params:
+ *     elfClass = ELFCLASS32 or ELFCLASS64; anything else fails to compile.
+ *     value    = Size to round up.
+ *
+ * Returns: The padded size.
+ */
+auto pad(ubyte elfClass)(size_t value) @nogc
+{
+    static if (elfClass == ELFCLASS32)
+    {
+        return cast(Elf32_Word) (value / 4 + 1) * 4;
+    }
+    else static if (elfClass == ELFCLASS64)
+    {
+        return cast(Elf64_Xword) (value / 8 + 1) * 8;
+    }
+    else
+    {
+        static assert(false, "Invalid ELF class");
+    }
+}
+
+/// A named chunk of machine code.
+struct Symbol
+{
+    String name;
+    const(ubyte)[] text;
+}
+
+/**
+ * Writer for a 32-bit relocatable ELF object file.
+ *
+ * Code is written to the output right after the space reserved for the file
+ * header; finish() appends the symbol table, the string tables and the
+ * section headers and finally writes the file header at the beginning.
+ */
+struct Elf
+{
+    private Elf32_Ehdr fileHeader;
+    private Array!Elf32_Shdr sectionHeaders;
+    // Offset of the next section's data; starts right after the file header.
+    private Elf32_Off currentOffset = Elf32_Ehdr.sizeof;
+    private Array!Elf32_Sym symbols;
+    // Section name string table (.shstrtab). Name offsets used by the headers:
+    // .symtab = 0x01, .strtab = 0x09, .shstrtab = 0x11, .text = 0x1b.
+    static immutable char[41] sections =
+        "\0.symtab\0.strtab\0.shstrtab\0.text\0.rodata\0";
+    // Symbol name string table (.strtab) without its terminating null byte.
+    private String strings;
+    private Elf32_Word lastLocalSymbol;
+    private Elf32_Word textSize;
+    private File output;
+
+    /**
+     * Creates a writer that takes ownership of the given output file.
+     *
+     * The file position is moved past the file header, so code added
+     * afterwards lands directly behind it.
+     *
+     * Params:
+     *     output = Opened output file.
+     */
+    static Elf opCall(File output) @nogc
+    {
+        Elf elf = Elf.init;
+
+        elf.initializeFileHeader();
+        elf.initializeSectionHeaders();
+        elf.insertSymbols();
+        elf.output = move(output);
+
+        elf.output.seek(Elf32_Ehdr.sizeof, File.Whence.set);
+
+        return elf;
+    }
+
+    @disable this(this);
+
+    /**
+     * Completes the object file.
+     *
+     * Appends the section headers for .text and .symtab, writes the symbol
+     * table, .strtab and .shstrtab followed by padding, then the section
+     * header table, and finally the file header at offset 0.
+     */
+    void finish() @nogc
+    {
+        makeTextHeader();
+        // The symbol table header is appended next, .strtab follows it.
+        initializeSymbolTable(cast(Elf32_Word) (this.sectionHeaders.length + 1));
+
+        foreach (symbol; this.symbols)
+        {
+            output.write((cast(ubyte*) &symbol)[0 .. Elf32_Sym.sizeof]);
+            this.currentOffset += Elf32_Sym.sizeof;
+        }
+
+        // .strtab. The section size has to cover the terminating null byte
+        // that is written below, otherwise the last name is not
+        // null-terminated inside the section.
+        const stringTableSize = cast(Elf32_Word) (this.strings.length + 1);
+        this.sectionHeaders.insertBack(makeStringHeader(0x09, this.currentOffset, stringTableSize));
+        output.write(cast(ubyte[]) this.strings.toStringz[0 .. stringTableSize]);
+        this.currentOffset += stringTableSize;
+
+        // .shstrtab.
+        this.sectionHeaders.insertBack(makeStringHeader(0x11, this.currentOffset, sections.length));
+        output.write(cast(const(ubyte)[]) this.sections);
+        this.currentOffset += this.sections.length;
+
+        // Pad both string tables together up to a multiple of 4 bytes
+        // before the section header table. pad() adds between 1 and 4 bytes.
+        auto alignment = pad!ELFCLASS32(this.strings.length + 1 + this.sections.length);
+        const(ubyte)[4] padding = 0;
+        const paddingSize = alignment - this.strings.length - 1 - this.sections.length;
+        output.write(padding[0 .. paddingSize]);
+        this.currentOffset += paddingSize;
+
+        // End writing data, start writing headers.
+
+        output.write((cast(ubyte*) this.sectionHeaders.get)[0 .. Elf32_Shdr.sizeof * this.sectionHeaders.length]);
+
+        output.seek(0, File.Whence.set);
+        this.fileHeader.e_shoff = this.currentOffset;
+        this.fileHeader.e_shnum = cast(Elf32_Half) this.sectionHeaders.length;
+        // String table is the last one
+        this.fileHeader.e_shstrndx = cast(Elf32_Half) (this.sectionHeaders.length - 1);
+        output.write((cast(ubyte*) &this.fileHeader)[0 .. fileHeader.sizeof]);
+    }
+
+    /// Adds the all-zero symbol at index 0 and records where the non-local symbols start.
+    private void insertSymbols() @nogc
+    {
+        // Zero symbol
+        Elf32_Sym symbol;
+        symbol.st_name = 0; // Word
+        symbol.st_value = 0; // Addr
+        symbol.st_size = 0; // Word
+        symbol.st_info = 0; // char
+        symbol.st_other = 0; // char
+        symbol.st_shndx = 0; // Half word
+        this.symbols.insertBack(symbol);
+        // All symbols are global.
+        this.lastLocalSymbol = cast(Elf32_Word) this.symbols.length;
+    }
+
+    /**
+     * Builds the section header of a string table.
+     *
+     * Params:
+     *     stringIndex = Offset of the section name in the section name table.
+     *     offset      = File offset of the table contents.
+     *     size        = Size of the table contents in bytes.
+     */
+    private Elf32_Shdr makeStringHeader(Elf32_Word stringIndex, Elf32_Off offset, Elf32_Word size) @nogc
+    {
+        Elf32_Shdr table;
+
+        table.sh_name = stringIndex;
+        table.sh_type = SHT_STRTAB;
+        table.sh_flags = 0;
+        table.sh_addr = 0;
+        table.sh_offset = offset;
+        table.sh_size = size;
+        table.sh_link = SHN_UNDEF;
+        table.sh_info = 0;
+        table.sh_addralign = 1;
+        table.sh_entsize = 0;
+
+        return table;
+    }
+
+    private void initializeSymbolTable(Elf32_Word stringTableIndex) @nogc
+    {
+        Elf32_Shdr symbolTableHeader;
+
+        symbolTableHeader.sh_name = 0x01;
+        symbolTableHeader.sh_type = SHT_SYMTAB;
+        symbolTableHeader.sh_flags = 0;
+        symbolTableHeader.sh_addr = 0;
+        symbolTableHeader.sh_offset = this.currentOffset;
+        symbolTableHeader.sh_size = cast(Elf32_Word) (this.symbols.length * Elf32_Sym.sizeof);
+        // String table used by entries in this section.
+        symbolTableHeader.sh_link = stringTableIndex;
+        symbolTableHeader.sh_info = this.lastLocalSymbol;
+        symbolTableHeader.sh_addralign = 4;
+        symbolTableHeader.sh_entsize = Elf32_Sym.sizeof;
+
+        this.sectionHeaders.insertBack(symbolTableHeader);
+    }
+
+    /**
+     * Appends machine code to the output and registers a global function
+     * symbol for it.
+     *
+     * Params:
+     *     name = Symbol name.
+     *     text = Machine code of the function.
+     */
+    void addCode(string name, ref Array!ubyte text) @nogc
+    {
+        this.output.write(text.get);
+
+        // Every name is preceded by a null byte, so the name starts at the
+        // length of the table right after that separator was appended
+        // (1 for the first symbol).
+        this.strings.insertBack("\0");
+        const nameOffset = cast(Elf32_Word) this.strings.length;
+        this.strings.insertBack(name[]);
+
+        Elf32_Sym symbol;
+        symbol.st_name = nameOffset; // Word
+        // Offset of the code from the beginning of the section: the size
+        // of everything added before (0 for the first symbol).
+        symbol.st_value = this.textSize; // Addr
+        symbol.st_size = cast(Elf32_Word) text.length; // Word
+        symbol.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); // char
+        symbol.st_other = 0; // char
+        // .text header index, half word
+        symbol.st_shndx = cast(Elf32_Half) this.sectionHeaders.length;
+        this.symbols.insertBack(symbol);
+        this.textSize += text.length;
+    }
+
+    /// Appends the .text section header and advances the current offset past the code.
+    private void makeTextHeader() @nogc
+    {
+        Elf32_Shdr textHeader;
+
+        textHeader.sh_name = 0x1b;
+        textHeader.sh_type = SHT_PROGBITS;
+        textHeader.sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+        textHeader.sh_addr = 0;
+        textHeader.sh_offset = this.currentOffset;
+        textHeader.sh_size = cast(Elf32_Word) this.textSize;
+        textHeader.sh_link = SHN_UNDEF;
+        textHeader.sh_info = 0;
+        // RISC-V instructions must not be placed at an arbitrary byte
+        // address; 4 bytes satisfies targets with and without the
+        // compressed extension.
+        textHeader.sh_addralign = 4;
+        textHeader.sh_entsize = 0;
+
+        this.sectionHeaders.insertBack(textHeader);
+        this.currentOffset += this.textSize;
+    }
+
+    /// Appends the mandatory null section header at index 0.
+    private void initializeSectionHeaders() @nogc
+    {
+        Elf32_Shdr table;
+
+        table.sh_name = 0;
+        table.sh_type = SHT_NULL;
+        table.sh_flags = 0;
+        table.sh_addr = 0;
+        table.sh_offset = 0;
+        table.sh_size = 0;
+        table.sh_link = SHN_UNDEF;
+        table.sh_info = 0;
+        table.sh_addralign = 0;
+        table.sh_entsize = 0;
+
+        this.sectionHeaders.insertBack(table);
+    }
+
+    private void initializeFileHeader() @nogc
+    {
+        // Magic number.
+ this.fileHeader.e_ident[0] = '\x7f'; + this.fileHeader.e_ident[1] = 'E'; + this.fileHeader.e_ident[2] = 'L'; + this.fileHeader.e_ident[3] = 'F'; + + this.fileHeader.e_ident[4] = ELFCLASS32; + this.fileHeader.e_ident[5] = ELFDATA2LSB; + this.fileHeader.e_ident[6] = EV_CURRENT; + this.fileHeader.e_ident[7] = EI_OSABI.ELFOSABI_SYSV; + this.fileHeader.e_ident[8] = 0; + + this.fileHeader.e_type = ET_REL; + this.fileHeader.e_machine = 0xf3; // EM_RISCV + this.fileHeader.e_version = EV_CURRENT; + this.fileHeader.e_entry = 0; + this.fileHeader.e_phoff = 0; + // this.fileHeader.e_shoff = ?; (section header offset) + this.fileHeader.e_flags = 0; + this.fileHeader.e_ehsize = Elf32_Ehdr.sizeof; + this.fileHeader.e_phentsize = 0; + this.fileHeader.e_phnum = 0; + this.fileHeader.e_shentsize = Elf32_Shdr.sizeof; + // this.fileHeader.e_shnum = ?; (section header count) + // this.fileHeader.e_shstrndx = ?; (string index) + } +} diff --git a/source/elna/generator.d b/source/elna/generator.d deleted file mode 100644 index 7a28b3a..0000000 --- a/source/elna/generator.d +++ /dev/null @@ -1,770 +0,0 @@ -module elna.generator; - -import core.stdc.stdio; -import core.stdc.stdlib; -import core.stdc.string; -import elna.ir; -import elna.extended; -import std.sumtype; -import std.typecons; -import tanya.container.array; -import tanya.container.string; -import tanya.memory.mmappool; -import tanya.format; - -/// Unsigned program address. -alias Elf64_Addr = void*; -/// Unsigned file offset. -alias Elf64_Off = ulong; -/// Unsigned medium integer. -alias Elf64_Half = ushort; -/// Unsigned integer. -alias Elf64_Word = uint; -/// Signed integer. -alias Elf64_Sword = int; -/// Unsigned long integer. -alias Elf64_Xword = ulong; -/// Signed long integer. -alias Elf64_Sxword = long; - -enum size_t EI_INDENT = 16; - -/** - * File header. - */ -struct Elf64_Ehdr -{ - /// ELF identification. - ubyte[EI_INDENT] e_ident; - /// Object file type. - Elf64_Half e_type; - /// Machine type. 
- Elf64_Half e_machine; - /// Object file version - Elf64_Word e_version; - /// Entry point address. - Elf64_Addr e_entry; - /// Program header offset. - Elf64_Off e_phoff; - /// Section header offset. - Elf64_Off e_shoff; - /// Processor-specific flags. - Elf64_Word e_flags; - /// ELF header size. - Elf64_Half e_ehsize; - /// Size of program header entry. - Elf64_Half e_phentsize; - /// Number of program header entries. - Elf64_Half e_phnum; - /// Size of section header entry. - Elf64_Half e_shentsize; - /// Number of section header entries. - Elf64_Half e_shnum; - /// Section name string table index. - Elf64_Half e_shstrndx; -} - -/** - * Section header. - */ -struct Elf64_Shdr -{ - /// Section name. - Elf64_Word sh_name; - /// Section type. - Elf64_Word sh_type; - /// Section attributes. - Elf64_Xword sh_flags; - /// Virtual address in memory. - Elf64_Addr sh_addr; - /// Offset in file. - Elf64_Off sh_offset; - /// Size of section. - Elf64_Xword sh_size; - /// Link to other section. - Elf64_Word sh_link; - /// Miscellaneous information. - Elf64_Word sh_info; - /// Address alignment boundary. - Elf64_Xword sh_addralign; - /// Size of entries, if section has table. - Elf64_Xword sh_entsize; -} - -struct Elf64_Sym -{ - /// Symbol name. - Elf64_Word st_name; - /// Type and Binding attributes. - ubyte st_info; - /// Reserved. - ubyte st_other; - /// Section table index. - Elf64_Half st_shndx; - /// Symbol value. - Elf64_Addr st_value; - /// Size of object (e.g., common). - Elf64_Xword st_size; -} - -/// Section Types, sh_type. -enum : Elf64_Word -{ - /// Marks an unused section header. - SHT_NULL = 0, - /// Contains information defined by the program. - SHT_PROGBITS = 1, - /// Contains a linker symbol table. - SHT_SYMTAB = 2, - /// Contains a string table. - SHT_STRTAB = 3, - /// Contains “Rela” type relocation entries. 
- SHT_RELA = 4, - /// Contains a symbol hash table - SHT_HASH = 5, - /// Contains dynamic linking tables - SHT_DYNAMIC = 6, - /// Contains note information - SHT_NOTE = 7, - /// Contains uninitialized space; does not occupy any space in the file. - SHT_NOBITS = 8, - /// Contains "Rel" type relocation entries. - SHT_REL = 9, - /// Reserved. - SHT_SHLIB = 10, - /// Contains a dynamic loader symbol table. - SHT_DYNSYM = 11, - /// Environment-specific use. - SHT_LOOS = 0x60000000, - SHT_HIOS = 0x6FFFFFFF, - /// Processor-specific use. - SHT_LOPROC = 0x70000000, - SHT_HIPROC = 0x7FFFFFFF, -} - -/** - * Section Attributes, sh_flags. - */ -enum : Elf64_Xword -{ - /// Section contains writable data. - SHF_WRITE = 0x1, - /// Section is allocated in memory image of program. - SHF_ALLOC = 0x2, - /// Section contains executable instructions. - SHF_EXECINSTR = 0x4, - /// Environment-specific use. - SHF_MASKOS = 0x0F000000, - /// Processor-specific use. - SHF_MASKPROC = 0xF0000000, -} - -enum : Elf64_Word -{ - /// Not visible outside the object file. - STB_LOCAL = 0, - /// Global symbol, visible to all object files. - STB_GLOBAL = 1, - /// Global scope, but with lower precedence than global symbols. - STB_WEAK = 2, - /// Environment-specific use. - STB_LOOS = 10, - STB_HIOS = 12, - /// Processor-specific use. - STB_LOPROC = 13, - STB_HIPROC = 15, -} - -enum : Elf64_Word -{ - /// No type specified (e.g., an absolute symbol). - STT_NOTYPE = 0, - /// Data object. - STT_OBJECT = 1, - /// Function entry point. - STT_FUNC = 2, - /// Symbol is associated with a section. - STT_SECTION = 3, - /// Source file associated with the object file. - STT_FILE = 4, - /// Environment-specific use. - STT_LOOS = 10, - STT_HIOS = 12, - /// Processor-specific use. - STT_LOPROC = 13, - STT_HIPROC = 15, -} - -Elf64_Ehdr makeFileHeader(Elf64_Off sectionHeaderOffset, - Elf64_Half sectionHeaderCount, - Elf64_Half stringIndex) @nogc -{ - Elf64_Ehdr header; - - // Magic number. 
- header.e_ident[0] = '\x7f'; - header.e_ident[1] = 'E'; - header.e_ident[2] = 'L'; - header.e_ident[3] = 'F'; - - // File class. - header.e_ident[4] = EI_CLASS.ELFCLASS64; - - // Data encoding. - header.e_ident[5] = EI_DATA.ELFDATA2LSB; - - // Version. - header.e_ident[6] = EV_CURRENT; - - // OS/ABI identification. - header.e_ident[7] = EI_OSABI.ELFOSABI_SYSV; - - // ABI version. - header.e_ident[8] = 0; - - // Size of e_ident[]. - header.e_ident[15] = 0; - - header.e_type = ET_REL; - header.e_machine = 0x3e; // EM_X86_64: AMD x86-64 architecture - header.e_version = EV_CURRENT; - header.e_entry = null; - header.e_phoff = 0; - header.e_shoff = sectionHeaderOffset; - header.e_flags = 0; - header.e_ehsize = Elf64_Ehdr.sizeof; - header.e_phentsize = 0; - header.e_phnum = 0; - header.e_shentsize = Elf64_Shdr.sizeof; - header.e_shnum = sectionHeaderCount; - header.e_shstrndx = stringIndex; - - return header; -} - -enum char[33] sectionStringTable = "\0.symtab\0.strtab\0.shstrtab\0.text\0"; - -Elf64_Shdr makeTextHeader(Elf64_Off offset, Elf64_Xword size) @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0x1b; - table.sh_type = SHT_PROGBITS; - table.sh_flags = SHF_EXECINSTR | SHF_ALLOC; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 1; - table.sh_entsize = 0; - - return table; -} - -Elf64_Shdr makeDataHeader(Elf64_Off offset, Elf64_Xword size) @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0x21; - table.sh_type = SHT_PROGBITS; - table.sh_flags = SHF_WRITE | SHF_ALLOC; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 1; - table.sh_entsize = 0; - - return table; -} - -Elf64_Shdr makeSymtableHeader(Elf64_Off offset, Elf64_Xword size, Elf64_Word entriesCount) @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0x01; - table.sh_type = SHT_SYMTAB; - table.sh_flags = 0; - 
table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = 0x03; // String table used by entries in this section. - table.sh_info = entriesCount; - table.sh_addralign = 8; - table.sh_entsize = Elf64_Sym.sizeof; - - return table; -} - -Elf64_Shdr makeStringHeader(Elf64_Word stringIndex, Elf64_Off offset, Elf64_Xword size) @nogc -{ - Elf64_Shdr table; - - table.sh_name = stringIndex; - table.sh_type = SHT_STRTAB; - table.sh_flags = 0; - table.sh_addr = null; - table.sh_offset = offset; - table.sh_size = size; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 1; - table.sh_entsize = 0; - - return table; -} - -Elf64_Shdr makeInitialHeader() @nogc -{ - Elf64_Shdr table; - - table.sh_name = 0; - table.sh_type = SHT_NULL; - table.sh_flags = 0; - table.sh_addr = null; - table.sh_offset = 0; - table.sh_size = 0; - table.sh_link = SHN_UNDEF; - table.sh_info = 0; - table.sh_addralign = 0; - table.sh_entsize = 0; - - return table; -} - -Elf64_Sym makeInitialSymTable() @nogc -{ - Elf64_Sym table; - - table.st_name = 0; - table.st_info = 0; - table.st_other = 0; - table.st_shndx = 0; - table.st_value = null; - table.st_size = 0; - - return table; -} - -Elf64_Sym makeMainSymTable(Elf64_Half textIndex) @nogc -{ - Elf64_Sym table; - - table.st_name = 0x01; - table.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); - table.st_other = 0; - table.st_shndx = textIndex; - table.st_value = null; - table.st_size = 0; - - return table; -} - -ubyte ELF32_ST_BIND(ubyte i) @nogc nothrow pure @safe -{ - return i >> 4; -} - -ubyte ELF32_ST_TYPE(ubyte i) @nogc nothrow pure @safe -{ - return i & 0xf; -} - -ubyte ELF32_ST_INFO(ubyte b, ubyte t) @nogc nothrow pure @safe -{ - return cast(ubyte) ((b << 4) + (t & 0xf)); -} - -/// Special Section Indices. -enum : Elf64_Half -{ - /// Used to mark an undefined or meaningless section reference. - SHN_UNDEF = 0, - /// Processor-specific use. 
- SHN_LOPROC = 0xFF00, - SHN_HIPROC = 0xFF1F, - /// Environment-specific use. - SHN_LOOS = 0xFF20, - SHN_HIOS = 0xFF3F, - /// Indicates that the corresponding reference is an absolute value. - SHN_ABS = 0xFFF1, - /** - * Indicates a symbol that has been declared as a common block (Fortran - * COMMON or C tentative declaration). - */ - SHN_COMMON = 0xFFF2, -} - -/** - * Object File Classes, e_ident[EI_CLASS]. - */ -enum EI_CLASS : ubyte -{ - /// 32-bit objects. - ELFCLASS32 = 1, - /// 64-bit objects. - ELFCLASS64 = 2, -} - -enum ubyte EV_CURRENT = 1; - -/** - * Data Encodings, e_ident[EI_DATA]. - */ -enum EI_DATA : ubyte -{ - /// Object file data structures are little-endian. - ELFDATA2LSB = 1, - /// Object file data structures are big-endian. - ELFDATA2MSB = 2, -} - -/** - * Operating System and ABI Identifiers, e_ident[EI_OSABI]. - */ -enum EI_OSABI : ubyte -{ - /// System V ABI. - ELFOSABI_SYSV = 0, - /// HP-UX operating system. - ELFOSABI_HPUX = 1, - /// Standalone (embedded) application. - ELFOSABI_STANDALONE = 255, -} - -enum : Elf64_Half -{ - ET_NONE = 0, /// No file type. - ET_REL = 1, /// Relocatable object file. - ET_EXEC = 2, /// Executable file. - ET_DYN = 3, /// Shared object file. - ET_CORE = 4, /// Core file. - ET_LOOS = 0xFE00, /// Environment-specific use. - ET_HIOS = 0xFEFF, - ET_LOPROC = 0xFF00, /// Processor-specific use. 
- ET_HIPROC = 0xFFFF, -} - -private size_t pad(size_t value) @nogc -{ - return (value / 8 + 1) * 8; -} - -struct Symbol -{ - String name; - Array!Instruction instructions; -} - -enum Register : ubyte -{ - AX = 0, - CX = 1, - DX = 2, - BX = 3, - SP = 4, - BP = 5, - SI = 6, - DI = 7, -} - -enum MOD : ubyte -{ - indirect = 0, - one = 1, - four = 2, - direct = 3 -} - -enum Direction : ubyte -{ - registerToMemory = 0, - memoryToRegister = 1 << 1 -} - -enum Size : ubyte -{ - eight = 0, - thirtyTwo = 1 -} - -struct Instruction -{ - private ushort opcode; - private ubyte rexPrefix; - private Nullable!ubyte modrmByte; - private SumType!(typeof(null), byte, int) operand1; - - this(ushort opcode) @nogc nothrow pure @safe - { - this.opcode = opcode; - } - - this(ubyte opcode, Register register) @nogc nothrow pure @safe - { - this.opcode = opcode + register; - } - - this(ubyte opcode, Direction direction, Size size = Size.eight) - @nogc nothrow pure @safe - { - this.opcode = opcode | direction | size; - } - - ref Instruction addREXPrefix(bool w = true, bool r = false, bool x = false, bool b = false) - return @nogc nothrow pure @safe - { - this.rexPrefix = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; - return this; - } - - ref Instruction addMODRMByte(MOD mode, Register register, Register rm) - return @nogc nothrow pure @safe - { - this.modrmByte = cast(ubyte) ((mode << 6) | (register << 3) | rm); - return this; - } - - ref Instruction addOperand1(int operand) - return @nogc nothrow pure @safe - { - this.operand1 = operand; - return this; - } - - ref Instruction addOperand1(byte operand) - return @nogc nothrow pure @safe - { - this.operand1 = operand; - return this; - } -} - -Array!ubyte binaryInstructions(ref Array!Instruction instructions) -@nogc nothrow -{ - Array!ubyte binary; - - foreach (ref instruction; instructions) - { - if (instruction.rexPrefix) - { - binary.insertBack((&instruction.rexPrefix)[0 .. 1]); - } - binary.insertBack((cast(ubyte*) &instruction.opcode)[0 .. 
1]); - if (!instruction.modrmByte.isNull) - { - binary.insertBack((&instruction.modrmByte.get())[0 .. 1]); - } - instruction.operand1.match!( - (byte operand) { - binary.insertBack((cast(ubyte*) &operand)[0 .. 1]); - }, - (int operand) { - binary.insertBack((cast(ubyte*) &operand)[0 .. int.sizeof]); - }, - (typeof(null)) { - } - ); - } - - return binary; -} - -Array!Symbol buildInstructions(Definition ast) @nogc -{ - Array!Instruction instructions; - - // Prologue - // Opcode of pushq is “0x50 + r”, where “r” is the register opcode. - // Register opcode of %rbq is 5. - instructions.insertBack(Instruction(0x50, Register.BP)); // pushq %rbp - instructions.insertBack( // movq %rsp, %rbp - Instruction(0x89) - .addREXPrefix() - .addMODRMByte(MOD.direct, Register.SP, Register.BP) - ); - int i = 1; - foreach (statement; ast.statements[]) - { - if ((cast(Number) statement.subroutine.lhs) !is null) - { - // Opcode of mov is “0xb8 + r”, where “r” is the register opcode. - // Register opcode of %eax is 0. - instructions.insertBack( - Instruction(0xb8, Register.AX) // movl $x, %eax; where $x is a number. - .addOperand1((cast(Number) statement.subroutine.lhs).value) - ); - } - else if ((cast(Variable) statement.subroutine.lhs) !is null) - { - // movl -x(%rbp), %eax; where x is a number. - instructions.insertBack( - Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo) - .addMODRMByte(MOD.one, Register.AX, Register.BP) - ); - } - if ((cast(Number) statement.subroutine.rhs) !is null) - { - // Opcode of mov is “0xb8 + r”, where “r” is the register opcode. - // Register opcode of %ebx is 3. - instructions.insertBack( - Instruction(0xb8, Register.BX) // movl $x, %ebx; where $x is a number. - .addOperand1((cast(Number) statement.subroutine.rhs).value) - ); - } - else if ((cast(Variable) statement.subroutine.rhs) !is null) - { - // movl -x(%rbp), %ebx; where x is a number. 
- instructions.insertBack( - Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo) - .addMODRMByte(MOD.one, Register.BX, Register.BP) - .addOperand1(cast(byte) ((cast(Variable) statement.subroutine.rhs).counter * (-4))) - ); - } - // Calculate the result and assign it to a variable on the stack. - instructions.insertBack( - Instruction(0x00, Direction.registerToMemory, Size.thirtyTwo) - .addMODRMByte(MOD.direct, Register.BX, Register.AX) // add %ebx, %eax - ); - - instructions.insertBack( // movl %eax, -x(%rbp); where x is a number. - Instruction(0x89, Direction.registerToMemory, Size.thirtyTwo) - .addMODRMByte(MOD.one, Register.AX, Register.BP) - .addOperand1(cast(byte) (i * (-4))) - ); - ++i; - } - // Epilogue. - instructions.insertBack( // movq %rbp, %rsp - Instruction(0x89) - .addREXPrefix() - .addMODRMByte(MOD.direct, Register.BP, Register.SP) - ); - instructions.insertBack(Instruction(0x58, Register.BP)); // popq %rbp - instructions.insertBack(Instruction(0xc3)); // ret - - return typeof(return)([Symbol(String("main"), instructions)]); -} - -void writeObject(Definition ast, String outputFilename) @nogc -{ - auto handle = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate)); - - if (!handle.valid) - { - perror("writing sample"); - return; - } - - size_t currentOffset = Elf64_Ehdr.sizeof; - auto symbols = buildInstructions(ast); - - Array!Elf64_Shdr sectionHeaders = [makeInitialHeader()]; - Array!Elf64_Sym symbolEntries = [makeInitialSymTable()]; - Array!ubyte instructionSection; - ubyte[8] padding = 0; - - String stringTable = String("\0"); - foreach (symbol; symbols[]) - { - stringTable.insertBack(symbol.name[]); - stringTable.insertBack('\0'); - - auto code = binaryInstructions(symbol.instructions); - sectionHeaders.insertBack(makeTextHeader(currentOffset, code.length)); - - symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1))); - - immutable size_t instructionsLength = pad(code.length); - 
instructionSection.insertBack(code[]); - instructionSection.insertBack(padding[0 .. instructionsLength - code.length]); - - currentOffset += instructionsLength; - } - - const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof; - sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, cast(uint) symbols.length)); - currentOffset += symbolTableSize; - - sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length)); - currentOffset += stringTable.length; - - sectionHeaders.insertBack(makeStringHeader(0x11, currentOffset, sectionStringTable.length)); - currentOffset = pad(currentOffset + sectionStringTable.length); - - auto fileHeader = makeFileHeader(currentOffset, 5, 4); - - handle.write((cast(ubyte*) &fileHeader)[0 .. Elf64_Ehdr.sizeof]); - handle.write(instructionSection.get); - handle.write((cast(ubyte*) symbolEntries.get.ptr)[0 .. Elf64_Sym.sizeof * symbolEntries.length]); - - immutable size_t codeLength = stringTable.length + sectionStringTable.length; - handle.write(cast(ubyte[]) stringTable.get); - handle.write(cast(ubyte[]) sectionStringTable); - handle.write(padding[0 .. pad(codeLength) - codeLength]); - - handle.write((cast(ubyte*) sectionHeaders.get.ptr)[0 .. Elf64_Shdr.sizeof * sectionHeaders.length]); -} - -void generate(Definition ast, String outputFilename) @nogc -{ - auto asmTemplate = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate)); - - // Prologue - asmTemplate.write(cast(const(ubyte)[]) ".text - .globl main - .type main, @function -main: - pushq %rbp - movq %rsp, %rbp -"); - - /* Allocate space on the stack for local variables. 
- asmTemplate.insertBack(" sub $"); - asmTemplate.insertBack(format!"{}"(ast.statements.length)[]); - asmTemplate.insertBack(", $esp\n"); */ - - int i = 1; - foreach (statement; ast.statements[]) - { - if ((cast(Number) statement.subroutine.lhs) !is null) - { - asmTemplate.write(cast(const(ubyte)[]) " movl $"); - asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.lhs).value).get); - asmTemplate.write(cast(const(ubyte)[]) ", %eax\n"); - } - else if ((cast(Variable) statement.subroutine.lhs) !is null) - { - asmTemplate.write(cast(const(ubyte)[]) " movl -"); - asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4).get); - asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %eax\n"); - } - if ((cast(Number) statement.subroutine.rhs) !is null) - { - asmTemplate.write(cast(const(ubyte)[]) " movl $"); - asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.rhs).value).get); - asmTemplate.write(cast(const(ubyte)[]) ", %ebx\n"); - } - else if ((cast(Variable) statement.subroutine.rhs) !is null) - { - asmTemplate.write(cast(const(ubyte)[]) " movl -"); - asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4).get); - asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %ebx\n"); - } - // Calculate the result and assign it to a variable on the stack. - asmTemplate.write(cast(const(ubyte)[]) " add %ebx, %eax\n"); - asmTemplate.write(cast(const(ubyte)[]) " movl %eax, -"); - asmTemplate.write(cast(ubyte[]) format!"{}"(i * 4).get); - asmTemplate.write(cast(const(ubyte)[]) "(%rbp)\n"); - ++i; - } - - // Epilogue. 
- asmTemplate.write(cast(const(ubyte)[]) " movq %rbp, %rsp - popq %rbp - ret -"); -} diff --git a/source/elna/riscv.d b/source/elna/riscv.d new file mode 100644 index 0000000..fe6989f --- /dev/null +++ b/source/elna/riscv.d @@ -0,0 +1,254 @@ +module elna.riscv; + +import elna.extended; +import elna.ir; +import std.algorithm; +import std.typecons; +import tanya.container.array; +import tanya.container.string; + +enum XRegister : ubyte // Integer registers by ABI mnemonic; the value is the register number. +{ + zero = 0, + ra = 1, + sp = 2, + gp = 3, + tp = 4, + t0 = 5, + t1 = 6, + t2 = 7, + s0 = 8, + s1 = 9, + a0 = 10, + a1 = 11, + a2 = 12, + a3 = 13, + a4 = 14, + a5 = 15, + a6 = 16, + a7 = 17, + s2 = 18, + s3 = 19, + s4 = 20, + s5 = 21, + s6 = 22, + s7 = 23, + s8 = 24, + s9 = 25, + s10 = 26, + s11 = 27, + t3 = 28, + t4 = 29, + t5 = 30, + t6 = 31, +} + +enum Funct3 : ubyte // Values of the 3-bit funct3 field (instruction bits 12-14). +{ + addi = 0b000, + slti = 0b010, /// Same funct3 as slt; 0b001 belongs to slli. + sltiu = 0b011, + andi = 0b111, + ori = 0b110, + xori = 0b100, + slli = 0b001, /// Same funct3 as sll; 0b000 belongs to addi. + srli = 0b101, + srai = 0b101, + add = 0b000, + slt = 0b010, + sltu = 0b011, + and = 0b111, + or = 0b110, + xor = 0b100, + sll = 0b001, + srl = 0b101, + sub = 0b000, + sra = 0b101, + beq = 0b000, + bne = 0b001, + blt = 0b100, + bltu = 0b110, + bge = 0b101, + bgeu = 0b111, + fence = 0b000, + fenceI = 0b001, + csrrw = 0b001, + csrrs = 0b010, + csrrc = 0b011, + csrrwi = 0b101, + csrrsi = 0b110, + csrrci = 0b111, + priv = 0b000, + sb = 0b000, + sh = 0b001, + sw = 0b010, + lb = 0b000, + lh = 0b001, + lw = 0b010, + lbu = 0b100, + lhu = 0b101, + jalr = 0b000, +} + +enum Funct12 : ubyte // funct12 values of the SYSTEM instructions ecall and ebreak. +{ + ecall = 0b000000000000, + ebreak = 0b000000000001, +} + +enum BaseOpcode : ubyte // Values of the 7-bit major opcode field (instruction bits 0-6). +{ + opImm = 0b0010011, + lui = 0b0110111, + auipc = 0b0010111, + op = 0b0110011, + jal = 0b1101111, + jalr = 0b1100111, + branch = 0b1100011, + load = 0b0000011, + store = 0b0100011, + miscMem = 0b0001111, + system = 0b1110011, +} + +struct Instruction +{ + private uint instruction; + + ref Instruction i(BaseOpcode opcode, XRegister rd, Funct3 funct3, XRegister rs1, uint immediate) + return 
scope @nogc + { + this.instruction = opcode + | (rd << 7) + | (funct3 << 12) + | (rs1 << 15) + | (immediate << 20); // Only the low 12 bits of the immediate fit into the word. + + return this; + } + + ref Instruction s(BaseOpcode opcode, uint imm1, Funct3 funct3, XRegister rs1, XRegister rs2, uint imm2 = 0) + return scope @nogc + { + this.instruction = opcode + | ((imm1 & 0x1f) << 7) // imm[4:0]; masked so larger offsets cannot spill into funct3. + | (funct3 << 12) + | (rs1 << 15) + | (rs2 << 20) + | ((imm2 | (imm1 >> 5)) << 25); // imm[11:5]: imm2 combined with the upper bits of imm1. + + return this; + } + + ref Instruction r(BaseOpcode opcode, XRegister rd, Funct3 funct3, XRegister rs1, XRegister rs2, ubyte funct7 = 0) + return scope @nogc + { + this.instruction = opcode + | (rd << 7) + | (funct3 << 12) + | (rs1 << 15) + | (rs2 << 20) + | (funct7 << 25); + + return this; + } + + ubyte[] encode() return scope @nogc + { + return (cast(ubyte*) (&this.instruction))[0 .. uint.sizeof]; + } +} + +Array!ubyte writeNext(Definition ast) @nogc // Emits one addition per statement of ast and returns the encoded bytes. +{ + Array!Instruction instructions; + + // Prologue. + instructions.insertBack( + Instruction() + .i(BaseOpcode.opImm, XRegister.sp, Funct3.addi, XRegister.sp, cast(uint) -16) // addi sp, sp, -16 + ); + instructions.insertBack( + Instruction() + .s(BaseOpcode.store, 12, Funct3.sw, XRegister.sp, XRegister.s0) // sw s0, 12(sp) + ); + instructions.insertBack( + Instruction() + .i(BaseOpcode.opImm, XRegister.s0, Funct3.addi, XRegister.sp, 16) // addi s0, sp, 16 + ); + + int i = 1; + foreach (statement; ast.statements[]) + { + if ((cast(Number) statement.subroutine.lhs) !is null) + { + // RISC-V has no load-immediate instruction; the constant is added to the + // hard-wired zero register instead. + instructions.insertBack( + Instruction() // addi a0, zero, x; where x is a number. + .i(BaseOpcode.opImm, XRegister.a0, Funct3.addi, XRegister.zero, + (cast(Number) statement.subroutine.lhs).value) + ); + } + else if ((cast(Variable) statement.subroutine.lhs) !is null) + { + // lw a0, x(sp); where x is the variable counter times 4. 
+ instructions.insertBack( + Instruction() + .i(BaseOpcode.load, XRegister.a0, Funct3.lw, XRegister.sp, + cast(byte) (cast(Variable) statement.subroutine.lhs).counter * 4) + ); + } + if ((cast(Number) statement.subroutine.rhs) !is null) + { + // RISC-V has no load-immediate instruction; the constant is added to the + // hard-wired zero register instead. + instructions.insertBack( + Instruction() // addi t0, zero, x; where x is a number. + .i(BaseOpcode.opImm, XRegister.t0, Funct3.addi, XRegister.zero, + (cast(Number) statement.subroutine.rhs).value) + ); + } + else if ((cast(Variable) statement.subroutine.rhs) !is null) + { + // lw t0, x(sp); where x is the variable counter times 4. + instructions.insertBack( + Instruction() + .i(BaseOpcode.load, XRegister.t0, Funct3.lw, XRegister.sp, + cast(byte) (cast(Variable) statement.subroutine.rhs).counter * 4) + ); + } + // Calculate the result and assign it to a variable on the stack. + instructions.insertBack( + Instruction() + .r(BaseOpcode.op, XRegister.a0, Funct3.add, XRegister.a0, XRegister.t0) // add a0, a0, t0 + ); + + instructions.insertBack( // sw a0, x(sp); where x is the 1-based statement index times 4. + Instruction() + .s(BaseOpcode.store, cast(uint) (i * 4), Funct3.sw, XRegister.sp, XRegister.a0) // NOTE(review): i == 3 gives 12(sp), the slot the prologue used for s0 - confirm. + ); + ++i; + } + + // Epilogue. 
+ instructions.insertBack( + Instruction() + .i(BaseOpcode.load, XRegister.s0, Funct3.lw, XRegister.sp, 12) // lw s0, 12(sp) + ); + instructions.insertBack( + Instruction() + .i(BaseOpcode.opImm, XRegister.sp, Funct3.addi, XRegister.sp, 16) // addi sp, sp, 16 + ); + instructions.insertBack( + Instruction() + .i(BaseOpcode.jalr, XRegister.zero, Funct3.jalr, XRegister.ra, 0) // jalr zero, ra, 0 (the ret pseudo-instruction) + ); + + Array!ubyte programText; + foreach (ref instruction; instructions) + { + programText.insertBack(instruction.encode); + } + return programText; +} diff --git a/source/main.d b/source/main.d index bb912a9..dcb9fa0 100644 --- a/source/main.d +++ b/source/main.d @@ -1,35 +1,11 @@ -import core.stdc.stdio; -import core.stdc.string; -import core.stdc.stdlib; -import elna.lexer; -import elna.parser; -import elna.generator; +import elna.backend; import elna.ir; -import elna.extended; import elna.arguments; -import std.algorithm; -import std.range; +import std.path; import std.sumtype; -import std.typecons; -import tanya.container.array; import tanya.container.string; import tanya.memory.allocator; import tanya.memory.mmappool; -import tanya.os.error; - -private Nullable!String readSource(string source) @nogc -{ - enum size_t bufferSize = 255; - auto sourceFilename = String(source); - - return readFile(sourceFilename).match!( - (ErrorCode errorCode) { - perror(sourceFilename.toStringz); - return Nullable!String(); - }, - (Array!ubyte contents) => nullable(String(cast(char[]) contents.get)) - ); -} int main(string[] args) { @@ -38,48 +14,20 @@ int main(string[] args) return Arguments.parse(args).match!( (ArgumentError argumentError) => 4, (Arguments arguments) { - auto sourceText = readSource(arguments.inFile); - if (sourceText.isNull) - { - return 3; - } - auto tokens = lex(sourceText.get.get); - if (tokens.length == 0) - { - printf("Lexical analysis failed.\n"); - return 1; - } - auto ast = parse(tokens); - if (!ast.valid) - { - auto compileError = ast.error.get; - printf("%lu:%lu: %s\n", compileError.line, compileError.column, 
compileError.message.ptr); - return 2; - } - auto ir = transform(ast.result); - String outputFilename; if (arguments.output is null) { - auto slashIndex = max(0, arguments.inFile.retro.countUntil('/')); - - outputFilename.insertBack(arguments.inFile[$ - slashIndex .. $ - 4]); - outputFilename.insertBack(arguments.assembler ? "s" : "o"); + outputFilename = arguments + .inFile + .baseName + .withExtension("o"); } else { outputFilename = String(arguments.output); } - if (arguments.assembler) - { - generate(ir, outputFilename); - } - else - { - writeObject(ir, outputFilename); - } - return 0; + return generate(arguments.inFile, outputFilename); } ); } diff --git a/tests/const_list.elna b/tests/const_list.eln similarity index 100% rename from tests/const_list.elna rename to tests/const_list.eln diff --git a/tests/expectations/left_nested_sum.txt b/tests/expectations/left_nested_sum.txt new file mode 100644 index 0000000..45a4fb7 --- /dev/null +++ b/tests/expectations/left_nested_sum.txt @@ -0,0 +1 @@ +8 diff --git a/tests/left_nested_sum.eln b/tests/left_nested_sum.eln new file mode 100644 index 0000000..93fb29b --- /dev/null +++ b/tests/left_nested_sum.eln @@ -0,0 +1,2 @@ +! + (+ 3 4) 1 +. diff --git a/tests/sum.elna b/tests/sum.eln similarity index 100% rename from tests/sum.elna rename to tests/sum.eln diff --git a/tests/sums.elna b/tests/sums.eln similarity index 100% rename from tests/sums.elna rename to tests/sums.eln