From 77857ad118e82e35a9f08524962eee70bcc15296 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Wed, 8 Jun 2022 08:34:44 +0200 Subject: [PATCH] Add intermediate assembler representation --- Rakefile | 23 ++- source/elna/arguments.d | 22 ++- source/elna/generator.d | 326 +++++++++++++++++++++++++++------------- source/main.d | 77 ++++++---- 4 files changed, 295 insertions(+), 153 deletions(-) diff --git a/Rakefile b/Rakefile index 1ba8f77..ca2ae0b 100644 --- a/Rakefile +++ b/Rakefile @@ -17,18 +17,25 @@ directory 'build' CLEAN.include 'build' CLEAN.include '.dub' -rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file) }) do |t| - sh 'gcc', '-o', t.name, "#{t.name}.o" +rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.o') }) do |t| + sh 'ld.gold', '-L/usr/lib64', + '--dynamic-linker', '/lib64/ld-linux-x86-64.so.2', + '-o', t.name, + '/usr/lib64/crt1.o', '/usr/lib64/crti.o', '-lc', t.source, '/usr/lib64/crtn.o' end -rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_object(file) }) do |t| - Pathname.new(t.name).dirname.mkpath - Open3.pipeline [BINARY, t.source], ['gcc', '-x', 'assembler', '-o', t.name, '-'] +rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.s') }) do |t| + sh 'gcc', '-x', 'assembler', '-o', t.name, t.source end rule(/build\/tests\/.+\.o$/ => ->(file) { test_for_object(file) }) do |t| Pathname.new(t.name).dirname.mkpath - sh BINARY, t.source + sh BINARY, '-o', t.name, t.source +end + +rule(/build\/asm\/.+\.s$/ => ->(file) { test_for_object(file) }) do |t| + Pathname.new(t.name).dirname.mkpath + sh BINARY, '-s', '-o', t.name, t.source end file BINARY => SOURCES do |t| @@ -70,9 +77,9 @@ def test_for_object(out_file) [test_source, BINARY] end -def test_for_out(out_file) +def test_for_out(out_file, extension) Pathname .new(out_file) - .sub_ext('.o') + .sub_ext(extension) .to_path end diff --git a/source/elna/arguments.d b/source/elna/arguments.d index 5e14dd8..1ad928a 100644 --- a/source/elna/arguments.d +++ b/source/elna/arguments.d @@ -13,6 +13,7 @@ struct ArgumentError { expectedOutputFile, noInput, + superfluousArguments, } private Type type_; @@ -50,11 +51,11 @@ struct Arguments { private bool assembler_; private string output_; - private string[] inFiles_; + private string inFile_; - @property string[] inFiles() @nogc nothrow pure @safe + @property string inFile() @nogc nothrow pure @safe { - return this.inFiles_; + return this.inFile_; } /** @@ -95,10 +96,17 @@ struct Arguments return parseArguments(arguments).match!( (Arguments parsed) { - if (parsed.inFiles.empty) + if (parsed.inFile is null) { return ReturnType(ArgumentError(ArgumentError.Type.noInput)); } + else if (!arguments.empty) + { + return ReturnType(ArgumentError( + ArgumentError.Type.superfluousArguments, + arguments.front + )); + } return ReturnType(parsed); }, (ArgumentError argumentError) => ReturnType(argumentError) @@ -131,13 +139,13 @@ struct Arguments else if (arguments.front == "--") { arguments.popFront; - parsed.inFiles_ = arguments; + parsed.inFile_ = arguments.front; + arguments.popFront; break; } else if (!arguments.front.startsWith("-")) { - parsed.inFiles_ = arguments; - break; + parsed.inFile_ = arguments.front; } arguments.popFront; } diff --git a/source/elna/generator.d b/source/elna/generator.d index ce2b3d1..7a28b3a 100644 --- a/source/elna/generator.d +++ b/source/elna/generator.d @@ -4,6 +4,9 @@ import core.stdc.stdio; import core.stdc.stdlib; import core.stdc.string; import elna.ir; +import elna.extended; +import std.sumtype; +import std.typecons; import tanya.container.array; import tanya.container.string; import tanya.memory.mmappool; @@ -452,56 +455,137 @@ private size_t pad(size_t value) @nogc struct Symbol { String name; - Array!ubyte instructions; + Array!Instruction instructions; } -/* -.text - .globl main - .type main, @function -main: - movl $3, %eax - ret -*/ -immutable ubyte[] instructions = [ - // Opcode of pushq is “0x50 + r”, where “r” is the register opcode. - // Register opcode of %rbq is 5. - 0x50 + 5, // push% %rbp - 0x48, 0x89, 0xe5, // movq %rsp, %rbp - - 0xb8, 0x03, 0x00, 0x00, 0x00, // movl $3, %eax - - // Epilogue. - 0x48, 0x89, 0xec, // movq %rbp, %rsp - 0x58 + 5, // popq %rbp - 0xc3, // ret -]; - -void writeObject(Definition ast, String outputFilename) @nogc +enum Register : ubyte { - auto handle = fopen(outputFilename.toStringz, "wb"); + AX = 0, + CX = 1, + DX = 2, + BX = 3, + SP = 4, + BP = 5, + SI = 6, + DI = 7, +} - if (handle is null) +enum MOD : ubyte +{ + indirect = 0, + one = 1, + four = 2, + direct = 3 +} + +enum Direction : ubyte +{ + registerToMemory = 0, + memoryToRegister = 1 << 1 +} + +enum Size : ubyte +{ + eight = 0, + thirtyTwo = 1 +} + +struct Instruction +{ + private ushort opcode; + private ubyte rexPrefix; + private Nullable!ubyte modrmByte; + private SumType!(typeof(null), byte, int) operand1; + + this(ushort opcode) @nogc nothrow pure @safe { - perror("writing sample"); - return; - } - scope (exit) - { - fclose(handle); + this.opcode = opcode; } - size_t currentOffset = Elf64_Ehdr.sizeof; - Array!Elf64_Shdr sectionHeaders; - Array!Elf64_Sym symbolEntries; + this(ubyte opcode, Register register) @nogc nothrow pure @safe + { + this.opcode = opcode + register; + } + + this(ubyte opcode, Direction direction, Size size = Size.eight) + @nogc nothrow pure @safe + { + this.opcode = opcode | direction | size; + } + + ref Instruction addREXPrefix(bool w = true, bool r = false, bool x = false, bool b = false) + return @nogc nothrow pure @safe + { + this.rexPrefix = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + return this; + } + + ref Instruction addMODRMByte(MOD mode, Register register, Register rm) + return @nogc nothrow pure @safe + { + this.modrmByte = cast(ubyte) ((mode << 6) | (register << 3) | rm); + return this; + } + + ref Instruction addOperand1(int operand) + return @nogc nothrow pure @safe + { + this.operand1 = operand; + return this; + } + + ref Instruction addOperand1(byte operand) + return @nogc nothrow pure @safe + { + this.operand1 = operand; + return this; + } +} + +Array!ubyte binaryInstructions(ref Array!Instruction instructions) +@nogc nothrow +{ + Array!ubyte binary; + + foreach (ref instruction; instructions) + { + if (instruction.rexPrefix) + { + binary.insertBack((&instruction.rexPrefix)[0 .. 1]); + } + binary.insertBack((cast(ubyte*) &instruction.opcode)[0 .. 1]); + if (!instruction.modrmByte.isNull) + { + binary.insertBack((&instruction.modrmByte.get())[0 .. 1]); + } + instruction.operand1.match!( + (byte operand) { + binary.insertBack((cast(ubyte*) &operand)[0 .. 1]); + }, + (int operand) { + binary.insertBack((cast(ubyte*) &operand)[0 .. int.sizeof]); + }, + (typeof(null)) { + } + ); + } + + return binary; +} + +Array!Symbol buildInstructions(Definition ast) @nogc +{ + Array!Instruction instructions; // Prologue - Array!ubyte asmTemplate = Array!ubyte(cast(ubyte[]) [ - // Opcode of pushq is “0x50 + r”, where “r” is the register opcode. - // Register opcode of %rbq is 5. - 0x50 + 5, // pushq %rbp - 0x48, 0x89, 0xe5, // movq %rsp, %rbp - ]); + // Opcode of pushq is “0x50 + r”, where “r” is the register opcode. + // Register opcode of %rbq is 5. + instructions.insertBack(Instruction(0x50, Register.BP)); // pushq %rbp + instructions.insertBack( // movq %rsp, %rbp + Instruction(0x89) + .addREXPrefix() + .addMODRMByte(MOD.direct, Register.SP, Register.BP) + ); int i = 1; foreach (statement; ast.statements[]) { @@ -509,49 +593,79 @@ void writeObject(Definition ast, String outputFilename) @nogc { // Opcode of mov is “0xb8 + r”, where “r” is the register opcode. // Register opcode of %eax is 0. - asmTemplate.insertBack(cast(ubyte) 0xb8); // movl $x, %eax; where $x is a number. - asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.lhs).value)[0 .. int.sizeof]); + instructions.insertBack( + Instruction(0xb8, Register.AX) // movl $x, %eax; where $x is a number. + .addOperand1((cast(Number) statement.subroutine.lhs).value) + ); } else if ((cast(Variable) statement.subroutine.lhs) !is null) { - // movl -x(%rbp), %ebx; where x is a number. - asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x45]); - const disposition = (cast(Variable) statement.subroutine.lhs).counter * (-4); - asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); + // movl -x(%rbp), %eax; where x is a number. + instructions.insertBack( + Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo) + .addMODRMByte(MOD.one, Register.AX, Register.BP) + ); } if ((cast(Number) statement.subroutine.rhs) !is null) { // Opcode of mov is “0xb8 + r”, where “r” is the register opcode. // Register opcode of %ebx is 3. - asmTemplate.insertBack(cast(ubyte) 0xbb); // movl $x, %ebx; where $x is a number. - asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.rhs).value)[0 .. int.sizeof]); + instructions.insertBack( + Instruction(0xb8, Register.BX) // movl $x, %ebx; where $x is a number. + .addOperand1((cast(Number) statement.subroutine.rhs).value) + ); } else if ((cast(Variable) statement.subroutine.rhs) !is null) { // movl -x(%rbp), %ebx; where x is a number. - asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x5d]); - const disposition = (cast(Variable) statement.subroutine.rhs).counter * (-4); - asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); + instructions.insertBack( + Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo) + .addMODRMByte(MOD.one, Register.BX, Register.BP) + .addOperand1(cast(byte) ((cast(Variable) statement.subroutine.rhs).counter * (-4))) + ); } // Calculate the result and assign it to a variable on the stack. - asmTemplate.insertBack(cast(ubyte[]) [0x01, 0xd8]); // add %ebx, %eax + instructions.insertBack( + Instruction(0x00, Direction.registerToMemory, Size.thirtyTwo) + .addMODRMByte(MOD.direct, Register.BX, Register.AX) // add %ebx, %eax + ); - asmTemplate.insertBack(cast(ubyte[]) [0x89, 0x45]); // movl %eax, -x(%rbp); where x is a number. - const disposition = i * (-4); - asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); + instructions.insertBack( // movl %eax, -x(%rbp); where x is a number. + Instruction(0x89, Direction.registerToMemory, Size.thirtyTwo) + .addMODRMByte(MOD.one, Register.AX, Register.BP) + .addOperand1(cast(byte) (i * (-4))) + ); ++i; } // Epilogue. - asmTemplate.insertBack(cast(ubyte[]) [ - 0x48, 0x89, 0xec, // movq %rbp, %rsp - 0x58 + 5, // popq %rbp - 0xc3, // ret - ]); + instructions.insertBack( // movq %rbp, %rsp + Instruction(0x89) + .addREXPrefix() + .addMODRMByte(MOD.direct, Register.BP, Register.SP) + ); + instructions.insertBack(Instruction(0x58, Register.BP)); // popq %rbp + instructions.insertBack(Instruction(0xc3)); // ret - Symbol[1] symbols = [Symbol(String("main"), asmTemplate)]; + return typeof(return)([Symbol(String("main"), instructions)]); +} - sectionHeaders.insertBack(makeInitialHeader()); - symbolEntries.insertBack(makeInitialSymTable()); +void writeObject(Definition ast, String outputFilename) @nogc +{ + auto handle = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate)); + + if (!handle.valid) + { + perror("writing sample"); + return; + } + + size_t currentOffset = Elf64_Ehdr.sizeof; + auto symbols = buildInstructions(ast); + + Array!Elf64_Shdr sectionHeaders = [makeInitialHeader()]; + Array!Elf64_Sym symbolEntries = [makeInitialSymTable()]; + Array!ubyte instructionSection; + ubyte[8] padding = 0; String stringTable = String("\0"); foreach (symbol; symbols[]) @@ -559,14 +673,20 @@ void writeObject(Definition ast, String outputFilename) @nogc stringTable.insertBack(symbol.name[]); stringTable.insertBack('\0'); - sectionHeaders.insertBack(makeTextHeader(currentOffset, symbol.instructions.length)); - currentOffset = pad(currentOffset + symbol.instructions.length); + auto code = binaryInstructions(symbol.instructions); + sectionHeaders.insertBack(makeTextHeader(currentOffset, code.length)); symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1))); + + immutable size_t instructionsLength = pad(code.length); + instructionSection.insertBack(code[]); + instructionSection.insertBack(padding[0 .. instructionsLength - code.length]); + + currentOffset += instructionsLength; } const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof; - sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, symbols.length)); + sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, cast(uint) symbols.length)); currentOffset += symbolTableSize; sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length)); @@ -577,38 +697,30 @@ void writeObject(Definition ast, String outputFilename) @nogc auto fileHeader = makeFileHeader(currentOffset, 5, 4); - version (none) - { - printf("%.2x\n", cast(uint) currentOffset); - } - ubyte[8] padding = 0; - size_t codeLength = stringTable.length + sectionStringTable.length; + handle.write((cast(ubyte*) &fileHeader)[0 .. Elf64_Ehdr.sizeof]); + handle.write(instructionSection.get); + handle.write((cast(ubyte*) symbolEntries.get.ptr)[0 .. Elf64_Sym.sizeof * symbolEntries.length]); - fwrite(&fileHeader, 8, Elf64_Ehdr.sizeof / 8, handle); - foreach (symbol; symbols[]) - { - immutable size_t instructionsLength = pad(symbol.instructions.length); - fwrite(symbol.instructions.get.ptr, 1, symbol.instructions.length, handle); - fwrite(padding.ptr, 1, instructionsLength - symbol.instructions.length, handle); - codeLength += instructionsLength; - } - fwrite(symbolEntries.get.ptr, Elf64_Sym.sizeof, symbolEntries.length, handle); - fwrite(stringTable.get.ptr, 1, stringTable.length, handle); - fwrite(sectionStringTable.ptr, 1, sectionStringTable.length, handle); - fwrite(padding.ptr, pad(codeLength) - codeLength, 1, handle); - fwrite(sectionHeaders.get.ptr, Elf64_Shdr.sizeof, sectionHeaders.length, handle); + immutable size_t codeLength = stringTable.length + sectionStringTable.length; + handle.write(cast(ubyte[]) stringTable.get); + handle.write(cast(ubyte[]) sectionStringTable); + handle.write(padding[0 .. pad(codeLength) - codeLength]); + + handle.write((cast(ubyte*) sectionHeaders.get.ptr)[0 .. Elf64_Shdr.sizeof * sectionHeaders.length]); } -String generate(Definition ast) @nogc +void generate(Definition ast, String outputFilename) @nogc { + auto asmTemplate = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate)); + // Prologue - String asmTemplate = ".text + asmTemplate.write(cast(const(ubyte)[]) ".text .globl main .type main, @function main: pushq %rbp movq %rsp, %rbp -"; +"); /* Allocate space on the stack for local variables. asmTemplate.insertBack(" sub $"); @@ -620,41 +732,39 @@ main: { if ((cast(Number) statement.subroutine.lhs) !is null) { - asmTemplate.insertBack(" movl $"); - asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.lhs).value)[]); - asmTemplate.insertBack(", %eax\n"); + asmTemplate.write(cast(const(ubyte)[]) " movl $"); + asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.lhs).value).get); + asmTemplate.write(cast(const(ubyte)[]) ", %eax\n"); } else if ((cast(Variable) statement.subroutine.lhs) !is null) { - asmTemplate.insertBack(" movl -"); - asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4)[]); - asmTemplate.insertBack("(%rbp), %eax\n"); + asmTemplate.write(cast(const(ubyte)[]) " movl -"); + asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4).get); + asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %eax\n"); } if ((cast(Number) statement.subroutine.rhs) !is null) { - asmTemplate.insertBack(" movl $"); - asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.rhs).value)[]); - asmTemplate.insertBack(", %ebx\n"); + asmTemplate.write(cast(const(ubyte)[]) " movl $"); + asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.rhs).value).get); + asmTemplate.write(cast(const(ubyte)[]) ", %ebx\n"); } else if ((cast(Variable) statement.subroutine.rhs) !is null) { - asmTemplate.insertBack(" movl -"); - asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4)[]); - asmTemplate.insertBack("(%rbp), %ebx\n"); + asmTemplate.write(cast(const(ubyte)[]) " movl -"); + asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4).get); + asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %ebx\n"); } // Calculate the result and assign it to a variable on the stack. - asmTemplate.insertBack(" add %ebx, %eax\n"); - asmTemplate.insertBack(" movl %eax, -"); - asmTemplate.insertBack(format!"{}"(i * 4)[]); - asmTemplate.insertBack("(%rbp)\n"); + asmTemplate.write(cast(const(ubyte)[]) " add %ebx, %eax\n"); + asmTemplate.write(cast(const(ubyte)[]) " movl %eax, -"); + asmTemplate.write(cast(ubyte[]) format!"{}"(i * 4).get); + asmTemplate.write(cast(const(ubyte)[]) "(%rbp)\n"); ++i; } // Epilogue. - asmTemplate.insertBack(" movq %rbp, %rsp + asmTemplate.write(cast(const(ubyte)[]) " movq %rbp, %rsp popq %rbp ret "); - - return asmTemplate; } diff --git a/source/main.d b/source/main.d index ccf390a..bb912a9 100644 --- a/source/main.d +++ b/source/main.d @@ -6,6 +6,9 @@ import elna.parser; import elna.generator; import elna.ir; import elna.extended; +import elna.arguments; +import std.algorithm; +import std.range; import std.sumtype; import std.typecons; import tanya.container.array; @@ -32,37 +35,51 @@ int main(string[] args) { defaultAllocator = MmapPool.instance; - if (args.length < 2) - { - return 4; - } - auto sourceText = readSource(args[1]); - if (sourceText.isNull) - { - return 3; - } - auto tokens = lex(sourceText.get.get); - if (tokens.length == 0) - { - printf("Lexical analysis failed.\n"); - return 1; - } - auto ast = parse(tokens); - if (!ast.valid) - { - auto compileError = ast.error.get; - printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr); - return 2; - } - auto ir = transform(ast.result); + return Arguments.parse(args).match!( + (ArgumentError argumentError) => 4, + (Arguments arguments) { + auto sourceText = readSource(arguments.inFile); + if (sourceText.isNull) + { + return 3; + } + auto tokens = lex(sourceText.get.get); + if (tokens.length == 0) + { + printf("Lexical analysis failed.\n"); + return 1; + } + auto ast = parse(tokens); + if (!ast.valid) + { + auto compileError = ast.error.get; + printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr); + return 2; + } + auto ir = transform(ast.result); - String outputFilename = String("build/"); - outputFilename.insertBack(args[1][0 .. $ - 4]); - outputFilename.insertBack("o"); - writeObject(ir, outputFilename); + String outputFilename; + if (arguments.output is null) + { + auto slashIndex = max(0, arguments.inFile.retro.countUntil('/')); - auto code = generate(ir); - printf("%s", code.toStringz()); + outputFilename.insertBack(arguments.inFile[$ - slashIndex .. $ - 4]); + outputFilename.insertBack(arguments.assembler ? "s" : "o"); + } + else + { + outputFilename = String(arguments.output); + } + if (arguments.assembler) + { + generate(ir, outputFilename); + } + else + { + writeObject(ir, outputFilename); + } - return 0; + return 0; + } + ); }