Add intermediate assembler representation

This commit is contained in:
Eugen Wissner 2022-06-08 08:34:44 +02:00
parent 473cd4e498
commit 77857ad118
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
4 changed files with 295 additions and 153 deletions

View File

@ -17,18 +17,25 @@ directory 'build'
CLEAN.include 'build' CLEAN.include 'build'
CLEAN.include '.dub' CLEAN.include '.dub'
rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file) }) do |t| rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.o') }) do |t|
sh 'gcc', '-o', t.name, "#{t.name}.o" sh 'ld.gold', '-L/usr/lib64',
'--dynamic-linker', '/lib64/ld-linux-x86-64.so.2',
'-o', t.name,
'/usr/lib64/crt1.o', '/usr/lib64/crti.o', '-lc', t.source, '/usr/lib64/crtn.o'
end end
rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_object(file) }) do |t| rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.s') }) do |t|
Pathname.new(t.name).dirname.mkpath sh 'gcc', '-x', 'assembler', '-o', t.name, t.source
Open3.pipeline [BINARY, t.source], ['gcc', '-x', 'assembler', '-o', t.name, '-']
end end
rule(/build\/tests\/.+\.o$/ => ->(file) { test_for_object(file) }) do |t| rule(/build\/tests\/.+\.o$/ => ->(file) { test_for_object(file) }) do |t|
Pathname.new(t.name).dirname.mkpath Pathname.new(t.name).dirname.mkpath
sh BINARY, t.source sh BINARY, '-o', t.name, t.source
end
rule(/build\/asm\/.+\.s$/ => ->(file) { test_for_object(file) }) do |t|
Pathname.new(t.name).dirname.mkpath
sh BINARY, '-s', '-o', t.name, t.source
end end
file BINARY => SOURCES do |t| file BINARY => SOURCES do |t|
@ -70,9 +77,9 @@ def test_for_object(out_file)
[test_source, BINARY] [test_source, BINARY]
end end
def test_for_out(out_file) def test_for_out(out_file, extension)
Pathname Pathname
.new(out_file) .new(out_file)
.sub_ext('.o') .sub_ext(extension)
.to_path .to_path
end end

View File

@ -13,6 +13,7 @@ struct ArgumentError
{ {
expectedOutputFile, expectedOutputFile,
noInput, noInput,
superfluousArguments,
} }
private Type type_; private Type type_;
@ -50,11 +51,11 @@ struct Arguments
{ {
private bool assembler_; private bool assembler_;
private string output_; private string output_;
private string[] inFiles_; private string inFile_;
@property string[] inFiles() @nogc nothrow pure @safe @property string inFile() @nogc nothrow pure @safe
{ {
return this.inFiles_; return this.inFile_;
} }
/** /**
@ -95,10 +96,17 @@ struct Arguments
return parseArguments(arguments).match!( return parseArguments(arguments).match!(
(Arguments parsed) { (Arguments parsed) {
if (parsed.inFiles.empty) if (parsed.inFile is null)
{ {
return ReturnType(ArgumentError(ArgumentError.Type.noInput)); return ReturnType(ArgumentError(ArgumentError.Type.noInput));
} }
else if (!arguments.empty)
{
return ReturnType(ArgumentError(
ArgumentError.Type.superfluousArguments,
arguments.front
));
}
return ReturnType(parsed); return ReturnType(parsed);
}, },
(ArgumentError argumentError) => ReturnType(argumentError) (ArgumentError argumentError) => ReturnType(argumentError)
@ -131,13 +139,13 @@ struct Arguments
else if (arguments.front == "--") else if (arguments.front == "--")
{ {
arguments.popFront; arguments.popFront;
parsed.inFiles_ = arguments; parsed.inFile_ = arguments.front;
arguments.popFront;
break; break;
} }
else if (!arguments.front.startsWith("-")) else if (!arguments.front.startsWith("-"))
{ {
parsed.inFiles_ = arguments; parsed.inFile_ = arguments.front;
break;
} }
arguments.popFront; arguments.popFront;
} }

View File

@ -4,6 +4,9 @@ import core.stdc.stdio;
import core.stdc.stdlib; import core.stdc.stdlib;
import core.stdc.string; import core.stdc.string;
import elna.ir; import elna.ir;
import elna.extended;
import std.sumtype;
import std.typecons;
import tanya.container.array; import tanya.container.array;
import tanya.container.string; import tanya.container.string;
import tanya.memory.mmappool; import tanya.memory.mmappool;
@ -452,56 +455,137 @@ private size_t pad(size_t value) @nogc
struct Symbol struct Symbol
{ {
String name; String name;
Array!ubyte instructions; Array!Instruction instructions;
} }
/* enum Register : ubyte
.text
.globl main
.type main, @function
main:
movl $3, %eax
ret
*/
immutable ubyte[] instructions = [
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Register opcode of %rbq is 5.
0x50 + 5, // push% %rbp
0x48, 0x89, 0xe5, // movq %rsp, %rbp
0xb8, 0x03, 0x00, 0x00, 0x00, // movl $3, %eax
// Epilogue.
0x48, 0x89, 0xec, // movq %rbp, %rsp
0x58 + 5, // popq %rbp
0xc3, // ret
];
void writeObject(Definition ast, String outputFilename) @nogc
{ {
auto handle = fopen(outputFilename.toStringz, "wb"); AX = 0,
CX = 1,
DX = 2,
BX = 3,
SP = 4,
BP = 5,
SI = 6,
DI = 7,
}
if (handle is null) enum MOD : ubyte
{
indirect = 0,
one = 1,
four = 2,
direct = 3
}
/**
 * Direction flag of an instruction.
 *
 * The value is OR-ed into the opcode by the $(D_PSYMBOL Instruction)
 * constructor that accepts a direction, so the enumerators are already
 * positioned at the bit they occupy (bit 1).
 */
enum Direction : ubyte
{
    /// Flag cleared.
    registerToMemory = 0b00,
    /// Flag set (bit 1).
    memoryToRegister = 0b10,
}
/**
 * Operand size flag of an instruction.
 *
 * The value is OR-ed into the opcode by the $(D_PSYMBOL Instruction)
 * constructor that accepts a size; it occupies the lowest bit.
 */
enum Size : ubyte
{
    /// Flag cleared; this is the default of the constructor.
    eight = 0x0,
    /// Flag set (bit 0).
    thirtyTwo = 0x1,
}
struct Instruction
{
private ushort opcode;
private ubyte rexPrefix;
private Nullable!ubyte modrmByte;
private SumType!(typeof(null), byte, int) operand1;
this(ushort opcode) @nogc nothrow pure @safe
{ {
perror("writing sample"); this.opcode = opcode;
return;
}
scope (exit)
{
fclose(handle);
} }
size_t currentOffset = Elf64_Ehdr.sizeof; this(ubyte opcode, Register register) @nogc nothrow pure @safe
Array!Elf64_Shdr sectionHeaders; {
Array!Elf64_Sym symbolEntries; this.opcode = opcode + register;
}
/**
 * Creates an instruction whose opcode is the base opcode combined with
 * the direction and operand size flags.
 *
 * Params:
 *  opcode    = Base opcode with both flag bits cleared.
 *  direction = Direction flag, OR-ed into the opcode.
 *  size      = Operand size flag, OR-ed into the opcode.
 */
this(ubyte opcode, Direction direction, Size size = Size.eight) @nogc nothrow pure @safe
{
    // Both flags live in the low bits of the opcode byte.
    const flags = cast(ubyte) (direction | size);

    this.opcode = cast(ushort) (opcode | flags);
}
/**
 * Attaches a REX prefix to the instruction.
 *
 * The prefix is 0x40 with the four flags stored in its low nibble
 * in the order W, R, X, B (bits 3 to 0).
 *
 * Params:
 *  w = Sets bit 3 of the prefix.
 *  r = Sets bit 2 of the prefix.
 *  x = Sets bit 1 of the prefix.
 *  b = Sets bit 0 of the prefix.
 *
 * Returns: $(D_KEYWORD this), so that calls can be chained.
 */
ref Instruction addREXPrefix(bool w = true, bool r = false, bool x = false, bool b = false)
return @nogc nothrow pure @safe
{
    ubyte prefix = 0x40;

    if (w)
    {
        prefix |= 0x08;
    }
    if (r)
    {
        prefix |= 0x04;
    }
    if (x)
    {
        prefix |= 0x02;
    }
    if (b)
    {
        prefix |= 0x01;
    }
    this.rexPrefix = prefix;

    return this;
}
/**
 * Attaches a ModR/M byte to the instruction.
 *
 * The byte is laid out as: mode in bits 7-6, register in bits 5-3 and
 * rm in bits 2-0.
 *
 * Params:
 *  mode     = Addressing mode.
 *  register = Register stored in the middle field.
 *  rm       = Register stored in the lowest field.
 *
 * Returns: $(D_KEYWORD this), so that calls can be chained.
 */
ref Instruction addMODRMByte(MOD mode, Register register, Register rm)
return @nogc nothrow pure @safe
{
    const modeBits = cast(ubyte) (mode << 6);
    const registerBits = cast(ubyte) (register << 3);

    this.modrmByte = cast(ubyte) (modeBits | registerBits | rm);

    return this;
}
/**
 * Sets the first operand to a 4-byte value.
 *
 * Params:
 *  operand = Value stored as the $(D_KEYWORD int) alternative of the
 *            operand.
 *
 * Returns: $(D_KEYWORD this), so that calls can be chained.
 */
ref Instruction addOperand1(int operand) return @nogc nothrow pure @safe
{
    operand1 = operand;

    return this;
}
/**
 * Sets the first operand to a 1-byte value.
 *
 * Params:
 *  operand = Value stored as the $(D_KEYWORD byte) alternative of the
 *            operand.
 *
 * Returns: $(D_KEYWORD this), so that calls can be chained.
 */
ref Instruction addOperand1(byte operand) return @nogc nothrow pure @safe
{
    operand1 = operand;

    return this;
}
}
/**
 * Serializes a sequence of instructions into raw bytes.
 *
 * For every instruction the following parts are appended, in this order:
 * the REX prefix (only if it is non-zero), one opcode byte, the ModR/M
 * byte (only if it is set) and the first operand (1 byte, 4 bytes or
 * nothing, depending on the stored alternative).
 *
 * Params:
 *  instructions = Instructions to encode.
 *
 * Returns: The encoded bytes of all instructions, concatenated.
 */
Array!ubyte binaryInstructions(ref Array!Instruction instructions) @nogc nothrow
{
    Array!ubyte encoded;

    foreach (ref current; instructions)
    {
        // A zero prefix is treated as "no REX prefix".
        if (current.rexPrefix != 0)
        {
            ubyte* prefix = &current.rexPrefix;
            encoded.insertBack(prefix[0 .. 1]);
        }

        // Only the first byte of the opcode field, as laid out in memory,
        // is emitted.
        // NOTE(review): this is the low byte on a little-endian host only.
        auto opcodeBytes = cast(ubyte*) &current.opcode;
        encoded.insertBack(opcodeBytes[0 .. 1]);

        const hasModRM = !current.modrmByte.isNull;
        if (hasModRM)
        {
            encoded.insertBack((&current.modrmByte.get())[0 .. 1]);
        }

        // The byte handler has to stay ahead of the int handler.
        // NOTE(review): presumably a byte would otherwise be accepted by
        // the int handler and written as 4 bytes; confirm against the
        // std.sumtype matching rules.
        current.operand1.match!(
            (typeof(null)) {
            },
            (byte value) {
                auto raw = cast(ubyte*) &value;
                encoded.insertBack(raw[0 .. 1]);
            },
            (int value) {
                // Written in the byte order of the host.
                auto raw = cast(ubyte*) &value;
                encoded.insertBack(raw[0 .. int.sizeof]);
            }
        );
    }

    return encoded;
}
Array!Symbol buildInstructions(Definition ast) @nogc
{
Array!Instruction instructions;
// Prologue // Prologue
Array!ubyte asmTemplate = Array!ubyte(cast(ubyte[]) [ // Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode. // Register opcode of %rbp is 5.
// Register opcode of %rbq is 5. instructions.insertBack(Instruction(0x50, Register.BP)); // pushq %rbp
0x50 + 5, // pushq %rbp instructions.insertBack( // movq %rsp, %rbp
0x48, 0x89, 0xe5, // movq %rsp, %rbp Instruction(0x89)
]); .addREXPrefix()
.addMODRMByte(MOD.direct, Register.SP, Register.BP)
);
int i = 1; int i = 1;
foreach (statement; ast.statements[]) foreach (statement; ast.statements[])
{ {
@ -509,49 +593,79 @@ void writeObject(Definition ast, String outputFilename) @nogc
{ {
// Opcode of mov is “0xb8 + r”, where “r” is the register opcode. // Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
// Register opcode of %eax is 0. // Register opcode of %eax is 0.
asmTemplate.insertBack(cast(ubyte) 0xb8); // movl $x, %eax; where $x is a number. instructions.insertBack(
asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.lhs).value)[0 .. int.sizeof]); Instruction(0xb8, Register.AX) // movl $x, %eax; where $x is a number.
.addOperand1((cast(Number) statement.subroutine.lhs).value)
);
} }
else if ((cast(Variable) statement.subroutine.lhs) !is null) else if ((cast(Variable) statement.subroutine.lhs) !is null)
{ {
// movl -x(%rbp), %ebx; where x is a number. // movl -x(%rbp), %eax; where x is a number.
asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x45]); instructions.insertBack(
const disposition = (cast(Variable) statement.subroutine.lhs).counter * (-4); Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo)
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); .addMODRMByte(MOD.one, Register.AX, Register.BP)
);
} }
if ((cast(Number) statement.subroutine.rhs) !is null) if ((cast(Number) statement.subroutine.rhs) !is null)
{ {
// Opcode of mov is “0xb8 + r”, where “r” is the register opcode. // Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
// Register opcode of %ebx is 3. // Register opcode of %ebx is 3.
asmTemplate.insertBack(cast(ubyte) 0xbb); // movl $x, %ebx; where $x is a number. instructions.insertBack(
asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.rhs).value)[0 .. int.sizeof]); Instruction(0xb8, Register.BX) // movl $x, %ebx; where $x is a number.
.addOperand1((cast(Number) statement.subroutine.rhs).value)
);
} }
else if ((cast(Variable) statement.subroutine.rhs) !is null) else if ((cast(Variable) statement.subroutine.rhs) !is null)
{ {
// movl -x(%rbp), %ebx; where x is a number. // movl -x(%rbp), %ebx; where x is a number.
asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x5d]); instructions.insertBack(
const disposition = (cast(Variable) statement.subroutine.rhs).counter * (-4); Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo)
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); .addMODRMByte(MOD.one, Register.BX, Register.BP)
.addOperand1(cast(byte) ((cast(Variable) statement.subroutine.rhs).counter * (-4)))
);
} }
// Calculate the result and assign it to a variable on the stack. // Calculate the result and assign it to a variable on the stack.
asmTemplate.insertBack(cast(ubyte[]) [0x01, 0xd8]); // add %ebx, %eax instructions.insertBack(
Instruction(0x00, Direction.registerToMemory, Size.thirtyTwo)
.addMODRMByte(MOD.direct, Register.BX, Register.AX) // add %ebx, %eax
);
asmTemplate.insertBack(cast(ubyte[]) [0x89, 0x45]); // movl %eax, -x(%rbp); where x is a number. instructions.insertBack( // movl %eax, -x(%rbp); where x is a number.
const disposition = i * (-4); Instruction(0x89, Direction.registerToMemory, Size.thirtyTwo)
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]); .addMODRMByte(MOD.one, Register.AX, Register.BP)
.addOperand1(cast(byte) (i * (-4)))
);
++i; ++i;
} }
// Epilogue. // Epilogue.
asmTemplate.insertBack(cast(ubyte[]) [ instructions.insertBack( // movq %rbp, %rsp
0x48, 0x89, 0xec, // movq %rbp, %rsp Instruction(0x89)
0x58 + 5, // popq %rbp .addREXPrefix()
0xc3, // ret .addMODRMByte(MOD.direct, Register.BP, Register.SP)
]); );
instructions.insertBack(Instruction(0x58, Register.BP)); // popq %rbp
instructions.insertBack(Instruction(0xc3)); // ret
Symbol[1] symbols = [Symbol(String("main"), asmTemplate)]; return typeof(return)([Symbol(String("main"), instructions)]);
}
sectionHeaders.insertBack(makeInitialHeader()); void writeObject(Definition ast, String outputFilename) @nogc
symbolEntries.insertBack(makeInitialSymTable()); {
auto handle = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate));
if (!handle.valid)
{
perror("writing sample");
return;
}
size_t currentOffset = Elf64_Ehdr.sizeof;
auto symbols = buildInstructions(ast);
Array!Elf64_Shdr sectionHeaders = [makeInitialHeader()];
Array!Elf64_Sym symbolEntries = [makeInitialSymTable()];
Array!ubyte instructionSection;
ubyte[8] padding = 0;
String stringTable = String("\0"); String stringTable = String("\0");
foreach (symbol; symbols[]) foreach (symbol; symbols[])
@ -559,14 +673,20 @@ void writeObject(Definition ast, String outputFilename) @nogc
stringTable.insertBack(symbol.name[]); stringTable.insertBack(symbol.name[]);
stringTable.insertBack('\0'); stringTable.insertBack('\0');
sectionHeaders.insertBack(makeTextHeader(currentOffset, symbol.instructions.length)); auto code = binaryInstructions(symbol.instructions);
currentOffset = pad(currentOffset + symbol.instructions.length); sectionHeaders.insertBack(makeTextHeader(currentOffset, code.length));
symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1))); symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1)));
immutable size_t instructionsLength = pad(code.length);
instructionSection.insertBack(code[]);
instructionSection.insertBack(padding[0 .. instructionsLength - code.length]);
currentOffset += instructionsLength;
} }
const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof; const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof;
sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, symbols.length)); sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, cast(uint) symbols.length));
currentOffset += symbolTableSize; currentOffset += symbolTableSize;
sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length)); sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length));
@ -577,38 +697,30 @@ void writeObject(Definition ast, String outputFilename) @nogc
auto fileHeader = makeFileHeader(currentOffset, 5, 4); auto fileHeader = makeFileHeader(currentOffset, 5, 4);
version (none) handle.write((cast(ubyte*) &fileHeader)[0 .. Elf64_Ehdr.sizeof]);
{ handle.write(instructionSection.get);
printf("%.2x\n", cast(uint) currentOffset); handle.write((cast(ubyte*) symbolEntries.get.ptr)[0 .. Elf64_Sym.sizeof * symbolEntries.length]);
}
ubyte[8] padding = 0;
size_t codeLength = stringTable.length + sectionStringTable.length;
fwrite(&fileHeader, 8, Elf64_Ehdr.sizeof / 8, handle); immutable size_t codeLength = stringTable.length + sectionStringTable.length;
foreach (symbol; symbols[]) handle.write(cast(ubyte[]) stringTable.get);
{ handle.write(cast(ubyte[]) sectionStringTable);
immutable size_t instructionsLength = pad(symbol.instructions.length); handle.write(padding[0 .. pad(codeLength) - codeLength]);
fwrite(symbol.instructions.get.ptr, 1, symbol.instructions.length, handle);
fwrite(padding.ptr, 1, instructionsLength - symbol.instructions.length, handle); handle.write((cast(ubyte*) sectionHeaders.get.ptr)[0 .. Elf64_Shdr.sizeof * sectionHeaders.length]);
codeLength += instructionsLength;
}
fwrite(symbolEntries.get.ptr, Elf64_Sym.sizeof, symbolEntries.length, handle);
fwrite(stringTable.get.ptr, 1, stringTable.length, handle);
fwrite(sectionStringTable.ptr, 1, sectionStringTable.length, handle);
fwrite(padding.ptr, pad(codeLength) - codeLength, 1, handle);
fwrite(sectionHeaders.get.ptr, Elf64_Shdr.sizeof, sectionHeaders.length, handle);
} }
String generate(Definition ast) @nogc void generate(Definition ast, String outputFilename) @nogc
{ {
auto asmTemplate = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate));
// Prologue // Prologue
String asmTemplate = ".text asmTemplate.write(cast(const(ubyte)[]) ".text
.globl main .globl main
.type main, @function .type main, @function
main: main:
pushq %rbp pushq %rbp
movq %rsp, %rbp movq %rsp, %rbp
"; ");
/* Allocate space on the stack for local variables. /* Allocate space on the stack for local variables.
asmTemplate.insertBack(" sub $"); asmTemplate.insertBack(" sub $");
@ -620,41 +732,39 @@ main:
{ {
if ((cast(Number) statement.subroutine.lhs) !is null) if ((cast(Number) statement.subroutine.lhs) !is null)
{ {
asmTemplate.insertBack(" movl $"); asmTemplate.write(cast(const(ubyte)[]) " movl $");
asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.lhs).value)[]); asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.lhs).value).get);
asmTemplate.insertBack(", %eax\n"); asmTemplate.write(cast(const(ubyte)[]) ", %eax\n");
} }
else if ((cast(Variable) statement.subroutine.lhs) !is null) else if ((cast(Variable) statement.subroutine.lhs) !is null)
{ {
asmTemplate.insertBack(" movl -"); asmTemplate.write(cast(const(ubyte)[]) " movl -");
asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4)[]); asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4).get);
asmTemplate.insertBack("(%rbp), %eax\n"); asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %eax\n");
} }
if ((cast(Number) statement.subroutine.rhs) !is null) if ((cast(Number) statement.subroutine.rhs) !is null)
{ {
asmTemplate.insertBack(" movl $"); asmTemplate.write(cast(const(ubyte)[]) " movl $");
asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.rhs).value)[]); asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.rhs).value).get);
asmTemplate.insertBack(", %ebx\n"); asmTemplate.write(cast(const(ubyte)[]) ", %ebx\n");
} }
else if ((cast(Variable) statement.subroutine.rhs) !is null) else if ((cast(Variable) statement.subroutine.rhs) !is null)
{ {
asmTemplate.insertBack(" movl -"); asmTemplate.write(cast(const(ubyte)[]) " movl -");
asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4)[]); asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4).get);
asmTemplate.insertBack("(%rbp), %ebx\n"); asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %ebx\n");
} }
// Calculate the result and assign it to a variable on the stack. // Calculate the result and assign it to a variable on the stack.
asmTemplate.insertBack(" add %ebx, %eax\n"); asmTemplate.write(cast(const(ubyte)[]) " add %ebx, %eax\n");
asmTemplate.insertBack(" movl %eax, -"); asmTemplate.write(cast(const(ubyte)[]) " movl %eax, -");
asmTemplate.insertBack(format!"{}"(i * 4)[]); asmTemplate.write(cast(ubyte[]) format!"{}"(i * 4).get);
asmTemplate.insertBack("(%rbp)\n"); asmTemplate.write(cast(const(ubyte)[]) "(%rbp)\n");
++i; ++i;
} }
// Epilogue. // Epilogue.
asmTemplate.insertBack(" movq %rbp, %rsp asmTemplate.write(cast(const(ubyte)[]) " movq %rbp, %rsp
popq %rbp popq %rbp
ret ret
"); ");
return asmTemplate;
} }

View File

@ -6,6 +6,9 @@ import elna.parser;
import elna.generator; import elna.generator;
import elna.ir; import elna.ir;
import elna.extended; import elna.extended;
import elna.arguments;
import std.algorithm;
import std.range;
import std.sumtype; import std.sumtype;
import std.typecons; import std.typecons;
import tanya.container.array; import tanya.container.array;
@ -32,37 +35,51 @@ int main(string[] args)
{ {
defaultAllocator = MmapPool.instance; defaultAllocator = MmapPool.instance;
if (args.length < 2) return Arguments.parse(args).match!(
{ (ArgumentError argumentError) => 4,
return 4; (Arguments arguments) {
} auto sourceText = readSource(arguments.inFile);
auto sourceText = readSource(args[1]); if (sourceText.isNull)
if (sourceText.isNull) {
{ return 3;
return 3; }
} auto tokens = lex(sourceText.get.get);
auto tokens = lex(sourceText.get.get); if (tokens.length == 0)
if (tokens.length == 0) {
{ printf("Lexical analysis failed.\n");
printf("Lexical analysis failed.\n"); return 1;
return 1; }
} auto ast = parse(tokens);
auto ast = parse(tokens); if (!ast.valid)
if (!ast.valid) {
{ auto compileError = ast.error.get;
auto compileError = ast.error.get; printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr);
printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr); return 2;
return 2; }
} auto ir = transform(ast.result);
auto ir = transform(ast.result);
String outputFilename = String("build/"); String outputFilename;
outputFilename.insertBack(args[1][0 .. $ - 4]); if (arguments.output is null)
outputFilename.insertBack("o"); {
writeObject(ir, outputFilename); auto slashIndex = max(0, arguments.inFile.retro.countUntil('/'));
auto code = generate(ir); outputFilename.insertBack(arguments.inFile[$ - slashIndex .. $ - 4]);
printf("%s", code.toStringz()); outputFilename.insertBack(arguments.assembler ? "s" : "o");
}
else
{
outputFilename = String(arguments.output);
}
if (arguments.assembler)
{
generate(ir, outputFilename);
}
else
{
writeObject(ir, outputFilename);
}
return 0; return 0;
}
);
} }