Add intermediate assembler representation

This commit is contained in:
Eugen Wissner 2022-06-08 08:34:44 +02:00
parent 473cd4e498
commit 77857ad118
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
4 changed files with 295 additions and 153 deletions

View File

@ -17,18 +17,25 @@ directory 'build'
CLEAN.include 'build'
CLEAN.include '.dub'
rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file) }) do |t|
sh 'gcc', '-o', t.name, "#{t.name}.o"
rule(/build\/tests\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.o') }) do |t|
sh 'ld.gold', '-L/usr/lib64',
'--dynamic-linker', '/lib64/ld-linux-x86-64.so.2',
'-o', t.name,
'/usr/lib64/crt1.o', '/usr/lib64/crti.o', '-lc', t.source, '/usr/lib64/crtn.o'
end
rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_object(file) }) do |t|
Pathname.new(t.name).dirname.mkpath
Open3.pipeline [BINARY, t.source], ['gcc', '-x', 'assembler', '-o', t.name, '-']
rule(/build\/asm\/[^\/\.]+$/ => ->(file) { test_for_out(file, '.s') }) do |t|
sh 'gcc', '-x', 'assembler', '-o', t.name, t.source
end
rule(/build\/tests\/.+\.o$/ => ->(file) { test_for_object(file) }) do |t|
Pathname.new(t.name).dirname.mkpath
sh BINARY, t.source
sh BINARY, '-o', t.name, t.source
end
# Rule for targets whose path contains "build/asm/" and ends in ".s".
# The prerequisites are whatever test_for_object returns for the target name.
# The action first creates the target's parent directory and then runs BINARY
# with the arguments "-s -o <target> <first prerequisite>".
# NOTE(review): "-s" presumably selects assembly output in the compiler's
# argument parser -- confirm against the Arguments struct.
rule(/build\/asm\/.+\.s$/ => ->(file) { test_for_object(file) }) do |t|
Pathname.new(t.name).dirname.mkpath
sh BINARY, '-s', '-o', t.name, t.source
end
file BINARY => SOURCES do |t|
@ -70,9 +77,9 @@ def test_for_object(out_file)
[test_source, BINARY]
end
def test_for_out(out_file)
def test_for_out(out_file, extension)
Pathname
.new(out_file)
.sub_ext('.o')
.sub_ext(extension)
.to_path
end

View File

@ -13,6 +13,7 @@ struct ArgumentError
{
expectedOutputFile,
noInput,
superfluousArguments,
}
private Type type_;
@ -50,11 +51,11 @@ struct Arguments
{
private bool assembler_;
private string output_;
private string[] inFiles_;
private string inFile_;
@property string[] inFiles() @nogc nothrow pure @safe
@property string inFile() @nogc nothrow pure @safe
{
return this.inFiles_;
return this.inFile_;
}
/**
@ -95,10 +96,17 @@ struct Arguments
return parseArguments(arguments).match!(
(Arguments parsed) {
if (parsed.inFiles.empty)
if (parsed.inFile is null)
{
return ReturnType(ArgumentError(ArgumentError.Type.noInput));
}
else if (!arguments.empty)
{
return ReturnType(ArgumentError(
ArgumentError.Type.superfluousArguments,
arguments.front
));
}
return ReturnType(parsed);
},
(ArgumentError argumentError) => ReturnType(argumentError)
@ -131,13 +139,13 @@ struct Arguments
else if (arguments.front == "--")
{
arguments.popFront;
parsed.inFiles_ = arguments;
parsed.inFile_ = arguments.front;
arguments.popFront;
break;
}
else if (!arguments.front.startsWith("-"))
{
parsed.inFiles_ = arguments;
break;
parsed.inFile_ = arguments.front;
}
arguments.popFront;
}

View File

@ -4,6 +4,9 @@ import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;
import elna.ir;
import elna.extended;
import std.sumtype;
import std.typecons;
import tanya.container.array;
import tanya.container.string;
import tanya.memory.mmappool;
@ -452,56 +455,137 @@ private size_t pad(size_t value) @nogc
struct Symbol
{
String name;
Array!ubyte instructions;
Array!Instruction instructions;
}
/*
.text
.globl main
.type main, @function
main:
movl $3, %eax
ret
*/
immutable ubyte[] instructions = [
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Register opcode of %rbp is 5.
0x50 + 5, // push% %rbp
0x48, 0x89, 0xe5, // movq %rsp, %rbp
0xb8, 0x03, 0x00, 0x00, 0x00, // movl $3, %eax
// Epilogue.
0x48, 0x89, 0xec, // movq %rbp, %rsp
0x58 + 5, // popq %rbp
0xc3, // ret
];
void writeObject(Definition ast, String outputFilename) @nogc
enum Register : ubyte
{
auto handle = fopen(outputFilename.toStringz, "wb");
AX = 0,
CX = 1,
DX = 2,
BX = 3,
SP = 4,
BP = 5,
SI = 6,
DI = 7,
}
if (handle is null)
/**
 * Values for the addressing-mode part of a ModR/M byte.
 *
 * Instruction.addMODRMByte shifts the selected value left by six bits, so it
 * occupies the two most significant bits of the resulting byte.
 */
enum MOD : ubyte
{
/// Indirect addressing through the r/m register.
/// NOTE(review): presumably the form without a displacement - confirm.
indirect = 0,
/// Indirect addressing followed by a one-byte displacement; the code generator
/// pairs this mode with a `byte` operand.
one = 1,
/// NOTE(review): presumably indirect addressing with a four-byte displacement;
/// not used in the visible code - confirm.
four = 2,
/// Both operands are registers (used for the register-to-register moves and add).
direct = 3
}
/**
 * Transfer direction flag that an Instruction constructor ORs into the opcode.
 *
 * The flag lives in bit 1 of the opcode byte.
 */
enum Direction : ubyte
{
/// Bit 1 clear: the register operand is the source.
registerToMemory = 0,
/// Bit 1 set: the register operand is the destination.
memoryToRegister = 1 << 1
}
/**
 * Operand size flag that an Instruction constructor ORs into the opcode.
 *
 * The flag lives in bit 0 of the opcode byte.
 */
enum Size : ubyte
{
/// Bit 0 clear. NOTE(review): presumably selects 8-bit operands - confirm.
eight = 0,
/// Bit 0 set; used by the code generator for the 32-bit `movl`/`add` forms.
thirtyTwo = 1
}
struct Instruction
{
private ushort opcode;
private ubyte rexPrefix;
private Nullable!ubyte modrmByte;
private SumType!(typeof(null), byte, int) operand1;
this(ushort opcode) @nogc nothrow pure @safe
{
perror("writing sample");
return;
}
scope (exit)
{
fclose(handle);
this.opcode = opcode;
}
size_t currentOffset = Elf64_Ehdr.sizeof;
Array!Elf64_Shdr sectionHeaders;
Array!Elf64_Sym symbolEntries;
this(ubyte opcode, Register register) @nogc nothrow pure @safe
{
this.opcode = opcode + register;
}
this(ubyte opcode, Direction direction, Size size = Size.eight)
@nogc nothrow pure @safe
{
this.opcode = opcode | direction | size;
}
ref Instruction addREXPrefix(bool w = true, bool r = false, bool x = false, bool b = false)
return @nogc nothrow pure @safe
{
this.rexPrefix = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
return this;
}
ref Instruction addMODRMByte(MOD mode, Register register, Register rm)
return @nogc nothrow pure @safe
{
this.modrmByte = cast(ubyte) ((mode << 6) | (register << 3) | rm);
return this;
}
ref Instruction addOperand1(int operand)
return @nogc nothrow pure @safe
{
this.operand1 = operand;
return this;
}
ref Instruction addOperand1(byte operand)
return @nogc nothrow pure @safe
{
this.operand1 = operand;
return this;
}
}
/**
 * Serializes a list of instructions into raw bytes.
 *
 * For each instruction the bytes are appended in this order:
 * the REX prefix (only if it is non-zero), a single opcode byte,
 * the ModR/M byte (only if one was set) and the operand (only if present).
 *
 * Params:
 *     instructions = Instructions to encode, in emission order.
 *
 * Returns: The concatenated byte encoding of all instructions.
 */
Array!ubyte binaryInstructions(ref Array!Instruction instructions)
@nogc nothrow
{
    Array!ubyte encoded;

    foreach (ref current; instructions)
    {
        // addREXPrefix always sets bit 0x40, so zero means "no prefix".
        if (current.rexPrefix != 0)
        {
            auto prefix = &current.rexPrefix;
            encoded.insertBack(prefix[0 .. 1]);
        }

        // Only the first in-memory byte of the 16-bit opcode field is emitted
        // (the low byte on a little-endian host).
        auto opcodeBytes = cast(ubyte*) &current.opcode;
        encoded.insertBack(opcodeBytes[0 .. 1]);

        if (!current.modrmByte.isNull)
        {
            auto modrm = &current.modrmByte.get();
            encoded.insertBack(modrm[0 .. 1]);
        }

        // The byte handler must stay ahead of the int handler: a byte would
        // otherwise be accepted by the int overload through implicit conversion.
        current.operand1.match!(
            (byte displacement) {
                auto raw = cast(ubyte*) &displacement;
                encoded.insertBack(raw[0 .. 1]);
            },
            (int immediate) {
                auto raw = cast(ubyte*) &immediate;
                encoded.insertBack(raw[0 .. int.sizeof]);
            },
            (typeof(null)) {
                // No operand: nothing to append.
            }
        );
    }

    return encoded;
}
Array!Symbol buildInstructions(Definition ast) @nogc
{
Array!Instruction instructions;
// Prologue
Array!ubyte asmTemplate = Array!ubyte(cast(ubyte[]) [
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Register opcode of %rbp is 5.
0x50 + 5, // pushq %rbp
0x48, 0x89, 0xe5, // movq %rsp, %rbp
]);
// Opcode of pushq is “0x50 + r”, where “r” is the register opcode.
// Register opcode of %rbp is 5.
instructions.insertBack(Instruction(0x50, Register.BP)); // pushq %rbp
instructions.insertBack( // movq %rsp, %rbp
Instruction(0x89)
.addREXPrefix()
.addMODRMByte(MOD.direct, Register.SP, Register.BP)
);
int i = 1;
foreach (statement; ast.statements[])
{
@ -509,49 +593,79 @@ void writeObject(Definition ast, String outputFilename) @nogc
{
// Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
// Register opcode of %eax is 0.
asmTemplate.insertBack(cast(ubyte) 0xb8); // movl $x, %eax; where $x is a number.
asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.lhs).value)[0 .. int.sizeof]);
instructions.insertBack(
Instruction(0xb8, Register.AX) // movl $x, %eax; where $x is a number.
.addOperand1((cast(Number) statement.subroutine.lhs).value)
);
}
else if ((cast(Variable) statement.subroutine.lhs) !is null)
{
// movl -x(%rbp), %ebx; where x is a number.
asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x45]);
const disposition = (cast(Variable) statement.subroutine.lhs).counter * (-4);
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]);
// movl -x(%rbp), %eax; where x is a number.
instructions.insertBack(
Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo)
.addMODRMByte(MOD.one, Register.AX, Register.BP)
);
}
if ((cast(Number) statement.subroutine.rhs) !is null)
{
// Opcode of mov is “0xb8 + r”, where “r” is the register opcode.
// Register opcode of %ebx is 3.
asmTemplate.insertBack(cast(ubyte) 0xbb); // movl $x, %ebx; where $x is a number.
asmTemplate.insertBack((cast(ubyte*) &(cast(Number) statement.subroutine.rhs).value)[0 .. int.sizeof]);
instructions.insertBack(
Instruction(0xb8, Register.BX) // movl $x, %ebx; where $x is a number.
.addOperand1((cast(Number) statement.subroutine.rhs).value)
);
}
else if ((cast(Variable) statement.subroutine.rhs) !is null)
{
// movl -x(%rbp), %ebx; where x is a number.
asmTemplate.insertBack(cast(ubyte[]) [0x8b, 0x5d]);
const disposition = (cast(Variable) statement.subroutine.rhs).counter * (-4);
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]);
instructions.insertBack(
Instruction(0x89, Direction.memoryToRegister, Size.thirtyTwo)
.addMODRMByte(MOD.one, Register.BX, Register.BP)
.addOperand1(cast(byte) ((cast(Variable) statement.subroutine.rhs).counter * (-4)))
);
}
// Calculate the result and assign it to a variable on the stack.
asmTemplate.insertBack(cast(ubyte[]) [0x01, 0xd8]); // add %ebx, %eax
instructions.insertBack(
Instruction(0x00, Direction.registerToMemory, Size.thirtyTwo)
.addMODRMByte(MOD.direct, Register.BX, Register.AX) // add %ebx, %eax
);
asmTemplate.insertBack(cast(ubyte[]) [0x89, 0x45]); // movl %eax, -x(%rbp); where x is a number.
const disposition = i * (-4);
asmTemplate.insertBack((cast(ubyte*) &disposition)[0 .. 1]);
instructions.insertBack( // movl %eax, -x(%rbp); where x is a number.
Instruction(0x89, Direction.registerToMemory, Size.thirtyTwo)
.addMODRMByte(MOD.one, Register.AX, Register.BP)
.addOperand1(cast(byte) (i * (-4)))
);
++i;
}
// Epilogue.
asmTemplate.insertBack(cast(ubyte[]) [
0x48, 0x89, 0xec, // movq %rbp, %rsp
0x58 + 5, // popq %rbp
0xc3, // ret
]);
instructions.insertBack( // movq %rbp, %rsp
Instruction(0x89)
.addREXPrefix()
.addMODRMByte(MOD.direct, Register.BP, Register.SP)
);
instructions.insertBack(Instruction(0x58, Register.BP)); // popq %rbp
instructions.insertBack(Instruction(0xc3)); // ret
Symbol[1] symbols = [Symbol(String("main"), asmTemplate)];
return typeof(return)([Symbol(String("main"), instructions)]);
}
sectionHeaders.insertBack(makeInitialHeader());
symbolEntries.insertBack(makeInitialSymTable());
void writeObject(Definition ast, String outputFilename) @nogc
{
auto handle = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate));
if (!handle.valid)
{
perror("writing sample");
return;
}
size_t currentOffset = Elf64_Ehdr.sizeof;
auto symbols = buildInstructions(ast);
Array!Elf64_Shdr sectionHeaders = [makeInitialHeader()];
Array!Elf64_Sym symbolEntries = [makeInitialSymTable()];
Array!ubyte instructionSection;
ubyte[8] padding = 0;
String stringTable = String("\0");
foreach (symbol; symbols[])
@ -559,14 +673,20 @@ void writeObject(Definition ast, String outputFilename) @nogc
stringTable.insertBack(symbol.name[]);
stringTable.insertBack('\0');
sectionHeaders.insertBack(makeTextHeader(currentOffset, symbol.instructions.length));
currentOffset = pad(currentOffset + symbol.instructions.length);
auto code = binaryInstructions(symbol.instructions);
sectionHeaders.insertBack(makeTextHeader(currentOffset, code.length));
symbolEntries.insertBack(makeMainSymTable(cast(Elf64_Half) (sectionHeaders.length - 1)));
immutable size_t instructionsLength = pad(code.length);
instructionSection.insertBack(code[]);
instructionSection.insertBack(padding[0 .. instructionsLength - code.length]);
currentOffset += instructionsLength;
}
const symbolTableSize = (symbols.length + 1) * Elf64_Sym.sizeof;
sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, symbols.length));
sectionHeaders.insertBack(makeSymtableHeader(currentOffset, symbolTableSize, cast(uint) symbols.length));
currentOffset += symbolTableSize;
sectionHeaders.insertBack(makeStringHeader(0x09, currentOffset, stringTable.length));
@ -577,38 +697,30 @@ void writeObject(Definition ast, String outputFilename) @nogc
auto fileHeader = makeFileHeader(currentOffset, 5, 4);
version (none)
{
printf("%.2x\n", cast(uint) currentOffset);
}
ubyte[8] padding = 0;
size_t codeLength = stringTable.length + sectionStringTable.length;
handle.write((cast(ubyte*) &fileHeader)[0 .. Elf64_Ehdr.sizeof]);
handle.write(instructionSection.get);
handle.write((cast(ubyte*) symbolEntries.get.ptr)[0 .. Elf64_Sym.sizeof * symbolEntries.length]);
fwrite(&fileHeader, 8, Elf64_Ehdr.sizeof / 8, handle);
foreach (symbol; symbols[])
{
immutable size_t instructionsLength = pad(symbol.instructions.length);
fwrite(symbol.instructions.get.ptr, 1, symbol.instructions.length, handle);
fwrite(padding.ptr, 1, instructionsLength - symbol.instructions.length, handle);
codeLength += instructionsLength;
}
fwrite(symbolEntries.get.ptr, Elf64_Sym.sizeof, symbolEntries.length, handle);
fwrite(stringTable.get.ptr, 1, stringTable.length, handle);
fwrite(sectionStringTable.ptr, 1, sectionStringTable.length, handle);
fwrite(padding.ptr, pad(codeLength) - codeLength, 1, handle);
fwrite(sectionHeaders.get.ptr, Elf64_Shdr.sizeof, sectionHeaders.length, handle);
immutable size_t codeLength = stringTable.length + sectionStringTable.length;
handle.write(cast(ubyte[]) stringTable.get);
handle.write(cast(ubyte[]) sectionStringTable);
handle.write(padding[0 .. pad(codeLength) - codeLength]);
handle.write((cast(ubyte*) sectionHeaders.get.ptr)[0 .. Elf64_Shdr.sizeof * sectionHeaders.length]);
}
String generate(Definition ast) @nogc
void generate(Definition ast, String outputFilename) @nogc
{
auto asmTemplate = File.open(outputFilename.toStringz, BitFlags!(File.Mode)(File.Mode.truncate));
// Prologue
String asmTemplate = ".text
asmTemplate.write(cast(const(ubyte)[]) ".text
.globl main
.type main, @function
main:
pushq %rbp
movq %rsp, %rbp
";
");
/* Allocate space on the stack for local variables.
asmTemplate.insertBack(" sub $");
@ -620,41 +732,39 @@ main:
{
if ((cast(Number) statement.subroutine.lhs) !is null)
{
asmTemplate.insertBack(" movl $");
asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.lhs).value)[]);
asmTemplate.insertBack(", %eax\n");
asmTemplate.write(cast(const(ubyte)[]) " movl $");
asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.lhs).value).get);
asmTemplate.write(cast(const(ubyte)[]) ", %eax\n");
}
else if ((cast(Variable) statement.subroutine.lhs) !is null)
{
asmTemplate.insertBack(" movl -");
asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4)[]);
asmTemplate.insertBack("(%rbp), %eax\n");
asmTemplate.write(cast(const(ubyte)[]) " movl -");
asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.lhs).counter * 4).get);
asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %eax\n");
}
if ((cast(Number) statement.subroutine.rhs) !is null)
{
asmTemplate.insertBack(" movl $");
asmTemplate.insertBack(format!"{}"((cast(Number) statement.subroutine.rhs).value)[]);
asmTemplate.insertBack(", %ebx\n");
asmTemplate.write(cast(const(ubyte)[]) " movl $");
asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Number) statement.subroutine.rhs).value).get);
asmTemplate.write(cast(const(ubyte)[]) ", %ebx\n");
}
else if ((cast(Variable) statement.subroutine.rhs) !is null)
{
asmTemplate.insertBack(" movl -");
asmTemplate.insertBack(format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4)[]);
asmTemplate.insertBack("(%rbp), %ebx\n");
asmTemplate.write(cast(const(ubyte)[]) " movl -");
asmTemplate.write(cast(ubyte[]) format!"{}"((cast(Variable) statement.subroutine.rhs).counter * 4).get);
asmTemplate.write(cast(const(ubyte)[]) "(%rbp), %ebx\n");
}
// Calculate the result and assign it to a variable on the stack.
asmTemplate.insertBack(" add %ebx, %eax\n");
asmTemplate.insertBack(" movl %eax, -");
asmTemplate.insertBack(format!"{}"(i * 4)[]);
asmTemplate.insertBack("(%rbp)\n");
asmTemplate.write(cast(const(ubyte)[]) " add %ebx, %eax\n");
asmTemplate.write(cast(const(ubyte)[]) " movl %eax, -");
asmTemplate.write(cast(ubyte[]) format!"{}"(i * 4).get);
asmTemplate.write(cast(const(ubyte)[]) "(%rbp)\n");
++i;
}
// Epilogue.
asmTemplate.insertBack(" movq %rbp, %rsp
asmTemplate.write(cast(const(ubyte)[]) " movq %rbp, %rsp
popq %rbp
ret
");
return asmTemplate;
}

View File

@ -6,6 +6,9 @@ import elna.parser;
import elna.generator;
import elna.ir;
import elna.extended;
import elna.arguments;
import std.algorithm;
import std.range;
import std.sumtype;
import std.typecons;
import tanya.container.array;
@ -32,37 +35,51 @@ int main(string[] args)
{
defaultAllocator = MmapPool.instance;
if (args.length < 2)
{
return 4;
}
auto sourceText = readSource(args[1]);
if (sourceText.isNull)
{
return 3;
}
auto tokens = lex(sourceText.get.get);
if (tokens.length == 0)
{
printf("Lexical analysis failed.\n");
return 1;
}
auto ast = parse(tokens);
if (!ast.valid)
{
auto compileError = ast.error.get;
printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr);
return 2;
}
auto ir = transform(ast.result);
return Arguments.parse(args).match!(
(ArgumentError argumentError) => 4,
(Arguments arguments) {
auto sourceText = readSource(arguments.inFile);
if (sourceText.isNull)
{
return 3;
}
auto tokens = lex(sourceText.get.get);
if (tokens.length == 0)
{
printf("Lexical analysis failed.\n");
return 1;
}
auto ast = parse(tokens);
if (!ast.valid)
{
auto compileError = ast.error.get;
printf("%lu:%lu: %s\n", compileError.line, compileError.column, compileError.message.ptr);
return 2;
}
auto ir = transform(ast.result);
String outputFilename = String("build/");
outputFilename.insertBack(args[1][0 .. $ - 4]);
outputFilename.insertBack("o");
writeObject(ir, outputFilename);
String outputFilename;
if (arguments.output is null)
{
auto slashIndex = max(0, arguments.inFile.retro.countUntil('/'));
auto code = generate(ir);
printf("%s", code.toStringz());
outputFilename.insertBack(arguments.inFile[$ - slashIndex .. $ - 4]);
outputFilename.insertBack(arguments.assembler ? "s" : "o");
}
else
{
outputFilename = String(arguments.output);
}
if (arguments.assembler)
{
generate(ir, outputFilename);
}
else
{
writeObject(ir, outputFilename);
}
return 0;
return 0;
}
);
}