summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/build.ninja5
-rw-r--r--arch/x64/linux/memory/cmp.S67
-rw-r--r--arch/x64/linux/memory/copy.S67
-rw-r--r--arch/x64/linux/memory/fill.S155
-rw-r--r--dub.json5
-rw-r--r--source/tanya/memory/arch/x86_64.d346
-rw-r--r--source/tanya/memory/op.d43
7 files changed, 322 insertions, 366 deletions
diff --git a/arch/build.ninja b/arch/build.ninja
index eff2eb9..fce2838 100644
--- a/arch/build.ninja
+++ b/arch/build.ninja
@@ -5,6 +5,9 @@ rule archive
command = ar rcs $out $in
build abs.o: gas x64/linux/math/abs.S
+build cmp.o: gas x64/linux/memory/cmp.S
+build fill.o: gas x64/linux/memory/fill.S
+build copy.o: gas x64/linux/memory/copy.S
build syscall.o: gas x64/linux/syscall.S
-build tanya.a: archive syscall.o abs.o
+build tanya.a: archive syscall.o copy.o fill.o cmp.o abs.o
diff --git a/arch/x64/linux/memory/cmp.S b/arch/x64/linux/memory/cmp.S
new file mode 100644
index 0000000..169e2eb
--- /dev/null
+++ b/arch/x64/linux/memory/cmp.S
@@ -0,0 +1,67 @@
+ .text
+
+/*
+ * cmpMemory.
+ *
+ * Three-way comparison of two memory slices. A shorter slice compares
+ * less than a longer one; slices of equal length are compared byte by
+ * byte as unsigned values (the same ordering memcmp uses), and the first
+ * differing byte decides the result.
+ *
+ * rdi - r1 length.
+ * rsi - r1 data.
+ * rdx - r2 length.
+ * rcx - r2 data.
+ *
+ * Returns -1, 0 or 1 in rax.
+ *
+ * The string instructions rely on the direction flag being clear on
+ * entry, as the System V ABI specifies.
+ */
+ .globl _D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi
+ .type _D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi, @function
+
+_D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi:
+ // Compare the lengths. They are unsigned, hence jb/ja
+ cmp %rdx, %rdi
+ jb less
+ ja greater
+
+ // The lengths are equal. Two empty slices are equal; without this
+ // check "repe cmpsb" below would run zero times and leave stale flags
+ test %rdx, %rdx
+ jz equal
+
+ // cmps compares (%rsi) with (%rdi), that is r1 with r2
+ mov %rcx, %rdi
+
+ // Check if we're aligned
+ cmp $0x08, %rdx
+ jb aligned_1
+ test $0x07, %edi
+ jz aligned_8
+
+ // Compare single bytes until r2 is aligned to 8 bytes. At most 7
+ // iterations and rdx >= 8, so rdx cannot reach zero here
+ naligned:
+ cmpsb
+ jb less
+ ja greater
+
+ dec %rdx
+ test $0x07, %edi
+ jnz naligned
+
+ aligned_8:
+ mov %rdx, %rcx
+ shr $0x03, %rcx
+ jz tail // Fewer than 8 bytes are left
+
+ repe cmpsq
+ je tail // All quadwords are equal
+
+ // A quadword differs. Its numeric (little-endian) order is not the
+ // byte order, so step back and find the first differing byte
+ sub $0x08, %rsi
+ sub $0x08, %rdi
+ mov $0x08, %rcx
+
+ repe cmpsb
+ jb less
+ jmp greater // A difference is guaranteed within these 8 bytes
+
+ tail:
+ and $0x07, %edx
+ jz equal
+
+ aligned_1: // Compare the remaining bytes, rdx is 1..7 here
+ mov %rdx, %rcx
+
+ repe cmpsb
+ jb less
+ ja greater
+
+ equal:
+ xor %rax, %rax // Return 0
+ jmp end
+
+ greater:
+ mov $0x01, %rax
+ jmp end
+
+ less:
+ mov $-0x01, %rax
+
+ end:
+ ret
diff --git a/arch/x64/linux/memory/copy.S b/arch/x64/linux/memory/copy.S
new file mode 100644
index 0000000..bf74e0f
--- /dev/null
+++ b/arch/x64/linux/memory/copy.S
@@ -0,0 +1,67 @@
+ .text
+
+/*
+ * copyMemory.
+ *
+ * Copies "source length" bytes from the source to the target, front to
+ * back. The incoming target length is overwritten and never consulted,
+ * so no bounds check against the target happens here.
+ *
+ * rdi - source length.
+ * rsi - source data.
+ * rdx - target length.
+ * rcx - target data.
+ *
+ * The string instructions rely on the direction flag being clear on
+ * entry, as the System V ABI specifies.
+ */
+ .globl _D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv
+ .type _D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv, @function
+
+_D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv:
+ // rdx becomes the number of bytes left, rdi the destination pointer;
+ // rsi already holds the source pointer as movs expects
+ mov %rdi, %rdx
+ mov %rcx, %rdi
+
+ // Fewer than 8 bytes: copy them one by one
+ cmp $0x08, %rdx
+ jc aligned_1
+ // Check whether the target is already aligned to 8 bytes
+ test $0x07, %edi
+ jz aligned_8
+
+ // Copy single bytes until the target is aligned. At most 7
+ // iterations, and rdx is at least 8 when the loop is entered
+ naligned:
+ movsb
+ dec %rdx
+ test $0x07, %edi
+ jnz naligned
+
+ aligned_8:
+ // Copy as many whole quadwords as are left
+ mov %rdx, %rcx
+ shr $0x03, %rcx
+ rep movsq
+ // Keep only the count of trailing bytes
+ and $0x07, %edx
+ jz end
+
+ aligned_1:
+ // Write the remaining bytes
+ mov %rdx, %rcx
+ rep movsb
+
+ end:
+ ret
+
+/*
+ * moveMemory.
+ *
+ * Copies "source length" bytes from the source to the target one byte at
+ * a time, starting with the last byte and moving towards the first. The
+ * incoming target length is overwritten and never consulted.
+ *
+ * NOTE(review): copying backward is only safe for overlapping ranges
+ * when the target does not start before the source - confirm that the
+ * callers guarantee this.
+ *
+ * rdi - source length.
+ * rsi - source data.
+ * rdx - target length.
+ * rcx - target data.
+ */
+ .globl _D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv
+ .type _D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv, @function
+
+_D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv:
+ // rdx = number of bytes to copy
+ mov %rdi, %rdx
+
+ // Point rsi and rdi at the last byte of the source and of the target
+ lea -1(%rdx, %rsi), %rsi
+ lea -1(%rdx, %rcx), %rdi
+ mov %rdx, %rcx
+
+ std // Set the direction flag
+
+ // With the direction flag set movsb decrements rsi and rdi.
+ // A zero count copies nothing
+ rep movsb
+
+ cld // Clear the direction flag
+ ret
diff --git a/arch/x64/linux/memory/fill.S b/arch/x64/linux/memory/fill.S
new file mode 100644
index 0000000..d4fc0ac
--- /dev/null
+++ b/arch/x64/linux/memory/fill.S
@@ -0,0 +1,155 @@
+ .text
+
+/*
+ * fillMemory.
+ *
+ * Fills a memory slice with a byte value. Single bytes are written until
+ * the pointer reaches a 16-byte boundary, then the bulk is written with
+ * aligned 16-byte stores, and the rest with one 8-byte store and single
+ * bytes.
+ *
+ * rdi - length.
+ * rsi - pointer.
+ * rdx - value filled with a byte, i.e. the fill byte repeated in each of
+ *       the 8 bytes of the register.
+ *
+ * Clobbers rax, rcx, r8, r9 and xmm0.
+ */
+ .globl _D5tanya6memory2op10fillMemoryFNaNbNiAvmZv
+ .type _D5tanya6memory2op10fillMemoryFNaNbNiAvmZv, @function
+
+_D5tanya6memory2op10fillMemoryFNaNbNiAvmZv:
+ // Check for zero length
+ test %rdi, %rdi
+ jz end
+
+ // rax - bytes left to write, r8 - write pointer
+ mov %rdi, %rax
+ mov %rsi, %r8
+
+ // Replicate the 8-byte pattern into both halves of xmm0
+ movq %rdx, %xmm0
+ movlhps %xmm0, %xmm0
+
+ // Round the pointer down to a 16-byte boundary
+ and $-0x10, %r8
+
+ // Compute the number of misaligned bytes
+ mov %rsi, %r9
+ sub %r8, %r9
+
+ test %r9, %r9
+ jz aligned
+
+ // Get the number of bytes to be written until we are aligned
+ mov $0x10, %rcx
+ sub %r9, %rcx
+
+ // Never write more than the requested length. Without this clamp a
+ // slice that ends before the next 16-byte boundary would be overrun
+ // and the remaining count in rax would wrap around
+ cmp %rax, %rcx
+ cmova %rax, %rcx
+
+ mov %rsi, %r8
+
+ naligned:
+ mov %dl, (%r8) // Write a byte
+
+ // Advance the pointer. Decrease the total number of bytes
+ // and the misaligned ones
+ inc %r8
+ dec %rcx
+ dec %rax
+
+ // Checks if we are aligned (or the slice is exhausted)
+ test %rcx, %rcx
+ jnz naligned
+
+ aligned:
+ // Checks if we're done writing bytes
+ test %rax, %rax
+ jz end
+
+ // The byte count is unsigned, hence jb/jae below
+
+ // Fewer than 8 bytes left: write 1 byte at a time
+ cmp $8, %rax
+ jb aligned_1
+
+ // Fewer than 16 bytes left: write 8 bytes at once
+ cmp $16, %rax
+ jb aligned_8
+
+ // Fewer than 32 bytes left: write 16 bytes at once
+ cmp $32, %rax
+ jb aligned_16
+
+ // Fewer than 64 bytes left: write 32 bytes at once
+ cmp $64, %rax
+ jb aligned_32
+
+ // Write 64 bytes per iteration. r8 is 16-byte aligned here, which
+ // movdqa requires
+ aligned_64:
+ movdqa %xmm0, (%r8)
+ movdqa %xmm0, 16(%r8)
+ movdqa %xmm0, 32(%r8)
+ movdqa %xmm0, 48(%r8)
+
+ add $64, %r8
+ sub $64, %rax
+
+ cmp $64, %rax
+ jae aligned_64
+
+ // Checks if we're done writing bytes
+ test %rax, %rax
+ jz end
+
+ // Fewer than 8 bytes left: write 1 byte at a time
+ cmp $8, %rax
+ jb aligned_1
+
+ // Fewer than 16 bytes left: write 8 bytes at once
+ cmp $16, %rax
+ jb aligned_8
+
+ // Fewer than 32 bytes left: write 16 bytes at once
+ cmp $32, %rax
+ jb aligned_16
+
+ aligned_32:
+ movdqa %xmm0, (%r8)
+ movdqa %xmm0, 16(%r8)
+
+ add $32, %r8
+ sub $32, %rax
+
+ // Checks if we're done writing bytes
+ test %rax, %rax
+ jz end
+
+ // Fewer than 8 bytes left: write 1 byte at a time
+ cmp $8, %rax
+ jb aligned_1
+
+ // Fewer than 16 bytes left: write 8 bytes at once
+ cmp $16, %rax
+ jb aligned_8
+
+ aligned_16:
+ movdqa %xmm0, (%r8)
+
+ add $16, %r8
+ sub $16, %rax
+
+ // Checks if we're done writing bytes
+ test %rax, %rax
+ jz end
+
+ // Fewer than 8 bytes left: write 1 byte at a time
+ cmp $8, %rax
+ jb aligned_1
+
+ aligned_8:
+ mov %rdx, (%r8)
+
+ add $8, %r8
+ sub $8, %rax
+
+ // Checks if we're done writing bytes
+ test %rax, %rax
+ jz end
+
+ // Write the remaining bytes (fewer than 8) one by one
+ aligned_1:
+ mov %dl, (%r8)
+
+ inc %r8
+ dec %rax
+
+ test %rax, %rax
+ jnz aligned_1
+
+ end:
+ ret
diff --git a/dub.json b/dub.json
index e6dfaaf..00188bf 100644
--- a/dub.json
+++ b/dub.json
@@ -18,9 +18,10 @@
{
"name": "native",
"targetType": "library",
- "platforms": ["linux-x86_64"],
+ "platforms": ["linux-x86_64-gdc"],
"preBuildCommands": ["ninja -C arch"],
- "lflags": ["arch/tanya.a"]
+ "lflags": ["arch/tanya.a"],
+ "versions": ["TanyaNative"]
}
]
}
diff --git a/source/tanya/memory/arch/x86_64.d b/source/tanya/memory/arch/x86_64.d
deleted file mode 100644
index 57e1563..0000000
--- a/source/tanya/memory/arch/x86_64.d
+++ /dev/null
@@ -1,346 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-/*
- * Implementions of functions found in $(D_PSYMBOL tanya.memory.op) for x64.
- *
- * Copyright: Eugene Wissner 2017.
- * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
- * Mozilla Public License, v. 2.0).
- * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
- * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/memory/arch/x86_64.d,
- * tanya/memory/arch/x86_64.d)
- */
-module tanya.memory.arch.x86_64;
-
-import tanya.memory.op;
-
-version (D_InlineAsm_X86_64):
-
-pragma(inline, true)
-package (tanya.memory) void copy(const void[] source, void[] target)
-pure nothrow @system @nogc
-{
- asm pure nothrow @nogc
- {
- naked;
-
- // RDI and RSI should be preserved.
- mov RAX, RDI;
- mov R8, RSI;
-
- // RDX - source length.
- // RCX - source data.
- // RDI - target length
- // RSI - target data.
-
- mov RDI, RSI;
- mov RSI, RCX;
-
- cmp RDX, 0x08;
- jc aligned_1;
- test EDI, 0x07;
- jz aligned_8;
-
- naligned:
- movsb;
- dec RDX;
- test EDI, 0x07;
- jnz naligned;
-
- aligned_8:
- mov RCX, RDX;
- shr RCX, 0x03;
- rep;
- movsq;
- and EDX, 0x07;
- jz end;
-
- aligned_1:
- // Write the remaining bytes.
- mov RCX, RDX;
- rep;
- movsb;
-
- end: // Restore registers.
- mov RSI, R8;
- mov RDI, RAX;
-
- ret;
- }
-}
-
-pragma(inline, true)
-package (tanya.memory) void fill(void[], ulong) pure nothrow @system @nogc
-{
- asm pure nothrow @nogc
- {
- naked;
-
- // Check for zero length.
- test RSI, RSI;
- jz end;
-
- /*
- * RDX - pointer.
- * RSI - length.
- * RDI - value filled with a byte.
- */
- mov RAX, RSI;
- mov R8, RDX;
-
- movq XMM0, RDI;
- movlhps XMM0, XMM0;
-
- // Check if the pointer is aligned to a 16-byte boundary.
- and R8, -0x10;
-
- // Compute the number of misaligned bytes.
- mov R9, RDX;
- sub R9, R8;
-
- test R9, R9;
- jz aligned;
-
- // Get the number of bytes to be written until we are aligned.
- mov RCX, 0x10;
- sub RCX, R9;
-
- mov R8, RDX;
-
- naligned:
- mov [ R8 ], DIL; // Write a byte.
-
- // Advance the pointer. Decrease the total number of bytes
- // and the misaligned ones.
- inc R8;
- dec RCX;
- dec RAX;
-
- // Checks if we are aligned.
- test RCX, RCX;
- jnz naligned;
-
- aligned:
- // Checks if we're done writing bytes.
- test RAX, RAX;
- jz end;
-
- // Write 1 byte at a time.
- cmp RAX, 8;
- jl aligned_1;
-
- // Write 8 bytes at a time.
- cmp RAX, 16;
- jl aligned_8;
-
- // Write 16 bytes at a time.
- cmp RAX, 32;
- jl aligned_16;
-
- // Write 32 bytes at a time.
- cmp RAX, 64;
- jl aligned_32;
-
- aligned_64:
- movdqa [ R8 ], XMM0;
- movdqa [ R8 + 16 ], XMM0;
- movdqa [ R8 + 32 ], XMM0;
- movdqa [ R8 + 48 ], XMM0;
-
- add R8, 64;
- sub RAX, 64;
-
- cmp RAX, 64;
- jge aligned_64;
-
- // Checks if we're done writing bytes.
- test RAX, RAX;
- jz end;
-
- // Write 1 byte at a time.
- cmp RAX, 8;
- jl aligned_1;
-
- // Write 8 bytes at a time.
- cmp RAX, 16;
- jl aligned_8;
-
- // Write 16 bytes at a time.
- cmp RAX, 32;
- jl aligned_16;
-
- aligned_32:
- movdqa [ R8 ], XMM0;
- movdqa [ R8 + 16 ], XMM0;
-
- add R8, 32;
- sub RAX, 32;
-
- // Checks if we're done writing bytes.
- test RAX, RAX;
- jz end;
-
- // Write 1 byte at a time.
- cmp RAX, 8;
- jl aligned_1;
-
- // Write 8 bytes at a time.
- cmp RAX, 16;
- jl aligned_8;
-
- aligned_16:
- movdqa [ R8 ], XMM0;
-
- add R8, 16;
- sub RAX, 16;
-
- // Checks if we're done writing bytes.
- test RAX, RAX;
- jz end;
-
- // Write 1 byte at a time.
- cmp RAX, 8;
- jl aligned_1;
-
- aligned_8:
- mov [ R8 ], RDI;
-
- add R8, 8;
- sub RAX, 8;
-
- // Checks if we're done writing bytes.
- test RAX, RAX;
- jz end;
-
- aligned_1:
- mov [ R8 ], DIL;
-
- inc R8;
- dec RAX;
-
- test RAX, RAX;
- jnz aligned_1;
-
- end:
- ret;
- }
-}
-
-pragma(inline, true)
-package (tanya.memory) void copyBackward(const void[] source, void[] target)
-pure nothrow @system @nogc
-{
- asm pure nothrow @nogc
- {
- naked;
-
- // Save the registers should be restored.
- mov R8, RSI;
- mov R9, RDI;
-
- // RDX - source length.
- // RCX - source data.
- // RDI - target length
- // RSI - target data.
-
- lea RDI, [ RSI + RDX - 1 ];
- lea RSI, [ RCX + RDX - 1 ];
- mov RCX, RDX;
-
- std; // Set the direction flag.
-
- rep;
- movsb;
-
- cld; // Clear the direction flag.
-
- // Restore registers.
- mov RDI, R9;
- mov RSI, R8;
-
- ret;
- }
-}
-
-pragma(inline, true)
-package (tanya.memory) int cmp(const void[] r1, const void[] r2)
-pure nothrow @system @nogc
-{
- asm pure nothrow @nogc
- {
- naked;
-
- // RDI and RSI should be preserved.
- mov R9, RDI;
- mov R8, RSI;
-
- // RDX - r1 length.
- // RCX - r1 data.
- // RDI - r2 length
- // RSI - r2 data.
-
- mov RSI, RCX;
- mov RCX, RDI;
- mov RDI, R8;
-
- // Compare the lengths.
- cmp RDX, RCX;
- jl less;
- jg greater;
-
- // Check if we're aligned.
- cmp RDX, 0x08;
- jc aligned_1;
- test EDI, 0x07;
- jz aligned_8;
-
- naligned:
- cmpsb;
- jl less;
- jg greater;
-
- dec RDX;
- test EDI, 0x07;
- jnz naligned;
-
- aligned_8:
- mov RCX, RDX;
- shr RCX, 0x03;
-
- repe;
- cmpsq;
- jl less;
- jg greater;
-
- and EDX, 0x07;
- jz equal;
-
- aligned_1: // Compare the remaining bytes.
- mov RCX, RDX;
-
- repe;
- cmpsb;
- jl less;
- jg greater;
-
- equal:
- xor RAX, RAX; // Return 0.
- jmp end;
-
- greater:
- mov RAX, 1;
- jmp end;
-
- less:
- mov RAX, -1;
- jmp end;
-
- end: // Restore registers.
- mov RSI, R8;
- mov RDI, R9;
-
- ret;
- }
-}
diff --git a/source/tanya/memory/op.d b/source/tanya/memory/op.d
index e3c9451..9af7fad 100644
--- a/source/tanya/memory/op.d
+++ b/source/tanya/memory/op.d
@@ -14,13 +14,22 @@
*/
module tanya.memory.op;
-version (TanyaPhobos)
+version (TanyaNative)
{
- import core.stdc.string;
+ extern private void fillMemory(void[], size_t) pure nothrow @system @nogc;
+
+ extern private void copyMemory(const void[], void[])
+ pure nothrow @system @nogc;
+
+ extern private void moveMemory(const void[], void[])
+ pure nothrow @system @nogc;
+
+ extern private int cmpMemory(const void[], const void[])
+ pure nothrow @system @nogc;
}
else
{
- static import tanya.memory.arch.x86_64;
+ import core.stdc.string;
}
private enum alignMask = size_t.sizeof - 1;
@@ -49,13 +58,13 @@ in
}
body
{
- version (TanyaPhobos)
+ version (TanyaNative)
{
- memcpy(target.ptr, source.ptr, source.length);
+ copyMemory(source, target);
}
else
{
- tanya.memory.arch.x86_64.copy(source, target);
+ memcpy(target.ptr, source.ptr, source.length);
}
}
@@ -112,13 +121,13 @@ private template filledBytes(ubyte Byte, ubyte I = 0)
*/
void fill(ubyte c = 0)(void[] memory) @trusted
{
- version (TanyaPhobos)
+ version (TanyaNative)
{
- memset(memory.ptr, c, memory.length);
+ fillMemory(memory, filledBytes!c);
}
else
{
- tanya.memory.arch.x86_64.fill(memory, filledBytes!c);
+ memset(memory.ptr, c, memory.length);
}
}
@@ -187,13 +196,13 @@ in
}
body
{
- version (TanyaPhobos)
+ version (TanyaNative)
{
- memmove(target.ptr, source.ptr, source.length);
+ moveMemory(source, target);
}
else
{
- tanya.memory.arch.x86_64.copyBackward(source, target);
+ memmove(target.ptr, source.ptr, source.length);
}
}
@@ -235,7 +244,11 @@ private nothrow @safe @nogc unittest
*/
int cmp(const void[] r1, const void[] r2) pure nothrow @trusted @nogc
{
- version (TanyaPhobos)
+ version (TanyaNative)
+ {
+ return cmpMemory(r1, r2);
+ }
+ else
{
if (r1.length > r2.length)
{
@@ -243,10 +256,6 @@ int cmp(const void[] r1, const void[] r2) pure nothrow @trusted @nogc
}
return r1.length < r2.length ? -1 : memcmp(r1.ptr, r2.ptr, r1.length);
}
- else
- {
- return tanya.memory.arch.x86_64.cmp(r1, r2);
- }
}
///