summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2017-08-02 05:31:08 +0200
committerEugen Wissner <belka@caraus.de>2017-08-02 06:41:54 +0200
commited92e3993ee5350d3c523b848631cbf4ba5eca78 (patch)
treef117c98c5316873a0c5cf4a93221a511b674e11e
parent1a4d1238a14b38c5f17c0e4428995ca007ffb1a5 (diff)
downloadtanya-ed92e3993ee5350d3c523b848631cbf4ba5eca78.tar.gz
Add fast function to zero memory
-rw-r--r--source/tanya/memory/arch/x86_64.d189
-rw-r--r--source/tanya/memory/op.d92
2 files changed, 271 insertions, 10 deletions
diff --git a/source/tanya/memory/arch/x86_64.d b/source/tanya/memory/arch/x86_64.d
index 0c3b86f..2db1aaa 100644
--- a/source/tanya/memory/arch/x86_64.d
+++ b/source/tanya/memory/arch/x86_64.d
@@ -79,3 +79,192 @@ pure nothrow @system @nogc
ret;
}
}
+
+/*
+ * Fills memory with zero-valued bytes.
+ *
+ * The function is naked: it has no prologue or epilogue and reads its
+ * argument directly from the registers named in the version blocks below.
+ *
+ * Register usage:
+ *  R8   - Number of bytes that still have to be written.
+ *  R9   - Current write position.
+ *  RAX  - Zero, source operand of the 1- and 8-byte stores.
+ *  RDX  - Number of bytes left until R9 is aligned to a 16-byte boundary.
+ *  XMM0 - Zero, source operand of the 16-byte stores.
+ *
+ * Params:
+ *  memory = Memory block to be zeroed.
+ */
+pragma(inline, true)
+package (tanya.memory) void zero(void[] memory)
+pure nothrow @system @nogc
+{
+    asm pure nothrow @nogc
+    {
+        naked;
+    }
+    version (Windows) asm pure nothrow @nogc
+    {
+        /*
+         * RCX - array: the length is read from [ RCX ] and the pointer
+         *       from [ RCX + 8 ].
+         */
+        mov R8, [ RCX ];
+        mov R9, [ RCX + 8 ];
+    }
+    else asm pure nothrow @nogc
+    {
+        /*
+         * RSI - pointer.
+         * RDI - length.
+         */
+        mov R8, RDI;
+        mov R9, RSI;
+    }
+    asm pure nothrow @nogc
+    {
+        // Check for zero length.
+        test R8, R8;
+        jz end;
+
+        // Set to 0.
+        pxor XMM0, XMM0;
+
+        // Round the pointer down to a 16-byte boundary.
+        and R9, -0x10;
+    }
+    // Compute the number of misaligned bytes: original pointer minus the
+    // rounded down one.
+    version (Windows) asm pure nothrow @nogc
+    {
+        mov RAX, [ RCX + 8 ];
+    }
+    else asm pure nothrow @nogc
+    {
+        mov RAX, RSI;
+    }
+    asm pure nothrow @nogc
+    {
+        sub RAX, R9;
+
+        // If the pointer is already aligned, R9 equals the original pointer
+        // and RAX is zero, as the code after "aligned" expects.
+        test RAX, RAX;
+        jz aligned;
+
+        // Get the number of bytes to be written until we are aligned.
+        mov RDX, 0x10;
+        sub RDX, RAX;
+    }
+    // Restore the original (misaligned) pointer.
+    version (Windows) asm pure nothrow @nogc
+    {
+        mov R9, [ RCX + 8 ];
+    }
+    else asm pure nothrow @nogc
+    {
+        mov R9, RSI;
+    }
+    asm pure nothrow @nogc
+    {
+        // Set RAX to zero, so we can set bytes and qwords.
+        xor RAX, RAX;
+
+    naligned:
+        mov [ R9 ], AL; // Write a byte.
+
+        // Advance the pointer and decrease the total number of bytes.
+        inc R9;
+        dec R8;
+
+        // The block can end before an aligned address is reached. Stop
+        // then, otherwise the loop writes past the end of the block and R8
+        // wraps around. DEC sets ZF, so no separate TEST is needed.
+        jz end;
+
+        // Decrease the number of misaligned bytes and check if we are
+        // aligned.
+        dec RDX;
+        jnz naligned;
+
+    aligned:
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+        // Write 8 bytes at a time.
+        cmp R8, 16;
+        jl aligned_8;
+
+        // Write 16 bytes at a time.
+        cmp R8, 32;
+        jl aligned_16;
+
+        // Write 32 bytes at a time.
+        cmp R8, 64;
+        jl aligned_32;
+
+    aligned_64:
+        movdqa [ R9 ], XMM0;
+        movdqa [ R9 + 16 ], XMM0;
+        movdqa [ R9 + 32 ], XMM0;
+        movdqa [ R9 + 48 ], XMM0;
+
+        add R9, 64;
+        sub R8, 64;
+
+        cmp R8, 64;
+        jge aligned_64;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+        // Write 8 bytes at a time.
+        cmp R8, 16;
+        jl aligned_8;
+
+        // Write 16 bytes at a time.
+        cmp R8, 32;
+        jl aligned_16;
+
+    aligned_32:
+        movdqa [ R9 ], XMM0;
+        movdqa [ R9 + 16 ], XMM0;
+
+        add R9, 32;
+        sub R8, 32;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+        // Write 8 bytes at a time.
+        cmp R8, 16;
+        jl aligned_8;
+
+    aligned_16:
+        movdqa [ R9 ], XMM0;
+
+        add R9, 16;
+        sub R8, 16;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+    aligned_8:
+        mov [ R9 ], RAX;
+
+        add R9, 8;
+        sub R8, 8;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+    aligned_1:
+        mov [ R9 ], AL;
+
+        inc R9;
+        dec R8;
+
+        test R8, R8;
+        jnz aligned_1;
+
+    end:
+        ret;
+    }
+}
diff --git a/source/tanya/memory/op.d b/source/tanya/memory/op.d
index 98e5b90..25f7d25 100644
--- a/source/tanya/memory/op.d
+++ b/source/tanya/memory/op.d
@@ -1,4 +1,4 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
+/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
@@ -17,6 +17,8 @@ version (D_InlineAsm_X86_64)
static import tanya.memory.arch.x86_64;
}
+private enum alignmentMask = size_t.sizeof - 1;
+
/**
* Copies $(D_PARAM source) into $(D_PARAM target).
*
@@ -24,7 +26,7 @@ version (D_InlineAsm_X86_64)
* of $(D_PARAM target) points to an element of $(D_PARAM source).
*
* $(D_PARAM target) shall have enough space $(D_INLINECODE source.length)
- * elements.
+ * elements.
*
* Params:
* source = Memory to copy from.
@@ -48,7 +50,6 @@ body
auto source1 = cast(const(ubyte)*) source;
auto target1 = cast(ubyte*) target;
auto count = source.length;
- enum alignmentMask = size_t.sizeof - 1;
// Check if the pointers are aligned or at least can be aligned
// properly.
@@ -79,19 +80,17 @@ body
while (count--)
{
*target1++ = *source1++;
- }
+ }
}
}
///
pure nothrow @safe @nogc unittest
{
- {
- ubyte[9] source = [1, 2, 3, 4, 5, 6, 7, 8, 9];
- ubyte[9] target;
- source.copy(target);
- assert(source == target);
- }
+ ubyte[9] source = [1, 2, 3, 4, 5, 6, 7, 8, 9];
+ ubyte[9] target;
+ source.copy(target);
+ assert(source == target);
}
private pure nothrow @safe @nogc unittest
@@ -113,3 +112,76 @@ private pure nothrow @safe @nogc unittest
assert(source == target);
}
}
+
+/**
+ * Fills $(D_PARAM memory) with zero-valued bytes.
+ *
+ * Params:
+ *  memory = Memory block.
+ */
+void zero(void[] memory) pure nothrow @trusted @nogc
+{
+    version (D_InlineAsm_X86_64)
+    {
+        tanya.memory.arch.x86_64.zero(memory);
+    }
+    else // Naive implementation.
+    {
+        auto n = memory.length;
+        ubyte* vp = cast(ubyte*) memory.ptr;
+
+        // Align. The block can end before an aligned address is reached,
+        // so the remaining length has to be checked as well; otherwise the
+        // loop writes past the end of the block and n wraps around.
+        while (n != 0 && ((cast(size_t) vp) & alignmentMask) != 0)
+        {
+            *vp++ = 0;
+            --n;
+        }
+
+        // Set size_t.sizeof bytes at once.
+        auto sp = cast(size_t*) vp;
+        while (n >= size_t.sizeof)
+        {
+            *sp++ = 0;
+            n -= size_t.sizeof;
+        }
+
+        // Write the remaining bytes.
+        vp = cast(ubyte*) sp;
+        while (n--)
+        {
+            *vp++ = 0;
+        }
+    }
+}
+
+///
+pure nothrow @safe @nogc unittest
+{
+    ubyte[9] block = [1, 2, 3, 4, 5, 6, 7, 8, 9];
+
+    // Every byte of the block has to be zero afterwards.
+    zero(block);
+    foreach (ubyte octet; block)
+    {
+        assert(octet == 0);
+    }
+}
+
+// Stress test. Runs `zero` on every tail of a 192-byte buffer, so that
+// differently aligned start addresses and different lengths are covered.
+pure nothrow @safe @nogc private unittest
+{
+    ubyte[192] buffer;
+
+    foreach (start; 0 .. buffer.length)
+    {
+        auto tail = buffer[start .. $];
+
+        // Fill the tail with its own indices first, so that most bytes are
+        // non-zero before the call.
+        foreach (ubyte index, ref ubyte element; tail)
+        {
+            element = index;
+        }
+        zero(tail);
+        foreach (ubyte element; tail)
+        {
+            assert(element == 0);
+        }
+    }
+}