diff --git a/source/tanya/memory/arch/x86_64.d b/source/tanya/memory/arch/x86_64.d
index 0c3b86f..2db1aaa 100644
--- a/source/tanya/memory/arch/x86_64.d
+++ b/source/tanya/memory/arch/x86_64.d
@@ -79,3 +79,192 @@ pure nothrow @system @nogc
         ret;
     }
 }
+
+pragma(inline, true)
+package (tanya.memory) void zero(void[] memory)
+pure nothrow @system @nogc
+{
+    asm pure nothrow @nogc
+    {
+        naked;
+    }
+    version (Windows) asm pure nothrow @nogc
+    {
+        /*
+         * RCX - array.
+         */
+        mov R8, [ RCX ];
+        mov R9, [ RCX + 8 ];
+    }
+    else asm pure nothrow @nogc
+    {
+        /*
+         * RSI - pointer.
+         * RDI - length.
+         */
+        mov R8, RDI;
+        mov R9, RSI;
+    }
+    asm pure nothrow @nogc
+    {
+        // Check for zero length.
+        test R8, R8;
+        jz end;
+
+        // Set to 0.
+        pxor XMM0, XMM0;
+
+        // Check if the pointer is aligned to a 16-byte boundary.
+        and R9, -0x10;
+    }
+    // Compute the number of misaligned bytes.
+    version (Windows) asm pure nothrow @nogc
+    {
+        mov RAX, [ RCX + 8 ];
+    }
+    else asm pure nothrow @nogc
+    {
+        mov RAX, RSI;
+    }
+    asm pure nothrow @nogc
+    {
+        sub RAX, R9;
+
+        test RAX, RAX;
+        jz aligned;
+
+        // Get the number of bytes to be written until we are aligned.
+        mov RDX, 0x10;
+        sub RDX, RAX;
+    }
+    version (Windows) asm pure nothrow @nogc
+    {
+        mov R9, [ RCX + 8 ];
+    }
+    else asm pure nothrow @nogc
+    {
+        mov R9, RSI;
+    }
+    asm pure nothrow @nogc
+    {
+        // Set RAX to zero, so we can set bytes and dwords.
+        xor RAX, RAX;
+
+    naligned:
+        mov [ R9 ], AL; // Write a byte.
+
+        // Advance the pointer. Decrease the total number of bytes
+        // and the misaligned ones.
+        inc R9;
+        dec RDX;
+        dec R8;
+
+        // Loop until aligned. NOTE(review): R8 is not tested here; if memory.length < RDX this overruns the buffer and R8 wraps - confirm.
+        test RDX, RDX;
+        jnz naligned;
+
+    aligned:
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+        // Write 8 bytes at a time.
+        cmp R8, 16;
+        jl aligned_8;
+
+        // Write 16 bytes at a time.
+        cmp R8, 32;
+        jl aligned_16;
+
+        // Write 32 bytes at a time.
+        cmp R8, 64;
+        jl aligned_32;
+
+    aligned_64:
+        movdqa [ R9 ], XMM0;
+        movdqa [ R9 + 16 ], XMM0;
+        movdqa [ R9 + 32 ], XMM0;
+        movdqa [ R9 + 48 ], XMM0;
+
+        add R9, 64;
+        sub R8, 64;
+
+        cmp R8, 64;
+        jge aligned_64;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+        // Write 8 bytes at a time.
+        cmp R8, 16;
+        jl aligned_8;
+
+        // Write 16 bytes at a time.
+        cmp R8, 32;
+        jl aligned_16;
+
+    aligned_32:
+        movdqa [ R9 ], XMM0;
+        movdqa [ R9 + 16 ], XMM0;
+
+        add R9, 32;
+        sub R8, 32;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+        // Write 8 bytes at a time.
+        cmp R8, 16;
+        jl aligned_8;
+
+    aligned_16:
+        movdqa [ R9 ], XMM0;
+
+        add R9, 16;
+        sub R8, 16;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+        // Write 1 byte at a time.
+        cmp R8, 8;
+        jl aligned_1;
+
+    aligned_8:
+        mov [ R9 ], RAX;
+
+        add R9, 8;
+        sub R8, 8;
+
+        // Checks if we're done writing bytes.
+        test R8, R8;
+        jz end;
+
+    aligned_1:
+        mov [ R9 ], AL;
+
+        inc R9;
+        dec R8;
+
+        test R8, R8;
+        jnz aligned_1;
+
+    end:
+        ret;
+    }
+}
diff --git a/source/tanya/memory/op.d b/source/tanya/memory/op.d
index 98e5b90..25f7d25 100644
--- a/source/tanya/memory/op.d
+++ b/source/tanya/memory/op.d
@@ -1,4 +1,4 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public 
+/* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
  */
@@ -17,6 +17,8 @@ version (D_InlineAsm_X86_64)
     static import tanya.memory.arch.x86_64;
 }
 
+private enum alignmentMask = size_t.sizeof - 1;
+
 /**
  * Copies $(D_PARAM source) into $(D_PARAM target).
  *
@@ -24,7 +26,7 @@
  * of $(D_PARAM target) points to an element of $(D_PARAM source).
  *
  * $(D_PARAM target) shall have enough space $(D_INLINECODE source.length)
- * elements. 
+ * elements.
  *
  * Params:
  *  source = Memory to copy from.
@@ -48,7 +50,6 @@ body
     auto source1 = cast(const(ubyte)*) source;
     auto target1 = cast(ubyte*) target;
     auto count = source.length;
-    enum alignmentMask = size_t.sizeof - 1;
 
     // Check if the pointers are aligned or at least can be aligned
    // properly.
@@ -79,19 +80,17 @@ body
         while (count--)
         {
             *target1++ = *source1++;
-        } 
+        }
     }
 }
 
 ///
 pure nothrow @safe @nogc unittest
 {
-    {
-        ubyte[9] source = [1, 2, 3, 4, 5, 6, 7, 8, 9];
-        ubyte[9] target;
-        source.copy(target);
-        assert(source == target);
-    }
+    ubyte[9] source = [1, 2, 3, 4, 5, 6, 7, 8, 9];
+    ubyte[9] target;
+    source.copy(target);
+    assert(source == target);
 }
 
 private pure nothrow @safe @nogc unittest
@@ -113,3 +112,76 @@ private pure nothrow @safe @nogc unittest
         assert(source == target);
     }
 }
+
+/**
+ * Fills $(D_PARAM memory) with zero-valued bytes.
+ *
+ * Params:
+ *  memory = Memory block.
+ */
+void zero(void[] memory) pure nothrow @trusted @nogc
+{
+    version (D_InlineAsm_X86_64)
+    {
+        tanya.memory.arch.x86_64.zero(memory);
+    }
+    else // Naive implementation.
+    {
+        auto n = memory.length;
+        ubyte* vp = cast(ubyte*) memory.ptr;
+
+        // Align.
+        while (((cast(size_t) vp) & alignmentMask) != 0)
+        {
+            *vp++ = 0;
+            --n;
+        }
+
+        // Set size_t.sizeof bytes at once.
+        auto sp = cast(size_t*) vp;
+        while (n / size_t.sizeof > 0)
+        {
+            *sp++ = 0;
+            n -= size_t.sizeof;
+        }
+
+        // Write the remaining bytes.
+        vp = cast(ubyte*) sp;
+        while (n--)
+        {
+            *vp = 0;
+            ++vp;
+        }
+    }
+}
+
+///
+pure nothrow @safe @nogc unittest
+{
+    ubyte[9] memory = [1, 2, 3, 4, 5, 6, 7, 8, 9];
+    memory.zero();
+    foreach (ubyte v; memory)
+    {
+        assert(v == 0);
+    }
+}
+
+// Stress test. Checks that `zero` can handle unaligned pointers and different
+// lengths.
+pure nothrow @safe @nogc private unittest
+{
+    ubyte[192] memory;
+
+    foreach (j; 0 .. 192)
+    {
+        foreach (ubyte i, ref ubyte v; memory[j .. $])
+        {
+            v = i;
+        }
+        zero(memory[j .. $]);
+        foreach (ubyte v; memory[j .. $])
+        {
+            assert(v == 0);
+        }
+    }
+}