Add fast function to zero memory
This commit is contained in:
parent
1a4d1238a1
commit
ed92e3993e
@ -79,3 +79,192 @@ pure nothrow @system @nogc
|
||||
ret;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Fills `memory` with zero-valued bytes using x86-64 inline assembly.
 *
 * Strategy:
 *  1. Return immediately for an empty block.
 *  2. Write single bytes until the destination is 16-byte aligned or the
 *     block is exhausted, whichever comes first.
 *  3. Write the aligned rest in chunks of 64, 32 and 16 bytes (SSE2 aligned
 *     stores), then 8 bytes, then single bytes.
 *
 * Register usage inside the routine:
 *  R8   - number of bytes still to be written.
 *  R9   - current write position.
 *  RDX  - number of bytes left until R9 is 16-byte aligned.
 *  RAX  - zero source for the 1- and 8-byte stores (after the head phase).
 *  XMM0 - zero source for the 16-byte stores.
 *
 * Params:
 *  memory = Memory block to be zeroed.
 */
pragma(inline, true)
package (tanya.memory) void zero(void[] memory)
pure nothrow @system @nogc
{
    asm pure nothrow @nogc
    {
        naked;
    }
    version (Windows) asm pure nothrow @nogc
    {
        /*
         * RCX - array.
         */
        mov R8, [ RCX ];     // Length.
        mov R9, [ RCX + 8 ]; // Pointer.
    }
    else asm pure nothrow @nogc
    {
        /*
         * RSI - pointer.
         * RDI - length.
         */
        mov R8, RDI;
        mov R9, RSI;
    }
    asm pure nothrow @nogc
    {
        // Check for zero length.
        test R8, R8;
        jz end;

        // Set to 0.
        pxor XMM0, XMM0;

        // Round the pointer down to a 16-byte boundary. The difference to
        // the original pointer (computed below) is the misalignment.
        and R9, -0x10;
    }
    // Compute the number of misaligned bytes.
    version (Windows) asm pure nothrow @nogc
    {
        mov RAX, [ RCX + 8 ];
    }
    else asm pure nothrow @nogc
    {
        mov RAX, RSI;
    }
    asm pure nothrow @nogc
    {
        sub RAX, R9;

        // Already aligned: R9 equals the original pointer and RAX is zero,
        // which is what the aligned path expects.
        test RAX, RAX;
        jz aligned;

        // Get the number of bytes to be written until we are aligned.
        mov RDX, 0x10;
        sub RDX, RAX;
    }
    // Restore the original (unaligned) pointer.
    version (Windows) asm pure nothrow @nogc
    {
        mov R9, [ RCX + 8 ];
    }
    else asm pure nothrow @nogc
    {
        mov R9, RSI;
    }
    asm pure nothrow @nogc
    {
        // Set RAX to zero, so we can set bytes and qwords.
        xor RAX, RAX;

    naligned:
        mov [ R9 ], AL; // Write a byte.

        // Advance the pointer. Decrease the total number of bytes
        // and the misaligned ones.
        inc R9;
        dec RDX;
        dec R8;

        // The block may end before the next 16-byte boundary. Stop here in
        // that case, otherwise the loop would write past the end of the
        // block and R8 would underflow.
        test R8, R8;
        jz end;

        // Checks if we are aligned.
        test RDX, RDX;
        jnz naligned;

    aligned:
        // Checks if we're done writing bytes.
        test R8, R8;
        jz end;

        // Fewer than 8 bytes left: write 1 byte at a time.
        cmp R8, 8;
        jl aligned_1;

        // Fewer than 16 bytes left: write 8 bytes at a time.
        cmp R8, 16;
        jl aligned_8;

        // Fewer than 32 bytes left: write 16 bytes at a time.
        cmp R8, 32;
        jl aligned_16;

        // Fewer than 64 bytes left: write 32 bytes at a time.
        cmp R8, 64;
        jl aligned_32;

    aligned_64:
        movdqa [ R9 ], XMM0;
        movdqa [ R9 + 16 ], XMM0;
        movdqa [ R9 + 32 ], XMM0;
        movdqa [ R9 + 48 ], XMM0;

        add R9, 64;
        sub R8, 64;

        cmp R8, 64;
        jge aligned_64;

        // Checks if we're done writing bytes.
        test R8, R8;
        jz end;

        // Fewer than 8 bytes left: write 1 byte at a time.
        cmp R8, 8;
        jl aligned_1;

        // Fewer than 16 bytes left: write 8 bytes at a time.
        cmp R8, 16;
        jl aligned_8;

        // Fewer than 32 bytes left: write 16 bytes at a time.
        cmp R8, 32;
        jl aligned_16;

    aligned_32:
        movdqa [ R9 ], XMM0;
        movdqa [ R9 + 16 ], XMM0;

        add R9, 32;
        sub R8, 32;

        // Checks if we're done writing bytes.
        test R8, R8;
        jz end;

        // Fewer than 8 bytes left: write 1 byte at a time.
        cmp R8, 8;
        jl aligned_1;

        // Fewer than 16 bytes left: write 8 bytes at a time.
        cmp R8, 16;
        jl aligned_8;

    aligned_16:
        movdqa [ R9 ], XMM0;

        add R9, 16;
        sub R8, 16;

        // Checks if we're done writing bytes.
        test R8, R8;
        jz end;

        // Fewer than 8 bytes left: write 1 byte at a time.
        cmp R8, 8;
        jl aligned_1;

    aligned_8:
        mov [ R9 ], RAX;

        add R9, 8;
        sub R8, 8;

        // Checks if we're done writing bytes.
        test R8, R8;
        jz end;

    aligned_1:
        mov [ R9 ], AL;

        inc R9;
        dec R8;

        test R8, R8;
        jnz aligned_1;

    end:
        ret;
    }
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
@ -17,6 +17,8 @@ version (D_InlineAsm_X86_64)
|
||||
static import tanya.memory.arch.x86_64;
|
||||
}
|
||||
|
||||
private enum alignmentMask = size_t.sizeof - 1;
|
||||
|
||||
/**
|
||||
* Copies $(D_PARAM source) into $(D_PARAM target).
|
||||
*
|
||||
@ -24,7 +26,7 @@ version (D_InlineAsm_X86_64)
|
||||
* of $(D_PARAM target) points to an element of $(D_PARAM source).
|
||||
*
|
||||
* $(D_PARAM target) shall have enough space $(D_INLINECODE source.length)
|
||||
* elements.
|
||||
* elements.
|
||||
*
|
||||
* Params:
|
||||
* source = Memory to copy from.
|
||||
@ -48,7 +50,6 @@ body
|
||||
auto source1 = cast(const(ubyte)*) source;
|
||||
auto target1 = cast(ubyte*) target;
|
||||
auto count = source.length;
|
||||
enum alignmentMask = size_t.sizeof - 1;
|
||||
|
||||
// Check if the pointers are aligned or at least can be aligned
|
||||
// properly.
|
||||
@ -79,19 +80,17 @@ body
|
||||
while (count--)
|
||||
{
|
||||
*target1++ = *source1++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
pure nothrow @safe @nogc unittest
|
||||
{
|
||||
{
|
||||
ubyte[9] source = [1, 2, 3, 4, 5, 6, 7, 8, 9];
|
||||
ubyte[9] target;
|
||||
source.copy(target);
|
||||
assert(source == target);
|
||||
}
|
||||
ubyte[9] source = [1, 2, 3, 4, 5, 6, 7, 8, 9];
|
||||
ubyte[9] target;
|
||||
source.copy(target);
|
||||
assert(source == target);
|
||||
}
|
||||
|
||||
private pure nothrow @safe @nogc unittest
|
||||
@ -113,3 +112,76 @@ private pure nothrow @safe @nogc unittest
|
||||
assert(source == target);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fills $(D_PARAM memory) with zero-valued bytes.
 *
 * An empty block is left untouched.
 *
 * Params:
 *  memory = Memory block.
 */
void zero(void[] memory) pure nothrow @trusted @nogc
{
    version (D_InlineAsm_X86_64)
    {
        tanya.memory.arch.x86_64.zero(memory);
    }
    else // Naive implementation.
    {
        auto n = memory.length;
        ubyte* vp = cast(ubyte*) memory.ptr;

        // Align. The block may end before an aligned address is reached, so
        // the remaining length has to be checked as well; otherwise the loop
        // would write past the end of the block and n would underflow.
        while (n != 0 && ((cast(size_t) vp) & alignmentMask) != 0)
        {
            *vp++ = 0;
            --n;
        }

        // Set size_t.sizeof bytes at once.
        auto sp = cast(size_t*) vp;
        while (n >= size_t.sizeof)
        {
            *sp++ = 0;
            n -= size_t.sizeof;
        }

        // Write the remaining bytes.
        vp = cast(ubyte*) sp;
        for (; n != 0; --n)
        {
            *vp++ = 0;
        }
    }
}
|
||||
|
||||
///
pure nothrow @safe @nogc unittest
{
    // Every byte of a non-zero block must read back as zero afterwards.
    ubyte[9] block = [1, 2, 3, 4, 5, 6, 7, 8, 9];

    zero(block);

    foreach (const ubyte octet; block)
    {
        assert(octet == 0);
    }
}
|
||||
|
||||
// Stress test. Runs `zero` over every tail slice of a 192-byte buffer, so the
// start address and the length both vary from one iteration to the next.
pure nothrow @safe @nogc private unittest
{
    ubyte[192] memory;

    foreach (offset; 0 .. 192)
    {
        // Fill the tail with a non-constant pattern first.
        foreach (ubyte position, ref ubyte cell; memory[offset .. $])
        {
            cell = position;
        }

        zero(memory[offset .. $]);

        // The whole tail has to be zero now.
        foreach (ubyte cell; memory[offset .. $])
        {
            assert(cell == 0);
        }
    }
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user