summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eugen Wissner <belka@caraus.de> 2017-08-09 07:01:57 +0200
committer Eugen Wissner <belka@caraus.de> 2017-08-09 07:01:57 +0200
commit 7c2abadb90d2196c1f51a0e656a68981a7e7fa77 (patch)
tree 239875a6f2033c23b848050e354a6072b34392cc
parent e6b28468ca6e78e98452af58bd4ec5879ecbee56 (diff)
download tanya-7c2abadb90d2196c1f51a0e656a68981a7e7fa77.tar.gz
Add memory.op.copyBackward
Added a function that can copy memory chunks that may overlap.
-rw-r--r-- source/tanya/memory/arch/x86_64.d 367
-rw-r--r-- source/tanya/memory/op.d 95
2 files changed, 298 insertions, 164 deletions
diff --git a/source/tanya/memory/arch/x86_64.d b/source/tanya/memory/arch/x86_64.d
index a9caec5..26d948e 100644
--- a/source/tanya/memory/arch/x86_64.d
+++ b/source/tanya/memory/arch/x86_64.d
@@ -82,206 +82,261 @@ pure nothrow @system @nogc
}
}
-private enum const(char[]) MovArrayPointer(string Destination)()
+package (tanya.memory) template fill(ubyte Byte)
{
- string asmCode = "asm pure nothrow @nogc { mov ";
- version (Windows)
+ private enum const(char[]) MovArrayPointer(string Destination)()
{
- asmCode ~= Destination ~ ", [ RCX + 8 ];";
- }
- else
- {
- asmCode ~= Destination ~ ", RSI;";
+ string asmCode = "asm pure nothrow @nogc { mov ";
+ version (Windows)
+ {
+ asmCode ~= Destination ~ ", [ RCX + 8 ];";
+ }
+ else
+ {
+ asmCode ~= Destination ~ ", RSI;";
+ }
+ return asmCode ~ "}";
}
- return asmCode ~ "}";
-}
-pragma(inline, true)
-package (tanya.memory) void fill(ubyte Byte)(void[] memory)
-pure nothrow @system @nogc
-{
- asm pure nothrow @nogc
- {
- naked;
- }
- version (Windows) asm pure nothrow @nogc
- {
- /*
- * RCX - array.
- */
- mov R8, [ RCX ];
- }
- else asm pure nothrow @nogc
+ pragma(inline, true)
+ void fill(void[] memory)
{
- /*
- * RSI - pointer.
- * RDI - length.
- */
- mov R8, RDI;
- }
- mixin(MovArrayPointer!"R9");
+ asm pure nothrow @nogc
+ {
+ naked;
+ }
+ version (Windows) asm pure nothrow @nogc
+ {
+ /*
+ * RCX - array.
+ */
+ mov R8, [ RCX ];
+ }
+ else asm pure nothrow @nogc
+ {
+ /*
+ * RSI - pointer.
+ * RDI - length.
+ */
+ mov R8, RDI;
+ }
+ mixin(MovArrayPointer!"R9");
- asm pure nothrow @nogc
- {
- // Check for zero length.
- test R8, R8;
- jz end;
- }
- // Set 128- and 64-bit registers to values we want to fill with.
- static if (Byte == 0)
- {
asm pure nothrow @nogc
{
- xor RAX, RAX;
- pxor XMM0, XMM0;
+ // Check for zero length.
+ test R8, R8;
+ jz end;
+ }
+ // Set 128- and 64-bit registers to values we want to fill with.
+ static if (Byte == 0)
+ {
+ asm pure nothrow @nogc
+ {
+ xor RAX, RAX;
+ pxor XMM0, XMM0;
+ }
+ }
+ else
+ {
+ enum ulong FilledBytes = FilledBytes!Byte;
+ asm pure nothrow @nogc
+ {
+ mov RAX, FilledBytes;
+ movq XMM0, RAX;
+ movlhps XMM0, XMM0;
+ }
}
- }
- else
- {
- enum ulong FilledBytes = FilledBytes!Byte;
asm pure nothrow @nogc
{
- mov RAX, FilledBytes;
- movq XMM0, RAX;
- movlhps XMM0, XMM0;
+ // Check if the pointer is aligned to a 16-byte boundary.
+ and R9, -0x10;
}
- }
- asm pure nothrow @nogc
- {
- // Check if the pointer is aligned to a 16-byte boundary.
- and R9, -0x10;
- }
- // Compute the number of misaligned bytes.
- mixin(MovArrayPointer!"R10");
- asm pure nothrow @nogc
- {
- sub R10, R9;
+ // Compute the number of misaligned bytes.
+ mixin(MovArrayPointer!"R10");
+ asm pure nothrow @nogc
+ {
+ sub R10, R9;
- test R10, R10;
- jz aligned;
+ test R10, R10;
+ jz aligned;
- // Get the number of bytes to be written until we are aligned.
- mov RDX, 0x10;
- sub RDX, R10;
- }
- mixin(MovArrayPointer!"R9");
- asm pure nothrow @nogc
- {
- naligned:
- mov [ R9 ], AL; // Write a byte.
+ // Get the number of bytes to be written until we are aligned.
+ mov RDX, 0x10;
+ sub RDX, R10;
+ }
+ mixin(MovArrayPointer!"R9");
+ asm pure nothrow @nogc
+ {
+ naligned:
+ mov [ R9 ], AL; // Write a byte.
- // Advance the pointer. Decrease the total number of bytes
- // and the misaligned ones.
- inc R9;
- dec RDX;
- dec R8;
+ // Advance the pointer. Decrease the total number of bytes
+ // and the misaligned ones.
+ inc R9;
+ dec RDX;
+ dec R8;
- // Checks if we are aligned.
- test RDX, RDX;
- jnz naligned;
+ // Checks if we are aligned.
+ test RDX, RDX;
+ jnz naligned;
- aligned:
- // Checks if we're done writing bytes.
- test R8, R8;
- jz end;
+ aligned:
+ // Checks if we're done writing bytes.
+ test R8, R8;
+ jz end;
- // Write 1 byte at a time.
- cmp R8, 8;
- jl aligned_1;
+ // Write 1 byte at a time.
+ cmp R8, 8;
+ jl aligned_1;
- // Write 8 bytes at a time.
- cmp R8, 16;
- jl aligned_8;
+ // Write 8 bytes at a time.
+ cmp R8, 16;
+ jl aligned_8;
- // Write 16 bytes at a time.
- cmp R8, 32;
- jl aligned_16;
+ // Write 16 bytes at a time.
+ cmp R8, 32;
+ jl aligned_16;
- // Write 32 bytes at a time.
- cmp R8, 64;
- jl aligned_32;
+ // Write 32 bytes at a time.
+ cmp R8, 64;
+ jl aligned_32;
- aligned_64:
- movdqa [ R9 ], XMM0;
- movdqa [ R9 + 16 ], XMM0;
- movdqa [ R9 + 32 ], XMM0;
- movdqa [ R9 + 48 ], XMM0;
+ aligned_64:
+ movdqa [ R9 ], XMM0;
+ movdqa [ R9 + 16 ], XMM0;
+ movdqa [ R9 + 32 ], XMM0;
+ movdqa [ R9 + 48 ], XMM0;
- add R9, 64;
- sub R8, 64;
+ add R9, 64;
+ sub R8, 64;
- cmp R8, 64;
- jge aligned_64;
+ cmp R8, 64;
+ jge aligned_64;
- // Checks if we're done writing bytes.
- test R8, R8;
- jz end;
+ // Checks if we're done writing bytes.
+ test R8, R8;
+ jz end;
- // Write 1 byte at a time.
- cmp R8, 8;
- jl aligned_1;
+ // Write 1 byte at a time.
+ cmp R8, 8;
+ jl aligned_1;
- // Write 8 bytes at a time.
- cmp R8, 16;
- jl aligned_8;
+ // Write 8 bytes at a time.
+ cmp R8, 16;
+ jl aligned_8;
- // Write 16 bytes at a time.
- cmp R8, 32;
- jl aligned_16;
+ // Write 16 bytes at a time.
+ cmp R8, 32;
+ jl aligned_16;
- aligned_32:
- movdqa [ R9 ], XMM0;
- movdqa [ R9 + 16 ], XMM0;
+ aligned_32:
+ movdqa [ R9 ], XMM0;
+ movdqa [ R9 + 16 ], XMM0;
- add R9, 32;
- sub R8, 32;
+ add R9, 32;
+ sub R8, 32;
- // Checks if we're done writing bytes.
- test R8, R8;
- jz end;
+ // Checks if we're done writing bytes.
+ test R8, R8;
+ jz end;
- // Write 1 byte at a time.
- cmp R8, 8;
- jl aligned_1;
+ // Write 1 byte at a time.
+ cmp R8, 8;
+ jl aligned_1;
- // Write 8 bytes at a time.
- cmp R8, 16;
- jl aligned_8;
+ // Write 8 bytes at a time.
+ cmp R8, 16;
+ jl aligned_8;
- aligned_16:
- movdqa [ R9 ], XMM0;
+ aligned_16:
+ movdqa [ R9 ], XMM0;
- add R9, 16;
- sub R8, 16;
+ add R9, 16;
+ sub R8, 16;
- // Checks if we're done writing bytes.
- test R8, R8;
- jz end;
+ // Checks if we're done writing bytes.
+ test R8, R8;
+ jz end;
- // Write 1 byte at a time.
- cmp R8, 8;
- jl aligned_1;
+ // Write 1 byte at a time.
+ cmp R8, 8;
+ jl aligned_1;
- aligned_8:
- mov [ R9 ], RAX;
+ aligned_8:
+ mov [ R9 ], RAX;
- add R9, 8;
- sub R8, 8;
+ add R9, 8;
+ sub R8, 8;
- // Checks if we're done writing bytes.
- test R8, R8;
- jz end;
+ // Checks if we're done writing bytes.
+ test R8, R8;
+ jz end;
+
+ aligned_1:
+ mov [ R9 ], AL;
+
+ inc R9;
+ dec R8;
- aligned_1:
- mov [ R9 ], AL;
+ test R8, R8;
+ jnz aligned_1;
- inc R9;
- dec R8;
+ end:
+ ret;
+ }
+ }
+}
+
+pragma(inline, true)
+package (tanya.memory) void copyBackward(const void[] source, void[] target)
+pure nothrow @system @nogc
+{
+ asm pure nothrow @nogc
+ {
+ naked;
- test R8, R8;
- jnz aligned_1;
+ // Save the registers that should be restored.
+ mov R8, RSI;
+ mov R9, RDI;
+ }
+ // Prepare the registers for movsb.
+ version (Windows) asm pure nothrow @nogc
+ {
+ // RDX - source.
+ // RCX - target.
+
+ mov RAX, [ RCX + 8 ];
+ mov R10, [ RDX + 8 ];
+ mov RCX, [ RDX ];
+
+ lea RDI, [ RAX + RCX - 1 ];
+ lea RSI, [ R10 + RCX - 1 ];
+ }
+ else asm pure nothrow @nogc
+ {
+ // RDX - source length.
+ // RCX - source data.
+ // RDI - target length
+ // RSI - target data.
+
+ lea RDI, [ RSI + RDX - 1 ];
+ lea RSI, [ RCX + RDX - 1 ];
+ mov RCX, RDX;
+ }
+ asm pure nothrow @nogc
+ {
+ std; // Set the direction flag.
+
+ rep;
+ movsb;
+
+ cld; // Clear the direction flag.
+
+ // Restore registers.
+ mov RDI, R9;
+ mov RSI, R8;
- end:
ret;
}
}
diff --git a/source/tanya/memory/op.d b/source/tanya/memory/op.d
index e0a3dea..9b15586 100644
--- a/source/tanya/memory/op.d
+++ b/source/tanya/memory/op.d
@@ -17,21 +17,23 @@ version (D_InlineAsm_X86_64)
static import tanya.memory.arch.x86_64;
}
-private enum alignmentMask = size_t.sizeof - 1;
+private enum alignMask = size_t.sizeof - 1;
/**
* Copies $(D_PARAM source) into $(D_PARAM target).
*
- * $(D_PARAM source) and $(D_PARAM target) shall not overlap so that an element
- * of $(D_PARAM target) points to an element of $(D_PARAM source).
+ * $(D_PARAM source) and $(D_PARAM target) shall not overlap so that
+ * $(D_PARAM source) points ahead of $(D_PARAM target).
*
- * $(D_PARAM target) shall have enough space $(D_INLINECODE source.length)
+ * $(D_PARAM target) shall have enough space for $(D_INLINECODE source.length)
* elements.
*
* Params:
* source = Memory to copy from.
* target = Destination memory.
*
+ * See_Also: $(D_PSYMBOL copyBackward).
+ *
* Precondition: $(D_INLINECODE source.length <= target.length).
*/
void copy(const void[] source, void[] target) pure nothrow @trusted @nogc
@@ -53,8 +55,8 @@ body
// Check if the pointers are aligned or at least can be aligned
// properly.
- ushort naligned = (cast(size_t) source.ptr) & alignmentMask;
- if (naligned == ((cast(size_t) target.ptr) & alignmentMask))
+ ushort naligned = (cast(size_t) source.ptr) & alignMask;
+ if (naligned == ((cast(size_t) target.ptr) & alignMask))
{
// Align the pointers if possible.
if (naligned != 0)
@@ -135,7 +137,7 @@ package template FilledBytes(ubyte Byte, ubyte I = 0)
* Byte = The value to fill $(D_PARAM memory) with.
* memory = Memory block.
*/
-void fill(ubyte Byte = 0)(void[] memory) pure nothrow @trusted @nogc
+void fill(ubyte Byte = 0)(void[] memory) @trusted
{
version (D_InlineAsm_X86_64)
{
@@ -147,7 +149,7 @@ void fill(ubyte Byte = 0)(void[] memory) pure nothrow @trusted @nogc
ubyte* vp = cast(ubyte*) memory.ptr;
// Align.
- while (((cast(size_t) vp) & alignmentMask) != 0)
+ while (((cast(size_t) vp) & alignMask) != 0)
{
*vp++ = Byte;
--n;
@@ -206,3 +208,80 @@ pure nothrow @safe @nogc private unittest
}
}
}
+
+/**
+ * Copies starting from the end of $(D_PARAM source) into the end of
+ * $(D_PARAM target).
+ *
+ * $(D_PSYMBOL copyBackward) copies the elements in reverse order, but the
+ * order of elements in the $(D_PARAM target) is exactly the same as in the
+ * $(D_PARAM source).
+ *
+ * $(D_PARAM source) and $(D_PARAM target) shall not overlap in such a way
+ * that $(D_PARAM target) begins before $(D_PARAM source).
+ *
+ * $(D_PARAM target) shall have enough space for $(D_INLINECODE source.length)
+ * elements.
+ *
+ * Params:
+ * source = Memory to copy from.
+ * target = Destination memory.
+ *
+ * See_Also: $(D_PSYMBOL copy).
+ *
+ * Precondition: $(D_INLINECODE source.length <= target.length).
+ */
+void copyBackward(const void[] source, void[] target) pure nothrow @trusted @nogc
+in
+{
+ assert(source.length <= target.length);
+}
+body
+{
+ version (D_InlineAsm_X86_64)
+ {
+ tanya.memory.arch.x86_64.copyBackward(source, target);
+ }
+ else // Naive implementation.
+ {
+ auto count = source.length;
+
+ // Try to align the pointers if possible.
+ if (((cast(size_t) source.ptr) & alignMask) == ((cast(size_t) target.ptr) & alignMask))
+ {
+ while (((cast(size_t) (source.ptr + count)) & alignMask) != 0)
+ {
+ if (!count--)
+ {
+ return;
+ }
+ (cast(ubyte[]) target)[count]
+ = (cast(const(ubyte)[]) source)[count];
+ }
+ }
+
+ // Write as long as we're aligned.
+ for (; count >= size_t.sizeof; count -= size_t.sizeof)
+ {
+ *(cast(size_t*) (target.ptr + count - size_t.sizeof))
+ = *(cast(const(size_t)*) (source.ptr + count - size_t.sizeof));
+ }
+
+ // Write the remaining bytes.
+ while (count--)
+ {
+ (cast(ubyte[]) target)[count]
+ = (cast(const(ubyte)[]) source)[count];
+ }
+ }
+}
+
+///
+pure nothrow @safe @nogc unittest
+{
+ ubyte[6] mem = [ 'a', 'a', 'b', 'b', 'c', 'c' ];
+ ubyte[6] expected = [ 'a', 'a', 'a', 'a', 'b', 'b' ];
+
+ copyBackward(mem[0 .. 4], mem[2 .. $]);
+ assert(expected == mem);
+}