summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2017-09-26 08:26:12 +0200
committerEugen Wissner <belka@caraus.de>2017-09-26 08:26:12 +0200
commitfaebf3e4d5f6fcdfe1ad9f30c75fb478a7259fd1 (patch)
treead53ce90a2314d4676331bce326c7b5aeb9a2f59 /arch
parent20e7df386bfd9449bf7fb0926918e176bb37140c (diff)
downloadtanya-faebf3e4d5f6fcdfe1ad9f30c75fb478a7259fd1.tar.gz
Fix #304
Replace inline assembly with GAS.
Diffstat (limited to 'arch')
-rw-r--r--arch/build.ninja5
-rw-r--r--arch/x64/linux/memory/cmp.S67
-rw-r--r--arch/x64/linux/memory/copy.S67
-rw-r--r--arch/x64/linux/memory/fill.S155
4 files changed, 293 insertions, 1 deletions
diff --git a/arch/build.ninja b/arch/build.ninja
index eff2eb9..fce2838 100644
--- a/arch/build.ninja
+++ b/arch/build.ninja
@@ -5,6 +5,9 @@ rule archive
command = ar rcs $out $in
build abs.o: gas x64/linux/math/abs.S
+build cmp.o: gas x64/linux/memory/cmp.S
+build fill.o: gas x64/linux/memory/fill.S
+build copy.o: gas x64/linux/memory/copy.S
build syscall.o: gas x64/linux/syscall.S
-build tanya.a: archive syscall.o abs.o
+build tanya.a: archive syscall.o copy.o fill.o cmp.o abs.o
diff --git a/arch/x64/linux/memory/cmp.S b/arch/x64/linux/memory/cmp.S
new file mode 100644
index 0000000..169e2eb
--- /dev/null
+++ b/arch/x64/linux/memory/cmp.S
@@ -0,0 +1,67 @@
+ .text
+
+/*
+ * cmpMemory.
+ *
+ * Three-way comparison of two memory areas. The lengths are compared
+ * first (as unsigned values); only areas of equal length have their
+ * contents compared. Contents are ordered by the first differing byte
+ * in address order, with bytes treated as unsigned values.
+ *
+ * rdi - r1 length.
+ * rsi - r1 data.
+ * rdx - r2 length.
+ * rcx - r2 data.
+ *
+ * Returns in rax: -1 if r1 is less than r2, 1 if r1 is greater, 0 if
+ * they are equal (two empty areas are equal).
+ *
+ * Clobbers rcx, rdx, rsi, rdi and the flags.
+ * Assumes the direction flag is clear on entry, so that cmps advances
+ * the pointers (TODO confirm this matches the caller's ABI).
+ */
+	.globl _D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi
+	.type _D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi, @function
+
+_D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi:
+	// Different lengths decide the result on their own.
+	// Lengths are sizes, hence the unsigned jumps.
+	cmp %rdx, %rdi
+	jb .Lcmp_less
+	ja .Lcmp_greater
+
+	// Equal lengths from here on. Nothing to compare means equal;
+	// a rep prefix with rcx = 0 would leave stale flags behind.
+	test %rdx, %rdx
+	jz .Lcmp_equal
+
+	mov %rcx, %rdi // cmps reads r1 via rsi and r2 via rdi
+
+	// Fewer than 8 bytes: compare them one by one
+	cmp $0x08, %rdx
+	jb .Lcmp_bytes
+
+	// Check if r2 is aligned to 8 bytes
+	test $0x07, %edi
+	jz .Lcmp_qwords
+
+.Lcmp_head:
+	// rdx >= 8 on entry and at most 7 bytes are consumed here,
+	// so rdx stays positive.
+	cmpsb
+	jb .Lcmp_less
+	ja .Lcmp_greater
+
+	dec %rdx
+	test $0x07, %edi
+	jnz .Lcmp_head
+
+.Lcmp_qwords:
+	mov %rdx, %rcx
+	shr $0x03, %rcx
+	// No whole quadword left (possible after the head loop):
+	// skip cmpsq, otherwise we would branch on the flags of shr.
+	jz .Lcmp_tail
+
+	repe cmpsq
+	jne .Lcmp_qword_differs
+
+.Lcmp_tail:
+	and $0x07, %edx
+	jz .Lcmp_equal
+
+.Lcmp_bytes: // Compare the remaining bytes, rdx is 1..7 here
+	mov %rdx, %rcx
+
+	repe cmpsb
+	jb .Lcmp_less
+	ja .Lcmp_greater
+
+.Lcmp_equal:
+	xor %eax, %eax // Return 0
+	ret
+
+.Lcmp_qword_differs:
+	// The flags of cmpsq order the quadwords by their most significant
+	// differing byte, which on a little-endian machine is the LAST
+	// differing byte in memory. Step back over the quadword and let a
+	// byte-wise scan find the first differing byte instead.
+	sub $0x08, %rsi
+	sub $0x08, %rdi
+	mov $0x08, %ecx
+
+	repe cmpsb
+	jb .Lcmp_less
+	// The quadwords differ, so the scan stopped on "above"
+
+.Lcmp_greater:
+	mov $0x01, %eax
+	ret
+
+.Lcmp_less:
+	mov $-0x01, %rax
+	ret
diff --git a/arch/x64/linux/memory/copy.S b/arch/x64/linux/memory/copy.S
new file mode 100644
index 0000000..bf74e0f
--- /dev/null
+++ b/arch/x64/linux/memory/copy.S
@@ -0,0 +1,67 @@
+ .text
+
+/*
+ * copyMemory.
+ *
+ * Forward copy of "source length" bytes from the source to the target.
+ * The target length is not consulted.
+ *
+ * rdi - source length.
+ * rsi - source data.
+ * rdx - target length.
+ * rcx - target data.
+ *
+ * Clobbers rcx, rdx, rsi, rdi and the flags.
+ * Relies on the direction flag being clear on entry, so that movs
+ * advances the pointers (TODO confirm against the caller's ABI).
+ */
+	.globl _D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv
+	.type _D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv, @function
+
+_D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv:
+	movq %rdi, %rdx // rdx = number of bytes left to copy
+	movq %rcx, %rdi // movs writes through rdi
+
+	// Short copies skip the alignment work altogether
+	cmpq $0x08, %rdx
+	jb .Lcopy_bytes
+
+.Lcopy_align:
+	// Copy single bytes until the target is 8-byte aligned.
+	// At most 7 bytes go this way and rdx >= 8 on entry.
+	testl $0x07, %edi
+	jz .Lcopy_qwords
+
+	movsb
+	decq %rdx
+	jmp .Lcopy_align
+
+.Lcopy_qwords:
+	// Bulk of the data, a quadword at a time
+	movq %rdx, %rcx
+	shrq $0x03, %rcx
+	rep movsq
+
+	// Whatever does not fill a whole quadword
+	andl $0x07, %edx
+	jz .Lcopy_done
+
+.Lcopy_bytes:
+	movq %rdx, %rcx
+	rep movsb
+
+.Lcopy_done:
+	ret
+
+/*
+ * moveMemory.
+ *
+ * Copies "source length" bytes from the source to the target. The two
+ * areas may overlap in either direction: the copy runs forward when the
+ * target starts at or below the source and backward otherwise, so no
+ * source byte is overwritten before it has been read.
+ * The target length is not consulted.
+ *
+ * rdi - source length.
+ * rsi - source data.
+ * rdx - target length.
+ * rcx - target data.
+ *
+ * Clobbers rcx, rdx, rsi, rdi and the flags.
+ * Expects the direction flag to be clear on entry and leaves it clear
+ * on return (TODO confirm the entry state against the caller's ABI).
+ */
+	.globl _D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv
+	.type _D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv, @function
+
+_D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv:
+	mov %rdi, %rdx // rdx = number of bytes to move
+	mov %rcx, %rdi // movs writes through rdi
+	mov %rdx, %rcx // rep counter
+
+	// Pointers are addresses, compare them unsigned.
+	// target <= source: a forward copy never clobbers unread bytes.
+	cmp %rsi, %rdi
+	jbe .Lmove_forward
+
+	// target > source: start at the last byte and walk down
+	lea -1(%rsi, %rdx), %rsi
+	lea -1(%rdi, %rdx), %rdi
+
+	std // Set the direction flag
+
+	rep movsb
+
+	cld // Clear the direction flag
+	ret
+
+.Lmove_forward:
+	rep movsb
+	ret
diff --git a/arch/x64/linux/memory/fill.S b/arch/x64/linux/memory/fill.S
new file mode 100644
index 0000000..d4fc0ac
--- /dev/null
+++ b/arch/x64/linux/memory/fill.S
@@ -0,0 +1,155 @@
+ .text
+
+/*
+ * fillMemory.
+ *
+ * Writes the low byte of the value to every byte of the given area.
+ * The wide stores write the whole 64-bit value, so the caller has to
+ * pass the fill byte replicated into all eight bytes of rdx.
+ *
+ * rdi - length.
+ * rsi - pointer.
+ * rdx - value filled with a byte.
+ *
+ * Clobbers rax, rcx, r8, xmm0 and the flags.
+ *
+ * Layout of the work:
+ *   1. single bytes up to the next 16-byte boundary, but never more
+ *      than "length" bytes;
+ *   2. aligned 64-byte chunks while at least 64 bytes remain;
+ *   3. the remainder (< 64) as one optional 32-, 16- and 8-byte store,
+ *      selected by the corresponding bit of the remaining count;
+ *   4. the last 0..7 bytes one at a time.
+ */
+	.globl _D5tanya6memory2op10fillMemoryFNaNbNiAvmZv
+	.type _D5tanya6memory2op10fillMemoryFNaNbNiAvmZv, @function
+
+_D5tanya6memory2op10fillMemoryFNaNbNiAvmZv:
+	// Check for zero length
+	test %rdi, %rdi
+	jz .Lfill_done
+
+	mov %rdi, %rax // rax = bytes left to write
+	mov %rsi, %r8 // r8 = write cursor
+
+	// Replicate the 64-bit pattern into both halves of xmm0
+	movq %rdx, %xmm0
+	movlhps %xmm0, %xmm0
+
+	// Number of bytes up to the next 16-byte boundary: (-pointer) & 15.
+	// Writing ecx clears the upper half of rcx.
+	mov %esi, %ecx
+	neg %ecx
+	and $0x0f, %ecx
+	jz .Lfill_aligned
+
+	// Never write more than the caller asked for: a short area may end
+	// before the boundary is reached.
+	cmp %rax, %rcx
+	cmova %rax, %rcx
+	sub %rcx, %rax
+
+.Lfill_head:
+	mov %dl, (%r8) // Write a byte
+	inc %r8
+	dec %rcx
+	jnz .Lfill_head
+
+	// The area may have ended before or exactly at the boundary
+	test %rax, %rax
+	jz .Lfill_done
+
+.Lfill_aligned:
+	// r8 is 16-byte aligned from here on, movdqa is safe.
+	// The count is a size, hence the unsigned jumps.
+	cmp $64, %rax
+	jb .Lfill_tail_32
+
+.Lfill_loop_64:
+	movdqa %xmm0, (%r8)
+	movdqa %xmm0, 16(%r8)
+	movdqa %xmm0, 32(%r8)
+	movdqa %xmm0, 48(%r8)
+
+	add $64, %r8
+	sub $64, %rax
+
+	cmp $64, %rax
+	jae .Lfill_loop_64
+
+.Lfill_tail_32:
+	// rax < 64 now, so bits 5, 4 and 3 of al tell which of the 32-,
+	// 16- and 8-byte stores are needed; rax itself is left untouched.
+	test $32, %al
+	jz .Lfill_tail_16
+
+	movdqa %xmm0, (%r8)
+	movdqa %xmm0, 16(%r8)
+	add $32, %r8
+
+.Lfill_tail_16:
+	test $16, %al
+	jz .Lfill_tail_8
+
+	movdqa %xmm0, (%r8)
+	add $16, %r8
+
+.Lfill_tail_8:
+	test $8, %al
+	jz .Lfill_tail_1
+
+	mov %rdx, (%r8)
+	add $8, %r8
+
+.Lfill_tail_1:
+	// The last 0..7 bytes
+	and $0x07, %eax
+	jz .Lfill_done
+
+.Lfill_bytes:
+	mov %dl, (%r8)
+	inc %r8
+	dec %eax
+	jnz .Lfill_bytes
+
+.Lfill_done:
+	ret