diff options
| author | Eugen Wissner <belka@caraus.de> | 2017-09-26 08:26:12 +0200 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2017-09-26 08:26:12 +0200 |
| commit | faebf3e4d5f6fcdfe1ad9f30c75fb478a7259fd1 (patch) | |
| tree | ad53ce90a2314d4676331bce326c7b5aeb9a2f59 /arch | |
| parent | 20e7df386bfd9449bf7fb0926918e176bb37140c (diff) | |
| download | tanya-faebf3e4d5f6fcdfe1ad9f30c75fb478a7259fd1.tar.gz | |
Fix #304
Replace inline assembly with GAS.
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/build.ninja | 5 | ||||
| -rw-r--r-- | arch/x64/linux/memory/cmp.S | 82 | ||||
| -rw-r--r-- | arch/x64/linux/memory/copy.S | 83 | ||||
| -rw-r--r-- | arch/x64/linux/memory/fill.S | 107 |
4 files changed, 276 insertions, 1 deletions
diff --git a/arch/build.ninja b/arch/build.ninja
index eff2eb9..fce2838 100644
--- a/arch/build.ninja
+++ b/arch/build.ninja
@@ -5,6 +5,9 @@ rule archive
   command = ar rcs $out $in
 
 build abs.o: gas x64/linux/math/abs.S
+build cmp.o: gas x64/linux/memory/cmp.S
+build fill.o: gas x64/linux/memory/fill.S
+build copy.o: gas x64/linux/memory/copy.S
 build syscall.o: gas x64/linux/syscall.S
 
-build tanya.a: archive syscall.o abs.o
+build tanya.a: archive syscall.o copy.o fill.o cmp.o abs.o
diff --git a/arch/x64/linux/memory/cmp.S b/arch/x64/linux/memory/cmp.S
new file mode 100644
index 0000000..169e2eb
--- /dev/null
+++ b/arch/x64/linux/memory/cmp.S
@@ -0,0 +1,82 @@
+	.text
+
+/*
+ * cmpMemory.
+ *
+ * Three-way comparison of two memory blocks. Blocks of different
+ * lengths are ordered by their lengths alone. Blocks of equal length
+ * are ordered by the first byte that differs, compared as an unsigned
+ * value, whatever the alignment of the data.
+ *
+ * rdi - r1 length.
+ * rsi - r1 data.
+ * rdx - r2 length.
+ * rcx - r2 data.
+ *
+ * Returns in eax: -1 if r1 < r2, 1 if r1 > r2, 0 if they are equal.
+ * Clobbers rcx, rdx, rsi, rdi and the flags.
+ */
+	.globl _D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi
+	.type _D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi, @function
+
+_D5tanya6memory2op9cmpMemoryFNaNbNixAvxAvZi:
+	// Compare the lengths. They are unsigned, hence jb/ja
+	cmp %rdx, %rdi
+	jb .Lless
+	ja .Lgreater
+
+	mov %rcx, %rdi // rsi = r1 data, rdi = r2 data, rdx = length
+
+	// Blocks shorter than 8 bytes are compared byte by byte
+	cmp $0x08, %rdx
+	jb .Ltail
+	test $0x07, %edi
+	jz .Lquads
+
+.Lalign: // Compare single bytes until rdi is 8-byte aligned
+	cmpsb
+	jb .Lless
+	ja .Lgreater
+
+	dec %rdx
+	test $0x07, %edi
+	jnz .Lalign
+
+.Lquads:
+	mov %rdx, %rcx
+	shr $0x03, %rcx
+	jz .Ltail // No whole quadword left, repe would not set the flags
+
+	and $0x07, %edx // Bytes left over after the quadwords
+	repe cmpsq
+	je .Ltail
+
+	// A quadword differs. Its little-endian value does not order the
+	// bytes by address, so step back and compare these 8 bytes singly
+	sub $0x08, %rsi
+	sub $0x08, %rdi
+	mov $0x08, %edx
+
+.Ltail: // Compare the remaining bytes
+	mov %rdx, %rcx
+	test %rcx, %rcx
+	jz .Lequal // repe with rcx = 0 would not set the flags
+
+	repe cmpsb
+	jb .Lless
+	ja .Lgreater
+
+.Lequal:
+	xor %eax, %eax // Return 0
+	ret
+
+.Lgreater:
+	mov $0x01, %eax
+	ret
+
+.Lless:
+	mov $-0x01, %eax
+	ret
+
+	// Nothing here needs an executable stack
+	.section .note.GNU-stack, "", @progbits
diff --git a/arch/x64/linux/memory/copy.S b/arch/x64/linux/memory/copy.S
new file mode 100644
index 0000000..bf74e0f
--- /dev/null
+++ b/arch/x64/linux/memory/copy.S
@@ -0,0 +1,83 @@
+	.text
+
+/*
+ * copyMemory.
+ *
+ * Copies the source block to the target, front to back. The number of
+ * bytes copied is the source length. The target length is never read,
+ * so the target has to be at least as long as the source.
+ *
+ * rdi - source length.
+ * rsi - source data.
+ * rdx - target length.
+ * rcx - target data.
+ *
+ * Clobbers rcx, rdx, rsi, rdi and the flags.
+ */
+	.globl _D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv
+	.type _D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv, @function
+
+_D5tanya6memory2op10copyMemoryFNaNbNixAvAvZv:
+	mov %rdi, %rdx // rdx = bytes left to copy
+	mov %rcx, %rdi // rdi = target, rsi = source
+
+	// Blocks shorter than 8 bytes are copied byte by byte
+	cmp $0x08, %rdx
+	jb .Lcopy_tail
+	test $0x07, %edi
+	jz .Lcopy_quads
+
+.Lcopy_align: // Copy single bytes until the target is 8-byte aligned
+	movsb
+	dec %rdx
+	test $0x07, %edi
+	jnz .Lcopy_align
+
+.Lcopy_quads:
+	mov %rdx, %rcx
+	shr $0x03, %rcx
+	rep movsq
+	and $0x07, %edx
+	jz .Lcopy_end
+
+.Lcopy_tail: // Copy the remaining bytes
+	mov %rdx, %rcx
+	rep movsb
+
+.Lcopy_end:
+	ret
+
+/*
+ * moveMemory.
+ *
+ * Copies the source block to the target, back to front, one byte at a
+ * time, starting with the last byte of both blocks. The number of bytes
+ * copied is the source length. The target length is never read.
+ *
+ * rdi - source length.
+ * rsi - source data.
+ * rdx - target length.
+ * rcx - target data.
+ *
+ * Clobbers rcx, rdx, rsi, rdi and the flags.
+ */
+	.globl _D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv
+	.type _D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv, @function
+
+_D5tanya6memory2op10moveMemoryFNaNbNixAvAvZv:
+	mov %rdi, %rdx // rdx = number of bytes to copy
+
+	// Point rsi and rdi at the last byte of each block
+	lea -1(%rdx, %rsi), %rsi
+	lea -1(%rdx, %rcx), %rdi
+	mov %rdx, %rcx
+
+	std // Set the direction flag: movsb walks downwards
+
+	rep movsb
+
+	cld // The ABI expects the direction flag to be clear on return
+	ret
+
+	// Nothing here needs an executable stack
+	.section .note.GNU-stack, "", @progbits
diff --git a/arch/x64/linux/memory/fill.S b/arch/x64/linux/memory/fill.S
new file mode 100644
index 0000000..d4fc0ac
--- /dev/null
+++ b/arch/x64/linux/memory/fill.S
@@ -0,0 +1,107 @@
+	.text
+
+/*
+ * fillMemory.
+ *
+ * Fills a memory block with a byte. Bytes are stored singly up to the
+ * first 16-byte boundary, then in aligned runs of 64, 32, 16 and 8
+ * bytes, then singly again. At most rdi bytes are written, even if
+ * the block ends before the first 16-byte boundary.
+ *
+ * rdi - length.
+ * rsi - pointer.
+ * rdx - value filled with a byte.
+ *
+ * Clobbers rax, rcx, r8, xmm0 and the flags.
+ */
+	.globl _D5tanya6memory2op10fillMemoryFNaNbNiAvmZv
+	.type _D5tanya6memory2op10fillMemoryFNaNbNiAvmZv, @function
+
+_D5tanya6memory2op10fillMemoryFNaNbNiAvmZv:
+	// Check for zero length
+	test %rdi, %rdi
+	jz .Lend
+
+	mov %rdi, %rax // rax = bytes left to write
+	mov %rsi, %r8 // r8 = next byte to write
+
+	// xmm0 = the 8 bytes of rdx in both halves
+	movq %rdx, %xmm0
+	movlhps %xmm0, %xmm0
+
+	// rcx = bytes up to the next 16-byte boundary: (-pointer) & 15
+	mov %rsi, %rcx
+	neg %rcx
+	and $0x0f, %ecx
+	jz .Laligned
+
+	// A short block may end before the boundary, do not write past it
+	cmp %rax, %rcx
+	cmova %rax, %rcx
+
+.Lhead: // Write single bytes, rcx >= 1 here
+	mov %dl, (%r8)
+	inc %r8
+	dec %rax
+	dec %rcx
+	jnz .Lhead
+
+.Laligned: // r8 is 16-byte aligned or rax is 0. Lengths are unsigned
+	cmp $64, %rax
+	jb .Lfill_32
+
+.Lfill_64: // Write 64 bytes at a time
+	movdqa %xmm0, (%r8)
+	movdqa %xmm0, 16(%r8)
+	movdqa %xmm0, 32(%r8)
+	movdqa %xmm0, 48(%r8)
+
+	add $64, %r8
+	sub $64, %rax
+
+	cmp $64, %rax
+	jae .Lfill_64
+
+.Lfill_32: // Fewer than 64 bytes are left
+	cmp $32, %rax
+	jb .Lfill_16
+
+	movdqa %xmm0, (%r8)
+	movdqa %xmm0, 16(%r8)
+
+	add $32, %r8
+	sub $32, %rax
+
+.Lfill_16: // Fewer than 32 bytes are left
+	cmp $16, %rax
+	jb .Lfill_8
+
+	movdqa %xmm0, (%r8)
+
+	add $16, %r8
+	sub $16, %rax
+
+.Lfill_8: // Fewer than 16 bytes are left
+	cmp $8, %rax
+	jb .Lfill_1
+
+	mov %rdx, (%r8) // Presumably every byte of rdx is the fill byte
+
+	add $8, %r8
+	sub $8, %rax
+
+.Lfill_1: // Fewer than 8 bytes are left
+	test %rax, %rax
+	jz .Lend
+
+.Lfill_byte:
+	mov %dl, (%r8)
+	inc %r8
+	dec %rax
+	jnz .Lfill_byte
+
+.Lend:
+	ret
+
+	// Nothing here needs an executable stack
+	.section .note.GNU-stack, "", @progbits
