summaryrefslogtreecommitdiff
path: root/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S')
-rw-r--r--REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S152
1 files changed, 152 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
new file mode 100644
index 0000000000..2ab33b59a7
--- /dev/null
+++ b/REORG.TODO/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
@@ -0,0 +1,152 @@
+/* Function sincosf vectorized with SSE2.
+ Copyright (C) 2014-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVbN4vl4l4_sincosf)
+WRAPPER_IMPL_SSE2_fFF sincosf
+END (_ZGVbN4vl4l4_sincosf)
+libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
+
+/* SSE2 ISA version as wrapper to scalar (for vector
+ function declared with #pragma omp declare simd notinbranch). */
+.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
+#ifndef __ILP32__
+ subq $120, %rsp
+ cfi_adjust_cfa_offset(120)
+ movaps %xmm0, 96(%rsp)
+ lea (%rsp), %rdi
+ movdqa %xmm1, 32(%rdi)
+ lea 16(%rsp), %rsi
+ movdqa %xmm2, 32(%rsi)
+ movdqa %xmm3, 48(%rsi)
+ movdqa %xmm4, 64(%rsi)
+ call JUMPTARGET(\callee)
+ movss 100(%rsp), %xmm0
+ lea 4(%rsp), %rdi
+ lea 20(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ movss 104(%rsp), %xmm0
+ lea 8(%rsp), %rdi
+ lea 24(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ movss 108(%rsp), %xmm0
+ lea 12(%rsp), %rdi
+ lea 28(%rsp), %rsi
+ call JUMPTARGET(\callee)
+ movq 32(%rsp), %rdx
+ movq 40(%rsp), %rsi
+ movq 48(%rsp), %r8
+ movq 56(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 80(%rsp), %rdi
+ movq 88(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ addq $120, %rsp
+ cfi_adjust_cfa_offset(-120)
+ ret
+#else
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset 6, -16
+ pushq %rbx
+ .cfi_def_cfa_offset 24
+ .cfi_offset 3, -24
+ subl $88, %esp
+ .cfi_def_cfa_offset 112
+ leal 64(%rsp), %esi
+ movaps %xmm1, (%esp)
+ leal 48(%rsp), %edi
+ movaps %xmm2, 16(%esp)
+ movq %rsi, %rbp
+ movq %rdi, %rbx
+ movaps %xmm0, 32(%esp)
+ call JUMPTARGET(\callee)
+ movups 36(%esp), %xmm0
+ leal 4(%rbp), %esi
+ leal 4(%rbx), %edi
+ call JUMPTARGET(\callee)
+ movups 40(%esp), %xmm0
+ leal 8(%rbp), %esi
+ leal 8(%rbx), %edi
+ call JUMPTARGET(\callee)
+ movups 44(%esp), %xmm0
+ leal 12(%rbp), %esi
+ leal 12(%rbx), %edi
+ call JUMPTARGET(\callee)
+ movq (%esp), %rax
+ movss 48(%esp), %xmm0
+ movdqa (%esp), %xmm4
+ movdqa 16(%esp), %xmm7
+ movss %xmm0, (%eax)
+ movss 52(%esp), %xmm0
+ pextrd $1, %xmm4, %eax
+ movss %xmm0, (%eax)
+ movq 8(%esp), %rax
+ movss 56(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movss 60(%esp), %xmm0
+ pextrd $3, %xmm4, %eax
+ movss %xmm0, (%eax)
+ movq 16(%esp), %rax
+ movss 64(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movss 68(%esp), %xmm0
+ pextrd $1, %xmm7, %eax
+ movss %xmm0, (%eax)
+ movq 24(%esp), %rax
+ movss 72(%esp), %xmm0
+ movss %xmm0, (%eax)
+ movss 76(%esp), %xmm0
+ pextrd $3, %xmm7, %eax
+ movss %xmm0, (%eax)
+ addl $88, %esp
+ .cfi_def_cfa_offset 24
+ popq %rbx
+ .cfi_def_cfa_offset 16
+ popq %rbp
+ .cfi_def_cfa_offset 8
+ ret
+#endif
+.endm
+
+ENTRY (_ZGVbN4vvv_sincosf)
+WRAPPER_IMPL_SSE2_fFF_vvv sincosf
+END (_ZGVbN4vvv_sincosf)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN4vvv_sincosf)
+#endif