Than Ngo 13262e
From: Michel Normand <normand@linux.vnet.ibm.com>
Than Ngo 13262e
Subject: atlas.3.10.2 ppc64le abiv2 patch
Than Ngo 13262e
Date: Mon, 28 Jul 2014 04:29:05 -0400
Than Ngo 13262e
Than Ngo 13262e
atlas.3.10.2 abiv2 step2: complete the changes already present in atlas 3.10.2
Than Ngo 13262e
* still some files with opd ABI V1 to be disabled for ABI V2
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
Than Ngo 13262e
Than Ngo 13262e
atlas.3.10.2 ppc64le abiv2 step3
Than Ngo 13262e
* change offsets of parameters read from stack to avoid some segfaults.
Than Ngo 13262e
  (value changes 120 => 104 and 128 => 112 identified by gdb investigation)
Than Ngo 13262e
Than Ngo 13262e
Despite this step3 patch, there are two remaining problems for the ppc64le architecture:
Than Ngo 13262e
* TODO: still have seg-faults in console during build/check
Than Ngo 13262e
but this is not critical (without make check) and RPMs are generated on Fedora.
Than Ngo 13262e
Unable to investigate because of the problem tracked by issue 950:
Than Ngo 13262e
https://sourceforge.net/p/math-atlas/support-requests/950/
Than Ngo 13262e
Than Ngo 13262e
* TODO: make check fails because of an xsslvtst execution failure
Than Ngo 13262e
related to vector assembly code that assumes big-endian env
Than Ngo 13262e
as written in ATL_cmm4x4x128_av.c and ATL_smm4x4x128_av.c.
Than Ngo 13262e
Would need significant work to support little-endian as per
Than Ngo 13262e
endianness comments of all PowerPC vector instructions in:
Than Ngo 13262e
https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D/$file/vector_simd_pem.ppc.2005AUG23.pdf
Than Ngo 13262e
Than Ngo 13262e
Signed-off-by: Michel Normand <normand@linux.vnet.ibm.com>
Than Ngo 13262e
---
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c |    7 +++++++
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c |    7 +++++++
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c |    9 ++++++++-
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c |   20 ++++++++++++++++++--
Than Ngo 13262e
 tune/blas/gemm/CASES/ATL_smm4x4x128_av.c |   23 ++++++++++++++++++++++-
Than Ngo 13262e
 5 files changed, 62 insertions(+), 4 deletions(-)
Than Ngo 13262e
Than Ngo 13262e
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
Than Ngo 13262e
===================================================================
Than Ngo 13262e
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
Than Ngo 13262e
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
Than Ngo 13262e
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
Than Ngo 13262e
 	.globl  Mjoin(_,ATL_USERMM)
Than Ngo 13262e
 Mjoin(_,ATL_USERMM):
Than Ngo 13262e
    #else
Than Ngo 13262e
-      #if defined(ATL_USE64BITS)
Than Ngo 13262e
+      #if defined(ATL_USE64BITS) && _CALL_ELF != 2
Than Ngo 13262e
 /*
Than Ngo 13262e
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
Than Ngo 13262e
  */
Than Ngo 13262e
@@ -324,8 +324,15 @@ ATL_USERMM:
Than Ngo 13262e
 #endif
Than Ngo 13262e
 
Than Ngo 13262e
 #ifdef ATL_USE64BITS
Than Ngo 13262e
+#if _CALL_ELF == 2
Than Ngo 13262e
+/* ABIv2 */
Than Ngo 13262e
+        ld      pC0, 104(r1)
Than Ngo 13262e
+        ld      incCn, 112(r1)
Than Ngo 13262e
+#else
Than Ngo 13262e
+/* ABIv1 */
Than Ngo 13262e
         ld      pC0, 120(r1)
Than Ngo 13262e
         ld      incCn, 128(r1)
Than Ngo 13262e
+#endif
Than Ngo 13262e
 #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
Than Ngo 13262e
         lwz     pC0, 68(r1)
Than Ngo 13262e
         lwz     incCn,  72(r1)
Than Ngo 13262e
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
Than Ngo 13262e
===================================================================
Than Ngo 13262e
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
Than Ngo 13262e
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
Than Ngo 13262e
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
Than Ngo 13262e
                 const TYPE beta, TYPE *C, const int ldc)
Than Ngo 13262e
                                   (r10)    8(r1)
Than Ngo 13262e
 *******************************************************************************
Than Ngo 13262e
-64 bit ABIs:
Than Ngo 13262e
+64 bit ABIv1s:
Than Ngo 13262e
                          r3           r4           r5             r6/f1
Than Ngo 13262e
 void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
Than Ngo 13262e
                            r7             r8             r9            r10
Than Ngo 13262e
                 const TYPE *A, const int lda, const TYPE *B, const int ldb,
Than Ngo 13262e
                              f2   120(r1)        128(r1)
Than Ngo 13262e
                 const TYPE beta, TYPE *C, const int ldc)
Than Ngo 13262e
+
Than Ngo 13262e
+64 bit ABIv2s:
Than Ngo 13262e
+                         r3           r4           r5             r6/f1
Than Ngo 13262e
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
Than Ngo 13262e
+                           r7             r8             r9            r10
Than Ngo 13262e
+                const TYPE *A, const int lda, const TYPE *B, const int ldb,
Than Ngo 13262e
+                             f2   104(r1)        112(r1)
Than Ngo 13262e
+                const TYPE beta, TYPE *C, const int ldc)
Than Ngo 13262e
 #endif
Than Ngo 13262e
 #ifdef ATL_AS_AIX_PPC
Than Ngo 13262e
         .csect .text[PR]
Than Ngo 13262e
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
Than Ngo 13262e
 	.globl  Mjoin(_,ATL_USERMM)
Than Ngo 13262e
 Mjoin(_,ATL_USERMM):
Than Ngo 13262e
    #else
Than Ngo 13262e
-      #if defined(ATL_USE64BITS)
Than Ngo 13262e
+      #if defined(ATL_USE64BITS) && _CALL_ELF != 2
Than Ngo 13262e
 /*
Than Ngo 13262e
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
Than Ngo 13262e
  */
Than Ngo 13262e
@@ -257,9 +265,17 @@ ATL_USERMM:
Than Ngo 13262e
    #endif
Than Ngo 13262e
 #endif
Than Ngo 13262e
 
Than Ngo 13262e
+
Than Ngo 13262e
 #if defined (ATL_USE64BITS)
Than Ngo 13262e
+#if _CALL_ELF == 2
Than Ngo 13262e
+/* ABIv2 */
Than Ngo 13262e
+        ld      pC0, 104(r1)
Than Ngo 13262e
+        ld      incCn, 112(r1)
Than Ngo 13262e
+#else
Than Ngo 13262e
+/* ABIv1 */
Than Ngo 13262e
         ld      pC0, 120(r1)
Than Ngo 13262e
         ld      incCn, 128(r1)
Than Ngo 13262e
+#endif
Than Ngo 13262e
 #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
Than Ngo 13262e
         lwz     pC0, 68(r1)
Than Ngo 13262e
         lwz     incCn,  72(r1)
Than Ngo 13262e
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
Than Ngo 13262e
===================================================================
Than Ngo 13262e
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
Than Ngo 13262e
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
Than Ngo 13262e
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
Than Ngo 13262e
 	.globl  Mjoin(_,ATL_USERMM)
Than Ngo 13262e
 Mjoin(_,ATL_USERMM):
Than Ngo 13262e
 #else
Than Ngo 13262e
-   #if defined(ATL_USE64BITS)
Than Ngo 13262e
+   #if defined(ATL_USE64BITS) && _CALL_ELF != 2
Than Ngo 13262e
 /*
Than Ngo 13262e
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
Than Ngo 13262e
  */
Than Ngo 13262e
@@ -221,8 +221,15 @@ ATL_USERMM:
Than Ngo 13262e
  *      kernel instead
Than Ngo 13262e
  */
Than Ngo 13262e
 #if defined (ATL_USE64BITS)
Than Ngo 13262e
+#if _CALL_ELF == 2
Than Ngo 13262e
+/* ABIv2 */
Than Ngo 13262e
+        ld      r10, 104(r1)
Than Ngo 13262e
+        ld      r5, 112(r1)
Than Ngo 13262e
+#else
Than Ngo 13262e
+/* ABIv1 */
Than Ngo 13262e
         ld      r10, 120(r1)
Than Ngo 13262e
         ld      r5, 128(r1)
Than Ngo 13262e
+#endif
Than Ngo 13262e
 #elif defined(ATL_AS_OSX_PPC)
Than Ngo 13262e
         lwz     r10, 60(r1)
Than Ngo 13262e
         lwz     r5,  64(r1)
Than Ngo 13262e
@@ -285,8 +292,15 @@ ATL_USERMM:
Than Ngo 13262e
         eqv     r0, r0, r0      /* all 1s */
Than Ngo 13262e
         ATL_WriteVRSAVE(r0)     /* signal we use all vector regs */
Than Ngo 13262e
 #if defined (ATL_USE64BITS)
Than Ngo 13262e
+#if _CALL_ELF == 2
Than Ngo 13262e
+        /* ABIv2 */
Than Ngo 13262e
+        ld      pC0, FSIZE+104(r1)
Than Ngo 13262e
+        ld      ldc, FSIZE+112(r1)
Than Ngo 13262e
+#else
Than Ngo 13262e
+        /* ABIv1 */
Than Ngo 13262e
         ld      pC0, FSIZE+120(r1)
Than Ngo 13262e
         ld      ldc, FSIZE+128(r1)
Than Ngo 13262e
+#endif
Than Ngo 13262e
 #elif defined(ATL_AS_OSX_PPC)
Than Ngo 13262e
         lwz     pC0, FSIZE+60(r1)
Than Ngo 13262e
         lwz     ldc,  FSIZE+64(r1)
Than Ngo 13262e
@@ -4258,8 +4272,15 @@ UNALIGNED_C:
Than Ngo 13262e
         eqv     r0, r0, r0      /* all 1s */
Than Ngo 13262e
         ATL_WriteVRSAVE(r0)     /* signal we use all vector regs */
Than Ngo 13262e
 #if defined (ATL_USE64BITS)
Than Ngo 13262e
+#if _CALL_ELF == 2
Than Ngo 13262e
+        /* ABIv2 */
Than Ngo 13262e
+        ld      pC0, FSIZE+104(r1)
Than Ngo 13262e
+        ld      ldc, FSIZE+112(r1)
Than Ngo 13262e
+#else
Than Ngo 13262e
+        /* ABIv1 */
Than Ngo 13262e
         ld      pC0, FSIZE+120(r1)
Than Ngo 13262e
         ld      ldc, FSIZE+128(r1)
Than Ngo 13262e
+#endif
Than Ngo 13262e
 #elif defined(ATL_AS_OSX_PPC)
Than Ngo 13262e
         lwz     pC0, FSIZE+60(r1)
Than Ngo 13262e
         lwz     ldc,  FSIZE+64(r1)
Than Ngo 13262e
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
Than Ngo 13262e
===================================================================
Than Ngo 13262e
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
Than Ngo 13262e
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
Than Ngo 13262e
@@ -258,8 +258,15 @@ ATL_USERMM:
Than Ngo 13262e
         eqv     r0, r0, r0      /* all 1s */
Than Ngo 13262e
         ATL_WriteVRSAVE(r0)     /* signal we use all vector regs */
Than Ngo 13262e
 #if defined (ATL_USE64BITS)
Than Ngo 13262e
+#if _CALL_ELF == 2
Than Ngo 13262e
+/* ABIv2 */
Than Ngo 13262e
+        ld      pC0, FSIZE+104(r1)
Than Ngo 13262e
+        ld      ldc, FSIZE+112(r1)
Than Ngo 13262e
+#else
Than Ngo 13262e
+/* ABIv1 */
Than Ngo 13262e
         ld      pC0, FSIZE+120(r1)
Than Ngo 13262e
         ld      ldc, FSIZE+128(r1)
Than Ngo 13262e
+#endif
Than Ngo 13262e
 #elif defined(ATL_AS_OSX_PPC)
Than Ngo 13262e
         lwz     pC0, FSIZE+60(r1)
Than Ngo 13262e
         lwz     ldc,  FSIZE+64(r1)
Than Ngo 13262e
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
Than Ngo 13262e
===================================================================
Than Ngo 13262e
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
Than Ngo 13262e
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
Than Ngo 13262e
@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM):
Than Ngo 13262e
  */
Than Ngo 13262e
 #ifdef ATL_GAS_LINUX_PPC
Than Ngo 13262e
    #ifdef ATL_USE64BITS
Than Ngo 13262e
+      #if _CALL_ELF == 2
Than Ngo 13262e
+      /* ABIv2 */
Than Ngo 13262e
+        ld      pC0, 104(r1)
Than Ngo 13262e
+        ld      incCn, 112(r1)
Than Ngo 13262e
+      #else
Than Ngo 13262e
+      /* ABIv1 */
Than Ngo 13262e
 	ld 	pC0, 120(r1)
Than Ngo 13262e
 	ld 	incCn, 128(r1)
Than Ngo 13262e
+      #endif
Than Ngo 13262e
    #else
Than Ngo 13262e
 	lwz	incCn, FSIZE+8(r1)
Than Ngo 13262e
    #endif