From: Michel Normand Subject: atlas.3.10.2 ppc64le abiv2 patch Date: Mon, 28 Jul 2014 04:29:05 -0400 atlas.3.10.2 abiv2 step2: complete the changes already present in atlas 3.10.2 * some files still have the ABI v1 opd section, which must be disabled for ABI v2: tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c tune/blas/gemm/CASES/ATL_smm4x4x128_av.c atlas.3.10.2 ppc64le abiv2 step3: * change the offsets of the parameters read from the stack to avoid some segfaults. (The values changed from 120 => 104 and from 128 => 112, as identified by gdb investigation.) Despite this step3 patch, two problems remain for the ppc64le architecture: * TODO: segfaults are still reported on the console during build/check, but this is not critical (when make check is skipped) and the RPMs are still generated on Fedora. Unable to investigate because of the problem tracked by issue 950 https://sourceforge.net/p/math-atlas/support-requests/950/ * TODO: make check fails because xsslvtst fails to execute; this is related to the vector assembly code, which assumes a big-endian environment, as written in ATL_cmm4x4x128_av.c and ATL_smm4x4x128_av.c. 
Would need significant work to support little-endian as per endianess comments of all PowerPC vector instructions in: https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D/$file/vector_simd_pem.ppc.2005AUG23.pdf Signed-off-by: Michel Normand --- tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 7 +++++++ tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 7 +++++++ tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 9 ++++++++- tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 20 ++++++++++++++++++-- tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 23 ++++++++++++++++++++++- 5 files changed, 62 insertions(+), 4 deletions(-) Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c =================================================================== --- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c +++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c @@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM): .globl Mjoin(_,ATL_USERMM) Mjoin(_,ATL_USERMM): #else - #if defined(ATL_USE64BITS) + #if defined(ATL_USE64BITS) && _CALL_ELF != 2 /* * Official Program Descripter section, seg fault w/o it on Linux/PPC64 */ @@ -324,8 +324,15 @@ ATL_USERMM: #endif #ifdef ATL_USE64BITS +#if _CALL_ELF == 2 +/* ABIv2 */ + ld pC0, 104(r1) + ld incCn, 112(r1) +#else +/* ABIv1 */ ld pC0, 120(r1) ld incCn, 128(r1) +#endif #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC) lwz pC0, 68(r1) lwz incCn, 72(r1) Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c =================================================================== --- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c +++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c @@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N const TYPE beta, TYPE *C, const int ldc) (r10) 8(r1) ******************************************************************************* -64 bit ABIs: +64 bit ABIv1s: r3 r4 r5 r6/f1 void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, r7 r8 r9 r10 const TYPE *A, const int lda, const TYPE *B, const 
int ldb, f2 120(r1) 128(r1) const TYPE beta, TYPE *C, const int ldc) + +64 bit ABIv2s: + r3 r4 r5 r6/f1 +void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, + r7 r8 r9 r10 + const TYPE *A, const int lda, const TYPE *B, const int ldb, + f2 104(r1) 112(r1) + const TYPE beta, TYPE *C, const int ldc) #endif #ifdef ATL_AS_AIX_PPC .csect .text[PR] @@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM): .globl Mjoin(_,ATL_USERMM) Mjoin(_,ATL_USERMM): #else - #if defined(ATL_USE64BITS) + #if defined(ATL_USE64BITS) && _CALL_ELF != 2 /* * Official Program Descripter section, seg fault w/o it on Linux/PPC64 */ @@ -257,9 +265,17 @@ ATL_USERMM: #endif #endif + #if defined (ATL_USE64BITS) +#if _CALL_ELF == 2 +/* ABIv2 */ + ld pC0, 104(r1) + ld incCn, 112(r1) +#else +/* ABIv1 */ ld pC0, 120(r1) ld incCn, 128(r1) +#endif #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC) lwz pC0, 68(r1) lwz incCn, 72(r1) Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c =================================================================== --- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c +++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c @@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N .globl Mjoin(_,ATL_USERMM) Mjoin(_,ATL_USERMM): #else - #if defined(ATL_USE64BITS) + #if defined(ATL_USE64BITS) && _CALL_ELF != 2 /* * Official Program Descripter section, seg fault w/o it on Linux/PPC64 */ @@ -221,8 +221,15 @@ ATL_USERMM: * kernel instead */ #if defined (ATL_USE64BITS) +#if _CALL_ELF == 2 +/* ABIv2 */ + ld r10, 104(r1) + ld r5, 112(r1) +#else +/* ABIv1 */ ld r10, 120(r1) ld r5, 128(r1) +#endif #elif defined(ATL_AS_OSX_PPC) lwz r10, 60(r1) lwz r5, 64(r1) @@ -285,8 +292,15 @@ ATL_USERMM: eqv r0, r0, r0 /* all 1s */ ATL_WriteVRSAVE(r0) /* signal we use all vector regs */ #if defined (ATL_USE64BITS) +#if _CALL_ELF == 2 + /* ABIv2 */ + ld pC0, FSIZE+104(r1) + ld ldc, FSIZE+112(r1) +#else + /* ABIv1 */ ld pC0, FSIZE+120(r1) ld ldc, FSIZE+128(r1) +#endif #elif 
defined(ATL_AS_OSX_PPC) lwz pC0, FSIZE+60(r1) lwz ldc, FSIZE+64(r1) @@ -4258,8 +4272,15 @@ UNALIGNED_C: eqv r0, r0, r0 /* all 1s */ ATL_WriteVRSAVE(r0) /* signal we use all vector regs */ #if defined (ATL_USE64BITS) +#if _CALL_ELF == 2 + /* ABIv2 */ + ld pC0, FSIZE+104(r1) + ld ldc, FSIZE+112(r1) +#else + /* ABIv1 */ ld pC0, FSIZE+120(r1) ld ldc, FSIZE+128(r1) +#endif #elif defined(ATL_AS_OSX_PPC) lwz pC0, FSIZE+60(r1) lwz ldc, FSIZE+64(r1) Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c =================================================================== --- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c +++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c @@ -258,8 +258,15 @@ ATL_USERMM: eqv r0, r0, r0 /* all 1s */ ATL_WriteVRSAVE(r0) /* signal we use all vector regs */ #if defined (ATL_USE64BITS) +#if _CALL_ELF == 2 +/* ABIv2 */ + ld pC0, FSIZE+104(r1) + ld ldc, FSIZE+112(r1) +#else +/* ABIv1 */ ld pC0, FSIZE+120(r1) ld ldc, FSIZE+128(r1) +#endif #elif defined(ATL_AS_OSX_PPC) lwz pC0, FSIZE+60(r1) lwz ldc, FSIZE+64(r1) Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c =================================================================== --- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c +++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c @@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM): */ #ifdef ATL_GAS_LINUX_PPC #ifdef ATL_USE64BITS + #if _CALL_ELF == 2 + /* ABIv2 */ + ld pC0, 104(r1) + ld incCn, 112(r1) + #else + /* ABIv1 */ ld pC0, 120(r1) ld incCn, 128(r1) + #endif #else lwz incCn, FSIZE+8(r1) #endif