diff --git a/.gitignore b/.gitignore index b3e90b8..6661901 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ K7323DNow.tgz /ARMv732NEON.tar.bz2 /lapack-3.5.0.tgz /atlas3.10.2.tar.bz2 +/POWER864LEVSXp4.tar.bz2 diff --git a/atlas-new_archdef_for_ppc64le.patch b/atlas-new_archdef_for_ppc64le.patch new file mode 100644 index 0000000..0356786 --- /dev/null +++ b/atlas-new_archdef_for_ppc64le.patch @@ -0,0 +1,32 @@ +Subject: atlas new archdef for ppc64le +From: Michel Normand +Date: Sun, 13 Jun 2014 18:02:47 +0200 + +Need to define different archdef names +for ppc64 (that is Big Endian) and ppc64le (that is Little Endian). +This is already done upstream in atlas 3.11.30 with issue +https://sourceforge.net/p/math-atlas/patches/66/ + +Required at least as long as I need the bypass of +atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch + +Signed-off-by: Michel Normand +--- + CONFIG/src/SpewMakeInc.c | 4 ++++ + 1 file changed, 4 insertions(+) + +Index: ATLAS/CONFIG/src/SpewMakeInc.c +=================================================================== +--- ATLAS.orig/CONFIG/src/SpewMakeInc.c ++++ ATLAS/CONFIG/src/SpewMakeInc.c +@@ -542,6 +542,10 @@ int main(int nargs, char **args) + fprintf(fpout, "# -------------------------------------------------\n"); + fprintf(fpout, " ARCH = %s", machnam[mach]); + fprintf(fpout, "%d", ptrbits); ++ /* for ppc64le archi add 'LE' characters */ ++ #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ++ fprintf(fpout, "%s", "LE"); ++ #endif + if (ISAX) + fprintf(fpout, "%s", ISAXNAM[ISAX]); + if (!USEIEEE) diff --git a/atlas.3.10.2-add_power8_cpu.patch b/atlas.3.10.2-add_power8_cpu.patch new file mode 100644 index 0000000..7b58353 --- /dev/null +++ b/atlas.3.10.2-add_power8_cpu.patch @@ -0,0 +1,131 @@ +From: Michel Normand +Subject: atlas.3.10.2 add power8 cpu +Date: Thu, 18 Sep 2014 15:13:24 +0200 + +atlas.3.10.2 add Power8 cpu +tracked upstream by issue 67 +https://sourceforge.net/p/math-atlas/patches/67/ + 
+Signed-off-by: Michel Normand +--- + CONFIG/ARCHS/Make.ext | 7 +++++++ + CONFIG/include/atlconf.h | 6 +++--- + CONFIG/src/atlcomp.txt | 6 ++++++ + CONFIG/src/backend/archinfo_aix.c | 2 ++ + CONFIG/src/backend/archinfo_linux.c | 1 + + include/atlas_pca.h | 2 +- + 6 files changed, 20 insertions(+), 4 deletions(-) + +Index: ATLAS/CONFIG/ARCHS/Make.ext +=================================================================== +--- ATLAS.orig/CONFIG/ARCHS/Make.ext ++++ ATLAS/CONFIG/ARCHS/Make.ext +@@ -33,6 +33,7 @@ files = AMD64K10h32SSE3.tar.bz2 AMD64K10 + MIPSR1xK64.tar.bz2 Makefile P432SSE2.tar.bz2 P4E32SSE3.tar.bz2 \ + P4E64SSE3.tar.bz2 PIII32SSE1.tar.bz2 POWER432.tar.bz2 \ + POWER464.tar.bz2 POWER564.tar.bz2 POWER764VSX.tar.bz2 \ ++ POWER864VSX.tar.bz2 \ + PPCG432AltiVec.tar.bz2 PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 \ + PPRO32.tar.bz2 USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 \ + USIV64.tar.bz2 UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \ +@@ -308,6 +309,12 @@ POWER764VSX.tar.bz2 : $(basdr)/POWER764V + /tmp/POWER764VSX.tar POWER764VSX + bzip2 /tmp/POWER764VSX.tar + mv /tmp/POWER764VSX.tar.bz2 ./. ++POWER864VSX.tar.bz2 : $(basdr)/POWER864VSX ++ - rm -f /tmp/POWER864VSX.tar /tmp/POWER864VSX.tar.bz2 ++ cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ ++ /tmp/POWER864VSX.tar POWER864VSX ++ bzip2 /tmp/POWER864VSX.tar ++ mv /tmp/POWER864VSX.tar.bz2 ./. 
+ IBMz1032.tar.bz2 : $(basdr)/IBMz1032 + - rm -f /tmp/IBMz1032.tar /tmp/IBMz1032.tar.bz2 + cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \ +Index: ATLAS/CONFIG/include/atlconf.h +=================================================================== +--- ATLAS.orig/CONFIG/include/atlconf.h ++++ ATLAS/CONFIG/include/atlconf.h +@@ -18,10 +18,10 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS + enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS, + AFARM, AFS390}; + +-#define NMACH 52 ++#define NMACH 53 + static char *machnam[NMACH] = + {"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5", +- "POWER6", "POWER7", "POWERe6500", "IBMz9", "IBMz10", "IBMz196", ++ "POWER6", "POWER7", "POWER8", "POWERe6500", "IBMz9", "IBMz10", "IBMz196", + "x86x87", "x86SSE1", "x86SSE2", "x86SSE3", + "P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo", + "CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3", +@@ -31,7 +31,7 @@ static char *machnam[NMACH] = + "USI", "USII", "USIII", "USIV", "UST1", "UST2", "UnknownUS", + "MIPSR1xK", "MIPSICE9", "ARMv7"}; + enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5, +- IbmPwr6, IbmPwr7, Pwre6500, ++ IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500, + IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */ + x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */ + IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS, +Index: ATLAS/CONFIG/src/atlcomp.txt +=================================================================== +--- ATLAS.orig/CONFIG/src/atlcomp.txt ++++ ATLAS/CONFIG/src/atlcomp.txt +@@ -190,6 +190,10 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc + 'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2' + MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc + 'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave' ++MACH=POWER8 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc ++ 'gcc' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops' ++MACH=POWER8 OS=ALL LVL=1010 
COMPS=f77 ++ 'gfortran' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops' + MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc + 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' + MACH=POWER7 OS=ALL LVL=1010 COMPS=f77 +@@ -210,6 +214,8 @@ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dm + 'gcc' '-mcpu=power4 -mtune=power4 -O3 -fno-schedule-insns -fno-rerun-loop-opt' + MACH=POWER4 OS=ALL LVL=1010 COMPS=f77 + 'xlf' '-qtune=pwr4 -qarch=pwr4 -O3 -qmaxmem=-1 -qfloat=hsflt' ++MACH=POWER8 OS=ALL LVL=1010 COMPS=f77 ++ 'xlf' '-qtune=pwr8 -qarch=pwr8 -O3 -qmaxmem=-1 -qfloat=hsflt' + # + # IBM System z or zEnterprise. + # These compiler flags given by IBM; -O3 -funroll-loops are chosen because +Index: ATLAS/CONFIG/src/backend/archinfo_linux.c +=================================================================== +--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c ++++ ATLAS/CONFIG/src/backend/archinfo_linux.c +@@ -77,6 +77,7 @@ enum MACHTYPE ProbeArch() + else if (strstr(res, "7455")) mach = PPCG4; + else if (strstr(res, "PPC970FX")) mach = PPCG5; + else if (strstr(res, "PPC970MP")) mach = PPCG5; ++ else if (strstr(res, "POWER8")) mach = IbmPwr8; + else if (strstr(res, "POWER7")) mach = IbmPwr7; + else if (strstr(res, "POWER6")) mach = IbmPwr6; + else if (strstr(res, "POWER5")) mach = IbmPwr5; +Index: ATLAS/include/atlas_pca.h +=================================================================== +--- ATLAS.orig/include/atlas_pca.h ++++ ATLAS/include/atlas_pca.h +@@ -26,7 +26,7 @@ + #endif + #elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \ + defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \ +- defined(ATL_ARCH_POWER7) ++ defined(ATL_ARCH_POWER7) || defined(ATL_ARCH_POWER8) + #ifdef __GNUC__ + #define ATL_membarrier __asm__ __volatile__ ("dcs") + /* #define ATL_USEPCA 1 */ +Index: ATLAS/CONFIG/src/backend/archinfo_aix.c +=================================================================== +--- 
ATLAS.orig/CONFIG/src/backend/archinfo_aix.c ++++ ATLAS/CONFIG/src/backend/archinfo_aix.c +@@ -67,6 +67,8 @@ enum MACHTYPE ProbeArch() + { + if (strstr(res, "PowerPC_POWER5")) + mach = IbmPwr5; ++ else if (strstr(res, "PowerPC_POWER8")) ++ mach = IbmPwr8; + else if (strstr(res, "PowerPC_POWER7")) + mach = IbmPwr7; + else if (strstr(res, "PowerPC_POWER6")) diff --git a/atlas.3.10.2-ppc64le_abiv2.patch b/atlas.3.10.2-ppc64le_abiv2.patch new file mode 100644 index 0000000..6633d95 --- /dev/null +++ b/atlas.3.10.2-ppc64le_abiv2.patch @@ -0,0 +1,220 @@ +From: Michel Normand +Subject: atlas.3.10.2 ppc64le abiv2 patch +Date: Mon, 28 Jul 2014 04:29:05 -0400 + +atlas.3.10.2 abiv2 step2 complete the changes already present in atlas 3.10.2 +* still some files with opd ABI V1 to be disabled for ABI V2 + tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c + tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c + tune/blas/gemm/CASES/ATL_smm4x4x128_av.c + +atlas.3.10.2 ppc64le abiv2 step3 +* change offsets of parameters read from stack to avoid some segfaults. + (values changes 120 => 104 and 128 => 112 identified by gdb investigation) + +Despite this step3 patch there are two Remaining problems for ppc64le archi: +* TODO: still have seg-faults in console during build/check +but is not critical (without make check) and rpm are generated on fedora. +unable to investigate because of problem tracked by issue 950 +https://sourceforge.net/p/math-atlas/support-requests/950/ + +* TODO: make check failure because xsslvtst execution failure +related to vector assembly code that assumes big-endian env +as written in ATL_cmm4x4x128_av.c and ATL_smm4x4x128_av.c. 
+Would need significant work to support little-endian as per +endianness comments of all PowerPC vector instructions in: +https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D/$file/vector_simd_pem.ppc.2005AUG23.pdf + +Signed-off-by: Michel Normand +--- + tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 7 +++++++ + tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 7 +++++++ + tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 9 ++++++++- + tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 20 ++++++++++++++++++-- + tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 23 ++++++++++++++++++++++- + 5 files changed, 62 insertions(+), 4 deletions(-) + +Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c ++++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c +@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM): + .globl Mjoin(_,ATL_USERMM) + Mjoin(_,ATL_USERMM): + #else +- #if defined(ATL_USE64BITS) ++ #if defined(ATL_USE64BITS) && _CALL_ELF != 2 + /* + * Official Program Descripter section, seg fault w/o it on Linux/PPC64 + */ +@@ -324,8 +324,15 @@ ATL_USERMM: + #endif + + #ifdef ATL_USE64BITS ++#if _CALL_ELF == 2 ++/* ABIv2 */ ++ ld pC0, 104(r1) ++ ld incCn, 112(r1) ++#else ++/* ABIv1 */ + ld pC0, 120(r1) + ld incCn, 128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC) + lwz pC0, 68(r1) + lwz incCn, 72(r1) +Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c ++++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c +@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N + const TYPE beta, TYPE *C, const int ldc) + (r10) 8(r1) + ******************************************************************************* +-64 bit ABIs: ++64 bit ABIv1s: + r3 r4 r5 r6/f1 + void ATL_USERMM(const int M, const int N, const int K, 
const TYPE alpha, + r7 r8 r9 r10 + const TYPE *A, const int lda, const TYPE *B, const int ldb, + f2 120(r1) 128(r1) + const TYPE beta, TYPE *C, const int ldc) ++ ++64 bit ABIv2s: ++ r3 r4 r5 r6/f1 ++void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, ++ r7 r8 r9 r10 ++ const TYPE *A, const int lda, const TYPE *B, const int ldb, ++ f2 104(r1) 112(r1) ++ const TYPE beta, TYPE *C, const int ldc) + #endif + #ifdef ATL_AS_AIX_PPC + .csect .text[PR] +@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM): + .globl Mjoin(_,ATL_USERMM) + Mjoin(_,ATL_USERMM): + #else +- #if defined(ATL_USE64BITS) ++ #if defined(ATL_USE64BITS) && _CALL_ELF != 2 + /* + * Official Program Descripter section, seg fault w/o it on Linux/PPC64 + */ +@@ -257,9 +265,17 @@ ATL_USERMM: + #endif + #endif + ++ + #if defined (ATL_USE64BITS) ++#if _CALL_ELF == 2 ++/* ABIv2 */ ++ ld pC0, 104(r1) ++ ld incCn, 112(r1) ++#else ++/* ABIv1 */ + ld pC0, 120(r1) + ld incCn, 128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC) + lwz pC0, 68(r1) + lwz incCn, 72(r1) +Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c ++++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c +@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N + .globl Mjoin(_,ATL_USERMM) + Mjoin(_,ATL_USERMM): + #else +- #if defined(ATL_USE64BITS) ++ #if defined(ATL_USE64BITS) && _CALL_ELF != 2 + /* + * Official Program Descripter section, seg fault w/o it on Linux/PPC64 + */ +@@ -221,8 +221,15 @@ ATL_USERMM: + * kernel instead + */ + #if defined (ATL_USE64BITS) ++#if _CALL_ELF == 2 ++/* ABIv2 */ ++ ld r10, 104(r1) ++ ld r5, 112(r1) ++#else ++/* ABIv1 */ + ld r10, 120(r1) + ld r5, 128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) + lwz r10, 60(r1) + lwz r5, 64(r1) +@@ -285,8 +292,15 @@ ATL_USERMM: + eqv r0, r0, r0 /* all 1s */ + ATL_WriteVRSAVE(r0) /* signal we use all vector regs */ + #if 
defined (ATL_USE64BITS) ++#if _CALL_ELF == 2 ++ /* ABIv2 */ ++ ld pC0, FSIZE+104(r1) ++ ld ldc, FSIZE+112(r1) ++#else ++ /* ABIv1 */ + ld pC0, FSIZE+120(r1) + ld ldc, FSIZE+128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) + lwz pC0, FSIZE+60(r1) + lwz ldc, FSIZE+64(r1) +@@ -4258,8 +4272,15 @@ UNALIGNED_C: + eqv r0, r0, r0 /* all 1s */ + ATL_WriteVRSAVE(r0) /* signal we use all vector regs */ + #if defined (ATL_USE64BITS) ++#if _CALL_ELF == 2 ++ /* ABIv2 */ ++ ld pC0, FSIZE+104(r1) ++ ld ldc, FSIZE+112(r1) ++#else ++ /* ABIv1 */ + ld pC0, FSIZE+120(r1) + ld ldc, FSIZE+128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) + lwz pC0, FSIZE+60(r1) + lwz ldc, FSIZE+64(r1) +Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c ++++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c +@@ -258,8 +258,15 @@ ATL_USERMM: + eqv r0, r0, r0 /* all 1s */ + ATL_WriteVRSAVE(r0) /* signal we use all vector regs */ + #if defined (ATL_USE64BITS) ++#if _CALL_ELF == 2 ++/* ABIv2 */ ++ ld pC0, FSIZE+104(r1) ++ ld ldc, FSIZE+112(r1) ++#else ++/* ABIv1 */ + ld pC0, FSIZE+120(r1) + ld ldc, FSIZE+128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) + lwz pC0, FSIZE+60(r1) + lwz ldc, FSIZE+64(r1) +Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c ++++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c +@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM): + */ + #ifdef ATL_GAS_LINUX_PPC + #ifdef ATL_USE64BITS ++ #if _CALL_ELF == 2 ++ /* ABIv2 */ ++ ld pC0, 104(r1) ++ ld incCn, 112(r1) ++ #else ++ /* ABIv1 */ + ld pC0, 120(r1) + ld incCn, 128(r1) ++ #endif + #else + lwz incCn, FSIZE+8(r1) + #endif diff --git a/atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch b/atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch new file mode 100644 index 0000000..a3a6607 --- 
/dev/null +++ b/atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch @@ -0,0 +1,151 @@ +From: Michel Normand +Subject: atlas.3.10.2 ppc64le do not use files with lvx +Date: Tue, 12 Aug 2014 16:07:06 +0200 + +ppc64le do not use files with lvx +This is a temporary patch as long as the related files +are not ported yet to ppc64 little-endian. + +Warning: patch to be applied only for ppc64le architecture +and will also need atlas-new_archdef_for_ppc64le.patch + +Signed-off-by: Michel Normand +--- + tune/blas/gemm/CASES/ccases.flg | 6 +----- + tune/blas/gemm/CASES/dcases.flg | 8 +------- + tune/blas/gemm/CASES/dcases.vnb | 4 ---- + tune/blas/gemm/CASES/scases.flg | 9 +-------- + tune/blas/gemm/CASES/scases.vnb | 3 --- + tune/blas/gemm/CASES/zcases.flg | 8 +------- + 6 files changed, 4 insertions(+), 34 deletions(-) + +Index: ATLAS/tune/blas/gemm/CASES/ccases.flg +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/ccases.flg ++++ ATLAS/tune/blas/gemm/CASES/ccases.flg +@@ -1,5 +1,5 @@ + "" +-24 ++22 + 304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \ + gcc + -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O +@@ -48,13 +48,9 @@ gcc + 328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \ + gcc + -fomit-frame-pointer -O2 -fno-tree-loop-optimize +-329 192 4 4 4 1 16 4 4 4 ATL_cmm4x4x128_av.c "R. Clint Whaley" \ +-gcc +--x assembler-with-cpp + 331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \ + gcc + -x assembler-with-cpp -mips4 +-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM" + 333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \ + gcc + -x assembler-with-cpp -mfpu=vfpv3 +Index: ATLAS/tune/blas/gemm/CASES/scases.flg +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/scases.flg ++++ ATLAS/tune/blas/gemm/CASES/scases.flg +@@ -1,5 +1,5 @@ + "" +-25 ++22 + 304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. 
Clint Whaley" \ + gcc + -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O +@@ -48,16 +48,9 @@ gcc + 328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \ + gcc + -fomit-frame-pointer -O2 -fno-tree-loop-optimize +-329 192 4 4 4 1 16 4 4 4 ATL_smm4x4x128_av.c "R. Clint Whaley" \ +-gcc +--x assembler-with-cpp +-330 200 92 92 92 1 16 92 92 92 ATL_smm4x4x128_av.c "R. Clint Whaley" \ +-gcc +--x assembler-with-cpp + 331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \ + gcc + -x assembler-with-cpp -mips4 +-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM" + 333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \ + gcc + -x assembler-with-cpp -mfpu=vfpv3 +Index: ATLAS/tune/blas/gemm/CASES/scases.vnb +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/scases.vnb ++++ ATLAS/tune/blas/gemm/CASES/scases.vnb +@@ -31,9 +31,6 @@ + # Defaults: TA='t', TB='n', SSE=0, X87=0, LDBOT=1, RTKU=0, AOUTER=0, + # KBMAX=KU, KBMIN=KU, BETAN1=0, RTMN=1 + # +-ID=1 ROUT='ATL_smm4x4x128_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=4 \ +- LDKB=1 LDBOT=1 KBMIN=4 KBMAX=128 ASM=GAS_PPC \ +- COMP='gcc' FLAGS='-x assembler-with-cpp' + ID=2 ROUT='ATL_smm4x4x16_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=16 \ + LDKB=1 LDBOT=0 KBMIN=16 KBMAX=2048 ASM=GAS_SPARC \ + COMP='gcc' FLAGS='-x assembler-with-cpp' +Index: ATLAS/tune/blas/gemm/CASES/dcases.flg +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/dcases.flg ++++ ATLAS/tune/blas/gemm/CASES/dcases.flg +@@ -1,5 +1,5 @@ + "" +-32 ++30 + 306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \ + gcc + -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2 +@@ -79,12 +79,6 @@ gcc + 336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. 
Clint Whaley" \ + gcc + -x assembler-with-cpp -mips4 +-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \ +-gcc +--x assembler-with-cpp +-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \ +-gcc +--O3 -mvsx + 339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \ + gcc + -x assembler-with-cpp -mfpu=vfpv3 +Index: ATLAS/tune/blas/gemm/CASES/dcases.vnb +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/dcases.vnb ++++ ATLAS/tune/blas/gemm/CASES/dcases.vnb +@@ -53,10 +53,6 @@ ID=6 ROUT='ATL_dmm4x1x90_x87.c' AUTH='R + ID=7 ROUT='ATL_dmm8x1x120_sse2.c' AUTH='R. Clint Whaley' \ + MU=8 NU=1 KU=1 KBMAX=512 ASM=GAS_x8664 BETAN1=1 \ + COMP='gcc' FLAGS='-m64 -x assembler-with-cpp' +-ID=70 ROUT='ATL_dmm4x4x80_ppc.c' AUTH='R. Clint Whaley' TA='T', TB='N' \ +- MU=4 NU=4 KU=1 KBMIN=1 KBMAX=80 ASM=GAS_PPC BETAN1=0 LDBOT=0 \ +- LDAB=0 LDISKB=1 RTN=1 RTM=1 RTK=0 \ +- COMP='gcc' FLAGS='-x assembler-with-cpp' + ID=80 ROUT='ATL_dmm4x4x16r8_US.c' AUTH='R. Clint Whaley' TA='T', TB='N' \ + MU=4 NU=4 KU=24 KBMIN=24 KBMAX=512 ASM=GAS_SPARC BETAN1=0 \ + LDAB=0 RTK=1 RTN=1 RTM=1 LDBOT=0 LDISKB=1 LDAB=1 \ +Index: ATLAS/tune/blas/gemm/CASES/zcases.flg +=================================================================== +--- ATLAS.orig/tune/blas/gemm/CASES/zcases.flg ++++ ATLAS/tune/blas/gemm/CASES/zcases.flg +@@ -1,5 +1,5 @@ + "" +-31 ++29 + 306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \ + gcc + -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2 +@@ -76,12 +76,6 @@ gcc + 336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \ + gcc + -x assembler-with-cpp -mips4 +-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \ +-gcc +--x assembler-with-cpp +-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \ +-gcc +--O3 -mvsx + 339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. 
Clint Whaley" \ + gcc + -x assembler-with-cpp -mfpu=vfpv3 diff --git a/atlas.spec b/atlas.spec index a4c466f..ca5cac8 100644 --- a/atlas.spec +++ b/atlas.spec @@ -5,7 +5,7 @@ Version: 3.10.2 %if "%{?enable_native_atlas}" != "0" %define dist .native %endif -Release: 9%{?dist} +Release: 10%{?dist} Summary: Automatically Tuned Linear Algebra Software Group: System Environment/Libraries @@ -27,6 +27,7 @@ Source12: IBMz932.tar.bz2 Source13: IBMz964.tar.bz2 #upstream arm uses softfp abi, fedora arm uses hard Source14: ARMv732NEON.tar.bz2 +Source15: POWER864LEVSXp4.tar.bz2 Patch2: atlas-fedora-arm.patch # Properly pass -melf_* to the linker with -Wl, fixes FTBFS bug 817552 @@ -45,13 +46,16 @@ Patch8: atlas-genparse.patch # Unbundle LAPACK (BZ #1181369) Patch9: atlas.3.10.1-unbundle.patch -# ppc64le patches -Patch95: initialize_malloc_memory.invtrsm.wms.oct23.patch -Patch96: xlf.command.not.found.patch -Patch98: getdoublearr.stripwhite.patch -Patch99: ppc64le-remove-vsx.patch -Patch100: ppc64le-abiv2.patch -Patch110: p8-mem-barrier.patch +# for ppc64 ppc64le +# https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40 +Patch95: getdoublearr.stripwhite.patch +Patch96: initialize_malloc_memory.invtrsm.wms.oct23.patch +Patch97: atlas.3.10.2-ppc64le_abiv2.patch +Patch98: atlas-new_archdef_for_ppc64le.patch +Patch99: atlas.3.10.2-add_power8_cpu.patch + +# for ppc64le +Patch100: atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch BuildRequires: gcc-gfortran, lapack-static @@ -304,19 +308,6 @@ ix86 architecture. %endif %endif -# disable the archdef for ppc64le -# do it only one time. -%ifarch ppc64le -%define arch_option -Si archdef 0 -%endif - -%ifarch ppc64 -%global arch_option -A 7 -%global assembler_option -Wa,--noexecstack,-mpower7 -%else -%global assembler_option -Wa,--noexecstack -%endif - %prep #uname -a #cat /proc/cpuinfo @@ -337,7 +328,6 @@ ix86 architecture. 
%patch7 -p1 -b .aarch64 %endif %patch8 -p1 -b .genparse - %patch9 -p1 -b .unbundle cp %{SOURCE1} CONFIG/ARCHS/ @@ -347,16 +337,20 @@ cp %{SOURCE11} CONFIG/ARCHS/ cp %{SOURCE12} CONFIG/ARCHS/ cp %{SOURCE13} CONFIG/ARCHS/ cp %{SOURCE14} CONFIG/ARCHS/ +cp %{SOURCE15} CONFIG/ARCHS/ #cp %{SOURCE8} CONFIG/ARCHS/ #cp %{SOURCE9} CONFIG/ARCHS/ -%ifarch ppc64le +%ifarch ppc64le ppc64 +%patch95 -p1 -b .than +%patch96 -p1 +%patch97 -p1 +%patch98 -p1 %patch99 -p1 -%patch98 -p2 -%patch96 -p2 -%patch95 -p2 -%patch100 -p2 -%patch110 -p1 +%endif + +%ifarch ppc64le +%patch100 -p1 %endif %ifarch %{arm} @@ -392,7 +386,7 @@ for type in %{types}; do mkdir -p %{_arch}_${type} pushd %{_arch}_${type} - ../configure %{mode} %{?threads_option} %{?arch_option} -D c -DWALL -Fa alg '%{armflags} -g %{assembler_option} -fPIC'\ + ../configure %{mode} %{?threads_option} %{?arch_option} -D c -DWALL -Fa alg '%{armflags} -g -Wa,--noexecstack -fPIC'\ --prefix=%{buildroot}%{_prefix} \ --incdir=%{buildroot}%{_includedir} \ --libdir=%{buildroot}%{_libdir}/${libname} @@ -509,14 +503,6 @@ for type in %{types}; do sed -i 's#-m64#-m32#g' Make.inc %endif -%ifarch ppc64le - sed -i 's#-mvsx##g' Make.inc - sed -i 's#-DATL_VSX##g' Make.inc - sed -i 's#-DATL_AltiVec##g' Make.inc - sed -i 's#-maltivec##g' Make.inc - sed -i 's#ARCH =.*#ARCH = POWER464#' Make.inc -%endif - %endif make build cd lib @@ -833,6 +819,9 @@ fi %endif %changelog +* Thu Nov 26 2015 Than Ngo 3.10.2-10 +- backport upstream patch for power8 support + * Fri Nov 13 2015 Than Ngo 3.10.2-9 - add correct assembler option for ppc64 diff --git a/getdoublearr.stripwhite.patch b/getdoublearr.stripwhite.patch index e1dc84d..86358a1 100644 --- a/getdoublearr.stripwhite.patch +++ b/getdoublearr.stripwhite.patch @@ -1,18 +1,6 @@ -Subject: getdoublearr.stripwhite -From: Michel Normand - -GetDoubleArr must only handle the comma delimited list at string head -and ignore anything after the first blank character. 
- -Signed-off-by: Michel Normand ---- - ATLAS/include/atlas_genparse.h | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - -Index: atlas/ATLAS/include/atlas_genparse.h -=================================================================== ---- atlas.orig/ATLAS/include/atlas_genparse.h -+++ atlas/ATLAS/include/atlas_genparse.h +diff -up ATLAS/include/atlas_genparse.h.than ATLAS/include/atlas_genparse.h +--- ATLAS/include/atlas_genparse.h.than 2015-11-26 10:53:55.056586198 -0500 ++++ ATLAS/include/atlas_genparse.h 2015-11-26 10:56:00.168537914 -0500 @@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str) } @@ -40,8 +28,8 @@ Index: atlas/ATLAS/include/atlas_genparse.h assert(sscanf(str, "%le", d) == 1); while (i < N) { -@@ -166,6 +177,7 @@ static int GetDoubleArr(char *str, int N - break; +@@ -167,6 +178,7 @@ static int GetDoubleArr(char *str, int N + break; i++; } + free(dupstr); diff --git a/p8-mem-barrier.patch b/p8-mem-barrier.patch deleted file mode 100644 index 15d7b8a..0000000 --- a/p8-mem-barrier.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -Naur ATLAS.orig/include/atlas_pca.h ATLAS/include/atlas_pca.h ---- ATLAS.orig/include/atlas_pca.h 2013-01-08 19:15:40.000000000 +0100 -+++ ATLAS/include/atlas_pca.h 2014-10-23 13:45:36.956698637 +0200 -@@ -26,7 +26,7 @@ - #endif - #elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \ - defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \ -- defined(ATL_ARCH_POWER7) -+ defined(ATL_ARCH_POWER7) || 1 - #ifdef __GNUC__ - #define ATL_membarrier __asm__ __volatile__ ("dcs") - /* #define ATL_USEPCA 1 */ diff --git a/ppc64le-abiv2.patch b/ppc64le-abiv2.patch deleted file mode 100644 index 556dd04..0000000 --- a/ppc64le-abiv2.patch +++ /dev/null @@ -1,60 +0,0 @@ ---- atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2013-12-05 19:19:57.000000000 +0100 -+++ atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c.new 2013-12-06 16:29:57.000000000 +0100 -@@ -170,13 +170,21 @@ void 
ATL_USERMM(const int M, const int N - const TYPE beta, TYPE *C, const int ldc) - (r10) 8(r1) - ******************************************************************************* --64 bit ABIs: -+64 bit ABIv1s: - r3 r4 r5 r6/f1 - void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, - r7 r8 r9 r10 - const TYPE *A, const int lda, const TYPE *B, const int ldb, - f2 120(r1) 128(r1) - const TYPE beta, TYPE *C, const int ldc) -+ -+64 bit ABIv2s: -+ r3 r4 r5 r6/f1 -+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, -+ r7 r8 r9 r10 -+ const TYPE *A, const int lda, const TYPE *B, const int ldb, -+ f2 104(r1) 112(r1) -+ const TYPE beta, TYPE *C, const int ldc) - #endif - #ifdef ATL_AS_AIX_PPC - .csect .text[PR] -@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM): - .globl Mjoin(_,ATL_USERMM) - Mjoin(_,ATL_USERMM): - #else -- #if defined(ATL_USE64BITS) -+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2 - /* - * Official Program Descripter section, seg fault w/o it on Linux/PPC64 - */ -@@ -217,6 +225,7 @@ ATL_USERMM: - .globl Mjoin(.,ATL_USERMM) - Mjoin(.,ATL_USERMM): - #else -+/* ppc64 have no longer function descriptors in ABIv2 */ - .globl ATL_USERMM - ATL_USERMM: - #endif -@@ -257,9 +266,17 @@ ATL_USERMM: - #endif - #endif - -+ - #if defined (ATL_USE64BITS) -+#if _CALL_ELF == 2 -+/* ABIv2 */ -+ ld pC0, 104(r1) -+ ld incCn, 112(r1) -+#else -+/* ABIv1 */ - ld pC0, 120(r1) - ld incCn, 128(r1) -+#endif - #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC) - lwz pC0, 68(r1) - lwz incCn, 72(r1) diff --git a/ppc64le-remove-vsx.patch b/ppc64le-remove-vsx.patch deleted file mode 100644 index c060135..0000000 --- a/ppc64le-remove-vsx.patch +++ /dev/null @@ -1,37 +0,0 @@ -Subject: ppc64le remove vsx -From: Michel Normand - -temporarily remove the vsx related flags -as long as not supported for ppc64le -Note that also force as power4 - -Signed-off-by: Michel Normand -diff -up ATLAS/CONFIG/src/atlcomp.txt.orig ATLAS/CONFIG/src/atlcomp.txt ---- 
ATLAS/CONFIG/src/atlcomp.txt.orig 2014-07-10 18:22:02.000000000 +0200 -+++ ATLAS/CONFIG/src/atlcomp.txt 2015-07-09 09:44:07.270264073 +0200 -@@ -191,9 +191,9 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc - MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc - 'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave' - MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc -- 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' -+ 'gcc' '-O2 -m64 -mvrsave -funroll-all-loops' - MACH=POWER7 OS=ALL LVL=1010 COMPS=f77 -- 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' -+ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops' - MACH=POWER6 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc - 'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 -minsert-sched-nops=2' - MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc -diff -up ATLAS/CONFIG/src/probe_comp.c.orig ATLAS/CONFIG/src/probe_comp.c ---- ATLAS/CONFIG/src/probe_comp.c.orig 2015-07-09 09:44:07.280264074 +0200 -+++ ATLAS/CONFIG/src/probe_comp.c 2015-07-09 09:45:51.480266328 +0200 -@@ -450,8 +450,8 @@ COMPNODE **GetDefaultComps(enum OSTYPE O - vp = "-mavx -mno-sse2avx -mfma"; - else if ((vecexts & (1< - -try to bypass error while building ppc64le -"make[2]: xlf: Command not found" - -Signed-off-by: Michel Normand ---- - ATLAS/CONFIG/src/atlcomp.txt | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -Index: atlas/ATLAS/CONFIG/src/atlcomp.txt -=================================================================== ---- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt -+++ atlas/ATLAS/CONFIG/src/atlcomp.txt -@@ -199,7 +199,7 @@ MACH=POWER6 OS=ALL LVL=1010 COMPS=f77 - MACH=POWER5 OS=ALL LVL=1010 COMPS=f77 - 'gfortran' '-mcpu=power5 -mtune=power5 -O3 -fno-schedule-insns -fno-rerun-loop-opt' - MACH=POWER7 OS=ALL LVL=1010 COMPS=f77 -- 'xlf' '-qtune=pwr7 -qarch=pwr7 -O3 -qmaxmem=-1 -qfloat=hsflt' -+ 'gfortran' '-O2 -m64 
-mvrsave -funroll-all-loops' - MACH=POWER5 OS=ALL LVL=1010 COMPS=f77 - 'xlf' '-qtune=pwr5 -qarch=pwr5 -O3 -qmaxmem=-1 -qfloat=hsflt' - MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dmc,smc,dkc,skc,xcc,gcc