From f7d47dc9a324fb0342f0f3cf274ceadc636fdb75 Mon Sep 17 00:00:00 2001 From: Jaromir Capik Date: Oct 30 2014 17:29:59 +0000 Subject: patching for Power8 to pass performance tunings and tests on P8 builders --- diff --git a/atlas.spec b/atlas.spec index 1eb9553..bbbd85a 100644 --- a/atlas.spec +++ b/atlas.spec @@ -5,7 +5,7 @@ Version: 3.10.1 %if "%{?enable_native_atlas}" != "0" %define dist .native %endif -Release: 16%{?dist} +Release: 17%{?dist} Summary: Automatically Tuned Linear Algebra Software Group: System Environment/Libraries @@ -44,6 +44,14 @@ Patch6: atlas-affinity.patch Patch7: atlas-aarch64port.patch Patch8: atlas-genparse.patch +# ppc64le patches +Patch95: initialize_malloc_memory.invtrsm.wms.oct23.patch +Patch96: xlf.command.not.found.patch +Patch98: getdoublearr.stripwhite.patch +Patch99: ppc64le-remove-vsx.patch +Patch100: ppc64le-abiv2.patch +Patch110: p8-mem-barrier.patch + BuildRequires: gcc-gfortran Provides: bundled(lapack) @@ -332,6 +340,16 @@ cp %{SOURCE13} CONFIG/ARCHS/ cp %{SOURCE14} CONFIG/ARCHS/ #cp %{SOURCE8} CONFIG/ARCHS/ #cp %{SOURCE9} CONFIG/ARCHS/ + +%ifarch ppc64le +%patch99 -p2 +%patch98 -p2 +%patch96 -p2 +%patch95 -p2 +%patch100 -p2 +%patch110 -p1 +%endif + %ifarch %{arm} # Set arm flags in atlcomp.txt sed -i -e 's,-mfpu=vfpv3,-mfpu=neon,' CONFIG/src/atlcomp.txt @@ -342,6 +360,7 @@ sed -i -e 's,-mfpu=vfpv3,,' tune/blas/gemm/CASES/*.flg # Debug #sed -i -e 's,> \(.*\)/ptsanity.out,> \1/ptsanity.out || cat \1/ptsanity.out \&\& exit 1,' makes/Make.* + %build for type in %{types}; do if [ "$type" = "base" ]; then @@ -467,6 +486,14 @@ for type in %{types}; do sed -i 's#-m64#-m32#g' Make.inc %endif +%ifarch ppc64le + sed -i 's#-mvsx##g' Make.inc + sed -i 's#-DATL_VSX##g' Make.inc + sed -i 's#-DATL_AltiVec##g' Make.inc + sed -i 's#-maltivec##g' Make.inc + sed -i 's#ARCH =.*#ARCH = POWER464#' Make.inc +%endif + %endif make build cd lib @@ -783,6 +810,9 @@ fi %endif %changelog +* Thu Oct 30 2014 Jaromir Capik - 3.10.1-17 +- patching for 
Power8 to pass performance tunings and tests on P8 builders + * Fri Oct 24 2014 Orion Poplawski - 3.10.1-16 - Fix alternatives install diff --git a/getdoublearr.stripwhite.patch b/getdoublearr.stripwhite.patch new file mode 100644 index 0000000..e1dc84d --- /dev/null +++ b/getdoublearr.stripwhite.patch @@ -0,0 +1,50 @@ +Subject: getdoublearr.stripwhite +From: Michel Normand + +GetDoubleArr must only handle the comma delimited list at string head +and ignore anything after the first blank character. + +Signed-off-by: Michel Normand +--- + ATLAS/include/atlas_genparse.h | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +Index: atlas/ATLAS/include/atlas_genparse.h +=================================================================== +--- atlas.orig/ATLAS/include/atlas_genparse.h ++++ atlas/ATLAS/include/atlas_genparse.h +@@ -149,13 +149,24 @@ static int asmNames2bitfield(char *str) + } + + /* procedure 7 */ +-static int GetDoubleArr(char *str, int N, double *d) ++static int GetDoubleArr(char *callerstr, int N, double *d) + /* + * Reads in a list with form "%le,%le...,%le"; N-length d recieves doubles. 
+ * RETURNS: the number of doubles found, or N, whichever is less + */ + { +- int i=1; ++ int i; ++ char *dupstr = DupString(callerstr); ++ char *str = dupstr; ++ /* strip the string to end on first white space */ ++ for (i=0; dupstr[i]; i++) ++ { ++ if (isspace(dupstr[i])) { ++ dupstr[i] = '\0'; ++ break; ++ } ++ } ++ i = 1; + assert(sscanf(str, "%le", d) == 1); + while (i < N) + { +@@ -166,6 +177,7 @@ static int GetDoubleArr(char *str, int N + break; + i++; + } ++ free(dupstr); + return(i); + } + diff --git a/initialize_malloc_memory.invtrsm.wms.oct23.patch b/initialize_malloc_memory.invtrsm.wms.oct23.patch new file mode 100644 index 0000000..f57a9e4 --- /dev/null +++ b/initialize_malloc_memory.invtrsm.wms.oct23.patch @@ -0,0 +1,10 @@ +--- ./ATLAS.first/tune/blas/level3/invtrsm.c 2013-10-22 19:35:03.000000000 +0000 ++++ ./ATLAS/tune/blas/level3/invtrsm.c 2013-10-23 21:24:01.000000000 +0000 +@@ -525,6 +525,7 @@ + a = A = malloc(i * ATL_MulBySize(incA)); + if (A) + { ++ memset(A,0,i*ATL_MulBySize(incA)); /* wms (!!) malloc call above returns non-initialized memory. 
*/ + if (Uplo == TestGE) + for (i=0; i < k; i++) + Mjoin(PATL,gegen)(N, N, A+i*incA, lda, N+lda); diff --git a/p8-mem-barrier.patch b/p8-mem-barrier.patch new file mode 100644 index 0000000..15d7b8a --- /dev/null +++ b/p8-mem-barrier.patch @@ -0,0 +1,12 @@ +diff -Naur ATLAS.orig/include/atlas_pca.h ATLAS/include/atlas_pca.h +--- ATLAS.orig/include/atlas_pca.h 2013-01-08 19:15:40.000000000 +0100 ++++ ATLAS/include/atlas_pca.h 2014-10-23 13:45:36.956698637 +0200 +@@ -26,7 +26,7 @@ + #endif + #elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \ + defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \ +- defined(ATL_ARCH_POWER7) ++ defined(ATL_ARCH_POWER7) || 1 + #ifdef __GNUC__ + #define ATL_membarrier __asm__ __volatile__ ("dcs") + /* #define ATL_USEPCA 1 */ diff --git a/ppc64le-abiv2.patch b/ppc64le-abiv2.patch new file mode 100644 index 0000000..556dd04 --- /dev/null +++ b/ppc64le-abiv2.patch @@ -0,0 +1,60 @@ +--- atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c 2013-12-05 19:19:57.000000000 +0100 ++++ atlas/ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c.new 2013-12-06 16:29:57.000000000 +0100 +@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N + const TYPE beta, TYPE *C, const int ldc) + (r10) 8(r1) + ******************************************************************************* +-64 bit ABIs: ++64 bit ABIv1s: + r3 r4 r5 r6/f1 + void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, + r7 r8 r9 r10 + const TYPE *A, const int lda, const TYPE *B, const int ldb, + f2 120(r1) 128(r1) + const TYPE beta, TYPE *C, const int ldc) ++ ++64 bit ABIv2s: ++ r3 r4 r5 r6/f1 ++void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha, ++ r7 r8 r9 r10 ++ const TYPE *A, const int lda, const TYPE *B, const int ldb, ++ f2 104(r1) 112(r1) ++ const TYPE beta, TYPE *C, const int ldc) + #endif + #ifdef ATL_AS_AIX_PPC + .csect .text[PR] +@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM): + .globl Mjoin(_,ATL_USERMM) + 
Mjoin(_,ATL_USERMM): + #else +- #if defined(ATL_USE64BITS) ++ #if defined(ATL_USE64BITS) && _CALL_ELF != 2 + /* + * Official Program Descripter section, seg fault w/o it on Linux/PPC64 + */ +@@ -217,6 +225,7 @@ ATL_USERMM: + .globl Mjoin(.,ATL_USERMM) + Mjoin(.,ATL_USERMM): + #else ++/* ppc64 no longer has function descriptors in ABIv2 */ + .globl ATL_USERMM + ATL_USERMM: + #endif +@@ -257,9 +266,17 @@ ATL_USERMM: + #endif + #endif + ++ + #if defined (ATL_USE64BITS) ++#if _CALL_ELF == 2 ++/* ABIv2 */ ++ ld pC0, 104(r1) ++ ld incCn, 112(r1) ++#else ++/* ABIv1 */ + ld pC0, 120(r1) + ld incCn, 128(r1) ++#endif + #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC) + lwz pC0, 68(r1) + lwz incCn, 72(r1) diff --git a/ppc64le-remove-vsx.patch b/ppc64le-remove-vsx.patch new file mode 100644 index 0000000..a79bea6 --- /dev/null +++ b/ppc64le-remove-vsx.patch @@ -0,0 +1,39 @@ +Subject: ppc64le remove vsx +From: Michel Normand + +Temporarily remove the VSX-related flags +for as long as VSX is not supported on ppc64le. +Note that this also forces the architecture to POWER4. + +Signed-off-by: Michel Normand +Index: atlas/ATLAS/CONFIG/src/atlcomp.txt +=================================================================== +--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt ++++ atlas/ATLAS/CONFIG/src/atlcomp.txt +@@ -187,9 +187,9 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc + MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc + 'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave' + MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc +- 'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' ++ 'gcc' '-O2 -m64 -mvrsave -funroll-all-loops' + MACH=POWER7 OS=ALL LVL=1010 COMPS=f77 +- 'gfortran' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops' ++ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops' + MACH=POWER6 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc + 'gcc' '-mcpu=power6 -mtune=power6 -maltivec -O3 -fno-schedule-insns -fschedule-insns2 
-minsert-sched-nops=2' + MACH=POWER5 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc +Index: atlas/ATLAS/CONFIG/src/probe_comp.c +=================================================================== +--- atlas.orig/ATLAS/CONFIG/src/probe_comp.c ++++ atlas/ATLAS/CONFIG/src/probe_comp.c +@@ -446,8 +446,8 @@ COMPNODE **GetDefaultComps(enum OSTYPE O + + if ((vecexts & (1< + +try to bypass error while building ppc64le +"make[2]: xlf: Command not found" + +Signed-off-by: Michel Normand +--- + ATLAS/CONFIG/src/atlcomp.txt | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +Index: atlas/ATLAS/CONFIG/src/atlcomp.txt +=================================================================== +--- atlas.orig/ATLAS/CONFIG/src/atlcomp.txt ++++ atlas/ATLAS/CONFIG/src/atlcomp.txt +@@ -199,7 +199,7 @@ MACH=POWER6 OS=ALL LVL=1010 COMPS=f77 + MACH=POWER5 OS=ALL LVL=1010 COMPS=f77 + 'gfortran' '-mcpu=power5 -mtune=power5 -O3 -fno-schedule-insns -fno-rerun-loop-opt' + MACH=POWER7 OS=ALL LVL=1010 COMPS=f77 +- 'xlf' '-qtune=pwr7 -qarch=pwr7 -O3 -qmaxmem=-1 -qfloat=hsflt' ++ 'gfortran' '-O2 -m64 -mvrsave -funroll-all-loops' + MACH=POWER5 OS=ALL LVL=1010 COMPS=f77 + 'xlf' '-qtune=pwr5 -qarch=pwr5 -O3 -qmaxmem=-1 -qfloat=hsflt' + MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dmc,smc,dkc,skc,xcc,gcc