|
Packit Service |
155747 |
/*
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
This file is provided under a dual BSD/GPLv2 license. When using or
|
|
Packit Service |
155747 |
redistributing this file, you may do so under either license.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
GPL LICENSE SUMMARY
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
Copyright(c) 2015 Intel Corporation.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
This program is free software; you can redistribute it and/or modify
|
|
Packit Service |
155747 |
it under the terms of version 2 of the GNU General Public License as
|
|
Packit Service |
155747 |
published by the Free Software Foundation.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
This program is distributed in the hope that it will be useful, but
|
|
Packit Service |
155747 |
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit Service |
155747 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit Service |
155747 |
General Public License for more details.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
Contact Information:
|
|
Packit Service |
155747 |
Intel Corporation, www.intel.com
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
BSD LICENSE
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
Copyright(c) 2015 Intel Corporation.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
Redistribution and use in source and binary forms, with or without
|
|
Packit Service |
155747 |
modification, are permitted provided that the following conditions
|
|
Packit Service |
155747 |
are met:
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
* Redistributions of source code must retain the above copyright
|
|
Packit Service |
155747 |
notice, this list of conditions and the following disclaimer.
|
|
Packit Service |
155747 |
* Redistributions in binary form must reproduce the above copyright
|
|
Packit Service |
155747 |
notice, this list of conditions and the following disclaimer in
|
|
Packit Service |
155747 |
the documentation and/or other materials provided with the
|
|
Packit Service |
155747 |
distribution.
|
|
Packit Service |
155747 |
* Neither the name of Intel Corporation nor the names of its
|
|
Packit Service |
155747 |
contributors may be used to endorse or promote products derived
|
|
Packit Service |
155747 |
from this software without specific prior written permission.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
Packit Service |
155747 |
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
Packit Service |
155747 |
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
Packit Service |
155747 |
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
Packit Service |
155747 |
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
Packit Service |
155747 |
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
Packit Service |
155747 |
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
Packit Service |
155747 |
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
Packit Service |
155747 |
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
Packit Service |
155747 |
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
Packit Service |
155747 |
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
*/
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/* Copyright (c) 2003-2014 Intel Corporation. All rights reserved. */
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
#include <stdint.h>
|
|
Packit Service |
155747 |
#include <immintrin.h>
|
|
Packit Service |
155747 |
#include "opa_intf.h"
|
|
Packit Service |
155747 |
#include "psm_user.h"
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
#if defined(__x86_64__)
|
|
Packit Service |
155747 |
#define hfi_dwordcpy hfi_dwordcpy_safe
|
|
Packit Service |
155747 |
#define hfi_qwordcpy hfi_qwordcpy_safe
|
|
Packit Service |
155747 |
#endif
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/*
 * Fetch one 64-bit value starting at a uint32_t-typed address.
 *
 * memcpy() is used instead of dereferencing a casted uint64_t pointer so
 * that the load is valid even when p is only 4-byte aligned and so that
 * the uint32_t objects are not accessed through an incompatible lvalue
 * type.
 */
static inline uint64_t hfi_load_qword_unaligned(const uint32_t *p)
{
	uint64_t qw;

	memcpy(&qw, p, sizeof(qw));
	return qw;
}

/*
 * Copy ndwords 32-bit words from src to dest.
 *
 * The bulk of the data is moved with 64-bit stores, eight dwords (four
 * qwords) per loop iteration, in ascending address order.  The remaining
 * zero to seven dwords are then written one at a time with 32-bit
 * stores.  dest is volatile-qualified, so each store is performed as
 * written and in program order.
 *
 * dest:    destination of the copy
 * src:     source dwords; no alignment beyond that of uint32_t is needed
 * ndwords: number of 32-bit words to copy (0 is allowed and copies nothing)
 *
 * NOTE(review): dest is still written through a uint64_t pointer in the
 * main loop, so callers presumably guarantee 8-byte alignment of dest --
 * confirm against the call sites.
 */
void hfi_dwordcpy(volatile uint32_t *dest, const uint32_t *src, uint32_t ndwords)
{
	uint_fast32_t ndw = ndwords;
	volatile uint64_t *dst64 = (volatile uint64_t *) dest;

	while (ndw >= 8) {
		/* each qword is loaded and then stored before the next one */
		dst64[0] = hfi_load_qword_unaligned(src);
		dst64[1] = hfi_load_qword_unaligned(src + 2);
		dst64[2] = hfi_load_qword_unaligned(src + 4);
		dst64[3] = hfi_load_qword_unaligned(src + 6);
		src += 8;
		dst64 += 4;
		ndw -= 8;
	}

	if (ndw) {
		dest = (volatile uint32_t *) dst64;

		/* copy the 1..7 leftover dwords; every case falls into the next */
		switch (ndw) {
		case 7:
			*dest++ = *src++;
			/* fallthrough */
		case 6:
			*dest++ = *src++;
			/* fallthrough */
		case 5:
			*dest++ = *src++;
			/* fallthrough */
		case 4:
			*dest++ = *src++;
			/* fallthrough */
		case 3:
			*dest++ = *src++;
			/* fallthrough */
		case 2:
			*dest++ = *src++;
			/* fallthrough */
		case 1:
			*dest++ = *src++;
			break;
		default:
			/* not reachable: ndw is in 1..7 here */
			break;
		}
	}
}
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/*
 * Copy nqwords 64-bit words from src to dest.
 *
 * Eight qwords are moved per iteration of the main loop; whatever is
 * left (zero to seven qwords) is copied one qword at a time afterwards.
 * All stores are 64 bits wide and are issued in ascending address order;
 * dest is volatile-qualified, so none of them is merged or dropped.
 *
 * dest:    destination of the copy
 * src:     source qwords
 * nqwords: number of 64-bit words to copy (0 copies nothing)
 */
void hfi_qwordcpy(volatile uint64_t *dest, const uint64_t *src, uint32_t nqwords)
{
	uint_fast32_t remaining = nqwords;
	const uint64_t *in = src;
	volatile uint64_t *out = dest;

	/* main body: eight qwords at a time */
	for (; remaining >= 8; remaining -= 8, in += 8, out += 8) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = in[3];
		out[4] = in[4];
		out[5] = in[5];
		out[6] = in[6];
		out[7] = in[7];
	}

	/* tail: at most seven qwords remain */
	for (; remaining != 0; remaining--)
		*out++ = *in++;
}
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
#ifdef PSM_AVX512
|
|
Packit Service |
155747 |
/*
 * Copy nblock 64-byte blocks from src to dest, using one 512-bit load
 * and one 512-bit store per block.
 *
 * dest:   destination; asserted (via psmi_assert) to be non-NULL and
 *         64-byte aligned
 * src:    source; asserted to be non-NULL.  Its alignment is checked at
 *         run time to choose between aligned and unaligned loads.
 * nblock: number of 64-byte blocks to copy.  0 copies nothing; the loop
 *         condition is tested before the first block so that a zero
 *         count cannot wrap the unsigned counter and overrun the buffers.
 */
void hfi_pio_blockcpy_512(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
{
	volatile __m512i *dp = (volatile __m512i *) dest;
	const __m512i *sp = (const __m512i *) src;

	psmi_assert((dp != NULL) && (sp != NULL));
	psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0);

	if ((((uintptr_t) sp) & 0x3f) == 0x0) {
		/* source and destination are both 64 byte aligned */
		for (; nblock != 0; nblock--, dp++, sp++) {
			__m512i tmp0 = _mm512_load_si512(sp);
			_mm512_store_si512((__m512i *) dp, tmp0);
		}
	} else {
		/* only destination is 64 byte aligned - use unaligned loads */
		for (; nblock != 0; nblock--, dp++, sp++) {
			__m512i tmp0 = _mm512_loadu_si512(sp);
			_mm512_store_si512((__m512i *) dp, tmp0);
		}
	}
}
|
|
Packit Service |
155747 |
#endif
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/*
 * Copy nblock 64-byte blocks from src to dest, using two 256-bit loads
 * followed by two 256-bit stores per block.
 *
 * dest:   destination; asserted (via psmi_assert) to be non-NULL and
 *         64-byte aligned
 * src:    source; asserted to be non-NULL.  Its alignment is checked at
 *         run time to choose between aligned and unaligned loads.
 * nblock: number of 64-byte blocks to copy.  0 copies nothing; the loop
 *         condition is tested before the first block so that a zero
 *         count cannot wrap the unsigned counter and overrun the buffers.
 */
void hfi_pio_blockcpy_256(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
{
	volatile __m256i *dp = (volatile __m256i *) dest;
	const __m256i *sp = (const __m256i *) src;

	psmi_assert((dp != NULL) && (sp != NULL));
	psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0);

	if ((((uintptr_t) sp) & 0x1f) == 0x0) {
		/* source and destination are both 32 byte aligned */
		for (; nblock != 0; nblock--, dp += 2, sp += 2) {
			__m256i tmp0 = _mm256_load_si256(sp);
			__m256i tmp1 = _mm256_load_si256(sp + 1);
			_mm256_store_si256((__m256i *) dp, tmp0);
			_mm256_store_si256((__m256i *) (dp + 1), tmp1);
		}
	} else {
		/* only destination is 32 byte aligned - use unaligned loads */
		for (; nblock != 0; nblock--, dp += 2, sp += 2) {
			__m256i tmp0 = _mm256_loadu_si256(sp);
			__m256i tmp1 = _mm256_loadu_si256(sp + 1);
			_mm256_store_si256((__m256i *) dp, tmp0);
			_mm256_store_si256((__m256i *) (dp + 1), tmp1);
		}
	}
}
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/*
 * Copy nblock 64-byte blocks from src to dest, using four 128-bit loads
 * followed by four 128-bit stores per block.
 *
 * dest:   destination; asserted (via psmi_assert) to be non-NULL and
 *         64-byte aligned
 * src:    source; asserted to be non-NULL.  Its alignment is checked at
 *         run time to choose between aligned and unaligned loads.
 * nblock: number of 64-byte blocks to copy.  0 copies nothing; the loop
 *         condition is tested before the first block so that a zero
 *         count cannot wrap the unsigned counter and overrun the buffers.
 */
void hfi_pio_blockcpy_128(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
{
	volatile __m128i *dp = (volatile __m128i *) dest;
	const __m128i *sp = (const __m128i *) src;

	psmi_assert((dp != NULL) && (sp != NULL));
	psmi_assert((((uintptr_t) dp) & 0x3f) == 0x0);

	if ((((uintptr_t) sp) & 0xf) == 0x0) {
		/* source and destination are both 16 byte aligned */
		for (; nblock != 0; nblock--, dp += 4, sp += 4) {
			__m128i tmp0 = _mm_load_si128(sp);
			__m128i tmp1 = _mm_load_si128(sp + 1);
			__m128i tmp2 = _mm_load_si128(sp + 2);
			__m128i tmp3 = _mm_load_si128(sp + 3);
			_mm_store_si128((__m128i *) dp, tmp0);
			_mm_store_si128((__m128i *) (dp + 1), tmp1);
			_mm_store_si128((__m128i *) (dp + 2), tmp2);
			_mm_store_si128((__m128i *) (dp + 3), tmp3);
		}
	} else {
		/* only destination is 16 byte aligned - use unaligned loads */
		for (; nblock != 0; nblock--, dp += 4, sp += 4) {
			__m128i tmp0 = _mm_loadu_si128(sp);
			__m128i tmp1 = _mm_loadu_si128(sp + 1);
			__m128i tmp2 = _mm_loadu_si128(sp + 2);
			__m128i tmp3 = _mm_loadu_si128(sp + 3);
			_mm_store_si128((__m128i *) dp, tmp0);
			_mm_store_si128((__m128i *) (dp + 1), tmp1);
			_mm_store_si128((__m128i *) (dp + 2), tmp2);
			_mm_store_si128((__m128i *) (dp + 3), tmp3);
		}
	}
}
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/*
 * Copy nblock 64-byte blocks from src to dest using plain 64-bit stores,
 * eight qwords per block, in ascending address order.  dest is
 * volatile-qualified, so every store is performed as written.
 *
 * dest:   destination; asserted (via psmi_assert) to be non-NULL and
 *         64-byte aligned
 * src:    source; asserted to be non-NULL
 * nblock: number of 64-byte blocks to copy.  0 copies nothing; the loop
 *         condition is tested before the first block so that a zero
 *         count cannot wrap the unsigned counter and overrun the buffers.
 */
void hfi_pio_blockcpy_64(volatile uint64_t *dest, const uint64_t *src, uint32_t nblock)
{
	psmi_assert((dest != NULL) && (src != NULL));
	psmi_assert((((uintptr_t) dest) & 0x3f) == 0x0);

	for (; nblock != 0; nblock--, src += 8, dest += 8) {
		dest[0] = src[0];
		dest[1] = src[1];
		dest[2] = src[2];
		dest[3] = src[3];
		dest[4] = src[4];
		dest[5] = src[5];
		dest[6] = src[6];
		dest[7] = src[7];
	}
}
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
void MOCKABLE(psmi_mq_mtucpy)(void *vdest, const void *vsrc, uint32_t nchars)
|
|
Packit Service |
155747 |
{
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
#ifdef PSM_CUDA
|
|
Packit Service |
155747 |
if (PSMI_IS_CUDA_ENABLED && (PSMI_IS_CUDA_MEM(vdest) || PSMI_IS_CUDA_MEM((void *) vsrc))) {
|
|
Packit Service |
155747 |
PSMI_CUDA_CALL(cuMemcpy,
|
|
Packit Service |
155747 |
(CUdeviceptr)vdest, (CUdeviceptr)vsrc, nchars);
|
|
Packit Service |
155747 |
return;
|
|
Packit Service |
155747 |
}
|
|
Packit Service |
155747 |
#endif
|
|
Packit Service |
155747 |
memcpy(vdest, vsrc, nchars);
|
|
Packit Service |
155747 |
return;
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
}
|
|
Packit Service |
155747 |
MOCK_DEF_EPILOGUE(psmi_mq_mtucpy);
|
|
Packit Service |
155747 |
|
|
Packit Service |
155747 |
/*
 * Copy nchars bytes from vsrc to vdest with memcpy().
 *
 * Unlike psmi_mq_mtucpy() this variant never takes a CUDA path; it always
 * performs a plain memcpy(), so the two regions must not overlap.
 */
void psmi_mq_mtucpy_host_mem(void *vdest, const void *vsrc, uint32_t nchars)
{
	(void) memcpy(vdest, vsrc, (size_t) nchars);
}
|