/*
   BLAKE2 reference source code package - optimized C implementations

   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
   your option.  The terms of these licenses can be found at:

   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
   - OpenSSL license   : https://www.openssl.org/source/license.html
   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0

   More information about the BLAKE2 hash function can be found at
   https://blake2.net.
*/
/* Include guard: the remainder of this header is skipped on repeated inclusion. */
#ifndef BLAKE2S_LOAD_XOP_H
#define BLAKE2S_LOAD_XOP_H
/*
 * TOB(x): expand a 32-bit word index x into a 32-bit selector word whose
 * bytes (least significant first) are 4x, 4x+1, 4x+2 and 4x+3, i.e. the byte
 * positions of word x.  The LOAD_MSG_* macros pass four of these to
 * _mm_set_epi32 to build a _mm_perm_epi8 selector, using indices 0..7.
 */
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */
#if 0
/*
 * Basic VPPERM emulation, for testing purposes (compiled out by default).
 *
 * Shuffles both sources with the byte selector `sel`, then for each byte
 * keeps the src2 candidate where the selector byte is >= 16 (signed compare)
 * and the src1 candidate otherwise.  Only plain byte selection is modelled.
 */
static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
   const __m128i sixteen = _mm_set1_epi8(16);
   /* Candidate bytes from each source; src2 is indexed with sel - 16. */
   const __m128i t0 = _mm_shuffle_epi8(src1, sel);
   const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
   /* There is no >= byte compare, so combine == and >. */
   const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
                                     _mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */
   return _mm_blendv_epi8(t0, s1, mask);
}
#endif
/*
 * LOAD_MSG_0_1: buf = { m0[0], m0[2], m1[0], m1[2] } (element 0 first),
 * i.e. message words 0, 2, 4, 6 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).
 */
#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
/*
 * LOAD_MSG_0_2: buf = { m0[1], m0[3], m1[1], m1[3] } (element 0 first),
 * i.e. message words 1, 3, 5, 7 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).
 */
#define LOAD_MSG_0_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
/*
 * LOAD_MSG_0_3: buf = { m2[0], m2[2], m3[0], m3[2] } (element 0 first),
 * i.e. message words 8, 10, 12, 14 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).
 */
#define LOAD_MSG_0_3(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
/*
 * LOAD_MSG_0_4: buf = { m2[1], m2[3], m3[1], m3[3] } (element 0 first),
 * i.e. message words 9, 11, 13, 15 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).
 */
#define LOAD_MSG_0_4(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
/*
 * LOAD_MSG_1_1: buf = { m3[2], m1[0], m2[1], m3[1] } (element 0 first),
 * i.e. message words 14, 4, 9, 13 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_1_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
/*
 * LOAD_MSG_1_2: buf = { m2[2], m2[0], m3[3], m1[2] } (element 0 first),
 * i.e. message words 10, 8, 15, 6 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_1_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_1_3: buf = { m0[1], m0[0], m2[3], m1[1] } (element 0 first),
 * i.e. message words 1, 0, 11, 5 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_1_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_1_4: buf = { m3[0], m0[2], m1[3], m0[3] } (element 0 first),
 * i.e. message words 12, 2, 7, 3 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_1_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
/*
 * LOAD_MSG_2_1: buf = { m2[3], m3[0], m1[1], m3[3] } (element 0 first),
 * i.e. message words 11, 12, 5, 15 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_2_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) );
/*
 * LOAD_MSG_2_2: buf = { m2[0], m0[0], m0[2], m3[1] } (element 0 first),
 * i.e. message words 8, 0, 2, 13 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_2_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_2_3: buf = { m2[2], m0[3], m1[3], m2[1] } (element 0 first),
 * i.e. message words 10, 3, 7, 9 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_2_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
/*
 * LOAD_MSG_2_4: buf = { m3[2], m1[2], m0[1], m1[0] } (element 0 first),
 * i.e. message words 14, 6, 1, 4 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_2_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
/*
 * LOAD_MSG_3_1: buf = { m1[3], m0[3], m3[1], m2[3] } (element 0 first),
 * i.e. message words 7, 3, 13, 11 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Needs three permutes because all four source
 * vectors contribute; overwrites the scratch vector t0.
 */
#define LOAD_MSG_3_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_3_2: buf = { m2[1], m0[1], m3[0], m3[2] } (element 0 first),
 * i.e. message words 9, 1, 12, 14 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_3_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_3_3: buf = { m0[2], m1[1], m1[0], m3[3] } (element 0 first),
 * i.e. message words 2, 5, 4, 15 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_3_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_3_4: buf = { m1[2], m2[2], m0[0], m2[0] } (element 0 first),
 * i.e. message words 6, 10, 0, 8 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_3_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) );
/*
 * LOAD_MSG_4_1: buf = { m2[1], m1[1], m0[2], m2[2] } (element 0 first),
 * i.e. message words 9, 5, 2, 10 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_4_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) );
/*
 * LOAD_MSG_4_2: buf = { m0[0], m1[3], m1[0], m3[3] } (element 0 first),
 * i.e. message words 0, 7, 4, 15 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_4_2(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_4_3: buf = { m3[2], m2[3], m1[2], m0[3] } (element 0 first),
 * i.e. message words 14, 11, 6, 3 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Needs three permutes because all four source
 * vectors contribute; overwrites the scratch vector t0.
 */
#define LOAD_MSG_4_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
/*
 * LOAD_MSG_4_4: buf = { m0[1], m3[0], m2[0], m3[1] } (element 0 first),
 * i.e. message words 1, 12, 8, 13 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_4_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) );
/*
 * LOAD_MSG_5_1: buf = { m0[2], m1[2], m0[0], m2[0] } (element 0 first),
 * i.e. message words 2, 6, 0, 8 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_5_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_5_2: buf = { m3[0], m2[2], m2[3], m0[3] } (element 0 first),
 * i.e. message words 12, 10, 11, 3 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_5_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
/*
 * LOAD_MSG_5_3: buf = { m1[0], m1[3], m3[3], m0[1] } (element 0 first),
 * i.e. message words 4, 7, 15, 1 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_5_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_5_4: buf = { m3[1], m1[1], m3[2], m2[1] } (element 0 first),
 * i.e. message words 13, 5, 14, 9 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_5_4(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) );
/*
 * LOAD_MSG_6_1: buf = { m3[0], m0[1], m3[2], m1[0] } (element 0 first),
 * i.e. message words 12, 1, 14, 4 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_6_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) );
/*
 * LOAD_MSG_6_2: buf = { m1[1], m3[3], m3[1], m2[2] } (element 0 first),
 * i.e. message words 5, 15, 13, 10 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_6_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) );
/*
 * LOAD_MSG_6_3: buf = { m0[0], m1[2], m2[1], m2[0] } (element 0 first),
 * i.e. message words 0, 6, 9, 8 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_6_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_6_4: buf = { m1[3], m0[3], m0[2], m2[3] } (element 0 first),
 * i.e. message words 7, 3, 2, 11 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_6_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_7_1: buf = { m3[1], m1[3], m3[0], m0[3] } (element 0 first),
 * i.e. message words 13, 7, 12, 3 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_7_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) );
/*
 * LOAD_MSG_7_2: buf = { m2[3], m3[2], m0[1], m2[1] } (element 0 first),
 * i.e. message words 11, 14, 1, 9 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_7_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
/*
 * LOAD_MSG_7_3: buf = { m1[1], m3[3], m2[0], m0[2] } (element 0 first),
 * i.e. message words 5, 15, 8, 2 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Needs three permutes because all four source
 * vectors contribute; overwrites the scratch vector t0.
 */
#define LOAD_MSG_7_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) );
/*
 * LOAD_MSG_7_4: buf = { m0[0], m1[0], m1[2], m2[2] } (element 0 first),
 * i.e. message words 0, 4, 6, 10 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_7_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) );
/*
 * LOAD_MSG_8_1: buf = { m1[2], m3[2], m2[3], m0[0] } (element 0 first),
 * i.e. message words 6, 14, 11, 0 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Needs three permutes because all four source
 * vectors contribute; overwrites the scratch vector t0.
 */
#define LOAD_MSG_8_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
/*
 * LOAD_MSG_8_2: buf = { m3[3], m2[1], m0[3], m2[0] } (element 0 first),
 * i.e. message words 15, 9, 3, 8 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_8_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );
/*
 * LOAD_MSG_8_3: buf = { m3[0], m3[1], m0[1], m2[2] } (element 0 first),
 * i.e. message words 12, 13, 1, 10 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 *
 * The final statement must not end in a line continuation, otherwise
 * whatever follows the macro is absorbed into its body.
 */
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) );
/*
 * LOAD_MSG_8_4: buf = { m0[2], m1[3], m1[0], m1[1] } (element 0 first),
 * i.e. message words 2, 7, 4, 5 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).
 */
#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );
/*
 * LOAD_MSG_9_1: buf = { m2[2], m2[0], m1[3], m0[1] } (element 0 first),
 * i.e. message words 10, 8, 7, 1 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_9_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) );
/*
 * LOAD_MSG_9_2: buf = { m0[2], m1[0], m1[2], m1[1] } (element 0 first),
 * i.e. message words 2, 4, 6, 5 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).
 */
#define LOAD_MSG_9_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) );
/*
 * LOAD_MSG_9_3: buf = { m3[3], m2[1], m0[3], m3[1] } (element 0 first),
 * i.e. message words 15, 9, 3, 13 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t0.
 */
#define LOAD_MSG_9_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) );
/*
 * LOAD_MSG_9_4: buf = { m2[3], m3[2], m3[0], m0[0] } (element 0 first),
 * i.e. message words 11, 14, 12, 0 if mK holds words 4K..4K+3 (TODO confirm
 * against the caller).  Overwrites the scratch vector t1.
 */
#define LOAD_MSG_9_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );
#endif