Blob Blame History Raw
/*
 * Copyright (c) 2012-2013 Vincent Hanquez <vincent@snarc.org>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of his contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

void SIZED(cryptonite_aesni_encrypt_block)(aes_block *out, aes_key *key, aes_block *in)
{
	__m128i *k = (__m128i *) key->data;
	PRELOAD_ENC(k);
	__m128i m = _mm_loadu_si128((__m128i *) in);
	DO_ENC_BLOCK(m);
	_mm_storeu_si128((__m128i *) out, m);
}

void SIZED(cryptonite_aesni_decrypt_block)(aes_block *out, aes_key *key, aes_block *in)
{
	__m128i *k = (__m128i *) key->data;
	PRELOAD_DEC(k);
	__m128i m = _mm_loadu_si128((__m128i *) in);
	DO_DEC_BLOCK(m);
	_mm_storeu_si128((__m128i *) out, m);
}

void SIZED(cryptonite_aesni_encrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;

	PRELOAD_ENC(k);
	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		DO_ENC_BLOCK(m);
		_mm_storeu_si128((__m128i *) out, m);
	}
}

void SIZED(cryptonite_aesni_decrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;

	PRELOAD_DEC(k);

	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		DO_DEC_BLOCK(m);
		_mm_storeu_si128((__m128i *) out, m);
	}
}

void SIZED(cryptonite_aesni_encrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);

	PRELOAD_ENC(k);

	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		m = _mm_xor_si128(m, iv);
		DO_ENC_BLOCK(m);
		iv = m;
		_mm_storeu_si128((__m128i *) out, m);
	}
}

void SIZED(cryptonite_aesni_decrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);

	PRELOAD_DEC(k);

	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		__m128i ivnext = m;

		DO_DEC_BLOCK(m);
		m = _mm_xor_si128(m, iv);

		_mm_storeu_si128((__m128i *) out, m);
		iv = ivnext;
	}
}

void SIZED(cryptonite_aesni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
{
	__m128i *k = (__m128i *) key->data;
	__m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
	__m128i one        = _mm_set_epi32(0,1,0,0);
	uint32_t nb_blocks = len / 16;
	uint32_t part_block_len = len % 16;

	/* get the IV in little endian format */
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);
	iv = _mm_shuffle_epi8(iv, bswap_mask);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* put back the iv in big endian mode,
		 * encrypt it and and xor it the input block
		 */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		_mm_storeu_si128((__m128i *) output, m);
		/* iv += 1 */
		iv = _mm_add_epi64(iv, one);
	}

	if (part_block_len != 0) {
		aes_block block;
		memset(&block.b, 0, 16);
		memcpy(&block.b, input, part_block_len);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);

		DO_ENC_BLOCK(tmp);
		m = _mm_xor_si128(m, tmp);
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}

	return ;
}

void SIZED(cryptonite_aesni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key *key2,
                               aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks)
{
	__m128i tweak = _mm_loadu_si128((__m128i *) _tweak);

	do {
		__m128i *k2 = (__m128i *) key2->data;
		PRELOAD_ENC(k2);
		DO_ENC_BLOCK(tweak);

		while (spoint-- > 0)
			tweak = gfmulx(tweak);
	} while (0) ;

	do {
		__m128i *k1 = (__m128i *) key1->data;
		PRELOAD_ENC(k1);

		for ( ; blocks-- > 0; in += 1, out += 1, tweak = gfmulx(tweak)) {
			__m128i m = _mm_loadu_si128((__m128i *) in);

			m = _mm_xor_si128(m, tweak);
			DO_ENC_BLOCK(m);
			m = _mm_xor_si128(m, tweak);

			_mm_storeu_si128((__m128i *) out, m);
		}
	} while (0);
}

void SIZED(cryptonite_aesni_gcm_encrypt)(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length)
{
	__m128i *k = (__m128i *) key->data;
	__m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
	__m128i one        = _mm_set_epi32(0,1,0,0);
	uint32_t nb_blocks = length / 16;
	uint32_t part_block_len = length % 16;

	gcm->length_input += length;

	__m128i h  = _mm_loadu_si128((__m128i *) &gcm->h);
	__m128i tag = _mm_loadu_si128((__m128i *) &gcm->tag);
	__m128i iv = _mm_loadu_si128((__m128i *) &gcm->civ);
	iv = _mm_shuffle_epi8(iv, bswap_mask);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* iv += 1 */
		iv = _mm_add_epi64(iv, one);

		/* put back iv in big endian, encrypt it,
		 * and xor it to input */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		tag = ghash_add(tag, h, m);

		/* store it out */
		_mm_storeu_si128((__m128i *) output, m);
	}
	if (part_block_len > 0) {
		__m128i mask;
		aes_block block;
		/* FIXME could do something a bit more clever (slli & sub & and maybe) ... */
		switch (part_block_len) {
		case 1: mask = _mm_setr_epi8(0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 2: mask = _mm_setr_epi8(0,1,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 3: mask = _mm_setr_epi8(0,1,2,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 4: mask = _mm_setr_epi8(0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 5: mask = _mm_setr_epi8(0,1,2,3,4,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 6: mask = _mm_setr_epi8(0,1,2,3,4,5,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 7: mask = _mm_setr_epi8(0,1,2,3,4,5,6,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 8: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 9: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 10: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 11: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,0x80,0x80,0x80,0x80,0x80); break;
		case 12: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,0x80,0x80,0x80,0x80); break;
		case 13: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,0x80,0x80,0x80); break;
		case 14: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,0x80,0x80); break;
		case 15: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0x80); break;
		default: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); break;
		}
		block128_zero(&block);
		block128_copy_bytes(&block, input, part_block_len);

		/* iv += 1 */
		iv = _mm_add_epi64(iv, one);

		/* put back iv in big endian mode, encrypt it and xor it with input */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		m = _mm_xor_si128(m, tmp);
		m = _mm_shuffle_epi8(m, mask);

		tag = ghash_add(tag, h, m);

		/* make output */
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}
	/* store back IV & tag */
	__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
	_mm_storeu_si128((__m128i *) &gcm->civ, tmp);
	_mm_storeu_si128((__m128i *) &gcm->tag, tag);
}