From 4d4e105e918711704f610473248dcfb1dbc85f10 Mon Sep 17 00:00:00 2001 From: Packit Service Date: Dec 09 2020 19:00:51 +0000 Subject: Apply patch libgcrypt-1.8.5-aes-perf.patch patch_name: libgcrypt-1.8.5-aes-perf.patch present_in_specfile: true --- diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 95c4510..85a5b5f 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -83,7 +83,8 @@ rijndael.c rijndael-internal.h rijndael-tables.h rijndael-aesni.c \ rijndael-padlock.c rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S rijndael-armv8-aarch64-ce.S \ - rijndael-aarch64.S \ + rijndael-aarch64.S rijndael-ppc.c rijndael-ppc9le.c \ + rijndael-ppc-common.h rijndael-ppc-functions.h \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ @@ -128,3 +129,23 @@ tiger.o: $(srcdir)/tiger.c tiger.lo: $(srcdir)/tiger.c `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` + +if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS +ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto +else +ppc_vcrypto_cflags = +endif + +rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< ` + +rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< ` + +rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< ` + +rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< ` + + diff --git a/cipher/arcfour.c b/cipher/arcfour.c index 44e8ef4..2266457 100644 --- a/cipher/arcfour.c +++ b/cipher/arcfour.c @@ -184,10 +184,12 @@ do_arcfour_setkey (void *context, const byte *key, unsigned int keylen) } static gcry_err_code_t -arcfour_setkey ( void *context, const byte *key, unsigned int keylen ) +arcfour_setkey ( void *context, const byte *key, unsigned int keylen, + gcry_cipher_hd_t hd ) { ARCFOUR_context *ctx = (ARCFOUR_context *) 
context; gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen ); + (void)hd; return rc; } @@ -207,11 +209,11 @@ selftest(void) static const byte ciphertext_1[] = { 0xF1, 0x38, 0x29, 0xC9, 0xDE }; - arcfour_setkey( &ctx, key_1, sizeof(key_1)); + arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL); encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1)); if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1))) return "Arcfour encryption test 1 failed."; - arcfour_setkey( &ctx, key_1, sizeof(key_1)); + arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL); encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */ if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1))) return "Arcfour decryption test 1 failed."; diff --git a/cipher/blowfish.c b/cipher/blowfish.c index a3fc26c..ddc6a8f 100644 --- a/cipher/blowfish.c +++ b/cipher/blowfish.c @@ -37,6 +37,7 @@ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" +#include "cipher-internal.h" #include "cipher-selftest.h" #define BLOWFISH_BLOCKSIZE 8 @@ -67,7 +68,8 @@ typedef struct { u32 p[BLOWFISH_ROUNDS+2]; } BLOWFISH_context; -static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen); +static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd); static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf); static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf); @@ -703,7 +705,7 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Encrypt the counter. */ do_encrypt_block(ctx, tmpbuf, ctr); /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE); + cipher_block_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE); outbuf += BLOWFISH_BLOCKSIZE; inbuf += BLOWFISH_BLOCKSIZE; /* Increment the counter. 
*/ @@ -771,7 +773,7 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, the intermediate result to SAVEBUF. */ do_decrypt_block (ctx, savebuf, inbuf); - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE); inbuf += BLOWFISH_BLOCKSIZE; outbuf += BLOWFISH_BLOCKSIZE; } @@ -828,7 +830,7 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { do_encrypt_block(ctx, iv, iv); - buf_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE); + cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE); outbuf += BLOWFISH_BLOCKSIZE; inbuf += BLOWFISH_BLOCKSIZE; } @@ -897,7 +899,7 @@ selftest(void) const char *r; bf_setkey( (void *) &c, - (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26 ); + (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26, NULL ); encrypt_block( (void *) &c, buffer, plain ); if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) ) return "Blowfish selftest failed (1)."; @@ -905,7 +907,7 @@ selftest(void) if( memcmp( buffer, plain, 8 ) ) return "Blowfish selftest failed (2)."; - bf_setkey( (void *) &c, key3, 8 ); + bf_setkey( (void *) &c, key3, 8, NULL ); encrypt_block( (void *) &c, buffer, plain3 ); if( memcmp( buffer, cipher3, 8 ) ) return "Blowfish selftest failed (3)."; @@ -1095,10 +1097,12 @@ do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen) static gcry_err_code_t -bf_setkey (void *context, const byte *key, unsigned keylen) +bf_setkey (void *context, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd) { BLOWFISH_context *c = (BLOWFISH_context *) context; gcry_err_code_t rc = do_bf_setkey (c, key, keylen); + (void)hd; return rc; } diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h index 83d3f53..4e97c4d 100644 --- a/cipher/bufhelp.h +++ b/cipher/bufhelp.h @@ -450,7 +450,21 @@ static inline void buf_put_le64(void *_buf, u64 val) out->a = le_bswap64(val); } - 
#endif /*BUFHELP_UNALIGNED_ACCESS*/ + +/* Host-endian get/put macros */ +#ifdef WORDS_BIGENDIAN +# define buf_get_he32 buf_get_be32 +# define buf_put_he32 buf_put_be32 +# define buf_get_he64 buf_get_be64 +# define buf_put_he64 buf_put_be64 +#else +# define buf_get_he32 buf_get_le32 +# define buf_put_he32 buf_put_le32 +# define buf_get_he64 buf_get_le64 +# define buf_put_he64 buf_put_le64 +#endif + + #endif /*GCRYPT_BUFHELP_H*/ diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 7687094..69b240b 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -204,7 +204,8 @@ extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx, static const char *selftest(void); static gcry_err_code_t -camellia_setkey(void *c, const byte *key, unsigned keylen) +camellia_setkey(void *c, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd) { CAMELLIA_context *ctx=c; static int initialized=0; @@ -213,6 +214,8 @@ camellia_setkey(void *c, const byte *key, unsigned keylen) unsigned int hwf = _gcry_get_hw_features (); #endif + (void)hd; + if(keylen!=16 && keylen!=24 && keylen!=32) return GPG_ERR_INV_KEYLEN; @@ -427,7 +430,7 @@ _gcry_camellia_ctr_enc(void *context, unsigned char *ctr, /* Encrypt the counter. */ Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf); /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE); + cipher_block_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE); outbuf += CAMELLIA_BLOCK_SIZE; inbuf += CAMELLIA_BLOCK_SIZE; /* Increment the counter. */ @@ -520,7 +523,8 @@ _gcry_camellia_cbc_dec(void *context, unsigned char *iv, the intermediate result to SAVEBUF. 
*/ Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf); - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAMELLIA_BLOCK_SIZE); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, + CAMELLIA_BLOCK_SIZE); inbuf += CAMELLIA_BLOCK_SIZE; outbuf += CAMELLIA_BLOCK_SIZE; } @@ -602,7 +606,7 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv, for ( ;nblocks; nblocks-- ) { Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv); - buf_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE); + cipher_block_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE); outbuf += CAMELLIA_BLOCK_SIZE; inbuf += CAMELLIA_BLOCK_SIZE; } @@ -991,7 +995,7 @@ selftest(void) 0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09 }; - camellia_setkey(&ctx,key_128,sizeof(key_128)); + camellia_setkey(&ctx,key_128,sizeof(key_128),NULL); camellia_encrypt(&ctx,scratch,plaintext); if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0) return "CAMELLIA-128 test encryption failed."; @@ -999,7 +1003,7 @@ selftest(void) if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) return "CAMELLIA-128 test decryption failed."; - camellia_setkey(&ctx,key_192,sizeof(key_192)); + camellia_setkey(&ctx,key_192,sizeof(key_192),NULL); camellia_encrypt(&ctx,scratch,plaintext); if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0) return "CAMELLIA-192 test encryption failed."; @@ -1007,7 +1011,7 @@ selftest(void) if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) return "CAMELLIA-192 test decryption failed."; - camellia_setkey(&ctx,key_256,sizeof(key_256)); + camellia_setkey(&ctx,key_256,sizeof(key_256),NULL); camellia_encrypt(&ctx,scratch,plaintext); if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0) return "CAMELLIA-256 test encryption failed."; diff --git a/cipher/cast5.c b/cipher/cast5.c index 94dcee7..61c345e 100644 --- a/cipher/cast5.c +++ b/cipher/cast5.c @@ -44,6 +44,7 @@ #include "cipher.h" #include "bithelp.h" #include "bufhelp.h" +#include "cipher-internal.h" #include 
"cipher-selftest.h" /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ @@ -72,7 +73,8 @@ typedef struct { #endif } CAST5_context; -static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen); +static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd); static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf); static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf); @@ -671,7 +673,7 @@ _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Encrypt the counter. */ do_encrypt_block(ctx, tmpbuf, ctr); /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE); + cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE); outbuf += CAST5_BLOCKSIZE; inbuf += CAST5_BLOCKSIZE; /* Increment the counter. */ @@ -739,7 +741,7 @@ _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, the intermediate result to SAVEBUF. 
*/ do_decrypt_block (ctx, savebuf, inbuf); - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE); inbuf += CAST5_BLOCKSIZE; outbuf += CAST5_BLOCKSIZE; } @@ -795,7 +797,7 @@ _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { do_encrypt_block(ctx, iv, iv); - buf_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE); + cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE); outbuf += CAST5_BLOCKSIZE; inbuf += CAST5_BLOCKSIZE; } @@ -863,7 +865,7 @@ selftest(void) byte buffer[8]; const char *r; - cast_setkey( &c, key, 16 ); + cast_setkey( &c, key, 16, NULL ); encrypt_block( &c, buffer, plain ); if( memcmp( buffer, cipher, 8 ) ) return "1"; @@ -884,10 +886,10 @@ selftest(void) 0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E }; for(i=0; i < 1000000; i++ ) { - cast_setkey( &c, b0, 16 ); + cast_setkey( &c, b0, 16, NULL ); encrypt_block( &c, a0, a0 ); encrypt_block( &c, a0+8, a0+8 ); - cast_setkey( &c, a0, 16 ); + cast_setkey( &c, a0, 16, NULL ); encrypt_block( &c, b0, b0 ); encrypt_block( &c, b0+8, b0+8 ); } @@ -1029,10 +1031,12 @@ do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen ) } static gcry_err_code_t -cast_setkey (void *context, const byte *key, unsigned keylen ) +cast_setkey (void *context, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd ) { CAST5_context *c = (CAST5_context *) context; gcry_err_code_t rc = do_cast_setkey (c, key, keylen); + (void)hd; return rc; } diff --git a/cipher/chacha20.c b/cipher/chacha20.c index 613fa82..ebbfeb2 100644 --- a/cipher/chacha20.c +++ b/cipher/chacha20.c @@ -419,10 +419,12 @@ chacha20_do_setkey (CHACHA20_context_t * ctx, static gcry_err_code_t -chacha20_setkey (void *context, const byte * key, unsigned int keylen) +chacha20_setkey (void *context, const byte *key, unsigned int keylen, + gcry_cipher_hd_t hd) { CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; gcry_err_code_t 
rc = chacha20_do_setkey (ctx, key, keylen); + (void)hd; _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); return rc; } @@ -569,7 +571,7 @@ selftest (void) /* 16-byte alignment required for amd64 implementation. */ ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); - chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); scratch[sizeof (scratch) - 1] = 0; chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); @@ -577,7 +579,7 @@ selftest (void) return "ChaCha20 encryption test 1 failed."; if (scratch[sizeof (scratch) - 1]) return "ChaCha20 wrote too much."; - chacha20_setkey (ctx, key_1, sizeof (key_1)); + chacha20_setkey (ctx, key_1, sizeof (key_1), NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) @@ -585,12 +587,12 @@ selftest (void) for (i = 0; i < sizeof buf; i++) buf[i] = i; - chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); /*encrypt */ chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); /*decrypt */ - chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); chacha20_encrypt_stream (ctx, buf, buf, 1); chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); @@ -600,13 +602,13 @@ selftest (void) if (buf[i] != (byte) i) return "ChaCha20 encryption test 2 failed."; - chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); /* encrypt */ for (i = 0; i < sizeof buf; i++) chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); /* decrypt */ - chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setkey (ctx, key_1, sizeof key_1, 
NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); for (i = 0; i < sizeof buf; i++) diff --git a/cipher/cipher-aeswrap.c b/cipher/cipher-aeswrap.c index a8d0e03..c182657 100644 --- a/cipher/cipher-aeswrap.c +++ b/cipher/cipher-aeswrap.c @@ -99,7 +99,7 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, break; } /* A := MSB_64(B) ^ t */ - buf_xor(a, b, t, 8); + cipher_block_xor(a, b, t, 8); /* R[i] := LSB_64(B) */ memcpy (r+i*8, b+8, 8); } @@ -170,7 +170,7 @@ _gcry_cipher_aeswrap_decrypt (gcry_cipher_hd_t c, for (i = n; i >= 1; i--) { /* B := AES_k^1( (A ^ t)| R[i] ) */ - buf_xor(b, a, t, 8); + cipher_block_xor(b, a, t, 8); memcpy (b+8, r+(i-1)*8, 8); nburn = c->spec->decrypt (&c->context.c, b, b); burn = nburn > burn ? nburn : burn; diff --git a/cipher/cipher-cbc.c b/cipher/cipher-cbc.c index 95c49b2..967795f 100644 --- a/cipher/cipher-cbc.c +++ b/cipher/cipher-cbc.c @@ -79,7 +79,7 @@ _gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c, for (n=0; n < nblocks; n++ ) { - buf_xor (outbuf, inbuf, ivp, blocksize); + cipher_block_xor (outbuf, inbuf, ivp, blocksize); nburn = enc_fn ( &c->context.c, outbuf, outbuf ); burn = nburn > burn ? nburn : burn; ivp = outbuf; @@ -116,7 +116,7 @@ _gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c, nburn = enc_fn (&c->context.c, outbuf, outbuf); burn = nburn > burn ? nburn : burn; - buf_cpy (c->u_iv.iv, outbuf, blocksize); + cipher_block_cpy (c->u_iv.iv, outbuf, blocksize); } if (burn > 0) @@ -158,7 +158,7 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c, nblocks--; if ((inbuflen % blocksize) == 0) nblocks--; - buf_cpy (c->lastiv, c->u_iv.iv, blocksize); + cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize); } if (c->bulk.cbc_dec) @@ -176,7 +176,8 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c, storage here because it is not used otherwise. */ nburn = dec_fn ( &c->context.c, c->lastiv, inbuf ); burn = nburn > burn ? 
nburn : burn; - buf_xor_n_copy_2(outbuf, c->lastiv, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf, + blocksize); inbuf += blocksize; outbuf += blocksize; } @@ -191,7 +192,7 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c, else restbytes = inbuflen % blocksize; - buf_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */ + cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */ buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */ nburn = dec_fn ( &c->context.c, outbuf, inbuf ); @@ -203,7 +204,7 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c, c->u_iv.iv[i] = outbuf[i]; nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv); burn = nburn > burn ? nburn : burn; - buf_xor(outbuf, outbuf, c->lastiv, blocksize); + cipher_block_xor(outbuf, outbuf, c->lastiv, blocksize); /* c->lastiv is now really lastlastiv, does this matter? */ } diff --git a/cipher/cipher-ccm.c b/cipher/cipher-ccm.c index d7f14d8..e71c6f1 100644 --- a/cipher/cipher-ccm.c +++ b/cipher/cipher-ccm.c @@ -67,7 +67,8 @@ do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen, if (unused > 0) { /* Process one block from macbuf. */ - buf_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf, blocksize); + cipher_block_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf, + blocksize); set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv )); unused = 0; @@ -86,7 +87,7 @@ do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen, { while (inlen >= blocksize) { - buf_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv )); @@ -272,7 +273,7 @@ _gcry_cipher_ccm_tag (gcry_cipher_hd_t c, unsigned char *outbuf, burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding. 
*/ /* Add S_0 */ - buf_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16); + cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16); wipememory (c->u_ctr.ctr, 16); wipememory (c->u_mode.ccm.s0, 16); diff --git a/cipher/cipher-cfb.c b/cipher/cipher-cfb.c index c888e70..c01046b 100644 --- a/cipher/cipher-cfb.c +++ b/cipher/cipher-cfb.c @@ -91,7 +91,7 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c, nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? nburn : burn; /* XOR the input with the IV and store input into IV. */ - buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); outbuf += blocksize; inbuf += blocksize; inbuflen -= blocksize; @@ -101,11 +101,11 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c, if ( inbuflen >= blocksize ) { /* Save the current IV and then encrypt the IV. */ - buf_cpy( c->lastiv, c->u_iv.iv, blocksize ); + cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize ); nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? nburn : burn; /* XOR the input with the IV and store input into IV */ - buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); outbuf += blocksize; inbuf += blocksize; inbuflen -= blocksize; @@ -113,7 +113,7 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c, if ( inbuflen ) { /* Save the current IV and then encrypt the IV. */ - buf_cpy( c->lastiv, c->u_iv.iv, blocksize ); + cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize ); nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? nburn : burn; c->unused = blocksize; @@ -193,7 +193,7 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c, nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? nburn : burn; /* XOR the input with the IV and store input into IV. 
*/ - buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); outbuf += blocksize; inbuf += blocksize; inbuflen -= blocksize; @@ -203,11 +203,11 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c, if (inbuflen >= blocksize ) { /* Save the current IV and then encrypt the IV. */ - buf_cpy ( c->lastiv, c->u_iv.iv, blocksize); + cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize); nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? nburn : burn; /* XOR the input with the IV and store input into IV */ - buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); outbuf += blocksize; inbuf += blocksize; inbuflen -= blocksize; @@ -216,7 +216,7 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c, if (inbuflen) { /* Save the current IV and then encrypt the IV. */ - buf_cpy ( c->lastiv, c->u_iv.iv, blocksize ); + cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize ); nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? 
nburn : burn; c->unused = blocksize; diff --git a/cipher/cipher-cmac.c b/cipher/cipher-cmac.c index 5c941b6..0b1b567 100644 --- a/cipher/cipher-cmac.c +++ b/cipher/cipher-cmac.c @@ -63,7 +63,7 @@ cmac_write (gcry_cipher_hd_t c, const byte * inbuf, size_t inlen) for (; inlen && c->unused < blocksize; inlen--) c->lastiv[c->unused++] = *inbuf++; - buf_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize); + cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize); set_burn (burn, enc_fn (&c->context.c, c->u_iv.iv, c->u_iv.iv)); c->unused = 0; @@ -83,7 +83,7 @@ cmac_write (gcry_cipher_hd_t c, const byte * inbuf, size_t inlen) else while (inlen > blocksize) { - buf_xor (c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor (c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); set_burn (burn, enc_fn (&c->context.c, c->u_iv.iv, c->u_iv.iv)); inlen -= blocksize; inbuf += blocksize; @@ -174,9 +174,9 @@ cmac_final (gcry_cipher_hd_t c) c->lastiv[count++] = 0; } - buf_xor (c->lastiv, c->lastiv, subkey, blocksize); + cipher_block_xor (c->lastiv, c->lastiv, subkey, blocksize); - buf_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize); + cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize); burn = c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_iv.iv); if (burn) _gcry_burn_stack (burn + 4 * sizeof (void *)); diff --git a/cipher/cipher-ctr.c b/cipher/cipher-ctr.c index f9cb6b5..7d46d38 100644 --- a/cipher/cipher-ctr.c +++ b/cipher/cipher-ctr.c @@ -81,24 +81,34 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, { unsigned char tmp[MAX_BLOCKSIZE]; - do { - nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr); - burn = nburn > burn ? nburn : burn; - - for (i = blocksize; i > 0; i--) - { - c->u_ctr.ctr[i-1]++; - if (c->u_ctr.ctr[i-1] != 0) - break; - } - - n = blocksize < inbuflen ? 
blocksize : inbuflen; - buf_xor(outbuf, inbuf, tmp, n); - - inbuflen -= n; - outbuf += n; - inbuf += n; - } while (inbuflen); + do + { + nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr); + burn = nburn > burn ? nburn : burn; + + for (i = blocksize; i > 0; i--) + { + c->u_ctr.ctr[i-1]++; + if (c->u_ctr.ctr[i-1] != 0) + break; + } + + if (inbuflen < blocksize) + break; + n = blocksize; + cipher_block_xor(outbuf, inbuf, tmp, blocksize); + + inbuflen -= n; + outbuf += n; + inbuf += n; + } + while (inbuflen); + + if (inbuflen) + { + n = inbuflen; + buf_xor(outbuf, inbuf, tmp, inbuflen); + } /* Save the unused bytes of the counter. */ c->unused = blocksize - n; diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c index a327249..0f26277 100644 --- a/cipher/cipher-gcm-intel-pclmul.c +++ b/cipher/cipher-gcm-intel-pclmul.c @@ -248,7 +248,8 @@ static inline void gfmul_pclmul_aggr4(void) void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) { - u64 tmp[2]; + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; #if defined(__x86_64__) && defined(__WIN64__) char win64tmp[3 * 16]; @@ -262,15 +263,19 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) #endif /* Swap endianness of hsub. 
*/ - tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8); - tmp[1] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 0); - buf_cpy (c->u_mode.gcm.u_ghash_key.key, tmp, GCRY_GCM_BLOCK_LEN); + asm volatile ("movdqu (%[key]), %%xmm0\n\t" + "pshufb %[be_mask], %%xmm0\n\t" + "movdqu %%xmm0, (%[key])\n\t" + : + : [key] "r" (c->u_mode.gcm.u_ghash_key.key), + [be_mask] "m" (*be_mask) + : "memory"); #ifdef __x86_64__ - asm volatile ("movdqu %[h_1], %%xmm0\n\t" - "movdqa %%xmm0, %%xmm1\n\t" + asm volatile ("movdqa %%xmm0, %%xmm1\n\t" : - : [h_1] "m" (*tmp)); + : + : "memory"); gfmul_pclmul (); /* H•H => H² */ @@ -324,8 +329,6 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) ::: "cc" ); #endif #endif - - wipememory (tmp, sizeof(tmp)); } diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c index 6169d14..32ec9fa 100644 --- a/cipher/cipher-gcm.c +++ b/cipher/cipher-gcm.c @@ -150,7 +150,7 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM) u32 A; int i; - buf_xor (V, result, buf, 16); + cipher_block_xor (V, result, buf, 16); V[0] = be_bswap64 (V[0]); V[1] = be_bswap64 (V[1]); @@ -259,7 +259,7 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM) u32 T[3]; int i; - buf_xor (V, result, buf, 16); /* V is big-endian */ + cipher_block_xor (V, result, buf, 16); /* V is big-endian */ /* First round can be manually tweaked based on fact that 'tmp' is zero. 
*/ i = 15; @@ -342,7 +342,7 @@ do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) #else unsigned long T[4]; - buf_xor (V, result, buf, 16); + cipher_block_xor (V, result, buf, 16); for (i = 0; i < 4; i++) { V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8; @@ -358,7 +358,7 @@ do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) for (j = 0x80; j; j >>= 1) { if (hsub[i] & j) - buf_xor (p, p, V, 16); + cipher_block_xor (p, p, V, 16); if (bshift (V)) V[0] ^= 0xe1000000; } @@ -598,7 +598,7 @@ gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, } fix_ctr = 1; - buf_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN); + cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN); } } @@ -928,8 +928,8 @@ _gcry_cipher_gcm_tag (gcry_cipher_hd_t c, /* Add bitlengths to tag. */ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths, GCRY_GCM_BLOCK_LEN, 1); - buf_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv, - c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN); + cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv, + c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN); c->marks.tag = 1; wipememory (bitlengths, sizeof (bitlengths)); diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index b748125..a95e084 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -121,6 +121,25 @@ struct gcry_cipher_handle interface does not easily allow to retrieve this value. */ int algo; + /* A structure with function pointers for mode operations. 
*/ + struct { + gcry_err_code_t (*encrypt)(gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + gcry_err_code_t (*decrypt)(gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + gcry_err_code_t (*setiv)(gcry_cipher_hd_t c, const unsigned char *iv, + size_t ivlen); + + gcry_err_code_t (*authenticate)(gcry_cipher_hd_t c, + const unsigned char *abuf, size_t abuflen); + gcry_err_code_t (*get_tag)(gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen); + gcry_err_code_t (*check_tag)(gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen); + } mode_ops; + /* A structure with function pointers for bulk operations. Due to limitations of the module system (we don't want to change the API) we need to keep these function pointers here. The cipher @@ -146,7 +165,7 @@ struct gcry_cipher_handle const void *inbuf_arg, size_t nblocks, int encrypt); size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); - void (*xts_crypt)(gcry_cipher_hd_t c, unsigned char *tweak, + void (*xts_crypt)(void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); } bulk; @@ -479,9 +498,12 @@ gcry_err_code_t _gcry_cipher_ocb_check_tag /*-- cipher-xts.c --*/ -gcry_err_code_t _gcry_cipher_xts_crypt +gcry_err_code_t _gcry_cipher_xts_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, - const unsigned char *inbuf, size_t inbuflen, int encrypt); + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_xts_decrypt +/* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); /* Return the L-value for block N. 
Note: 'cipher_ocb.c' ensures that N @@ -506,4 +528,145 @@ ocb_get_l (gcry_cipher_hd_t c, u64 n) return c->u_mode.ocb.L[ntz]; } +/* Optimized function for cipher block copying */ +static inline void +cipher_block_cpy(void *_dst, const void *_src, size_t blocksize) +{ + byte *dst = _dst; + const byte *src = _src; + u64 s[2]; + + if (blocksize == 8) + { + buf_put_he64(dst + 0, buf_get_he64(src + 0)); + } + else /* blocksize == 16 */ + { + s[0] = buf_get_he64(src + 0); + s[1] = buf_get_he64(src + 8); + buf_put_he64(dst + 0, s[0]); + buf_put_he64(dst + 8, s[1]); + } +} + + +/* Optimized function for cipher block xoring */ +static inline void +cipher_block_xor(void *_dst, const void *_src1, const void *_src2, + size_t blocksize) +{ + byte *dst = _dst; + const byte *src1 = _src1; + const byte *src2 = _src2; + u64 s1[2]; + u64 s2[2]; + + if (blocksize == 8) + { + buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0)); + } + else /* blocksize == 16 */ + { + s1[0] = buf_get_he64(src1 + 0); + s1[1] = buf_get_he64(src1 + 8); + s2[0] = buf_get_he64(src2 + 0); + s2[1] = buf_get_he64(src2 + 8); + buf_put_he64(dst + 0, s1[0] ^ s2[0]); + buf_put_he64(dst + 8, s1[1] ^ s2[1]); + } +} + + +/* Optimized function for in-place cipher block xoring */ +static inline void +cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize) +{ + cipher_block_xor (_dst, _dst, _src, blocksize); +} + + +/* Optimized function for cipher block xoring with two destination cipher + blocks. Used mainly by CFB mode encryption. 
*/ +static inline void +cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src, + size_t blocksize) +{ + byte *dst1 = _dst1; + byte *dst2 = _dst2; + const byte *src = _src; + u64 d2[2]; + u64 s[2]; + + if (blocksize == 8) + { + d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0); + buf_put_he64(dst2 + 0, d2[0]); + buf_put_he64(dst1 + 0, d2[0]); + } + else /* blocksize == 16 */ + { + s[0] = buf_get_he64(src + 0); + s[1] = buf_get_he64(src + 8); + d2[0] = buf_get_he64(dst2 + 0); + d2[1] = buf_get_he64(dst2 + 8); + d2[0] = d2[0] ^ s[0]; + d2[1] = d2[1] ^ s[1]; + buf_put_he64(dst2 + 0, d2[0]); + buf_put_he64(dst2 + 8, d2[1]); + buf_put_he64(dst1 + 0, d2[0]); + buf_put_he64(dst1 + 8, d2[1]); + } +} + + +/* Optimized function for combined cipher block xoring and copying. + Used by mainly CBC mode decryption. */ +static inline void +cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor, + void *_srcdst_cpy, const void *_src_cpy, + size_t blocksize) +{ + byte *dst_xor = _dst_xor; + byte *srcdst_cpy = _srcdst_cpy; + const byte *src_xor = _src_xor; + const byte *src_cpy = _src_cpy; + u64 sc[2]; + u64 sx[2]; + u64 sdc[2]; + + if (blocksize == 8) + { + sc[0] = buf_get_he64(src_cpy + 0); + buf_put_he64(dst_xor + 0, + buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0)); + buf_put_he64(srcdst_cpy + 0, sc[0]); + } + else /* blocksize == 16 */ + { + sc[0] = buf_get_he64(src_cpy + 0); + sc[1] = buf_get_he64(src_cpy + 8); + sx[0] = buf_get_he64(src_xor + 0); + sx[1] = buf_get_he64(src_xor + 8); + sdc[0] = buf_get_he64(srcdst_cpy + 0); + sdc[1] = buf_get_he64(srcdst_cpy + 8); + sx[0] ^= sdc[0]; + sx[1] ^= sdc[1]; + buf_put_he64(dst_xor + 0, sx[0]); + buf_put_he64(dst_xor + 8, sx[1]); + buf_put_he64(srcdst_cpy + 0, sc[0]); + buf_put_he64(srcdst_cpy + 8, sc[1]); + } +} + + +/* Optimized function for combined cipher block xoring and copying. + Used by mainly CFB mode decryption. 
*/ +static inline void +cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, + size_t blocksize) +{ + cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize); +} + + #endif /*G10_CIPHER_INTERNAL_H*/ diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c index db42aaf..f71520a 100644 --- a/cipher/cipher-ocb.c +++ b/cipher/cipher-ocb.c @@ -82,7 +82,7 @@ static void double_block_cpy (unsigned char *d, const unsigned char *s) { if (d != s) - buf_cpy (d, s, OCB_BLOCK_LEN); + cipher_block_cpy (d, s, OCB_BLOCK_LEN); double_block (d); } @@ -181,8 +181,8 @@ _gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce, nburn = c->spec->encrypt (&c->context.c, ktop, ktop); burn = nburn > burn ? nburn : burn; /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */ - buf_cpy (stretch, ktop, OCB_BLOCK_LEN); - buf_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8); + cipher_block_cpy (stretch, ktop, OCB_BLOCK_LEN); + cipher_block_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8); /* Offset_0 = Stretch[1+bottom..128+bottom] (We use the IV field to store the offset) */ bit_copy (c->u_iv.iv, stretch, bottom, OCB_BLOCK_LEN); @@ -267,18 +267,18 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, } else { - buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks), - OCB_BLOCK_LEN); + cipher_block_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks), + OCB_BLOCK_LEN); } /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp, c->u_mode.ocb.aad_offset, - c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN); + cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN); nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp); burn = nburn > burn ? 
nburn : burn; - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); c->u_mode.ocb.aad_nleftover = 0; } @@ -309,12 +309,13 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN); + cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, + OCB_BLOCK_LEN); nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp); burn = nburn > burn ? nburn : burn; - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); abuf += OCB_BLOCK_LEN; abuflen -= OCB_BLOCK_LEN; @@ -349,14 +350,15 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, - ocb_get_l (c, c->u_mode.ocb.aad_nblocks), - OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_offset, + ocb_get_l (c, c->u_mode.ocb.aad_nblocks), + OCB_BLOCK_LEN); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN); + cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, + OCB_BLOCK_LEN); nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp); burn = nburn > burn ? 
nburn : burn; - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); abuf += OCB_BLOCK_LEN; abuflen -= OCB_BLOCK_LEN; @@ -397,18 +399,18 @@ ocb_aad_finalize (gcry_cipher_hd_t c) if (c->u_mode.ocb.aad_nleftover) { /* Offset_* = Offset_m xor L_* */ - buf_xor_1 (c->u_mode.ocb.aad_offset, - c->u_mode.ocb.L_star, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_offset, + c->u_mode.ocb.L_star, OCB_BLOCK_LEN); /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */ buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover); memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0, OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover); l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80; - buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN); + cipher_block_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN); /* Sum = Sum_m xor ENCIPHER(K, CipherInput) */ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp); burn = nburn > burn ? nburn : burn; - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); c->u_mode.ocb.aad_nleftover = 0; } @@ -431,7 +433,7 @@ ocb_checksum (unsigned char *chksum, const unsigned char *plainbuf, while (nblks > 0) { /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1(chksum, plainbuf, OCB_BLOCK_LEN); + cipher_block_xor_1(chksum, plainbuf, OCB_BLOCK_LEN); plainbuf += OCB_BLOCK_LEN; nblks--; @@ -491,12 +493,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, } /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN); + cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN); nburn = crypt_fn (&c->context.c, outbuf, outbuf); burn = nburn > burn ? 
nburn : burn; - buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN); + cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN); if (!encrypt) { @@ -551,14 +553,14 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, - ocb_get_l (c, c->u_mode.ocb.data_nblocks), - OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_iv.iv, + ocb_get_l (c, c->u_mode.ocb.data_nblocks), + OCB_BLOCK_LEN); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN); + cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN); nburn = crypt_fn (&c->context.c, outbuf, outbuf); burn = nburn > burn ? nburn : burn; - buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN); + cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN); inbuf += OCB_BLOCK_LEN; inbuflen -= OCB_BLOCK_LEN; @@ -584,7 +586,7 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, unsigned char pad[OCB_BLOCK_LEN]; /* Offset_* = Offset_m xor L_* */ - buf_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN); /* Pad = ENCIPHER(K, Offset_*) */ nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv); burn = nburn > burn ? 
nburn : burn; @@ -596,7 +598,7 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, buf_cpy (l_tmp, inbuf, inbuflen); memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen); l_tmp[inbuflen] = 0x80; - buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN); /* C_* = P_* xor Pad[1..bitlen(P_*)] */ buf_xor (outbuf, inbuf, pad, inbuflen); } @@ -604,13 +606,13 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, { /* P_* = C_* xor Pad[1..bitlen(C_*)] */ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */ - buf_cpy (l_tmp, pad, OCB_BLOCK_LEN); + cipher_block_cpy (l_tmp, pad, OCB_BLOCK_LEN); buf_cpy (l_tmp, inbuf, inbuflen); - buf_xor_1 (l_tmp, pad, OCB_BLOCK_LEN); + cipher_block_xor_1 (l_tmp, pad, OCB_BLOCK_LEN); l_tmp[inbuflen] = 0x80; buf_cpy (outbuf, l_tmp, inbuflen); - buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN); } } @@ -618,8 +620,10 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, if (c->marks.finalize) { /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */ - buf_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv, OCB_BLOCK_LEN); - buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN); + cipher_block_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv, + OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar, + OCB_BLOCK_LEN); nburn = c->spec->encrypt (&c->context.c, c->u_mode.ocb.tag, c->u_mode.ocb.tag); burn = nburn > burn ? 
nburn : burn; @@ -672,7 +676,8 @@ compute_tag_if_needed (gcry_cipher_hd_t c) if (!c->marks.tag) { ocb_aad_finalize (c); - buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum, OCB_BLOCK_LEN); + cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum, + OCB_BLOCK_LEN); c->marks.tag = 1; } } diff --git a/cipher/cipher-ofb.c b/cipher/cipher-ofb.c index f821d1b..50fece2 100644 --- a/cipher/cipher-ofb.c +++ b/cipher/cipher-ofb.c @@ -76,7 +76,7 @@ _gcry_cipher_ofb_encrypt (gcry_cipher_hd_t c, /* Encrypt the IV (and save the current one). */ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); burn = nburn > burn ? nburn : burn; - buf_xor(outbuf, c->u_iv.iv, inbuf, blocksize); + cipher_block_xor(outbuf, c->u_iv.iv, inbuf, blocksize); outbuf += blocksize; inbuf += blocksize; inbuflen -= blocksize; diff --git a/cipher/cipher-selftest.c b/cipher/cipher-selftest.c index cecbab7..eb3614a 100644 --- a/cipher/cipher-selftest.c +++ b/cipher/cipher-selftest.c @@ -105,7 +105,7 @@ _gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func, ciphertext = plaintext2 + nblocks * blocksize; /* Initialize ctx */ - if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR) + if (setkey_func (ctx, key, sizeof(key), NULL) != GPG_ERR_NO_ERROR) { xfree(mem); return "setkey failed"; @@ -228,7 +228,7 @@ _gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func, ciphertext = plaintext2 + nblocks * blocksize; /* Initialize ctx */ - if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR) + if (setkey_func (ctx, key, sizeof(key), NULL) != GPG_ERR_NO_ERROR) { xfree(mem); return "setkey failed"; @@ -351,7 +351,7 @@ _gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func, ciphertext2 = ciphertext + nblocks * blocksize; /* Initialize ctx */ - if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR) + if (setkey_func (ctx, key, sizeof(key), NULL) != GPG_ERR_NO_ERROR) { xfree(mem); return "setkey failed"; diff --git 
a/cipher/cipher-xts.c b/cipher/cipher-xts.c index 4da89e5..0522a27 100644 --- a/cipher/cipher-xts.c +++ b/cipher/cipher-xts.c @@ -93,7 +93,8 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c, /* Use a bulk method if available. */ if (nblocks && c->bulk.xts_crypt) { - c->bulk.xts_crypt (c, c->u_ctr.ctr, outbuf, inbuf, nblocks, encrypt); + c->bulk.xts_crypt (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks, + encrypt); inbuf += nblocks * GCRY_XTS_BLOCK_LEN; outbuf += nblocks * GCRY_XTS_BLOCK_LEN; inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN; @@ -106,10 +107,10 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c, while (nblocks) { /* Xor-Encrypt/Decrypt-Xor block. */ - buf_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + cipher_block_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); burn = nburn > burn ? nburn : burn; - buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); outbuf += GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; @@ -132,10 +133,10 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c, xts_gfmul_byA (tmp.x1, c->u_ctr.ctr); /* Decrypt last block first. */ - buf_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); + cipher_block_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); nburn = crypt_fn (&c->context.c, outbuf, outbuf); burn = nburn > burn ? nburn : burn; - buf_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); + cipher_block_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); inbuflen -= GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; @@ -146,15 +147,15 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c, outbuf -= GCRY_XTS_BLOCK_LEN; /* Steal ciphertext from previous block. */ - buf_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN); + cipher_block_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN); buf_cpy (tmp.x64, inbuf, inbuflen); buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen); /* Decrypt/Encrypt last block. 
*/ - buf_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + cipher_block_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); burn = nburn > burn ? nburn : burn; - buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); } /* Auto-increment data-unit sequence number */ @@ -168,3 +169,21 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c, return 0; } + + +gcry_err_code_t +_gcry_cipher_xts_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1); +} + + +gcry_err_code_t +_gcry_cipher_xts_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0); +} diff --git a/cipher/cipher.c b/cipher/cipher.c index 9812738..993036f 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -92,6 +92,8 @@ static gcry_cipher_spec_t *cipher_list[] = +static void _gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode); + static int map_algo (int algo) @@ -532,6 +534,7 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, h->bulk.ctr_enc = _gcry_aes_ctr_enc; h->bulk.ocb_crypt = _gcry_aes_ocb_crypt; h->bulk.ocb_auth = _gcry_aes_ocb_auth; + h->bulk.xts_crypt = _gcry_aes_xts_crypt; break; #endif /*USE_AES*/ #ifdef USE_BLOWFISH @@ -592,6 +595,9 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, break; } + /* Setup mode routines. */ + _gcry_cipher_setup_mode_ops(h, mode); + /* Setup defaults depending on the mode. */ switch (mode) { @@ -609,8 +615,7 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, default: break; } - - } + } } /* Done. 
*/ @@ -675,7 +680,7 @@ cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen) } } - rc = c->spec->setkey (&c->context.c, key, keylen); + rc = c->spec->setkey (&c->context.c, key, keylen, c); if (!rc) { /* Duplicate initial context. */ @@ -701,7 +706,7 @@ cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen) case GCRY_CIPHER_MODE_XTS: /* Setup tweak cipher with second part of XTS key. */ rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen, - keylen); + keylen, c); if (!rc) { /* Duplicate initial tweak context. */ @@ -872,85 +877,78 @@ do_ecb_decrypt (gcry_cipher_hd_t c, } -/**************** - * Encrypt INBUF to OUTBUF with the mode selected at open. - * inbuf and outbuf may overlap or be the same. - * Depending on the mode some constraints apply to INBUFLEN. - */ static gcry_err_code_t -cipher_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, - const byte *inbuf, size_t inbuflen) +do_stream_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + (void)outbuflen; + c->spec->stencrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen); + return 0; +} + +static gcry_err_code_t +do_stream_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + (void)outbuflen; + c->spec->stdecrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen); + return 0; +} + + +static gcry_err_code_t +do_encrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) { gcry_err_code_t rc; - if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key) - { - log_error ("cipher_encrypt: key not set\n"); - return GPG_ERR_MISSING_KEY; - } + (void)outbuflen; switch (c->mode) { - case GCRY_CIPHER_MODE_ECB: - rc = do_ecb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CBC: - rc = _gcry_cipher_cbc_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case 
GCRY_CIPHER_MODE_CFB: - rc = _gcry_cipher_cfb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); + case GCRY_CIPHER_MODE_CMAC: + rc = GPG_ERR_INV_CIPHER_MODE; break; - case GCRY_CIPHER_MODE_CFB8: - rc = _gcry_cipher_cfb8_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); + case GCRY_CIPHER_MODE_NONE: + if (fips_mode () || !_gcry_get_debug_flag (0)) + { + fips_signal_error ("cipher mode NONE used"); + rc = GPG_ERR_INV_CIPHER_MODE; + } + else + { + if (inbuf != outbuf) + memmove (outbuf, inbuf, inbuflen); + rc = 0; + } break; - case GCRY_CIPHER_MODE_OFB: - rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); + default: + log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode ); + rc = GPG_ERR_INV_CIPHER_MODE; break; + } - case GCRY_CIPHER_MODE_CTR: - rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; + return rc; +} - case GCRY_CIPHER_MODE_AESWRAP: - rc = _gcry_cipher_aeswrap_encrypt (c, outbuf, outbuflen, - inbuf, inbuflen); - break; +static gcry_err_code_t +do_decrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t rc; - case GCRY_CIPHER_MODE_CCM: - rc = _gcry_cipher_ccm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; + (void)outbuflen; + switch (c->mode) + { case GCRY_CIPHER_MODE_CMAC: rc = GPG_ERR_INV_CIPHER_MODE; break; - case GCRY_CIPHER_MODE_GCM: - rc = _gcry_cipher_gcm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_POLY1305: - rc = _gcry_cipher_poly1305_encrypt (c, outbuf, outbuflen, - inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_OCB: - rc = _gcry_cipher_ocb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_XTS: - rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1); - break; - - case GCRY_CIPHER_MODE_STREAM: - c->spec->stencrypt (&c->context.c, - outbuf, (byte*)/*arggg*/inbuf, inbuflen); - rc = 0; - break; - case 
GCRY_CIPHER_MODE_NONE: if (fips_mode () || !_gcry_get_debug_flag (0)) { @@ -966,7 +964,7 @@ cipher_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, break; default: - log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode ); + log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode ); rc = GPG_ERR_INV_CIPHER_MODE; break; } @@ -991,7 +989,13 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, inlen = outsize; } - rc = cipher_encrypt (h, out, outsize, in, inlen); + if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) + { + log_error ("cipher_encrypt: key not set\n"); + return GPG_ERR_MISSING_KEY; + } + + rc = h->mode_ops.encrypt (h, out, outsize, in, inlen); /* Failsafe: Make sure that the plaintext will never make it into OUT if the encryption returned an error. */ @@ -1002,110 +1006,10 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, } - /**************** - * Decrypt INBUF to OUTBUF with the mode selected at open. - * inbuf and outbuf may overlap or be the same. - * Depending on the mode some some constraints apply to INBUFLEN. + * Decrypt IN and write it to OUT. If IN is NULL, in-place decryption has + * been requested. 
*/ -static gcry_err_code_t -cipher_decrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, - const byte *inbuf, size_t inbuflen) -{ - gcry_err_code_t rc; - - if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key) - { - log_error ("cipher_decrypt: key not set\n"); - return GPG_ERR_MISSING_KEY; - } - - switch (c->mode) - { - case GCRY_CIPHER_MODE_ECB: - rc = do_ecb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CBC: - rc = _gcry_cipher_cbc_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CFB: - rc = _gcry_cipher_cfb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CFB8: - rc = _gcry_cipher_cfb8_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_OFB: - rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CTR: - rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_AESWRAP: - rc = _gcry_cipher_aeswrap_decrypt (c, outbuf, outbuflen, - inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CCM: - rc = _gcry_cipher_ccm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_CMAC: - rc = GPG_ERR_INV_CIPHER_MODE; - break; - - case GCRY_CIPHER_MODE_GCM: - rc = _gcry_cipher_gcm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_POLY1305: - rc = _gcry_cipher_poly1305_decrypt (c, outbuf, outbuflen, - inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_OCB: - rc = _gcry_cipher_ocb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); - break; - - case GCRY_CIPHER_MODE_XTS: - rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0); - break; - - case GCRY_CIPHER_MODE_STREAM: - c->spec->stdecrypt (&c->context.c, - outbuf, (byte*)/*arggg*/inbuf, inbuflen); - rc = 0; - break; - - case GCRY_CIPHER_MODE_NONE: - if (fips_mode () || !_gcry_get_debug_flag (0)) - { - 
fips_signal_error ("cipher mode NONE used"); - rc = GPG_ERR_INV_CIPHER_MODE; - } - else - { - if (inbuf != outbuf) - memmove (outbuf, inbuf, inbuflen); - rc = 0; - } - break; - - default: - log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode ); - rc = GPG_ERR_INV_CIPHER_MODE; - break; - } - - return rc; -} - - gcry_err_code_t _gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize, const void *in, size_t inlen) @@ -1116,9 +1020,14 @@ _gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize, inlen = outsize; } - return cipher_decrypt (h, out, outsize, in, inlen); -} + if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) + { + log_error ("cipher_decrypt: key not set\n"); + return GPG_ERR_MISSING_KEY; + } + return h->mode_ops.decrypt (h, out, outsize, in, inlen); +} /**************** @@ -1149,33 +1058,10 @@ _gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen) gcry_err_code_t _gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen) { - gcry_err_code_t rc = 0; - - switch (hd->mode) - { - case GCRY_CIPHER_MODE_CCM: - rc = _gcry_cipher_ccm_set_nonce (hd, iv, ivlen); - break; - - case GCRY_CIPHER_MODE_GCM: - rc = _gcry_cipher_gcm_setiv (hd, iv, ivlen); - break; - - case GCRY_CIPHER_MODE_POLY1305: - rc = _gcry_cipher_poly1305_setiv (hd, iv, ivlen); - break; - - case GCRY_CIPHER_MODE_OCB: - rc = _gcry_cipher_ocb_set_nonce (hd, iv, ivlen); - break; - - default: - rc = cipher_setiv (hd, iv, ivlen); - break; - } - return rc; + return hd->mode_ops.setiv (hd, iv, ivlen); } + /* Set counter for CTR mode. (CTR,CTRLEN) must denote a buffer of block size length, or (NULL,0) to set the CTR to the all-zero block. 
*/ @@ -1209,38 +1095,40 @@ _gcry_cipher_getctr (gcry_cipher_hd_t hd, void *ctr, size_t ctrlen) return 0; } + gcry_err_code_t _gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf, size_t abuflen) { gcry_err_code_t rc; - switch (hd->mode) + if (hd->mode_ops.authenticate) { - case GCRY_CIPHER_MODE_CCM: - rc = _gcry_cipher_ccm_authenticate (hd, abuf, abuflen); - break; - - case GCRY_CIPHER_MODE_CMAC: - rc = _gcry_cipher_cmac_authenticate (hd, abuf, abuflen); - break; + rc = hd->mode_ops.authenticate (hd, abuf, abuflen); + } + else + { + log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + } - case GCRY_CIPHER_MODE_GCM: - rc = _gcry_cipher_gcm_authenticate (hd, abuf, abuflen); - break; + return rc; +} - case GCRY_CIPHER_MODE_POLY1305: - rc = _gcry_cipher_poly1305_authenticate (hd, abuf, abuflen); - break; - case GCRY_CIPHER_MODE_OCB: - rc = _gcry_cipher_ocb_authenticate (hd, abuf, abuflen); - break; +gcry_err_code_t +_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen) +{ + gcry_err_code_t rc; - default: - log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode); + if (hd->mode_ops.get_tag) + { + rc = hd->mode_ops.get_tag (hd, outtag, taglen); + } + else + { + log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode); rc = GPG_ERR_INV_CIPHER_MODE; - break; } return rc; @@ -1248,76 +1136,166 @@ _gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf, gcry_err_code_t -_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen) +_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen) { gcry_err_code_t rc; - switch (hd->mode) + if (hd->mode_ops.check_tag) { - case GCRY_CIPHER_MODE_CCM: - rc = _gcry_cipher_ccm_get_tag (hd, outtag, taglen); + rc = hd->mode_ops.check_tag (hd, intag, taglen); + } + else + { + log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + } + + return rc; +} + + + +static 
void +_gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode) +{ + /* Setup encryption and decryption routines. */ + switch (mode) + { + case GCRY_CIPHER_MODE_STREAM: + c->mode_ops.encrypt = do_stream_encrypt; + c->mode_ops.decrypt = do_stream_decrypt; break; - case GCRY_CIPHER_MODE_CMAC: - rc = _gcry_cipher_cmac_get_tag (hd, outtag, taglen); + case GCRY_CIPHER_MODE_ECB: + c->mode_ops.encrypt = do_ecb_encrypt; + c->mode_ops.decrypt = do_ecb_decrypt; + break; + + case GCRY_CIPHER_MODE_CBC: + c->mode_ops.encrypt = _gcry_cipher_cbc_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cbc_decrypt; + break; + + case GCRY_CIPHER_MODE_CFB: + c->mode_ops.encrypt = _gcry_cipher_cfb_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cfb_decrypt; + break; + + case GCRY_CIPHER_MODE_CFB8: + c->mode_ops.encrypt = _gcry_cipher_cfb8_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cfb8_decrypt; + break; + + case GCRY_CIPHER_MODE_OFB: + c->mode_ops.encrypt = _gcry_cipher_ofb_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ofb_encrypt; + break; + + case GCRY_CIPHER_MODE_CTR: + c->mode_ops.encrypt = _gcry_cipher_ctr_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ctr_encrypt; + break; + + case GCRY_CIPHER_MODE_AESWRAP: + c->mode_ops.encrypt = _gcry_cipher_aeswrap_encrypt; + c->mode_ops.decrypt = _gcry_cipher_aeswrap_decrypt; + break; + + case GCRY_CIPHER_MODE_CCM: + c->mode_ops.encrypt = _gcry_cipher_ccm_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ccm_decrypt; break; case GCRY_CIPHER_MODE_GCM: - rc = _gcry_cipher_gcm_get_tag (hd, outtag, taglen); + c->mode_ops.encrypt = _gcry_cipher_gcm_encrypt; + c->mode_ops.decrypt = _gcry_cipher_gcm_decrypt; break; case GCRY_CIPHER_MODE_POLY1305: - rc = _gcry_cipher_poly1305_get_tag (hd, outtag, taglen); + c->mode_ops.encrypt = _gcry_cipher_poly1305_encrypt; + c->mode_ops.decrypt = _gcry_cipher_poly1305_decrypt; break; case GCRY_CIPHER_MODE_OCB: - rc = _gcry_cipher_ocb_get_tag (hd, outtag, taglen); + c->mode_ops.encrypt = _gcry_cipher_ocb_encrypt; + 
c->mode_ops.decrypt = _gcry_cipher_ocb_decrypt; + break; + + case GCRY_CIPHER_MODE_XTS: + c->mode_ops.encrypt = _gcry_cipher_xts_encrypt; + c->mode_ops.decrypt = _gcry_cipher_xts_decrypt; break; default: - log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode); - rc = GPG_ERR_INV_CIPHER_MODE; + c->mode_ops.encrypt = do_encrypt_none_unknown; + c->mode_ops.decrypt = do_decrypt_none_unknown; break; } - return rc; -} + /* Setup IV setting routine. */ + switch (mode) + { + case GCRY_CIPHER_MODE_CCM: + c->mode_ops.setiv = _gcry_cipher_ccm_set_nonce; + break; + + case GCRY_CIPHER_MODE_GCM: + c->mode_ops.setiv = _gcry_cipher_gcm_setiv; + break; + case GCRY_CIPHER_MODE_POLY1305: + c->mode_ops.setiv = _gcry_cipher_poly1305_setiv; + break; -gcry_err_code_t -_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen) -{ - gcry_err_code_t rc; + case GCRY_CIPHER_MODE_OCB: + c->mode_ops.setiv = _gcry_cipher_ocb_set_nonce; + break; - switch (hd->mode) + default: + c->mode_ops.setiv = cipher_setiv; + break; + } + + + /* Setup authentication routines for AEAD modes. 
*/ + switch (mode) { case GCRY_CIPHER_MODE_CCM: - rc = _gcry_cipher_ccm_check_tag (hd, intag, taglen); + c->mode_ops.authenticate = _gcry_cipher_ccm_authenticate; + c->mode_ops.get_tag = _gcry_cipher_ccm_get_tag; + c->mode_ops.check_tag = _gcry_cipher_ccm_check_tag; break; case GCRY_CIPHER_MODE_CMAC: - rc = _gcry_cipher_cmac_check_tag (hd, intag, taglen); + c->mode_ops.authenticate = _gcry_cipher_cmac_authenticate; + c->mode_ops.get_tag = _gcry_cipher_cmac_get_tag; + c->mode_ops.check_tag = _gcry_cipher_cmac_check_tag; break; case GCRY_CIPHER_MODE_GCM: - rc = _gcry_cipher_gcm_check_tag (hd, intag, taglen); + c->mode_ops.authenticate = _gcry_cipher_gcm_authenticate; + c->mode_ops.get_tag = _gcry_cipher_gcm_get_tag; + c->mode_ops.check_tag = _gcry_cipher_gcm_check_tag; break; case GCRY_CIPHER_MODE_POLY1305: - rc = _gcry_cipher_poly1305_check_tag (hd, intag, taglen); + c->mode_ops.authenticate = _gcry_cipher_poly1305_authenticate; + c->mode_ops.get_tag = _gcry_cipher_poly1305_get_tag; + c->mode_ops.check_tag = _gcry_cipher_poly1305_check_tag; break; case GCRY_CIPHER_MODE_OCB: - rc = _gcry_cipher_ocb_check_tag (hd, intag, taglen); + c->mode_ops.authenticate = _gcry_cipher_ocb_authenticate; + c->mode_ops.get_tag = _gcry_cipher_ocb_get_tag; + c->mode_ops.check_tag = _gcry_cipher_ocb_check_tag; break; default: - log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode); - rc = GPG_ERR_INV_CIPHER_MODE; + c->mode_ops.authenticate = NULL; + c->mode_ops.get_tag = NULL; + c->mode_ops.check_tag = NULL; break; } - - return rc; } diff --git a/cipher/des.c b/cipher/des.c index 5c99f50..5d4c2fd 100644 --- a/cipher/des.c +++ b/cipher/des.c @@ -119,6 +119,7 @@ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" +#include "cipher-internal.h" #include "cipher-selftest.h" @@ -197,7 +198,8 @@ static unsigned int do_tripledes_encrypt(void *context, byte *outbuf, static unsigned int do_tripledes_decrypt(void *context, byte *outbuf, const byte *inbuf ); static gcry_err_code_t 
do_tripledes_setkey(void *context, const byte *key, - unsigned keylen); + unsigned keylen, + gcry_cipher_hd_t hd); static int initialized; @@ -940,7 +942,7 @@ _gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Encrypt the counter. */ tripledes_ecb_encrypt (ctx, ctr, tmpbuf); /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE); + cipher_block_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE); outbuf += DES_BLOCKSIZE; inbuf += DES_BLOCKSIZE; /* Increment the counter. */ @@ -996,7 +998,7 @@ _gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, the intermediate result to SAVEBUF. */ tripledes_ecb_decrypt (ctx, inbuf, savebuf); - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE); inbuf += DES_BLOCKSIZE; outbuf += DES_BLOCKSIZE; } @@ -1041,7 +1043,7 @@ _gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { tripledes_ecb_encrypt (ctx, iv, iv); - buf_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE); + cipher_block_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE); outbuf += DES_BLOCKSIZE; inbuf += DES_BLOCKSIZE; } @@ -1086,7 +1088,8 @@ is_weak_key ( const byte *key ) /* Alternative setkey for selftests; need larger key than default. 
*/ static gcry_err_code_t -bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen) +bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen, + gcry_cipher_hd_t hd) { static const unsigned char key[24] ATTR_ALIGNED_16 = { 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, @@ -1094,10 +1097,11 @@ bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen) 0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82 }; + (void)hd; (void)__key; (void)__keylen; - return do_tripledes_setkey(context, key, sizeof(key)); + return do_tripledes_setkey(context, key, sizeof(key), NULL); } @@ -1349,10 +1353,13 @@ selftest (void) static gcry_err_code_t -do_tripledes_setkey ( void *context, const byte *key, unsigned keylen ) +do_tripledes_setkey ( void *context, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd ) { struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; + (void)hd; + if( keylen != 24 ) return GPG_ERR_INV_KEYLEN; @@ -1413,10 +1420,13 @@ do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf ) } static gcry_err_code_t -do_des_setkey (void *context, const byte *key, unsigned keylen) +do_des_setkey (void *context, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd) { struct _des_ctx *ctx = (struct _des_ctx *) context; + (void)hd; + if (keylen != 8) return GPG_ERR_INV_KEYLEN; diff --git a/cipher/gost28147.c b/cipher/gost28147.c index 4ff80b4..1b8ab7a 100644 --- a/cipher/gost28147.c +++ b/cipher/gost28147.c @@ -39,11 +39,14 @@ #include "gost-sb.h" static gcry_err_code_t -gost_setkey (void *c, const byte *key, unsigned keylen) +gost_setkey (void *c, const byte *key, unsigned keylen, + gcry_cipher_hd_t hd) { int i; GOST28147_context *ctx = c; + (void)hd; + if (keylen != 256 / 8) return GPG_ERR_INV_KEYLEN; diff --git a/cipher/idea.c b/cipher/idea.c index ffe821d..abfe675 100644 --- a/cipher/idea.c +++ b/cipher/idea.c @@ -258,10 +258,12 @@ do_setkey( IDEA_context *c, const byte *key, unsigned int keylen ) } static 
gcry_err_code_t -idea_setkey (void *context, const byte *key, unsigned int keylen) +idea_setkey (void *context, const byte *key, unsigned int keylen, + gcry_cipher_hd_t hd) { IDEA_context *ctx = context; int rc = do_setkey (ctx, key, keylen); + (void)hd; _gcry_burn_stack (23+6*sizeof(void*)); return rc; } diff --git a/cipher/rfc2268.c b/cipher/rfc2268.c index aed8cad..0914946 100644 --- a/cipher/rfc2268.c +++ b/cipher/rfc2268.c @@ -262,8 +262,10 @@ setkey_core (void *context, const unsigned char *key, unsigned int keylen, int w } static gpg_err_code_t -do_setkey (void *context, const unsigned char *key, unsigned int keylen) +do_setkey (void *context, const unsigned char *key, unsigned int keylen, + gcry_cipher_hd_t hd) { + (void)hd; return setkey_core (context, key, keylen, 1); } diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index 735e5cd..91174d5 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -327,8 +327,8 @@ _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) /* Make a decryption key from an encryption key. 
*/ -void -_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) +static inline void +do_aesni_prepare_decryption (RIJNDAEL_context *ctx) { /* The AES-NI decrypt instructions use the Equivalent Inverse Cipher, thus we can't use the the standard decrypt key @@ -338,8 +338,6 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) int rr; int r; - aesni_prepare(); - #define DO_AESNI_AESIMC() \ asm volatile ("movdqa %[ekey], %%xmm1\n\t" \ /*"aesimc %%xmm1, %%xmm1\n\t"*/ \ @@ -375,7 +373,13 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) dkey[r] = ekey[0]; #undef DO_AESNI_AESIMC +} +void +_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) +{ + aesni_prepare(); + do_aesni_prepare_decryption (ctx); aesni_cleanup(); } @@ -1023,8 +1027,8 @@ _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, void -_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare (); @@ -1059,8 +1063,8 @@ _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, void -_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int cbc_mac) { aesni_prepare_2_6_variable; @@ -1105,8 +1109,8 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, void -_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *ctr, +_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = @@ -1160,8 +1164,8 @@ 
_gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, void -_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare_2_6_variable; @@ -1245,15 +1249,21 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, void -_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, - size_t nblocks) +_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) { aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6(); + if ( !ctx->decryption_prepared ) + { + do_aesni_prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + asm volatile ("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */ : /* No output */ @@ -1514,6 +1524,12 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, aesni_prepare (); aesni_prepare_2_6 (); + if ( !ctx->decryption_prepared ) + { + do_aesni_prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" "movdqu %[ctr], %%xmm6\n\t" @@ -1665,7 +1681,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, } -void +size_t _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { @@ -1673,10 +1689,12 @@ _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); + + return 0; } -void +size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { @@ -1810,7 +1828,306 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, aesni_cleanup (); 
aesni_cleanup_2_6 (); + + return 0; } +static const u64 xts_gfmul_const[16] __attribute__ ((aligned (16))) = + { 0x87, 0x01 }; + + +static void +_gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + /* Preload Tweak */ + asm volatile ("movdqu %[tweak], %%xmm5\n\t" + "movdqa %[gfmul], %%xmm6\n\t" + : + : [tweak] "m" (*tweak), + [gfmul] "m" (*xts_gfmul_const) + : "memory" ); + + for ( ;nblocks >= 4; nblocks -= 4 ) + { + asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * 16)) + : [inbuf0] "m" (*(inbuf + 0 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * 16)) + : [inbuf1] "m" (*(inbuf + 1 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * 16)) + : [inbuf2] "m" (*(inbuf + 2 * 16)) + : "memory" ); + + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm5, %[outbuf3]\n\t" + + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, 
%%xmm5\n\t" + : [outbuf3] "=m" (*(outbuf + 3 * 16)) + : [inbuf3] "m" (*(inbuf + 3 * 16)) + : "memory" ); + + do_aesni_enc_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0], %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %[outbuf1], %%xmm0\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf2], %%xmm1\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %[outbuf3], %%xmm0\n\t" + "pxor %%xmm1, %%xmm3\n\t" + "pxor %%xmm0, %%xmm4\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * 16)), + [outbuf1] "+m" (*(outbuf + 1 * 16)), + [outbuf2] "+m" (*(outbuf + 2 * 16)), + [outbuf3] "+m" (*(outbuf + 3 * 16)) + : + : "memory" ); + + outbuf += BLOCKSIZE * 4; + inbuf += BLOCKSIZE * 4; + } + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "movdqa %%xmm5, %%xmm4\n\t" + + "pshufd $0x13, %%xmm5, %%xmm1\n\t" + "psrad $31, %%xmm1\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm1\n\t" + "pxor %%xmm1, %%xmm5\n\t" + : + : [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm5, %[tweak]\n\t" + : [tweak] "=m" (*tweak) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +static void +_gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + if ( !ctx->decryption_prepared ) + { + do_aesni_prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + + /* Preload Tweak */ + asm volatile ("movdqu %[tweak], %%xmm5\n\t" + "movdqa %[gfmul], %%xmm6\n\t" + : + : [tweak] "m" (*tweak), + [gfmul] "m" (*xts_gfmul_const) + : "memory" ); + + for ( 
;nblocks >= 4; nblocks -= 4 ) + { + asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * 16)) + : [inbuf0] "m" (*(inbuf + 0 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * 16)) + : [inbuf1] "m" (*(inbuf + 1 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * 16)) + : [inbuf2] "m" (*(inbuf + 2 * 16)) + : "memory" ); + + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm5, %[outbuf3]\n\t" + + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf3] "=m" (*(outbuf + 3 * 16)) + : [inbuf3] "m" (*(inbuf + 3 * 16)) + : "memory" ); + + do_aesni_dec_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0], %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %[outbuf1], %%xmm0\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf2], %%xmm1\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %[outbuf3], %%xmm0\n\t" + "pxor %%xmm1, %%xmm3\n\t" + "pxor %%xmm0, %%xmm4\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + : [outbuf0] "+m" 
(*(outbuf + 0 * 16)), + [outbuf1] "+m" (*(outbuf + 1 * 16)), + [outbuf2] "+m" (*(outbuf + 2 * 16)), + [outbuf3] "+m" (*(outbuf + 3 * 16)) + : + : "memory" ); + + outbuf += BLOCKSIZE * 4; + inbuf += BLOCKSIZE * 4; + } + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "movdqa %%xmm5, %%xmm4\n\t" + + "pshufd $0x13, %%xmm5, %%xmm1\n\t" + "psrad $31, %%xmm1\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm1\n\t" + "pxor %%xmm1, %%xmm5\n\t" + : + : [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_dec (ctx); + + asm volatile ("pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm5, %[tweak]\n\t" + : [tweak] "=m" (*tweak) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +void +_gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks, int encrypt) +{ + if (encrypt) + _gcry_aes_aesni_xts_enc(ctx, tweak, outbuf, inbuf, nblocks); + else + _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks); +} + #endif /* USE_AESNI */ diff --git a/cipher/rijndael-armv8-aarch32-ce.S b/cipher/rijndael-armv8-aarch32-ce.S index 5c8fa3c..66440bd 100644 --- a/cipher/rijndael-armv8-aarch32-ce.S +++ b/cipher/rijndael-armv8-aarch32-ce.S @@ -1517,6 +1517,317 @@ _gcry_aes_ocb_auth_armv8_ce: .size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; + +/* + * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, size_t nblocks, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_xts_enc_armv8_ce +.type _gcry_aes_xts_enc_armv8_ce,%function; +_gcry_aes_xts_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: nrounds => r5 + */ + + vpush {q4-q7} +
push {r4-r12,lr} /* 4*16 + 4*10 = 104b */ + ldr r4, [sp, #(104+0)] + ldr r5, [sp, #(104+4)] + cmp r4, #0 + beq .Lxts_enc_skip + + cmp r5, #12 + + vld1.8 {q0}, [r3] /* load tweak */ + mov r7, #0x87; + + aes_preload_keys(r0, r6); + + beq .Lxts_enc_entry_192 + bhi .Lxts_enc_entry_256 + +#define CTR_XTS(bits, ...) \ + .Lxts_enc_entry_##bits: \ + cmp r4, #4; \ + blo .Lxts_enc_loop_##bits; \ + \ + .Lxts_enc_loop4_##bits: \ + sub r4, r4, #4; \ + veor q9, q9, q9; \ + \ + vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \ + veor q1, q1, q0; \ + cmp r4, #4; \ + vmov.u32 d18[0], r7; \ + vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \ + veor q2, q2, q0; \ + vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + veor q3, q3, q0; \ + vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + veor q4, q4, q0; \ + vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \ + sub r1, r1, #48; \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \ + veor q1, q1, q8; \ + veor q2, q2, q9; \ + vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \ + sub r1, r1, #32; \ + veor q3, q3, q8; \ + veor q4, q4, q9; \ + vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \ + vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \ + \ + bhs .Lxts_enc_loop4_##bits; \ + cmp r4, #0; \ + beq .Lxts_enc_done; \ + \ + .Lxts_enc_loop_##bits: \ + \ + vld1.8 {q1}, [r2]!; /* load ciphertext */ \ + \ + veor q9, q9, q9; \ + 
veor q1, q1, q0; \ + vmov.u32 d18[0], r7; \ + vmov q2, q0; \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + subs r4, r4, #1; \ + \ + do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \ + \ + veor q1, q1, q2; \ + vst1.8 {q1}, [r1]!; /* store plaintext */ \ + \ + bne .Lxts_enc_loop_##bits; \ + b .Lxts_enc_done; + + CTR_XTS(128re, r0, r6) + CTR_XTS(192, r0, r6) + CTR_XTS(256, r0, r6) + +#undef CTR_XTS + +.Lxts_enc_done: + vst1.8 {q0}, [r3] /* store tweak */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lxts_enc_skip: + pop {r4-r12,lr} + vpop {q4-q7} + bx lr +.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; + + +/* + * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *iv, size_t nblocks, unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_xts_dec_armv8_ce +.type _gcry_aes_xts_dec_armv8_ce,%function; +_gcry_aes_xts_dec_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: iv + * %st+0: nblocks => r4 + * %st+4: nrounds => r5 + */ + + vpush {q4-q7} + push {r4-r12,lr} /* 4*16 + 4*10 = 104b */ + ldr r4, [sp, #(104+0)] + ldr r5, [sp, #(104+4)] + cmp r4, #0 + beq .Lxts_dec_skip + + cmp r5, #12 + + vld1.8 {q0}, [r3] /* load tweak */ + mov r7, #0x87; + + aes_preload_keys(r0, r6); + + beq .Lxts_dec_entry_192 + bhi .Lxts_dec_entry_256 + +#define CTR_XTS(bits, ...)
\ + .Lxts_dec_entry_##bits: \ + cmp r4, #4; \ + blo .Lxts_dec_loop_##bits; \ + \ + .Lxts_dec_loop4_##bits: \ + sub r4, r4, #4; \ + veor q9, q9, q9; \ + \ + vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \ + veor q1, q1, q0; \ + cmp r4, #4; \ + vmov.u32 d18[0], r7; \ + vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \ + veor q2, q2, q0; \ + vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + veor q3, q3, q0; \ + vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + veor q4, q4, q0; \ + vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \ + sub r1, r1, #48; \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + \ + do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \ + \ + vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \ + veor q1, q1, q8; \ + veor q2, q2, q9; \ + vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \ + sub r1, r1, #32; \ + veor q3, q3, q8; \ + veor q4, q4, q9; \ + vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \ + vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \ + \ + bhs .Lxts_dec_loop4_##bits; \ + cmp r4, #0; \ + beq .Lxts_dec_done; \ + \ + .Lxts_dec_loop_##bits: \ + \ + vld1.8 {q1}, [r2]!; /* load ciphertext */ \ + \ + veor q9, q9, q9; \ + veor q1, q1, q0; \ + vmov.u32 d18[0], r7; \ + vmov q2, q0; \ + \ + vshr.s64 d16, d1, #63; \ + vshr.u64 d17, d0, #63; \ + vadd.u64 q0, q0, q0; \ + vand d16, d16, d18; \ + veor q0, q0, q8; \ + subs r4, r4, #1; \ + \ + do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \ + \ + veor q1, q1, q2; \ + vst1.8 {q1}, [r1]!; 
/* store plaintext */ \ + \ + bne .Lxts_dec_loop_##bits; \ + b .Lxts_dec_done; + + CTR_XTS(128re, r0, r6) + CTR_XTS(192, r0, r6) + CTR_XTS(256, r0, r6) + +#undef CTR_XTS + +.Lxts_dec_done: + vst1.8 {q0}, [r3] /* store tweak */ + + CLEAR_REG(q0) + CLEAR_REG(q1) + CLEAR_REG(q2) + CLEAR_REG(q3) + CLEAR_REG(q8) + CLEAR_REG(q9) + CLEAR_REG(q10) + CLEAR_REG(q11) + CLEAR_REG(q12) + CLEAR_REG(q13) + CLEAR_REG(q14) + +.Lxts_dec_skip: + pop {r4-r12,lr} + vpop {q4-q7} + bx lr +.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; + + /* * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 708ef34..09462a8 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce: * w6: nrounds => w7 * w7: blkn => w12 */ - mov x12, x7 - mov x7, x6 + mov w12, w7 + mov w7, w6 mov x6, x5 mov x5, x4 mov x4, x3 @@ -1277,6 +1277,284 @@ _gcry_aes_ocb_auth_armv8_ce: /* + * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_xts_enc_armv8_ce +.type _gcry_aes_xts_enc_armv8_ce,%function; +_gcry_aes_xts_enc_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: tweak + * x4: nblocks + * w5: nrounds + */ + + cbz x4, .Lxts_enc_skip + + /* load tweak */ + ld1 {v0.16b}, [x3] + + /* load gfmul mask */ + mov x6, #0x87 + mov x7, #0x01 + mov v16.D[0], x6 + mov v16.D[1], x7 + + aes_preload_keys(x0, w5); + + b.eq .Lxts_enc_entry_192 + b.hi .Lxts_enc_entry_256 + +#define XTS_ENC(bits) \ + .Lxts_enc_entry_##bits: \ + cmp x4, #4; \ + b.lo .Lxts_enc_loop_##bits; \ + \ + .Lxts_enc_loop4_##bits: \ + \ + ext v4.16b, v0.16b, v0.16b, #8; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v5.2d, v0.2d, v0.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor 
v5.16b, v5.16b, v2.16b; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v6.2d, v5.2d, v5.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v6.16b, v6.16b, v2.16b; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v7.2d, v6.2d, v6.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v7.16b, v7.16b, v2.16b; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v3.2d, v7.2d, v7.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v3.16b, v3.16b, v2.16b; \ + ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \ + st1 {v3.16b}, [x3]; \ + sub x4, x4, #4; \ + eor v1.16b, v1.16b, v0.16b; \ + \ + ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \ + cmp x4, #4; \ + eor v2.16b, v2.16b, v5.16b; \ + eor v3.16b, v3.16b, v6.16b; \ + eor v4.16b, v4.16b, v7.16b; \ + \ + do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v0.16b; \ + ld1 {v0.16b}, [x3]; \ + eor v2.16b, v2.16b, v5.16b; \ + eor v3.16b, v3.16b, v6.16b; \ + eor v4.16b, v4.16b, v7.16b; \ + st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ + \ + b.hs .Lxts_enc_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x4, .Lxts_enc_done; \ + \ + .Lxts_enc_loop_##bits: \ + \ + ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ + ext v3.16b, v0.16b, v0.16b, #8; \ + mov v2.16b, v0.16b; \ + sshr v3.2d, v3.2d, #63; \ + add v0.2d, v0.2d, v0.2d; \ + and v3.16b, v3.16b, v16.16b; \ + eor v1.16b, v1.16b, v2.16b; \ + eor v0.16b, v0.16b, v3.16b; \ + sub x4, x4, #1; \ + \ + do_aes_one##bits(e, mc, v1, v1); \ + \ + eor v1.16b, v1.16b, v2.16b; \ + st1 {v1.16b}, [x1], #16; /* store ciphertext */ \ + \ + cbnz x4, .Lxts_enc_loop_##bits; \ + b .Lxts_enc_done; + + XTS_ENC(128) + XTS_ENC(192) + XTS_ENC(256) + +#undef XTS_ENC + +.Lxts_enc_done: + aes_clear_keys(w5) + + st1 {v0.16b}, [x3] /* store tweak */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + +.Lxts_enc_skip: + ret + +.size 
_gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; + + +/* + * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, + * unsigned char *outbuf, + * const unsigned char *inbuf, + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); + */ + +.align 3 +.globl _gcry_aes_xts_dec_armv8_ce +.type _gcry_aes_xts_dec_armv8_ce,%function; +_gcry_aes_xts_dec_armv8_ce: + /* input: + * r0: keysched + * r1: outbuf + * r2: inbuf + * r3: tweak + * x4: nblocks + * w5: nrounds + */ + + cbz x4, .Lxts_dec_skip + + /* load tweak */ + ld1 {v0.16b}, [x3] + + /* load gfmul mask */ + mov x6, #0x87 + mov x7, #0x01 + mov v16.D[0], x6 + mov v16.D[1], x7 + + aes_preload_keys(x0, w5); + + b.eq .Lxts_dec_entry_192 + b.hi .Lxts_dec_entry_256 + +#define XTS_DEC(bits) \ + .Lxts_dec_entry_##bits: \ + cmp x4, #4; \ + b.lo .Lxts_dec_loop_##bits; \ + \ + .Lxts_dec_loop4_##bits: \ + \ + ext v4.16b, v0.16b, v0.16b, #8; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v5.2d, v0.2d, v0.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v5.16b, v5.16b, v2.16b; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v6.2d, v5.2d, v5.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v6.16b, v6.16b, v2.16b; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v7.2d, v6.2d, v6.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v7.16b, v7.16b, v2.16b; \ + \ + sshr v2.2d, v4.2d, #63; \ + add v3.2d, v7.2d, v7.2d; \ + and v2.16b, v2.16b, v16.16b; \ + add v4.2d, v4.2d, v4.2d; \ + eor v3.16b, v3.16b, v2.16b; \ + ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \ + st1 {v3.16b}, [x3]; \ + sub x4, x4, #4; \ + eor v1.16b, v1.16b, v0.16b; \ + \ + ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \ + cmp x4, #4; \ + eor v2.16b, v2.16b, v5.16b; \ + eor v3.16b, v3.16b, v6.16b; \ + eor v4.16b, v4.16b, v7.16b; \ + \ + do_aes_4_##bits(d, imc, v1, v2, v3, v4); \ + \ + eor v1.16b, v1.16b, v0.16b; \ + ld1 {v0.16b}, [x3]; \ + eor v2.16b, v2.16b, v5.16b; \ + eor v3.16b, 
v3.16b, v6.16b; \ + eor v4.16b, v4.16b, v7.16b; \ + st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ + \ + b.hs .Lxts_dec_loop4_##bits; \ + CLEAR_REG(v3); \ + CLEAR_REG(v4); \ + CLEAR_REG(v5); \ + CLEAR_REG(v6); \ + CLEAR_REG(v7); \ + cbz x4, .Lxts_dec_done; \ + \ + .Lxts_dec_loop_##bits: \ + \ + ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ + ext v3.16b, v0.16b, v0.16b, #8; \ + mov v2.16b, v0.16b; \ + sshr v3.2d, v3.2d, #63; \ + add v0.2d, v0.2d, v0.2d; \ + and v3.16b, v3.16b, v16.16b; \ + eor v1.16b, v1.16b, v2.16b; \ + eor v0.16b, v0.16b, v3.16b; \ + sub x4, x4, #1; \ + \ + do_aes_one##bits(d, imc, v1, v1); \ + \ + eor v1.16b, v1.16b, v2.16b; \ + st1 {v1.16b}, [x1], #16; /* store ciphertext */ \ + \ + cbnz x4, .Lxts_dec_loop_##bits; \ + b .Lxts_dec_done; + + XTS_DEC(128) + XTS_DEC(192) + XTS_DEC(256) + +#undef XTS_DEC + +.Lxts_dec_done: + aes_clear_keys(w5) + + st1 {v0.16b}, [x3] /* store tweak */ + + CLEAR_REG(v0) + CLEAR_REG(v1) + CLEAR_REG(v2) + +.Lxts_dec_skip: + ret + +.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; + + +/* * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); */ .align 3 diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c index 334cf68..6e46830 100644 --- a/cipher/rijndael-armv8-ce.c +++ b/cipher/rijndael-armv8-ce.c @@ -101,6 +101,16 @@ extern void _gcry_aes_ocb_auth_armv8_ce (const void *keysched, size_t nblocks, unsigned int nrounds, unsigned int blkn); +extern void _gcry_aes_xts_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *tweak, + size_t nblocks, unsigned int nrounds); +extern void _gcry_aes_xts_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *tweak, + size_t nblocks, unsigned int nrounds); typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf, const unsigned char *inbuf, @@ -108,6 +118,11 @@ typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char 
*outbuf, unsigned char *L_table, size_t nblocks, unsigned int nrounds, unsigned int blkn); +typedef void (*xts_crypt_fn_t) (const void *keysched, unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *tweak, size_t nblocks, + unsigned int nrounds); + void _gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key) { @@ -269,8 +284,8 @@ _gcry_aes_armv8_ce_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, } void -_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int cbc_mac) { const void *keysched = ctx->keyschenc32; @@ -281,19 +296,25 @@ _gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf, } void -_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { const void *keysched = ctx->keyschdec32; unsigned int nrounds = ctx->rounds; + if ( !ctx->decryption_prepared ) + { + _gcry_aes_armv8_ce_prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); } void -_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { const void *keysched = ctx->keyschenc32; @@ -303,8 +324,8 @@ _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, } void -_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_armv8_ce_cfb_dec 
(RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { const void *keysched = ctx->keyschenc32; @@ -314,8 +335,8 @@ _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, } void -_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, +_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { const void *keysched = ctx->keyschenc32; @@ -324,7 +345,7 @@ _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); } -void +size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) @@ -338,13 +359,21 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, unsigned int nrounds = ctx->rounds; u64 blkn = c->u_mode.ocb.data_nblocks; + if ( !encrypt && !ctx->decryption_prepared ) + { + _gcry_aes_armv8_ce_prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + c->u_mode.ocb.data_nblocks = blkn + nblocks; crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn); + + return 0; } -void +size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) { @@ -359,6 +388,27 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn); + + return 0; +} + +void +_gcry_aes_armv8_ce_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks, int encrypt) +{ + const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; + xts_crypt_fn_t crypt_fn = encrypt ? 
_gcry_aes_xts_enc_armv8_ce + : _gcry_aes_xts_dec_armv8_ce; + unsigned int nrounds = ctx->rounds; + + if ( !encrypt && !ctx->decryption_prepared ) + { + _gcry_aes_armv8_ce_prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + + crypt_fn(keysched, outbuf, inbuf, tweak, nblocks, nrounds); } #endif /* USE_ARM_CE */ diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index 160fb8c..34b1f10 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -73,7 +73,7 @@ # define USE_PADLOCK 1 # endif # endif -#endif /*ENABLE_PADLOCK_SUPPORT*/ +#endif /* ENABLE_PADLOCK_SUPPORT */ /* USE_AESNI inidicates whether to compile with Intel AES-NI code. We need the vector-size attribute which seems to be available since @@ -102,6 +102,23 @@ # endif #endif /* ENABLE_ARM_CRYPTO_SUPPORT */ +/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto + * accelerated code. USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to + * enable POWER9 optimized variant. */ +#undef USE_PPC_CRYPTO +#undef USE_PPC_CRYPTO_WITH_PPC9LE +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ + defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) +# if __GNUC__ >= 4 +# define USE_PPC_CRYPTO 1 +# if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00) +# define USE_PPC_CRYPTO_WITH_PPC9LE 1 +# endif +# endif +# endif +#endif /* ENABLE_PPC_CRYPTO_SUPPORT */ + struct RIJNDAEL_context_s; typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx, @@ -150,6 +167,12 @@ typedef struct RIJNDAEL_context_s #ifdef USE_ARM_CE unsigned int use_arm_ce:1; /* ARMv8 CE shall be used. */ #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO + unsigned int use_ppc_crypto:1; /* PowerPC crypto shall be used. */ +#endif /*USE_PPC_CRYPTO*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + unsigned int use_ppc9le_crypto:1; /* POWER9 LE crypto shall be used. 
*/ +#endif rijndael_cryptfn_t encrypt_fn; rijndael_cryptfn_t decrypt_fn; rijndael_prefetchfn_t prefetch_enc_fn; diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h new file mode 100644 index 0000000..bbbeaac --- /dev/null +++ b/cipher/rijndael-ppc-common.h @@ -0,0 +1,342 @@ +/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation + * Copyright (C) 2019 Shawn Landden + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, + * and Cryptogams by Andy Polyakov, and if made part of a release of either + * or both projects, is thereafter dual-licensed under the license said project + * is released under. 
+ */
+
+#ifndef G10_RIJNDAEL_PPC_COMMON_H
+#define G10_RIJNDAEL_PPC_COMMON_H
+
+#include <altivec.h>
+
+/* One AltiVec byte vector; used below for AES blocks and round keys. */
+typedef vector unsigned char block;
+
+typedef union /* alignment-1, may_alias view of four u32 words in memory */
+{
+  u32 data32[4];
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+/* Attributes applied to the inline-assembly helper functions below. */
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+/* Vector load/store helpers; OFFS counts 16-byte blocks, not bytes. */
+#define ALIGNED_LOAD(in_ptr, offs) \
+  (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))
+
+#define ALIGNED_STORE(out_ptr, offs, vec) \
+  (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))
+
+#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))
+
+#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
+  (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
+                bige_const))
+
+#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
+  (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))
+
+#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
+  (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
+                        (void *)(out_ptr)))
+
+#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
+  (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
+
+/* Rounds that load keys 1..13 from `rk'; need rk/rkey0/rkeylast in scope. */
+#define ROUND_KEY_VARIABLES \
+  block rkey0, rkeylast
+
+#define PRELOAD_ROUND_KEYS(nrounds) \
+  do { \
+    rkey0 = ALIGNED_LOAD (rk, 0); \
+    rkeylast = ALIGNED_LOAD (rk, nrounds); \
+  } while (0)
+
+#define AES_ENCRYPT(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+    if ((nrounds) >= 12) \
+      { \
+        blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+        blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+        if ((nrounds) > 12) \
+          { \
+            blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+            blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+          } \
+      } \
+    blk = asm_cipherlast_be (blk, rkeylast); \
+  } while (0)
+
+#define AES_DECRYPT(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+    if ((nrounds) >= 12) \
+      { \
+        blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+        blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+        if ((nrounds) > 12) \
+          { \
+            blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+            blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+          } \
+      } \
+    blk = asm_ncipherlast_be (blk, rkeylast); \
+  } while (0)
+
+/* Variant that keeps every round key in a local; `rk' must be in scope. */
+#define ROUND_KEY_VARIABLES_ALL \
+  block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
+        rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast
+
+#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
+  do { \
+    rkey0 = ALIGNED_LOAD (rk, 0); \
+    rkey1 = ALIGNED_LOAD (rk, 1); \
+    rkey2 = ALIGNED_LOAD (rk, 2); \
+    rkey3 = ALIGNED_LOAD (rk, 3); \
+    rkey4 = ALIGNED_LOAD (rk, 4); \
+    rkey5 = ALIGNED_LOAD (rk, 5); \
+    rkey6 = ALIGNED_LOAD (rk, 6); \
+    rkey7 = ALIGNED_LOAD (rk, 7); \
+    rkey8 = ALIGNED_LOAD (rk, 8); \
+    rkey9 = ALIGNED_LOAD (rk, 9); \
+    if ((nrounds) >= 12) \
+      { \
+        rkey10 = ALIGNED_LOAD (rk, 10); \
+        rkey11 = ALIGNED_LOAD (rk, 11); \
+        if ((nrounds) > 12) \
+          { \
+            rkey12 = ALIGNED_LOAD (rk, 12); \
+            rkey13 = ALIGNED_LOAD (rk, 13); \
+          } \
+      } \
+    rkeylast = ALIGNED_LOAD (rk, nrounds); \
+  } while (0)
+
+#define AES_ENCRYPT_ALL(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_cipher_be (blk, rkey1); \
+    blk = asm_cipher_be (blk, rkey2); \
+    blk = asm_cipher_be (blk, rkey3); \
+    blk = asm_cipher_be (blk, rkey4); \
+    blk = asm_cipher_be (blk, rkey5); \
+    blk = asm_cipher_be (blk, rkey6); \
+    blk = asm_cipher_be (blk, rkey7); \
+    blk = asm_cipher_be (blk, rkey8); \
+    blk = asm_cipher_be (blk, rkey9); \
+    if ((nrounds) >= 12) \
+      { \
+        blk = asm_cipher_be (blk, rkey10); \
+        blk = asm_cipher_be (blk, rkey11); \
+        if ((nrounds) > 12) \
+          { \
+            blk = asm_cipher_be (blk, rkey12); \
+            blk = asm_cipher_be (blk, rkey13); \
+          } \
+      } \
+    blk = asm_cipherlast_be (blk, rkeylast); \
+  } while (0)
+
+/* Thin wrappers, each emitting one vector instruction via inline asm. */
+static ASM_FUNC_ATTR_INLINE block /* lvx: load vector at PTR + OFFSET */
+asm_aligned_ld(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0) /* literal 0 index */
+    __asm__ volatile ("lvx %0,0,%1\n\t"
+                      : "=v" (vec)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lvx %0,%1,%2\n\t"
+                      : "=v" (vec)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0"); /* presumably keeps OFFSET out of r0 */
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void /* stvx: store VEC at PTR + OFFSET */
+asm_aligned_st(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0) /* literal 0 index */
+    __asm__ volatile ("stvx %0,0,%1\n\t"
+                      :
+                      : "v" (vec), "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("stvx %0,%1,%2\n\t"
+                      :
+                      : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0"); /* presumably keeps OFFSET out of r0 */
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vperm with VEC as both sources */
+asm_vperm1(block vec, block mask)
+{
+  block o;
+  __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
+                    : "=v" (o)
+                    : "v" (vec), "v" (mask));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vadduqm: A + B */
+asm_add_uint128(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vadduqm %0,%1,%2\n\t"
+                    : "=v" (res)
+                    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vaddudm: A + B */
+asm_add_uint64(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vaddudm %0,%1,%2\n\t"
+                    : "=v" (res)
+                    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vsrad: A shifted right by B */
+asm_sra_int64(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vsrad %0,%1,%2\n\t"
+                    : "=v" (res)
+                    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* xxswapd on A (VSX "wa" operands) */
+asm_swap_uint64_halfs(block a)
+{
+  block res;
+  __asm__ volatile ("xxswapd %x0, %x1"
+                    : "=wa" (res)
+                    : "wa" (a));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vxor: A ^ B */
+asm_xor(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vxor %0,%1,%2\n\t"
+                    : "=v" (res)
+                    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vcipher on B with round key RK */
+asm_cipher_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vcipher %0, %1, %2\n\t"
+                    : "=v" (o)
+                    : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vcipherlast on B with round key RK */
+asm_cipherlast_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
+                    : "=v" (o)
+                    : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vncipher on B with round key RK */
+asm_ncipher_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vncipher %0, %1, %2\n\t"
+                    : "=v" (o)
+                    : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block /* vncipherlast on B with round key RK */
+asm_ncipherlast_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
+                    : "=v" (o)
+                    : "v" (b), "v" (rk));
+  return o;
+}
+
+/* Make a decryption key from an encryption key: copy round keys
+ * 0..rounds of ctx->keyschenc into ctx->keyschdec in reverse order.
+ * Uses ALIGNED_LOAD/ALIGNED_STORE (lvx/stvx), so both key schedules
+ * are presumably required to be 16-byte aligned. */
+static ASM_FUNC_ATTR_INLINE void
+internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+  u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+  int rounds = ctx->rounds;
+  int rr;
+  int r;
+
+  for (r = 0, rr = rounds; r <= rounds; r++, rr--)
+    {
+      ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
+    }
+}
+
+#endif /* G10_RIJNDAEL_PPC_COMMON_H */
diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h
new file mode 100644
index 0000000..72f3185
--- /dev/null
+++ b/cipher/rijndael-ppc-functions.h
@@ -0,0 +1,2020 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden
+ * Copyright (C) 2019-2020 Jussi Kivilinna
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */ + +unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + const block bige_const = asm_load_be_const(); + const u128_t *rk = (u128_t *)&ctx->keyschenc; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b; + + b = VEC_LOAD_BE (in, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + + AES_ENCRYPT (b, rounds); + VEC_STORE_BE (out, 0, b, bige_const); + + return 0; /* does not use stack */ +} + + +unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + const block bige_const = asm_load_be_const(); + const u128_t *rk = (u128_t *)&ctx->keyschdec; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b; + + b = VEC_LOAD_BE (in, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + + AES_DECRYPT (b, rounds); + VEC_STORE_BE (out, 0, b, bige_const); + + return 0; /* does not use stack */ +} + + +void CFB_ENC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES_ALL; + block rkeylast_orig; + block iv; + + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS_ALL (rounds); + rkeylast_orig = rkeylast; + + for (; nblocks >= 2; nblocks -= 2) + { + block in2, iv1; + + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in + 1, 0, bige_const); + in += 2; + + AES_ENCRYPT_ALL (iv, rounds); + + iv1 = iv; + rkeylast = rkeylast_orig ^ in2; + + AES_ENCRYPT_ALL (iv, rounds); + + VEC_STORE_BE (out++, 0, iv1, bige_const); + VEC_STORE_BE (out++, 0, iv, bige_const); + } + + for (; nblocks; nblocks--) + { + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const); + + AES_ENCRYPT_ALL (iv, rounds); + + 
VEC_STORE_BE (out++, 0, iv, bige_const); + } + + VEC_STORE_BE (iv_arg, 0, iv, bige_const); +} + +void CFB_DEC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block rkeylast_orig; + block iv, b, bin; + block in0, in1, in2, in3, in4, in5, in6, in7; + block b0, b1, b2, b3, b4, b5, b6, b7; + block rkey; + + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + rkeylast_orig = rkeylast; + + for (; nblocks >= 8; nblocks -= 8) + { + in0 = iv; + in1 = VEC_LOAD_BE_NOSWAP (in, 0); + in2 = VEC_LOAD_BE_NOSWAP (in, 1); + in3 = VEC_LOAD_BE_NOSWAP (in, 2); + in4 = VEC_LOAD_BE_NOSWAP (in, 3); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in5 = VEC_LOAD_BE_NOSWAP (in, 4); + in6 = VEC_LOAD_BE_NOSWAP (in, 5); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in7 = VEC_LOAD_BE_NOSWAP (in, 6); + iv = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in7 = VEC_BE_SWAP (in7, bige_const); + iv = VEC_BE_SWAP (iv, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + 
DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in4); + in7 = asm_xor (rkeylast, in7); + in0 = asm_xor (rkeylast, iv); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, in5); + b5 = asm_cipherlast_be (b5, in6); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, in7); + b7 = asm_cipherlast_be (b7, in0); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + in0 = iv; + in1 = VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in, 1, bige_const); + in3 = VEC_LOAD_BE (in, 2, bige_const); + iv = VEC_LOAD_BE (in, 3, bige_const); + + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + 
DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in0 = asm_xor (rkeylast, iv); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in0); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + bin = VEC_LOAD_BE (in, 0, bige_const); + rkeylast = rkeylast_orig ^ bin; + b = iv; + iv = bin; + + AES_ENCRYPT (b, rounds); + + VEC_STORE_BE (out, 0, b, bige_const); + + out++; + in++; + } + + VEC_STORE_BE (iv_arg, 0, iv, bige_const); +} + + +void CBC_ENC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + byte *out = (byte *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES_ALL; + block lastiv, b; + unsigned int outadd = -(!cbc_mac) & 16; + + lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS_ALL (rounds); + + for (; nblocks >= 2; nblocks -= 2) + { + block in2, lastiv1; + + b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in + 1, 0, bige_const); + in += 2; + + AES_ENCRYPT_ALL (b, rounds); + + lastiv1 = b; + b = lastiv1 ^ in2; + + AES_ENCRYPT_ALL (b, rounds); + + lastiv = b; + VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const); + out += outadd; + VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const); + out += outadd; + } + + for (; nblocks; nblocks--) + 
{ + b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const); + + AES_ENCRYPT_ALL (b, rounds); + + lastiv = b; + VEC_STORE_BE ((u128_t *)out, 0, b, bige_const); + out += outadd; + } + + VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); +} + +void CBC_DEC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschdec; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block rkeylast_orig; + block in0, in1, in2, in3, in4, in5, in6, in7; + block b0, b1, b2, b3, b4, b5, b6, b7; + block rkey; + block iv, b; + + if (!ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + rkeylast_orig = rkeylast; + + for (; nblocks >= 8; nblocks -= 8) + { + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ 
+ b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + b0 = asm_ncipherlast_be (b0, iv); + iv = in7; + b1 = asm_ncipherlast_be (b1, in0); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, in3); + b5 = asm_ncipherlast_be (b5, in4); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, in5); + b7 = asm_ncipherlast_be (b7, in6); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + +#define DO_ROUND(r) \ + rkey 
= ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + + b0 = asm_ncipherlast_be (b0, iv); + iv = in3; + b1 = asm_ncipherlast_be (b1, in0); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + rkeylast = rkeylast_orig ^ iv; + + iv = VEC_LOAD_BE (in, 0, bige_const); + b = iv; + AES_DECRYPT (b, rounds); + + VEC_STORE_BE (out, 0, b, bige_const); + + in++; + out++; + } + + VEC_STORE_BE (iv_arg, 0, iv, bige_const); +} + + +void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + static const unsigned char vec_one_const[16] = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block rkeylast_orig; + block ctr, b, one; + + ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); + one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + rkeylast_orig = rkeylast; + + if (nblocks >= 4) + { + block in0, in1, in2, in3, in4, in5, in6, in7; + block b0, 
b1, b2, b3, b4, b5, b6, b7; + block two, three, four; + block rkey; + + two = asm_add_uint128 (one, one); + three = asm_add_uint128 (two, one); + four = asm_add_uint128 (two, two); + + for (; nblocks >= 8; nblocks -= 8) + { + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b4 = asm_add_uint128 (ctr, four); + b5 = asm_add_uint128 (b1, four); + b6 = asm_add_uint128 (b2, four); + b7 = asm_add_uint128 (b3, four); + b0 = asm_xor (rkey0, ctr); + rkey = ALIGNED_LOAD (rk, 1); + ctr = asm_add_uint128 (b4, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b0 = asm_cipher_be (b0, rkey); + b1 = asm_cipher_be (b1, rkey); + b2 = asm_cipher_be (b2, rkey); + b3 = asm_cipher_be (b3, rkey); + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + b4 = asm_cipher_be (b4, rkey); + b5 = asm_cipher_be (b5, rkey); + b6 = asm_cipher_be (b6, rkey); + b7 = asm_cipher_be (b7, rkey); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + DO_ROUND(2); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + DO_ROUND(3); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + DO_ROUND(4); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + DO_ROUND(5); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + DO_ROUND(6); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + DO_ROUND(7); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + DO_ROUND(8); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + DO_ROUND(9); + + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, 
bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + in4 = asm_xor (rkeylast, in4); + in5 = asm_xor (rkeylast, in5); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + in6 = asm_xor (rkeylast, in6); + in7 = asm_xor (rkeylast, in7); + b4 = asm_cipherlast_be (b4, in4); + b5 = asm_cipherlast_be (b5, in5); + b6 = asm_cipherlast_be (b6, in6); + b7 = asm_cipherlast_be (b7, in7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b0 = asm_xor (rkey0, ctr); + ctr = asm_add_uint128 (ctr, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + 
DO_ROUND(8); + + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + } + + for (; nblocks; nblocks--) + { + b = ctr; + ctr = asm_add_uint128 (ctr, one); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); + + AES_ENCRYPT (b, rounds); + + VEC_STORE_BE (out, 0, b, bige_const); + + out++; + in++; + } + + VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); +} + + +size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = (void *)&c->context.c; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + u64 data_nblocks = c->u_mode.ocb.data_nblocks; + block l0, l1, l2, l; + block b0, b1, b2, b3, b4, b5, b6, b7, b; + block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; + block rkey, rkeylf; + block ctr, iv; + ROUND_KEY_VARIABLES; + + iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); + + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); + + if (encrypt) + { + const u128_t *rk = (u128_t 
*)&ctx->keyschenc; + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + b ^= iv; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + b4 ^= iv4; + b5 ^= iv5; + b6 ^= iv6; + b7 ^= iv7; + iv = iv7 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + 
DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, iv0); + b1 = asm_cipherlast_be (b1, iv1); + b2 = asm_cipherlast_be (b2, iv2); + b3 = asm_cipherlast_be (b3, iv3); + b4 = asm_cipherlast_be (b4, iv4); + b5 = asm_cipherlast_be (b5, iv5); + b6 = asm_cipherlast_be (b6, iv6); + b7 = asm_cipherlast_be (b7, iv7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, 
rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = asm_cipherlast_be (b0, rkey ^ iv0); + b1 = asm_cipherlast_be (b1, rkey ^ iv1); + b2 = asm_cipherlast_be (b2, rkey ^ iv2); + b3 = asm_cipherlast_be (b3, rkey ^ iv3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + b ^= iv; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + } + else + { + const u128_t *rk = (u128_t *)&ctx->keyschdec; + + if (!ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + b ^= iv; + AES_DECRYPT (b, rounds); + b ^= iv; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = 
VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + b4 ^= iv4; + b5 ^= iv5; + b6 ^= iv6; + b7 ^= iv7; + iv = iv7 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, iv0); + b1 = asm_ncipherlast_be (b1, iv1); + b2 = asm_ncipherlast_be (b2, iv2); + b3 = asm_ncipherlast_be (b3, iv3); + b4 = asm_ncipherlast_be 
(b4, iv4); + b5 = asm_ncipherlast_be (b5, iv5); + b6 = asm_ncipherlast_be (b6, iv6); + b7 = asm_ncipherlast_be (b7, iv7); + + ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = asm_ncipherlast_be (b0, rkey ^ iv0); + b1 = asm_ncipherlast_be (b1, rkey ^ iv1); + b2 = asm_ncipherlast_be (b2, rkey ^ iv2); + b3 = asm_ncipherlast_be (b3, rkey ^ iv3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + 
VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + b ^= iv; + AES_DECRYPT (b, rounds); + b ^= iv; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + } + + VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); + VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); + c->u_mode.ocb.data_nblocks = data_nblocks; + + return 0; +} + +size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = (void *)&c->context.c; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *abuf = (const u128_t *)abuf_arg; + int rounds = ctx->rounds; + u64 data_nblocks = c->u_mode.ocb.aad_nblocks; + block l0, l1, l2, l; + block b0, b1, b2, b3, b4, b5, b6, b7, b; + block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; + block rkey, frkey; + block ctr, iv; + ROUND_KEY_VARIABLES; + + iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); + + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + ctr ^= b; + + abuf += 1; + } + + for (; nblocks >= 8; nblocks -= 
8) + { + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + b4 = VEC_LOAD_BE (abuf, 4, bige_const); + b5 = VEC_LOAD_BE (abuf, 5, bige_const); + b6 = VEC_LOAD_BE (abuf, 6, bige_const); + b7 = VEC_LOAD_BE (abuf, 7, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); + + frkey = rkey0; + iv ^= frkey; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + b4 ^= iv4; + b5 ^= iv5; + b6 ^= iv6; + b7 ^= iv7; + iv = iv7 ^ frkey; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + b4 = asm_cipherlast_be (b4, rkey); + b5 = asm_cipherlast_be (b5, rkey); + b6 = asm_cipherlast_be (b6, rkey); + b7 = asm_cipherlast_be (b7, rkey); + + ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; + + abuf += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, 
data_nblocks += 4), 0, bige_const); + + frkey = rkey0; + iv ^= frkey; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ frkey; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + abuf += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + ctr ^= b; + + abuf += 1; + } + + VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); + c->u_mode.ocb.aad_nblocks = data_nblocks; + + return 0; +} + + +void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ +#ifdef WORDS_BIGENDIAN + static const block vec_bswap128_const = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; +#else + static const block vec_bswap128_const = + { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; +#endif + static const unsigned char vec_tweak_const[16] = + { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; + static const vector unsigned long long 
vec_shift63_const = + { 63, 63 }; + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + block tweak; + block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; + block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; + block tweak_const, bswap128_const, shift63_const; + ROUND_KEY_VARIABLES; + + tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); + bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); + shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0); + + tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); + tweak = asm_vperm1 (tweak, bswap128_const); + +#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \ + do { \ + block tmp1, tmp2; \ + tmp1 = asm_swap_uint64_halfs(tin); \ + tmp2 = asm_add_uint64(tin, tin); \ + tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \ + tout = asm_xor(tmp1, tmp2); \ + } while (0) + + if (encrypt) + { + const u128_t *rk = (u128_t *)&ctx->keyschenc; + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + 
GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, tweak0); + b1 = asm_cipherlast_be (b1, tweak1); + b2 = asm_cipherlast_be (b2, tweak2); + b3 = asm_cipherlast_be 
(b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, tweak4); + b5 = asm_cipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, tweak6); + b7 = asm_cipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_cipherlast_be (b0, rkey ^ tweak0); + b1 = asm_cipherlast_be (b1, rkey ^ tweak1); + b2 = asm_cipherlast_be 
(b2, rkey ^ tweak2); + b3 = asm_cipherlast_be (b3, rkey ^ tweak3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + tweak0 = asm_vperm1 (tweak, bswap128_const); + + /* Xor-Encrypt/Decrypt-Xor block. */ + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; + + /* Generate next tweak. */ + GEN_TWEAK (tweak, tweak); + + AES_ENCRYPT (b, rounds); + + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); + + in++; + out++; + } + } + else + { + const u128_t *rk = (u128_t *)&ctx->keyschdec; + + if (!ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 
(tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, tweak0); + b1 = asm_ncipherlast_be (b1, tweak1); + b2 = asm_ncipherlast_be (b2, tweak2); + b3 = asm_ncipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, tweak4); + b5 = asm_ncipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be 
(b6, tweak6); + b7 = asm_ncipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); + b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); + b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); + b3 = asm_ncipherlast_be (b3, rkey ^ tweak3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + 
nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + tweak0 = asm_vperm1 (tweak, bswap128_const); + + /* Xor-Encrypt/Decrypt-Xor block. */ + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; + + /* Generate next tweak. */ + GEN_TWEAK (tweak, tweak); + + AES_DECRYPT (b, rounds); + + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); + + in++; + out++; + } + } + + tweak = asm_vperm1 (tweak, bswap128_const); + VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); + +#undef GEN_TWEAK +} diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c new file mode 100644 index 0000000..f5c3236 --- /dev/null +++ b/cipher/rijndael-ppc.c @@ -0,0 +1,259 @@ +/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation + * Copyright (C) 2019 Shawn Landden + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, + * and Cryptogams by Andy Polyakov, and if made part of a release of either + * or both projects, is thereafter dual-licensed under the license said project + * is released under. 
+ */ + +#include + +#include "rijndael-internal.h" +#include "cipher-internal.h" +#include "bufhelp.h" + +#ifdef USE_PPC_CRYPTO + +#include "rijndael-ppc-common.h" + + +#ifdef WORDS_BIGENDIAN +static const block vec_bswap32_const = + { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; +#else +static const block vec_bswap32_const_neg = + { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 }; +#endif + + +static ASM_FUNC_ATTR_INLINE block +asm_load_be_const(void) +{ +#ifndef WORDS_BIGENDIAN + return ALIGNED_LOAD (&vec_bswap32_const_neg, 0); +#else + static const block vec_dummy = { 0 }; + return vec_dummy; +#endif +} + +static ASM_FUNC_ATTR_INLINE block +asm_be_swap(block vec, block be_bswap_const) +{ + (void)be_bswap_const; +#ifndef WORDS_BIGENDIAN + return asm_vperm1 (vec, be_bswap_const); +#else + return vec; +#endif +} + +static ASM_FUNC_ATTR_INLINE block +asm_load_be_noswap(unsigned long offset, const void *ptr) +{ + block vec; +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lxvw4x %x0,0,%1\n\t" + : "=wa" (vec) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */ + return vec; +} + +static ASM_FUNC_ATTR_INLINE void +asm_store_be_noswap(block vec, unsigned long offset, void *ptr) +{ + /* NOTE: vec be-swapped using 'asm_be_swap' by caller */ +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("stxvw4x %x0,0,%1\n\t" + : + : "wa" (vec), "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} + + +static ASM_FUNC_ATTR_INLINE u32 +_gcry_aes_sbox4_ppc8(u32 fourbytes) +{ + union + { + PROPERLY_ALIGNED_TYPE dummy; + block data_vec; + u32 data32[4]; + } u; + + 
u.data32[0] = fourbytes; + u.data_vec = vec_sbox_be(u.data_vec); + return u.data32[0]; +} + +void +_gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) +{ + const block bige_const = asm_load_be_const(); + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte data[MAXKC][4]; + u32 data32[MAXKC]; + } tkk[2]; + unsigned int rounds = ctx->rounds; + int KC = rounds - 6; + unsigned int keylen = KC * 4; + u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; + unsigned int i, r, t; + byte rcon = 1; + int j; +#define k tkk[0].data +#define k_u32 tkk[0].data32 +#define tk tkk[1].data +#define tk_u32 tkk[1].data32 +#define W (ctx->keyschenc) +#define W_u32 (ctx->keyschenc32) + + for (i = 0; i < keylen; i++) + { + k[i >> 2][i & 3] = key[i]; + } + + for (j = KC-1; j >= 0; j--) + { + tk_u32[j] = k_u32[j]; + } + r = 0; + t = 0; + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + while (r < rounds + 1) + { + tk_u32[0] ^= + le_bswap32( + _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon); + + if (KC != 8) + { + for (j = 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + else + { + for (j = 1; j < KC/2; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + + tk_u32[KC/2] ^= + le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1]))); + + for (j = KC/2 + 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b); + } + + /* Store in big-endian order. 
*/ + for (r = 0; r <= rounds; r++) + { +#ifndef WORDS_BIGENDIAN + VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const); +#else + block rvec = ALIGNED_LOAD (ekey, r); + ALIGNED_STORE (ekey, r, + vec_perm(rvec, rvec, vec_bswap32_const)); + (void)bige_const; +#endif + } + +#undef W +#undef tk +#undef k +#undef W_u32 +#undef tk_u32 +#undef k_u32 + wipememory(&tkk, sizeof(tkk)); +} + +void +_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) +{ + internal_aes_ppc_prepare_decryption (ctx); +} + + +#define GCRY_AES_PPC8 1 +#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc8_encrypt +#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc8_decrypt +#define CFB_ENC_FUNC _gcry_aes_ppc8_cfb_enc +#define CFB_DEC_FUNC _gcry_aes_ppc8_cfb_dec +#define CBC_ENC_FUNC _gcry_aes_ppc8_cbc_enc +#define CBC_DEC_FUNC _gcry_aes_ppc8_cbc_dec +#define CTR_ENC_FUNC _gcry_aes_ppc8_ctr_enc +#define OCB_CRYPT_FUNC _gcry_aes_ppc8_ocb_crypt +#define OCB_AUTH_FUNC _gcry_aes_ppc8_ocb_auth +#define XTS_CRYPT_FUNC _gcry_aes_ppc8_xts_crypt + +#include + +#endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c new file mode 100644 index 0000000..facdedd --- /dev/null +++ b/cipher/rijndael-ppc9le.c @@ -0,0 +1,102 @@ +/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation + * Copyright (C) 2019 Shawn Landden + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, + * and Cryptogams by Andy Polyakov, and if made part of a release of either + * or both projects, is thereafter dual-licensed under the license said project + * is released under. + */ + +#include <config.h> + +#include "rijndael-internal.h" +#include "cipher-internal.h" +#include "bufhelp.h" + +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + +#include "rijndael-ppc-common.h" + + +static ASM_FUNC_ATTR_INLINE block +asm_load_be_const(void) +{ + static const block vec_dummy = { 0 }; + return vec_dummy; +} + +static ASM_FUNC_ATTR_INLINE block +asm_be_swap(block vec, block be_bswap_const) +{ + (void)be_bswap_const; + return vec; +} + +static ASM_FUNC_ATTR_INLINE block +asm_load_be_noswap(unsigned long offset, const void *ptr) +{ + block vec; +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lxvb16x %x0,0,%1\n\t" + : "=wa" (vec) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lxvb16x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + return vec; +} + +static ASM_FUNC_ATTR_INLINE void +asm_store_be_noswap(block vec, unsigned long offset, void *ptr) +{ +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("stxvb16x %x0,0,%1\n\t" + : + : "wa" (vec), "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("stxvb16x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} + + +#define GCRY_AES_PPC9LE 1 +#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc9le_encrypt +#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc9le_decrypt +#define CFB_ENC_FUNC _gcry_aes_ppc9le_cfb_enc +#define CFB_DEC_FUNC _gcry_aes_ppc9le_cfb_dec +#define CBC_ENC_FUNC _gcry_aes_ppc9le_cbc_enc +#define CBC_DEC_FUNC _gcry_aes_ppc9le_cbc_dec 
+#define CTR_ENC_FUNC _gcry_aes_ppc9le_ctr_enc +#define OCB_CRYPT_FUNC _gcry_aes_ppc9le_ocb_crypt +#define OCB_AUTH_FUNC _gcry_aes_ppc9le_ocb_auth +#define XTS_CRYPT_FUNC _gcry_aes_ppc9le_xts_crypt + +#include <rijndael-ppc-functions.h> + +#endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index da5339e..eefccc2 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -208,11 +208,11 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) /* Make a decryption key from an encryption key. */ -void -_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) +static inline void +do_ssse3_prepare_decryption (RIJNDAEL_context *ctx, + byte ssse3_state[SSSE3_STATE_SIZE]) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; - byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare(); @@ -237,6 +237,14 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) vpaes_ssse3_cleanup(); } +void +_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) +{ + byte ssse3_state[SSSE3_STATE_SIZE]; + + do_ssse3_prepare_decryption(ctx, ssse3_state); +} + /* Encrypt one block using the Intel SSSE3 instructions. Block is input * and output through SSE register xmm0. 
*/ @@ -295,9 +303,9 @@ _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, void -_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, - size_t nblocks) +_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; @@ -334,9 +342,9 @@ _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, void -_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, - size_t nblocks, int cbc_mac) +_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks, int cbc_mac) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; @@ -379,9 +387,9 @@ _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, void -_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *ctr, - size_t nblocks) +_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; @@ -447,7 +455,7 @@ _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, - const unsigned char *src) + const unsigned char *src) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; @@ -468,9 +476,9 @@ _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, void -_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, - size_t nblocks) 
+_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; @@ -508,13 +516,19 @@ _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, void -_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, - const unsigned char *inbuf, unsigned char *iv, - size_t nblocks) +_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; + if ( !ctx->decryption_prepared ) + { + do_ssse3_prepare_decryption ( ctx, ssse3_state ); + ctx->decryption_prepared = 1; + } + vpaes_ssse3_prepare_dec (); asm volatile ("movdqu %[iv], %%xmm7\n\t" /* use xmm7 as fast IV storage */ @@ -626,6 +640,12 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; + if ( !ctx->decryption_prepared ) + { + do_ssse3_prepare_decryption ( ctx, ssse3_state ); + ctx->decryption_prepared = 1; + } + vpaes_ssse3_prepare_dec (); /* Preload Offset and Checksum */ @@ -679,7 +699,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, } -void +size_t _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { @@ -687,10 +707,12 @@ _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); + + return 0; } -void +size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { @@ -746,6 +768,8 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, : "memory" ); vpaes_ssse3_cleanup (); + + return 0; } #endif /* USE_SSSE3 */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 8637195..7899177 100644 --- 
a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -77,32 +77,29 @@ extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); -extern void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks, - int cbc_mac); -extern void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *ctr, size_t nblocks); -extern void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt); -extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, - size_t nblocks); +extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); +extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern size_t 
_gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); +extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); #endif #ifdef USE_SSSE3 @@ -116,32 +113,27 @@ extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); -extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks, +extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); -extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *ctr, size_t nblocks); -extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt); -extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, - size_t nblocks); +extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, 
const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); #endif #ifdef USE_PADLOCK @@ -180,34 +172,110 @@ extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); -extern void _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_armv8_ce_cbc_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks, +extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); -extern void _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *ctr, size_t nblocks); -extern void _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, - unsigned char *outbuf, - const unsigned char *inbuf, - unsigned char *iv, size_t nblocks); -extern void _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt); -extern void _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, - 
const void *abuf_arg, size_t nblocks); +extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, + const void *abuf_arg, size_t nblocks); +extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, int encrypt); #endif /*USE_ARM_ASM*/ +#ifdef USE_PPC_CRYPTO +/* PowerPC Crypto implementations of AES */ +extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key); +extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx); + +extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); + +extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); +extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void 
*inbuf_arg, + size_t nblocks); + +extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, + const void *abuf_arg, size_t nblocks); + +extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, int encrypt); +#endif /*USE_PPC_CRYPTO*/ + +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE +/* Power9 little-endian crypto implementations of AES */ +extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); + +extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); +extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); + +extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c, + const void *abuf_arg, size_t nblocks); + +extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, int encrypt); +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ + static unsigned int do_encrypt (const RIJNDAEL_context *ctx, 
unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, @@ -260,7 +328,8 @@ static void prefetch_dec(void) /* Perform the key setup. */ static gcry_err_code_t -do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) +do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, + gcry_cipher_hd_t hd) { static int initialized = 0; static const char *selftest_failed = 0; @@ -268,7 +337,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO) unsigned int hwfeatures; #endif @@ -310,7 +379,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO) hwfeatures = _gcry_get_hw_features (); #endif @@ -327,6 +396,12 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif +#ifdef USE_PPC_CRYPTO + ctx->use_ppc_crypto = 0; +#endif +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + ctx->use_ppc9le_crypto = 0; +#endif if (0) { @@ -340,6 +415,17 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_aesni = 1; + if (hd) + { + hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc; + hd->bulk.cfb_dec = _gcry_aes_aesni_cfb_dec; + hd->bulk.cbc_enc = _gcry_aes_aesni_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_aesni_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_aesni_ctr_enc; + hd->bulk.ocb_crypt = _gcry_aes_aesni_ocb_crypt; + hd->bulk.ocb_auth = _gcry_aes_aesni_ocb_auth; + hd->bulk.xts_crypt = _gcry_aes_aesni_xts_crypt; + } } #endif #ifdef USE_PADLOCK @@ -361,6 +447,16 @@ 
do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_ssse3 = 1; + if (hd) + { + hd->bulk.cfb_enc = _gcry_aes_ssse3_cfb_enc; + hd->bulk.cfb_dec = _gcry_aes_ssse3_cfb_dec; + hd->bulk.cbc_enc = _gcry_aes_ssse3_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_ssse3_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_ssse3_ctr_enc; + hd->bulk.ocb_crypt = _gcry_aes_ssse3_ocb_crypt; + hd->bulk.ocb_auth = _gcry_aes_ssse3_ocb_auth; + } } #endif #ifdef USE_ARM_CE @@ -371,6 +467,60 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_arm_ce = 1; + if (hd) + { + hd->bulk.cfb_enc = _gcry_aes_armv8_ce_cfb_enc; + hd->bulk.cfb_dec = _gcry_aes_armv8_ce_cfb_dec; + hd->bulk.cbc_enc = _gcry_aes_armv8_ce_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_armv8_ce_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_armv8_ce_ctr_enc; + hd->bulk.ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt; + hd->bulk.ocb_auth = _gcry_aes_armv8_ce_ocb_auth; + hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt; + } + } +#endif +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00)) + { + ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt; + ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ppc_crypto = 1; /* same key-setup as USE_PPC_CRYPTO */ + ctx->use_ppc9le_crypto = 1; + if (hd) + { + hd->bulk.cfb_enc = _gcry_aes_ppc9le_cfb_enc; + hd->bulk.cfb_dec = _gcry_aes_ppc9le_cfb_dec; + hd->bulk.cbc_enc = _gcry_aes_ppc9le_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_ppc9le_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_ppc9le_ctr_enc; + hd->bulk.ocb_crypt = _gcry_aes_ppc9le_ocb_crypt; + hd->bulk.ocb_auth = _gcry_aes_ppc9le_ocb_auth; + hd->bulk.xts_crypt = _gcry_aes_ppc9le_xts_crypt; + } + } +#endif +#ifdef USE_PPC_CRYPTO + else if (hwfeatures & HWF_PPC_VCRYPTO) + { + 
ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; + ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ppc_crypto = 1; + if (hd) + { + hd->bulk.cfb_enc = _gcry_aes_ppc8_cfb_enc; + hd->bulk.cfb_dec = _gcry_aes_ppc8_cfb_dec; + hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc; + hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt; + hd->bulk.ocb_auth = _gcry_aes_ppc8_ocb_auth; + hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt; + } } #endif else @@ -399,6 +549,10 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); #endif +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + _gcry_aes_ppc8_setkey (ctx, key); +#endif else { const byte *sbox = ((const byte *)encT) + 1; @@ -503,10 +657,11 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) static gcry_err_code_t -rijndael_setkey (void *context, const byte *key, const unsigned keylen) +rijndael_setkey (void *context, const byte *key, const unsigned keylen, + gcry_cipher_hd_t hd) { RIJNDAEL_context *ctx = context; - return do_setkey (ctx, key, keylen); + return do_setkey (ctx, key, keylen, hd); } @@ -535,7 +690,13 @@ prepare_decryption( RIJNDAEL_context *ctx ) { _gcry_aes_armv8_ce_prepare_decryption (ctx); } -#endif /*USE_SSSE3*/ +#endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_prepare_decryption (ctx); + } +#endif /*USE_PPC_CRYPTO*/ #ifdef USE_PADLOCK else if (ctx->use_padlock) { @@ -790,42 +957,56 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv, const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { 
- _gcry_aes_aesni_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_aesni_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { - _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_ssse3_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_armv8_ce_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + for ( ;nblocks; nblocks-- ) { /* Encrypt the IV. */ burn_depth = encrypt_fn (ctx, iv, iv); /* XOR the input with the IV and store input into IV. 
*/ - buf_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); + cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } @@ -851,41 +1032,55 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv, unsigned char *last_iv; unsigned int burn_depth = 0; - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { - _gcry_aes_aesni_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); - burn_depth = 0; + _gcry_aes_aesni_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { - _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); - burn_depth = 0; + _gcry_aes_ssse3_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); - burn_depth = 0; + _gcry_aes_armv8_ce_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; + } +#endif /*USE_PPC_CRYPTO*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + last_iv = iv; for ( ;nblocks; nblocks-- ) { - buf_xor(outbuf, inbuf, last_iv, BLOCKSIZE); + cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, outbuf, outbuf); @@ -896,7 +1091,7 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv, } if (last_iv != iv) - buf_cpy (iv, last_iv, BLOCKSIZE); + cipher_block_cpy (iv, last_iv, BLOCKSIZE); } if (burn_depth) @@ -920,43 +1115,57 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr, 
unsigned int burn_depth = 0; int i; - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { - _gcry_aes_aesni_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); - burn_depth = 0; + _gcry_aes_aesni_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); + return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { - _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); - burn_depth = 0; + _gcry_aes_ssse3_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); + return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); - burn_depth = 0; + _gcry_aes_armv8_ce_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); + return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn_depth = encrypt_fn (ctx, tmp.x1, ctr); /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); + cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; /* Increment the counter. 
*/ @@ -1187,40 +1396,54 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv, const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { - _gcry_aes_aesni_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_aesni_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { - _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_ssse3_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_armv8_ce_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + for ( ;nblocks; nblocks-- ) { burn_depth = encrypt_fn (ctx, iv, iv); - buf_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); + cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } @@ -1245,39 +1468,53 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv, const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; - check_decryption_preparation (ctx); - - if (ctx->prefetch_dec_fn) - ctx->prefetch_dec_fn(); - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { - _gcry_aes_aesni_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_aesni_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); + return; } 
#endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { - _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_ssse3_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); - burn_depth = 0; + _gcry_aes_armv8_ce_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); + return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO*/ else { unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store @@ -1285,7 +1522,7 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv, burn_depth = decrypt_fn (ctx, savebuf, inbuf); - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } @@ -1309,62 +1546,61 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; - if (encrypt) - { - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - } - else - { - check_decryption_preparation (ctx); - - if (ctx->prefetch_dec_fn) - ctx->prefetch_dec_fn(); - } - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { - _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); - burn_depth = 0; + return _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 
else if (ctx->use_ssse3) { - _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); - burn_depth = 0; + return _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); - burn_depth = 0; + return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + return _gcry_aes_ppc9le_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + return _gcry_aes_ppc8_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + } +#endif /*USE_PPC_CRYPTO*/ else if (encrypt) { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); - buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); + cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE); + cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); + cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); - buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); - buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); + cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; @@ -1375,21 +1611,26 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, union 
{ unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); - buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); + cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE); + cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); - buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); - buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); + cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); + cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; @@ -1411,48 +1652,58 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) const unsigned char *abuf = abuf_arg; unsigned int burn_depth = 0; - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { - _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); - burn_depth = 0; + return _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { - _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); - burn_depth = 0; + return _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { - _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); - burn_depth = 0; + return _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if 
(ctx->use_ppc9le_crypto) + { + return _gcry_aes_ppc9le_ocb_auth (c, abuf, nblocks); + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + return _gcry_aes_ppc8_ocb_auth (c, abuf, nblocks); + } +#endif /*USE_PPC_CRYPTO*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.aad_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); + cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); + cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, + BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); + cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); abuf += BLOCKSIZE; } @@ -1467,6 +1718,106 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) } +/* Bulk encryption/decryption of complete blocks in XTS mode. 
*/ +void +_gcry_aes_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + rijndael_cryptfn_t crypt_fn; + u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); + return; + } +#endif /*USE_AESNI*/ +#ifdef USE_ARM_CE + else if (ctx->use_arm_ce) + { + _gcry_aes_armv8_ce_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); + return; + } +#endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + _gcry_aes_ppc8_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); + return; + } +#endif /*USE_PPC_CRYPTO*/ + else + { + if (encrypt) + { + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + crypt_fn = ctx->encrypt_fn; + } + else + { + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + + crypt_fn = ctx->decrypt_fn; + } + + tweak_next_lo = buf_get_le64 (tweak + 0); + tweak_next_hi = buf_get_le64 (tweak + 8); + + while (nblocks) + { + tweak_lo = tweak_next_lo; + tweak_hi = tweak_next_hi; + + /* Xor-Encrypt/Decrypt-Xor block. */ + tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo; + tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi; + + buf_put_le64 (outbuf + 0, tmp_lo); + buf_put_le64 (outbuf + 8, tmp_hi); + + /* Generate next tweak. 
*/ + carry = -(tweak_next_hi >> 63) & 0x87; + tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); + tweak_next_lo = (tweak_next_lo << 1) ^ carry; + + burn_depth = crypt_fn (ctx, outbuf, outbuf); + + buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo); + buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi); + + outbuf += GCRY_XTS_BLOCK_LEN; + inbuf += GCRY_XTS_BLOCK_LEN; + nblocks--; + } + + buf_put_le64 (tweak + 0, tweak_next_lo); + buf_put_le64 (tweak + 8, tweak_next_hi); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 5 * sizeof(void *)); +} + /* Run the self-tests for AES 128. Returns NULL on success. */ static const char* @@ -1522,7 +1873,7 @@ selftest_basic_128 (void) if (!ctx) return "failed to allocate memory"; - rijndael_setkey (ctx, key_128, sizeof (key_128)); + rijndael_setkey (ctx, key_128, sizeof (key_128), NULL); rijndael_encrypt (ctx, scratch, plaintext_128); if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) { @@ -1565,7 +1916,7 @@ selftest_basic_192 (void) ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; - rijndael_setkey (ctx, key_192, sizeof(key_192)); + rijndael_setkey (ctx, key_192, sizeof(key_192), NULL); rijndael_encrypt (ctx, scratch, plaintext_192); if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) { @@ -1610,7 +1961,7 @@ selftest_basic_256 (void) ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; - rijndael_setkey (ctx, key_256, sizeof(key_256)); + rijndael_setkey (ctx, key_256, sizeof(key_256), NULL); rijndael_encrypt (ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) { diff --git a/cipher/salsa20.c b/cipher/salsa20.c index 9768198..5c5e2b5 100644 --- a/cipher/salsa20.c +++ b/cipher/salsa20.c @@ -366,10 +366,12 @@ salsa20_do_setkey (SALSA20_context_t *ctx, static gcry_err_code_t -salsa20_setkey (void *context, const 
byte *key, unsigned int keylen) +salsa20_setkey (void *context, const byte *key, unsigned int keylen, + gcry_cipher_hd_t hd) { SALSA20_context_t *ctx = (SALSA20_context_t *)context; gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen); + (void)hd; _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); return rc; } @@ -522,7 +524,7 @@ selftest (void) /* 16-byte alignment required for amd64 implementation. */ ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); - salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setkey (ctx, key_1, sizeof key_1, NULL); salsa20_setiv (ctx, nonce_1, sizeof nonce_1); scratch[8] = 0; salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); @@ -530,7 +532,7 @@ selftest (void) return "Salsa20 encryption test 1 failed."; if (scratch[8]) return "Salsa20 wrote too much."; - salsa20_setkey( ctx, key_1, sizeof(key_1)); + salsa20_setkey( ctx, key_1, sizeof(key_1), NULL); salsa20_setiv (ctx, nonce_1, sizeof nonce_1); salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) @@ -538,12 +540,12 @@ selftest (void) for (i = 0; i < sizeof buf; i++) buf[i] = i; - salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setkey (ctx, key_1, sizeof key_1, NULL); salsa20_setiv (ctx, nonce_1, sizeof nonce_1); /*encrypt*/ salsa20_encrypt_stream (ctx, buf, buf, sizeof buf); /*decrypt*/ - salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setkey (ctx, key_1, sizeof key_1, NULL); salsa20_setiv (ctx, nonce_1, sizeof nonce_1); salsa20_encrypt_stream (ctx, buf, buf, 1); salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1); diff --git a/cipher/seed.c b/cipher/seed.c index 9f87c05..e36d3cf 100644 --- a/cipher/seed.c +++ b/cipher/seed.c @@ -309,11 +309,12 @@ do_setkey (SEED_context *ctx, const byte *key, const unsigned keylen) } static gcry_err_code_t -seed_setkey (void *context, const byte *key, const unsigned keylen) +seed_setkey (void *context, 
const byte *key, const unsigned keylen, + gcry_cipher_hd_t hd) { SEED_context *ctx = context; - int rc = do_setkey (ctx, key, keylen); + (void)hd; _gcry_burn_stack (4*6 + sizeof(void*)*2 + sizeof(int)*2); return rc; } @@ -446,7 +447,7 @@ selftest (void) 0x22, 0x6B, 0xC3, 0x14, 0x2C, 0xD4, 0x0D, 0x4A, }; - seed_setkey (&ctx, key, sizeof(key)); + seed_setkey (&ctx, key, sizeof(key), NULL); seed_encrypt (&ctx, scratch, plaintext); if (memcmp (scratch, ciphertext, sizeof (ciphertext))) return "SEED test encryption failed."; diff --git a/cipher/serpent.c b/cipher/serpent.c index ea4b8ed..8e3faa7 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -748,13 +748,16 @@ serpent_setkey_internal (serpent_context_t *context, /* Initialize CTX with the key KEY of KEY_LENGTH bytes. */ static gcry_err_code_t serpent_setkey (void *ctx, - const byte *key, unsigned int key_length) + const byte *key, unsigned int key_length, + gcry_cipher_hd_t hd) { serpent_context_t *context = ctx; static const char *serpent_test_ret; static int serpent_init_done; gcry_err_code_t ret = GPG_ERR_NO_ERROR; + (void)hd; + if (! serpent_init_done) { /* Execute a self-test the first time, Serpent is used. */ @@ -999,7 +1002,7 @@ _gcry_serpent_ctr_enc(void *context, unsigned char *ctr, /* Encrypt the counter. */ serpent_encrypt_internal(ctx, ctr, tmpbuf); /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t)); + cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t)); outbuf += sizeof(serpent_block_t); inbuf += sizeof(serpent_block_t); /* Increment the counter. */ @@ -1114,7 +1117,8 @@ _gcry_serpent_cbc_dec(void *context, unsigned char *iv, the intermediate result to SAVEBUF. 
*/ serpent_decrypt_internal (ctx, inbuf, savebuf); - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t)); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, + sizeof(serpent_block_t)); inbuf += sizeof(serpent_block_t); outbuf += sizeof(serpent_block_t); } @@ -1218,7 +1222,7 @@ _gcry_serpent_cfb_dec(void *context, unsigned char *iv, for ( ;nblocks; nblocks-- ) { serpent_encrypt_internal(ctx, iv, iv); - buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t)); + cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t)); outbuf += sizeof(serpent_block_t); inbuf += sizeof(serpent_block_t); } diff --git a/cipher/twofish.c b/cipher/twofish.c index 942e8d4..3409255 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -734,12 +734,15 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) } static gcry_err_code_t -twofish_setkey (void *context, const byte *key, unsigned int keylen) +twofish_setkey (void *context, const byte *key, unsigned int keylen, + gcry_cipher_hd_t hd) { TWOFISH_context *ctx = context; unsigned int hwfeatures = _gcry_get_hw_features (); int rc; + (void)hd; + rc = do_twofish_setkey (ctx, key, keylen); #ifdef USE_AVX2 @@ -1245,7 +1248,7 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, burn_stack_depth = burn; /* XOR the input with the encrypted counter and store in output. */ - buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE); + cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE); outbuf += TWOFISH_BLOCKSIZE; inbuf += TWOFISH_BLOCKSIZE; /* Increment the counter. 
*/ @@ -1327,7 +1330,7 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, if (burn > burn_stack_depth) burn_stack_depth = burn; - buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE); + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE); inbuf += TWOFISH_BLOCKSIZE; outbuf += TWOFISH_BLOCKSIZE; } @@ -1399,7 +1402,7 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, if (burn > burn_stack_depth) burn_stack_depth = burn; - buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE); + cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE); outbuf += TWOFISH_BLOCKSIZE; inbuf += TWOFISH_BLOCKSIZE; } @@ -1710,7 +1713,7 @@ selftest (void) 0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA }; - twofish_setkey (&ctx, key, sizeof(key)); + twofish_setkey (&ctx, key, sizeof(key), NULL); twofish_encrypt (&ctx, scratch, plaintext); if (memcmp (scratch, ciphertext, sizeof (ciphertext))) return "Twofish-128 test encryption failed."; @@ -1718,7 +1721,7 @@ selftest (void) if (memcmp (scratch, plaintext, sizeof (plaintext))) return "Twofish-128 test decryption failed."; - twofish_setkey (&ctx, key_256, sizeof(key_256)); + twofish_setkey (&ctx, key_256, sizeof(key_256), NULL); twofish_encrypt (&ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) return "Twofish-256 test encryption failed."; @@ -1800,13 +1803,13 @@ main() /* Encryption test. 
*/ for (i = 0; i < 125; i++) { - twofish_setkey (&ctx, buffer[0], sizeof (buffer[0])); + twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), NULL); for (j = 0; j < 1000; j++) twofish_encrypt (&ctx, buffer[2], buffer[2]); - twofish_setkey (&ctx, buffer[1], sizeof (buffer[1])); + twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), NULL); for (j = 0; j < 1000; j++) twofish_encrypt (&ctx, buffer[3], buffer[3]); - twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2); + twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, NULL); for (j = 0; j < 1000; j++) { twofish_encrypt (&ctx, buffer[0], buffer[0]); twofish_encrypt (&ctx, buffer[1], buffer[1]); @@ -1818,15 +1821,15 @@ main() /* Decryption test. */ for (i = 0; i < 125; i++) { - twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2); + twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, NULL); for (j = 0; j < 1000; j++) { twofish_decrypt (&ctx, buffer[0], buffer[0]); twofish_decrypt (&ctx, buffer[1], buffer[1]); } - twofish_setkey (&ctx, buffer[1], sizeof (buffer[1])); + twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), NULL); for (j = 0; j < 1000; j++) twofish_decrypt (&ctx, buffer[3], buffer[3]); - twofish_setkey (&ctx, buffer[0], sizeof (buffer[0])); + twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), NULL); for (j = 0; j < 1000; j++) twofish_decrypt (&ctx, buffer[2], buffer[2]); } diff --git a/configure.ac b/configure.ac index b7fb62c..06e122c 100644 --- a/configure.ac +++ b/configure.ac @@ -649,6 +649,14 @@ AC_ARG_ENABLE(arm-crypto-support, armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) +# Implementation of the --disable-ppc-crypto-support switch. 
+AC_MSG_CHECKING([whether PPC crypto support is requested]) +AC_ARG_ENABLE(ppc-crypto-support, + AC_HELP_STRING([--disable-ppc-crypto-support], + [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), + ppccryptosupport=$enableval,ppccryptosupport=yes) +AC_MSG_RESULT($ppccryptosupport) + # Implementation of the --disable-O-flag-munging switch. AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], @@ -1196,6 +1204,9 @@ if test "$mpi_cpu_arch" != "arm" ; then fi fi +if test "$mpi_cpu_arch" != "ppc"; then + ppccryptosupport="n/a" +fi ############################################# #### #### @@ -1722,6 +1733,113 @@ if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then fi +# +# Check whether PowerPC AltiVec/VSX intrinsics +# +AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics], + [gcry_cv_cc_ppc_altivec], + [if test "$mpi_cpu_arch" != "ppc" ; then + gcry_cv_cc_ppc_altivec="n/a" + else + gcry_cv_cc_ppc_altivec=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[#include <altivec.h> + typedef vector unsigned char block; + block fn(block in) + { + block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); + return vec_cipher_be (t, in); + } + ]])], + [gcry_cv_cc_ppc_altivec=yes]) + fi]) +if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then + AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1, + [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics]) +fi + +_gcc_cflags_save=$CFLAGS +CFLAGS="$CFLAGS -maltivec -mvsx -mcrypto" + +if test "$gcry_cv_cc_ppc_altivec" = "no" && + test "$mpi_cpu_arch" = "ppc" ; then + AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags], + [gcry_cv_cc_ppc_altivec_cflags], + [gcry_cv_cc_ppc_altivec_cflags=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[#include <altivec.h> + typedef vector unsigned char block; + block fn(block in) + { + block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); + return 
vec_cipher_be (t, in); + }]])], + [gcry_cv_cc_ppc_altivec_cflags=yes])]) + if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then + AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1, + [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics]) + AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1, + [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags]) + fi +fi + +AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS, + test "$gcry_cv_cc_ppc_altivec_cflags" = "yes") + +# Restore flags. +CFLAGS=$_gcc_cflags_save; + + +# +# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions +# +AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions], + [gcry_cv_gcc_inline_asm_ppc_altivec], + [if test "$mpi_cpu_arch" != "ppc" ; then + gcry_cv_gcc_inline_asm_ppc_altivec="n/a" + else + gcry_cv_gcc_inline_asm_ppc_altivec=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[__asm__(".globl testfn;\n" + "testfn:\n" + "stvx %v31,%r12,%r0;\n" + "lvx %v20,%r12,%r0;\n" + "vcipher %v0, %v1, %v22;\n" + "lxvw4x %vs32, %r0, %r1;\n" + "vadduwm %v0, %v1, %v22;\n" + ); + ]])], + [gcry_cv_gcc_inline_asm_ppc_altivec=yes]) + fi]) +if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then + AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1, + [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto instructions]) +fi + + +# +# Check whether GCC inline assembler supports PowerISA 3.00 instructions +# +AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 instructions], + [gcry_cv_gcc_inline_asm_ppc_arch_3_00], + [if test "$mpi_cpu_arch" != "ppc" ; then + gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a" + else + gcry_cv_gcc_inline_asm_ppc_arch_3_00=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[__asm__(".globl testfn;\n" + "testfn:\n" + "stxvb16x %r1,%v12,%v30;\n" + ); + ]])], + [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes]) + fi]) +if test 
"$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then + AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1, + [Defined if inline assembler supports PowerISA 3.00 instructions]) +fi + + ####################################### #### Checks for library functions. #### ####################################### @@ -1999,6 +2117,10 @@ if test x"$armcryptosupport" = xyes ; then AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1, [Enable support for ARMv8 Crypto Extension instructions.]) fi +if test x"$ppccryptosupport" = xyes ; then + AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1, + [Enable support for POWER 8 (PowerISA 2.07) crypto extension.]) +fi if test x"$jentsupport" = xyes ; then AC_DEFINE(ENABLE_JENT_SUPPORT, 1, [Enable support for the jitter entropy collector.]) @@ -2106,6 +2228,21 @@ if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; + powerpc64le-*-*) + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc9le.lo" + ;; + powerpc64-*-*) + # Big-Endian. + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo" + ;; + powerpc-*-*) + # Big-Endian. 
+ # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo" + ;; esac case "$mpi_cpu_arch" in @@ -2555,6 +2692,7 @@ case "$mpi_cpu_arch" in ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) + GCRYPT_HWF_MODULES="hwf-ppc.lo" ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) @@ -2653,6 +2791,7 @@ GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport]) GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) +GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then diff --git a/src/Makefile.am b/src/Makefile.am index 3d3149f..6d5d93a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -66,7 +66,7 @@ libgcrypt_la_SOURCES = \ hmac256.c hmac256.h context.c context.h \ ec-context.h -EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c +EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@ diff --git a/src/cipher-proto.h b/src/cipher-proto.h index d831d3f..3ea6779 100644 --- a/src/cipher-proto.h +++ b/src/cipher-proto.h @@ -132,7 +132,8 @@ typedef struct gcry_pk_spec /* Type for the cipher_setkey function. */ typedef gcry_err_code_t (*gcry_cipher_setkey_t) (void *c, const unsigned char *key, - unsigned keylen); + unsigned keylen, + gcry_cipher_hd_t hd); /* Type for the cipher_encrypt function. 
*/ typedef unsigned int (*gcry_cipher_encrypt_t) (void *c, diff --git a/src/cipher.h b/src/cipher.h index f2acb55..d9e0ac6 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -158,6 +158,9 @@ size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); +void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); /*-- blowfish.c --*/ void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv, diff --git a/src/g10lib.h b/src/g10lib.h index 37c4eaf..6158708 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -233,7 +233,9 @@ char **_gcry_strtokenize (const char *string, const char *delim); #define HWF_INTEL_RDTSC (1 << 20) - +#define HWF_PPC_VCRYPTO (1 << 22) +#define HWF_PPC_ARCH_3_00 (1 << 23) +#define HWF_PPC_ARCH_2_07 (1 << 24) gpg_err_code_t _gcry_disable_hw_feature (const char *name); void _gcry_detect_hw_features (void); diff --git a/src/hwf-common.h b/src/hwf-common.h index 8f156b5..76f346e 100644 --- a/src/hwf-common.h +++ b/src/hwf-common.h @@ -22,6 +22,6 @@ unsigned int _gcry_hwf_detect_x86 (void); unsigned int _gcry_hwf_detect_arm (void); - +unsigned int _gcry_hwf_detect_ppc (void); #endif /*HWF_COMMON_H*/ diff --git a/src/hwf-ppc.c b/src/hwf-ppc.c new file mode 100644 index 0000000..7477a71 --- /dev/null +++ b/src/hwf-ppc.c @@ -0,0 +1,243 @@ +/* hwf-ppc.c - Detect hardware features - PPC part + * Copyright (C) 2013,2019 Jussi Kivilinna + * Copyright (C) 2019 Shawn Landden + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <unistd.h> +#include <errno.h> +#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \ + defined(HAVE_ELF_AUX_INFO)) +#include <sys/auxv.h> +#endif + +#include "g10lib.h" +#include "hwf-common.h" + +#if !defined (__powerpc__) && !defined (__powerpc64__) +# error Module build for wrong CPU. +#endif + + +#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \ + !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP) +#define HAVE_GETAUXVAL +static unsigned long getauxval(unsigned long type) +{ + unsigned long auxval = 0; + int err; + + /* FreeBSD provides 'elf_aux_info' function that does the same as + * 'getauxval' on Linux. */ + + err = elf_aux_info (type, &auxval, sizeof(auxval)); + if (err) + { + errno = err; + auxval = 0; + } + + return auxval; +} +#endif + + +#undef HAS_SYS_AT_HWCAP +#if defined(__linux__) || \ + (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)) +#define HAS_SYS_AT_HWCAP 1 + +struct feature_map_s + { + unsigned int hwcap_flag; + unsigned int hwcap2_flag; + unsigned int hwf_flag; + }; + +#if defined(__powerpc__) || defined(__powerpc64__) + +/* Note: These macros have same values on Linux and FreeBSD. 
*/ +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +#ifndef PPC_FEATURE2_ARCH_2_07 +# define PPC_FEATURE2_ARCH_2_07 0x80000000 +#endif +#ifndef PPC_FEATURE2_VEC_CRYPTO +# define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif +#ifndef PPC_FEATURE2_ARCH_3_00 +# define PPC_FEATURE2_ARCH_3_00 0x00800000 +#endif + +static const struct feature_map_s ppc_features[] = + { + { 0, PPC_FEATURE2_ARCH_2_07, HWF_PPC_ARCH_2_07 }, +#ifdef ENABLE_PPC_CRYPTO_SUPPORT + { 0, PPC_FEATURE2_VEC_CRYPTO, HWF_PPC_VCRYPTO }, +#endif + { 0, PPC_FEATURE2_ARCH_3_00, HWF_PPC_ARCH_3_00 }, + }; +#endif + +static int +get_hwcap(unsigned int *hwcap, unsigned int *hwcap2) +{ + struct { unsigned long a_type; unsigned long a_val; } auxv; + FILE *f; + int err = -1; + static int hwcap_initialized = 0; + static unsigned int stored_hwcap = 0; + static unsigned int stored_hwcap2 = 0; + + if (hwcap_initialized) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } + +#if 0 // TODO: configure.ac detection for __builtin_cpu_supports + // TODO: move to 'detect_ppc_builtin_cpu_supports' +#if defined(__GLIBC__) && defined(__GNUC__) && __GNUC__ >= 6 + /* __builtin_cpu_supports returns 0 if glibc support doesn't exist, so + * we can only trust positive results. 
*/ +#ifdef ENABLE_PPC_CRYPTO_SUPPORT + if (__builtin_cpu_supports("vcrypto")) /* TODO: Configure.ac */ + { + stored_hwcap2 |= PPC_FEATURE2_VEC_CRYPTO; + hwcap_initialized = 1; + } +#endif + + if (__builtin_cpu_supports("arch_3_00")) /* TODO: Configure.ac */ + { + stored_hwcap2 |= PPC_FEATURE2_ARCH_3_00; + hwcap_initialized = 1; + } +#endif +#endif + +#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL) + errno = 0; + auxv.a_val = getauxval (AT_HWCAP); + if (errno == 0) + { + stored_hwcap |= auxv.a_val; + hwcap_initialized = 1; + } + + if (AT_HWCAP2 >= 0) + { + errno = 0; + auxv.a_val = getauxval (AT_HWCAP2); + if (errno == 0) + { + stored_hwcap2 |= auxv.a_val; + hwcap_initialized = 1; + } + } + + if (hwcap_initialized && (stored_hwcap || stored_hwcap2)) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } +#endif + + f = fopen("/proc/self/auxv", "r"); + if (!f) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return -1; + } + + while (fread(&auxv, sizeof(auxv), 1, f) > 0) + { + if (auxv.a_type == AT_HWCAP) + { + stored_hwcap |= auxv.a_val; + hwcap_initialized = 1; + } + + if (auxv.a_type == AT_HWCAP2) + { + stored_hwcap2 |= auxv.a_val; + hwcap_initialized = 1; + } + } + + if (hwcap_initialized) + err = 0; + + fclose(f); + + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return err; +} + +static unsigned int +detect_ppc_at_hwcap(void) +{ + unsigned int hwcap; + unsigned int hwcap2; + unsigned int features = 0; + unsigned int i; + + if (get_hwcap(&hwcap, &hwcap2) < 0) + return features; + + for (i = 0; i < DIM(ppc_features); i++) + { + if (hwcap & ppc_features[i].hwcap_flag) + features |= ppc_features[i].hwf_flag; + + if (hwcap2 & ppc_features[i].hwcap2_flag) + features |= ppc_features[i].hwf_flag; + } + + return features; +} + +#endif + +unsigned int +_gcry_hwf_detect_ppc (void) +{ + unsigned int ret = 0; + unsigned int broken_hwfs = 0; + +#if defined (HAS_SYS_AT_HWCAP) + ret |= detect_ppc_at_hwcap (); +#endif + + ret &= 
~broken_hwfs; + + return ret; +} diff --git a/src/hwfeatures.c b/src/hwfeatures.c index 1cad546..7e0d490 100644 --- a/src/hwfeatures.c +++ b/src/hwfeatures.c @@ -42,6 +42,7 @@ static struct const char *desc; } hwflist[] = { +#if defined(HAVE_CPU_ARCH_X86) { HWF_PADLOCK_RNG, "padlock-rng" }, { HWF_PADLOCK_AES, "padlock-aes" }, { HWF_PADLOCK_SHA, "padlock-sha" }, @@ -58,11 +59,17 @@ static struct { HWF_INTEL_AVX2, "intel-avx2" }, { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" }, { HWF_INTEL_RDTSC, "intel-rdtsc" }, +#elif defined(HAVE_CPU_ARCH_ARM) { HWF_ARM_NEON, "arm-neon" }, { HWF_ARM_AES, "arm-aes" }, { HWF_ARM_SHA1, "arm-sha1" }, { HWF_ARM_SHA2, "arm-sha2" }, - { HWF_ARM_PMULL, "arm-pmull" } + { HWF_ARM_PMULL, "arm-pmull" }, +#elif defined(HAVE_CPU_ARCH_PPC) + { HWF_PPC_VCRYPTO, "ppc-vcrypto" }, + { HWF_PPC_ARCH_3_00, "ppc-arch_3_00" }, + { HWF_PPC_ARCH_2_07, "ppc-arch_2_07" }, +#endif }; /* A bit vector with the hardware features which shall not be used. @@ -207,12 +214,14 @@ _gcry_detect_hw_features (void) { hw_features = _gcry_hwf_detect_x86 (); } -#endif /* HAVE_CPU_ARCH_X86 */ -#if defined (HAVE_CPU_ARCH_ARM) +#elif defined (HAVE_CPU_ARCH_ARM) { hw_features = _gcry_hwf_detect_arm (); } -#endif /* HAVE_CPU_ARCH_ARM */ - +#elif defined (HAVE_CPU_ARCH_PPC) + { + hw_features = _gcry_hwf_detect_ppc (); + } +#endif hw_features &= ~disabled_hw_features; }