Blame crypto/ec/ecp_nistp256.c

Packit c4476c
/*
Packit c4476c
 * Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
 *
Packit c4476c
 * Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
 * this file except in compliance with the License.  You can obtain a copy
Packit c4476c
 * in the file LICENSE in the source distribution or at
Packit c4476c
 * https://www.openssl.org/source/license.html
Packit c4476c
 */
Packit c4476c
Packit c4476c
/* Copyright 2011 Google Inc.
Packit c4476c
 *
Packit c4476c
 * Licensed under the Apache License, Version 2.0 (the "License");
Packit c4476c
 *
Packit c4476c
 * you may not use this file except in compliance with the License.
Packit c4476c
 * You may obtain a copy of the License at
Packit c4476c
 *
Packit c4476c
 *     http://www.apache.org/licenses/LICENSE-2.0
Packit c4476c
 *
Packit c4476c
 *  Unless required by applicable law or agreed to in writing, software
Packit c4476c
 *  distributed under the License is distributed on an "AS IS" BASIS,
Packit c4476c
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Packit c4476c
 *  See the License for the specific language governing permissions and
Packit c4476c
 *  limitations under the License.
Packit c4476c
 */
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
Packit c4476c
 *
Packit c4476c
 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
Packit c4476c
 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
Packit c4476c
 * work which got its smarts from Daniel J. Bernstein's work on the same.
Packit c4476c
 */
Packit c4476c
Packit c4476c
#include <openssl/opensslconf.h>
Packit c4476c
#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
Packit c4476c
NON_EMPTY_TRANSLATION_UNIT
Packit c4476c
#else
Packit c4476c
Packit c4476c
# include <stdint.h>
Packit c4476c
# include <string.h>
Packit c4476c
# include <openssl/err.h>
Packit c4476c
# include "ec_local.h"
Packit c4476c
Packit c4476c
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
/*
 * 128-bit integer types. __uint128_t and __int128_t are compiler extensions
 * (not standard C); the guard above checks that the compiler advertises a
 * 16-byte __int128, which even gcc does not provide on 32-bit platforms.
 */
typedef __uint128_t uint128_t;
typedef __int128_t int128_t;
# else
#  error "Your compiler doesn't appear to support 128-bit integer types"
# endif
Packit c4476c
Packit c4476c
/* Short aliases for the fixed-width unsigned types used in this file. */
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;

/*
 * The underlying field: P256 operates over GF(2^256-2^224+2^192+2^96-1).
 * An element of this field serialises into 32 bytes; such a buffer is
 * called an felem_bytearray.
 */
typedef u8 felem_bytearray[32];

/*
 * The parameters of P256, taken from FIPS 186-3, page 86, stored as
 * big-endian byte strings in the order p, a, b, x, y.
 */
static const felem_bytearray nistp256_curve_params[5] = {
    /* p */
    {
        0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
    },
    /* a = -3 */
    {
        0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc
    },
    /* b */
    {
        0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
        0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
        0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
        0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b
    },
    /* x (base point) */
    {
        0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47,
        0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
        0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
        0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96
    },
    /* y (base point) */
    {
        0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b,
        0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
        0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
        0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5
    }
};
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * The representation of field elements.
Packit c4476c
 * ------------------------------------
Packit c4476c
 *
Packit c4476c
 * We represent field elements with either four 128-bit values, eight 128-bit
Packit c4476c
 * values, or four 64-bit values. The field element represented is:
Packit c4476c
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192  (mod p)
Packit c4476c
 * or:
Packit c4476c
 *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448  (mod p)
Packit c4476c
 *
Packit c4476c
 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
Packit c4476c
 * apart, but are 128-bits wide, the most significant bits of each limb overlap
Packit c4476c
 * with the least significant bits of the next.
Packit c4476c
 *
Packit c4476c
 * A field element with four limbs is an 'felem'. One with eight limbs is a
Packit c4476c
 * 'longfelem'
Packit c4476c
 *
Packit c4476c
 * A field element with four, 64-bit values is called a 'smallfelem'. Small
Packit c4476c
 * values are used as intermediate values before multiplication.
Packit c4476c
 */
Packit c4476c
Packit c4476c
# define NLIMBS 4
Packit c4476c
Packit c4476c
typedef uint128_t limb;
Packit c4476c
typedef limb felem[NLIMBS];
Packit c4476c
typedef limb longfelem[NLIMBS * 2];
Packit c4476c
typedef u64 smallfelem[NLIMBS];
Packit c4476c
Packit c4476c
/*
 * The prime p as four 64-bit words, least-significant word first.
 */
static const u64 kPrime[4] = {
    0xfffffffffffffffful,
    0x00000000fffffffful,
    0x0000000000000000ul,
    0xffffffff00000001ul
};

/* Mask selecting the low 63 bits of a 64-bit word. */
static const u64 bottom63bits = 0x7ffffffffffffffful;
Packit c4476c
Packit c4476c
/*
 * bin32_to_felem takes a little-endian, 32-byte array and converts it into
 * felem form: limb i receives the 64-bit word stored at in[8*i .. 8*i+7].
 *
 * The words are fetched with memcpy rather than by dereferencing a casted
 * u64 pointer, so |in| needs no particular alignment, its const qualifier
 * is respected and no aliasing rules are broken. The bytes are interpreted
 * in host order, so this still assumes that the CPU is little-endian.
 */
static void bin32_to_felem(felem out, const u8 in[32])
{
    u64 words[NLIMBS];
    unsigned i;

    memcpy(words, in, sizeof(words));
    for (i = 0; i < NLIMBS; i++)
        out[i] = words[i];
}
Packit c4476c
Packit c4476c
/*
 * smallfelem_to_bin32 takes a smallfelem and serialises it into a little
 * endian, 32-byte array: word i is written to out[8*i .. 8*i+7].
 *
 * The words are stored with memcpy rather than through a casted u64
 * pointer, so |out| needs no particular alignment and no aliasing rules are
 * broken. The words are written in host byte order, so this still assumes
 * that the CPU is little-endian.
 */
static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
{
    /* |in| decays to a pointer here, so size the copy from the type. */
    memcpy(out, in, sizeof(smallfelem));
}
Packit c4476c
Packit c4476c
/*
 * BN_to_felem converts an OpenSSL BIGNUM into an felem.
 *
 * Returns 1 on success. Returns 0, after raising
 * EC_R_BIGNUM_OUT_OF_RANGE, if |bn| is negative or if BN_bn2lebinpad()
 * fails to produce the 32-byte little-endian encoding; |out| is left
 * untouched in that case.
 */
static int BN_to_felem(felem out, const BIGNUM *bn)
{
    felem_bytearray le_bytes;

    /* The conversion is only attempted for non-negative values. */
    if (BN_is_negative(bn)
        || BN_bn2lebinpad(bn, le_bytes, sizeof(le_bytes)) < 0) {
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
        return 0;
    }

    bin32_to_felem(out, le_bytes);
    return 1;
}
Packit c4476c
Packit c4476c
/*
 * smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM by
 * serialising it to 32 little-endian bytes and handing those to
 * BN_lebin2bn(). The return value is whatever BN_lebin2bn() returns for
 * the destination |out|.
 */
static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
{
    felem_bytearray le_bytes;

    smallfelem_to_bin32(le_bytes, in);

    return BN_lebin2bn(le_bytes, sizeof(le_bytes), out);
}
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * Field operations
Packit c4476c
 * ----------------
Packit c4476c
 */
Packit c4476c
Packit c4476c
/* smallfelem_one sets |out| to the value 1. */
static void smallfelem_one(smallfelem out)
{
    unsigned i;

    out[0] = 1;
    for (i = 1; i < NLIMBS; i++)
        out[i] = 0;
}
Packit c4476c
Packit c4476c
/* smallfelem_assign copies the four words of |in| into |out|. */
static void smallfelem_assign(smallfelem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
Packit c4476c
Packit c4476c
/* felem_assign copies the four limbs of |in| into |out|. */
static void felem_assign(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
Packit c4476c
Packit c4476c
/*
 * felem_sum sets out = out + in, limb by limb. No carries are propagated
 * between limbs.
 */
static void felem_sum(felem out, const felem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] += in[i];
}
Packit c4476c
Packit c4476c
/*
 * felem_small_sum sets out = out + in, where |in| is a smallfelem. Each
 * 64-bit word of |in| is added to the corresponding limb of |out|; no
 * carries are propagated between limbs.
 */
static void felem_small_sum(felem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] += in[i];
}
Packit c4476c
Packit c4476c
/*
 * felem_scalar sets out = out * scalar by multiplying every limb of |out|
 * by |scalar|.
 */
static void felem_scalar(felem out, const u64 scalar)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] *= scalar;
}
Packit c4476c
Packit c4476c
/*
 * longfelem_scalar sets out = out * scalar by multiplying each of the eight
 * limbs of |out| by |scalar|.
 */
static void longfelem_scalar(longfelem out, const u64 scalar)
{
    unsigned i;

    for (i = 0; i < NLIMBS * 2; i++)
        out[i] *= scalar;
}
Packit c4476c
Packit c4476c
# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
Packit c4476c
# define two105 (((limb)1) << 105)
Packit c4476c
# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
Packit c4476c
Packit c4476c
/* zero105 is 0 mod p */
Packit c4476c
static const felem zero105 =
Packit c4476c
    { two105m41m9, two105, two105m41p9, two105m41p9 };
Packit c4476c
Packit c4476c
/*-
 * smallfelem_neg sets |out| to |-small|
 * On exit:
 *   out[i] <= 2^105
 */
static void smallfelem_neg(felem out, const smallfelem small)
{
    unsigned i;

    /*
     * Subtracting from zero105 (which is 0 mod p) rather than from literal
     * zero keeps every limb from underflowing.
     */
    for (i = 0; i < NLIMBS; i++)
        out[i] = zero105[i] - small[i];
}
Packit c4476c
Packit c4476c
/*-
 * felem_diff subtracts |in| from |out|
 * On entry:
 *   in[i] < 2^104
 * On exit:
 *   each out[i] has grown by at most 2^105
 */
static void felem_diff(felem out, const felem in)
{
    unsigned i;

    /* First add zero105 (0 mod p) so the subtraction cannot underflow... */
    for (i = 0; i < NLIMBS; i++)
        out[i] += zero105[i];

    /* ...then take |in| away limb by limb. */
    for (i = 0; i < NLIMBS; i++)
        out[i] -= in[i];
}
Packit c4476c
Packit c4476c
# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
Packit c4476c
# define two107 (((limb)1) << 107)
Packit c4476c
# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
Packit c4476c
Packit c4476c
/* zero107 is 0 mod p */
Packit c4476c
static const felem zero107 =
Packit c4476c
    { two107m43m11, two107, two107m43p11, two107m43p11 };
Packit c4476c
Packit c4476c
/*-
 * An alternative felem_diff for larger inputs |in|
 * felem_diff_zero107 subtracts |in| from |out|
 * On entry:
 *   in[i] < 2^106
 * On exit:
 *   each out[i] has grown by at most 2^107
 */
static void felem_diff_zero107(felem out, const felem in)
{
    unsigned i;

    /* First add zero107 (0 mod p) so the subtraction cannot underflow... */
    for (i = 0; i < NLIMBS; i++)
        out[i] += zero107[i];

    /* ...then take |in| away limb by limb. */
    for (i = 0; i < NLIMBS; i++)
        out[i] -= in[i];
}
Packit c4476c
Packit c4476c
/*-
 * longfelem_diff subtracts |in| from |out|
 * On entry:
 *   in[i] < 7*2^67
 * On exit:
 *   each out[i] has grown by less than 2^70 + 2^40
 */
static void longfelem_diff(longfelem out, const longfelem in)
{
    /* An eight-limb representation of 0 mod p with every limb near 2^70. */
    static const longfelem zero70 = {
        /* 2^70 - 2^8 + 2^6 */
        (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6),
        /* 2^70 + 2^40 */
        (((limb)1) << 70) + (((limb)1) << 40),
        /* 2^70 */
        (((limb)1) << 70),
        /* 2^70 - 2^40 - 2^38 + 2^6 */
        (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38)
            + (((limb)1) << 6),
        /* 2^70 - 2^6, four times */
        (((limb)1) << 70) - (((limb)1) << 6),
        (((limb)1) << 70) - (((limb)1) << 6),
        (((limb)1) << 70) - (((limb)1) << 6),
        (((limb)1) << 70) - (((limb)1) << 6)
    };
    unsigned i;

    /* add 0 mod p to avoid underflow */
    for (i = 0; i < NLIMBS * 2; i++)
        out[i] += zero70[i];

    /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
    for (i = 0; i < NLIMBS * 2; i++)
        out[i] -= in[i];
}
Packit c4476c
Packit c4476c
# define two64m0 (((limb)1) << 64) - 1
Packit c4476c
# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
Packit c4476c
# define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
Packit c4476c
# define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
Packit c4476c
Packit c4476c
/* zero110 is 0 mod p */
Packit c4476c
static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
Packit c4476c
Packit c4476c
/*-
 * felem_shrink converts an felem into a smallfelem. The result isn't quite
 * minimal as the value may be greater than p.
 *
 * All steps are straight-line arithmetic and masking; there are no
 * data-dependent branches.
 *
 * On entry:
 *   in[i] < 2^109
 * On exit:
 *   out[i] < 2^64
 */
static void felem_shrink(smallfelem out, const felem in)
{
    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
    limb t0, t1, t2, t3;
    u64 fold, fold_total;
    u64 overflow_mask, top_bit_mask, ge_test_mask, select;
    u64 low_word;

    /*
     * Add zero110 (0 mod p) to every limb and move the high half of in[2]
     * into limb 3 (carry 2->3).
     */
    t3 = zero110[3] + in[3] + ((u64)(in[2] >> 64));
    /* t3 < 2^110 */
    t2 = zero110[2] + (u64)in[2];
    t0 = zero110[0] + in[0];
    t1 = zero110[1] + in[1];
    /* t0 < 2^110, t1 < 2^111, t2 < 2^65 */

    /*
     * Two partial reductions eliminate the high word of t3. The matching
     * adjustments to t0 and t1 are accumulated in fold_total and applied
     * afterwards.
     */
    fold = (u64)(t3 >> 64);     /* fold < 2^46 */
    t3 = (limb)(u64)t3 - fold + (((limb)fold) << 32);
    /* t3 < 2^79 */

    fold_total = fold;
    fold = (u64)(t3 >> 64);     /* fold < 2^15 */
    fold_total += fold;         /* fold_total < 2^46 + 2^15 < 2^47 */
    t3 = (limb)(u64)t3 - fold + (((limb)fold) << 32);
    /* t3 < 2^64 + 2^47 */

    /* Complete the two partial reductions on the low limbs. */
    t0 += fold_total;
    t1 -= ((limb)fold_total) << 32;

    /*
     * In order to make space in t3 for the carry from 2 -> 3, kPrime is
     * conditionally subtracted if t3 is large enough.
     *
     * As t3 < 2^65 its high word is 0 or 1; overflow_mask is all ones in
     * the latter case and all zeros otherwise.
     */
    overflow_mask = 0 - (u64)(t3 >> 64);

    low_word = (u64)t3;
    /* all ones if the MSB of the low word is set, else all zeros */
    top_bit_mask = 0 - (low_word >> 63);

    /*
     * Compare the bottom 63 bits against kPrime3Test: the subtraction leaves
     * the MSB clear exactly when they are >= kPrime3Test, so after the
     * inversion ge_test_mask is all ones in that case and all zeros
     * otherwise.
     */
    low_word &= bottom63bits;
    low_word -= kPrime3Test;
    ge_test_mask = 0 - ((~low_word) >> 63);

    select = (top_bit_mask & ge_test_mask) | overflow_mask;
    t0 -= select & kPrime[0];
    t1 -= select & kPrime[1];
    /* kPrime[2] is zero, so omitted */
    t3 -= select & kPrime[3];
    /* t3 < 2^64 - 2^32 + 1 */

    /* Ripple the carries upwards; each output keeps only its low word. */
    t1 += (u64)(t0 >> 64);
    t2 += (u64)(t1 >> 64);
    t3 += (u64)(t2 >> 64);

    out[0] = (u64)t0;
    out[1] = (u64)t1;
    out[2] = (u64)t2;
    out[3] = (u64)t3;
}
Packit c4476c
Packit c4476c
/*
 * smallfelem_expand converts a smallfelem to an felem by widening each
 * 64-bit word into its own limb.
 */
static void smallfelem_expand(felem out, const smallfelem in)
{
    unsigned i;

    for (i = 0; i < NLIMBS; i++)
        out[i] = in[i];
}
Packit c4476c
Packit c4476c
/*-
 * smallfelem_square sets |out| = |small|^2
 *
 * Each 64x64-bit product is split into its low and high 64-bit halves,
 * which are accumulated into neighbouring limbs of |out|. Cross products
 * small[i]*small[j] with i != j occur twice in the square, so their halves
 * are doubled.
 *
 * On entry:
 *   small[i] < 2^64
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void smallfelem_square(longfelem out, const smallfelem small)
{
    limb prod;

    prod = ((limb)small[0]) * small[0];
    out[0] = (u64)prod;
    out[1] = prod >> 64;

    /* cross term: the low half is added twice */
    prod = ((limb)small[0]) * small[1];
    out[1] += ((limb)(u64)prod) * 2;
    out[2] = prod >> 64;

    /* doubling out[2] also doubles the high half stored just above */
    prod = ((limb)small[0]) * small[2];
    out[2] = (out[2] + (u64)prod) * 2;
    out[3] = prod >> 64;

    prod = ((limb)small[0]) * small[3];
    out[3] += (u64)prod;
    out[4] = prod >> 64;

    prod = ((limb)small[1]) * small[2];
    out[3] = (out[3] + (u64)prod) * 2;
    out[4] += prod >> 64;

    /* diagonal term: added once, after the doubling of out[2]/out[3] */
    prod = ((limb)small[1]) * small[1];
    out[2] += (u64)prod;
    out[3] += prod >> 64;

    prod = ((limb)small[1]) * small[3];
    out[4] = (out[4] + (u64)prod) * 2;
    out[5] = prod >> 64;

    prod = ((limb)small[2]) * small[3];
    out[5] = (out[5] + (u64)prod) * 2;
    out[6] = (prod >> 64) * 2;

    prod = ((limb)small[2]) * small[2];
    out[4] += (u64)prod;
    out[5] += prod >> 64;

    prod = ((limb)small[3]) * small[3];
    out[6] += (u64)prod;
    out[7] = prod >> 64;
}
Packit c4476c
Packit c4476c
/*-
 * felem_square sets |out| = |in|^2
 *
 * |in| is first shrunk to 64-bit words and then squared.
 *
 * On entry:
 *   in[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_square(longfelem out, const felem in)
{
    smallfelem shrunk;

    felem_shrink(shrunk, in);
    smallfelem_square(out, shrunk);
}
Packit c4476c
Packit c4476c
/*-
 * smallfelem_mul sets |out| = |small1| * |small2|
 *
 * Schoolbook multiplication: each 64x64-bit product small1[i] * small2[j]
 * contributes its low 64 bits to out[i + j] and its high 64 bits to
 * out[i + j + 1]. No carries are propagated between the limbs of |out|.
 *
 * On entry:
 *   small1[i] < 2^64
 *   small2[i] < 2^64
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void smallfelem_mul(longfelem out, const smallfelem small1,
                           const smallfelem small2)
{
    limb prod;

    /* i + j == 0 */
    prod = ((limb)small1[0]) * small2[0];
    out[0] = (u64)prod;
    out[1] = prod >> 64;

    /* i + j == 1 */
    prod = ((limb)small1[0]) * small2[1];
    out[1] += (u64)prod;
    out[2] = prod >> 64;

    prod = ((limb)small1[1]) * small2[0];
    out[1] += (u64)prod;
    out[2] += prod >> 64;

    /* i + j == 2 */
    prod = ((limb)small1[0]) * small2[2];
    out[2] += (u64)prod;
    out[3] = prod >> 64;

    prod = ((limb)small1[1]) * small2[1];
    out[2] += (u64)prod;
    out[3] += prod >> 64;

    prod = ((limb)small1[2]) * small2[0];
    out[2] += (u64)prod;
    out[3] += prod >> 64;

    /* i + j == 3 */
    prod = ((limb)small1[0]) * small2[3];
    out[3] += (u64)prod;
    out[4] = prod >> 64;

    prod = ((limb)small1[1]) * small2[2];
    out[3] += (u64)prod;
    out[4] += prod >> 64;

    prod = ((limb)small1[2]) * small2[1];
    out[3] += (u64)prod;
    out[4] += prod >> 64;

    prod = ((limb)small1[3]) * small2[0];
    out[3] += (u64)prod;
    out[4] += prod >> 64;

    /* i + j == 4 */
    prod = ((limb)small1[1]) * small2[3];
    out[4] += (u64)prod;
    out[5] = prod >> 64;

    prod = ((limb)small1[2]) * small2[2];
    out[4] += (u64)prod;
    out[5] += prod >> 64;

    prod = ((limb)small1[3]) * small2[1];
    out[4] += (u64)prod;
    out[5] += prod >> 64;

    /* i + j == 5 */
    prod = ((limb)small1[2]) * small2[3];
    out[5] += (u64)prod;
    out[6] = prod >> 64;

    prod = ((limb)small1[3]) * small2[2];
    out[5] += (u64)prod;
    out[6] += prod >> 64;

    /* i + j == 6 */
    prod = ((limb)small1[3]) * small2[3];
    out[6] += (u64)prod;
    out[7] = prod >> 64;
}
Packit c4476c
Packit c4476c
/*-
 * felem_mul sets |out| = |in1| * |in2|
 *
 * Both operands are shrunk to 64-bit words before being multiplied.
 *
 * On entry:
 *   in1[i] < 2^109
 *   in2[i] < 2^109
 * On exit:
 *   out[i] < 7 * 2^64 < 2^67
 */
static void felem_mul(longfelem out, const felem in1, const felem in2)
{
    smallfelem lhs, rhs;

    felem_shrink(lhs, in1);
    felem_shrink(rhs, in2);
    smallfelem_mul(out, lhs, rhs);
}
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * felem_small_mul sets |out| = |small1| * |in2|
Packit c4476c
 * On entry:
Packit c4476c
 *   small1[i] < 2^64
Packit c4476c
 *   in2[i] < 2^109
Packit c4476c
 * On exit:
Packit c4476c
 *   out[i] < 7 * 2^64 < 2^67
Packit c4476c
 */
Packit c4476c
static void felem_small_mul(longfelem out, const smallfelem small1,
Packit c4476c
                            const felem in2)
Packit c4476c
{
Packit c4476c
    smallfelem small2;
Packit c4476c
    felem_shrink(small2, in2);
Packit c4476c
    smallfelem_mul(out, small1, small2);
Packit c4476c
}
Packit c4476c
Packit c4476c
# define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
Packit c4476c
# define two100 (((limb)1) << 100)
Packit c4476c
# define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
Packit c4476c
/* zero100 is 0 mod p */
Packit c4476c
static const felem zero100 =
Packit c4476c
    { two100m36m4, two100, two100m36p4, two100m36p4 };
Packit c4476c
Packit c4476c
/*-
 * Internal function for the different flavours of felem_reduce.
 * felem_reduce_ reduces the higher coefficients in[4]-in[7] into |out|,
 * which the caller has already loaded with in[0]-in[3] plus a multiple
 * of p.
 *
 * The contribution of each high limb, as (bit offset, coefficient) pairs:
 *   in[4] (2^256): [(0,1),(96,-1),(192,-1),(224,1)]
 *   in[5] (2^320): [(32,1),(64,1),(128,-1),(160,-1),(224,-1)]
 *   in[6] (2^384): [(0,-1),(32,-1),(96,2),(128,2),(224,-1)]
 *   in[7] (2^448): [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)]
 * Below they are gathered per output limb; all arithmetic is modulo 2^128.
 *
 * On entry:
 *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
 *   out[1] >= in[7] + 2^32*in[4]
 *   out[2] >= in[5] + 2^32*in[5]
 *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
 * On exit:
 *   out[0] <= out[0] + in[4] + 2^32*in[5]
 *   out[1] <= out[1] + in[5] + 2^33*in[6]
 *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
 *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]
 */
static void felem_reduce_(felem out, const longfelem in)
{
    /* Terms that appear in two output limbs with opposite signs. */
    const limb shared_03 = in[4] + (in[5] << 32);   /* +out[0], -out[3] */
    const limb shared_12 = in[5] - in[7];           /* +out[1], -out[2] */

    out[0] += shared_03 - in[6] - (in[6] << 32) - in[7] - (in[7] << 32);
    out[1] += shared_12 - (in[4] << 32) + (in[6] << 33);
    out[2] += (in[6] * 2) + (in[7] << 33) - shared_12 - (in[5] << 32);
    out[3] += (in[4] << 32) + (in[7] * 3) - shared_03 - (in[6] << 32);
}
Packit c4476c
Packit c4476c
/*-
 * felem_reduce converts a longfelem into an felem.
 * To be called directly after felem_square or felem_mul.
 * On entry:
 *   in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 *   in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
 * On exit:
 *   out[i] < 2^101
 */
static void felem_reduce(felem out, const longfelem in)
{
    unsigned i;

    /* Start from the low half of |in| offset by zero100 (0 mod p). */
    for (i = 0; i < NLIMBS; i++)
        out[i] = zero100[i] + in[i];

    /* Fold the high half in[4]-in[7] into the four low limbs. */
    felem_reduce_(out, in);

    /*-
     * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
     * out[1] > 2^100 - 2^64 - 7*2^96 > 0
     * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
     * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
     *
     * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
     * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
     * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
     * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
     */
}
Packit c4476c
Packit c4476c
/*-
 * felem_reduce_zero105 converts a larger longfelem into an felem.
 * On entry:
 *   in[0] < 2^71
 *   (NOTE(review): the bound analysis below applies 2^71 to every in[i],
 *   so the precondition presumably covers all eight limbs - confirm.)
 * On exit:
 *   out[i] < 2^106
 */
static void felem_reduce_zero105(felem out, const longfelem in)
{
    unsigned i;

    /* Start from the low half of |in| offset by zero105 (0 mod p). */
    for (i = 0; i < NLIMBS; i++)
        out[i] = zero105[i] + in[i];

    /* Fold the high half in[4]-in[7] into the four low limbs. */
    felem_reduce_(out, in);

    /*-
     * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
     * out[1] > 2^105 - 2^71 - 2^103 > 0
     * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
     * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
     *
     * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
     * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
     * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
     */
}
Packit c4476c
Packit c4476c
/*
 * subtract_u64 computes *result - v in place. |*carry| is set to one if the
 * subtraction underflowed (borrowed) and to zero otherwise.
 */
static void subtract_u64(u64 *result, u64 *carry, u64 v)
{
    /* do the subtraction 128 bits wide so the borrow lands in bit 64 */
    const uint128_t wide = ((uint128_t)*result) - v;

    *carry = (wide >> 64) & 1;
    *result = (u64)wide;
}
Packit c4476c
Packit c4476c
/*
 * felem_contract converts |in| to its unique, minimal representation and
 * stores it in |out|.
 * On entry:
 *   in[i] < 2^109
 */
static void felem_contract(smallfelem out, const felem in)
{
    unsigned i, j;
    /* all ones for as long as every limb compared so far matched kPrime */
    u64 prefix_equal = ~(u64)0;
    /* becomes all ones once out >= kPrime has been established */
    u64 ge_prime = 0;
    u64 borrow;

    felem_shrink(out, in);
    /* out is minimal except that the value might be > p */

    /*
     * Constant-time test of out >= kPrime. The limbs are walked from the
     * most significant to the least significant one. While all limbs seen so
     * far were equal (prefix_equal is all ones), the first limb that differs
     * decides the comparison; afterwards prefix_equal is zero and later
     * limbs can no longer change the answer.
     */
    for (i = NLIMBS; i-- > 0;) {
        /*
         * if out[i] > kPrime[i] the subtraction underflows and the high
         * 64 bits of |diff| are all set.
         */
        const uint128_t diff = ((uint128_t)kPrime[i]) - out[i];
        /* zero exactly when the two limbs are identical */
        u64 same = kPrime[i] ^ out[i];

        ge_prime |= prefix_equal & ((u64)(diff >> 64));

        /*
         * A zero |same| wraps to all ones on decrement; any other value
         * leaves at least one bit clear. The shifts AND all 64 bits
         * together into bit 63, which is then stretched to a full mask.
         */
        same--;
        same &= same << 32;
        same &= same << 16;
        same &= same << 8;
        same &= same << 4;
        same &= same << 2;
        same &= same << 1;
        same = 0 - (same >> 63);

        prefix_equal &= same;
    }

    /*
     * prefix_equal still being all ones means out == kPrime, which also
     * counts as out >= kPrime.
     */
    ge_prime |= prefix_equal;

    /*
     * If out >= kPrime, subtract kPrime: limb i of the (masked) prime is
     * subtracted from out[i] and the resulting borrow is rippled through
     * every more significant limb before the next prime limb is handled.
     */
    for (i = 0; i < NLIMBS; i++) {
        subtract_u64(&out[i], &borrow, ge_prime & kPrime[i]);
        for (j = i + 1; j < NLIMBS; j++)
            subtract_u64(&out[j], &borrow, borrow);
    }
}
Packit c4476c
Packit c4476c
/*
 * smallfelem_square_contract squares |in|, reduces the product and stores
 * the contracted (minimal) result in |out|.
 */
static void smallfelem_square_contract(smallfelem out, const smallfelem in)
{
    longfelem product;
    felem reduced;

    smallfelem_square(product, in);
    felem_reduce(reduced, product);
    felem_contract(out, reduced);
}
Packit c4476c
Packit c4476c
/*
 * smallfelem_mul_contract multiplies |in1| by |in2|, reduces the product and
 * stores the contracted (minimal) result in |out|.
 */
static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
                                    const smallfelem in2)
{
    longfelem product;
    felem reduced;

    smallfelem_mul(product, in1, in2);
    felem_reduce(reduced, product);
    felem_contract(out, reduced);
}
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
Packit c4476c
 * otherwise.
Packit c4476c
 * On entry:
Packit c4476c
 *   small[i] < 2^64
Packit c4476c
 */
Packit c4476c
static limb smallfelem_is_zero(const smallfelem small)
Packit c4476c
{
Packit c4476c
    limb result;
Packit c4476c
    u64 is_p;
Packit c4476c
Packit c4476c
    u64 is_zero = small[0] | small[1] | small[2] | small[3];
Packit c4476c
    is_zero--;
Packit c4476c
    is_zero &= is_zero << 32;
Packit c4476c
    is_zero &= is_zero << 16;
Packit c4476c
    is_zero &= is_zero << 8;
Packit c4476c
    is_zero &= is_zero << 4;
Packit c4476c
    is_zero &= is_zero << 2;
Packit c4476c
    is_zero &= is_zero << 1;
Packit c4476c
    is_zero = 0 - (is_zero >> 63);
Packit c4476c
Packit c4476c
    is_p = (small[0] ^ kPrime[0]) |
Packit c4476c
        (small[1] ^ kPrime[1]) |
Packit c4476c
        (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
Packit c4476c
    is_p--;
Packit c4476c
    is_p &= is_p << 32;
Packit c4476c
    is_p &= is_p << 16;
Packit c4476c
    is_p &= is_p << 8;
Packit c4476c
    is_p &= is_p << 4;
Packit c4476c
    is_p &= is_p << 2;
Packit c4476c
    is_p &= is_p << 1;
Packit c4476c
    is_p = 0 - (is_p >> 63);
Packit c4476c
Packit c4476c
    is_zero |= is_p;
Packit c4476c
Packit c4476c
    result = is_zero;
Packit c4476c
    result |= ((limb) is_zero) << 64;
Packit c4476c
    return result;
Packit c4476c
}
Packit c4476c
Packit c4476c
static int smallfelem_is_zero_int(const void *small)
Packit c4476c
{
Packit c4476c
    return (int)(smallfelem_is_zero(small) & ((limb) 1));
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * felem_inv_square_n sets |out| = |in|^(2^n) by squaring and reducing n
 * times. The first squaring reads |in|, every later one reads |out|, so
 * out == in is allowed. n must be at least 1 for |out| to be written.
 */
static void felem_inv_square_n(felem out, const felem in, unsigned n)
{
    longfelem wide;
    const limb *src = in;

    while (n-- > 0) {
        felem_square(wide, src);
        felem_reduce(out, wide);
        src = out;
    }
}

/*
 * felem_inv_mul_reduce sets |out| = |a| * |b|, reduced. The product goes
 * through a temporary, so |out| may alias either operand.
 */
static void felem_inv_mul_reduce(felem out, const felem a, const felem b)
{
    longfelem wide;

    felem_mul(wide, a, b);
    felem_reduce(out, wide);
}

/*-
 * felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^p = a (mod p)
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 *
 * The trailing comments give the exponent of |in| held by the variable that
 * was just written.
 */
static void felem_inv(felem out, const felem in)
{
    /* |hi| builds the top part of the exponent, |lo| the bottom 96 bits */
    felem hi, lo;
    /*
     * e_I holds |in|^{2^I - 1} for I = 2 .. 32; e64 is the exception and
     * holds |in|^{2^64 - 2^32}.
     */
    felem e2, e4, e8, e16, e32, e64;

    felem_inv_square_n(hi, in, 1);          /* 2^1 */
    felem_inv_mul_reduce(hi, in, hi);       /* 2^2 - 2^0 */
    felem_assign(e2, hi);
    felem_inv_square_n(hi, hi, 2);          /* 2^4 - 2^2 */
    felem_inv_mul_reduce(hi, hi, e2);       /* 2^4 - 2^0 */
    felem_assign(e4, hi);
    felem_inv_square_n(hi, hi, 4);          /* 2^8 - 2^4 */
    felem_inv_mul_reduce(hi, hi, e4);       /* 2^8 - 2^0 */
    felem_assign(e8, hi);
    felem_inv_square_n(hi, hi, 8);          /* 2^16 - 2^8 */
    felem_inv_mul_reduce(hi, hi, e8);       /* 2^16 - 2^0 */
    felem_assign(e16, hi);
    felem_inv_square_n(hi, hi, 16);         /* 2^32 - 2^16 */
    felem_inv_mul_reduce(hi, hi, e16);      /* 2^32 - 2^0 */
    felem_assign(e32, hi);
    felem_inv_square_n(hi, hi, 32);         /* 2^64 - 2^32 */
    felem_assign(e64, hi);
    felem_inv_mul_reduce(hi, hi, in);       /* 2^64 - 2^32 + 2^0 */
    felem_inv_square_n(hi, hi, 192);        /* 2^256 - 2^224 + 2^192 */

    felem_inv_mul_reduce(lo, e64, e32);     /* 2^64 - 2^0 */
    felem_inv_square_n(lo, lo, 16);         /* 2^80 - 2^16 */
    felem_inv_mul_reduce(lo, lo, e16);      /* 2^80 - 2^0 */
    felem_inv_square_n(lo, lo, 8);          /* 2^88 - 2^8 */
    felem_inv_mul_reduce(lo, lo, e8);       /* 2^88 - 2^0 */
    felem_inv_square_n(lo, lo, 4);          /* 2^92 - 2^4 */
    felem_inv_mul_reduce(lo, lo, e4);       /* 2^92 - 2^0 */
    felem_inv_square_n(lo, lo, 2);          /* 2^94 - 2^2 */
    felem_inv_mul_reduce(lo, lo, e2);       /* 2^94 - 2^0 */
    felem_inv_square_n(lo, lo, 2);          /* 2^96 - 2^2 */
    felem_inv_mul_reduce(lo, lo, in);       /* 2^96 - 3 */

    /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
    felem_inv_mul_reduce(out, lo, hi);
}
Packit c4476c
Packit c4476c
/*
 * smallfelem_inv_contract expands |in|, inverts it with felem_inv (in place)
 * and stores the contracted (minimal) result in |out|.
 */
static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
{
    felem work;

    smallfelem_expand(work, in);
    felem_inv(work, work);
    felem_contract(out, work);
}
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * Group operations
Packit c4476c
 * ----------------
Packit c4476c
 *
Packit c4476c
 * Building on top of the field operations we have the operations on the
Packit c4476c
 * elliptic curve group itself. Points on the curve are represented in Jacobian
Packit c4476c
 * coordinates
Packit c4476c
 */
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * point_double calculates 2*(x_in, y_in, z_in)
Packit c4476c
 *
Packit c4476c
 * The method is taken from:
Packit c4476c
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
Packit c4476c
 *
Packit c4476c
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
Packit c4476c
 * while x_out == y_in is not (maybe this works, but it's not tested).
Packit c4476c
 */
Packit c4476c
static void
Packit c4476c
point_double(felem x_out, felem y_out, felem z_out,
Packit c4476c
             const felem x_in, const felem y_in, const felem z_in)
Packit c4476c
{
Packit c4476c
    longfelem tmp, tmp2;
Packit c4476c
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
Packit c4476c
    smallfelem small1, small2;
Packit c4476c
Packit c4476c
    felem_assign(ftmp, x_in);
Packit c4476c
    /* ftmp[i] < 2^106 */
Packit c4476c
    felem_assign(ftmp2, x_in);
Packit c4476c
    /* ftmp2[i] < 2^106 */
Packit c4476c
Packit c4476c
    /* delta = z^2 */
Packit c4476c
    felem_square(tmp, z_in);
Packit c4476c
    felem_reduce(delta, tmp);
Packit c4476c
    /* delta[i] < 2^101 */
Packit c4476c
Packit c4476c
    /* gamma = y^2 */
Packit c4476c
    felem_square(tmp, y_in);
Packit c4476c
    felem_reduce(gamma, tmp);
Packit c4476c
    /* gamma[i] < 2^101 */
Packit c4476c
    felem_shrink(small1, gamma);
Packit c4476c
Packit c4476c
    /* beta = x*gamma */
Packit c4476c
    felem_small_mul(tmp, small1, x_in);
Packit c4476c
    felem_reduce(beta, tmp);
Packit c4476c
    /* beta[i] < 2^101 */
Packit c4476c
Packit c4476c
    /* alpha = 3*(x-delta)*(x+delta) */
Packit c4476c
    felem_diff(ftmp, delta);
Packit c4476c
    /* ftmp[i] < 2^105 + 2^106 < 2^107 */
Packit c4476c
    felem_sum(ftmp2, delta);
Packit c4476c
    /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
Packit c4476c
    felem_scalar(ftmp2, 3);
Packit c4476c
    /* ftmp2[i] < 3 * 2^107 < 2^109 */
Packit c4476c
    felem_mul(tmp, ftmp, ftmp2);
Packit c4476c
    felem_reduce(alpha, tmp);
Packit c4476c
    /* alpha[i] < 2^101 */
Packit c4476c
    felem_shrink(small2, alpha);
Packit c4476c
Packit c4476c
    /* x' = alpha^2 - 8*beta */
Packit c4476c
    smallfelem_square(tmp, small2);
Packit c4476c
    felem_reduce(x_out, tmp);
Packit c4476c
    felem_assign(ftmp, beta);
Packit c4476c
    felem_scalar(ftmp, 8);
Packit c4476c
    /* ftmp[i] < 8 * 2^101 = 2^104 */
Packit c4476c
    felem_diff(x_out, ftmp);
Packit c4476c
    /* x_out[i] < 2^105 + 2^101 < 2^106 */
Packit c4476c
Packit c4476c
    /* z' = (y + z)^2 - gamma - delta */
Packit c4476c
    felem_sum(delta, gamma);
Packit c4476c
    /* delta[i] < 2^101 + 2^101 = 2^102 */
Packit c4476c
    felem_assign(ftmp, y_in);
Packit c4476c
    felem_sum(ftmp, z_in);
Packit c4476c
    /* ftmp[i] < 2^106 + 2^106 = 2^107 */
Packit c4476c
    felem_square(tmp, ftmp);
Packit c4476c
    felem_reduce(z_out, tmp);
Packit c4476c
    felem_diff(z_out, delta);
Packit c4476c
    /* z_out[i] < 2^105 + 2^101 < 2^106 */
Packit c4476c
Packit c4476c
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
Packit c4476c
    felem_scalar(beta, 4);
Packit c4476c
    /* beta[i] < 4 * 2^101 = 2^103 */
Packit c4476c
    felem_diff_zero107(beta, x_out);
Packit c4476c
    /* beta[i] < 2^107 + 2^103 < 2^108 */
Packit c4476c
    felem_small_mul(tmp, small2, beta);
Packit c4476c
    /* tmp[i] < 7 * 2^64 < 2^67 */
Packit c4476c
    smallfelem_square(tmp2, small1);
Packit c4476c
    /* tmp2[i] < 7 * 2^64 */
Packit c4476c
    longfelem_scalar(tmp2, 8);
Packit c4476c
    /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
Packit c4476c
    longfelem_diff(tmp, tmp2);
Packit c4476c
    /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
Packit c4476c
    felem_reduce_zero105(y_out, tmp);
Packit c4476c
    /* y_out[i] < 2^106 */
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * point_double_small is the same as point_double, except that it operates on
Packit c4476c
 * smallfelems
Packit c4476c
 */
Packit c4476c
static void
Packit c4476c
point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
Packit c4476c
                   const smallfelem x_in, const smallfelem y_in,
Packit c4476c
                   const smallfelem z_in)
Packit c4476c
{
Packit c4476c
    felem felem_x_out, felem_y_out, felem_z_out;
Packit c4476c
    felem felem_x_in, felem_y_in, felem_z_in;
Packit c4476c
Packit c4476c
    smallfelem_expand(felem_x_in, x_in);
Packit c4476c
    smallfelem_expand(felem_y_in, y_in);
Packit c4476c
    smallfelem_expand(felem_z_in, z_in);
Packit c4476c
    point_double(felem_x_out, felem_y_out, felem_z_out,
Packit c4476c
                 felem_x_in, felem_y_in, felem_z_in);
Packit c4476c
    felem_shrink(x_out, felem_x_out);
Packit c4476c
    felem_shrink(y_out, felem_y_out);
Packit c4476c
    felem_shrink(z_out, felem_z_out);
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * copy_conditional copies in to out iff mask is all ones; with a zero mask
 * out is left unchanged. No branch depends on mask.
 */
static void copy_conditional(felem out, const felem in, limb mask)
{
    unsigned idx = 0;

    while (idx < NLIMBS) {
        /* flip exactly the masked bits in which out differs from in */
        out[idx] ^= mask & (in[idx] ^ out[idx]);
        ++idx;
    }
}
Packit c4476c
Packit c4476c
/*
 * copy_small_conditional copies in to out iff mask is all ones; with a zero
 * mask out is left unchanged. The 64-bit limbs of |in| are widened to the
 * limb type of |out|.
 */
static void copy_small_conditional(felem out, const smallfelem in, limb mask)
{
    /* low 64 bits of the mask, applied to the 64-bit source limbs */
    const u64 take = mask;
    /* complement of the mask, selecting what survives of |out| */
    const limb keep = ~mask;
    unsigned idx;

    for (idx = 0; idx < NLIMBS; ++idx)
        out[idx] = (out[idx] & keep) | ((limb) (in[idx] & take));
}
Packit c4476c
Packit c4476c
/*-
 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
 *
 * The method is taken from:
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
 * A non-zero |mixed| selects the mixed-addition shortcut.
 *
 * This function includes a branch for checking whether the two input points
 * are equal, (while not equal to the point at infinity). This case never
 * happens during single point multiplication, so there is no timing leak for
 * ECDH or ECDSA signing.
 *
 * Local names follow the formula (Z1Z1, U1, S1, H, I, J, r, V); the trailing
 * comments track the per-limb bound of the value just written.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const smallfelem x2,
                      const smallfelem y2, const smallfelem z2)
{
    felem z1z1, z2z2, z2_cubed, z1_cubed, u1, s1, two_z1z2;
    felem h, r, big_i, big_j, big_v, v_minus_x;
    felem x_out, y_out, z_out;
    longfelem wide, wide2;
    smallfelem z1_small, z1z1_small, z2z2_small, x1_small, h_small, r_small;
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;
    limb points_equal;

    felem_shrink(z1_small, z1);

    z1_is_zero = smallfelem_is_zero(z1_small);
    z2_is_zero = smallfelem_is_zero(z2);

    /* z1z1 = z1**2 */
    smallfelem_square(wide, z1_small);
    felem_reduce(z1z1, wide);
    /* z1z1[i] < 2^101 */
    felem_shrink(z1z1_small, z1z1);

    if (!mixed) {
        /* z2z2 = z2**2 */
        smallfelem_square(wide, z2);
        felem_reduce(z2z2, wide);
        /* z2z2[i] < 2^101 */
        felem_shrink(z2z2_small, z2z2);

        felem_shrink(x1_small, x1);

        /* u1 = x1*z2z2 */
        smallfelem_mul(wide, x1_small, z2z2_small);
        felem_reduce(u1, wide);
        /* u1[i] < 2^101 */

        /* two_z1z2 = z1 + z2 (for now) */
        felem_assign(two_z1z2, z1);
        felem_small_sum(two_z1z2, z2);
        /* two_z1z2[i] < 2^107 */

        /* two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
        felem_square(wide, two_z1z2);
        felem_reduce(two_z1z2, wide);
        /* z2z2 = z2z2 + z1z1 (only needed for the subtraction) */
        felem_sum(z2z2, z1z1);
        /* z2z2[i] < 2^101 + 2^101 = 2^102 */
        felem_diff(two_z1z2, z2z2);
        /* two_z1z2[i] < 2^105 + 2^101 < 2^106 */

        /* z2_cubed = z2 * z2z2 */
        smallfelem_mul(wide, z2z2_small, z2);
        felem_reduce(z2_cubed, wide);

        /* s1 = y1 * z2**3 */
        felem_mul(wide, y1, z2_cubed);
        felem_reduce(s1, wide);
        /* s1[i] < 2^101 */
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* u1 = x1*z2z2 = x1 */
        felem_assign(u1, x1);
        /* u1[i] < 2^106 */

        /* two_z1z2 = 2z1z2 = 2*z1 */
        felem_assign(two_z1z2, z1);
        felem_scalar(two_z1z2, 2);
        /* two_z1z2[i] < 2*2^106 = 2^107 */

        /* s1 = y1 * z2**3 = y1 */
        felem_assign(s1, y1);
        /* s1[i] < 2^106 */
    }

    /* u2 = x2*z1z1, stored in h */
    smallfelem_mul(wide, x2, z1z1_small);
    felem_reduce(h, wide);

    /* h = u2 - u1 */
    felem_diff_zero107(h, u1);
    /* h[i] < 2^107 + 2^101 < 2^108 */
    felem_shrink(h_small, h);

    x_equal = smallfelem_is_zero(h_small);

    /* z_out = two_z1z2 * h */
    felem_small_mul(wide, h_small, two_z1z2);
    felem_reduce(z_out, wide);
    /* z_out[i] < 2^101 */

    /* z1_cubed = z1 * z1z1 */
    smallfelem_mul(wide, z1z1_small, z1_small);
    felem_reduce(z1_cubed, wide);

    /* s2 = y2 * z1**3, stored in r */
    felem_small_mul(wide, y2, z1_cubed);
    felem_reduce(r, wide);

    /* r = (s2 - s1)*2 */
    felem_diff_zero107(r, s1);
    /* r[i] < 2^107 + 2^107 = 2^108 */
    felem_scalar(r, 2);
    /* r[i] < 2^109 */
    felem_shrink(r_small, r);
    y_equal = smallfelem_is_zero(r_small);

    /*
     * The formulae are incorrect if the points are equal, in affine coordinates
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
     * happens.
     *
     * We use bitwise operations to avoid potential side-channels introduced by
     * the short-circuiting behaviour of boolean operators.
     *
     * The special case of either point being the point at infinity (z1 and/or
     * z2 are zero), is handled separately later on in this function, so we
     * avoid jumping to point_double here in those special cases.
     */
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));

    if (points_equal) {
        /*
         * This is obviously not constant-time but, as mentioned before, this
         * case never happens during single point multiplication, so there is no
         * timing leak for ECDH or ECDSA signing.
         */
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* I = big_i = (2h)**2 */
    felem_assign(big_i, h);
    felem_scalar(big_i, 2);
    /* big_i[i] < 2*2^108 = 2^109 */
    felem_square(wide, big_i);
    felem_reduce(big_i, wide);

    /* J = big_j = h * I */
    felem_mul(wide, h, big_i);
    felem_reduce(big_j, wide);

    /* V = big_v = U1 * I */
    felem_mul(wide, u1, big_i);
    felem_reduce(big_v, wide);

    /* x_out = r**2 - J - 2V */
    smallfelem_square(wide, r_small);
    felem_reduce(x_out, wide);
    /* keep an unscaled copy of V for the y coordinate */
    felem_assign(v_minus_x, big_v);
    felem_scalar(big_v, 2);
    felem_sum(big_v, big_j);
    /* big_v[i] < 2*2^101 + 2^101 < 2^103 (now 2V + J) */
    felem_diff(x_out, big_v);
    /* x_out[i] < 2^105 + 2^101 */

    /* y_out = r(V-x_out) - 2 * s1 * J */
    felem_diff_zero107(v_minus_x, x_out);
    /* v_minus_x[i] < 2^107 + 2^101 < 2^108 */
    felem_small_mul(wide, r_small, v_minus_x);
    felem_mul(wide2, s1, big_j);
    longfelem_scalar(wide2, 2);
    /* wide2[i] < 2*2^67 = 2^68 */
    longfelem_diff(wide, wide2);
    /* wide[i] < 2^67 + 2^70 + 2^40 < 2^71 */
    felem_reduce_zero105(y_out, wide);
    /* y_out[i] < 2^106 */

    /*
     * Point-at-infinity handling: if z1 is zero the result is the second
     * point, if z2 is zero it is the first point. Both selections are made
     * with masks rather than branches.
     */
    copy_small_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_small_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_small_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);

    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}
Packit c4476c
Packit c4476c
/*
 * point_add_small is the same as point_add, except that it operates on
 * smallfelems. The first point is expanded to felems, the full (non-mixed)
 * addition is used, and the sum is shrunk back to smallfelems.
 */
static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
                            smallfelem x1, smallfelem y1, smallfelem z1,
                            smallfelem x2, smallfelem y2, smallfelem z2)
{
    felem lhs_x, lhs_y, lhs_z;
    felem sum_x, sum_y, sum_z;

    smallfelem_expand(lhs_x, x1);
    smallfelem_expand(lhs_y, y1);
    smallfelem_expand(lhs_z, z1);

    /* mixed == 0: z2 is not assumed to be 1 */
    point_add(sum_x, sum_y, sum_z, lhs_x, lhs_y, lhs_z, 0, x2, y2, z2);

    felem_shrink(x3, sum_x);
    felem_shrink(y3, sum_y);
    felem_shrink(z3, sum_z);
}
Packit c4476c
Packit c4476c
/*-
Packit c4476c
 * Base point pre computation
Packit c4476c
 * --------------------------
Packit c4476c
 *
Packit c4476c
 * Two different sorts of precomputed tables are used in the following code.
Packit c4476c
 * Each contains various points on the curve, where each point is three field
Packit c4476c
 * elements (x, y, z).
Packit c4476c
 *
Packit c4476c
 * For the base point table, z is usually 1 (0 for the point at infinity).
Packit c4476c
 * This table has 2 * 16 elements, starting with the following:
Packit c4476c
 * index | bits    | point
Packit c4476c
 * ------+---------+------------------------------
Packit c4476c
 *     0 | 0 0 0 0 | 0G
Packit c4476c
 *     1 | 0 0 0 1 | 1G
Packit c4476c
 *     2 | 0 0 1 0 | 2^64G
Packit c4476c
 *     3 | 0 0 1 1 | (2^64 + 1)G
Packit c4476c
 *     4 | 0 1 0 0 | 2^128G
Packit c4476c
 *     5 | 0 1 0 1 | (2^128 + 1)G
Packit c4476c
 *     6 | 0 1 1 0 | (2^128 + 2^64)G
Packit c4476c
 *     7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
Packit c4476c
 *     8 | 1 0 0 0 | 2^192G
Packit c4476c
 *     9 | 1 0 0 1 | (2^192 + 1)G
Packit c4476c
 *    10 | 1 0 1 0 | (2^192 + 2^64)G
Packit c4476c
 *    11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
Packit c4476c
 *    12 | 1 1 0 0 | (2^192 + 2^128)G
Packit c4476c
 *    13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
Packit c4476c
 *    14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
Packit c4476c
 *    15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
Packit c4476c
 * followed by a copy of this with each element multiplied by 2^32.
Packit c4476c
 *
Packit c4476c
 * The reason for this is so that we can clock bits into four different
Packit c4476c
 * locations when doing simple scalar multiplies against the base point,
Packit c4476c
 * and then another four locations using the second 16 elements.
Packit c4476c
 *
Packit c4476c
 * Tables for other points have table[i] = iG for i in 0 .. 16. */
Packit c4476c
Packit c4476c
/*
 * gmul is the table of precomputed base points.
 *
 * gmul[0][b] is the combination of G, 2^64G, 2^128G and 2^192G selected by
 * the four bits of b (see the index table above); gmul[1][b] is the same
 * point multiplied by 2^32. Each entry is a point (x, y, z) made of three
 * smallfelems, and each smallfelem is written as four 64-bit limbs. The
 * limbs appear least significant first -- entry 1 is the generator, whose
 * x coordinate begins 0x6b17d1f2... in its most significant limb.
 * Entry 0 of each half is all zero (the point at infinity); every other
 * entry has z = 1.
 */
Packit c4476c
static const smallfelem gmul[2][16][3] = {
Packit c4476c
    /* gmul[0]: combinations of G, 2^64G, 2^128G, 2^192G */
    {{{0, 0, 0, 0},
Packit c4476c
      {0, 0, 0, 0},
Packit c4476c
      {0, 0, 0, 0}},
Packit c4476c
     {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
Packit c4476c
       0x6b17d1f2e12c4247},
Packit c4476c
      {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
Packit c4476c
       0x4fe342e2fe1a7f9b},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
Packit c4476c
       0x0fa822bc2811aaa5},
Packit c4476c
      {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
Packit c4476c
       0xbff44ae8f5dba80d},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
Packit c4476c
       0x300a4bbc89d6726f},
Packit c4476c
      {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
Packit c4476c
       0x72aac7e0d09b4644},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
Packit c4476c
       0x447d739beedb5e67},
Packit c4476c
      {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
Packit c4476c
       0x2d4825ab834131ee},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
Packit c4476c
       0xef9519328a9c72ff},
Packit c4476c
      {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
Packit c4476c
       0x611e9fc37dbb2c9b},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
Packit c4476c
       0x550663797b51f5d8},
Packit c4476c
      {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
Packit c4476c
       0x157164848aecb851},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
Packit c4476c
       0xeb5d7745b21141ea},
Packit c4476c
      {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
Packit c4476c
       0xeafd72ebdbecc17b},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
Packit c4476c
       0xa6d39677a7849276},
Packit c4476c
      {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
Packit c4476c
       0x674f84749b0b8816},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
Packit c4476c
       0x4e769e7672c9ddad},
Packit c4476c
      {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
Packit c4476c
       0x42b99082de830663},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
Packit c4476c
       0x78878ef61c6ce04d},
Packit c4476c
      {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
Packit c4476c
       0xb6cb3f5d7b72c321},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
Packit c4476c
       0x0c88bc4d716b1287},
Packit c4476c
      {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
Packit c4476c
       0xdd5ddea3f3901dc6},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
Packit c4476c
       0x68f344af6b317466},
Packit c4476c
      {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
Packit c4476c
       0x31b9c405f8540a20},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
Packit c4476c
       0x4052bf4b6f461db9},
Packit c4476c
      {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
Packit c4476c
       0xfecf4d5190b0fc61},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
Packit c4476c
       0x1eddbae2c802e41a},
Packit c4476c
      {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
Packit c4476c
       0x43104d86560ebcfc},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
Packit c4476c
       0xb48e26b484f7a21c},
Packit c4476c
      {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
Packit c4476c
       0xfac015404d4d3dab},
Packit c4476c
      {1, 0, 0, 0}}},
Packit c4476c
    /* gmul[1]: the same sixteen combinations, each multiplied by 2^32 */
    {{{0, 0, 0, 0},
Packit c4476c
      {0, 0, 0, 0},
Packit c4476c
      {0, 0, 0, 0}},
Packit c4476c
     {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
Packit c4476c
       0x7fe36b40af22af89},
Packit c4476c
      {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
Packit c4476c
       0xe697d45825b63624},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
Packit c4476c
       0x4a5b506612a677a6},
Packit c4476c
      {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
Packit c4476c
       0xeb13461ceac089f1},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
Packit c4476c
       0x0781b8291c6a220a},
Packit c4476c
      {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
Packit c4476c
       0x690cde8df0151593},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
Packit c4476c
       0x8a535f566ec73617},
Packit c4476c
      {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
Packit c4476c
       0x0455c08468b08bd7},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
Packit c4476c
       0x06bada7ab77f8276},
Packit c4476c
      {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
Packit c4476c
       0x5b476dfd0e6cb18a},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
Packit c4476c
       0x3e29864e8a2ec908},
Packit c4476c
      {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
Packit c4476c
       0x239b90ea3dc31e7e},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
Packit c4476c
       0x820f4dd949f72ff7},
Packit c4476c
      {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
Packit c4476c
       0x140406ec783a05ec},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
Packit c4476c
       0x68f6b8542783dfee},
Packit c4476c
      {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
Packit c4476c
       0xcbe1feba92e40ce6},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
Packit c4476c
       0xd0b2f94d2f420109},
Packit c4476c
      {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
Packit c4476c
       0x971459828b0719e5},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
Packit c4476c
       0x961610004a866aba},
Packit c4476c
      {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
Packit c4476c
       0x7acb9fadcee75e44},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
Packit c4476c
       0x24eb9acca333bf5b},
Packit c4476c
      {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
Packit c4476c
       0x69f891c5acd079cc},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
Packit c4476c
       0xe51f547c5972a107},
Packit c4476c
      {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
Packit c4476c
       0x1c309a2b25bb1387},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
Packit c4476c
       0x20b87b8aa2c4e503},
Packit c4476c
      {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
Packit c4476c
       0xf5c6fa49919776be},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
Packit c4476c
       0x1ed7d1b9332010b9},
Packit c4476c
      {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
Packit c4476c
       0x3a2b03f03217257a},
Packit c4476c
      {1, 0, 0, 0}},
Packit c4476c
     {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
Packit c4476c
       0x15fee545c78dd9f6},
Packit c4476c
      {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
Packit c4476c
       0x4ab5b6b2b8753f81},
Packit c4476c
      {1, 0, 0, 0}}}
Packit c4476c
};
Packit c4476c
Packit c4476c
/*
 * select_point copies entry |idx| of a precomputation table holding |size|
 * points into |out|. Every entry of the table is read and merged in under a
 * mask, so which entries are touched does not depend on |idx|.
 */
static void select_point(const u64 idx, unsigned int size,
                         const smallfelem pre_comp[16][3], smallfelem out[3])
{
    u64 *dst = &out[0][0];
    unsigned entry, word;

    memset(out, 0, 3 * sizeof(out[0]));

    for (entry = 0; entry < size; entry++) {
        const u64 *src = (const u64 *)&pre_comp[entry][0][0];
        u64 differ = entry ^ idx;
        u64 keep;

        /* collapse the low bits of the difference into bit 0 */
        differ |= differ >> 4;
        differ |= differ >> 2;
        differ |= differ >> 1;
        /* all ones when entry matches idx, all zeros otherwise */
        keep = (differ & 1) - 1;

        for (word = 0; word < NLIMBS * 3; word++)
            dst[word] |= src[word] & keep;
    }
}
Packit c4476c
Packit c4476c
/* get_bit returns the |i|th bit in |in| */
Packit c4476c
static char get_bit(const felem_bytearray in, int i)
Packit c4476c
{
Packit c4476c
    if ((i < 0) || (i >= 256))
Packit c4476c
        return 0;
Packit c4476c
    return (in[i >> 3] >> (i & 7)) & 1;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * Interleaved point multiplication using precomputed point multiples: The
Packit c4476c
 * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
Packit c4476c
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
Packit c4476c
 * generator, using certain (large) precomputed multiples in g_pre_comp.
Packit c4476c
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
Packit c4476c
 */
Packit c4476c
static void batch_mul(felem x_out, felem y_out, felem z_out,
Packit c4476c
                      const felem_bytearray scalars[],
Packit c4476c
                      const unsigned num_points, const u8 *g_scalar,
Packit c4476c
                      const int mixed, const smallfelem pre_comp[][17][3],
Packit c4476c
                      const smallfelem g_pre_comp[2][16][3])
Packit c4476c
{
Packit c4476c
    int i, skip;
Packit c4476c
    unsigned num, gen_mul = (g_scalar != NULL);
Packit c4476c
    felem nq[3], ftmp;
Packit c4476c
    smallfelem tmp[3];
Packit c4476c
    u64 bits;
Packit c4476c
    u8 sign, digit;
Packit c4476c
Packit c4476c
    /* set nq to the point at infinity */
Packit c4476c
    memset(nq, 0, sizeof(nq));
Packit c4476c
Packit c4476c
    /*
Packit c4476c
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
Packit c4476c
     * of the generator (two in each of the last 32 rounds) and additions of
Packit c4476c
     * other points multiples (every 5th round).
Packit c4476c
     */
Packit c4476c
    skip = 1;                   /* save two point operations in the first
Packit c4476c
                                 * round */
Packit c4476c
    for (i = (num_points ? 255 : 31); i >= 0; --i) {
Packit c4476c
        /* double */
Packit c4476c
        if (!skip)
Packit c4476c
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
Packit c4476c
Packit c4476c
        /* add multiples of the generator */
Packit c4476c
        if (gen_mul && (i <= 31)) {
Packit c4476c
            /* first, look 32 bits upwards */
Packit c4476c
            bits = get_bit(g_scalar, i + 224) << 3;
Packit c4476c
            bits |= get_bit(g_scalar, i + 160) << 2;
Packit c4476c
            bits |= get_bit(g_scalar, i + 96) << 1;
Packit c4476c
            bits |= get_bit(g_scalar, i + 32);
Packit c4476c
            /* select the point to add, in constant time */
Packit c4476c
            select_point(bits, 16, g_pre_comp[1], tmp);
Packit c4476c
Packit c4476c
            if (!skip) {
Packit c4476c
                /* Arg 1 below is for "mixed" */
Packit c4476c
                point_add(nq[0], nq[1], nq[2],
Packit c4476c
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
Packit c4476c
            } else {
Packit c4476c
                smallfelem_expand(nq[0], tmp[0]);
Packit c4476c
                smallfelem_expand(nq[1], tmp[1]);
Packit c4476c
                smallfelem_expand(nq[2], tmp[2]);
Packit c4476c
                skip = 0;
Packit c4476c
            }
Packit c4476c
Packit c4476c
            /* second, look at the current position */
Packit c4476c
            bits = get_bit(g_scalar, i + 192) << 3;
Packit c4476c
            bits |= get_bit(g_scalar, i + 128) << 2;
Packit c4476c
            bits |= get_bit(g_scalar, i + 64) << 1;
Packit c4476c
            bits |= get_bit(g_scalar, i);
Packit c4476c
            /* select the point to add, in constant time */
Packit c4476c
            select_point(bits, 16, g_pre_comp[0], tmp);
Packit c4476c
            /* Arg 1 below is for "mixed" */
Packit c4476c
            point_add(nq[0], nq[1], nq[2],
Packit c4476c
                      nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
Packit c4476c
        }
Packit c4476c
Packit c4476c
        /* do other additions every 5 doublings */
Packit c4476c
        if (num_points && (i % 5 == 0)) {
Packit c4476c
            /* loop over all scalars */
Packit c4476c
            for (num = 0; num < num_points; ++num) {
Packit c4476c
                bits = get_bit(scalars[num], i + 4) << 5;
Packit c4476c
                bits |= get_bit(scalars[num], i + 3) << 4;
Packit c4476c
                bits |= get_bit(scalars[num], i + 2) << 3;
Packit c4476c
                bits |= get_bit(scalars[num], i + 1) << 2;
Packit c4476c
                bits |= get_bit(scalars[num], i) << 1;
Packit c4476c
                bits |= get_bit(scalars[num], i - 1);
Packit c4476c
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
Packit c4476c
Packit c4476c
                /*
Packit c4476c
                 * select the point to add or subtract, in constant time
Packit c4476c
                 */
Packit c4476c
                select_point(digit, 17, pre_comp[num], tmp);
Packit c4476c
                smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
Packit c4476c
                                               * point */
Packit c4476c
                copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
Packit c4476c
                felem_contract(tmp[1], ftmp);
Packit c4476c
Packit c4476c
                if (!skip) {
Packit c4476c
                    point_add(nq[0], nq[1], nq[2],
Packit c4476c
                              nq[0], nq[1], nq[2],
Packit c4476c
                              mixed, tmp[0], tmp[1], tmp[2]);
Packit c4476c
                } else {
Packit c4476c
                    smallfelem_expand(nq[0], tmp[0]);
Packit c4476c
                    smallfelem_expand(nq[1], tmp[1]);
Packit c4476c
                    smallfelem_expand(nq[2], tmp[2]);
Packit c4476c
                    skip = 0;
Packit c4476c
                }
Packit c4476c
            }
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    felem_assign(x_out, nq[0]);
Packit c4476c
    felem_assign(y_out, nq[1]);
Packit c4476c
    felem_assign(z_out, nq[2]);
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Precomputation for the group generator: a reference-counted generator
 * table with the same [2][16][3] shape as gmul.
 */
Packit c4476c
struct nistp256_pre_comp_st {
Packit c4476c
    smallfelem g_pre_comp[2][16][3]; /* used by points_mul instead of gmul */
Packit c4476c
    CRYPTO_REF_COUNT references; /* set to 1 on creation; UP_REF/DOWN_REF */
Packit c4476c
    CRYPTO_RWLOCK *lock;        /* handed to the ref-count macros */
Packit c4476c
};
Packit c4476c
Packit c4476c
/*
 * Returns the EC_METHOD table for this NIST P-256 implementation. The table
 * is a function-local static const object, so every call returns the same
 * address. Its slots are filled positionally: entries named
 * ec_GFp_nistp256_* are specific to this implementation, the others are
 * shared ec_GFp_simple_*, ec_GFp_nist_*, ec_key_*, ecdh_* and ecdsa_*
 * routines, and a literal 0 leaves the slot named in the adjacent comment
 * unset.
 */
const EC_METHOD *EC_GFp_nistp256_method(void)
Packit c4476c
{
Packit c4476c
    static const EC_METHOD ret = {
Packit c4476c
        EC_FLAGS_DEFAULT_OCT,
Packit c4476c
        NID_X9_62_prime_field,
Packit c4476c
        ec_GFp_nistp256_group_init,
Packit c4476c
        ec_GFp_simple_group_finish,
Packit c4476c
        ec_GFp_simple_group_clear_finish,
Packit c4476c
        ec_GFp_nist_group_copy,
Packit c4476c
        ec_GFp_nistp256_group_set_curve,
Packit c4476c
        ec_GFp_simple_group_get_curve,
Packit c4476c
        ec_GFp_simple_group_get_degree,
Packit c4476c
        ec_group_simple_order_bits,
Packit c4476c
        ec_GFp_simple_group_check_discriminant,
Packit c4476c
        ec_GFp_simple_point_init,
Packit c4476c
        ec_GFp_simple_point_finish,
Packit c4476c
        ec_GFp_simple_point_clear_finish,
Packit c4476c
        ec_GFp_simple_point_copy,
Packit c4476c
        ec_GFp_simple_point_set_to_infinity,
Packit c4476c
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
Packit c4476c
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
Packit c4476c
        ec_GFp_simple_point_set_affine_coordinates,
Packit c4476c
        ec_GFp_nistp256_point_get_affine_coordinates,
Packit c4476c
        0 /* point_set_compressed_coordinates */ ,
Packit c4476c
        0 /* point2oct */ ,
Packit c4476c
        0 /* oct2point */ ,
Packit c4476c
        ec_GFp_simple_add,
Packit c4476c
        ec_GFp_simple_dbl,
Packit c4476c
        ec_GFp_simple_invert,
Packit c4476c
        ec_GFp_simple_is_at_infinity,
Packit c4476c
        ec_GFp_simple_is_on_curve,
Packit c4476c
        ec_GFp_simple_cmp,
Packit c4476c
        ec_GFp_simple_make_affine,
Packit c4476c
        ec_GFp_simple_points_make_affine,
Packit c4476c
        ec_GFp_nistp256_points_mul,
Packit c4476c
        ec_GFp_nistp256_precompute_mult,
Packit c4476c
        ec_GFp_nistp256_have_precompute_mult,
Packit c4476c
        ec_GFp_nist_field_mul,
Packit c4476c
        ec_GFp_nist_field_sqr,
Packit c4476c
        0 /* field_div */ ,
Packit c4476c
        ec_GFp_simple_field_inv,
Packit c4476c
        0 /* field_encode */ ,
Packit c4476c
        0 /* field_decode */ ,
Packit c4476c
        0,                      /* field_set_to_one */
Packit c4476c
        ec_key_simple_priv2oct,
Packit c4476c
        ec_key_simple_oct2priv,
Packit c4476c
        0, /* set private */
Packit c4476c
        ec_key_simple_generate_key,
Packit c4476c
        ec_key_simple_check_key,
Packit c4476c
        ec_key_simple_generate_public_key,
Packit c4476c
        0, /* keycopy */
Packit c4476c
        0, /* keyfinish */
Packit c4476c
        ecdh_simple_compute_key,
Packit c4476c
        ecdsa_simple_sign_setup,
Packit c4476c
        ecdsa_simple_sign_sig,
Packit c4476c
        ecdsa_simple_verify_sig,
Packit c4476c
        0, /* field_inverse_mod_ord */
Packit c4476c
        0, /* blind_coordinates */
Packit c4476c
        0, /* ladder_pre */
Packit c4476c
        0, /* ladder_step */
Packit c4476c
        0  /* ladder_post */
Packit c4476c
    };
Packit c4476c
Packit c4476c
    /* address of the shared static table */
    return &ret;
Packit c4476c
}
Packit c4476c
Packit c4476c
/******************************************************************************/
Packit c4476c
/*
Packit c4476c
 * FUNCTIONS TO MANAGE PRECOMPUTATION
Packit c4476c
 */
Packit c4476c
Packit c4476c
static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)
Packit c4476c
{
Packit c4476c
    NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
Packit c4476c
Packit c4476c
    if (ret == NULL) {
Packit c4476c
        ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
Packit c4476c
        return ret;
Packit c4476c
    }
Packit c4476c
Packit c4476c
    ret->references = 1;
Packit c4476c
Packit c4476c
    ret->lock = CRYPTO_THREAD_lock_new();
Packit c4476c
    if (ret->lock == NULL) {
Packit c4476c
        ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
Packit c4476c
        OPENSSL_free(ret);
Packit c4476c
        return NULL;
Packit c4476c
    }
Packit c4476c
    return ret;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Takes an additional reference on |p| and returns |p| itself; no copy is
 * made. A NULL argument is returned unchanged.
 */
NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
{
    int refs;

    if (p == NULL)
        return NULL;

    CRYPTO_UP_REF(&p->references, &refs, p->lock);
    return p;
}
Packit c4476c
Packit c4476c
/*
 * Drops one reference on |pre|. When the count reaches zero the lock and
 * the object itself are released. Passing NULL is a no-op.
 */
void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
{
    int i;

    if (pre == NULL)
        return;

    CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
    /*
     * Trace against the object being released. The previous argument was an
     * undeclared name, which only compiled while the macro expanded to
     * nothing.
     */
    REF_PRINT_COUNT("EC_nistp256", pre);
    if (i > 0)
        return;                 /* other holders remain */
    REF_ASSERT_ISNT(i < 0);

    CRYPTO_THREAD_lock_free(pre->lock);
    OPENSSL_free(pre);
}
Packit c4476c
Packit c4476c
/******************************************************************************/
Packit c4476c
/*
Packit c4476c
 * OPENSSL EC_METHOD FUNCTIONS
Packit c4476c
 */
Packit c4476c
Packit c4476c
/*
 * Runs the generic ec_GFp_simple_group_init on |group| and then marks the
 * curve as having a = -3. The flag is set whatever the generic initialiser
 * returned; that return value is passed back to the caller.
 */
int ec_GFp_nistp256_group_init(EC_GROUP *group)
{
    const int status = ec_GFp_simple_group_init(group);

    group->a_is_minus3 = 1;
    return status;
}
Packit c4476c
Packit c4476c
int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
Packit c4476c
                                    const BIGNUM *a, const BIGNUM *b,
Packit c4476c
                                    BN_CTX *ctx)
Packit c4476c
{
Packit c4476c
    int ret = 0;
Packit c4476c
    BN_CTX *new_ctx = NULL;
Packit c4476c
    BIGNUM *curve_p, *curve_a, *curve_b;
Packit c4476c
Packit c4476c
    if (ctx == NULL)
Packit c4476c
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
Packit c4476c
            return 0;
Packit c4476c
    BN_CTX_start(ctx);
Packit c4476c
    curve_p = BN_CTX_get(ctx);
Packit c4476c
    curve_a = BN_CTX_get(ctx);
Packit c4476c
    curve_b = BN_CTX_get(ctx);
Packit c4476c
    if (curve_b == NULL)
Packit c4476c
        goto err;
Packit c4476c
    BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
Packit c4476c
    BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
Packit c4476c
    BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
Packit c4476c
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
Packit c4476c
        ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
Packit c4476c
              EC_R_WRONG_CURVE_PARAMETERS);
Packit c4476c
        goto err;
Packit c4476c
    }
Packit c4476c
    group->field_mod_func = BN_nist_mod_256;
Packit c4476c
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
Packit c4476c
 err:
Packit c4476c
    BN_CTX_end(ctx);
Packit c4476c
    BN_CTX_free(new_ctx);
Packit c4476c
    return ret;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
Packit c4476c
 * (X/Z^2, Y/Z^3)
Packit c4476c
 */
Packit c4476c
int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
Packit c4476c
                                                 const EC_POINT *point,
Packit c4476c
                                                 BIGNUM *x, BIGNUM *y,
Packit c4476c
                                                 BN_CTX *ctx)
Packit c4476c
{
Packit c4476c
    felem z1, z2, x_in, y_in;
Packit c4476c
    smallfelem x_out, y_out;
Packit c4476c
    longfelem tmp;
Packit c4476c
Packit c4476c
    if (EC_POINT_is_at_infinity(group, point)) {
Packit c4476c
        ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
Packit c4476c
              EC_R_POINT_AT_INFINITY);
Packit c4476c
        return 0;
Packit c4476c
    }
Packit c4476c
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
Packit c4476c
        (!BN_to_felem(z1, point->Z)))
Packit c4476c
        return 0;
Packit c4476c
    felem_inv(z2, z1);
Packit c4476c
    felem_square(tmp, z2);
Packit c4476c
    felem_reduce(z1, tmp);
Packit c4476c
    felem_mul(tmp, x_in, z1);
Packit c4476c
    felem_reduce(x_in, tmp);
Packit c4476c
    felem_contract(x_out, x_in);
Packit c4476c
    if (x != NULL) {
Packit c4476c
        if (!smallfelem_to_BN(x, x_out)) {
Packit c4476c
            ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
Packit c4476c
                  ERR_R_BN_LIB);
Packit c4476c
            return 0;
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    felem_mul(tmp, z1, z2);
Packit c4476c
    felem_reduce(z1, tmp);
Packit c4476c
    felem_mul(tmp, y_in, z1);
Packit c4476c
    felem_reduce(y_in, tmp);
Packit c4476c
    felem_contract(y_out, y_in);
Packit c4476c
    if (y != NULL) {
Packit c4476c
        if (!smallfelem_to_BN(y, y_out)) {
Packit c4476c
            ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
Packit c4476c
                  ERR_R_BN_LIB);
Packit c4476c
            return 0;
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    return 1;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Converts |num| points to affine form by handing them, together with the
 * smallfelem arithmetic callbacks, to the shared
 * ec_GFp_nistp_points_make_affine_internal helper. |points| has |num|
 * entries and |tmp_smallfelems| must have room for |num| + 1 elements.
 */
static void make_points_affine(size_t num, smallfelem points[][3],
                               smallfelem tmp_smallfelems[])
{
    /* generic callback shapes expected by the shared helper */
    typedef void (*set_fn)(void *);
    typedef void (*unary_fn)(void *, const void *);
    typedef void (*binary_fn)(void *, const void *, const void *);

    /*
     * Runs in constant time, unless an input is the point at infinity (which
     * normally shouldn't happen).
     */
    ec_GFp_nistp_points_make_affine_internal(num, points,
                                             sizeof(smallfelem),
                                             tmp_smallfelems,
                                             (set_fn)smallfelem_one,
                                             smallfelem_is_zero_int,
                                             (unary_fn)smallfelem_assign,
                                             (unary_fn)
                                             smallfelem_square_contract,
                                             (binary_fn)
                                             smallfelem_mul_contract,
                                             (unary_fn)
                                             smallfelem_inv_contract,
                                             /* nothing to contract */
                                             (unary_fn)smallfelem_assign);
}
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
Packit c4476c
 * values Result is stored in r (r can equal one of the inputs).
Packit c4476c
 */
Packit c4476c
int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
Packit c4476c
                               const BIGNUM *scalar, size_t num,
Packit c4476c
                               const EC_POINT *points[],
Packit c4476c
                               const BIGNUM *scalars[], BN_CTX *ctx)
Packit c4476c
{
Packit c4476c
    int ret = 0;
Packit c4476c
    int j;
Packit c4476c
    int mixed = 0;
Packit c4476c
    BIGNUM *x, *y, *z, *tmp_scalar;
Packit c4476c
    felem_bytearray g_secret;
Packit c4476c
    felem_bytearray *secrets = NULL;
Packit c4476c
    smallfelem (*pre_comp)[17][3] = NULL;
Packit c4476c
    smallfelem *tmp_smallfelems = NULL;
Packit c4476c
    unsigned i;
Packit c4476c
    int num_bytes;
Packit c4476c
    int have_pre_comp = 0;
Packit c4476c
    size_t num_points = num;
Packit c4476c
    smallfelem x_in, y_in, z_in;
Packit c4476c
    felem x_out, y_out, z_out;
Packit c4476c
    NISTP256_PRE_COMP *pre = NULL;
Packit c4476c
    const smallfelem(*g_pre_comp)[16][3] = NULL;
Packit c4476c
    EC_POINT *generator = NULL;
Packit c4476c
    const EC_POINT *p = NULL;
Packit c4476c
    const BIGNUM *p_scalar = NULL;
Packit c4476c
Packit c4476c
    BN_CTX_start(ctx);
Packit c4476c
    x = BN_CTX_get(ctx);
Packit c4476c
    y = BN_CTX_get(ctx);
Packit c4476c
    z = BN_CTX_get(ctx);
Packit c4476c
    tmp_scalar = BN_CTX_get(ctx);
Packit c4476c
    if (tmp_scalar == NULL)
Packit c4476c
        goto err;
Packit c4476c
Packit c4476c
    if (scalar != NULL) {
Packit c4476c
        pre = group->pre_comp.nistp256;
Packit c4476c
        if (pre)
Packit c4476c
            /* we have precomputation, try to use it */
Packit c4476c
            g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
Packit c4476c
        else
Packit c4476c
            /* try to use the standard precomputation */
Packit c4476c
            g_pre_comp = &gmul[0];
Packit c4476c
        generator = EC_POINT_new(group);
Packit c4476c
        if (generator == NULL)
Packit c4476c
            goto err;
Packit c4476c
        /* get the generator from precomputation */
Packit c4476c
        if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
Packit c4476c
            !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
Packit c4476c
            !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
Packit c4476c
            ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
Packit c4476c
            goto err;
Packit c4476c
        }
Packit c4476c
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
Packit c4476c
                                                      generator, x, y, z,
Packit c4476c
                                                      ctx))
Packit c4476c
            goto err;
Packit c4476c
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
Packit c4476c
            /* precomputation matches generator */
Packit c4476c
            have_pre_comp = 1;
Packit c4476c
        else
Packit c4476c
            /*
Packit c4476c
             * we don't have valid precomputation: treat the generator as a
Packit c4476c
             * random point
Packit c4476c
             */
Packit c4476c
            num_points++;
Packit c4476c
    }
Packit c4476c
    if (num_points > 0) {
Packit c4476c
        if (num_points >= 3) {
Packit c4476c
            /*
Packit c4476c
             * unless we precompute multiples for just one or two points,
Packit c4476c
             * converting those into affine form is time well spent
Packit c4476c
             */
Packit c4476c
            mixed = 1;
Packit c4476c
        }
Packit c4476c
        secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
Packit c4476c
        pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
Packit c4476c
        if (mixed)
Packit c4476c
            tmp_smallfelems =
Packit c4476c
              OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
Packit c4476c
        if ((secrets == NULL) || (pre_comp == NULL)
Packit c4476c
            || (mixed && (tmp_smallfelems == NULL))) {
Packit c4476c
            ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
Packit c4476c
            goto err;
Packit c4476c
        }
Packit c4476c
Packit c4476c
        /*
Packit c4476c
         * we treat NULL scalars as 0, and NULL points as points at infinity,
Packit c4476c
         * i.e., they contribute nothing to the linear combination
Packit c4476c
         */
Packit c4476c
        memset(secrets, 0, sizeof(*secrets) * num_points);
Packit c4476c
        memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
Packit c4476c
        for (i = 0; i < num_points; ++i) {
Packit c4476c
            if (i == num) {
Packit c4476c
                /*
Packit c4476c
                 * we didn't have a valid precomputation, so we pick the
Packit c4476c
                 * generator
Packit c4476c
                 */
Packit c4476c
                p = EC_GROUP_get0_generator(group);
Packit c4476c
                p_scalar = scalar;
Packit c4476c
            } else {
Packit c4476c
                /* the i^th point */
Packit c4476c
                p = points[i];
Packit c4476c
                p_scalar = scalars[i];
Packit c4476c
            }
Packit c4476c
            if ((p_scalar != NULL) && (p != NULL)) {
Packit c4476c
                /* reduce scalar to 0 <= scalar < 2^256 */
Packit c4476c
                if ((BN_num_bits(p_scalar) > 256)
Packit c4476c
                    || (BN_is_negative(p_scalar))) {
Packit c4476c
                    /*
Packit c4476c
                     * this is an unusual input, and we don't guarantee
Packit c4476c
                     * constant-timeness
Packit c4476c
                     */
Packit c4476c
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
Packit c4476c
                        ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
Packit c4476c
                        goto err;
Packit c4476c
                    }
Packit c4476c
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
Packit c4476c
                                               secrets[i], sizeof(secrets[i]));
Packit c4476c
                } else {
Packit c4476c
                    num_bytes = BN_bn2lebinpad(p_scalar,
Packit c4476c
                                               secrets[i], sizeof(secrets[i]));
Packit c4476c
                }
Packit c4476c
                if (num_bytes < 0) {
Packit c4476c
                    ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
Packit c4476c
                    goto err;
Packit c4476c
                }
Packit c4476c
                /* precompute multiples */
Packit c4476c
                if ((!BN_to_felem(x_out, p->X)) ||
Packit c4476c
                    (!BN_to_felem(y_out, p->Y)) ||
Packit c4476c
                    (!BN_to_felem(z_out, p->Z)))
Packit c4476c
                    goto err;
Packit c4476c
                felem_shrink(pre_comp[i][1][0], x_out);
Packit c4476c
                felem_shrink(pre_comp[i][1][1], y_out);
Packit c4476c
                felem_shrink(pre_comp[i][1][2], z_out);
Packit c4476c
                for (j = 2; j <= 16; ++j) {
Packit c4476c
                    if (j & 1) {
Packit c4476c
                        point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
Packit c4476c
                                        pre_comp[i][j][2], pre_comp[i][1][0],
Packit c4476c
                                        pre_comp[i][1][1], pre_comp[i][1][2],
Packit c4476c
                                        pre_comp[i][j - 1][0],
Packit c4476c
                                        pre_comp[i][j - 1][1],
Packit c4476c
                                        pre_comp[i][j - 1][2]);
Packit c4476c
                    } else {
Packit c4476c
                        point_double_small(pre_comp[i][j][0],
Packit c4476c
                                           pre_comp[i][j][1],
Packit c4476c
                                           pre_comp[i][j][2],
Packit c4476c
                                           pre_comp[i][j / 2][0],
Packit c4476c
                                           pre_comp[i][j / 2][1],
Packit c4476c
                                           pre_comp[i][j / 2][2]);
Packit c4476c
                    }
Packit c4476c
                }
Packit c4476c
            }
Packit c4476c
        }
Packit c4476c
        if (mixed)
Packit c4476c
            make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
Packit c4476c
    }
Packit c4476c
Packit c4476c
    /* the scalar for the generator */
Packit c4476c
    if ((scalar != NULL) && (have_pre_comp)) {
Packit c4476c
        memset(g_secret, 0, sizeof(g_secret));
Packit c4476c
        /* reduce scalar to 0 <= scalar < 2^256 */
Packit c4476c
        if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
Packit c4476c
            /*
Packit c4476c
             * this is an unusual input, and we don't guarantee
Packit c4476c
             * constant-timeness
Packit c4476c
             */
Packit c4476c
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
Packit c4476c
                ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
Packit c4476c
                goto err;
Packit c4476c
            }
Packit c4476c
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
Packit c4476c
        } else {
Packit c4476c
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
Packit c4476c
        }
Packit c4476c
        /* do the multiplication with generator precomputation */
Packit c4476c
        batch_mul(x_out, y_out, z_out,
Packit c4476c
                  (const felem_bytearray(*))secrets, num_points,
Packit c4476c
                  g_secret,
Packit c4476c
                  mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
Packit c4476c
    } else {
Packit c4476c
        /* do the multiplication without generator precomputation */
Packit c4476c
        batch_mul(x_out, y_out, z_out,
Packit c4476c
                  (const felem_bytearray(*))secrets, num_points,
Packit c4476c
                  NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
Packit c4476c
    }
Packit c4476c
    /* reduce the output to its unique minimal representation */
Packit c4476c
    felem_contract(x_in, x_out);
Packit c4476c
    felem_contract(y_in, y_out);
Packit c4476c
    felem_contract(z_in, z_out);
Packit c4476c
    if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
Packit c4476c
        (!smallfelem_to_BN(z, z_in))) {
Packit c4476c
        ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
Packit c4476c
        goto err;
Packit c4476c
    }
Packit c4476c
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
Packit c4476c
Packit c4476c
 err:
Packit c4476c
    BN_CTX_end(ctx);
Packit c4476c
    EC_POINT_free(generator);
Packit c4476c
    OPENSSL_free(secrets);
Packit c4476c
    OPENSSL_free(pre_comp);
Packit c4476c
    OPENSSL_free(tmp_smallfelems);
Packit c4476c
    return ret;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Builds the generator precomputation table for |group| and attaches it to
 * the group.
 *
 * Any existing precomputation is discarded first, so it is gone even if this
 * function later fails. If |ctx| is NULL a temporary BN_CTX is created and
 * freed before returning. When the group's generator equals the point built
 * from nistp256_curve_params[3] and [4], the built-in gmul table is copied;
 * otherwise the table is computed from the group's generator.
 *
 * Returns 1 on success, 0 on failure.
 */
int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP256_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    smallfelem tmp_smallfelems[32];
    felem x_tmp, y_tmp, z_tmp;

    /* throw away old precomputation */
    EC_pre_comp_free(group);
    if (ctx == NULL) {
        ctx = new_ctx = BN_CTX_new();
        if (ctx == NULL)
            return 0;
    }
    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    if (y == NULL)
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    /* a failed conversion must not feed bad coordinates to the comparison */
    if (BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray),
                  x) == NULL
        || BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray),
                     y) == NULL)
        goto err;
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp256_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    if ((!BN_to_felem(x_tmp, group->generator->X)) ||
        (!BN_to_felem(y_tmp, group->generator->Y)) ||
        (!BN_to_felem(z_tmp, group->generator->Z)))
        goto err;
    felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
    felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
    felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
    /*
     * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
     * 2^160*G, 2^224*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        /* one doubling plus 31 more: entry [1][i] = 2^32 * entry [0][i] */
        point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                           pre->g_pre_comp[0][i][1],
                           pre->g_pre_comp[0][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2],
                               pre->g_pre_comp[1][i][0],
                               pre->g_pre_comp[1][i][1],
                               pre->g_pre_comp[1][i][2]);
        }
        /* index 16 would be out of the table, so stop after [1][8] */
        if (i == 8)
            break;
        /* one doubling plus 31 more: entry [0][2i] = 2^32 * entry [1][i] */
        point_double_small(pre->g_pre_comp[0][2 * i][0],
                           pre->g_pre_comp[0][2 * i][1],
                           pre->g_pre_comp[0][2 * i][2],
                           pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                           pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 31; ++j) {
            point_double_small(pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2],
                               pre->g_pre_comp[0][2 * i][0],
                               pre->g_pre_comp[0][2 * i][1],
                               pre->g_pre_comp[0][2 * i][2]);
        }
    }
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
        point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                        pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                        pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                        pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
        point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                        pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                        pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                        pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                        pre->g_pre_comp[i][4][2]);
        /*
         * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
         */
        point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                        pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                        pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                        pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                        pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^32*G */
            point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
                            pre->g_pre_comp[i][2 * j + 1][1],
                            pre->g_pre_comp[i][2 * j + 1][2],
                            pre->g_pre_comp[i][2 * j][0],
                            pre->g_pre_comp[i][2 * j][1],
                            pre->g_pre_comp[i][2 * j][2],
                            pre->g_pre_comp[i][1][0],
                            pre->g_pre_comp[i][1][1],
                            pre->g_pre_comp[i][1][2]);
        }
    }
    /* 31 points starting at [0][1]; tmp_smallfelems holds 32 entries */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);

 done:
    SETPRECOMP(group, nistp256, pre);
    /* cleared so the free below does not touch the table now set on group */
    pre = NULL;
    ret = 1;

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    BN_CTX_free(new_ctx);
    EC_nistp256_pre_comp_free(pre);
    return ret;
}
Packit c4476c
Packit c4476c
int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
Packit c4476c
{
Packit c4476c
    return HAVEPRECOMP(group, nistp256);
Packit c4476c
}
Packit c4476c
#endif