Blame mpn/generic/sqrlo_basecase.c

Packit 5c3484
/* mpn_sqrlo_basecase -- Internal routine to square a natural number
Packit 5c3484
   of length n.
Packit 5c3484
Packit 5c3484
   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
Packit 5c3484
Packit 5c3484
Packit 5c3484
Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015
Packit 5c3484
Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_1
Packit 5c3484
#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
Packit 5c3484
#else
Packit 5c3484
#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_sqr_diagonal
Packit 5c3484
#define MPN_SQR_DIAGONAL(rp, up, n)					\
Packit 5c3484
  mpn_sqr_diagonal (rp, up, n)
Packit 5c3484
#else
Packit 5c3484
#define MPN_SQR_DIAGONAL(rp, up, n)					\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_size_t _i;							\
Packit 5c3484
    for (_i = 0; _i < (n); _i++)					\
Packit 5c3484
      {									\
Packit 5c3484
	mp_limb_t ul, lpl;						\
Packit 5c3484
	ul = (up)[_i];							\
Packit 5c3484
	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
Packit 5c3484
	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
Packit 5c3484
      }									\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define MPN_SQRLO_DIAGONAL(rp, up, n)					\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_size_t nhalf;							\
Packit 5c3484
    nhalf = (n) >> 1;							\
Packit 5c3484
    MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
Packit 5c3484
    if (((n) & 1) != 0)							\
Packit 5c3484
      {									\
Packit 5c3484
	mp_limb_t op;							\
Packit 5c3484
	op = (up)[nhalf];						\
Packit 5c3484
	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
Packit 5c3484
      }									\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
Packit 5c3484
#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
Packit 5c3484
  do {									\
Packit 5c3484
    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
Packit 5c3484
    mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
Packit 5c3484
  } while (0)
Packit 5c3484
#else
Packit 5c3484
#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
Packit 5c3484
  do {									\
Packit 5c3484
    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
Packit 5c3484
    mpn_lshift ((tp), (tp), (n) - 1, 1);				\
Packit 5c3484
    mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
Packit 5c3484
#define SQRLO_BASECASE_ALLOC						\
Packit 5c3484
  (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
Packit 5c3484
Packit 5c3484
/* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
Packit 5c3484
#ifndef SQRLO_SPECIAL_CASES
Packit 5c3484
#define SQRLO_SPECIAL_CASES 2
Packit 5c3484
#endif
Packit 5c3484
void
Packit 5c3484
mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t ul;
Packit 5c3484
Packit 5c3484
  ASSERT (n >= 1);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
Packit 5c3484
Packit 5c3484
  ul = up[0];
Packit 5c3484
Packit 5c3484
  if (n <= SQRLO_SPECIAL_CASES)
Packit 5c3484
    {
Packit 5c3484
#if SQRLO_SPECIAL_CASES == 1
Packit 5c3484
      rp[0] = (ul * ul) & GMP_NUMB_MASK;
Packit 5c3484
#else
Packit 5c3484
      if (n == 1)
Packit 5c3484
	rp[0] = (ul * ul) & GMP_NUMB_MASK;
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t hi, lo, ul1;
Packit 5c3484
	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
Packit 5c3484
	  rp[0] = lo >> GMP_NAIL_BITS;
Packit 5c3484
	  ul1 = up[1];
Packit 5c3484
#if SQRLO_SPECIAL_CASES == 2
Packit 5c3484
	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
Packit 5c3484
#else
Packit 5c3484
	  if (n == 2)
Packit 5c3484
	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      mp_limb_t hi1;
Packit 5c3484
#if GMP_NAIL_BITS != 0
Packit 5c3484
	      ul <<= 1;
Packit 5c3484
#endif
Packit 5c3484
	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
Packit 5c3484
	      hi1 += ul * up[2];
Packit 5c3484
#if GMP_NAIL_BITS == 0
Packit 5c3484
	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
Packit 5c3484
	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
Packit 5c3484
#else
Packit 5c3484
	      hi += lo >> GMP_NAIL_BITS;
Packit 5c3484
	      rp[1] = hi & GMP_NUMB_MASK;
Packit 5c3484
	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
Packit 5c3484
#endif
Packit 5c3484
	    }
Packit 5c3484
#endif
Packit 5c3484
	}
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t tp[SQRLO_BASECASE_ALLOC];
Packit 5c3484
      mp_size_t i;
Packit 5c3484
Packit 5c3484
      /* must fit n-1 limbs in tp */
Packit 5c3484
      ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
Packit 5c3484
Packit 5c3484
      --n;
Packit 5c3484
#if SQRLO_SHORTCUT_MULTIPLICATIONS
Packit 5c3484
      {
Packit 5c3484
	mp_limb_t cy;
Packit 5c3484
Packit 5c3484
	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
Packit 5c3484
	for (i = 1; 2 * i + 1 < n; ++i)
Packit 5c3484
	  {
Packit 5c3484
	    ul = up[i];
Packit 5c3484
	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
Packit 5c3484
	  }
Packit 5c3484
	tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
Packit 5c3484
      }
Packit 5c3484
#else
Packit 5c3484
      mpn_mul_1 (tp, up + 1, n, ul);
Packit 5c3484
      for (i = 1; 2 * i < n; ++i)
Packit 5c3484
	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
      MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
#undef SQRLO_SPECIAL_CASES