Blame stdlib/mul_n.c

Packit 6c4009
/* mpn_mul_n -- Multiply two natural numbers of length n.
Packit 6c4009
Packit 6c4009
Copyright (C) 1991-2018 Free Software Foundation, Inc.
Packit 6c4009
Packit 6c4009
This file is part of the GNU MP Library.
Packit 6c4009
Packit 6c4009
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 6c4009
it under the terms of the GNU Lesser General Public License as published by
Packit 6c4009
the Free Software Foundation; either version 2.1 of the License, or (at your
Packit 6c4009
option) any later version.
Packit 6c4009
Packit 6c4009
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 6c4009
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 6c4009
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
Packit 6c4009
License for more details.
Packit 6c4009
Packit 6c4009
You should have received a copy of the GNU Lesser General Public License
Packit 6c4009
along with the GNU MP Library; see the file COPYING.LIB.  If not, see
Packit 6c4009
<http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <gmp.h>
Packit 6c4009
#include "gmp-impl.h"
Packit 6c4009
Packit 6c4009
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
Packit 6c4009
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
Packit 6c4009
   always stored.  Return the most significant limb.
Packit 6c4009
Packit 6c4009
   Argument constraints:
Packit 6c4009
   1. PRODP != UP and PRODP != VP, i.e. the destination
Packit 6c4009
      must be distinct from the multiplier and the multiplicand.  */
Packit 6c4009
Packit 6c4009
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
Packit 6c4009
   value which is good on most machines.  */
Packit 6c4009
#ifndef KARATSUBA_THRESHOLD
Packit 6c4009
#define KARATSUBA_THRESHOLD 32
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
Packit 6c4009
#if KARATSUBA_THRESHOLD < 2
Packit 6c4009
#undef KARATSUBA_THRESHOLD
Packit 6c4009
#define KARATSUBA_THRESHOLD 2
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
/* Handle simple cases with traditional multiplication.
Packit 6c4009
Packit 6c4009
   This is the most critical code of multiplication.  All multiplies rely
Packit 6c4009
   on this, both small and huge.  Small ones arrive here immediately.  Huge
Packit 6c4009
   ones arrive here as this is the base case for Karatsuba's recursive
Packit 6c4009
   algorithm below.  */
Packit 6c4009
Packit 6c4009
void
Packit 6c4009
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
Packit 6c4009
{
Packit 6c4009
  mp_size_t i;
Packit 6c4009
  mp_limb_t cy_limb;
Packit 6c4009
  mp_limb_t v_limb;
Packit 6c4009
Packit 6c4009
  /* Multiply by the first limb in V separately, as the result can be
Packit 6c4009
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
Packit 6c4009
  v_limb = vp[0];
Packit 6c4009
  if (v_limb <= 1)
Packit 6c4009
    {
Packit 6c4009
      if (v_limb == 1)
Packit 6c4009
	MPN_COPY (prodp, up, size);
Packit 6c4009
      else
Packit 6c4009
	MPN_ZERO (prodp, size);
Packit 6c4009
      cy_limb = 0;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
Packit 6c4009
Packit 6c4009
  prodp[size] = cy_limb;
Packit 6c4009
  prodp++;
Packit 6c4009
Packit 6c4009
  /* For each iteration in the outer loop, multiply one limb from
Packit 6c4009
     U with one limb from V, and add it to PROD.  */
Packit 6c4009
  for (i = 1; i < size; i++)
Packit 6c4009
    {
Packit 6c4009
      v_limb = vp[i];
Packit 6c4009
      if (v_limb <= 1)
Packit 6c4009
	{
Packit 6c4009
	  cy_limb = 0;
Packit 6c4009
	  if (v_limb == 1)
Packit 6c4009
	    cy_limb = mpn_add_n (prodp, prodp, up, size);
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
Packit 6c4009
Packit 6c4009
      prodp[size] = cy_limb;
Packit 6c4009
      prodp++;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
void
Packit 6c4009
impn_mul_n (mp_ptr prodp,
Packit 6c4009
	     mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
Packit 6c4009
{
Packit 6c4009
  if ((size & 1) != 0)
Packit 6c4009
    {
Packit 6c4009
      /* The size is odd, the code code below doesn't handle that.
Packit 6c4009
	 Multiply the least significant (size - 1) limbs with a recursive
Packit 6c4009
	 call, and handle the most significant limb of S1 and S2
Packit 6c4009
	 separately.  */
Packit 6c4009
      /* A slightly faster way to do this would be to make the Karatsuba
Packit 6c4009
	 code below behave as if the size were even, and let it check for
Packit 6c4009
	 odd size in the end.  I.e., in essence move this code to the end.
Packit 6c4009
	 Doing so would save us a recursive call, and potentially make the
Packit 6c4009
	 stack grow a lot less.  */
Packit 6c4009
Packit 6c4009
      mp_size_t esize = size - 1;	/* even size */
Packit 6c4009
      mp_limb_t cy_limb;
Packit 6c4009
Packit 6c4009
      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
Packit 6c4009
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
Packit 6c4009
      prodp[esize + esize] = cy_limb;
Packit 6c4009
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
Packit 6c4009
Packit 6c4009
      prodp[esize + size] = cy_limb;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
Packit 6c4009
Packit 6c4009
	 Split U in two pieces, U1 and U0, such that
Packit 6c4009
	 U = U0 + U1*(B**n),
Packit 6c4009
	 and V in V1 and V0, such that
Packit 6c4009
	 V = V0 + V1*(B**n).
Packit 6c4009
Packit 6c4009
	 UV is then computed recursively using the identity
Packit 6c4009
Packit 6c4009
		2n   n          n                     n
Packit 6c4009
	 UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
Packit 6c4009
			1 1        1  0   0  1              0 0
Packit 6c4009
Packit 6c4009
	 Where B = 2**BITS_PER_MP_LIMB.  */
Packit 6c4009
Packit 6c4009
      mp_size_t hsize = size >> 1;
Packit 6c4009
      mp_limb_t cy;
Packit 6c4009
      int negflg;
Packit 6c4009
Packit 6c4009
      /*** Product H.	 ________________  ________________
Packit 6c4009
			|_____U1 x V1____||____U0 x V0_____|  */
Packit 6c4009
      /* Put result in upper part of PROD and pass low part of TSPACE
Packit 6c4009
	 as new TSPACE.  */
Packit 6c4009
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
Packit 6c4009
Packit 6c4009
      /*** Product M.	 ________________
Packit 6c4009
			|_(U1-U0)(V0-V1)_|  */
Packit 6c4009
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
Packit 6c4009
	{
Packit 6c4009
	  mpn_sub_n (prodp, up + hsize, up, hsize);
Packit 6c4009
	  negflg = 0;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  mpn_sub_n (prodp, up, up + hsize, hsize);
Packit 6c4009
	  negflg = 1;
Packit 6c4009
	}
Packit 6c4009
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
Packit 6c4009
	{
Packit 6c4009
	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
Packit 6c4009
	  negflg ^= 1;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
Packit 6c4009
	  /* No change of NEGFLG.  */
Packit 6c4009
	}
Packit 6c4009
      /* Read temporary operands from low part of PROD.
Packit 6c4009
	 Put result in low part of TSPACE using upper part of TSPACE
Packit 6c4009
	 as new TSPACE.  */
Packit 6c4009
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
Packit 6c4009
Packit 6c4009
      /*** Add/copy product H.  */
Packit 6c4009
      MPN_COPY (prodp + hsize, prodp + size, hsize);
Packit 6c4009
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
Packit 6c4009
Packit 6c4009
      /*** Add product M (if NEGFLG M is a negative number).  */
Packit 6c4009
      if (negflg)
Packit 6c4009
	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
Packit 6c4009
      else
Packit 6c4009
	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
Packit 6c4009
Packit 6c4009
      /*** Product L.	 ________________  ________________
Packit 6c4009
			|________________||____U0 x V0_____|  */
Packit 6c4009
      /* Read temporary operands from low part of PROD.
Packit 6c4009
	 Put result in low part of TSPACE using upper part of TSPACE
Packit 6c4009
	 as new TSPACE.  */
Packit 6c4009
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
Packit 6c4009
Packit 6c4009
      /*** Add/copy Product L (twice).  */
Packit 6c4009
Packit 6c4009
      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
Packit 6c4009
      if (cy)
Packit 6c4009
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
Packit 6c4009
Packit 6c4009
      MPN_COPY (prodp, tspace, hsize);
Packit 6c4009
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
Packit 6c4009
      if (cy)
Packit 6c4009
	mpn_add_1 (prodp + size, prodp + size, size, 1);
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
void
Packit 6c4009
impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
Packit 6c4009
{
Packit 6c4009
  mp_size_t i;
Packit 6c4009
  mp_limb_t cy_limb;
Packit 6c4009
  mp_limb_t v_limb;
Packit 6c4009
Packit 6c4009
  /* Multiply by the first limb in V separately, as the result can be
Packit 6c4009
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
Packit 6c4009
  v_limb = up[0];
Packit 6c4009
  if (v_limb <= 1)
Packit 6c4009
    {
Packit 6c4009
      if (v_limb == 1)
Packit 6c4009
	MPN_COPY (prodp, up, size);
Packit 6c4009
      else
Packit 6c4009
	MPN_ZERO (prodp, size);
Packit 6c4009
      cy_limb = 0;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
Packit 6c4009
Packit 6c4009
  prodp[size] = cy_limb;
Packit 6c4009
  prodp++;
Packit 6c4009
Packit 6c4009
  /* For each iteration in the outer loop, multiply one limb from
Packit 6c4009
     U with one limb from V, and add it to PROD.  */
Packit 6c4009
  for (i = 1; i < size; i++)
Packit 6c4009
    {
Packit 6c4009
      v_limb = up[i];
Packit 6c4009
      if (v_limb <= 1)
Packit 6c4009
	{
Packit 6c4009
	  cy_limb = 0;
Packit 6c4009
	  if (v_limb == 1)
Packit 6c4009
	    cy_limb = mpn_add_n (prodp, prodp, up, size);
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
Packit 6c4009
Packit 6c4009
      prodp[size] = cy_limb;
Packit 6c4009
      prodp++;
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
void
Packit 6c4009
impn_sqr_n (mp_ptr prodp,
Packit 6c4009
	     mp_srcptr up, mp_size_t size, mp_ptr tspace)
Packit 6c4009
{
Packit 6c4009
  if ((size & 1) != 0)
Packit 6c4009
    {
Packit 6c4009
      /* The size is odd, the code code below doesn't handle that.
Packit 6c4009
	 Multiply the least significant (size - 1) limbs with a recursive
Packit 6c4009
	 call, and handle the most significant limb of S1 and S2
Packit 6c4009
	 separately.  */
Packit 6c4009
      /* A slightly faster way to do this would be to make the Karatsuba
Packit 6c4009
	 code below behave as if the size were even, and let it check for
Packit 6c4009
	 odd size in the end.  I.e., in essence move this code to the end.
Packit 6c4009
	 Doing so would save us a recursive call, and potentially make the
Packit 6c4009
	 stack grow a lot less.  */
Packit 6c4009
Packit 6c4009
      mp_size_t esize = size - 1;	/* even size */
Packit 6c4009
      mp_limb_t cy_limb;
Packit 6c4009
Packit 6c4009
      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
Packit 6c4009
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
Packit 6c4009
      prodp[esize + esize] = cy_limb;
Packit 6c4009
      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
Packit 6c4009
Packit 6c4009
      prodp[esize + size] = cy_limb;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      mp_size_t hsize = size >> 1;
Packit 6c4009
      mp_limb_t cy;
Packit 6c4009
Packit 6c4009
      /*** Product H.	 ________________  ________________
Packit 6c4009
			|_____U1 x U1____||____U0 x U0_____|  */
Packit 6c4009
      /* Put result in upper part of PROD and pass low part of TSPACE
Packit 6c4009
	 as new TSPACE.  */
Packit 6c4009
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
Packit 6c4009
Packit 6c4009
      /*** Product M.	 ________________
Packit 6c4009
			|_(U1-U0)(U0-U1)_|  */
Packit 6c4009
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
Packit 6c4009
	{
Packit 6c4009
	  mpn_sub_n (prodp, up + hsize, up, hsize);
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  mpn_sub_n (prodp, up, up + hsize, hsize);
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Read temporary operands from low part of PROD.
Packit 6c4009
	 Put result in low part of TSPACE using upper part of TSPACE
Packit 6c4009
	 as new TSPACE.  */
Packit 6c4009
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
Packit 6c4009
Packit 6c4009
      /*** Add/copy product H.  */
Packit 6c4009
      MPN_COPY (prodp + hsize, prodp + size, hsize);
Packit 6c4009
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
Packit 6c4009
Packit 6c4009
      /*** Add product M (if NEGFLG M is a negative number).  */
Packit 6c4009
      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
Packit 6c4009
Packit 6c4009
      /*** Product L.	 ________________  ________________
Packit 6c4009
			|________________||____U0 x U0_____|  */
Packit 6c4009
      /* Read temporary operands from low part of PROD.
Packit 6c4009
	 Put result in low part of TSPACE using upper part of TSPACE
Packit 6c4009
	 as new TSPACE.  */
Packit 6c4009
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
Packit 6c4009
Packit 6c4009
      /*** Add/copy Product L (twice).  */
Packit 6c4009
Packit 6c4009
      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
Packit 6c4009
      if (cy)
Packit 6c4009
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
Packit 6c4009
Packit 6c4009
      MPN_COPY (prodp, tspace, hsize);
Packit 6c4009
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
Packit 6c4009
      if (cy)
Packit 6c4009
	mpn_add_1 (prodp + size, prodp + size, size, 1);
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* This should be made into an inline function in gmp.h.  */
Packit 6c4009
void
Packit 6c4009
mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
Packit 6c4009
{
Packit 6c4009
  TMP_DECL (marker);
Packit 6c4009
  TMP_MARK (marker);
Packit 6c4009
  if (up == vp)
Packit 6c4009
    {
Packit 6c4009
      if (size < KARATSUBA_THRESHOLD)
Packit 6c4009
	{
Packit 6c4009
	  impn_sqr_n_basecase (prodp, up, size);
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  mp_ptr tspace;
Packit 6c4009
	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
Packit 6c4009
	  impn_sqr_n (prodp, up, size, tspace);
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if (size < KARATSUBA_THRESHOLD)
Packit 6c4009
	{
Packit 6c4009
	  impn_mul_n_basecase (prodp, up, vp, size);
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  mp_ptr tspace;
Packit 6c4009
	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
Packit 6c4009
	  impn_mul_n (prodp, up, vp, size, tspace);
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  TMP_FREE (marker);
Packit 6c4009
}