Blame mpn/generic/mul.c

Packit 5c3484
/* mpn_mul -- Multiply two natural numbers.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012,
Packit 5c3484
2014 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
#ifndef MUL_BASECASE_MAX_UN
Packit 5c3484
#define MUL_BASECASE_MAX_UN 500
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Areas where the different toom algorithms can be called (extracted
Packit 5c3484
   from the t-toom*.c files, and ignoring small constant offsets):
Packit 5c3484
Packit 5c3484
   1/6  1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5   1 vn/un
Packit 5c3484
                                        4/7              6/7
Packit 5c3484
				       6/11
Packit 5c3484
                                       |--------------------| toom22 (small)
Packit 5c3484
                                                           || toom22 (large)
Packit 5c3484
                                                       |xxxx| toom22 called
Packit 5c3484
                      |-------------------------------------| toom32
Packit 5c3484
                                         |xxxxxxxxxxxxxxxx| | toom32 called
Packit 5c3484
                                               |------------| toom33
Packit 5c3484
                                                          |x| toom33 called
Packit 5c3484
             |---------------------------------|            | toom42
Packit 5c3484
	              |xxxxxxxxxxxxxxxxxxxxxxxx|            | toom42 called
Packit 5c3484
                                       |--------------------| toom43
Packit 5c3484
                                               |xxxxxxxxxx|   toom43 called
Packit 5c3484
         |-----------------------------|                      toom52 (unused)
Packit 5c3484
                                                   |--------| toom44
Packit 5c3484
						   |xxxxxxxx| toom44 called
Packit 5c3484
                              |--------------------|        | toom53
Packit 5c3484
                                        |xxxxxx|              toom53 called
Packit 5c3484
    |-------------------------|                               toom62 (unused)
Packit 5c3484
                                           |----------------| toom54 (unused)
Packit 5c3484
                      |--------------------|                  toom63
Packit 5c3484
	                      |xxxxxxxxx|                   | toom63 called
Packit 5c3484
                          |---------------------------------| toom6h
Packit 5c3484
						   |xxxxxxxx| toom6h called
Packit 5c3484
                                  |-------------------------| toom8h (32 bit)
Packit 5c3484
                 |------------------------------------------| toom8h (64 bit)
Packit 5c3484
						   |xxxxxxxx| toom8h called
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
Packit 5c3484
#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
Packit 5c3484
Packit 5c3484
/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
Packit 5c3484
   (pointed to by VP, with VN limbs), and store the result at PRODP.  The
Packit 5c3484
   result is UN + VN limbs.  Return the most significant limb of the result.
Packit 5c3484
Packit 5c3484
   NOTE: The space pointed to by PRODP is overwritten before finished with U
Packit 5c3484
   and V, so overlap is an error.
Packit 5c3484
Packit 5c3484
   Argument constraints:
Packit 5c3484
   1. UN >= VN.
Packit 5c3484
   2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
Packit 5c3484
      the multiplier and the multiplicand.  */
Packit 5c3484
Packit 5c3484
/*
Packit 5c3484
  * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
Packit 5c3484
    ideal lines of the surrounding algorithms.  Is that optimal?
Packit 5c3484
Packit 5c3484
  * The toomX3 code now uses a structure similar to the one of toomX2, except
Packit 5c3484
    that it loops longer in the unbalanced case.  The result is that the
Packit 5c3484
    remaining area might have un < vn.  Should we fix the toomX2 code in a
Packit 5c3484
    similar way?
Packit 5c3484
Packit 5c3484
  * The toomX3 code is used for the largest non-FFT unbalanced operands.  It
Packit 5c3484
    therefore calls mpn_mul recursively for certain cases.
Packit 5c3484
Packit 5c3484
  * Allocate static temp space using THRESHOLD variables (except for toom44
Packit 5c3484
    when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.
Packit 5c3484
Packit 5c3484
  * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
Packit 5c3484
    have the same vn threshold.  This is not true, we should actually use
Packit 5c3484
    mul_basecase for slightly larger operands for toom32 than for toom22, and
Packit 5c3484
    even larger for toom42.
Packit 5c3484
Packit 5c3484
  * That problem is even more prevalent for toomX3.  We therefore use special
Packit 5c3484
    THRESHOLD variables there.
Packit 5c3484
Packit 5c3484
  * Is our ITCH allocation correct?
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#define ITCH (16*vn + 100)
Packit 5c3484
Packit 5c3484
mp_limb_t
Packit 5c3484
mpn_mul (mp_ptr prodp,
Packit 5c3484
	 mp_srcptr up, mp_size_t un,
Packit 5c3484
	 mp_srcptr vp, mp_size_t vn)
Packit 5c3484
{
Packit 5c3484
  ASSERT (un >= vn);
Packit 5c3484
  ASSERT (vn >= 1);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
Packit 5c3484
Packit 5c3484
  if (un == vn)
Packit 5c3484
    {
Packit 5c3484
      if (up == vp)
Packit 5c3484
	mpn_sqr (prodp, up, un);
Packit 5c3484
      else
Packit 5c3484
	mpn_mul_n (prodp, up, vp, un);
Packit 5c3484
    }
Packit 5c3484
  else if (vn < MUL_TOOM22_THRESHOLD)
Packit 5c3484
    { /* plain schoolbook multiplication */
Packit 5c3484
Packit 5c3484
      /* Unless un is very large, or else if have an applicable mpn_mul_N,
Packit 5c3484
	 perform basecase multiply directly.  */
Packit 5c3484
      if (un <= MUL_BASECASE_MAX_UN
Packit 5c3484
#if HAVE_NATIVE_mpn_mul_2
Packit 5c3484
	  || vn <= 2
Packit 5c3484
#else
Packit 5c3484
	  || vn == 1
Packit 5c3484
#endif
Packit 5c3484
	  )
Packit 5c3484
	mpn_mul_basecase (prodp, up, un, vp, vn);
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory
Packit 5c3484
	     locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
Packit 5c3484
	     these pieces with the vp[] operand.  After each such partial
Packit 5c3484
	     multiplication (but the last) we copy the most significant vn
Packit 5c3484
	     limbs into a temporary buffer since that part would otherwise be
Packit 5c3484
	     overwritten by the next multiplication.  After the next
Packit 5c3484
	     multiplication, we add it back.  This illustrates the situation:
Packit 5c3484
Packit 5c3484
                                                    -->vn<--
Packit 5c3484
                                                      |  |<------- un ------->|
Packit 5c3484
                                                         _____________________|
Packit 5c3484
                                                        X                    /|
Packit 5c3484
                                                      /XX__________________/  |
Packit 5c3484
                                    _____________________                     |
Packit 5c3484
                                   X                    /                     |
Packit 5c3484
                                 /XX__________________/                       |
Packit 5c3484
               _____________________                                          |
Packit 5c3484
              /                    /                                          |
Packit 5c3484
            /____________________/                                            |
Packit 5c3484
	    ==================================================================
Packit 5c3484
Packit 5c3484
	    The parts marked with X are the parts whose sums are copied into
Packit 5c3484
	    the temporary buffer.  */
Packit 5c3484
Packit 5c3484
	  mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
	  ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
Packit 5c3484
Packit 5c3484
	  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
Packit 5c3484
	  prodp += MUL_BASECASE_MAX_UN;
Packit 5c3484
	  MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
Packit 5c3484
	  up += MUL_BASECASE_MAX_UN;
Packit 5c3484
	  un -= MUL_BASECASE_MAX_UN;
Packit 5c3484
	  while (un > MUL_BASECASE_MAX_UN)
Packit 5c3484
	    {
Packit 5c3484
	      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
Packit 5c3484
	      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
Packit 5c3484
	      mpn_incr_u (prodp + vn, cy);
Packit 5c3484
	      prodp += MUL_BASECASE_MAX_UN;
Packit 5c3484
	      MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
Packit 5c3484
	      up += MUL_BASECASE_MAX_UN;
Packit 5c3484
	      un -= MUL_BASECASE_MAX_UN;
Packit 5c3484
	    }
Packit 5c3484
	  if (un > vn)
Packit 5c3484
	    {
Packit 5c3484
	      mpn_mul_basecase (prodp, up, un, vp, vn);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      ASSERT (un > 0);
Packit 5c3484
	      mpn_mul_basecase (prodp, vp, vn, up, un);
Packit 5c3484
	    }
Packit 5c3484
	  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
Packit 5c3484
	  mpn_incr_u (prodp + vn, cy);
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      /* Use ToomX2 variants */
Packit 5c3484
      mp_ptr scratch;
Packit 5c3484
      TMP_SDECL; TMP_SMARK;
Packit 5c3484
Packit 5c3484
#define ITCH_TOOMX2 (9 * vn / 2 + GMP_NUMB_BITS * 2)
Packit 5c3484
      scratch = TMP_SALLOC_LIMBS (ITCH_TOOMX2);
Packit 5c3484
      ASSERT (mpn_toom22_mul_itch ((5*vn-1)/4, vn) <= ITCH_TOOMX2); /* 5vn/2+ */
Packit 5c3484
      ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX2); /* 7vn/6+ */
Packit 5c3484
      ASSERT (mpn_toom42_mul_itch (3 * vn - 1, vn) <= ITCH_TOOMX2); /* 9vn/2+ */
Packit 5c3484
#undef ITCH_TOOMX2
Packit 5c3484
Packit 5c3484
      /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
Packit 5c3484
	 square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
Packit 5c3484
	 wise; we would get better balance by slightly moving the bound.  We
Packit 5c3484
	 will sometimes end up with un < vn, like in the X3 arm below.  */
Packit 5c3484
      if (un >= 3 * vn)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
	  mp_ptr ws;
Packit 5c3484
Packit 5c3484
	  /* The maximum ws usage is for the mpn_mul result.  */
Packit 5c3484
	  ws = TMP_SALLOC_LIMBS (4 * vn);
Packit 5c3484
Packit 5c3484
	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
Packit 5c3484
	  un -= 2 * vn;
Packit 5c3484
	  up += 2 * vn;
Packit 5c3484
	  prodp += 2 * vn;
Packit 5c3484
Packit 5c3484
	  while (un >= 3 * vn)
Packit 5c3484
	    {
Packit 5c3484
	      mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
Packit 5c3484
	      un -= 2 * vn;
Packit 5c3484
	      up += 2 * vn;
Packit 5c3484
	      cy = mpn_add_n (prodp, prodp, ws, vn);
Packit 5c3484
	      MPN_COPY (prodp + vn, ws + vn, 2 * vn);
Packit 5c3484
	      mpn_incr_u (prodp + vn, cy);
Packit 5c3484
	      prodp += 2 * vn;
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  /* vn <= un < 3vn */
Packit 5c3484
Packit 5c3484
	  if (4 * un < 5 * vn)
Packit 5c3484
	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
Packit 5c3484
	  else if (4 * un < 7 * vn)
Packit 5c3484
	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
Packit 5c3484
	  else
Packit 5c3484
	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
Packit 5c3484
Packit 5c3484
	  cy = mpn_add_n (prodp, prodp, ws, vn);
Packit 5c3484
	  MPN_COPY (prodp + vn, ws + vn, un);
Packit 5c3484
	  mpn_incr_u (prodp + vn, cy);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  if (4 * un < 5 * vn)
Packit 5c3484
	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	  else if (4 * un < 7 * vn)
Packit 5c3484
	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	  else
Packit 5c3484
	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	}
Packit 5c3484
      TMP_SFREE;
Packit 5c3484
    }
Packit 5c3484
  else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
Packit 5c3484
	   BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      /* Handle the largest operands that are not in the FFT range.  The 2nd
Packit 5c3484
	 condition makes very unbalanced operands avoid the FFT code (except
Packit 5c3484
	 perhaps as coefficient products of the Toom code.  */
Packit 5c3484
Packit 5c3484
      if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
Packit 5c3484
	{
Packit 5c3484
	  /* Use ToomX3 variants */
Packit 5c3484
	  mp_ptr scratch;
Packit 5c3484
	  TMP_DECL; TMP_MARK;
Packit 5c3484
Packit 5c3484
#define ITCH_TOOMX3 (4 * vn + GMP_NUMB_BITS)
Packit 5c3484
	  scratch = TMP_ALLOC_LIMBS (ITCH_TOOMX3);
Packit 5c3484
	  ASSERT (mpn_toom33_mul_itch ((7*vn-1)/6, vn) <= ITCH_TOOMX3); /* 7vn/2+ */
Packit 5c3484
	  ASSERT (mpn_toom43_mul_itch ((3*vn-1)/2, vn) <= ITCH_TOOMX3); /* 9vn/4+ */
Packit 5c3484
	  ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX3); /* 7vn/6+ */
Packit 5c3484
	  ASSERT (mpn_toom53_mul_itch ((11*vn-1)/6, vn) <= ITCH_TOOMX3); /* 11vn/3+ */
Packit 5c3484
	  ASSERT (mpn_toom42_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
Packit 5c3484
	  ASSERT (mpn_toom63_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
Packit 5c3484
#undef ITCH_TOOMX3
Packit 5c3484
Packit 5c3484
	  if (2 * un >= 5 * vn)
Packit 5c3484
	    {
Packit 5c3484
	      mp_limb_t cy;
Packit 5c3484
	      mp_ptr ws;
Packit 5c3484
Packit 5c3484
	      /* The maximum ws usage is for the mpn_mul result.  */
Packit 5c3484
	      ws = TMP_ALLOC_LIMBS (7 * vn >> 1);
Packit 5c3484
Packit 5c3484
	      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
Packit 5c3484
		mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
Packit 5c3484
	      else
Packit 5c3484
		mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
Packit 5c3484
	      un -= 2 * vn;
Packit 5c3484
	      up += 2 * vn;
Packit 5c3484
	      prodp += 2 * vn;
Packit 5c3484
Packit 5c3484
	      while (2 * un >= 5 * vn)	/* un >= 2.5vn */
Packit 5c3484
		{
Packit 5c3484
		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
Packit 5c3484
		    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
Packit 5c3484
		  else
Packit 5c3484
		    mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
Packit 5c3484
		  un -= 2 * vn;
Packit 5c3484
		  up += 2 * vn;
Packit 5c3484
		  cy = mpn_add_n (prodp, prodp, ws, vn);
Packit 5c3484
		  MPN_COPY (prodp + vn, ws + vn, 2 * vn);
Packit 5c3484
		  mpn_incr_u (prodp + vn, cy);
Packit 5c3484
		  prodp += 2 * vn;
Packit 5c3484
		}
Packit 5c3484
Packit 5c3484
	      /* vn / 2 <= un < 2.5vn */
Packit 5c3484
Packit 5c3484
	      if (un < vn)
Packit 5c3484
		mpn_mul (ws, vp, vn, up, un);
Packit 5c3484
	      else
Packit 5c3484
		mpn_mul (ws, up, un, vp, vn);
Packit 5c3484
Packit 5c3484
	      cy = mpn_add_n (prodp, prodp, ws, vn);
Packit 5c3484
	      MPN_COPY (prodp + vn, ws + vn, un);
Packit 5c3484
	      mpn_incr_u (prodp + vn, cy);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      if (6 * un < 7 * vn)
Packit 5c3484
		mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	      else if (2 * un < 3 * vn)
Packit 5c3484
		{
Packit 5c3484
		  if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
Packit 5c3484
		    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		  else
Packit 5c3484
		    mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		}
Packit 5c3484
	      else if (6 * un < 11 * vn)
Packit 5c3484
		{
Packit 5c3484
		  if (4 * un < 7 * vn)
Packit 5c3484
		    {
Packit 5c3484
		      if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
Packit 5c3484
			mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		      else
Packit 5c3484
			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		    }
Packit 5c3484
		  else
Packit 5c3484
		    {
Packit 5c3484
		      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
Packit 5c3484
			mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		      else
Packit 5c3484
			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		    }
Packit 5c3484
		}
Packit 5c3484
	      else
Packit 5c3484
		{
Packit 5c3484
		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
Packit 5c3484
		    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		  else
Packit 5c3484
		    mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
		}
Packit 5c3484
	    }
Packit 5c3484
	  TMP_FREE;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_ptr scratch;
Packit 5c3484
	  TMP_DECL; TMP_MARK;
Packit 5c3484
Packit 5c3484
	  if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
	      scratch = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
Packit 5c3484
	      mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
Packit 5c3484
	    {
Packit 5c3484
	      scratch = TMP_SALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
Packit 5c3484
	      mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
Packit 5c3484
	      mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
Packit 5c3484
	    }
Packit 5c3484
	  TMP_FREE;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      if (un >= 8 * vn)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
	  mp_ptr ws;
Packit 5c3484
	  TMP_DECL; TMP_MARK;
Packit 5c3484
Packit 5c3484
	  /* The maximum ws usage is for the mpn_mul result.  */
Packit 5c3484
	  ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
Packit 5c3484
Packit 5c3484
	  mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
Packit 5c3484
	  un -= 3 * vn;
Packit 5c3484
	  up += 3 * vn;
Packit 5c3484
	  prodp += 3 * vn;
Packit 5c3484
Packit 5c3484
	  while (2 * un >= 7 * vn)	/* un >= 3.5vn  */
Packit 5c3484
	    {
Packit 5c3484
	      mpn_fft_mul (ws, up, 3 * vn, vp, vn);
Packit 5c3484
	      un -= 3 * vn;
Packit 5c3484
	      up += 3 * vn;
Packit 5c3484
	      cy = mpn_add_n (prodp, prodp, ws, vn);
Packit 5c3484
	      MPN_COPY (prodp + vn, ws + vn, 3 * vn);
Packit 5c3484
	      mpn_incr_u (prodp + vn, cy);
Packit 5c3484
	      prodp += 3 * vn;
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  /* vn / 2 <= un < 3.5vn */
Packit 5c3484
Packit 5c3484
	  if (un < vn)
Packit 5c3484
	    mpn_mul (ws, vp, vn, up, un);
Packit 5c3484
	  else
Packit 5c3484
	    mpn_mul (ws, up, un, vp, vn);
Packit 5c3484
Packit 5c3484
	  cy = mpn_add_n (prodp, prodp, ws, vn);
Packit 5c3484
	  MPN_COPY (prodp + vn, ws + vn, un);
Packit 5c3484
	  mpn_incr_u (prodp + vn, cy);
Packit 5c3484
Packit 5c3484
	  TMP_FREE;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	mpn_fft_mul (prodp, up, un, vp, vn);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return prodp[un + vn - 1];	/* historic */
Packit 5c3484
}