/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
   times as large as bn.  Or more accurately, bn < an < 2 bn.

   Contributed to the GNU project by Marco Bodrato.

   The idea of applying toom to unbalanced multiplication is due to Marco
   Bodrato and Alberto Zanoni.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"

/* Evaluate in: -2, -1, 0, +1, +2, +inf

  <-s-><--n--><--n--><--n-->
   ___ ______ ______ ______
  |a3_|___a2_|___a1_|___a0_|
	|_b2_|___b1_|___b0_|
	<-t--><--n--><--n-->

  v0  =  a0             * b0          #   A(0)*B(0)
  v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 3  bh <= 2
  vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1 |bh|<= 1
  v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 14 bh <= 6
  vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) #  A(-2)*B(-2)    |ah| <= 9 |bh|<= 4
  vinf=              a3 *         b2  # A(inf)*B(inf)
*/

void
Packit 5c3484
mpn_toom43_mul (mp_ptr pp,
Packit 5c3484
		mp_srcptr ap, mp_size_t an,
Packit 5c3484
		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
Packit 5c3484
{
Packit 5c3484
  mp_size_t n, s, t;
Packit 5c3484
  enum toom6_flags flags;
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
Packit 5c3484
#define a0  ap
Packit 5c3484
#define a1  (ap + n)
Packit 5c3484
#define a2  (ap + 2 * n)
Packit 5c3484
#define a3  (ap + 3 * n)
Packit 5c3484
#define b0  bp
Packit 5c3484
#define b1  (bp + n)
Packit 5c3484
#define b2  (bp + 2 * n)
Packit 5c3484
Packit 5c3484
  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
Packit 5c3484
Packit 5c3484
  s = an - 3 * n;
Packit 5c3484
  t = bn - 2 * n;
Packit 5c3484
Packit 5c3484
  ASSERT (0 < s && s <= n);
Packit 5c3484
  ASSERT (0 < t && t <= n);
Packit 5c3484
Packit 5c3484
  /* This is true whenever an >= 25 or bn >= 19, I think. It
Packit 5c3484
     guarantees that we can fit 5 values of size n+1 in the product
Packit 5c3484
     area. */
Packit 5c3484
  ASSERT (s+t >= 5);
Packit 5c3484
Packit 5c3484
#define v0    pp				/* 2n */
Packit 5c3484
#define vm1   (scratch)				/* 2n+1 */
Packit 5c3484
#define v1    (pp + 2*n)			/* 2n+1 */
Packit 5c3484
#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
Packit 5c3484
#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
Packit 5c3484
#define vinf  (pp + 5 * n)			/* s+t */
Packit 5c3484
#define bs1    pp				/* n+1 */
Packit 5c3484
#define bsm1  (scratch + 2 * n + 2)		/* n+1 */
Packit 5c3484
#define asm1  (scratch + 3 * n + 3)		/* n+1 */
Packit 5c3484
#define asm2  (scratch + 4 * n + 4)		/* n+1 */
Packit 5c3484
#define bsm2  (pp + n + 1)			/* n+1 */
Packit 5c3484
#define bs2   (pp + 2 * n + 2)			/* n+1 */
Packit 5c3484
#define as2   (pp + 3 * n + 3)			/* n+1 */
Packit 5c3484
#define as1   (pp + 4 * n + 4)			/* n+1 */
Packit 5c3484
Packit 5c3484
  /* Total sccratch need is 6 * n + 3 + 1; we allocate one extra
Packit 5c3484
     limb, because products will overwrite 2n+2 limbs. */
Packit 5c3484
Packit 5c3484
#define a0a2  scratch
Packit 5c3484
#define b0b2  scratch
Packit 5c3484
#define a1a3  asm1
Packit 5c3484
#define b1d   bsm1
Packit 5c3484
Packit 5c3484
  /* Compute as2 and asm2.  */
Packit 5c3484
  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
Packit 5c3484
Packit 5c3484
  /* Compute bs2 and bsm2.  */
Packit 5c3484
  b1d[n] = mpn_lshift (b1d, b1, n, 1);			/*       2b1      */
Packit 5c3484
  cy  = mpn_lshift (b0b2, b2, t, 2);			/*  4b2           */
Packit 5c3484
  cy += mpn_add_n (b0b2, b0b2, b0, t);			/*  4b2      + b0 */
Packit 5c3484
  if (t != n)
Packit 5c3484
    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
Packit 5c3484
  b0b2[n] = cy;
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_add_n_sub_n
Packit 5c3484
  if (mpn_cmp (b0b2, b1d, n+1) < 0)
Packit 5c3484
    {
Packit 5c3484
      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
Packit 5c3484
      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
Packit 5c3484
    }
Packit 5c3484
#else
Packit 5c3484
  mpn_add_n (bs2, b0b2, b1d, n+1);
Packit 5c3484
  if (mpn_cmp (b0b2, b1d, n+1) < 0)
Packit 5c3484
    {
Packit 5c3484
      mpn_sub_n (bsm2, b1d, b0b2, n+1);
Packit 5c3484
      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mpn_sub_n (bsm2, b0b2, b1d, n+1);
Packit 5c3484
    }
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  /* Compute as1 and asm1.  */
Packit 5c3484
  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));
Packit 5c3484
Packit 5c3484
  /* Compute bs1 and bsm1.  */
Packit 5c3484
  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
Packit 5c3484
#if HAVE_NATIVE_mpn_add_n_sub_n
Packit 5c3484
  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
Packit 5c3484
    {
Packit 5c3484
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
Packit 5c3484
      bs1[n] = cy >> 1;
Packit 5c3484
      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
Packit 5c3484
      bs1[n] = bsm1[n] + (cy >> 1);
Packit 5c3484
      bsm1[n]-= cy & 1;
Packit 5c3484
    }
Packit 5c3484
#else
Packit 5c3484
  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
Packit 5c3484
  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
Packit 5c3484
    {
Packit 5c3484
      mpn_sub_n (bsm1, b1, bsm1, n);
Packit 5c3484
      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
Packit 5c3484
    }
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  ASSERT (as1[n] <= 3);
Packit 5c3484
  ASSERT (bs1[n] <= 2);
Packit 5c3484
  ASSERT (asm1[n] <= 1);
Packit 5c3484
  ASSERT (bsm1[n] <= 1);
Packit 5c3484
  ASSERT (as2[n] <=14);
Packit 5c3484
  ASSERT (bs2[n] <= 6);
Packit 5c3484
  ASSERT (asm2[n] <= 9);
Packit 5c3484
  ASSERT (bsm2[n] <= 4);
Packit 5c3484
Packit 5c3484
  /* vm1, 2n+1 limbs */
Packit 5c3484
  mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */
Packit 5c3484
Packit 5c3484
  /* vm2, 2n+1 limbs */
Packit 5c3484
  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
Packit 5c3484
Packit 5c3484
  /* v2, 2n+1 limbs */
Packit 5c3484
  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
Packit 5c3484
Packit 5c3484
  /* v1, 2n+1 limbs */
Packit 5c3484
  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
Packit 5c3484
Packit 5c3484
  /* vinf, s+t limbs */   /* W0 */
Packit 5c3484
  if (s > t)  mpn_mul (vinf, a3, s, b2, t);
Packit 5c3484
  else        mpn_mul (vinf, b2, t, a3, s);
Packit 5c3484
Packit 5c3484
  /* v0, 2n limbs */
Packit 5c3484
  mpn_mul_n (v0, ap, bp, n);  /* W5 */
Packit 5c3484
Packit 5c3484
  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
Packit 5c3484
Packit 5c3484
#undef v0
Packit 5c3484
#undef vm1
Packit 5c3484
#undef v1
Packit 5c3484
#undef vm2
Packit 5c3484
#undef v2
Packit 5c3484
#undef vinf
Packit 5c3484
#undef bs1
Packit 5c3484
#undef bs2
Packit 5c3484
#undef bsm1
Packit 5c3484
#undef bsm2
Packit 5c3484
#undef asm1
Packit 5c3484
#undef asm2
Packit 5c3484
/* #undef as1 */
Packit 5c3484
/* #undef as2 */
Packit 5c3484
#undef a0a2
Packit 5c3484
#undef b0b2
Packit 5c3484
#undef a1a3
Packit 5c3484
#undef b1d
Packit 5c3484
#undef a0
Packit 5c3484
#undef a1
Packit 5c3484
#undef a2
Packit 5c3484
#undef a3
Packit 5c3484
#undef b0
Packit 5c3484
#undef b1
Packit 5c3484
#undef b2
Packit 5c3484
}