Blame mpn/generic/mulmid.c

Packit 5c3484
/* mpn_mulmid -- middle product
Packit 5c3484
Packit 5c3484
   Contributed by David Harvey.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2011 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
Packit 5c3484
#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
Packit 5c3484
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_mulmid (mp_ptr rp,
Packit 5c3484
            mp_srcptr ap, mp_size_t an,
Packit 5c3484
            mp_srcptr bp, mp_size_t bn)
Packit 5c3484
{
Packit 5c3484
  mp_size_t rn, k;
Packit 5c3484
  mp_ptr scratch, temp;
Packit 5c3484
Packit 5c3484
  ASSERT (an >= bn);
Packit 5c3484
  ASSERT (bn >= 1);
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
Packit 5c3484
  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
Packit 5c3484
Packit 5c3484
  if (bn < MULMID_TOOM42_THRESHOLD)
Packit 5c3484
    {
Packit 5c3484
      /* region not tall enough to make toom42 worthwhile for any portion */
Packit 5c3484
Packit 5c3484
      if (an < CHUNK)
Packit 5c3484
	{
Packit 5c3484
	  /* region not too wide either, just call basecase directly */
Packit 5c3484
	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
Packit 5c3484
	  return;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Region quite wide. For better locality, use basecase on chunks:
Packit 5c3484
Packit 5c3484
	 AAABBBCC..
Packit 5c3484
	 .AAABBBCC.
Packit 5c3484
	 ..AAABBBCC
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
      k = CHUNK - bn + 1;    /* number of diagonals per chunk */
Packit 5c3484
Packit 5c3484
      /* first chunk (marked A in the above diagram) */
Packit 5c3484
      mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
Packit 5c3484
Packit 5c3484
      /* remaining chunks (B, C, etc) */
Packit 5c3484
      an -= k;
Packit 5c3484
Packit 5c3484
      while (an >= CHUNK)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t t0, t1, cy;
Packit 5c3484
	  ap += k, rp += k;
Packit 5c3484
	  t0 = rp[0], t1 = rp[1];
Packit 5c3484
	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
Packit 5c3484
	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
Packit 5c3484
	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
Packit 5c3484
	  an -= k;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      if (an >= bn)
Packit 5c3484
	{
Packit 5c3484
	  /* last remaining chunk */
Packit 5c3484
	  mp_limb_t t0, t1, cy;
Packit 5c3484
	  ap += k, rp += k;
Packit 5c3484
	  t0 = rp[0], t1 = rp[1];
Packit 5c3484
	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
Packit 5c3484
	  ADDC_LIMB (cy, rp[0], rp[0], t0);
Packit 5c3484
	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* region is tall enough for toom42 */
Packit 5c3484
Packit 5c3484
  rn = an - bn + 1;
Packit 5c3484
Packit 5c3484
  if (rn < MULMID_TOOM42_THRESHOLD)
Packit 5c3484
    {
Packit 5c3484
      /* region not wide enough to make toom42 worthwhile for any portion */
Packit 5c3484
Packit 5c3484
      TMP_DECL;
Packit 5c3484
Packit 5c3484
      if (bn < CHUNK)
Packit 5c3484
	{
Packit 5c3484
	  /* region not too tall either, just call basecase directly */
Packit 5c3484
	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
Packit 5c3484
	  return;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Region quite tall. For better locality, use basecase on chunks:
Packit 5c3484
Packit 5c3484
	 AAAAA....
Packit 5c3484
	 .AAAAA...
Packit 5c3484
	 ..BBBBB..
Packit 5c3484
	 ...BBBBB.
Packit 5c3484
	 ....CCCCC
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
      TMP_MARK;
Packit 5c3484
Packit 5c3484
      temp = TMP_ALLOC_LIMBS (rn + 2);
Packit 5c3484
Packit 5c3484
      /* first chunk (marked A in the above diagram) */
Packit 5c3484
      bp += bn - CHUNK, an -= bn - CHUNK;
Packit 5c3484
      mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
Packit 5c3484
Packit 5c3484
      /* remaining chunks (B, C, etc) */
Packit 5c3484
      bn -= CHUNK;
Packit 5c3484
Packit 5c3484
      while (bn >= CHUNK)
Packit 5c3484
	{
Packit 5c3484
	  ap += CHUNK, bp -= CHUNK;
Packit 5c3484
	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
Packit 5c3484
	  mpn_add_n (rp, rp, temp, rn + 2);
Packit 5c3484
	  bn -= CHUNK;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      if (bn)
Packit 5c3484
	{
Packit 5c3484
	  /* last remaining chunk */
Packit 5c3484
	  ap += CHUNK, bp -= bn;
Packit 5c3484
	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
Packit 5c3484
	  mpn_add_n (rp, rp, temp, rn + 2);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* we're definitely going to use toom42 somewhere */
Packit 5c3484
Packit 5c3484
  if (bn > rn)
Packit 5c3484
    {
Packit 5c3484
      /* slice region into chunks, use toom42 on all chunks except possibly
Packit 5c3484
	 the last:
Packit 5c3484
Packit 5c3484
         AA....
Packit 5c3484
         .AA...
Packit 5c3484
         ..BB..
Packit 5c3484
         ...BB.
Packit 5c3484
         ....CC
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
      TMP_DECL;
Packit 5c3484
      TMP_MARK;
Packit 5c3484
Packit 5c3484
      temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
Packit 5c3484
      scratch = temp + rn + 2;
Packit 5c3484
Packit 5c3484
      /* first chunk (marked A in the above diagram) */
Packit 5c3484
      bp += bn - rn;
Packit 5c3484
      mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
Packit 5c3484
Packit 5c3484
      /* remaining chunks (B, C, etc) */
Packit 5c3484
      bn -= rn;
Packit 5c3484
Packit 5c3484
      while (bn >= rn)
Packit 5c3484
        {
Packit 5c3484
          ap += rn, bp -= rn;
Packit 5c3484
	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
Packit 5c3484
          mpn_add_n (rp, rp, temp, rn + 2);
Packit 5c3484
          bn -= rn;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      if (bn)
Packit 5c3484
        {
Packit 5c3484
          /* last remaining chunk */
Packit 5c3484
          ap += rn, bp -= bn;
Packit 5c3484
	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
Packit 5c3484
          mpn_add_n (rp, rp, temp, rn + 2);
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* slice region into chunks, use toom42 on all chunks except possibly
Packit 5c3484
	 the last:
Packit 5c3484
Packit 5c3484
         AAABBBCC..
Packit 5c3484
         .AAABBBCC.
Packit 5c3484
         ..AAABBBCC
Packit 5c3484
      */
Packit 5c3484
Packit 5c3484
      TMP_DECL;
Packit 5c3484
      TMP_MARK;
Packit 5c3484
Packit 5c3484
      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
Packit 5c3484
Packit 5c3484
      /* first chunk (marked A in the above diagram) */
Packit 5c3484
      mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
Packit 5c3484
Packit 5c3484
      /* remaining chunks (B, C, etc) */
Packit 5c3484
      rn -= bn;
Packit 5c3484
Packit 5c3484
      while (rn >= bn)
Packit 5c3484
        {
Packit 5c3484
	  mp_limb_t t0, t1, cy;
Packit 5c3484
          ap += bn, rp += bn;
Packit 5c3484
          t0 = rp[0], t1 = rp[1];
Packit 5c3484
          mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
Packit 5c3484
	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
Packit 5c3484
	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
Packit 5c3484
	  rn -= bn;
Packit 5c3484
        }
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
Packit 5c3484
      if (rn)
Packit 5c3484
        {
Packit 5c3484
          /* last remaining chunk */
Packit 5c3484
	  mp_limb_t t0, t1, cy;
Packit 5c3484
          ap += bn, rp += bn;
Packit 5c3484
          t0 = rp[0], t1 = rp[1];
Packit 5c3484
          mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
Packit 5c3484
	  ADDC_LIMB (cy, rp[0], rp[0], t0);
Packit 5c3484
	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
Packit 5c3484
        }
Packit 5c3484
    }
Packit 5c3484
}