Blob Blame History Raw
/*
 * Copyright (C) 2015 Colin Walters <walters@verbum.org>
 *
 * SPDX-License-Identifier: LGPL-2.0+
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "config.h"

#include <string.h>
#include <zlib.h>

#include "ostree-rollsum.h"
#include "libglnx.h"
#include "bupsplit.h"

#define ROLLSUM_BLOB_MAX (8192*4)

static GHashTable *
rollsum_chunks_crc32 (GBytes           *bytes)
{
  gsize start = 0;
  gboolean rollsum_end = FALSE;
  GHashTable *ret_rollsums = NULL;
  const guint8 *buf;
  gsize buflen;
  gsize remaining;

  ret_rollsums = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_ptr_array_unref);

  buf = g_bytes_get_data (bytes, &buflen);

  remaining = buflen;
  while (remaining > 0)
    {
      int offset, bits;

      if (!rollsum_end)
        {
          offset = bupsplit_find_ofs (buf + start, MIN(G_MAXINT32, remaining), &bits); 
          if (offset == 0)
            {
              rollsum_end = TRUE;
              offset = MIN(ROLLSUM_BLOB_MAX, remaining);
            }
          else if (offset > ROLLSUM_BLOB_MAX)
            offset = ROLLSUM_BLOB_MAX;
        }
      else
        offset = MIN(ROLLSUM_BLOB_MAX, remaining);

      /* Use zlib's crc32 */
      { guint32 crc = crc32 (0L, NULL, 0);
        GVariant *val;
        GPtrArray *matches;

        crc = crc32 (crc, buf, offset);

        val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset));
        matches = g_hash_table_lookup (ret_rollsums, GUINT_TO_POINTER (crc));
        if (!matches)
          {
            matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
            g_hash_table_insert (ret_rollsums, GUINT_TO_POINTER (crc), matches);
          }
        g_ptr_array_add (matches, val);
      }

      start += offset;
      remaining -= offset;
    }

  return ret_rollsums;
}

static gint
compare_matches (const void *app,
                 const void *bpp)
{
  GVariant **avpp = (GVariant**)app;
  GVariant *a = *avpp;
  GVariant **bvpp = (GVariant**)bpp;
  GVariant *b = *bvpp;
  guint64 a_start, b_start;
  
  g_variant_get_child (a, 2, "t", &a_start);
  g_variant_get_child (b, 2, "t", &b_start);

  g_assert_cmpint (a_start, !=, b_start);

  if (a_start < b_start)
    return -1;
  return 1;
}

OstreeRollsumMatches *
_ostree_compute_rollsum_matches (GBytes                           *from,
                                 GBytes                           *to)
{
  OstreeRollsumMatches *ret_rollsum = NULL;
  g_autoptr(GHashTable) from_rollsum = NULL;
  g_autoptr(GHashTable) to_rollsum = NULL;
  g_autoptr(GPtrArray) matches = NULL;
  const guint8 *from_buf;
  gsize from_len;
  const guint8 *to_buf;
  gsize to_len;
  gpointer hkey, hvalue;
  GHashTableIter hiter;

  ret_rollsum = g_new0 (OstreeRollsumMatches, 1);

  matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);

  from_buf = g_bytes_get_data (from, &from_len);
  to_buf = g_bytes_get_data (to, &to_len);

  from_rollsum = rollsum_chunks_crc32 (from);
  to_rollsum = rollsum_chunks_crc32 (to);

  g_hash_table_iter_init (&hiter, to_rollsum);
  while (g_hash_table_iter_next (&hiter, &hkey, &hvalue))
    {
      GPtrArray *to_chunks = hvalue;
      GPtrArray *from_chunks;

      from_chunks = g_hash_table_lookup (from_rollsum, hkey);
      if (from_chunks != NULL)
        {
          guint i;

          ret_rollsum->crcmatches++;

          for (i = 0; i < to_chunks->len; i++)
            {
              GVariant *to_chunk = to_chunks->pdata[i];
              guint64 to_start, to_offset;
              guint32 tocrc;
              guint j;

              g_variant_get (to_chunk, "(utt)", &tocrc, &to_start, &to_offset);

              for (j = 0; j < from_chunks->len; j++)
                {
                  GVariant *from_chunk = from_chunks->pdata[j];
                  guint32 fromcrc;
                  guint64 from_start, from_offset;

                  g_variant_get (from_chunk, "(utt)", &fromcrc, &from_start, &from_offset);

                  g_assert (fromcrc == tocrc);

                  /* Same crc32 but different length, skip it.  */
                  if (to_offset != from_offset)
                    continue;
                  
                  /* Rsync uses a cryptographic checksum, but let's be
                   * very conservative here and just memcmp.
                   */
                  if (memcmp (from_buf + from_start, to_buf + to_start, to_offset) == 0)
                    {
                      GVariant *match = g_variant_new ("(uttt)", fromcrc, to_offset, to_start, from_start);
                      ret_rollsum->bufmatches++;
                      ret_rollsum->match_size += to_offset;
                      g_ptr_array_add (matches, g_variant_ref_sink (match));
                      break; /* Don't need any more matches */
                    } 
                }
            }
        }

      ret_rollsum->total += to_chunks->len;
    }

  g_ptr_array_sort (matches, compare_matches);

  ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL;
  ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL;
  ret_rollsum->matches = matches; matches = NULL;

  return ret_rollsum;
}

void
_ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum)
{
  g_hash_table_unref (rollsum->to_rollsums);
  g_hash_table_unref (rollsum->from_rollsums);
  g_ptr_array_unref (rollsum->matches);
  g_free (rollsum);
}