Blame lib/paranoia/isort.c

Packit cb6d3d
/*
Packit cb6d3d
  Copyright (C) 2004, 2005, 2008, 2011 Rocky Bernstein <rocky@gnu.org>
Packit cb6d3d
  Copyright (C) 1998 Monty xiphmont@mit.edu
Packit cb6d3d
Packit cb6d3d
  This program is free software: you can redistribute it and/or modify
Packit cb6d3d
  it under the terms of the GNU General Public License as published by
Packit cb6d3d
  the Free Software Foundation, either version 3 of the License, or
Packit cb6d3d
  (at your option) any later version.
Packit cb6d3d
Packit cb6d3d
  This program is distributed in the hope that it will be useful,
Packit cb6d3d
  but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit cb6d3d
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit cb6d3d
  GNU General Public License for more details.
Packit cb6d3d
Packit cb6d3d
  You should have received a copy of the GNU General Public License
Packit cb6d3d
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
Packit cb6d3d
*/
Packit cb6d3d
Packit cb6d3d
/* sorted vector abstraction for paranoia */
Packit cb6d3d
Packit cb6d3d
/* Old isort got a bit complex.  This re-constrains complexity to
Packit cb6d3d
   give a go at speed through a more alpha-6-like mechanism. */
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* "Sort" is a bit of a misnomer in this implementation.  It's actually
Packit cb6d3d
 * basically a hash table of sample values (with a linked-list collision
Packit cb6d3d
 * resolution), which lets you quickly determine where in a vector a
Packit cb6d3d
 * particular sample value occurs.
Packit cb6d3d
 *
Packit cb6d3d
 * Collisions aren't due to hash collisions, as the table has one bucket
Packit cb6d3d
 * for each possible sample value.  Instead, the "collisions" represent
Packit cb6d3d
 * multiple occurrences of a given value.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
#ifdef HAVE_CONFIG_H
Packit cb6d3d
# include "config.h"
Packit cb6d3d
# define __CDIO_CONFIG_H__ 1
Packit cb6d3d
#endif
Packit cb6d3d
Packit cb6d3d
#ifdef HAVE_STDLIB_H
Packit cb6d3d
#include <stdlib.h>
Packit cb6d3d
#endif
Packit cb6d3d
#ifdef HAVE_STRING_H
Packit cb6d3d
#include <string.h>
Packit cb6d3d
#endif
Packit cb6d3d
#include "p_block.h"
Packit cb6d3d
#include "isort.h"
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_alloc()
Packit cb6d3d
 *
Packit cb6d3d
 * Allocates and initializes a new, empty sort_info object, which can be
Packit cb6d3d
 * used to index up to (size) samples from a vector.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
sort_info_t *
Packit cb6d3d
sort_alloc(long size)
Packit cb6d3d
{
Packit cb6d3d
  sort_info_t *ret=calloc(1, sizeof(sort_info_t));
Packit cb6d3d
Packit cb6d3d
  ret->vector=NULL;
Packit cb6d3d
  ret->sortbegin=-1;
Packit cb6d3d
  ret->size=-1;
Packit cb6d3d
  ret->maxsize=size;
Packit cb6d3d
Packit cb6d3d
  ret->head=calloc(65536,sizeof(sort_link_t *));
Packit cb6d3d
  ret->bucketusage=calloc(1, 65536*sizeof(long));
Packit cb6d3d
  ret->revindex=calloc(size,sizeof(sort_link_t));
Packit cb6d3d
  ret->lastbucket=0;
Packit cb6d3d
Packit cb6d3d
  return(ret);
Packit cb6d3d
}
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_unsortall() (internal)
Packit cb6d3d
 *
Packit cb6d3d
 * This function resets the index for further use with a different vector
Packit cb6d3d
 * or range, without the overhead of an unnecessary free/alloc.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
void 
Packit cb6d3d
sort_unsortall(sort_info_t *i)
Packit cb6d3d
{
Packit cb6d3d
  /* If there were few enough different samples encountered (and hence few
Packit cb6d3d
   * enough buckets used), we can just zero out those buckets.  If there
Packit cb6d3d
   * were many (2000 is picked somewhat arbitrarily), it's faster simply to
Packit cb6d3d
   * zero out all buckets with a memset() rather than walking the data
Packit cb6d3d
   * structure and zeroing them out one by one.
Packit cb6d3d
   */
Packit cb6d3d
  if (i->lastbucket>2000) { /* a guess */
Packit cb6d3d
    memset(i->head,0,65536*sizeof(sort_link_t *));
Packit cb6d3d
  } else {
Packit cb6d3d
    long b;
Packit cb6d3d
    for (b=0; b<i->lastbucket; b++)
Packit cb6d3d
      i->head[i->bucketusage[b]]=NULL;
Packit cb6d3d
  }
Packit cb6d3d
Packit cb6d3d
  i->lastbucket=0;
Packit cb6d3d
  i->sortbegin=-1;
Packit cb6d3d
Packit cb6d3d
  /* Curiously, this function preserves the vector association created
Packit cb6d3d
   * by sort_setup(), but it is used only internally by sort_setup, so
Packit cb6d3d
   * preserving this association is unnecessary.
Packit cb6d3d
   */
Packit cb6d3d
}
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_free()
Packit cb6d3d
 *
Packit cb6d3d
 * Releases all memory consumed by a sort_info object.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
void 
Packit cb6d3d
sort_free(sort_info_t *i)
Packit cb6d3d
{
Packit cb6d3d
  free(i->revindex);
Packit cb6d3d
  free(i->head);
Packit cb6d3d
  free(i->bucketusage);
Packit cb6d3d
  free(i);
Packit cb6d3d
}
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_sort() (internal)
Packit cb6d3d
 *
Packit cb6d3d
 * This function builds the index to allow for fast searching for sample
Packit cb6d3d
 * values within a portion (sortlo - sorthi) of the object's associated
Packit cb6d3d
 * vector.  It is called internally and only when needed.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
static void 
Packit cb6d3d
sort_sort(sort_info_t *i,long sortlo,long sorthi)
Packit cb6d3d
{
Packit cb6d3d
  long j;
Packit cb6d3d
Packit cb6d3d
  /* We walk backward through the range to index because we insert new
Packit cb6d3d
   * samples at the head of each bucket's list.  At the end, they'll be
Packit cb6d3d
   * sorted from first to last occurrence.
Packit cb6d3d
   */
Packit cb6d3d
  for (j=sorthi-1; j>=sortlo; j--) {
Packit cb6d3d
    /* i->vector[j] = the signed 16-bit sample to index.
Packit cb6d3d
     * hv           = pointer to the head of the sorted list of occurences
Packit cb6d3d
     *                of this sample
Packit cb6d3d
     * l            = the node to associate with this sample
Packit cb6d3d
     *
Packit cb6d3d
     * We add 32768 to convert the signed 16-bit integer to an unsigned
Packit cb6d3d
     * range from 0 to 65535.
Packit cb6d3d
     *
Packit cb6d3d
     * Note that l is located within i->revindex at a position
Packit cb6d3d
     * corresponding to the sample's position in the vector.  This allows
Packit cb6d3d
     * ipos() to determine the sample position from a returned sort_link.
Packit cb6d3d
     */
Packit cb6d3d
    sort_link_t **hv = i->head+i->vector[j]+32768;
Packit cb6d3d
    sort_link_t *l   = i->revindex+j;
Packit cb6d3d
Packit cb6d3d
    /* If this is the first time we've encountered this sample, add its
Packit cb6d3d
     * bucket to the list of buckets used.  This list is used only for
Packit cb6d3d
     * resetting the index quickly.
Packit cb6d3d
     */
Packit cb6d3d
    if(*hv==NULL){
Packit cb6d3d
      i->bucketusage[i->lastbucket] = i->vector[j]+32768;
Packit cb6d3d
      i->lastbucket++;
Packit cb6d3d
    }
Packit cb6d3d
Packit cb6d3d
    /* Point the new node at the old head, then assign the new node as
Packit cb6d3d
     * the new head.
Packit cb6d3d
     */
Packit cb6d3d
    l->next=*hv;    
Packit cb6d3d
    *hv=l;
Packit cb6d3d
  }
Packit cb6d3d
Packit cb6d3d
  /* Mark the index as initialized.
Packit cb6d3d
   */
Packit cb6d3d
  i->sortbegin=0;
Packit cb6d3d
}
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_setup()
Packit cb6d3d
 *
Packit cb6d3d
 * This function initializes a previously allocated sort_info_t.  The
Packit cb6d3d
 * sort_info_t is associated with a vector of samples of length
Packit cb6d3d
 * (size), whose position begins at (*abspos) within the CD's stream
Packit cb6d3d
 * of samples.  Only the range of samples between (sortlo, sorthi)
Packit cb6d3d
 * will eventually be indexed for fast searching.  (sortlo, sorthi)
Packit cb6d3d
 * are absolute sample positions.
Packit cb6d3d
 *
Packit cb6d3d
 * ???: Why is abspos a pointer?  Why not just store a copy?
Packit cb6d3d
 *
Packit cb6d3d
 * Note: size *must* be <= the size given to the preceding sort_alloc(),
Packit cb6d3d
 * but no error checking is done here.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
void 
Packit cb6d3d
sort_setup(sort_info_t *i, int16_t *vector, long int *abspos, 
Packit cb6d3d
	   long int size, long int sortlo, long int sorthi)
Packit cb6d3d
{
Packit cb6d3d
  /* Reset the index if it has already been built.
Packit cb6d3d
   */
Packit cb6d3d
  if (i->sortbegin!=-1)
Packit cb6d3d
    sort_unsortall(i);
Packit cb6d3d
Packit cb6d3d
  i->vector=vector;
Packit cb6d3d
  i->size=size;
Packit cb6d3d
  i->abspos=abspos;
Packit cb6d3d
Packit cb6d3d
  /* Convert the absolute (sortlo, sorthi) to offsets within the vector.
Packit cb6d3d
   * Note that the index will not be built until sort_getmatch() is called.
Packit cb6d3d
   * Here we're simply hanging on to the range to index until then.
Packit cb6d3d
   */
Packit cb6d3d
  i->lo = min(size, max(sortlo - *abspos, 0));
Packit cb6d3d
  i->hi = max(0, min(sorthi - *abspos, size));
Packit cb6d3d
}
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_getmatch()
Packit cb6d3d
 *
Packit cb6d3d
 * This function returns a sort_link_t pointer which refers to the
Packit cb6d3d
 * first sample equal to (value) in the vector.  It only searches for
Packit cb6d3d
 * hits within (overlap) samples of (post), where (post) is an offset
Packit cb6d3d
 * within the vector.  The caller can determine the position of the
Packit cb6d3d
 * matched sample using ipos(sort_info *, sort_link *).
Packit cb6d3d
 *
Packit cb6d3d
 * This function returns NULL if no matches were found.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
sort_link_t *
Packit cb6d3d
sort_getmatch(sort_info_t *i, long post, long overlap, int value)
Packit cb6d3d
{
Packit cb6d3d
  sort_link_t *ret;
Packit cb6d3d
Packit cb6d3d
  /* If the vector hasn't been indexed yet, index it now.
Packit cb6d3d
   */
Packit cb6d3d
  if (i->sortbegin==-1)
Packit cb6d3d
    sort_sort(i,i->lo,i->hi);
Packit cb6d3d
  /* Now we reuse lo and hi */
Packit cb6d3d
Packit cb6d3d
  /* We'll only return samples within (overlap) samples of (post).
Packit cb6d3d
   * Clamp the boundaries to search to the boundaries of the array,
Packit cb6d3d
   * convert the signed sample to an unsigned offset, and store the
Packit cb6d3d
   * state so that future calls to sort_nextmatch do the right thing.
Packit cb6d3d
   *
Packit cb6d3d
   * Reusing lo and hi this way is awful.
Packit cb6d3d
   */
Packit cb6d3d
  post=max(0,min(i->size,post));
Packit cb6d3d
  i->val=value+32768;
Packit cb6d3d
  i->lo=max(0,post-overlap);       /* absolute position */
Packit cb6d3d
  i->hi=min(i->size,post+overlap); /* absolute position */
Packit cb6d3d
Packit cb6d3d
  /* Walk through the linked list of samples with this value, until
Packit cb6d3d
   * we find the first one within the bounds specified.  If there
Packit cb6d3d
   * aren't any, return NULL.
Packit cb6d3d
   */
Packit cb6d3d
  ret=i->head[i->val];
Packit cb6d3d
Packit cb6d3d
  while (ret) {
Packit cb6d3d
    /* ipos() calculates the offset (in terms of the original vector)
Packit cb6d3d
     * of this hit.
Packit cb6d3d
     */
Packit cb6d3d
Packit cb6d3d
    if (ipos(i,ret)<i->lo) {
Packit cb6d3d
      ret=ret->next;
Packit cb6d3d
    } else {
Packit cb6d3d
      if (ipos(i,ret)>=i->hi)
Packit cb6d3d
	ret=NULL;
Packit cb6d3d
      break;
Packit cb6d3d
    }
Packit cb6d3d
  }
Packit cb6d3d
  /*i->head[i->val]=ret;*/
Packit cb6d3d
  return(ret);
Packit cb6d3d
}
Packit cb6d3d
Packit cb6d3d
Packit cb6d3d
/* ===========================================================================
Packit cb6d3d
 * sort_nextmatch()
Packit cb6d3d
 *
Packit cb6d3d
 * This function returns a sort_link_t pointer which refers to the next sample
Packit cb6d3d
 * matching the criteria previously passed to sort_getmatch().  See
Packit cb6d3d
 * sort_getmatch() for details.
Packit cb6d3d
 *
Packit cb6d3d
 * This function returns NULL if no further matches were found.
Packit cb6d3d
 */
Packit cb6d3d
Packit cb6d3d
sort_link_t *
Packit cb6d3d
sort_nextmatch(sort_info_t *i, sort_link_t *prev)
Packit cb6d3d
{
Packit cb6d3d
  sort_link_t *ret=prev->next;
Packit cb6d3d
Packit cb6d3d
  /* If there aren't any more hits, or we've passed the boundary requested
Packit cb6d3d
   * of sort_getmatch(), we're done.
Packit cb6d3d
   */
Packit cb6d3d
  if (!ret || ipos(i,ret)>=i->hi)
Packit cb6d3d
    return(NULL); 
Packit cb6d3d
Packit cb6d3d
  return(ret);
Packit cb6d3d
}
Packit cb6d3d