Blame source/uds/util/radixSort.c

Packit Service 310c69
/*
Packit Service 310c69
 * Copyright (c) 2020 Red Hat, Inc.
Packit Service 310c69
 *
Packit Service 310c69
 * This program is free software; you can redistribute it and/or
Packit Service 310c69
 * modify it under the terms of the GNU General Public License
Packit Service 310c69
 * as published by the Free Software Foundation; either version 2
Packit Service 310c69
 * of the License, or (at your option) any later version.
Packit Service 310c69
 * 
Packit Service 310c69
 * This program is distributed in the hope that it will be useful,
Packit Service 310c69
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 310c69
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service 310c69
 * GNU General Public License for more details.
Packit Service 310c69
 * 
Packit Service 310c69
 * You should have received a copy of the GNU General Public License
Packit Service 310c69
 * along with this program; if not, write to the Free Software
Packit Service 310c69
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
Packit Service 310c69
 * 02110-1301, USA. 
Packit Service 310c69
 *
Packit Service 310c69
 * $Id: //eng/uds-releases/jasper/src/uds/util/radixSort.c#2 $
Packit Service 310c69
 */
Packit Service 310c69
Packit Service 310c69
/*
Packit Service 310c69
 * Radix sort is implemented using an American Flag sort, an unstable,
Packit Service 310c69
 * in-place 8-bit radix exchange sort.
Packit Service 310c69
 *
Packit Service 310c69
 * Adapted from the algorithm in the paper by Peter M. McIlroy, Keith Bostic,
Packit Service 310c69
 * and M. Douglas McIlroy, "Engineering Radix Sort".
Packit Service 310c69
 * http://www.usenix.org/publications/compsystems/1993/win_mcilroy.pdf
Packit Service 310c69
 */
Packit Service 310c69
Packit Service 310c69
#include "radixSort.h"
Packit Service 310c69
Packit Service 310c69
#include "compiler.h"
Packit Service 310c69
#include "memoryAlloc.h"
Packit Service 310c69
#include "stringUtils.h"
Packit Service 310c69
#include "typeDefs.h"
Packit Service 310c69
#include "uds.h"
Packit Service 310c69
Packit Service 310c69
enum {
Packit Service 310c69
  // Piles smaller than this are handled with a simple insertion sort.
Packit Service 310c69
  INSERTION_SORT_THRESHOLD  = 12
Packit Service 310c69
};
Packit Service 310c69
Packit Service 310c69
// Sort keys are pointers to immutable fixed-length arrays of bytes.
Packit Service 310c69
typedef const uint8_t * Key;
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * The keys are separated into piles based on the byte in each
Packit Service 310c69
 * keys at the current offset, so the number of keys with each
Packit Service 310c69
 * byte must be counted.
Packit Service 310c69
 **/
Packit Service 310c69
typedef struct {
Packit Service 310c69
  uint16_t used;       // number of non-empty bins
Packit Service 310c69
  uint16_t first;      // index (key byte)  of the first non-empty bin
Packit Service 310c69
  uint16_t last;       // index (key byte) of the last non-empty bin
Packit Service 310c69
  uint32_t size[256];  // size[byte] == # of occurrences of byte
Packit Service 310c69
} Histogram;
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Sub-tasks are manually managed on a stack, both for performance
Packit Service 310c69
 * and to put a logarithmic bound on the stack space needed.
Packit Service 310c69
 **/
Packit Service 310c69
typedef struct {
Packit Service 310c69
  Key      *firstKey;  // Pointers to first and last keys to sort, inclusive.
Packit Service 310c69
  Key      *lastKey;
Packit Service 310c69
  uint16_t  offset;    // The offset into the key at which to continue sorting.
Packit Service 310c69
  uint16_t  length;    // The number of bytes remaining in the sort keys.
Packit Service 310c69
} Task;
Packit Service 310c69
Packit Service 310c69
struct radixSorter {
Packit Service 310c69
  unsigned int  count;
Packit Service 310c69
  Histogram     bins;
Packit Service 310c69
  Key          *pile[256];
Packit Service 310c69
  Task         *endOfStack;
Packit Service 310c69
  Task          isList[256];
Packit Service 310c69
  Task          stack[];
Packit Service 310c69
};
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Compare a segment of two fixed-length keys starting an offset.
Packit Service 310c69
 *
Packit Service 310c69
 * @param key1    the first key
Packit Service 310c69
 * @param key2    the second key
Packit Service 310c69
 * @param offset  the offset into the keys of the first byte to compare
Packit Service 310c69
 * @param length  the number of bytes remaining in each key
Packit Service 310c69
 **/
Packit Service 310c69
static INLINE int compare(Key key1, Key key2, uint16_t offset, uint16_t length)
Packit Service 310c69
{
Packit Service 310c69
  return memcmp(&key1[offset], &key2[offset], length);
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Insert the next unsorted key into an array of sorted keys.
Packit Service 310c69
 *
Packit Service 310c69
 * @param task  the description of the keys being sorted
Packit Service 310c69
 * @param next  the pointer to the unsorted key to insert into
Packit Service 310c69
 *              the array of sorted key pointers preceding it
Packit Service 310c69
 **/
Packit Service 310c69
static INLINE void insertKey(const Task task, Key *next)
Packit Service 310c69
{
Packit Service 310c69
  // Pull the unsorted key out, freeing up the array slot.
Packit Service 310c69
  Key unsorted = *next;
Packit Service 310c69
  // Compare the key to the preceding sorted entries, shifting
Packit Service 310c69
  // down the ones that are larger.
Packit Service 310c69
  while ((--next >= task.firstKey)
Packit Service 310c69
         && (compare(unsorted, next[0], task.offset, task.length) < 0)) {
Packit Service 310c69
    next[1] = next[0];
Packit Service 310c69
  }
Packit Service 310c69
  // Insert the key into the last slot that was cleared, sorting it.
Packit Service 310c69
  next[1] = unsorted;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Sort a range of key segments using an insertion sort. This simple sort is
Packit Service 310c69
 * faster than the 256-way radix sort when the number of keys to sort is
Packit Service 310c69
 * small.
Packit Service 310c69
 *
Packit Service 310c69
 * @param task  the description of the keys to sort
Packit Service 310c69
 **/
Packit Service 310c69
static INLINE void insertionSort(const Task task)
Packit Service 310c69
{
Packit Service 310c69
  // (firstKey .. firstKey) is trivially sorted. Repeatedly insert the next
Packit Service 310c69
  // key into the sorted list of keys preceding it, and voila!
Packit Service 310c69
  Key *next;
Packit Service 310c69
  for (next = task.firstKey + 1; next <= task.lastKey; next++) {
Packit Service 310c69
    insertKey(task, next);
Packit Service 310c69
  }
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Push a sorting task onto the task stack, increasing the stack pointer.
Packit Service 310c69
 **/
Packit Service 310c69
static INLINE void pushTask(Task    **stackPointer,
Packit Service 310c69
                            Key      *firstKey,
Packit Service 310c69
                            uint32_t  count,
Packit Service 310c69
                            uint16_t  offset,
Packit Service 310c69
                            uint16_t  length)
Packit Service 310c69
{
Packit Service 310c69
  Task *task = (*stackPointer)++;
Packit Service 310c69
  task->firstKey = firstKey;
Packit Service 310c69
  task->lastKey  = &firstKey[count - 1];
Packit Service 310c69
  task->offset   = offset;
Packit Service 310c69
  task->length   = length;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**********************************************************************/
Packit Service 310c69
static INLINE void swapKeys(Key *a, Key *b)
Packit Service 310c69
{
Packit Service 310c69
  Key c = *a;
Packit Service 310c69
  *a = *b;
Packit Service 310c69
  *b = c;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Count the number of times each byte value appears in in the arrays of keys
Packit Service 310c69
 * to sort at the current offset, keeping track of the number of non-empty
Packit Service 310c69
 * bins, and the index of the first and last non-empty bin.
Packit Service 310c69
 *
Packit Service 310c69
 * @param task  the description of the keys to sort
Packit Service 310c69
 * @param bins  the histogram bins receiving the counts
Packit Service 310c69
 **/
Packit Service 310c69
static INLINE void measureBins(const Task task, Histogram *bins)
Packit Service 310c69
{
Packit Service 310c69
  // Set bogus values that will will be replaced by min and max, respectively.
Packit Service 310c69
  bins->first = UINT8_MAX;
Packit Service 310c69
  bins->last = 0;
Packit Service 310c69
Packit Service 310c69
  // Subtle invariant: bins->used and bins->size[] are zero because the
Packit Service 310c69
  // sorting code clears it all out as it goes. Even though this structure is
Packit Service 310c69
  // re-used, we don't need to pay to zero it before starting a new tally.
Packit Service 310c69
Packit Service 310c69
  Key *keyPtr;
Packit Service 310c69
  for (keyPtr = task.firstKey; keyPtr <= task.lastKey; keyPtr++) {
Packit Service 310c69
    // Increment the count for the byte in the key at the current offset.
Packit Service 310c69
    uint8_t bin = (*keyPtr)[task.offset];
Packit Service 310c69
    uint32_t size = ++bins->size[bin];
Packit Service 310c69
Packit Service 310c69
    // Track non-empty bins when the count transitions from zero to one.
Packit Service 310c69
    if (size == 1) {
Packit Service 310c69
      bins->used += 1;
Packit Service 310c69
      if (bin < bins->first) {
Packit Service 310c69
        bins->first = bin;
Packit Service 310c69
      }
Packit Service 310c69
      if (bin > bins->last) {
Packit Service 310c69
        bins->last = bin;
Packit Service 310c69
      }
Packit Service 310c69
    }
Packit Service 310c69
  }
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Convert the bin sizes to pointers to where each pile goes.
Packit Service 310c69
 *
Packit Service 310c69
 *   pile[0] = firstKey + bin->size[0],
Packit Service 310c69
 *   pile[1] = pile[0]  + bin->size[1], etc.
Packit Service 310c69
 *
Packit Service 310c69
 * After the keys are moved to the appropriate pile, we'll need to sort
Packit Service 310c69
 * each of the piles by the next radix position.  A new task is put on the
Packit Service 310c69
 * stack for each pile containing lots of keys, or a new task is is put on
Packit Service 310c69
 * the list for each pile containing few keys.
Packit Service 310c69
 *
Packit Service 310c69
 * @param stack      pointer the top of the stack
Packit Service 310c69
 * @param endOfStack the end of the stack
Packit Service 310c69
 * @param list       pointer the head of the list
Packit Service 310c69
 * @param pile       array that will be filled pointers to the end of each pile
Packit Service 310c69
 * @param bins       the histogram of the sizes of each pile
Packit Service 310c69
 * @param firstKey   the first key of the stack
Packit Service 310c69
 * @param offset     the next radix position to sort by
Packit Service 310c69
 * @param length     the number of bytes remaining in the sort keys
Packit Service 310c69
 *
Packit Service 310c69
 * @return UDS_SUCCESS or an error code
Packit Service 310c69
 **/
Packit Service 310c69
static INLINE int pushBins(Task       **stack,
Packit Service 310c69
                           Task       *endOfStack,
Packit Service 310c69
                           Task       **list,
Packit Service 310c69
                           Key        *pile[],
Packit Service 310c69
                           Histogram  *bins,
Packit Service 310c69
                           Key        *firstKey,
Packit Service 310c69
                           uint16_t    offset,
Packit Service 310c69
                           uint16_t    length)
Packit Service 310c69
{
Packit Service 310c69
  Key *pileStart = firstKey;
Packit Service 310c69
  int bin;
Packit Service 310c69
  for (bin = bins->first; ; bin++) {
Packit Service 310c69
    uint32_t size = bins->size[bin];
Packit Service 310c69
    // Skip empty piles.
Packit Service 310c69
    if (size == 0) {
Packit Service 310c69
      continue;
Packit Service 310c69
    }
Packit Service 310c69
    // There's no need to sort empty keys.
Packit Service 310c69
    if (length > 0) {
Packit Service 310c69
      if (size > INSERTION_SORT_THRESHOLD) {
Packit Service 310c69
        if (*stack >= endOfStack) {
Packit Service 310c69
          return UDS_BAD_STATE;
Packit Service 310c69
        }
Packit Service 310c69
        pushTask(stack, pileStart, size, offset, length);
Packit Service 310c69
      } else if (size > 1) {
Packit Service 310c69
        pushTask(list, pileStart, size, offset, length);
Packit Service 310c69
      }
Packit Service 310c69
    }
Packit Service 310c69
    pileStart += size;
Packit Service 310c69
    pile[bin] = pileStart;
Packit Service 310c69
    if (--bins->used == 0) {
Packit Service 310c69
      break;
Packit Service 310c69
    }
Packit Service 310c69
  }
Packit Service 310c69
  return UDS_SUCCESS;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**********************************************************************/
Packit Service 310c69
int makeRadixSorter(unsigned int count, RadixSorter **sorter)
Packit Service 310c69
{
Packit Service 310c69
  unsigned int stackSize = count / INSERTION_SORT_THRESHOLD;
Packit Service 310c69
  RadixSorter *radixSorter;
Packit Service 310c69
  int result = ALLOCATE_EXTENDED(RadixSorter, stackSize, Task, __func__,
Packit Service 310c69
                                 &radixSorter);
Packit Service 310c69
  if (result != UDS_SUCCESS) {
Packit Service 310c69
    return result;
Packit Service 310c69
  }
Packit Service 310c69
  radixSorter->count = count;
Packit Service 310c69
  radixSorter->endOfStack = radixSorter->stack + stackSize;
Packit Service 310c69
  *sorter = radixSorter;
Packit Service 310c69
  return UDS_SUCCESS;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**********************************************************************/
Packit Service 310c69
void freeRadixSorter(RadixSorter *sorter)
Packit Service 310c69
{
Packit Service 310c69
  FREE(sorter);
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**********************************************************************/
Packit Service 310c69
int radixSort(RadixSorter         *sorter,
Packit Service 310c69
              const unsigned char *keys[],
Packit Service 310c69
              unsigned int         count,
Packit Service 310c69
              unsigned short       length)
Packit Service 310c69
{
Packit Service 310c69
  // All zero-length keys are identical and therefore already sorted.
Packit Service 310c69
  if ((count == 0) || (length == 0)) {
Packit Service 310c69
    return UDS_SUCCESS;
Packit Service 310c69
  }
Packit Service 310c69
Packit Service 310c69
  // The initial task is to sort the entire length of all the keys.
Packit Service 310c69
  Task start = {
Packit Service 310c69
    .firstKey = keys,
Packit Service 310c69
    .lastKey  = &keys[count - 1],
Packit Service 310c69
    .offset   = 0,
Packit Service 310c69
    .length   = length,
Packit Service 310c69
  };
Packit Service 310c69
Packit Service 310c69
  if (count <= INSERTION_SORT_THRESHOLD) {
Packit Service 310c69
    insertionSort(start);
Packit Service 310c69
    return UDS_SUCCESS;
Packit Service 310c69
  }
Packit Service 310c69
Packit Service 310c69
  if (count > sorter->count) {
Packit Service 310c69
    return UDS_INVALID_ARGUMENT;
Packit Service 310c69
  }
Packit Service 310c69
Packit Service 310c69
  Histogram  *bins  = &sorter->bins;
Packit Service 310c69
  Key       **pile  = sorter->pile;
Packit Service 310c69
  Task       *sp    = sorter->stack;
Packit Service 310c69
Packit Service 310c69
  /*
Packit Service 310c69
   * Repeatedly consume a sorting task from the stack and process it, pushing
Packit Service 310c69
   * new sub-tasks onto to the stack for each radix-sorted pile. When all
Packit Service 310c69
   * tasks and sub-tasks have been processed, the stack will be empty and all
Packit Service 310c69
   * the keys in the starting task will be fully sorted.
Packit Service 310c69
   */
Packit Service 310c69
  for (*sp = start; sp >= sorter->stack; sp--) {
Packit Service 310c69
    const Task task = *sp;
Packit Service 310c69
    measureBins(task, bins);
Packit Service 310c69
Packit Service 310c69
    // Now that we know how large each bin is, generate pointers for each of
Packit Service 310c69
    // the piles and push a new task to sort each pile by the next radix byte.
Packit Service 310c69
    Task *lp = sorter->isList;
Packit Service 310c69
    int result = pushBins(&sp, sorter->endOfStack, &lp, pile, bins,
Packit Service 310c69
                          task.firstKey, task.offset + 1, task.length - 1);
Packit Service 310c69
    if (result != UDS_SUCCESS) {
Packit Service 310c69
      memset(bins, 0, sizeof(*bins));
Packit Service 310c69
      return result;
Packit Service 310c69
    }
Packit Service 310c69
    // Now bins->used is zero again.
Packit Service 310c69
Packit Service 310c69
    // Don't bother processing the last pile--when piles 0..N-1 are all in
Packit Service 310c69
    // place, then pile N must also be in place.
Packit Service 310c69
    Key *end = task.lastKey - bins->size[bins->last];
Packit Service 310c69
    bins->size[bins->last] = 0;
Packit Service 310c69
Packit Service 310c69
    Key *fence;
Packit Service 310c69
    for (fence = task.firstKey; fence <= end; ) {
Packit Service 310c69
      uint8_t bin;
Packit Service 310c69
      Key key = *fence;
Packit Service 310c69
      // The radix byte of the key tells us which pile it belongs in. Swap it
Packit Service 310c69
      // for an unprocessed item just below that pile, and repeat.
Packit Service 310c69
      while (--pile[bin = key[task.offset]] > fence) {
Packit Service 310c69
        swapKeys(pile[bin], &key);
Packit Service 310c69
      }
Packit Service 310c69
      // The pile reached the fence. Put the key at the bottom of that pile.
Packit Service 310c69
      // completing it, and advance the fence to the next pile.
Packit Service 310c69
      *fence = key;
Packit Service 310c69
      fence += bins->size[bin];
Packit Service 310c69
      bins->size[bin] = 0;
Packit Service 310c69
    }
Packit Service 310c69
    // Now bins->size[] is all zero again.
Packit Service 310c69
Packit Service 310c69
    // When the number of keys in a task gets small enough, its faster to use
Packit Service 310c69
    // an insertion sort than to keep subdividing into tiny piles.
Packit Service 310c69
    while (--lp >= sorter->isList) {
Packit Service 310c69
      insertionSort(*lp);
Packit Service 310c69
    }
Packit Service 310c69
  }
Packit Service 310c69
  return UDS_SUCCESS;
Packit Service 310c69
}