/* -*- c -*- */
/*
* The purpose of this module is to add faster sort functions
* that are type-specific. This is done by altering the
* function table for the builtin descriptors.
*
* These sorting functions are copied almost directly from numarray
* with a few modifications (complex comparisons compare the imaginary
* part if the real parts are equal, for example), and the names
* are changed.
*
* The original sorting code is due to Charles R. Harris who wrote
* it for numarray.
*/
/*
* Quick sort is usually the fastest, but the worst case scenario can
* be slower than the merge and heap sorts. The merge sort requires
* extra memory and so for large arrays may not be useful.
*
* The merge sort is *stable*, meaning that equal components
* are unmoved from their entry versions, so it can be used to
* implement lexigraphic sorting on multiple keys.
*
* The heap sort is included for completeness.
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "npy_sort.h"
#include "npysort_common.h"
#include <stdlib.h>
#define NOT_USED NPY_UNUSED(unused)
#define PYA_QS_STACK 100
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20
#define SMALL_STRING 16
/*
*****************************************************************************
** NUMERIC SORTS **
*****************************************************************************
*/
/**begin repeat
*
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
* LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
* CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA#
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
* longlong, ulonglong, half, float, double, longdouble,
* cfloat, cdouble, clongdouble, datetime, timedelta#
* #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
* npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
* npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta#
*/
int
heapsort_@suff@(void *start, npy_intp n, void *NOT_USED)
{
@type@ tmp, *a;
npy_intp i,j,l;
/* The array needs to be offset by one for heapsort indexing */
a = (@type@ *)start - 1;
for (l = n>>1; l > 0; --l) {
tmp = a[l];
for (i = l, j = l<<1; j <= n;) {
if (j < n && @TYPE@_LT(a[j], a[j+1])) {
j += 1;
}
if (@TYPE@_LT(tmp, a[j])) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
for (; n > 1;) {
tmp = a[n];
a[n] = a[1];
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && @TYPE@_LT(a[j], a[j+1])) {
j++;
}
if (@TYPE@_LT(tmp, a[j])) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
return 0;
}
int
aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *NOT_USED)
{
@type@ *v = vv;
npy_intp *a, i,j,l, tmp;
/* The arrays need to be offset by one for heapsort indexing */
a = tosort - 1;
for (l = n>>1; l > 0; --l) {
tmp = a[l];
for (i = l, j = l<<1; j <= n;) {
if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) {
j += 1;
}
if (@TYPE@_LT(v[tmp], v[a[j]])) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
for (; n > 1;) {
tmp = a[n];
a[n] = a[1];
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) {
j++;
}
if (@TYPE@_LT(v[tmp], v[a[j]])) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
return 0;
}
/**end repeat**/
/*
*****************************************************************************
** STRING SORTS **
*****************************************************************************
*/
/**begin repeat
*
* #TYPE = STRING, UNICODE#
* #suff = string, unicode#
* #type = npy_char, npy_ucs4#
*/
int
heapsort_@suff@(void *start, npy_intp n, void *varr)
{
PyArrayObject *arr = varr;
size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@);
@type@ *tmp = malloc(PyArray_ITEMSIZE(arr));
@type@ *a = (@type@ *)start - len;
npy_intp i, j, l;
if (tmp == NULL) {
return -NPY_ENOMEM;
}
for (l = n>>1; l > 0; --l) {
@TYPE@_COPY(tmp, a + l*len, len);
for (i = l, j = l<<1; j <= n;) {
if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len))
j += 1;
if (@TYPE@_LT(tmp, a + j*len, len)) {
@TYPE@_COPY(a + i*len, a + j*len, len);
i = j;
j += j;
}
else {
break;
}
}
@TYPE@_COPY(a + i*len, tmp, len);
}
for (; n > 1;) {
@TYPE@_COPY(tmp, a + n*len, len);
@TYPE@_COPY(a + n*len, a + len, len);
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len))
j++;
if (@TYPE@_LT(tmp, a + j*len, len)) {
@TYPE@_COPY(a + i*len, a + j*len, len);
i = j;
j += j;
}
else {
break;
}
}
@TYPE@_COPY(a + i*len, tmp, len);
}
free(tmp);
return 0;
}
int
aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *varr)
{
@type@ *v = vv;
PyArrayObject *arr = varr;
size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@);
npy_intp *a, i,j,l, tmp;
/* The array needs to be offset by one for heapsort indexing */
a = tosort - 1;
for (l = n>>1; l > 0; --l) {
tmp = a[l];
for (i = l, j = l<<1; j <= n;) {
if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len))
j += 1;
if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
for (; n > 1;) {
tmp = a[n];
a[n] = a[1];
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len))
j++;
if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
return 0;
}
/**end repeat**/
/*
*****************************************************************************
** GENERIC SORT **
*****************************************************************************
*/
int
npy_heapsort(void *start, npy_intp num, void *varr)
{
PyArrayObject *arr = varr;
npy_intp elsize = PyArray_ITEMSIZE(arr);
PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
char *tmp = malloc(elsize);
char *a = (char *)start - elsize;
npy_intp i, j, l;
if (tmp == NULL) {
return -NPY_ENOMEM;
}
for (l = num >> 1; l > 0; --l) {
GENERIC_COPY(tmp, a + l*elsize, elsize);
for (i = l, j = l << 1; j <= num;) {
if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) {
++j;
}
if (cmp(tmp, a + j*elsize, arr) < 0) {
GENERIC_COPY(a + i*elsize, a + j*elsize, elsize);
i = j;
j += j;
}
else {
break;
}
}
GENERIC_COPY(a + i*elsize, tmp, elsize);
}
for (; num > 1;) {
GENERIC_COPY(tmp, a + num*elsize, elsize);
GENERIC_COPY(a + num*elsize, a + elsize, elsize);
num -= 1;
for (i = 1, j = 2; j <= num;) {
if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) {
++j;
}
if (cmp(tmp, a + j*elsize, arr) < 0) {
GENERIC_COPY(a + i*elsize, a + j*elsize, elsize);
i = j;
j += j;
}
else {
break;
}
}
GENERIC_COPY(a + i*elsize, tmp, elsize);
}
free(tmp);
return 0;
}
int
npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr)
{
char *v = vv;
PyArrayObject *arr = varr;
npy_intp elsize = PyArray_ITEMSIZE(arr);
PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
npy_intp *a, i, j, l, tmp;
/* The array needs to be offset by one for heapsort indexing */
a = tosort - 1;
for (l = n >> 1; l > 0; --l) {
tmp = a[l];
for (i = l, j = l<<1; j <= n;) {
if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) {
++j;
}
if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
for (; n > 1;) {
tmp = a[n];
a[n] = a[1];
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) {
++j;
}
if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) {
a[i] = a[j];
i = j;
j += j;
}
else {
break;
}
}
a[i] = tmp;
}
return 0;
}