/* -*- c -*- */
/*
* The purpose of this module is to add faster sort functions
* that are type-specific. This is done by altering the
* function table for the builtin descriptors.
*
* These sorting functions are copied almost directly from numarray
* with a few modifications (complex comparisons compare the imaginary
* part if the real parts are equal, for example), and the names
* are changed.
*
* The original sorting code is due to Charles R. Harris who wrote
* it for numarray.
*/
/*
* Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
* the code switches to the O(NlogN) worst case heapsort if not enough progress
* is made on the large side of the two quicksort partitions. This improves the
* worst case while still retaining the speed of quicksort for the common case.
* This is variant known as introsort.
*
*
* def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
* # sort remainder with heapsort if we are not making enough progress
* # we arbitrarily choose 2 * log(n) as the cutoff point
* if recursion_limit < 0:
* heapsort(lower, higher)
* return
*
* if lower < higher:
* pivot_pos = partition(lower, higher)
* # recurse into smaller first and leave larger on stack
* # this limits the required stack space
* if (pivot_pos - lower > higher - pivot_pos):
* quicksort(pivot_pos + 1, higher, recursion_limit - 1)
* quicksort(lower, pivot_pos, recursion_limit - 1)
* else:
* quicksort(lower, pivot_pos, recursion_limit - 1)
* quicksort(pivot_pos + 1, higher, recursion_limit - 1)
*
*
* the below code implements this converted to an iteration and as an
* additional minor optimization skips the recursion depth checking on the
* smaller partition as it is always less than half of the remaining data and
* will thus terminate fast enough
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "npy_sort.h"
#include "npysort_common.h"
#include <stdlib.h>
#define NOT_USED NPY_UNUSED(unused)
/*
* pushing largest partition has upper bound of log2(n) space
* we store two pointers each time
*/
#define PYA_QS_STACK (NPY_BITSOF_INTP * 2)
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20
#define SMALL_STRING 16
/*
*****************************************************************************
** NUMERIC SORTS **
*****************************************************************************
*/
/**begin repeat
*
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
* LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
* CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA#
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
* longlong, ulonglong, half, float, double, longdouble,
* cfloat, cdouble, clongdouble, datetime, timedelta#
* #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
* npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
* npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta#
*/
int
quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
{
@type@ vp;
@type@ *pl = start;
@type@ *pr = pl + num - 1;
@type@ *stack[PYA_QS_STACK];
@type@ **sptr = stack;
@type@ *pm, *pi, *pj, *pk;
int depth[PYA_QS_STACK];
int * psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_@suff@(pl, pr - pl + 1, NULL);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (@TYPE@_LT(*pm, *pl)) @TYPE@_SWAP(*pm, *pl);
if (@TYPE@_LT(*pr, *pm)) @TYPE@_SWAP(*pr, *pm);
if (@TYPE@_LT(*pm, *pl)) @TYPE@_SWAP(*pm, *pl);
vp = *pm;
pi = pl;
pj = pr - 1;
@TYPE@_SWAP(*pm, *pj);
for (;;) {
do ++pi; while (@TYPE@_LT(*pi, vp));
do --pj; while (@TYPE@_LT(vp, *pj));
if (pi >= pj) {
break;
}
@TYPE@_SWAP(*pi,*pj);
}
pk = pr - 1;
@TYPE@_SWAP(*pi, *pk);
/* push largest partition on stack */
if (pi - pl < pr - pi) {
*sptr++ = pi + 1;
*sptr++ = pr;
pr = pi - 1;
}
else {
*sptr++ = pl;
*sptr++ = pi - 1;
pl = pi + 1;
}
*psdepth++ = --cdepth;
}
/* insertion sort */
for (pi = pl + 1; pi <= pr; ++pi) {
vp = *pi;
pj = pi;
pk = pi - 1;
while (pj > pl && @TYPE@_LT(vp, *pk)) {
*pj-- = *pk--;
}
*pj = vp;
}
stack_pop:
if (sptr == stack) {
break;
}
pr = *(--sptr);
pl = *(--sptr);
cdepth = *(--psdepth);
}
return 0;
}
int
aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
{
@type@ *v = vv;
@type@ vp;
npy_intp *pl = tosort;
npy_intp *pr = tosort + num - 1;
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
int depth[PYA_QS_STACK];
int * psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
aheapsort_@suff@(vv, pl, pr - pl + 1, NULL);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl);
if (@TYPE@_LT(v[*pr],v[*pm])) INTP_SWAP(*pr, *pm);
if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl);
vp = v[*pm];
pi = pl;
pj = pr - 1;
INTP_SWAP(*pm, *pj);
for (;;) {
do ++pi; while (@TYPE@_LT(v[*pi], vp));
do --pj; while (@TYPE@_LT(vp, v[*pj]));
if (pi >= pj) {
break;
}
INTP_SWAP(*pi, *pj);
}
pk = pr - 1;
INTP_SWAP(*pi,*pk);
/* push largest partition on stack */
if (pi - pl < pr - pi) {
*sptr++ = pi + 1;
*sptr++ = pr;
pr = pi - 1;
}
else {
*sptr++ = pl;
*sptr++ = pi - 1;
pl = pi + 1;
}
*psdepth++ = --cdepth;
}
/* insertion sort */
for (pi = pl + 1; pi <= pr; ++pi) {
vi = *pi;
vp = v[vi];
pj = pi;
pk = pi - 1;
while (pj > pl && @TYPE@_LT(vp, v[*pk])) {
*pj-- = *pk--;
}
*pj = vi;
}
stack_pop:
if (sptr == stack) {
break;
}
pr = *(--sptr);
pl = *(--sptr);
cdepth = *(--psdepth);
}
return 0;
}
/**end repeat**/
/*
*****************************************************************************
** STRING SORTS **
*****************************************************************************
*/
/**begin repeat
*
* #TYPE = STRING, UNICODE#
* #suff = string, unicode#
* #type = npy_char, npy_ucs4#
*/
int
quicksort_@suff@(void *start, npy_intp num, void *varr)
{
PyArrayObject *arr = varr;
const size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@);
@type@ *vp;
@type@ *pl = start;
@type@ *pr = pl + (num - 1)*len;
@type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk;
int depth[PYA_QS_STACK];
int * psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
/* Items that have zero size don't make sense to sort */
if (len == 0) {
return 0;
}
vp = malloc(PyArray_ITEMSIZE(arr));
if (vp == NULL) {
return -NPY_ENOMEM;
}
for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_@suff@(pl, (pr - pl) / len + 1, varr);
goto stack_pop;
}
while ((size_t)(pr - pl) > SMALL_QUICKSORT*len) {
/* quicksort partition */
pm = pl + (((pr - pl)/len) >> 1)*len;
if (@TYPE@_LT(pm, pl, len)) @TYPE@_SWAP(pm, pl, len);
if (@TYPE@_LT(pr, pm, len)) @TYPE@_SWAP(pr, pm, len);
if (@TYPE@_LT(pm, pl, len)) @TYPE@_SWAP(pm, pl, len);
@TYPE@_COPY(vp, pm, len);
pi = pl;
pj = pr - len;
@TYPE@_SWAP(pm, pj, len);
for (;;) {
do pi += len; while (@TYPE@_LT(pi, vp, len));
do pj -= len; while (@TYPE@_LT(vp, pj, len));
if (pi >= pj) {
break;
}
@TYPE@_SWAP(pi, pj, len);
}
pk = pr - len;
@TYPE@_SWAP(pi, pk, len);
/* push largest partition on stack */
if (pi - pl < pr - pi) {
*sptr++ = pi + len;
*sptr++ = pr;
pr = pi - len;
}
else {
*sptr++ = pl;
*sptr++ = pi - len;
pl = pi + len;
}
*psdepth++ = --cdepth;
}
/* insertion sort */
for (pi = pl + len; pi <= pr; pi += len) {
@TYPE@_COPY(vp, pi, len);
pj = pi;
pk = pi - len;
while (pj > pl && @TYPE@_LT(vp, pk, len)) {
@TYPE@_COPY(pj, pk, len);
pj -= len;
pk -= len;
}
@TYPE@_COPY(pj, vp, len);
}
stack_pop:
if (sptr == stack) {
break;
}
pr = *(--sptr);
pl = *(--sptr);
cdepth = *(--psdepth);
}
free(vp);
return 0;
}
int
aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
{
@type@ *v = vv;
PyArrayObject *arr = varr;
size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@);
@type@ *vp;
npy_intp *pl = tosort;
npy_intp *pr = tosort + num - 1;
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr=stack;
npy_intp *pm, *pi, *pj, *pk, vi;
int depth[PYA_QS_STACK];
int * psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
/* Items that have zero size don't make sense to sort */
if (len == 0) {
return 0;
}
for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
aheapsort_@suff@(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (@TYPE@_LT(v + (*pm)*len, v + (*pl)*len, len)) INTP_SWAP(*pm, *pl);
if (@TYPE@_LT(v + (*pr)*len, v + (*pm)*len, len)) INTP_SWAP(*pr, *pm);
if (@TYPE@_LT(v + (*pm)*len, v + (*pl)*len, len)) INTP_SWAP(*pm, *pl);
vp = v + (*pm)*len;
pi = pl;
pj = pr - 1;
INTP_SWAP(*pm,*pj);
for (;;) {
do ++pi; while (@TYPE@_LT(v + (*pi)*len, vp, len));
do --pj; while (@TYPE@_LT(vp, v + (*pj)*len, len));
if (pi >= pj) {
break;
}
INTP_SWAP(*pi,*pj);
}
pk = pr - 1;
INTP_SWAP(*pi,*pk);
/* push largest partition on stack */
if (pi - pl < pr - pi) {
*sptr++ = pi + 1;
*sptr++ = pr;
pr = pi - 1;
}
else {
*sptr++ = pl;
*sptr++ = pi - 1;
pl = pi + 1;
}
*psdepth++ = --cdepth;
}
/* insertion sort */
for (pi = pl + 1; pi <= pr; ++pi) {
vi = *pi;
vp = v + vi*len;
pj = pi;
pk = pi - 1;
while (pj > pl && @TYPE@_LT(vp, v + (*pk)*len, len)) {
*pj-- = *pk--;
}
*pj = vi;
}
stack_pop:
if (sptr == stack) {
break;
}
pr = *(--sptr);
pl = *(--sptr);
cdepth = *(--psdepth);
}
return 0;
}
/**end repeat**/
/*
*****************************************************************************
** GENERIC SORT **
*****************************************************************************
*/
int
npy_quicksort(void *start, npy_intp num, void *varr)
{
PyArrayObject *arr = varr;
npy_intp elsize = PyArray_ITEMSIZE(arr);
PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
char *vp;
char *pl = start;
char *pr = pl + (num - 1)*elsize;
char *stack[PYA_QS_STACK];
char **sptr = stack;
char *pm, *pi, *pj, *pk;
int depth[PYA_QS_STACK];
int * psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
/* Items that have zero size don't make sense to sort */
if (elsize == 0) {
return 0;
}
vp = malloc(elsize);
if (vp == NULL) {
return -NPY_ENOMEM;
}
for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
npy_heapsort(pl, (pr - pl) / elsize + 1, varr);
goto stack_pop;
}
while(pr - pl > SMALL_QUICKSORT*elsize) {
/* quicksort partition */
pm = pl + (((pr - pl) / elsize) >> 1) * elsize;
if (cmp(pm, pl, arr) < 0) {
GENERIC_SWAP(pm, pl, elsize);
}
if (cmp(pr, pm, arr) < 0) {
GENERIC_SWAP(pr, pm, elsize);
}
if (cmp(pm, pl, arr) < 0) {
GENERIC_SWAP(pm, pl, elsize);
}
GENERIC_COPY(vp, pm, elsize);
pi = pl;
pj = pr - elsize;
GENERIC_SWAP(pm, pj, elsize);
/*
* Generic comparisons may be buggy, so don't rely on the sentinals
* to keep the pointers from going out of bounds.
*/
for (;;) {
do {
pi += elsize;
} while (cmp(pi, vp, arr) < 0 && pi < pj);
do {
pj -= elsize;
} while (cmp(vp, pj, arr) < 0 && pi < pj);
if (pi >= pj) {
break;
}
GENERIC_SWAP(pi, pj, elsize);
}
pk = pr - elsize;
GENERIC_SWAP(pi, pk, elsize);
/* push largest partition on stack */
if (pi - pl < pr - pi) {
*sptr++ = pi + elsize;
*sptr++ = pr;
pr = pi - elsize;
}
else {
*sptr++ = pl;
*sptr++ = pi - elsize;
pl = pi + elsize;
}
*psdepth++ = --cdepth;
}
/* insertion sort */
for (pi = pl + elsize; pi <= pr; pi += elsize) {
GENERIC_COPY(vp, pi, elsize);
pj = pi;
pk = pi - elsize;
while (pj > pl && cmp(vp, pk, arr) < 0) {
GENERIC_COPY(pj, pk, elsize);
pj -= elsize;
pk -= elsize;
}
GENERIC_COPY(pj, vp, elsize);
}
stack_pop:
if (sptr == stack) {
break;
}
pr = *(--sptr);
pl = *(--sptr);
cdepth = *(--psdepth);
}
free(vp);
return 0;
}
int
npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
{
char *v = vv;
PyArrayObject *arr = varr;
npy_intp elsize = PyArray_ITEMSIZE(arr);
PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
char *vp;
npy_intp *pl = tosort;
npy_intp *pr = tosort + num - 1;
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
int depth[PYA_QS_STACK];
int * psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
/* Items that have zero size don't make sense to sort */
if (elsize == 0) {
return 0;
}
for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
npy_aheapsort(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (cmp(v + (*pm)*elsize, v + (*pl)*elsize, arr) < 0) {
INTP_SWAP(*pm, *pl);
}
if (cmp(v + (*pr)*elsize, v + (*pm)*elsize, arr) < 0) {
INTP_SWAP(*pr, *pm);
}
if (cmp(v + (*pm)*elsize, v + (*pl)*elsize, arr) < 0) {
INTP_SWAP(*pm, *pl);
}
vp = v + (*pm)*elsize;
pi = pl;
pj = pr - 1;
INTP_SWAP(*pm,*pj);
for (;;) {
do {
++pi;
} while (cmp(v + (*pi)*elsize, vp, arr) < 0 && pi < pj);
do {
--pj;
} while (cmp(vp, v + (*pj)*elsize, arr) < 0 && pi < pj);
if (pi >= pj) {
break;
}
INTP_SWAP(*pi,*pj);
}
pk = pr - 1;
INTP_SWAP(*pi,*pk);
/* push largest partition on stack */
if (pi - pl < pr - pi) {
*sptr++ = pi + 1;
*sptr++ = pr;
pr = pi - 1;
}
else {
*sptr++ = pl;
*sptr++ = pi - 1;
pl = pi + 1;
}
*psdepth++ = --cdepth;
}
/* insertion sort */
for (pi = pl + 1; pi <= pr; ++pi) {
vi = *pi;
vp = v + vi*elsize;
pj = pi;
pk = pi - 1;
while (pj > pl && cmp(vp, v + (*pk)*elsize, arr) < 0) {
*pj-- = *pk--;
}
*pj = vi;
}
stack_pop:
if (sptr == stack) {
break;
}
pr = *(--sptr);
pl = *(--sptr);
cdepth = *(--psdepth);
}
return 0;
}