Blob Blame History Raw
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "common.h"

/**
 * An array-of-pointers implementation of Python's Timsort
 * Based on code by Christopher Swenson under the MIT license
 *
 * Copyright (c) 2010 Christopher Swenson
 * Copyright (c) 2011 Vicent Marti
 */

#ifndef MAX
#	define MAX(x,y) (((x) > (y) ? (x) : (y)))
#endif

#ifndef MIN
#	define MIN(x,y) (((x) < (y) ? (x) : (y)))
#endif

static int binsearch(
	void **dst, const void *x, size_t size, git__sort_r_cmp cmp, void *payload)
{
	int l, c, r;
	void *lx, *cx;

	assert(size > 0);

	l = 0;
	r = (int)size - 1;
	c = r >> 1;
	lx = dst[l];

	/* check for beginning conditions */
	if (cmp(x, lx, payload) < 0)
		return 0;

	else if (cmp(x, lx, payload) == 0) {
		int i = 1;
		while (cmp(x, dst[i], payload) == 0)
			i++;
		return i;
	}

	/* guaranteed not to be >= rx */
	cx = dst[c];
	while (1) {
		const int val = cmp(x, cx, payload);
		if (val < 0) {
			if (c - l <= 1) return c;
			r = c;
		} else if (val > 0) {
			if (r - c <= 1) return c + 1;
			l = c;
			lx = cx;
		} else {
			do {
				cx = dst[++c];
			} while (cmp(x, cx, payload) == 0);
			return c;
		}
		c = l + ((r - l) >> 1);
		cx = dst[c];
	}
}

/* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
static void bisort(
	void **dst, size_t start, size_t size, git__sort_r_cmp cmp, void *payload)
{
	size_t i;
	void *x;
	int location;

	for (i = start; i < size; i++) {
		int j;
		/* If this entry is already correct, just move along */
		if (cmp(dst[i - 1], dst[i], payload) <= 0)
			continue;

		/* Else we need to find the right place, shift everything over, and squeeze in */
		x = dst[i];
		location = binsearch(dst, x, i, cmp, payload);
		for (j = (int)i - 1; j >= location; j--) {
			dst[j + 1] = dst[j];
		}
		dst[location] = x;
	}
}


/* timsort implementation, based on timsort.txt */
struct tsort_run {
	ssize_t start;
	ssize_t length;
};

struct tsort_store {
	size_t alloc;
	git__sort_r_cmp cmp;
	void *payload;
	void **storage;
};

static void reverse_elements(void **dst, ssize_t start, ssize_t end)
{
	while (start < end) {
		void *tmp = dst[start];
		dst[start] = dst[end];
		dst[end] = tmp;

		start++;
		end--;
	}
}

static ssize_t count_run(
	void **dst, ssize_t start, ssize_t size, struct tsort_store *store)
{
	ssize_t curr = start + 2;

	if (size - start == 1)
		return 1;

	if (start >= size - 2) {
		if (store->cmp(dst[size - 2], dst[size - 1], store->payload) > 0) {
			void *tmp = dst[size - 1];
			dst[size - 1] = dst[size - 2];
			dst[size - 2] = tmp;
		}

		return 2;
	}

	if (store->cmp(dst[start], dst[start + 1], store->payload) <= 0) {
		while (curr < size - 1 &&
				store->cmp(dst[curr - 1], dst[curr], store->payload) <= 0)
			curr++;

		return curr - start;
	} else {
		while (curr < size - 1 &&
				store->cmp(dst[curr - 1], dst[curr], store->payload) > 0)
			curr++;

		/* reverse in-place */
		reverse_elements(dst, start, curr - 1);
		return curr - start;
	}
}

static size_t compute_minrun(size_t n)
{
	int r = 0;
	while (n >= 64) {
		r |= n & 1;
		n >>= 1;
	}
	return n + r;
}

static int check_invariant(struct tsort_run *stack, ssize_t stack_curr)
{
	if (stack_curr < 2)
		return 1;

	else if (stack_curr == 2) {
		const ssize_t A = stack[stack_curr - 2].length;
		const ssize_t B = stack[stack_curr - 1].length;
		return (A > B);
	} else {
		const ssize_t A = stack[stack_curr - 3].length;
		const ssize_t B = stack[stack_curr - 2].length;
		const ssize_t C = stack[stack_curr - 1].length;
		return !((A <= B + C) || (B <= C));
	}
}

static int resize(struct tsort_store *store, size_t new_size)
{
	if (store->alloc < new_size) {
		void **tempstore;

		tempstore = git__reallocarray(store->storage, new_size, sizeof(void *));

		/**
		 * Do not propagate on OOM; this will abort the sort and
		 * leave the array unsorted, but no error code will be
		 * raised
		 */
		if (tempstore == NULL)
			return -1;

		store->storage = tempstore;
		store->alloc = new_size;
	}

	return 0;
}

static void merge(void **dst, const struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store)
{
	const ssize_t A = stack[stack_curr - 2].length;
	const ssize_t B = stack[stack_curr - 1].length;
	const ssize_t curr = stack[stack_curr - 2].start;

	void **storage;
	ssize_t i, j, k;

	if (resize(store, MIN(A, B)) < 0)
		return;

	storage = store->storage;

	/* left merge */
	if (A < B) {
		memcpy(storage, &dst[curr], A * sizeof(void *));
		i = 0;
		j = curr + A;

		for (k = curr; k < curr + A + B; k++) {
			if ((i < A) && (j < curr + A + B)) {
				if (store->cmp(storage[i], dst[j], store->payload) <= 0)
					dst[k] = storage[i++];
				else
					dst[k] = dst[j++];
			} else if (i < A) {
				dst[k] = storage[i++];
			} else
				dst[k] = dst[j++];
		}
	} else {
		memcpy(storage, &dst[curr + A], B * sizeof(void *));
		i = B - 1;
		j = curr + A - 1;

		for (k = curr + A + B - 1; k >= curr; k--) {
			if ((i >= 0) && (j >= curr)) {
				if (store->cmp(dst[j], storage[i], store->payload) > 0)
					dst[k] = dst[j--];
				else
					dst[k] = storage[i--];
			} else if (i >= 0)
				dst[k] = storage[i--];
			else
				dst[k] = dst[j--];
		}
	}
}

static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size)
{
	ssize_t A, B, C;

	while (1) {
		/* if the stack only has one thing on it, we are done with the collapse */
		if (stack_curr <= 1)
			break;

		/* if this is the last merge, just do it */
		if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
			merge(dst, stack, stack_curr, store);
			stack[0].length += stack[1].length;
			stack_curr--;
			break;
		}

		/* check if the invariant is off for a stack of 2 elements */
		else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
			merge(dst, stack, stack_curr, store);
			stack[0].length += stack[1].length;
			stack_curr--;
			break;
		}
		else if (stack_curr == 2)
			break;

		A = stack[stack_curr - 3].length;
		B = stack[stack_curr - 2].length;
		C = stack[stack_curr - 1].length;

		/* check first invariant */
		if (A <= B + C) {
			if (A < C) {
				merge(dst, stack, stack_curr - 1, store);
				stack[stack_curr - 3].length += stack[stack_curr - 2].length;
				stack[stack_curr - 2] = stack[stack_curr - 1];
				stack_curr--;
			} else {
				merge(dst, stack, stack_curr, store);
				stack[stack_curr - 2].length += stack[stack_curr - 1].length;
				stack_curr--;
			}
		} else if (B <= C) {
			merge(dst, stack, stack_curr, store);
			stack[stack_curr - 2].length += stack[stack_curr - 1].length;
			stack_curr--;
		} else
			break;
	}

	return stack_curr;
}

#define PUSH_NEXT() do {\
	len = count_run(dst, curr, size, store);\
	run = minrun;\
	if (run < minrun) run = minrun;\
	if (run > (ssize_t)size - curr) run = size - curr;\
	if (run > len) {\
		bisort(&dst[curr], len, run, cmp, payload);\
		len = run;\
	}\
	run_stack[stack_curr].start = curr;\
	run_stack[stack_curr++].length = len;\
	curr += len;\
	if (curr == (ssize_t)size) {\
		/* finish up */ \
		while (stack_curr > 1) { \
			merge(dst, run_stack, stack_curr, store); \
			run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
			stack_curr--; \
		} \
		if (store->storage != NULL) {\
			git__free(store->storage);\
			store->storage = NULL;\
		}\
		return;\
	}\
}\
while (0)

void git__tsort_r(
	void **dst, size_t size, git__sort_r_cmp cmp, void *payload)
{
	struct tsort_store _store, *store = &_store;
	struct tsort_run run_stack[128];

	ssize_t stack_curr = 0;
	ssize_t len, run;
	ssize_t curr = 0;
	ssize_t minrun;

	if (size < 64) {
		bisort(dst, 1, size, cmp, payload);
		return;
	}

	/* compute the minimum run length */
	minrun = (ssize_t)compute_minrun(size);

	/* temporary storage for merges */
	store->alloc = 0;
	store->storage = NULL;
	store->cmp = cmp;
	store->payload = payload;

	PUSH_NEXT();
	PUSH_NEXT();
	PUSH_NEXT();

	while (1) {
		if (!check_invariant(run_stack, stack_curr)) {
			stack_curr = collapse(dst, run_stack, stack_curr, store, size);
			continue;
		}

		PUSH_NEXT();
	}
}

static int tsort_r_cmp(const void *a, const void *b, void *payload)
{
	return ((git__tsort_cmp)payload)(a, b);
}

void git__tsort(void **dst, size_t size, git__tsort_cmp cmp)
{
	git__tsort_r(dst, size, tsort_r_cmp, cmp);
}