Blame src/tsort.c

Packit ae9e2a
/*
Packit ae9e2a
 * Copyright (C) the libgit2 contributors. All rights reserved.
Packit ae9e2a
 *
Packit ae9e2a
 * This file is part of libgit2, distributed under the GNU GPL v2 with
Packit ae9e2a
 * a Linking Exception. For full terms see the included COPYING file.
Packit ae9e2a
 */
Packit ae9e2a
Packit ae9e2a
#include "common.h"
Packit ae9e2a
Packit ae9e2a
/**
Packit ae9e2a
 * An array-of-pointers implementation of Python's Timsort
Packit ae9e2a
 * Based on code by Christopher Swenson under the MIT license
Packit ae9e2a
 *
Packit ae9e2a
 * Copyright (c) 2010 Christopher Swenson
Packit ae9e2a
 * Copyright (c) 2011 Vicent Marti
Packit ae9e2a
 */
Packit ae9e2a
Packit ae9e2a
#ifndef MAX
Packit ae9e2a
#	define MAX(x,y) (((x) > (y) ? (x) : (y)))
Packit ae9e2a
#endif
Packit ae9e2a
Packit ae9e2a
#ifndef MIN
Packit ae9e2a
#	define MIN(x,y) (((x) < (y) ? (x) : (y)))
Packit ae9e2a
#endif
Packit ae9e2a
Packit ae9e2a
static int binsearch(
Packit ae9e2a
	void **dst, const void *x, size_t size, git__sort_r_cmp cmp, void *payload)
Packit ae9e2a
{
Packit ae9e2a
	int l, c, r;
Packit ae9e2a
	void *lx, *cx;
Packit ae9e2a
Packit ae9e2a
	assert(size > 0);
Packit ae9e2a
Packit ae9e2a
	l = 0;
Packit ae9e2a
	r = (int)size - 1;
Packit ae9e2a
	c = r >> 1;
Packit ae9e2a
	lx = dst[l];
Packit ae9e2a
Packit ae9e2a
	/* check for beginning conditions */
Packit ae9e2a
	if (cmp(x, lx, payload) < 0)
Packit ae9e2a
		return 0;
Packit ae9e2a
Packit ae9e2a
	else if (cmp(x, lx, payload) == 0) {
Packit ae9e2a
		int i = 1;
Packit ae9e2a
		while (cmp(x, dst[i], payload) == 0)
Packit ae9e2a
			i++;
Packit ae9e2a
		return i;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	/* guaranteed not to be >= rx */
Packit ae9e2a
	cx = dst[c];
Packit ae9e2a
	while (1) {
Packit ae9e2a
		const int val = cmp(x, cx, payload);
Packit ae9e2a
		if (val < 0) {
Packit ae9e2a
			if (c - l <= 1) return c;
Packit ae9e2a
			r = c;
Packit ae9e2a
		} else if (val > 0) {
Packit ae9e2a
			if (r - c <= 1) return c + 1;
Packit ae9e2a
			l = c;
Packit ae9e2a
			lx = cx;
Packit ae9e2a
		} else {
Packit ae9e2a
			do {
Packit ae9e2a
				cx = dst[++c];
Packit ae9e2a
			} while (cmp(x, cx, payload) == 0);
Packit ae9e2a
			return c;
Packit ae9e2a
		}
Packit ae9e2a
		c = l + ((r - l) >> 1);
Packit ae9e2a
		cx = dst[c];
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
/* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
Packit ae9e2a
static void bisort(
Packit ae9e2a
	void **dst, size_t start, size_t size, git__sort_r_cmp cmp, void *payload)
Packit ae9e2a
{
Packit ae9e2a
	size_t i;
Packit ae9e2a
	void *x;
Packit ae9e2a
	int location;
Packit ae9e2a
Packit ae9e2a
	for (i = start; i < size; i++) {
Packit ae9e2a
		int j;
Packit ae9e2a
		/* If this entry is already correct, just move along */
Packit ae9e2a
		if (cmp(dst[i - 1], dst[i], payload) <= 0)
Packit ae9e2a
			continue;
Packit ae9e2a
Packit ae9e2a
		/* Else we need to find the right place, shift everything over, and squeeze in */
Packit ae9e2a
		x = dst[i];
Packit ae9e2a
		location = binsearch(dst, x, i, cmp, payload);
Packit ae9e2a
		for (j = (int)i - 1; j >= location; j--) {
Packit ae9e2a
			dst[j + 1] = dst[j];
Packit ae9e2a
		}
Packit ae9e2a
		dst[location] = x;
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
Packit ae9e2a
/* timsort implementation, based on timsort.txt */
Packit ae9e2a
struct tsort_run {
Packit ae9e2a
	ssize_t start;
Packit ae9e2a
	ssize_t length;
Packit ae9e2a
};
Packit ae9e2a
Packit ae9e2a
struct tsort_store {
Packit ae9e2a
	size_t alloc;
Packit ae9e2a
	git__sort_r_cmp cmp;
Packit ae9e2a
	void *payload;
Packit ae9e2a
	void **storage;
Packit ae9e2a
};
Packit ae9e2a
Packit ae9e2a
static void reverse_elements(void **dst, ssize_t start, ssize_t end)
Packit ae9e2a
{
Packit ae9e2a
	while (start < end) {
Packit ae9e2a
		void *tmp = dst[start];
Packit ae9e2a
		dst[start] = dst[end];
Packit ae9e2a
		dst[end] = tmp;
Packit ae9e2a
Packit ae9e2a
		start++;
Packit ae9e2a
		end--;
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static ssize_t count_run(
Packit ae9e2a
	void **dst, ssize_t start, ssize_t size, struct tsort_store *store)
Packit ae9e2a
{
Packit ae9e2a
	ssize_t curr = start + 2;
Packit ae9e2a
Packit ae9e2a
	if (size - start == 1)
Packit ae9e2a
		return 1;
Packit ae9e2a
Packit ae9e2a
	if (start >= size - 2) {
Packit ae9e2a
		if (store->cmp(dst[size - 2], dst[size - 1], store->payload) > 0) {
Packit ae9e2a
			void *tmp = dst[size - 1];
Packit ae9e2a
			dst[size - 1] = dst[size - 2];
Packit ae9e2a
			dst[size - 2] = tmp;
Packit ae9e2a
		}
Packit ae9e2a
Packit ae9e2a
		return 2;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	if (store->cmp(dst[start], dst[start + 1], store->payload) <= 0) {
Packit ae9e2a
		while (curr < size - 1 &&
Packit ae9e2a
				store->cmp(dst[curr - 1], dst[curr], store->payload) <= 0)
Packit ae9e2a
			curr++;
Packit ae9e2a
Packit ae9e2a
		return curr - start;
Packit ae9e2a
	} else {
Packit ae9e2a
		while (curr < size - 1 &&
Packit ae9e2a
				store->cmp(dst[curr - 1], dst[curr], store->payload) > 0)
Packit ae9e2a
			curr++;
Packit ae9e2a
Packit ae9e2a
		/* reverse in-place */
Packit ae9e2a
		reverse_elements(dst, start, curr - 1);
Packit ae9e2a
		return curr - start;
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static size_t compute_minrun(size_t n)
Packit ae9e2a
{
Packit ae9e2a
	int r = 0;
Packit ae9e2a
	while (n >= 64) {
Packit ae9e2a
		r |= n & 1;
Packit ae9e2a
		n >>= 1;
Packit ae9e2a
	}
Packit ae9e2a
	return n + r;
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static int check_invariant(struct tsort_run *stack, ssize_t stack_curr)
Packit ae9e2a
{
Packit ae9e2a
	if (stack_curr < 2)
Packit ae9e2a
		return 1;
Packit ae9e2a
Packit ae9e2a
	else if (stack_curr == 2) {
Packit ae9e2a
		const ssize_t A = stack[stack_curr - 2].length;
Packit ae9e2a
		const ssize_t B = stack[stack_curr - 1].length;
Packit ae9e2a
		return (A > B);
Packit ae9e2a
	} else {
Packit ae9e2a
		const ssize_t A = stack[stack_curr - 3].length;
Packit ae9e2a
		const ssize_t B = stack[stack_curr - 2].length;
Packit ae9e2a
		const ssize_t C = stack[stack_curr - 1].length;
Packit ae9e2a
		return !((A <= B + C) || (B <= C));
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static int resize(struct tsort_store *store, size_t new_size)
Packit ae9e2a
{
Packit ae9e2a
	if (store->alloc < new_size) {
Packit ae9e2a
		void **tempstore;
Packit ae9e2a
Packit ae9e2a
		tempstore = git__reallocarray(store->storage, new_size, sizeof(void *));
Packit ae9e2a
Packit ae9e2a
		/**
Packit ae9e2a
		 * Do not propagate on OOM; this will abort the sort and
Packit ae9e2a
		 * leave the array unsorted, but no error code will be
Packit ae9e2a
		 * raised
Packit ae9e2a
		 */
Packit ae9e2a
		if (tempstore == NULL)
Packit ae9e2a
			return -1;
Packit ae9e2a
Packit ae9e2a
		store->storage = tempstore;
Packit ae9e2a
		store->alloc = new_size;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	return 0;
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static void merge(void **dst, const struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store)
Packit ae9e2a
{
Packit ae9e2a
	const ssize_t A = stack[stack_curr - 2].length;
Packit ae9e2a
	const ssize_t B = stack[stack_curr - 1].length;
Packit ae9e2a
	const ssize_t curr = stack[stack_curr - 2].start;
Packit ae9e2a
Packit ae9e2a
	void **storage;
Packit ae9e2a
	ssize_t i, j, k;
Packit ae9e2a
Packit ae9e2a
	if (resize(store, MIN(A, B)) < 0)
Packit ae9e2a
		return;
Packit ae9e2a
Packit ae9e2a
	storage = store->storage;
Packit ae9e2a
Packit ae9e2a
	/* left merge */
Packit ae9e2a
	if (A < B) {
Packit ae9e2a
		memcpy(storage, &dst[curr], A * sizeof(void *));
Packit ae9e2a
		i = 0;
Packit ae9e2a
		j = curr + A;
Packit ae9e2a
Packit ae9e2a
		for (k = curr; k < curr + A + B; k++) {
Packit ae9e2a
			if ((i < A) && (j < curr + A + B)) {
Packit ae9e2a
				if (store->cmp(storage[i], dst[j], store->payload) <= 0)
Packit ae9e2a
					dst[k] = storage[i++];
Packit ae9e2a
				else
Packit ae9e2a
					dst[k] = dst[j++];
Packit ae9e2a
			} else if (i < A) {
Packit ae9e2a
				dst[k] = storage[i++];
Packit ae9e2a
			} else
Packit ae9e2a
				dst[k] = dst[j++];
Packit ae9e2a
		}
Packit ae9e2a
	} else {
Packit ae9e2a
		memcpy(storage, &dst[curr + A], B * sizeof(void *));
Packit ae9e2a
		i = B - 1;
Packit ae9e2a
		j = curr + A - 1;
Packit ae9e2a
Packit ae9e2a
		for (k = curr + A + B - 1; k >= curr; k--) {
Packit ae9e2a
			if ((i >= 0) && (j >= curr)) {
Packit ae9e2a
				if (store->cmp(dst[j], storage[i], store->payload) > 0)
Packit ae9e2a
					dst[k] = dst[j--];
Packit ae9e2a
				else
Packit ae9e2a
					dst[k] = storage[i--];
Packit ae9e2a
			} else if (i >= 0)
Packit ae9e2a
				dst[k] = storage[i--];
Packit ae9e2a
			else
Packit ae9e2a
				dst[k] = dst[j--];
Packit ae9e2a
		}
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size)
Packit ae9e2a
{
Packit ae9e2a
	ssize_t A, B, C;
Packit ae9e2a
Packit ae9e2a
	while (1) {
Packit ae9e2a
		/* if the stack only has one thing on it, we are done with the collapse */
Packit ae9e2a
		if (stack_curr <= 1)
Packit ae9e2a
			break;
Packit ae9e2a
Packit ae9e2a
		/* if this is the last merge, just do it */
Packit ae9e2a
		if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
Packit ae9e2a
			merge(dst, stack, stack_curr, store);
Packit ae9e2a
			stack[0].length += stack[1].length;
Packit ae9e2a
			stack_curr--;
Packit ae9e2a
			break;
Packit ae9e2a
		}
Packit ae9e2a
Packit ae9e2a
		/* check if the invariant is off for a stack of 2 elements */
Packit ae9e2a
		else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
Packit ae9e2a
			merge(dst, stack, stack_curr, store);
Packit ae9e2a
			stack[0].length += stack[1].length;
Packit ae9e2a
			stack_curr--;
Packit ae9e2a
			break;
Packit ae9e2a
		}
Packit ae9e2a
		else if (stack_curr == 2)
Packit ae9e2a
			break;
Packit ae9e2a
Packit ae9e2a
		A = stack[stack_curr - 3].length;
Packit ae9e2a
		B = stack[stack_curr - 2].length;
Packit ae9e2a
		C = stack[stack_curr - 1].length;
Packit ae9e2a
Packit ae9e2a
		/* check first invariant */
Packit ae9e2a
		if (A <= B + C) {
Packit ae9e2a
			if (A < C) {
Packit ae9e2a
				merge(dst, stack, stack_curr - 1, store);
Packit ae9e2a
				stack[stack_curr - 3].length += stack[stack_curr - 2].length;
Packit ae9e2a
				stack[stack_curr - 2] = stack[stack_curr - 1];
Packit ae9e2a
				stack_curr--;
Packit ae9e2a
			} else {
Packit ae9e2a
				merge(dst, stack, stack_curr, store);
Packit ae9e2a
				stack[stack_curr - 2].length += stack[stack_curr - 1].length;
Packit ae9e2a
				stack_curr--;
Packit ae9e2a
			}
Packit ae9e2a
		} else if (B <= C) {
Packit ae9e2a
			merge(dst, stack, stack_curr, store);
Packit ae9e2a
			stack[stack_curr - 2].length += stack[stack_curr - 1].length;
Packit ae9e2a
			stack_curr--;
Packit ae9e2a
		} else
Packit ae9e2a
			break;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	return stack_curr;
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
#define PUSH_NEXT() do {\
Packit ae9e2a
	len = count_run(dst, curr, size, store);\
Packit ae9e2a
	run = minrun;\
Packit ae9e2a
	if (run < minrun) run = minrun;\
Packit ae9e2a
	if (run > (ssize_t)size - curr) run = size - curr;\
Packit ae9e2a
	if (run > len) {\
Packit ae9e2a
		bisort(&dst[curr], len, run, cmp, payload);\
Packit ae9e2a
		len = run;\
Packit ae9e2a
	}\
Packit ae9e2a
	run_stack[stack_curr].start = curr;\
Packit ae9e2a
	run_stack[stack_curr++].length = len;\
Packit ae9e2a
	curr += len;\
Packit ae9e2a
	if (curr == (ssize_t)size) {\
Packit ae9e2a
		/* finish up */ \
Packit ae9e2a
		while (stack_curr > 1) { \
Packit ae9e2a
			merge(dst, run_stack, stack_curr, store); \
Packit ae9e2a
			run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
Packit ae9e2a
			stack_curr--; \
Packit ae9e2a
		} \
Packit ae9e2a
		if (store->storage != NULL) {\
Packit ae9e2a
			git__free(store->storage);\
Packit ae9e2a
			store->storage = NULL;\
Packit ae9e2a
		}\
Packit ae9e2a
		return;\
Packit ae9e2a
	}\
Packit ae9e2a
}\
Packit ae9e2a
while (0)
Packit ae9e2a
Packit ae9e2a
void git__tsort_r(
Packit ae9e2a
	void **dst, size_t size, git__sort_r_cmp cmp, void *payload)
Packit ae9e2a
{
Packit ae9e2a
	struct tsort_store _store, *store = &_store;
Packit ae9e2a
	struct tsort_run run_stack[128];
Packit ae9e2a
Packit ae9e2a
	ssize_t stack_curr = 0;
Packit ae9e2a
	ssize_t len, run;
Packit ae9e2a
	ssize_t curr = 0;
Packit ae9e2a
	ssize_t minrun;
Packit ae9e2a
Packit ae9e2a
	if (size < 64) {
Packit ae9e2a
		bisort(dst, 1, size, cmp, payload);
Packit ae9e2a
		return;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	/* compute the minimum run length */
Packit ae9e2a
	minrun = (ssize_t)compute_minrun(size);
Packit ae9e2a
Packit ae9e2a
	/* temporary storage for merges */
Packit ae9e2a
	store->alloc = 0;
Packit ae9e2a
	store->storage = NULL;
Packit ae9e2a
	store->cmp = cmp;
Packit ae9e2a
	store->payload = payload;
Packit ae9e2a
Packit ae9e2a
	PUSH_NEXT();
Packit ae9e2a
	PUSH_NEXT();
Packit ae9e2a
	PUSH_NEXT();
Packit ae9e2a
Packit ae9e2a
	while (1) {
Packit ae9e2a
		if (!check_invariant(run_stack, stack_curr)) {
Packit ae9e2a
			stack_curr = collapse(dst, run_stack, stack_curr, store, size);
Packit ae9e2a
			continue;
Packit ae9e2a
		}
Packit ae9e2a
Packit ae9e2a
		PUSH_NEXT();
Packit ae9e2a
	}
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
static int tsort_r_cmp(const void *a, const void *b, void *payload)
Packit ae9e2a
{
Packit ae9e2a
	return ((git__tsort_cmp)payload)(a, b);
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
void git__tsort(void **dst, size_t size, git__tsort_cmp cmp)
Packit ae9e2a
{
Packit ae9e2a
	git__tsort_r(dst, size, tsort_r_cmp, cmp);
Packit ae9e2a
}