/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
Packit ae9e2a
#include "buf_text.h"
Packit ae9e2a
Packit ae9e2a
int git_buf_text_puts_escaped(
Packit ae9e2a
	git_buf *buf,
Packit ae9e2a
	const char *string,
Packit ae9e2a
	const char *esc_chars,
Packit ae9e2a
	const char *esc_with)
Packit ae9e2a
{
Packit ae9e2a
	const char *scan;
Packit ae9e2a
	size_t total = 0, esc_len = strlen(esc_with), count, alloclen;
Packit ae9e2a
Packit ae9e2a
	if (!string)
Packit ae9e2a
		return 0;
Packit ae9e2a
Packit ae9e2a
	for (scan = string; *scan; ) {
Packit ae9e2a
		/* count run of non-escaped characters */
Packit ae9e2a
		count = strcspn(scan, esc_chars);
Packit ae9e2a
		total += count;
Packit ae9e2a
		scan += count;
Packit ae9e2a
		/* count run of escaped characters */
Packit ae9e2a
		count = strspn(scan, esc_chars);
Packit ae9e2a
		total += count * (esc_len + 1);
Packit ae9e2a
		scan += count;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	GITERR_CHECK_ALLOC_ADD(&alloclen, total, 1);
Packit ae9e2a
	if (git_buf_grow_by(buf, alloclen) < 0)
Packit ae9e2a
		return -1;
Packit ae9e2a
Packit ae9e2a
	for (scan = string; *scan; ) {
Packit ae9e2a
		count = strcspn(scan, esc_chars);
Packit ae9e2a
Packit ae9e2a
		memmove(buf->ptr + buf->size, scan, count);
Packit ae9e2a
		scan += count;
Packit ae9e2a
		buf->size += count;
Packit ae9e2a
Packit ae9e2a
		for (count = strspn(scan, esc_chars); count > 0; --count) {
Packit ae9e2a
			/* copy escape sequence */
Packit ae9e2a
			memmove(buf->ptr + buf->size, esc_with, esc_len);
Packit ae9e2a
			buf->size += esc_len;
Packit ae9e2a
			/* copy character to be escaped */
Packit ae9e2a
			buf->ptr[buf->size] = *scan;
Packit ae9e2a
			buf->size++;
Packit ae9e2a
			scan++;
Packit ae9e2a
		}
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	buf->ptr[buf->size] = '\0';
Packit ae9e2a
Packit ae9e2a
	return 0;
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
/*
 * Run git__unescape() over the buffer's string and store the value it
 * returns as the buffer's new size.
 *
 * NOTE(review): git__unescape() presumably rewrites the string in place and
 * returns its new length -- helper not visible here, confirm.
 */
void git_buf_text_unescape(git_buf *buf)
{
	char *text = buf->ptr;

	buf->size = git__unescape(text);
}
Packit ae9e2a
Packit ae9e2a
/*
 * Copy `src` into `tgt`, dropping each '\r' that is immediately followed by
 * '\n'. A '\r' that is last in the buffer or followed by any other byte is
 * kept.
 *
 * `tgt` and `src` must be different buffers (asserted). Returns 0 on
 * success, -1 if `tgt` could not be grown; when `src` holds no '\r' the
 * result of git_buf_set() is returned directly.
 */
int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = src->ptr + src->size;
	const char *cr = memchr(in, '\r', src->size);
	size_t alloc_size;
	char *dst;

	assert(tgt != src);

	/* nothing to convert: plain copy */
	if (cr == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/*
	 * The output is never longer than the input, so reserving the source
	 * size plus a NUL up front avoids reallocating inside the loop.
	 */
	GITERR_CHECK_ALLOC_ADD(&alloc_size, src->size, 1);
	if (git_buf_grow(tgt, alloc_size) < 0)
		return -1;

	tgt->size = 0;
	dst = tgt->ptr;

	while (cr != NULL) {
		size_t chunk = (size_t)(cr - in);

		/* bytes between the previous '\r' and this one go over as-is */
		if (chunk > 0) {
			memcpy(dst, in, chunk);
			dst += chunk;
		}

		/* keep the '\r' unless an '\n' comes right after it */
		if (cr + 1 == in_end || cr[1] != '\n')
			*dst++ = '\r';

		in = cr + 1;
		cr = memchr(in, '\r', in_end - in);
	}

	/* tail after the last '\r' */
	if (in < in_end) {
		size_t tail = (size_t)(in_end - in);

		memcpy(dst, in, tail);
		dst += tail;
	}

	tgt->size = (size_t)(dst - tgt->ptr);
	tgt->ptr[tgt->size] = '\0';

	return 0;
}
Packit ae9e2a
Packit ae9e2a
/*
 * Copy `src` into `tgt`, writing every '\n' as "\r\n". An '\n' that is
 * already preceded by '\r' in the current line is not given a second '\r'.
 *
 * `tgt` and `src` must be different buffers (asserted). Returns -1 if `tgt`
 * could not be grown; otherwise returns the result of the final
 * git_buf_put() (or of git_buf_set() when `src` holds no '\n').
 */
int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = in + src->size;
	const char *lf = memchr(in, '\n', src->size);
	size_t alloclen;

	assert(tgt != src);

	/* nothing to convert: plain copy */
	if (lf == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/*
	 * Initial guess to cut down on reallocations in the loop: source size
	 * plus one sixteenth of it, plus one byte.
	 */
	GITERR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
	GITERR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
	if (git_buf_grow(tgt, alloclen) < 0)
		return -1;
	tgt->size = 0;

	while (lf != NULL) {
		size_t line_len = (size_t)(lf - in);

		/* mixed endings: the line already ends in '\r', don't copy it */
		if (line_len > 0 && lf[-1] == '\r')
			line_len--;

		/* line text + "\r\n" + NUL */
		GITERR_CHECK_ALLOC_ADD(&alloclen, line_len, 3);
		if (git_buf_grow_by(tgt, alloclen) < 0)
			return -1;

		if (line_len > 0) {
			memcpy(tgt->ptr + tgt->size, in, line_len);
			tgt->size += line_len;
		}

		tgt->ptr[tgt->size++] = '\r';
		tgt->ptr[tgt->size++] = '\n';

		in = lf + 1;
		lf = memchr(in, '\n', in_end - in);
	}

	tgt->ptr[tgt->size] = '\0';

	/* whatever follows the last '\n' is appended unchanged */
	return git_buf_put(tgt, in, in_end - in);
}
Packit ae9e2a
Packit ae9e2a
/*
 * Set `buf` to the longest leading string shared by every entry of
 * `strings`.
 *
 * `buf` is cleared first; a NULL or empty array leaves it cleared and
 * returns 0. Returns -1 if copying the first string into `buf` fails,
 * 0 otherwise.
 */
int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
{
	size_t idx;

	git_buf_clear(buf);

	if (strings == NULL || strings->count == 0)
		return 0;

	/* the first string is the starting candidate for the prefix */
	if (git_buf_sets(buf, strings->strings[0]) < 0)
		return -1;

	/* shrink the candidate against each remaining string */
	for (idx = 1; idx < strings->count; ++idx) {
		const char *candidate = strings->strings[idx];
		const char *shared = buf->ptr;

		/* advance while both strings agree and the candidate has not ended */
		while (*candidate != '\0' && *candidate == *shared) {
			candidate++;
			shared++;
		}

		git_buf_truncate(buf, shared - buf->ptr);

		/* an empty prefix cannot shrink further */
		if (buf->size == 0)
			break;
	}

	return 0;
}
Packit ae9e2a
Packit ae9e2a
bool git_buf_text_is_binary(const git_buf *buf)
Packit ae9e2a
{
Packit ae9e2a
	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
Packit ae9e2a
	git_bom_t bom;
Packit ae9e2a
	int printable = 0, nonprintable = 0;
Packit ae9e2a
Packit ae9e2a
	scan += git_buf_text_detect_bom(&bom, buf, 0);
Packit ae9e2a
Packit ae9e2a
	if (bom > GIT_BOM_UTF8)
Packit ae9e2a
		return 1;
Packit ae9e2a
Packit ae9e2a
	while (scan < end) {
Packit ae9e2a
		unsigned char c = *scan++;
Packit ae9e2a
Packit ae9e2a
		/* Printable characters are those above SPACE (0x1F) excluding DEL,
Packit ae9e2a
		 * and including BS, ESC and FF.
Packit ae9e2a
		 */
Packit ae9e2a
		if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
Packit ae9e2a
			printable++;
Packit ae9e2a
		else if (c == '\0')
Packit ae9e2a
			return true;
Packit ae9e2a
		else if (!git__isspace(c))
Packit ae9e2a
			nonprintable++;
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	return ((printable >> 7) < nonprintable);
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
bool git_buf_text_contains_nul(const git_buf *buf)
Packit ae9e2a
{
Packit ae9e2a
	return (memchr(buf->ptr, '\0', buf->size) != NULL);
Packit ae9e2a
}
Packit ae9e2a
Packit ae9e2a
/*
 * Look for a Unicode byte order mark in `buf`, starting `offset` bytes in.
 *
 * `*bom` is always written: GIT_BOM_NONE when nothing is recognised,
 * otherwise the matching GIT_BOM_* value. The return value is the length of
 * the BOM in bytes (2, 3 or 4), or 0 when none was found or fewer than two
 * bytes are available at `offset`.
 */
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
	const char *ptr;
	size_t len;

	*bom = GIT_BOM_NONE;
	/*
	 * Need at least 2 bytes after offset to look for any BOM. Compared by
	 * subtraction so that an offset close to SIZE_MAX cannot wrap around
	 * and slip past the bounds check.
	 */
	if (offset > buf->size || buf->size - offset < 2)
		return 0;

	ptr = buf->ptr + offset;
	len = buf->size - offset;

	/* dispatch on the first byte; `ptr` then points at the second byte */
	switch (*ptr++) {
	case 0:
		/* 00 00 FE FF */
		if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
			*bom = GIT_BOM_UTF32_BE;
			return 4;
		}
		break;
	case '\xEF':
		/* EF BB BF */
		if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
			*bom = GIT_BOM_UTF8;
			return 3;
		}
		break;
	case '\xFE':
		/* FE FF */
		if (*ptr == '\xFF') {
			*bom = GIT_BOM_UTF16_BE;
			return 2;
		}
		break;
	case '\xFF':
		if (*ptr != '\xFE')
			break;
		/*
		 * FF FE is also the start of FF FE 00 00, so the longer
		 * UTF-32 LE form has to be tested before settling on UTF-16 LE.
		 */
		if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
			*bom = GIT_BOM_UTF32_LE;
			return 4;
		}
		*bom = GIT_BOM_UTF16_LE;
		return 2;
	default:
		break;
	}

	return 0;
}
Packit ae9e2a
Packit ae9e2a
bool git_buf_text_gather_stats(
Packit ae9e2a
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
Packit ae9e2a
{
Packit ae9e2a
	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
Packit ae9e2a
	int skip;
Packit ae9e2a
Packit ae9e2a
	memset(stats, 0, sizeof(*stats));
Packit ae9e2a
Packit ae9e2a
	/* BOM detection */
Packit ae9e2a
	skip = git_buf_text_detect_bom(&stats->bom, buf, 0);
Packit ae9e2a
	if (skip_bom)
Packit ae9e2a
		scan += skip;
Packit ae9e2a
Packit ae9e2a
	/* Ignore EOF character */
Packit ae9e2a
	if (buf->size > 0 && end[-1] == '\032')
Packit ae9e2a
		end--;
Packit ae9e2a
Packit ae9e2a
	/* Counting loop */
Packit ae9e2a
	while (scan < end) {
Packit ae9e2a
		unsigned char c = *scan++;
Packit ae9e2a
Packit ae9e2a
		if (c > 0x1F && c != 0x7F)
Packit ae9e2a
			stats->printable++;
Packit ae9e2a
		else switch (c) {
Packit ae9e2a
			case '\0':
Packit ae9e2a
				stats->nul++;
Packit ae9e2a
				stats->nonprintable++;
Packit ae9e2a
				break;
Packit ae9e2a
			case '\n':
Packit ae9e2a
				stats->lf++;
Packit ae9e2a
				break;
Packit ae9e2a
			case '\r':
Packit ae9e2a
				stats->cr++;
Packit ae9e2a
				if (scan < end && *scan == '\n')
Packit ae9e2a
					stats->crlf++;
Packit ae9e2a
				break;
Packit ae9e2a
			case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
Packit ae9e2a
				stats->printable++;
Packit ae9e2a
				break;
Packit ae9e2a
			default:
Packit ae9e2a
				stats->nonprintable++;
Packit ae9e2a
				break;
Packit ae9e2a
			}
Packit ae9e2a
	}
Packit ae9e2a
Packit ae9e2a
	return (stats->nul > 0 ||
Packit ae9e2a
		((stats->printable >> 7) < stats->nonprintable));
Packit ae9e2a
}