/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "buf_text.h"

/**
 * Append `string` to `buf`, writing `esc_with` in front of every
 * character of `string` that occurs in `esc_chars`.
 *
 * `esc_with` and `esc_chars` must be non-NULL (they are handed straight
 * to strlen / strcspn / strspn).  A NULL `string` is a no-op.
 *
 * @return 0 on success (or when `string` is NULL), -1 if the buffer
 *         could not be grown.  GITERR_CHECK_ALLOC_ADD presumably also
 *         returns an error on size overflow -- confirm in its definition.
 */
int git_buf_text_puts_escaped(
	git_buf *buf,
	const char *string,
	const char *esc_chars,
	const char *esc_with)
{
	const char *scan;
	size_t total = 0, esc_len = strlen(esc_with), count, alloclen;

	if (!string)
		return 0;

	/* First pass: compute how many bytes the escaped output needs. */
	for (scan = string; *scan; ) {
		/* count run of non-escaped characters */
		count = strcspn(scan, esc_chars);
		total += count;
		scan += count;

		/* count run of escaped characters */
		count = strspn(scan, esc_chars);
		total += count * (esc_len + 1);
		scan += count;
	}

	/* One extra byte for the trailing NUL. */
	GITERR_CHECK_ALLOC_ADD(&alloclen, total, 1);
	if (git_buf_grow_by(buf, alloclen) < 0)
		return -1;

	/* Second pass: copy, inserting the escape sequence where needed. */
	for (scan = string; *scan; ) {
		count = strcspn(scan, esc_chars);

		memmove(buf->ptr + buf->size, scan, count);
		scan += count;
		buf->size += count;

		for (count = strspn(scan, esc_chars); count > 0; --count) {
			/* copy escape sequence */
			memmove(buf->ptr + buf->size, esc_with, esc_len);
			buf->size += esc_len;

			/* copy character to be escaped */
			buf->ptr[buf->size] = *scan;
			buf->size++;
			scan++;
		}
	}

	buf->ptr[buf->size] = '\0';

	return 0;
}

/**
 * Run git__unescape() over the buffer contents and store its return
 * value as the new buffer size.
 *
 * NOTE(review): presumably git__unescape rewrites the string in place
 * and returns the resulting length -- confirm against its definition.
 */
void git_buf_text_unescape(git_buf *buf)
{
	buf->size = git__unescape(buf->ptr);
}

/**
 * Write the contents of `src` into `tgt`, dropping every '\r' that is
 * immediately followed by '\n'.  A '\r' that is not followed by '\n'
 * (including one at the very end of the input) is kept.
 *
 * `tgt` and `src` must be different buffers (asserted).  If `src`
 * contains no '\r' at all, it is copied with git_buf_set() and that
 * call's result is returned.
 *
 * @return 0 on success, -1 if `tgt` could not be grown.
 */
int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
{
	const char *scan = src->ptr;
	const char *scan_end = src->ptr + src->size;
	const char *next = memchr(scan, '\r', src->size);
	size_t new_size;
	char *out;

	assert(tgt != src);

	if (!next)
		return git_buf_set(tgt, src->ptr, src->size);

	/* reduce reallocs while in the loop: output never exceeds input */
	GITERR_CHECK_ALLOC_ADD(&new_size, src->size, 1);
	if (git_buf_grow(tgt, new_size) < 0)
		return -1;

	out = tgt->ptr;
	tgt->size = 0;

	/* Find the next \r and copy whole chunk up to there to tgt */
	for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) {
		if (next > scan) {
			size_t copylen = (size_t)(next - scan);
			memcpy(out, scan, copylen);
			out += copylen;
		}

		/* Do not drop \r unless it is followed by \n */
		if (next + 1 == scan_end || next[1] != '\n')
			*out++ = '\r';
	}

	/* Copy remaining input into dest */
	if (scan < scan_end) {
		size_t remaining = (size_t)(scan_end - scan);
		memcpy(out, scan, remaining);
		out += remaining;
	}

	tgt->size = (size_t)(out - tgt->ptr);
	tgt->ptr[tgt->size] = '\0';

	return 0;
}

/**
 * Write the contents of `src` into `tgt`, emitting "\r\n" for every
 * '\n' found.  A '\r' that directly precedes the '\n' inside the chunk
 * being copied is dropped first, so an existing "\r\n" is not doubled.
 *
 * `tgt` and `src` must be different buffers (asserted).  If `src`
 * contains no '\n' at all, it is copied with git_buf_set() and that
 * call's result is returned.
 *
 * @return -1 if `tgt` could not be grown; otherwise the result of the
 *         final git_buf_put() that appends the text after the last '\n'.
 */
int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
{
	const char *start = src->ptr;
	const char *end = start + src->size;
	const char *scan = start;
	const char *next = memchr(scan, '\n', src->size);
	size_t alloclen;

	assert(tgt != src);

	if (!next)
		return git_buf_set(tgt, src->ptr, src->size);

	/* attempt to reduce reallocs while in the loop */
	GITERR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
	GITERR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
	if (git_buf_grow(tgt, alloclen) < 0)
		return -1;
	tgt->size = 0;

	for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) {
		size_t copylen = next - scan;

		/* if we find mixed line endings, carry on */
		if (copylen && next[-1] == '\r')
			copylen--;

		/* room for the chunk plus "\r\n" plus a trailing NUL */
		GITERR_CHECK_ALLOC_ADD(&alloclen, copylen, 3);
		if (git_buf_grow_by(tgt, alloclen) < 0)
			return -1;

		if (copylen) {
			memcpy(tgt->ptr + tgt->size, scan, copylen);
			tgt->size += copylen;
		}

		tgt->ptr[tgt->size++] = '\r';
		tgt->ptr[tgt->size++] = '\n';
	}

	tgt->ptr[tgt->size] = '\0';
	return git_buf_put(tgt, scan, end - scan);
}

/**
 * Set `buf` to the longest leading byte sequence shared by every entry
 * of `strings`.
 *
 * `buf` is cleared first; a NULL or empty `strings` leaves it cleared.
 *
 * @return 0 on success, -1 if the first string could not be copied
 *         into `buf`.
 */
int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
{
	size_t i;
	const char *str, *pfx;

	git_buf_clear(buf);

	if (!strings || !strings->count)
		return 0;

	/* initialize common prefix to first string */
	if (git_buf_sets(buf, strings->strings[0]) < 0)
		return -1;

	/* go through the rest of the strings, truncating to shared prefix */
	for (i = 1; i < strings->count; ++i) {

		for (str = strings->strings[i], pfx = buf->ptr;
			 *str && *str == *pfx;
			 str++, pfx++)
			/* scanning */;

		git_buf_truncate(buf, pfx - buf->ptr);

		/* nothing in common any more: no point looking further */
		if (!buf->size)
			break;
	}

	return 0;
}

/**
 * Heuristically decide whether the buffer holds binary data.
 *
 * The buffer is reported as binary when any of the following holds:
 *  - a BOM is detected whose git_bom_t value is greater than
 *    GIT_BOM_UTF8 (presumably the UTF-16 / UTF-32 variants -- depends
 *    on the enum ordering declared elsewhere);
 *  - it contains a NUL byte (after any detected BOM);
 *  - the number of non-printable bytes exceeds 1/128th of the number
 *    of printable bytes.
 *
 * Bytes for which git__isspace() is true and that are not otherwise
 * classified as printable are counted in neither bucket.
 */
bool git_buf_text_is_binary(const git_buf *buf)
{
	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
	git_bom_t bom;
	/*
	 * size_t rather than int: the loop runs over up to buf->size bytes,
	 * and a signed counter would overflow (undefined behaviour) on
	 * buffers larger than INT_MAX.
	 */
	size_t printable = 0, nonprintable = 0;

	scan += git_buf_text_detect_bom(&bom, buf, 0);

	if (bom > GIT_BOM_UTF8)
		return true;

	while (scan < end) {
		unsigned char c = *scan++;

		/* Printable characters are those above 0x1F (i.e. SPACE, 0x20,
		 * and up) excluding DEL, and including BS, ESC and FF.
		 */
		if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
			printable++;
		else if (c == '\0')
			return true;
		else if (!git__isspace(c))
			nonprintable++;
	}

	return ((printable >> 7) < nonprintable);
}

/**
 * Report whether any of the first `buf->size` bytes of the buffer is NUL.
 */
bool git_buf_text_contains_nul(const git_buf *buf)
{
	return (memchr(buf->ptr, '\0', buf->size) != NULL);
}

/**
 * Look for a Unicode byte-order mark at `offset` bytes into `buf`.
 *
 * Recognised sequences:
 *   00 00 FE FF  -> GIT_BOM_UTF32_BE (4 bytes)
 *   EF BB BF     -> GIT_BOM_UTF8     (3 bytes)
 *   FE FF        -> GIT_BOM_UTF16_BE (2 bytes)
 *   FF FE 00 00  -> GIT_BOM_UTF32_LE (4 bytes)
 *   FF FE        -> GIT_BOM_UTF16_LE (2 bytes)
 *
 * @param bom    receives the detected BOM type, or GIT_BOM_NONE
 * @param buf    buffer to inspect
 * @param offset position in `buf` at which to look
 * @return length in bytes of the BOM found, or 0 if there is none
 */
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
	const char *ptr;
	size_t len;

	*bom = GIT_BOM_NONE;

	/*
	 * Need at least 2 bytes after offset to look for any BOM.  Written
	 * as a subtraction on the (already checked) size so that a huge
	 * `offset` cannot wrap around and slip past the bounds check.
	 */
	if (buf->size < 2 || offset > buf->size - 2)
		return 0;

	ptr = buf->ptr + offset;
	len = buf->size - offset;

	/* After the switch expression, ptr points at the second byte. */
	switch (*ptr++) {
	case 0:
		if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
			*bom = GIT_BOM_UTF32_BE;
			return 4;
		}
		break;
	case '\xEF':
		if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
			*bom = GIT_BOM_UTF8;
			return 3;
		}
		break;
	case '\xFE':
		if (*ptr == '\xFF') {
			*bom = GIT_BOM_UTF16_BE;
			return 2;
		}
		break;
	case '\xFF':
		if (*ptr != '\xFE')
			break;
		/* FF FE is a prefix of the UTF-32 LE BOM: check the longer one first */
		if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
			*bom = GIT_BOM_UTF32_LE;
			return 4;
		} else {
			*bom = GIT_BOM_UTF16_LE;
			return 2;
		}
		break;
	default:
		break;
	}

	return 0;
}

/**
 * Fill `stats` with byte-class counts for `buf` and report whether the
 * content looks binary.
 *
 * `stats` is zeroed first.  The BOM type is always recorded in
 * `stats->bom`; the BOM bytes themselves are excluded from the counts
 * only when `skip_bom` is true.  A single trailing 0x1A (Ctrl-Z, EOF)
 * byte is ignored.
 *
 * Counting rules:
 *  - bytes above 0x1F other than DEL (0x7F), plus TAB, FF, VT, BS and
 *    ESC, count as printable;
 *  - NUL counts as both `nul` and nonprintable;
 *  - LF and CR are tallied in `lf` / `cr` only; a CR directly followed
 *    by LF also bumps `crlf` (the LF is then counted in `lf` as well);
 *  - every other control byte counts as nonprintable.
 *
 * @return true if any NUL was seen, or if the number of nonprintable
 *         bytes exceeds 1/128th of the printable ones.
 */
bool git_buf_text_gather_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
{
	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
	int skip;

	memset(stats, 0, sizeof(*stats));

	/* BOM detection */
	skip = git_buf_text_detect_bom(&stats->bom, buf, 0);
	if (skip_bom)
		scan += skip;

	/* Ignore EOF character */
	if (buf->size > 0 && end[-1] == '\032')
		end--;

	/* Counting loop */
	while (scan < end) {
		unsigned char c = *scan++;

		if (c > 0x1F && c != 0x7F)
			stats->printable++;
		else switch (c) {
			case '\0':
				stats->nul++;
				stats->nonprintable++;
				break;
			case '\n':
				stats->lf++;
				break;
			case '\r':
				stats->cr++;
				if (scan < end && *scan == '\n')
					stats->crlf++;
				break;
			case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
				stats->printable++;
				break;
			default:
				stats->nonprintable++;
				break;
			}
	}

	return (stats->nul > 0 ||
		((stats->printable >> 7) < stats->nonprintable));
}