/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD$");

#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif

#include "archive_pathmatch.h"

/*
 * Check whether a character 'c' is matched by a list specification [...]:
 *    * Leading '!' or '^' negates the class.
 *    * <char>-<char> is a range of characters
 *    * \<char> removes any special meaning for <char>
 *
 * 'start' points at the first character after the opening '[' and 'end'
 * points at the closing ']'; only [start, end) is examined.  'flags' is
 * currently unused.  Returns 1 if the class accepts 'c', 0 otherwise.
 *
 * Range bounds are compared as unsigned char values so that the result
 * does not depend on whether plain char is signed on this platform.
 *
 * Some interesting boundary cases:
 *   a-d-e is one range (a-d) followed by two single characters - and e.
 *   \a-\d is same as a-d
 *   a\-d is three single characters: a, d, -
 *   Trailing - is not special (so [a-] is two characters a and -).
 *   Initial - is not special ([a-] is same as [-a] is same as [\\-a])
 *   This function never sees a trailing \.
 *   [] always fails
 *   [!] always succeeds
 */
static int
pm_list(const char *start, const char *end, const char c, int flags)
{
	const char *p = start;
	char rangeStart = '\0', nextRangeStart;
	int match = 1, nomatch = 0;

	/* This will be used soon... */
	(void)flags; /* UNUSED */

	/*
	 * If this is a negated class, return success for nomatch.
	 * Check the bound first so an empty class is never inspected.
	 */
	if (p < end && (*p == '!' || *p == '^')) {
		match = 0;
		nomatch = 1;
		++p;
	}

	while (p < end) {
		nextRangeStart = '\0';
		switch (*p) {
		case '-':
			/* Trailing or initial '-' is not special. */
			if ((rangeStart == '\0') || (p == end - 1)) {
				if (*p == c)
					return (match);
			} else {
				/* The range end may itself be escaped. */
				char rangeEnd = *++p;
				if (rangeEnd == '\\')
					rangeEnd = *++p;
				if (((unsigned char)rangeStart <=
					(unsigned char)c) &&
				    ((unsigned char)c <=
					(unsigned char)rangeEnd))
					return (match);
			}
			break;
		case '\\':
			/* Take the next character literally. */
			++p;
			/* Fall through */
		default:
			if (*p == c)
				return (match);
			nextRangeStart = *p; /* Possible start of range. */
		}
		rangeStart = nextRangeStart;
		++p;
	}
	return (nomatch);
}

/*
 * Wide-character counterpart of pm_list(): check whether 'c' is matched
 * by the list specification whose contents are [start, end), where
 * 'start' follows the opening '[' and 'end' points at the closing ']'.
 *
 * A leading '!' or '^' negates the class, <char>-<char> is a range and
 * \<char> is taken literally.  'flags' is currently unused.
 * Returns 1 if the class accepts 'c', 0 otherwise.
 */
static int
pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
{
	const wchar_t *p = start;
	wchar_t rangeStart = L'\0', nextRangeStart;
	int match = 1, nomatch = 0;

	/* This will be used soon... */
	(void)flags; /* UNUSED */

	/*
	 * If this is a negated class, return success for nomatch.
	 * Check the bound first so an empty class is never inspected.
	 */
	if (p < end && (*p == L'!' || *p == L'^')) {
		match = 0;
		nomatch = 1;
		++p;
	}

	while (p < end) {
		nextRangeStart = L'\0';
		switch (*p) {
		case L'-':
			/* Trailing or initial '-' is not special. */
			if ((rangeStart == L'\0') || (p == end - 1)) {
				if (*p == c)
					return (match);
			} else {
				/* The range end may itself be escaped. */
				wchar_t rangeEnd = *++p;
				if (rangeEnd == L'\\')
					rangeEnd = *++p;
				if ((rangeStart <= c) && (c <= rangeEnd))
					return (match);
			}
			break;
		case L'\\':
			/* Take the next character literally. */
			++p;
			/* Fall through */
		default:
			if (*p == c)
				return (match);
			nextRangeStart = *p; /* Possible start of range. */
		}
		rangeStart = nextRangeStart;
		++p;
	}
	return (nomatch);
}

/*
 * If s is pointing to "./", ".//", "./././" or the like, skip it.
 *
 * Advances over any run of '/' characters, "./" pairs, and a final "."
 * that is immediately followed by the end of the string.  Returns a
 * pointer to the first character that is not part of such a run (which
 * may be the terminating NUL).
 */
static const char *
pm_slashskip(const char *s) {
	while ((*s == '/')
	    || (s[0] == '.' && s[1] == '/')
	    || (s[0] == '.' && s[1] == '\0'))
		++s;
	return (s);
}

/*
 * Wide-character counterpart of pm_slashskip(): advance over any run of
 * L'/' characters, L"./" pairs, and a final L"." that is immediately
 * followed by the end of the string.  Returns a pointer to the first
 * character that is not part of such a run.
 */
static const wchar_t *
pm_slashskip_w(const wchar_t *s) {
	while ((*s == L'/')
	    || (s[0] == L'.' && s[1] == L'/')
	    || (s[0] == L'.' && s[1] == L'\0'))
		++s;
	return (s);
}

Packit 08bd4c
static int
Packit 08bd4c
pm(const char *p, const char *s, int flags)
Packit 08bd4c
{
Packit 08bd4c
	const char *end;
Packit 08bd4c
Packit 08bd4c
	/*
Packit 08bd4c
	 * Ignore leading './', './/', '././', etc.
Packit 08bd4c
	 */
Packit 08bd4c
	if (s[0] == '.' && s[1] == '/')
Packit 08bd4c
		s = pm_slashskip(s + 1);
Packit 08bd4c
	if (p[0] == '.' && p[1] == '/')
Packit 08bd4c
		p = pm_slashskip(p + 1);
Packit 08bd4c
Packit 08bd4c
	for (;;) {
Packit 08bd4c
		switch (*p) {
Packit 08bd4c
		case '\0':
Packit 08bd4c
			if (s[0] == '/') {
Packit 08bd4c
				if (flags & PATHMATCH_NO_ANCHOR_END)
Packit 08bd4c
					return (1);
Packit 08bd4c
				/* "dir" == "dir/" == "dir/." */
Packit 08bd4c
				s = pm_slashskip(s);
Packit 08bd4c
			}
Packit 08bd4c
			return (*s == '\0');
Packit 08bd4c
		case '?':
Packit 08bd4c
			/* ? always succeeds, unless we hit end of 's' */
Packit 08bd4c
			if (*s == '\0')
Packit 08bd4c
				return (0);
Packit 08bd4c
			break;
Packit 08bd4c
		case '*':
Packit 08bd4c
			/* "*" == "**" == "***" ... */
Packit 08bd4c
			while (*p == '*')
Packit 08bd4c
				++p;
Packit 08bd4c
			/* Trailing '*' always succeeds. */
Packit 08bd4c
			if (*p == '\0')
Packit 08bd4c
				return (1);
Packit 08bd4c
			while (*s) {
Packit 08bd4c
				if (archive_pathmatch(p, s, flags))
Packit 08bd4c
					return (1);
Packit 08bd4c
				++s;
Packit 08bd4c
			}
Packit 08bd4c
			return (0);
Packit 08bd4c
		case '[':
Packit 08bd4c
			/*
Packit 08bd4c
			 * Find the end of the [...] character class,
Packit 08bd4c
			 * ignoring \] that might occur within the class.
Packit 08bd4c
			 */
Packit 08bd4c
			end = p + 1;
Packit 08bd4c
			while (*end != '\0' && *end != ']') {
Packit 08bd4c
				if (*end == '\\' && end[1] != '\0')
Packit 08bd4c
					++end;
Packit 08bd4c
				++end;
Packit 08bd4c
			}
Packit 08bd4c
			if (*end == ']') {
Packit 08bd4c
				/* We found [...], try to match it. */
Packit 08bd4c
				if (!pm_list(p + 1, end, *s, flags))
Packit 08bd4c
					return (0);
Packit 08bd4c
				p = end; /* Jump to trailing ']' char. */
Packit 08bd4c
				break;
Packit 08bd4c
			} else
Packit 08bd4c
				/* No final ']', so just match '['. */
Packit 08bd4c
				if (*p != *s)
Packit 08bd4c
					return (0);
Packit 08bd4c
			break;
Packit 08bd4c
		case '\\':
Packit 08bd4c
			/* Trailing '\\' matches itself. */
Packit 08bd4c
			if (p[1] == '\0') {
Packit 08bd4c
				if (*s != '\\')
Packit 08bd4c
					return (0);
Packit 08bd4c
			} else {
Packit 08bd4c
				++p;
Packit 08bd4c
				if (*p != *s)
Packit 08bd4c
					return (0);
Packit 08bd4c
			}
Packit 08bd4c
			break;
Packit 08bd4c
		case '/':
Packit 08bd4c
			if (*s != '/' && *s != '\0')
Packit 08bd4c
				return (0);
Packit 08bd4c
			/* Note: pattern "/\./" won't match "/";
Packit 08bd4c
			 * pm_slashskip() correctly stops at backslash. */
Packit 08bd4c
			p = pm_slashskip(p);
Packit 08bd4c
			s = pm_slashskip(s);
Packit 08bd4c
			if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
Packit 08bd4c
				return (1);
Packit 08bd4c
			--p; /* Counteract the increment below. */
Packit 08bd4c
			--s;
Packit 08bd4c
			break;
Packit 08bd4c
		case '$':
Packit 08bd4c
			/* '$' is special only at end of pattern and only
Packit 08bd4c
			 * if PATHMATCH_NO_ANCHOR_END is specified. */
Packit 08bd4c
			if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
Packit 08bd4c
				/* "dir" == "dir/" == "dir/." */
Packit 08bd4c
				return (*pm_slashskip(s) == '\0');
Packit 08bd4c
			}
Packit 08bd4c
			/* Otherwise, '$' is not special. */
Packit 08bd4c
			/* FALL THROUGH */
Packit 08bd4c
		default:
Packit 08bd4c
			if (*p != *s)
Packit 08bd4c
				return (0);
Packit 08bd4c
			break;
Packit 08bd4c
		}
Packit 08bd4c
		++p;
Packit 08bd4c
		++s;
Packit 08bd4c
	}
Packit 08bd4c
}
Packit 08bd4c
Packit 08bd4c
static int
Packit 08bd4c
pm_w(const wchar_t *p, const wchar_t *s, int flags)
Packit 08bd4c
{
Packit 08bd4c
	const wchar_t *end;
Packit 08bd4c
Packit 08bd4c
	/*
Packit 08bd4c
	 * Ignore leading './', './/', '././', etc.
Packit 08bd4c
	 */
Packit 08bd4c
	if (s[0] == L'.' && s[1] == L'/')
Packit 08bd4c
		s = pm_slashskip_w(s + 1);
Packit 08bd4c
	if (p[0] == L'.' && p[1] == L'/')
Packit 08bd4c
		p = pm_slashskip_w(p + 1);
Packit 08bd4c
Packit 08bd4c
	for (;;) {
Packit 08bd4c
		switch (*p) {
Packit 08bd4c
		case L'\0':
Packit 08bd4c
			if (s[0] == L'/') {
Packit 08bd4c
				if (flags & PATHMATCH_NO_ANCHOR_END)
Packit 08bd4c
					return (1);
Packit 08bd4c
				/* "dir" == "dir/" == "dir/." */
Packit 08bd4c
				s = pm_slashskip_w(s);
Packit 08bd4c
			}
Packit 08bd4c
			return (*s == L'\0');
Packit 08bd4c
		case L'?':
Packit 08bd4c
			/* ? always succeeds, unless we hit end of 's' */
Packit 08bd4c
			if (*s == L'\0')
Packit 08bd4c
				return (0);
Packit 08bd4c
			break;
Packit 08bd4c
		case L'*':
Packit 08bd4c
			/* "*" == "**" == "***" ... */
Packit 08bd4c
			while (*p == L'*')
Packit 08bd4c
				++p;
Packit 08bd4c
			/* Trailing '*' always succeeds. */
Packit 08bd4c
			if (*p == L'\0')
Packit 08bd4c
				return (1);
Packit 08bd4c
			while (*s) {
Packit 08bd4c
				if (archive_pathmatch_w(p, s, flags))
Packit 08bd4c
					return (1);
Packit 08bd4c
				++s;
Packit 08bd4c
			}
Packit 08bd4c
			return (0);
Packit 08bd4c
		case L'[':
Packit 08bd4c
			/*
Packit 08bd4c
			 * Find the end of the [...] character class,
Packit 08bd4c
			 * ignoring \] that might occur within the class.
Packit 08bd4c
			 */
Packit 08bd4c
			end = p + 1;
Packit 08bd4c
			while (*end != L'\0' && *end != L']') {
Packit 08bd4c
				if (*end == L'\\' && end[1] != L'\0')
Packit 08bd4c
					++end;
Packit 08bd4c
				++end;
Packit 08bd4c
			}
Packit 08bd4c
			if (*end == L']') {
Packit 08bd4c
				/* We found [...], try to match it. */
Packit 08bd4c
				if (!pm_list_w(p + 1, end, *s, flags))
Packit 08bd4c
					return (0);
Packit 08bd4c
				p = end; /* Jump to trailing ']' char. */
Packit 08bd4c
				break;
Packit 08bd4c
			} else
Packit 08bd4c
				/* No final ']', so just match '['. */
Packit 08bd4c
				if (*p != *s)
Packit 08bd4c
					return (0);
Packit 08bd4c
			break;
Packit 08bd4c
		case L'\\':
Packit 08bd4c
			/* Trailing '\\' matches itself. */
Packit 08bd4c
			if (p[1] == L'\0') {
Packit 08bd4c
				if (*s != L'\\')
Packit 08bd4c
					return (0);
Packit 08bd4c
			} else {
Packit 08bd4c
				++p;
Packit 08bd4c
				if (*p != *s)
Packit 08bd4c
					return (0);
Packit 08bd4c
			}
Packit 08bd4c
			break;
Packit 08bd4c
		case L'/':
Packit 08bd4c
			if (*s != L'/' && *s != L'\0')
Packit 08bd4c
				return (0);
Packit 08bd4c
			/* Note: pattern "/\./" won't match "/";
Packit 08bd4c
			 * pm_slashskip() correctly stops at backslash. */
Packit 08bd4c
			p = pm_slashskip_w(p);
Packit 08bd4c
			s = pm_slashskip_w(s);
Packit 08bd4c
			if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
Packit 08bd4c
				return (1);
Packit 08bd4c
			--p; /* Counteract the increment below. */
Packit 08bd4c
			--s;
Packit 08bd4c
			break;
Packit 08bd4c
		case L'$':
Packit 08bd4c
			/* '$' is special only at end of pattern and only
Packit 08bd4c
			 * if PATHMATCH_NO_ANCHOR_END is specified. */
Packit 08bd4c
			if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
Packit 08bd4c
				/* "dir" == "dir/" == "dir/." */
Packit 08bd4c
				return (*pm_slashskip_w(s) == L'\0');
Packit 08bd4c
			}
Packit 08bd4c
			/* Otherwise, '$' is not special. */
Packit 08bd4c
			/* FALL THROUGH */
Packit 08bd4c
		default:
Packit 08bd4c
			if (*p != *s)
Packit 08bd4c
				return (0);
Packit 08bd4c
			break;
Packit 08bd4c
		}
Packit 08bd4c
		++p;
Packit 08bd4c
		++s;
Packit 08bd4c
	}
Packit 08bd4c
}
Packit 08bd4c
Packit 08bd4c
/* Main entry point. */
Packit 08bd4c
int
Packit 08bd4c
__archive_pathmatch(const char *p, const char *s, int flags)
Packit 08bd4c
{
Packit 08bd4c
	/* Empty pattern only matches the empty string. */
Packit 08bd4c
	if (p == NULL || *p == '\0')
Packit 08bd4c
		return (s == NULL || *s == '\0');
Packit 08bd4c
Packit 08bd4c
	/* Leading '^' anchors the start of the pattern. */
Packit 08bd4c
	if (*p == '^') {
Packit 08bd4c
		++p;
Packit 08bd4c
		flags &= ~PATHMATCH_NO_ANCHOR_START;
Packit 08bd4c
	}
Packit 08bd4c
Packit 08bd4c
	if (*p == '/' && *s != '/')
Packit 08bd4c
		return (0);
Packit 08bd4c
Packit 08bd4c
	/* Certain patterns anchor implicitly. */
Packit 08bd4c
	if (*p == '*' || *p == '/') {
Packit 08bd4c
		while (*p == '/')
Packit 08bd4c
			++p;
Packit 08bd4c
		while (*s == '/')
Packit 08bd4c
			++s;
Packit 08bd4c
		return (pm(p, s, flags));
Packit 08bd4c
	}
Packit 08bd4c
Packit 08bd4c
	/* If start is unanchored, try to match start of each path element. */
Packit 08bd4c
	if (flags & PATHMATCH_NO_ANCHOR_START) {
Packit 08bd4c
		for ( ; s != NULL; s = strchr(s, '/')) {
Packit 08bd4c
			if (*s == '/')
Packit 08bd4c
				s++;
Packit 08bd4c
			if (pm(p, s, flags))
Packit 08bd4c
				return (1);
Packit 08bd4c
		}
Packit 08bd4c
		return (0);
Packit 08bd4c
	}
Packit 08bd4c
Packit 08bd4c
	/* Default: Match from beginning. */
Packit 08bd4c
	return (pm(p, s, flags));
Packit 08bd4c
}
Packit 08bd4c
Packit 08bd4c
int
Packit 08bd4c
__archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
Packit 08bd4c
{
Packit 08bd4c
	/* Empty pattern only matches the empty string. */
Packit 08bd4c
	if (p == NULL || *p == L'\0')
Packit 08bd4c
		return (s == NULL || *s == L'\0');
Packit 08bd4c
Packit 08bd4c
	/* Leading '^' anchors the start of the pattern. */
Packit 08bd4c
	if (*p == L'^') {
Packit 08bd4c
		++p;
Packit 08bd4c
		flags &= ~PATHMATCH_NO_ANCHOR_START;
Packit 08bd4c
	}
Packit 08bd4c
Packit 08bd4c
	if (*p == L'/' && *s != L'/')
Packit 08bd4c
		return (0);
Packit 08bd4c
Packit 08bd4c
	/* Certain patterns anchor implicitly. */
Packit 08bd4c
	if (*p == L'*' || *p == L'/') {
Packit 08bd4c
		while (*p == L'/')
Packit 08bd4c
			++p;
Packit 08bd4c
		while (*s == L'/')
Packit 08bd4c
			++s;
Packit 08bd4c
		return (pm_w(p, s, flags));
Packit 08bd4c
	}
Packit 08bd4c
Packit 08bd4c
	/* If start is unanchored, try to match start of each path element. */
Packit 08bd4c
	if (flags & PATHMATCH_NO_ANCHOR_START) {
Packit 08bd4c
		for ( ; s != NULL; s = wcschr(s, L'/')) {
Packit 08bd4c
			if (*s == L'/')
Packit 08bd4c
				s++;
Packit 08bd4c
			if (pm_w(p, s, flags))
Packit 08bd4c
				return (1);
Packit 08bd4c
		}
Packit 08bd4c
		return (0);
Packit 08bd4c
	}
Packit 08bd4c
Packit 08bd4c
	/* Default: Match from beginning. */
Packit 08bd4c
	return (pm_w(p, s, flags));
Packit 08bd4c
}