/**
* @file oval_glob_to_regex.c
* \brief Open Vulnerability and Assessment Language
*
* See more details at http://oval.mitre.org/
*/
/*
* Copyright 2015 Red Hat Inc., Durham, North Carolina.
* All Rights Reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors:
* "Jan Černý" <jcerny@redhat.com>
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include "oval_glob_to_regex.h"
#include "common/oscap_string.h"
#include "common/debug_priv.h"
typedef enum {
START,
NORMAL,
LEFT_BRACKET,
CLASS,
ESCAPE,
SLASH
} states;
char *oval_glob_to_regex (const char *glob, int noescape)
{
struct oscap_string *regex;
char * result;
char c;
int i = 0;
int state = START;
regex = oscap_string_new();
if (regex == NULL) {
return NULL;
}
oscap_string_append_char(regex, '^'); // regex must match whole string
while(1) {
c = glob[i++];
switch (state) {
case START:
if (c == '\0') {
goto finish;
} else if (c== '/') {
oscap_string_append_char(regex, c);
state = SLASH;
} else if (c == '?') {
// a ? matches any single character, but
// a ? at the begining of glob pattern can't match a .
// a ? never matches a / (see man 7 glob - Pathnames)
oscap_string_append_string(regex, "[^./]");
state = NORMAL;
} else if (c == '*') {
// a * matches any string, but
// a * at the begining of glob pattern can't match a .
// a * never matches a / (see man 7 glob - Pathnames)
oscap_string_append_string(regex, "(?=[^.])[^/]*");
state = NORMAL;
} else if (c == '.' || c == '|' || c == '^' || c == '(' || c == ')'
|| c == '{' || c == '}' || c == '+' || c == '$') {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, c);
state = NORMAL;
} else if (c == '[') {
oscap_string_append_char(regex,'[');
state = LEFT_BRACKET;
} else if (c == '\\') {
if (noescape) {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, '\\');
state = NORMAL;
} else {
state = ESCAPE;
}
} else {
oscap_string_append_char(regex, c);
state = NORMAL;
}
break;
case NORMAL:
if (c == '\0') {
goto finish;
} else if (c== '/') {
oscap_string_append_char(regex, c);
state = SLASH;
} else if (c == '?') {
// a ? matches any single character, but
// it can never match a /
oscap_string_append_string(regex, "[^/]");
} else if (c == '*') {
// a * matches any string, but
// it can never match a /
oscap_string_append_string(regex, "[^/]*");
} else if (c == '.' || c == '|' || c == '^' || c == '(' || c == ')'
|| c == '{' || c == '}' || c == '+' || c == '$' ) {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, c);
} else if (c == '[') {
oscap_string_append_char(regex,'[');
state = LEFT_BRACKET;
} else if (c == '\\') {
if (noescape) {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, '\\');
} else {
state = ESCAPE;
}
} else {
oscap_string_append_char(regex, c);
}
break;
case LEFT_BRACKET:
if (c == '!') {
oscap_string_append_char(regex, '^');
} else if (c == '\0') {
dE("Can't convert glob '%s' to regular expression. Expecting ']' at the end of glob.", glob);
oscap_string_free(regex);
return NULL;
} else {
oscap_string_append_char(regex, c);
}
state = CLASS;
break;
case CLASS:
if (c == '\\') {
if (noescape) {
oscap_string_append_char(regex, '\\');
}
} else if (c == ']') {
state = NORMAL;
} else if (c == '\0') {
dE("Can't convert glob '%s' to regular expression. Expecting ']' at the end of glob.", glob);
oscap_string_free(regex);
return NULL;
}
oscap_string_append_char(regex, c);
break;
case ESCAPE:
// ?, *, [ and ] are special characters, they must be escaped in glob.
// A backslash is treated as an escape character only for these characters.
// For all other characters the backslash is just an ordinary character.
// Other characters, that are special in perl's regex,
// are not special in a glob.
if (c == '?' || c == '*' || c == '[' || c == ']') {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, c);
} else if (c == '\0') {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, '\\');
goto finish;
} else {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, c);
}
state = NORMAL;
break;
case SLASH:
if (c == '\0') {
goto finish;
} else if (c == '?') {
// a ? matches any single character, but
// a ? at the begining of glob pattern can't match a .
// a ? never matches a / (see man 7 glob - Pathnames)
oscap_string_append_string(regex, "[^./]");
state = NORMAL;
} else if (c == '*') {
// a * matches any string, but
// a * at the begining of glob pattern can't match a .
// a * never matches a / (see man 7 glob - Pathnames)
oscap_string_append_string(regex, "(?=[^.])[^/]*");
state = NORMAL;
} else if (c == '.' || c == '|' || c == '^' || c == '(' || c == ')'
|| c == '{' || c == '}' || c == '+' || c == '$' ) {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, c);
state = NORMAL;
} else if (c == '[') {
state = LEFT_BRACKET;
} else if (c == '\\') {
if (noescape) {
oscap_string_append_char(regex, '\\');
oscap_string_append_char(regex, '\\');
state = NORMAL;
} else {
state = ESCAPE;
}
} else {
oscap_string_append_char(regex, c);
state = NORMAL;
}
break;
default:
break;
}
}
finish:
oscap_string_append_char(regex, '$'); // regex must match only whole string
result = oscap_strdup(oscap_string_get_cstr(regex));
oscap_string_free(regex);
return result;
}