Blame gnulib-tests/test-regex.c

/* Test regular expressions
   Copyright 1996-2001, 2003-2017 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

Packit 33f14e
#include <config.h>
Packit 33f14e
Packit 33f14e
#include "regex.h"
Packit 33f14e
Packit 33f14e
#include <locale.h>
Packit 33f14e
#include <limits.h>
Packit 33f14e
#include <stdlib.h>
Packit 33f14e
#include <string.h>
Packit 33f14e
#if HAVE_DECL_ALARM
Packit 33f14e
# include <unistd.h>
Packit 33f14e
# include <signal.h>
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
#include "localcharset.h"
Packit 33f14e
Packit 33f14e
int
Packit 33f14e
main (void)
Packit 33f14e
{
Packit 33f14e
  int result = 0;
Packit 33f14e
  static struct re_pattern_buffer regex;
Packit 33f14e
  unsigned char folded_chars[UCHAR_MAX + 1];
Packit 33f14e
  int i;
Packit 33f14e
  const char *s;
Packit 33f14e
  struct re_registers regs;
Packit 33f14e
Packit 33f14e
#if HAVE_DECL_ALARM
Packit 33f14e
  /* Some builds of glibc go into an infinite loop on this test.  */
Packit 33f14e
  int alarm_value = 2;
Packit 33f14e
  signal (SIGALRM, SIG_DFL);
Packit 33f14e
  alarm (alarm_value);
Packit 33f14e
#endif
Packit 33f14e
  if (setlocale (LC_ALL, "en_US.UTF-8"))
Packit 33f14e
    {
Packit 33f14e
      {
Packit 33f14e
        /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
Packit 33f14e
           This test needs valgrind to catch the bug on Debian
Packit 33f14e
           GNU/Linux 3.1 x86, but it might catch the bug better
Packit 33f14e
           on other platforms and it shouldn't hurt to try the
Packit 33f14e
           test here.  */
Packit 33f14e
        static char const pat[] = "insert into";
Packit 33f14e
        static char const data[] =
Packit 33f14e
          "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
Packit 33f14e
        re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
Packit 33f14e
                       | RE_ICASE);
Packit 33f14e
        memset (&regex, 0, sizeof regex);
Packit 33f14e
        s = re_compile_pattern (pat, sizeof pat - 1, &regex);
Packit 33f14e
        if (s)
Packit 33f14e
          result |= 1;
Packit 33f14e
        else
Packit 33f14e
          {
Packit 33f14e
            memset (&regs, 0, sizeof regs);
Packit 33f14e
            if (re_search (&regex, data, sizeof data - 1,
Packit 33f14e
                           0, sizeof data - 1, &regs)
Packit 33f14e
                != -1)
Packit 33f14e
              result |= 1;
Packit 33f14e
            regfree (&regex);
Packit 33f14e
            free (regs.start);
Packit 33f14e
            free (regs.end);
Packit 33f14e
          }
Packit 33f14e
      }
Packit 33f14e
Packit 33f14e
      /* Check whether it's really a UTF-8 locale.
Packit 33f14e
         On mingw, the setlocale call succeeds but returns
Packit 33f14e
         "English_United States.1252", with locale_charset() returning
Packit 33f14e
         "CP1252".  */
Packit 33f14e
      if (strcmp (locale_charset (), "UTF-8") == 0)
Packit 33f14e
        {
Packit 33f14e
          /* This test is from glibc bug 15078.
Packit 33f14e
             The test case is from Andreas Schwab in
Packit 33f14e
             <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
Packit 33f14e
          */
Packit 33f14e
          static char const pat[] = "[^x]x";
Packit 33f14e
          static char const data[] =
Packit 33f14e
            /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
Packit 33f14e
            "\xe1\x80\x80"
Packit 33f14e
            "\xe1\x80\xbb"
Packit 33f14e
            "\xe1\x80\xbd"
Packit 33f14e
            "\xe1\x80\x94"
Packit 33f14e
            "\xe1\x80\xba"
Packit 33f14e
            "\xe1\x80\xaf"
Packit 33f14e
            "\xe1\x80\x95"
Packit 33f14e
            "\xe1\x80\xba"
Packit 33f14e
            "x";
Packit 33f14e
          re_set_syntax (0);
Packit 33f14e
          memset (&regex, 0, sizeof regex);
Packit 33f14e
          s = re_compile_pattern (pat, sizeof pat - 1, &regex);
Packit 33f14e
          if (s)
Packit 33f14e
            result |= 1;
Packit 33f14e
          else
Packit 33f14e
            {
Packit 33f14e
              memset (&regs, 0, sizeof regs);
Packit 33f14e
              i = re_search (&regex, data, sizeof data - 1,
Packit 33f14e
                             0, sizeof data - 1, 0);
Packit 33f14e
              if (i != 0 && i != 21)
Packit 33f14e
                result |= 1;
Packit 33f14e
              regfree (&regex);
Packit 33f14e
              free (regs.start);
Packit 33f14e
              free (regs.end);
Packit 33f14e
            }
Packit 33f14e
        }
Packit 33f14e
Packit 33f14e
      if (! setlocale (LC_ALL, "C"))
Packit 33f14e
        return 1;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
Packit 33f14e
  re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("a[^x]b", 6, &regex);
Packit 33f14e
  if (s)
Packit 33f14e
    result |= 2;
Packit 33f14e
  /* This should fail, but succeeds for glibc-2.5.  */
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      memset (&regs, 0, sizeof regs);
Packit 33f14e
      if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
Packit 33f14e
        result |= 2;
Packit 33f14e
      regfree (&regex);
Packit 33f14e
      free (regs.start);
Packit 33f14e
      free (regs.end);
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* This regular expression is from Spencer ere test number 75
Packit 33f14e
     in grep-2.3.  */
Packit 33f14e
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  for (i = 0; i <= UCHAR_MAX; i++)
Packit 33f14e
    folded_chars[i] = i;
Packit 33f14e
  regex.translate = folded_chars;
Packit 33f14e
  s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
Packit 33f14e
  /* This should fail with _Invalid character class name_ error.  */
Packit 33f14e
  if (!s)
Packit 33f14e
    {
Packit 33f14e
      result |= 4;
Packit 33f14e
      regfree (&regex);
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Ensure that [b-a] is diagnosed as invalid, when
Packit 33f14e
     using RE_NO_EMPTY_RANGES. */
Packit 33f14e
  re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("a[b-a]", 6, &regex);
Packit 33f14e
  if (s == 0)
Packit 33f14e
    {
Packit 33f14e
      result |= 8;
Packit 33f14e
      regfree (&regex);
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* This should succeed, but does not for glibc-2.1.3.  */
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("{1", 2, &regex);
Packit 33f14e
  if (s)
Packit 33f14e
    result |= 8;
Packit 33f14e
  else
Packit 33f14e
    regfree (&regex);
Packit 33f14e
Packit 33f14e
  /* The following example is derived from a problem report
Packit 33f14e
     against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>.  */
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("[an\371]*n", 7, &regex);
Packit 33f14e
  if (s)
Packit 33f14e
    result |= 8;
Packit 33f14e
  /* This should match, but does not for glibc-2.2.1.  */
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      memset (&regs, 0, sizeof regs);
Packit 33f14e
      if (re_match (&regex, "an", 2, 0, &regs) != 2)
Packit 33f14e
        result |= 8;
Packit 33f14e
      regfree (&regex);
Packit 33f14e
      free (regs.start);
Packit 33f14e
      free (regs.end);
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("x", 1, &regex);
Packit 33f14e
  if (s)
Packit 33f14e
    result |= 8;
Packit 33f14e
  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      memset (&regs, 0, sizeof regs);
Packit 33f14e
      if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
Packit 33f14e
        result |= 8;
Packit 33f14e
      regfree (&regex);
Packit 33f14e
      free (regs.start);
Packit 33f14e
      free (regs.end);
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* The version of regex.c in older versions of gnulib
Packit 33f14e
     ignored RE_ICASE.  Detect that problem too.  */
Packit 33f14e
  re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("x", 1, &regex);
Packit 33f14e
  if (s)
Packit 33f14e
    result |= 16;
Packit 33f14e
  else
Packit 33f14e
    {
Packit 33f14e
      memset (&regs, 0, sizeof regs);
Packit 33f14e
      if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
Packit 33f14e
        result |= 16;
Packit 33f14e
      regfree (&regex);
Packit 33f14e
      free (regs.start);
Packit 33f14e
      free (regs.end);
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  /* Catch a bug reported by Vin Shelton in
Packit 33f14e
     http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
Packit 33f14e
     */
Packit 33f14e
  re_set_syntax (RE_SYNTAX_POSIX_BASIC
Packit 33f14e
                 & ~RE_CONTEXT_INVALID_DUP
Packit 33f14e
                 & ~RE_NO_EMPTY_RANGES);
Packit 33f14e
  memset (&regex, 0, sizeof regex);
Packit 33f14e
  s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
Packit 33f14e
  if (s)
Packit 33f14e
    result |= 32;
Packit 33f14e
  else
Packit 33f14e
    regfree (&regex);
Packit 33f14e
Packit 33f14e
  /* REG_STARTEND was added to glibc on 2004-01-15.
Packit 33f14e
     Reject older versions.  */
Packit 33f14e
  if (! REG_STARTEND)
Packit 33f14e
    result |= 64;
Packit 33f14e
Packit 33f14e
#if 0
Packit 33f14e
  /* It would be nice to reject hosts whose regoff_t values are too
Packit 33f14e
     narrow (including glibc on hosts with 64-bit ptrdiff_t and
Packit 33f14e
     32-bit int), but we should wait until glibc implements this
Packit 33f14e
     feature.  Otherwise, support for equivalence classes and
Packit 33f14e
     multibyte collation symbols would always be broken except
Packit 33f14e
     when compiling --without-included-regex.   */
Packit 33f14e
  if (sizeof (regoff_t) < sizeof (ptrdiff_t)
Packit 33f14e
      || sizeof (regoff_t) < sizeof (ssize_t))
Packit 33f14e
    result |= 64;
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
  return result;
Packit 33f14e
}