Blame gnulib-tests/test-regex.c

/* Test regular expressions
   Copyright 1996-2001, 2003-2017 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
#include <config.h>
Packit 709fb3
Packit 709fb3
#include "regex.h"
Packit 709fb3
Packit 709fb3
#include <locale.h>
Packit 709fb3
#include <limits.h>
Packit 709fb3
#include <stdlib.h>
Packit 709fb3
#include <string.h>
Packit 709fb3
#if HAVE_DECL_ALARM
Packit 709fb3
# include <unistd.h>
Packit 709fb3
# include <signal.h>
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
#include "localcharset.h"
Packit 709fb3
Packit 709fb3
int
Packit 709fb3
main (void)
Packit 709fb3
{
Packit 709fb3
  int result = 0;
Packit 709fb3
  static struct re_pattern_buffer regex;
Packit 709fb3
  unsigned char folded_chars[UCHAR_MAX + 1];
Packit 709fb3
  int i;
Packit 709fb3
  const char *s;
Packit 709fb3
  struct re_registers regs;
Packit 709fb3
Packit 709fb3
#if HAVE_DECL_ALARM
Packit 709fb3
  /* Some builds of glibc go into an infinite loop on this test.  */
Packit 709fb3
  int alarm_value = 2;
Packit 709fb3
  signal (SIGALRM, SIG_DFL);
Packit 709fb3
  alarm (alarm_value);
Packit 709fb3
#endif
Packit 709fb3
  if (setlocale (LC_ALL, "en_US.UTF-8"))
Packit 709fb3
    {
Packit 709fb3
      {
Packit 709fb3
        /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
Packit 709fb3
           This test needs valgrind to catch the bug on Debian
Packit 709fb3
           GNU/Linux 3.1 x86, but it might catch the bug better
Packit 709fb3
           on other platforms and it shouldn't hurt to try the
Packit 709fb3
           test here.  */
Packit 709fb3
        static char const pat[] = "insert into";
Packit 709fb3
        static char const data[] =
Packit 709fb3
          "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
Packit 709fb3
        re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
Packit 709fb3
                       | RE_ICASE);
Packit 709fb3
        memset (&regex, 0, sizeof regex);
Packit 709fb3
        s = re_compile_pattern (pat, sizeof pat - 1, &regex);
Packit 709fb3
        if (s)
Packit 709fb3
          result |= 1;
Packit 709fb3
        else
Packit 709fb3
          {
Packit 709fb3
            memset (&regs, 0, sizeof regs);
Packit 709fb3
            if (re_search (&regex, data, sizeof data - 1,
Packit 709fb3
                           0, sizeof data - 1, &regs)
Packit 709fb3
                != -1)
Packit 709fb3
              result |= 1;
Packit 709fb3
            regfree (&regex);
Packit 709fb3
            free (regs.start);
Packit 709fb3
            free (regs.end);
Packit 709fb3
          }
Packit 709fb3
      }
Packit 709fb3
Packit 709fb3
      /* Check whether it's really a UTF-8 locale.
Packit 709fb3
         On mingw, the setlocale call succeeds but returns
Packit 709fb3
         "English_United States.1252", with locale_charset() returning
Packit 709fb3
         "CP1252".  */
Packit 709fb3
      if (strcmp (locale_charset (), "UTF-8") == 0)
Packit 709fb3
        {
Packit 709fb3
          /* This test is from glibc bug 15078.
Packit 709fb3
             The test case is from Andreas Schwab in
Packit 709fb3
             <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
Packit 709fb3
          */
Packit 709fb3
          static char const pat[] = "[^x]x";
Packit 709fb3
          static char const data[] =
Packit 709fb3
            /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
Packit 709fb3
            "\xe1\x80\x80"
Packit 709fb3
            "\xe1\x80\xbb"
Packit 709fb3
            "\xe1\x80\xbd"
Packit 709fb3
            "\xe1\x80\x94"
Packit 709fb3
            "\xe1\x80\xba"
Packit 709fb3
            "\xe1\x80\xaf"
Packit 709fb3
            "\xe1\x80\x95"
Packit 709fb3
            "\xe1\x80\xba"
Packit 709fb3
            "x";
Packit 709fb3
          re_set_syntax (0);
Packit 709fb3
          memset (&regex, 0, sizeof regex);
Packit 709fb3
          s = re_compile_pattern (pat, sizeof pat - 1, &regex);
Packit 709fb3
          if (s)
Packit 709fb3
            result |= 1;
Packit 709fb3
          else
Packit 709fb3
            {
Packit 709fb3
              memset (&regs, 0, sizeof regs);
Packit 709fb3
              i = re_search (&regex, data, sizeof data - 1,
Packit 709fb3
                             0, sizeof data - 1, 0);
Packit 709fb3
              if (i != 0 && i != 21)
Packit 709fb3
                result |= 1;
Packit 709fb3
              regfree (&regex);
Packit 709fb3
              free (regs.start);
Packit 709fb3
              free (regs.end);
Packit 709fb3
            }
Packit 709fb3
        }
Packit 709fb3
Packit 709fb3
      if (! setlocale (LC_ALL, "C"))
Packit 709fb3
        return 1;
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
Packit 709fb3
  re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("a[^x]b", 6, &regex);
Packit 709fb3
  if (s)
Packit 709fb3
    result |= 2;
Packit 709fb3
  /* This should fail, but succeeds for glibc-2.5.  */
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      memset (&regs, 0, sizeof regs);
Packit 709fb3
      if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
Packit 709fb3
        result |= 2;
Packit 709fb3
      regfree (&regex);
Packit 709fb3
      free (regs.start);
Packit 709fb3
      free (regs.end);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* This regular expression is from Spencer ere test number 75
Packit 709fb3
     in grep-2.3.  */
Packit 709fb3
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  for (i = 0; i <= UCHAR_MAX; i++)
Packit 709fb3
    folded_chars[i] = i;
Packit 709fb3
  regex.translate = folded_chars;
Packit 709fb3
  s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
Packit 709fb3
  /* This should fail with _Invalid character class name_ error.  */
Packit 709fb3
  if (!s)
Packit 709fb3
    {
Packit 709fb3
      result |= 4;
Packit 709fb3
      regfree (&regex);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Ensure that [b-a] is diagnosed as invalid, when
Packit 709fb3
     using RE_NO_EMPTY_RANGES. */
Packit 709fb3
  re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("a[b-a]", 6, &regex);
Packit 709fb3
  if (s == 0)
Packit 709fb3
    {
Packit 709fb3
      result |= 8;
Packit 709fb3
      regfree (&regex);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* This should succeed, but does not for glibc-2.1.3.  */
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("{1", 2, &regex);
Packit 709fb3
  if (s)
Packit 709fb3
    result |= 8;
Packit 709fb3
  else
Packit 709fb3
    regfree (&regex);
Packit 709fb3
Packit 709fb3
  /* The following example is derived from a problem report
Packit 709fb3
     against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>.  */
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("[an\371]*n", 7, &regex);
Packit 709fb3
  if (s)
Packit 709fb3
    result |= 8;
Packit 709fb3
  /* This should match, but does not for glibc-2.2.1.  */
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      memset (&regs, 0, sizeof regs);
Packit 709fb3
      if (re_match (&regex, "an", 2, 0, &regs) != 2)
Packit 709fb3
        result |= 8;
Packit 709fb3
      regfree (&regex);
Packit 709fb3
      free (regs.start);
Packit 709fb3
      free (regs.end);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("x", 1, &regex);
Packit 709fb3
  if (s)
Packit 709fb3
    result |= 8;
Packit 709fb3
  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      memset (&regs, 0, sizeof regs);
Packit 709fb3
      if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
Packit 709fb3
        result |= 8;
Packit 709fb3
      regfree (&regex);
Packit 709fb3
      free (regs.start);
Packit 709fb3
      free (regs.end);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* The version of regex.c in older versions of gnulib
Packit 709fb3
     ignored RE_ICASE.  Detect that problem too.  */
Packit 709fb3
  re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("x", 1, &regex);
Packit 709fb3
  if (s)
Packit 709fb3
    result |= 16;
Packit 709fb3
  else
Packit 709fb3
    {
Packit 709fb3
      memset (&regs, 0, sizeof regs);
Packit 709fb3
      if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
Packit 709fb3
        result |= 16;
Packit 709fb3
      regfree (&regex);
Packit 709fb3
      free (regs.start);
Packit 709fb3
      free (regs.end);
Packit 709fb3
    }
Packit 709fb3
Packit 709fb3
  /* Catch a bug reported by Vin Shelton in
Packit 709fb3
     http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
Packit 709fb3
     */
Packit 709fb3
  re_set_syntax (RE_SYNTAX_POSIX_BASIC
Packit 709fb3
                 & ~RE_CONTEXT_INVALID_DUP
Packit 709fb3
                 & ~RE_NO_EMPTY_RANGES);
Packit 709fb3
  memset (&regex, 0, sizeof regex);
Packit 709fb3
  s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
Packit 709fb3
  if (s)
Packit 709fb3
    result |= 32;
Packit 709fb3
  else
Packit 709fb3
    regfree (&regex);
Packit 709fb3
Packit 709fb3
  /* REG_STARTEND was added to glibc on 2004-01-15.
Packit 709fb3
     Reject older versions.  */
Packit 709fb3
  if (! REG_STARTEND)
Packit 709fb3
    result |= 64;
Packit 709fb3
Packit 709fb3
#if 0
Packit 709fb3
  /* It would be nice to reject hosts whose regoff_t values are too
Packit 709fb3
     narrow (including glibc on hosts with 64-bit ptrdiff_t and
Packit 709fb3
     32-bit int), but we should wait until glibc implements this
Packit 709fb3
     feature.  Otherwise, support for equivalence classes and
Packit 709fb3
     multibyte collation symbols would always be broken except
Packit 709fb3
     when compiling --without-included-regex.   */
Packit 709fb3
  if (sizeof (regoff_t) < sizeof (ptrdiff_t)
Packit 709fb3
      || sizeof (regoff_t) < sizeof (ssize_t))
Packit 709fb3
    result |= 64;
Packit 709fb3
#endif
Packit 709fb3
Packit 709fb3
  return result;
Packit 709fb3
}