Blame gnulib/tests/test-regex.c

/* Test regular expressions
   Copyright 1996-2001, 2003-2019 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
Packit 06dd63
Packit 06dd63
#include <config.h>
Packit 06dd63
Packit 06dd63
#include "regex.h"
Packit 06dd63
Packit 06dd63
#include <locale.h>
Packit 06dd63
#include <limits.h>
Packit 06dd63
#include <stdlib.h>
Packit 06dd63
#include <string.h>
Packit 06dd63
#if HAVE_DECL_ALARM
Packit 06dd63
# include <unistd.h>
Packit 06dd63
# include <signal.h>
Packit 06dd63
#endif
Packit 06dd63
Packit 06dd63
#include "localcharset.h"
Packit 06dd63
Packit 06dd63
int
Packit 06dd63
main (void)
Packit 06dd63
{
Packit 06dd63
  int result = 0;
Packit 06dd63
  static struct re_pattern_buffer regex;
Packit 06dd63
  unsigned char folded_chars[UCHAR_MAX + 1];
Packit 06dd63
  int i;
Packit 06dd63
  const char *s;
Packit 06dd63
  struct re_registers regs;
Packit 06dd63
Packit 06dd63
#if HAVE_DECL_ALARM
Packit 06dd63
  /* Some builds of glibc go into an infinite loop on this test.  */
Packit 06dd63
  int alarm_value = 2;
Packit 06dd63
  signal (SIGALRM, SIG_DFL);
Packit 06dd63
  alarm (alarm_value);
Packit 06dd63
#endif
Packit 06dd63
  if (setlocale (LC_ALL, "en_US.UTF-8"))
Packit 06dd63
    {
Packit 06dd63
      {
Packit 06dd63
        /* https://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
Packit 06dd63
           This test needs valgrind to catch the bug on Debian
Packit 06dd63
           GNU/Linux 3.1 x86, but it might catch the bug better
Packit 06dd63
           on other platforms and it shouldn't hurt to try the
Packit 06dd63
           test here.  */
Packit 06dd63
        static char const pat[] = "insert into";
Packit 06dd63
        static char const data[] =
Packit 06dd63
          "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
Packit 06dd63
        re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
Packit 06dd63
                       | RE_ICASE);
Packit 06dd63
        memset (&regex, 0, sizeof regex);
Packit 06dd63
        s = re_compile_pattern (pat, sizeof pat - 1, &regex);
Packit 06dd63
        if (s)
Packit 06dd63
          result |= 1;
Packit 06dd63
        else
Packit 06dd63
          {
Packit 06dd63
            memset (&regs, 0, sizeof regs);
Packit 06dd63
            if (re_search (&regex, data, sizeof data - 1,
Packit 06dd63
                           0, sizeof data - 1, &regs)
Packit 06dd63
                != -1)
Packit 06dd63
              result |= 1;
Packit 06dd63
            regfree (&regex);
Packit 06dd63
            free (regs.start);
Packit 06dd63
            free (regs.end);
Packit 06dd63
          }
Packit 06dd63
      }
Packit 06dd63
Packit 06dd63
      /* Check whether it's really a UTF-8 locale.
Packit 06dd63
         On mingw, the setlocale call succeeds but returns
Packit 06dd63
         "English_United States.1252", with locale_charset() returning
Packit 06dd63
         "CP1252".  */
Packit 06dd63
      if (strcmp (locale_charset (), "UTF-8") == 0)
Packit 06dd63
        {
Packit 06dd63
          /* This test is from glibc bug 15078.
Packit 06dd63
             The test case is from Andreas Schwab in
Packit 06dd63
             <https://sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
Packit 06dd63
          */
Packit 06dd63
          static char const pat[] = "[^x]x";
Packit 06dd63
          static char const data[] =
Packit 06dd63
            /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
Packit 06dd63
            "\xe1\x80\x80"
Packit 06dd63
            "\xe1\x80\xbb"
Packit 06dd63
            "\xe1\x80\xbd"
Packit 06dd63
            "\xe1\x80\x94"
Packit 06dd63
            "\xe1\x80\xba"
Packit 06dd63
            "\xe1\x80\xaf"
Packit 06dd63
            "\xe1\x80\x95"
Packit 06dd63
            "\xe1\x80\xba"
Packit 06dd63
            "x";
Packit 06dd63
          re_set_syntax (0);
Packit 06dd63
          memset (&regex, 0, sizeof regex);
Packit 06dd63
          s = re_compile_pattern (pat, sizeof pat - 1, &regex);
Packit 06dd63
          if (s)
Packit 06dd63
            result |= 1;
Packit 06dd63
          else
Packit 06dd63
            {
Packit 06dd63
              memset (&regs, 0, sizeof regs);
Packit 06dd63
              i = re_search (&regex, data, sizeof data - 1,
Packit 06dd63
                             0, sizeof data - 1, 0);
Packit 06dd63
              if (i != 0 && i != 21)
Packit 06dd63
                result |= 1;
Packit 06dd63
              regfree (&regex);
Packit 06dd63
              free (regs.start);
Packit 06dd63
              free (regs.end);
Packit 06dd63
            }
Packit 06dd63
        }
Packit 06dd63
Packit 06dd63
      if (! setlocale (LC_ALL, "C"))
Packit 06dd63
        return 1;
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
Packit 06dd63
  re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("a[^x]b", 6, &regex);
Packit 06dd63
  if (s)
Packit 06dd63
    result |= 2;
Packit 06dd63
  /* This should fail, but succeeds for glibc-2.5.  */
Packit 06dd63
  else
Packit 06dd63
    {
Packit 06dd63
      memset (&regs, 0, sizeof regs);
Packit 06dd63
      if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
Packit 06dd63
        result |= 2;
Packit 06dd63
      regfree (&regex);
Packit 06dd63
      free (regs.start);
Packit 06dd63
      free (regs.end);
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  /* This regular expression is from Spencer ere test number 75
Packit 06dd63
     in grep-2.3.  */
Packit 06dd63
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  for (i = 0; i <= UCHAR_MAX; i++)
Packit 06dd63
    folded_chars[i] = i;
Packit 06dd63
  regex.translate = folded_chars;
Packit 06dd63
  s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
Packit 06dd63
  /* This should fail with _Invalid character class name_ error.  */
Packit 06dd63
  if (!s)
Packit 06dd63
    {
Packit 06dd63
      result |= 4;
Packit 06dd63
      regfree (&regex);
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  /* Ensure that [b-a] is diagnosed as invalid, when
Packit 06dd63
     using RE_NO_EMPTY_RANGES. */
Packit 06dd63
  re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("a[b-a]", 6, &regex);
Packit 06dd63
  if (s == 0)
Packit 06dd63
    {
Packit 06dd63
      result |= 8;
Packit 06dd63
      regfree (&regex);
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  /* This should succeed, but does not for glibc-2.1.3.  */
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("{1", 2, &regex);
Packit 06dd63
  if (s)
Packit 06dd63
    result |= 8;
Packit 06dd63
  else
Packit 06dd63
    regfree (&regex);
Packit 06dd63
Packit 06dd63
  /* The following example is derived from a problem report
Packit 06dd63
     against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>.  */
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("[an\371]*n", 7, &regex);
Packit 06dd63
  if (s)
Packit 06dd63
    result |= 8;
Packit 06dd63
  /* This should match, but does not for glibc-2.2.1.  */
Packit 06dd63
  else
Packit 06dd63
    {
Packit 06dd63
      memset (&regs, 0, sizeof regs);
Packit 06dd63
      if (re_match (&regex, "an", 2, 0, &regs) != 2)
Packit 06dd63
        result |= 8;
Packit 06dd63
      regfree (&regex);
Packit 06dd63
      free (regs.start);
Packit 06dd63
      free (regs.end);
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("x", 1, &regex);
Packit 06dd63
  if (s)
Packit 06dd63
    result |= 8;
Packit 06dd63
  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
Packit 06dd63
  else
Packit 06dd63
    {
Packit 06dd63
      memset (&regs, 0, sizeof regs);
Packit 06dd63
      if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
Packit 06dd63
        result |= 8;
Packit 06dd63
      regfree (&regex);
Packit 06dd63
      free (regs.start);
Packit 06dd63
      free (regs.end);
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  /* The version of regex.c in older versions of gnulib
Packit 06dd63
     ignored RE_ICASE.  Detect that problem too.  */
Packit 06dd63
  re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("x", 1, &regex);
Packit 06dd63
  if (s)
Packit 06dd63
    result |= 16;
Packit 06dd63
  else
Packit 06dd63
    {
Packit 06dd63
      memset (&regs, 0, sizeof regs);
Packit 06dd63
      if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
Packit 06dd63
        result |= 16;
Packit 06dd63
      regfree (&regex);
Packit 06dd63
      free (regs.start);
Packit 06dd63
      free (regs.end);
Packit 06dd63
    }
Packit 06dd63
Packit 06dd63
  /* Catch a bug reported by Vin Shelton in
Packit 06dd63
     https://lists.gnu.org/r/bug-coreutils/2007-06/msg00089.html
Packit 06dd63
     */
Packit 06dd63
  re_set_syntax (RE_SYNTAX_POSIX_BASIC
Packit 06dd63
                 & ~RE_CONTEXT_INVALID_DUP
Packit 06dd63
                 & ~RE_NO_EMPTY_RANGES);
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
Packit 06dd63
  if (s)
Packit 06dd63
    result |= 32;
Packit 06dd63
  else
Packit 06dd63
    regfree (&regex);
Packit 06dd63
Packit 06dd63
  /* REG_STARTEND was added to glibc on 2004-01-15.
Packit 06dd63
     Reject older versions.  */
Packit 06dd63
  if (! REG_STARTEND)
Packit 06dd63
    result |= 64;
Packit 06dd63
Packit 06dd63
  /* Matching with the compiled form of this regexp would provoke
Packit 06dd63
     an assertion failure prior to glibc-2.28:
Packit 06dd63
       regexec.c:1375: pop_fail_stack: Assertion 'num >= 0' failed
Packit 06dd63
     With glibc-2.28, compilation fails and reports the invalid
Packit 06dd63
     back reference.  */
Packit 06dd63
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
Packit 06dd63
  memset (&regex, 0, sizeof regex);
Packit 06dd63
  s = re_compile_pattern ("0|()0|\\1|0", 10, &regex);
Packit 06dd63
  if (!s || strcmp (s, "Invalid back reference"))
Packit 06dd63
    result |= 64;
Packit 06dd63
Packit 06dd63
#if 0
Packit 06dd63
  /* It would be nice to reject hosts whose regoff_t values are too
Packit 06dd63
     narrow (including glibc on hosts with 64-bit ptrdiff_t and
Packit 06dd63
     32-bit int), but we should wait until glibc implements this
Packit 06dd63
     feature.  Otherwise, support for equivalence classes and
Packit 06dd63
     multibyte collation symbols would always be broken except
Packit 06dd63
     when compiling --without-included-regex.   */
Packit 06dd63
  if (sizeof (regoff_t) < sizeof (ptrdiff_t)
Packit 06dd63
      || sizeof (regoff_t) < sizeof (ssize_t))
Packit 06dd63
    result |= 64;
Packit 06dd63
#endif
Packit 06dd63
Packit 06dd63
  return result;
Packit 06dd63
}