|
Packit |
709fb3 |
/* searchutils.c - helper subroutines for grep's matchers.
|
|
Packit |
709fb3 |
Copyright 1992, 1998, 2000, 2007, 2009-2017 Free Software Foundation, Inc.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
This program is free software; you can redistribute it and/or modify
|
|
Packit |
709fb3 |
it under the terms of the GNU General Public License as published by
|
|
Packit |
709fb3 |
the Free Software Foundation; either version 3, or (at your option)
|
|
Packit |
709fb3 |
any later version.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
This program is distributed in the hope that it will be useful,
|
|
Packit |
709fb3 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
709fb3 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
Packit |
709fb3 |
GNU General Public License for more details.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
You should have received a copy of the GNU General Public License
|
|
Packit |
709fb3 |
along with this program; if not, write to the Free Software
|
|
Packit |
709fb3 |
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
|
Packit |
709fb3 |
02110-1301, USA. */
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
#include <config.h>

#include <limits.h>

#define SEARCH_INLINE _GL_EXTERN_INLINE
#define SYSTEM_INLINE _GL_EXTERN_INLINE
#include "search.h"
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* For each byte B, sbwordchar[B] is true if B is a single-byte
|
|
Packit |
709fb3 |
character that is a word constituent, and is false otherwise. */
|
|
Packit |
709fb3 |
static bool sbwordchar[NCHAR];
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Return true if the -w option treats the wide character WC as part of
   a word: that is, if WC is an underscore or is alphanumeric according
   to iswalnum.  */
static bool
wordchar (wint_t wc)
{
  if (wc == L'_')
    return true;

  return iswalnum (wc) != 0;
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
void
|
|
Packit |
709fb3 |
wordinit (void)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
for (int i = 0; i < NCHAR; i++)
|
|
Packit |
709fb3 |
sbwordchar[i] = wordchar (localeinfo.sbctowc[i]);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
kwset_t
|
|
Packit |
709fb3 |
kwsinit (bool mb_trans)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
char *trans = NULL;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
trans = xmalloc (NCHAR);
|
|
Packit |
709fb3 |
if (MB_CUR_MAX == 1)
|
|
Packit |
709fb3 |
for (int i = 0; i < NCHAR; i++)
|
|
Packit |
709fb3 |
trans[i] = toupper (i);
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
for (int i = 0; i < NCHAR; i++)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
wint_t wc = localeinfo.sbctowc[i];
|
|
Packit |
709fb3 |
wint_t uwc = towupper (wc);
|
|
Packit |
709fb3 |
if (uwc != wc)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
mbstate_t mbs = { 0 };
|
|
Packit |
709fb3 |
size_t len = wcrtomb (&trans[i], uwc, &mbs);
|
|
Packit |
709fb3 |
if (len != 1)
|
|
Packit |
709fb3 |
abort ();
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
trans[i] = i;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
return kwsalloc (trans);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* In the buffer *MB_START, return the number of bytes needed to go
|
|
Packit |
709fb3 |
back from CUR to the previous boundary, where a "boundary" is the
|
|
Packit |
709fb3 |
start of a multibyte character or is an error-encoding byte. The
|
|
Packit |
709fb3 |
buffer ends at END (i.e., one past the address of the buffer's last
|
|
Packit |
709fb3 |
byte). If CUR is already at a boundary, return 0. If *MB_START is
|
|
Packit |
709fb3 |
greater than CUR, return the negative value CUR - *MB_START.
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
When returning zero, set *MB_START to CUR. When returning a
|
|
Packit |
709fb3 |
positive value, set *MB_START to the next boundary after CUR, or to
|
|
Packit |
709fb3 |
END if there is no such boundary. When returning a negative value,
|
|
Packit |
709fb3 |
leave *MB_START alone. */
|
|
Packit |
709fb3 |
ptrdiff_t
|
|
Packit |
709fb3 |
mb_goback (char const **mb_start, char const *cur, char const *end)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
const char *p = *mb_start;
|
|
Packit |
709fb3 |
const char *p0 = p;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (cur <= p)
|
|
Packit |
709fb3 |
return cur - p;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (localeinfo.using_utf8)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
p = cur;
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if (cur < end && (*cur & 0xc0) == 0x80)
|
|
Packit |
709fb3 |
for (int i = 1; i <= 3; i++)
|
|
Packit |
709fb3 |
if ((cur[-i] & 0xc0) != 0x80)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
mbstate_t mbs = { 0 };
|
|
Packit |
709fb3 |
size_t clen = mb_clen (cur - i, end - (cur - i), &mbs);
|
|
Packit |
709fb3 |
if (i < clen && clen < (size_t) -2)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
p0 = cur - i;
|
|
Packit |
709fb3 |
p = p0 + clen;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
break;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
mbstate_t mbs = { 0 };
|
|
Packit |
709fb3 |
do
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
size_t clen = mb_clen (p, end - p, &mbs);
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
if ((size_t) -2 <= clen)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
/* An invalid sequence, or a truncated multibyte character.
|
|
Packit |
709fb3 |
Treat it as a single byte character. */
|
|
Packit |
709fb3 |
clen = 1;
|
|
Packit |
709fb3 |
memset (&mbs, 0, sizeof mbs);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
p0 = p;
|
|
Packit |
709fb3 |
p += clen;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
while (p < cur);
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
*mb_start = p;
|
|
Packit |
709fb3 |
return p == cur ? 0 : cur - p0;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Examine the start of BUF (which goes to END) for word constituents.
|
|
Packit |
709fb3 |
If COUNTALL, examine as many as possible; otherwise, examine at most one.
|
|
Packit |
709fb3 |
Return the total number of bytes in the examined characters. */
|
|
Packit |
709fb3 |
static size_t
|
|
Packit |
709fb3 |
wordchars_count (char const *buf, char const *end, bool countall)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
size_t n = 0;
|
|
Packit |
709fb3 |
mbstate_t mbs = { 0 };
|
|
Packit |
709fb3 |
while (n < end - buf)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
unsigned char b = buf[n];
|
|
Packit |
709fb3 |
if (sbwordchar[b])
|
|
Packit |
709fb3 |
n++;
|
|
Packit |
709fb3 |
else if (localeinfo.sbclen[b] != -2)
|
|
Packit |
709fb3 |
break;
|
|
Packit |
709fb3 |
else
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
wchar_t wc = 0;
|
|
Packit |
709fb3 |
size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs);
|
|
Packit |
709fb3 |
if (!wordchar (wc))
|
|
Packit |
709fb3 |
break;
|
|
Packit |
709fb3 |
n += wcbytes + !wcbytes;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
if (!countall)
|
|
Packit |
709fb3 |
break;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
return n;
|
|
Packit |
709fb3 |
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Return the byte length of the longest prefix of BUF that consists
   solely of word constituents.  BUF extends up to END.  */
size_t
wordchars_size (char const *buf, char const *end)
{
  bool const whole_run = true;

  return wordchars_count (buf, end, whole_run);
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* Return the number of bytes in the word constituent at the start of
   BUF, or zero if BUF does not begin with one.  BUF extends up to END.  */
size_t
wordchar_next (char const *buf, char const *end)
{
  bool const whole_run = false;

  return wordchars_count (buf, end, whole_run);
}
|
|
Packit |
709fb3 |
|
|
Packit |
709fb3 |
/* In the buffer BUF, return nonzero if the character whose encoding
|
|
Packit |
709fb3 |
contains the byte before CUR is a word constituent. The buffer
|
|
Packit |
709fb3 |
ends at END. */
|
|
Packit |
709fb3 |
size_t
|
|
Packit |
709fb3 |
wordchar_prev (char const *buf, char const *cur, char const *end)
|
|
Packit |
709fb3 |
{
|
|
Packit |
709fb3 |
if (buf == cur)
|
|
Packit |
709fb3 |
return 0;
|
|
Packit |
709fb3 |
unsigned char b = *--cur;
|
|
Packit |
709fb3 |
if (! localeinfo.multibyte
|
|
Packit |
709fb3 |
|| (localeinfo.using_utf8 && localeinfo.sbclen[b] != -2))
|
|
Packit |
709fb3 |
return sbwordchar[b];
|
|
Packit |
709fb3 |
char const *p = buf;
|
|
Packit |
709fb3 |
cur -= mb_goback (&p, cur, end);
|
|
Packit |
709fb3 |
return wordchar_next (cur, end);
|
|
Packit |
709fb3 |
}
|