Blame string/strcoll_l.c

Packit 6c4009
/* Copyright (C) 1995-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Written by Ulrich Drepper <drepper@gnu.org>, 1995.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
Packit 6c4009
#include <assert.h>
Packit 6c4009
#include <langinfo.h>
Packit 6c4009
#include <locale.h>
Packit 6c4009
#include <stddef.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <string.h>
Packit 6c4009
#include <sys/param.h>
Packit 6c4009
#include <libc-diag.h>
Packit 6c4009
Packit 6c4009
#ifndef STRING_TYPE
Packit 6c4009
# define STRING_TYPE char
Packit 6c4009
# define USTRING_TYPE unsigned char
Packit 6c4009
# define STRCOLL __strcoll_l
Packit 6c4009
# define STRCMP strcmp
Packit 6c4009
# define WEIGHT_H "../locale/weight.h"
Packit 6c4009
# define SUFFIX	MB
Packit 6c4009
# define L(arg) arg
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
#define CONCAT(a,b) CONCAT1(a,b)
Packit 6c4009
#define CONCAT1(a,b) a##b
Packit 6c4009
Packit 6c4009
#include "../locale/localeinfo.h"
Packit 6c4009
#include WEIGHT_H
Packit 6c4009
Packit 6c4009
/* Track status while looking for sequences in a string.  */
Packit 6c4009
typedef struct
Packit 6c4009
{
Packit 6c4009
  int len;			/* Length of the current sequence.  */
Packit 6c4009
  size_t val;			/* Position of the sequence relative to the
Packit 6c4009
				   previous non-ignored sequence.  */
Packit 6c4009
  size_t idxmax;		/* Maximum index in sequences.  */
Packit 6c4009
  size_t idxcnt;		/* Current count of indices.  */
Packit 6c4009
  size_t backw;			/* Current Backward sequence index.  */
Packit 6c4009
  size_t backw_stop;		/* Index where the backward sequences stop.  */
Packit 6c4009
  const USTRING_TYPE *us;	/* The string.  */
Packit 6c4009
  unsigned char rule;		/* Saved rule for the first sequence.  */
Packit 6c4009
  int32_t idx;			/* Index to weight of the current sequence.  */
Packit 6c4009
  int32_t save_idx;		/* Save looked up index of a forward
Packit 6c4009
				   sequence after the last backward
Packit 6c4009
				   sequence.  */
Packit 6c4009
  const USTRING_TYPE *back_us;	/* Beginning of the backward sequence.  */
Packit 6c4009
} coll_seq;
Packit 6c4009
Packit 6c4009
/* Get next sequence.  Traverse the string as required.  */
Packit 6c4009
static __always_inline void
Packit 6c4009
get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
Packit 6c4009
	      const USTRING_TYPE *weights, const int32_t *table,
Packit 6c4009
	      const USTRING_TYPE *extra, const int32_t *indirect,
Packit 6c4009
	      int pass)
Packit 6c4009
{
Packit 6c4009
  size_t val = seq->val = 0;
Packit 6c4009
  int len = seq->len;
Packit 6c4009
  size_t backw_stop = seq->backw_stop;
Packit 6c4009
  size_t backw = seq->backw;
Packit 6c4009
  size_t idxcnt = seq->idxcnt;
Packit 6c4009
  size_t idxmax = seq->idxmax;
Packit 6c4009
  int32_t idx = seq->idx;
Packit 6c4009
  const USTRING_TYPE *us = seq->us;
Packit 6c4009
Packit 6c4009
  while (len == 0)
Packit 6c4009
    {
Packit 6c4009
      ++val;
Packit 6c4009
      if (backw_stop != ~0ul)
Packit 6c4009
	{
Packit 6c4009
	  /* There is something pushed.  */
Packit 6c4009
	  if (backw == backw_stop)
Packit 6c4009
	    {
Packit 6c4009
	      /* The last pushed character was handled.  Continue
Packit 6c4009
		 with forward characters.  */
Packit 6c4009
	      if (idxcnt < idxmax)
Packit 6c4009
		{
Packit 6c4009
		  idx = seq->save_idx;
Packit 6c4009
		  backw_stop = ~0ul;
Packit 6c4009
		}
Packit 6c4009
	      else
Packit 6c4009
		{
Packit 6c4009
		  /* Nothing anymore.  The backward sequence ended with
Packit 6c4009
		     the last sequence in the string.  Note that len is
Packit 6c4009
		     still zero.  */
Packit 6c4009
		  idx = 0;
Packit 6c4009
		  break;
Packit 6c4009
	        }
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* XXX Traverse BACKW sequences from the beginning of
Packit 6c4009
		 BACKW_STOP to get the next sequence.  Is ther a quicker way
Packit 6c4009
	         to do this?  */
Packit 6c4009
	      size_t i = backw_stop;
Packit 6c4009
	      us = seq->back_us;
Packit 6c4009
	      while (i < backw)
Packit 6c4009
		{
Packit 6c4009
		  int32_t tmp = findidx (table, indirect, extra, &us, -1);
Packit 6c4009
		  idx = tmp & 0xffffff;
Packit 6c4009
		  i++;
Packit 6c4009
		}
Packit 6c4009
	      --backw;
Packit 6c4009
	      us = seq->us;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  backw_stop = idxmax;
Packit 6c4009
	  int32_t prev_idx = idx;
Packit 6c4009
Packit 6c4009
	  while (*us != L('\0'))
Packit 6c4009
	    {
Packit 6c4009
	      int32_t tmp = findidx (table, indirect, extra, &us, -1);
Packit 6c4009
	      unsigned char rule = tmp >> 24;
Packit 6c4009
	      prev_idx = idx;
Packit 6c4009
	      idx = tmp & 0xffffff;
Packit 6c4009
	      idxcnt = idxmax++;
Packit 6c4009
Packit 6c4009
	      /* Save the rule for the first sequence.  */
Packit 6c4009
	      if (__glibc_unlikely (idxcnt == 0))
Packit 6c4009
	        seq->rule = rule;
Packit 6c4009
Packit 6c4009
	      if ((rulesets[rule * nrules + pass]
Packit 6c4009
		   & sort_backward) == 0)
Packit 6c4009
		/* No more backward characters to push.  */
Packit 6c4009
		break;
Packit 6c4009
	      ++idxcnt;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (backw_stop >= idxcnt)
Packit 6c4009
	    {
Packit 6c4009
	      /* No sequence at all or just one.  */
Packit 6c4009
	      if (idxcnt == idxmax || backw_stop > idxcnt)
Packit 6c4009
		/* Note that len is still zero.  */
Packit 6c4009
		break;
Packit 6c4009
Packit 6c4009
	      backw_stop = ~0ul;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* We pushed backward sequences.  If the stream ended with the
Packit 6c4009
		 backward sequence, then we process the last sequence we
Packit 6c4009
		 found.  Otherwise we process the sequence before the last
Packit 6c4009
		 one since the last one was a forward sequence.  */
Packit 6c4009
	      seq->back_us = seq->us;
Packit 6c4009
	      seq->us = us;
Packit 6c4009
	      backw = idxcnt;
Packit 6c4009
	      if (idxmax > idxcnt)
Packit 6c4009
		{
Packit 6c4009
		  backw--;
Packit 6c4009
		  seq->save_idx = idx;
Packit 6c4009
		  idx = prev_idx;
Packit 6c4009
		}
Packit 6c4009
	      if (backw > backw_stop)
Packit 6c4009
		backw--;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* With GCC 5.3 when compiling with -Os the compiler complains
Packit 6c4009
	 that idx, taken from seq->idx (seq1 or seq2 from STRCOLL) may
Packit 6c4009
	 be used uninitialized.  In general this can't possibly be true
Packit 6c4009
	 since seq1.idx and seq2.idx are initialized to zero in the
Packit 6c4009
	 outer function.  Only one case where seq->idx is restored from
Packit 6c4009
	 seq->save_idx might result in an uninitialized idx value, but
Packit 6c4009
	 it is guarded by a sequence of checks against backw_stop which
Packit 6c4009
	 ensures that seq->save_idx was saved to first and contains a
Packit 6c4009
	 valid value.  */
Packit 6c4009
      DIAG_PUSH_NEEDS_COMMENT;
Packit 6c4009
      DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
Packit 6c4009
      len = weights[idx++];
Packit 6c4009
      DIAG_POP_NEEDS_COMMENT;
Packit 6c4009
      /* Skip over indices of previous levels.  */
Packit 6c4009
      for (int i = 0; i < pass; i++)
Packit 6c4009
	{
Packit 6c4009
	  idx += len;
Packit 6c4009
	  len = weights[idx];
Packit 6c4009
	  idx++;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Update the structure.  */
Packit 6c4009
  seq->val = val;
Packit 6c4009
  seq->len = len;
Packit 6c4009
  seq->backw_stop = backw_stop;
Packit 6c4009
  seq->backw = backw;
Packit 6c4009
  seq->idxcnt = idxcnt;
Packit 6c4009
  seq->idxmax = idxmax;
Packit 6c4009
  seq->us = us;
Packit 6c4009
  seq->idx = idx;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Compare two sequences.  */
Packit 6c4009
static __always_inline int
Packit 6c4009
do_compare (coll_seq *seq1, coll_seq *seq2, int position,
Packit 6c4009
	    const USTRING_TYPE *weights)
Packit 6c4009
{
Packit 6c4009
  int seq1len = seq1->len;
Packit 6c4009
  int seq2len = seq2->len;
Packit 6c4009
  size_t val1 = seq1->val;
Packit 6c4009
  size_t val2 = seq2->val;
Packit 6c4009
  int idx1 = seq1->idx;
Packit 6c4009
  int idx2 = seq2->idx;
Packit 6c4009
  int result = 0;
Packit 6c4009
Packit 6c4009
  /* Test for position if necessary.  */
Packit 6c4009
  if (position && val1 != val2)
Packit 6c4009
    {
Packit 6c4009
      result = val1 > val2 ? 1 : -1;
Packit 6c4009
      goto out;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Compare the two sequences.  */
Packit 6c4009
  do
Packit 6c4009
    {
Packit 6c4009
      if (weights[idx1] != weights[idx2])
Packit 6c4009
	{
Packit 6c4009
	  /* The sequences differ.  */
Packit 6c4009
	  result = weights[idx1] - weights[idx2];
Packit 6c4009
	  goto out;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Increment the offsets.  */
Packit 6c4009
      ++idx1;
Packit 6c4009
      ++idx2;
Packit 6c4009
Packit 6c4009
      --seq1len;
Packit 6c4009
      --seq2len;
Packit 6c4009
    }
Packit 6c4009
  while (seq1len > 0 && seq2len > 0);
Packit 6c4009
Packit 6c4009
  if (position && seq1len != seq2len)
Packit 6c4009
    result = seq1len - seq2len;
Packit 6c4009
Packit 6c4009
out:
Packit 6c4009
  seq1->len = seq1len;
Packit 6c4009
  seq2->len = seq2len;
Packit 6c4009
  seq1->idx = idx1;
Packit 6c4009
  seq2->idx = idx2;
Packit 6c4009
  return result;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
int
Packit 6c4009
STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, locale_t l)
Packit 6c4009
{
Packit 6c4009
  struct __locale_data *current = l->__locales[LC_COLLATE];
Packit 6c4009
  uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
Packit 6c4009
  /* We don't assign the following values right away since it might be
Packit 6c4009
     unnecessary in case there are no rules.  */
Packit 6c4009
  const unsigned char *rulesets;
Packit 6c4009
  const int32_t *table;
Packit 6c4009
  const USTRING_TYPE *weights;
Packit 6c4009
  const USTRING_TYPE *extra;
Packit 6c4009
  const int32_t *indirect;
Packit 6c4009
Packit 6c4009
  if (nrules == 0)
Packit 6c4009
    return STRCMP (s1, s2);
Packit 6c4009
Packit 6c4009
  /* Catch empty strings.  */
Packit 6c4009
  if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0'))
Packit 6c4009
    return (*s1 != '\0') - (*s2 != '\0');
Packit 6c4009
Packit 6c4009
  rulesets = (const unsigned char *)
Packit 6c4009
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
Packit 6c4009
  table = (const int32_t *)
Packit 6c4009
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
Packit 6c4009
  weights = (const USTRING_TYPE *)
Packit 6c4009
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
Packit 6c4009
  extra = (const USTRING_TYPE *)
Packit 6c4009
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
Packit 6c4009
  indirect = (const int32_t *)
Packit 6c4009
    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
Packit 6c4009
Packit 6c4009
  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
Packit 6c4009
  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
Packit 6c4009
  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
Packit 6c4009
  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
Packit 6c4009
Packit 6c4009
  int result = 0, rule = 0;
Packit 6c4009
Packit 6c4009
  /* With GCC 7 when compiling with -Os the compiler warns that
Packit 6c4009
     seq1.back_us and seq2.back_us might be used uninitialized.
Packit 6c4009
     Sometimes this warning appears at locations in locale/weightwc.h
Packit 6c4009
     where the actual use is, but on architectures other than x86_64,
Packit 6c4009
     x86 and s390x, a warning appears at the definitions of seq1 and
Packit 6c4009
     seq2.  This uninitialized use is impossible for the same reason
Packit 6c4009
     as described in comments in locale/weightwc.h.  */
Packit 6c4009
  DIAG_PUSH_NEEDS_COMMENT;
Packit 6c4009
  DIAG_IGNORE_Os_NEEDS_COMMENT (7, "-Wmaybe-uninitialized");
Packit 6c4009
  coll_seq seq1, seq2;
Packit 6c4009
  DIAG_POP_NEEDS_COMMENT;
Packit 6c4009
  seq1.len = 0;
Packit 6c4009
  seq1.idxmax = 0;
Packit 6c4009
  seq1.rule = 0;
Packit 6c4009
  seq2.len = 0;
Packit 6c4009
  seq2.idxmax = 0;
Packit 6c4009
Packit 6c4009
  for (int pass = 0; pass < nrules; ++pass)
Packit 6c4009
    {
Packit 6c4009
      seq1.idxcnt = 0;
Packit 6c4009
      seq1.idx = 0;
Packit 6c4009
      seq2.idx = 0;
Packit 6c4009
      seq1.backw_stop = ~0ul;
Packit 6c4009
      seq1.backw = ~0ul;
Packit 6c4009
      seq2.idxcnt = 0;
Packit 6c4009
      seq2.backw_stop = ~0ul;
Packit 6c4009
      seq2.backw = ~0ul;
Packit 6c4009
Packit 6c4009
      /* We need the elements of the strings as unsigned values since they
Packit 6c4009
	 are used as indices.  */
Packit 6c4009
      seq1.us = (const USTRING_TYPE *) s1;
Packit 6c4009
      seq2.us = (const USTRING_TYPE *) s2;
Packit 6c4009
Packit 6c4009
      /* We assume that if a rule has defined `position' in one section
Packit 6c4009
	 this is true for all of them.  Please note that the localedef programs
Packit 6c4009
	 makes sure that `position' is not used at the first level.  */
Packit 6c4009
Packit 6c4009
      int position = rulesets[rule * nrules + pass] & sort_position;
Packit 6c4009
Packit 6c4009
      while (1)
Packit 6c4009
	{
Packit 6c4009
	  get_next_seq (&seq1, nrules, rulesets, weights, table,
Packit 6c4009
				    extra, indirect, pass);
Packit 6c4009
	  get_next_seq (&seq2, nrules, rulesets, weights, table,
Packit 6c4009
				    extra, indirect, pass);
Packit 6c4009
	  /* See whether any or both strings are empty.  */
Packit 6c4009
	  if (seq1.len == 0 || seq2.len == 0)
Packit 6c4009
	    {
Packit 6c4009
	      if (seq1.len == seq2.len)
Packit 6c4009
		{
Packit 6c4009
		  /* Both strings ended and are equal at this level.  Do a
Packit 6c4009
		     byte-level comparison to ensure that we don't waste time
Packit 6c4009
		     going through multiple passes for totally equal strings
Packit 6c4009
		     before proceeding to subsequent passes.  */
Packit 6c4009
		  if (pass == 0 && STRCMP (s1, s2) == 0)
Packit 6c4009
		    return result;
Packit 6c4009
		  else
Packit 6c4009
		    break;
Packit 6c4009
	        }
Packit 6c4009
Packit 6c4009
	      /* This means one string is shorter than the other.  Find out
Packit 6c4009
		 which one and return an appropriate value.  */
Packit 6c4009
	      return seq1.len == 0 ? -1 : 1;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  result = do_compare (&seq1, &seq2, position, weights);
Packit 6c4009
	  if (result != 0)
Packit 6c4009
	    return result;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      rule = seq1.rule;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  return result;
Packit 6c4009
}
Packit 6c4009
libc_hidden_def (STRCOLL)
Packit 6c4009
Packit 6c4009
#ifndef WIDE_CHAR_VERSION
Packit 6c4009
weak_alias (__strcoll_l, strcoll_l)
Packit 6c4009
#endif