Blame storage/mozStorageSQLFunctions.cpp

Packit f0b94e
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
Packit f0b94e
 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
Packit f0b94e
 * This Source Code Form is subject to the terms of the Mozilla Public
Packit f0b94e
 * License, v. 2.0. If a copy of the MPL was not distributed with this
Packit f0b94e
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
Packit f0b94e
Packit f0b94e
#include "mozilla/ArrayUtils.h"
Packit f0b94e
Packit f0b94e
#include "mozStorageSQLFunctions.h"
Packit f0b94e
#include "nsUnicharUtils.h"
Packit f0b94e
#include <algorithm>
Packit f0b94e
Packit f0b94e
namespace mozilla {
Packit f0b94e
namespace storage {
Packit f0b94e
Packit f0b94e
////////////////////////////////////////////////////////////////////////////////
Packit f0b94e
//// Local Helper Functions
Packit f0b94e
Packit f0b94e
namespace {
Packit f0b94e
Packit f0b94e
/**
Packit f0b94e
 * Performs the LIKE comparison of a string against a pattern.  For more detail
Packit f0b94e
 * see http://www.sqlite.org/lang_expr.html#like.
Packit f0b94e
 *
Packit f0b94e
 * @param aPatternItr
Packit f0b94e
 *        An iterator at the start of the pattern to check for.
Packit f0b94e
 * @param aPatternEnd
Packit f0b94e
 *        An iterator at the end of the pattern to check for.
Packit f0b94e
 * @param aStringItr
Packit f0b94e
 *        An iterator at the start of the string to check for the pattern.
Packit f0b94e
 * @param aStringEnd
Packit f0b94e
 *        An iterator at the end of the string to check for the pattern.
Packit f0b94e
 * @param aEscapeChar
Packit f0b94e
 *        The character to use for escaping symbols in the pattern.
Packit f0b94e
 * @return 1 if the pattern is found, 0 otherwise.
Packit f0b94e
 */
Packit f0b94e
int likeCompare(nsAString::const_iterator aPatternItr,
Packit f0b94e
                nsAString::const_iterator aPatternEnd,
Packit f0b94e
                nsAString::const_iterator aStringItr,
Packit f0b94e
                nsAString::const_iterator aStringEnd, char16_t aEscapeChar) {
Packit f0b94e
  const char16_t MATCH_ALL('%');
Packit f0b94e
  const char16_t MATCH_ONE('_');
Packit f0b94e
Packit f0b94e
  bool lastWasEscape = false;
Packit f0b94e
  while (aPatternItr != aPatternEnd) {
Packit f0b94e
    /**
Packit f0b94e
     * What we do in here is take a look at each character from the input
Packit f0b94e
     * pattern, and do something with it.  There are 4 possibilities:
Packit f0b94e
     * 1) character is an un-escaped match-all character
Packit f0b94e
     * 2) character is an un-escaped match-one character
Packit f0b94e
     * 3) character is an un-escaped escape character
Packit f0b94e
     * 4) character is not any of the above
Packit f0b94e
     */
Packit f0b94e
    if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
Packit f0b94e
      // CASE 1
Packit f0b94e
      /**
Packit f0b94e
       * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
Packit f0b94e
       * MATCH_ALL character.  For each MATCH_ONE character, skip one character
Packit f0b94e
       * in the pattern string.
Packit f0b94e
       */
Packit f0b94e
      while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
Packit f0b94e
        if (*aPatternItr == MATCH_ONE) {
Packit f0b94e
          // If we've hit the end of the string we are testing, no match
Packit f0b94e
          if (aStringItr == aStringEnd) return 0;
Packit f0b94e
          aStringItr++;
Packit f0b94e
        }
Packit f0b94e
        aPatternItr++;
Packit f0b94e
      }
Packit f0b94e
Packit f0b94e
      // If we've hit the end of the pattern string, match
Packit f0b94e
      if (aPatternItr == aPatternEnd) return 1;
Packit f0b94e
Packit f0b94e
      while (aStringItr != aStringEnd) {
Packit f0b94e
        if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
Packit f0b94e
                        aEscapeChar)) {
Packit f0b94e
          // we've hit a match, so indicate this
Packit f0b94e
          return 1;
Packit f0b94e
        }
Packit f0b94e
        aStringItr++;
Packit f0b94e
      }
Packit f0b94e
Packit f0b94e
      // No match
Packit f0b94e
      return 0;
Packit f0b94e
    } else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
Packit f0b94e
      // CASE 2
Packit f0b94e
      if (aStringItr == aStringEnd) {
Packit f0b94e
        // If we've hit the end of the string we are testing, no match
Packit f0b94e
        return 0;
Packit f0b94e
      }
Packit f0b94e
      aStringItr++;
Packit f0b94e
      lastWasEscape = false;
Packit f0b94e
    } else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
Packit f0b94e
      // CASE 3
Packit f0b94e
      lastWasEscape = true;
Packit f0b94e
    } else {
Packit f0b94e
      // CASE 4
Packit f0b94e
      if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
Packit f0b94e
        // If we've hit a point where the strings don't match, there is no match
Packit f0b94e
        return 0;
Packit f0b94e
      }
Packit f0b94e
      aStringItr++;
Packit f0b94e
      lastWasEscape = false;
Packit f0b94e
    }
Packit f0b94e
Packit f0b94e
    aPatternItr++;
Packit f0b94e
  }
Packit f0b94e
Packit f0b94e
  return aStringItr == aStringEnd;
Packit f0b94e
}
Packit f0b94e
Packit f0b94e
/**
Packit f0b94e
 * Compute the Levenshtein Edit Distance between two strings.
Packit f0b94e
 *
Packit f0b94e
 * @param aStringS
Packit f0b94e
 *        a string
Packit f0b94e
 * @param aStringT
Packit f0b94e
 *        another string
Packit f0b94e
 * @param _result
Packit f0b94e
 *        an outparam that will receive the edit distance between the arguments
Packit f0b94e
 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
Packit f0b94e
 */
Packit f0b94e
int levenshteinDistance(const nsAString &aStringS, const nsAString &aStringT,
Packit f0b94e
                        int *_result) {
Packit f0b94e
  // Set the result to a non-sensical value in case we encounter an error.
Packit f0b94e
  *_result = -1;
Packit f0b94e
Packit f0b94e
  const uint32_t sLen = aStringS.Length();
Packit f0b94e
  const uint32_t tLen = aStringT.Length();
Packit f0b94e
Packit f0b94e
  if (sLen == 0) {
Packit f0b94e
    *_result = tLen;
Packit f0b94e
    return SQLITE_OK;
Packit f0b94e
  }
Packit f0b94e
  if (tLen == 0) {
Packit f0b94e
    *_result = sLen;
Packit f0b94e
    return SQLITE_OK;
Packit f0b94e
  }
Packit f0b94e
Packit f0b94e
  // Notionally, Levenshtein Distance is computed in a matrix.  If we
Packit f0b94e
  // assume s = "span" and t = "spam", the matrix would look like this:
Packit f0b94e
  //    s -->
Packit f0b94e
  //  t          s   p   a   n
Packit f0b94e
  //  |      0   1   2   3   4
Packit f0b94e
  //  V  s   1   *   *   *   *
Packit f0b94e
  //     p   2   *   *   *   *
Packit f0b94e
  //     a   3   *   *   *   *
Packit f0b94e
  //     m   4   *   *   *   *
Packit f0b94e
  //
Packit f0b94e
  // Note that the row width is sLen + 1 and the column height is tLen + 1,
Packit f0b94e
  // where sLen is the length of the string "s" and tLen is the length of "t".
Packit f0b94e
  // The first row and the first column are initialized as shown, and
Packit f0b94e
  // the algorithm computes the remaining cells row-by-row, and
Packit f0b94e
  // left-to-right within each row.  The computation only requires that
Packit f0b94e
  // we be able to see the current row and the previous one.
Packit f0b94e
Packit f0b94e
  // Allocate memory for two rows.
Packit f0b94e
  AutoTArray<int, nsAutoString::kStorageSize> row1;
Packit f0b94e
  AutoTArray<int, nsAutoString::kStorageSize> row2;
Packit f0b94e
Packit f0b94e
  // Declare the raw pointers that will actually be used to access the memory.
Packit f0b94e
  int *prevRow = row1.AppendElements(sLen + 1);
Packit f0b94e
  int *currRow = row2.AppendElements(sLen + 1);
Packit f0b94e
Packit f0b94e
  // Initialize the first row.
Packit f0b94e
  for (uint32_t i = 0; i <= sLen; i++) prevRow[i] = i;
Packit f0b94e
Packit f0b94e
  const char16_t *s = aStringS.BeginReading();
Packit f0b94e
  const char16_t *t = aStringT.BeginReading();
Packit f0b94e
Packit f0b94e
  // Compute the empty cells in the "matrix" row-by-row, starting with
Packit f0b94e
  // the second row.
Packit f0b94e
  for (uint32_t ti = 1; ti <= tLen; ti++) {
Packit f0b94e
    // Initialize the first cell in this row.
Packit f0b94e
    currRow[0] = ti;
Packit f0b94e
Packit f0b94e
    // Get the character from "t" that corresponds to this row.
Packit f0b94e
    const char16_t tch = t[ti - 1];
Packit f0b94e
Packit f0b94e
    // Compute the remaining cells in this row, left-to-right,
Packit f0b94e
    // starting at the second column (and first character of "s").
Packit f0b94e
    for (uint32_t si = 1; si <= sLen; si++) {
Packit f0b94e
      // Get the character from "s" that corresponds to this column,
Packit f0b94e
      // compare it to the t-character, and compute the "cost".
Packit f0b94e
      const char16_t sch = s[si - 1];
Packit f0b94e
      int cost = (sch == tch) ? 0 : 1;
Packit f0b94e
Packit f0b94e
      // ............ We want to calculate the value of cell "d" from
Packit f0b94e
      // ...ab....... the previously calculated (or initialized) cells
Packit f0b94e
      // ...cd....... "a", "b", and "c", where d = min(a', b', c').
Packit f0b94e
      // ............
Packit f0b94e
      int aPrime = prevRow[si - 1] + cost;
Packit f0b94e
      int bPrime = prevRow[si] + 1;
Packit f0b94e
      int cPrime = currRow[si - 1] + 1;
Packit f0b94e
      currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
Packit f0b94e
    }
Packit f0b94e
Packit f0b94e
    // Advance to the next row.  The current row becomes the previous
Packit f0b94e
    // row and we recycle the old previous row as the new current row.
Packit f0b94e
    // We don't need to re-initialize the new current row since we will
Packit f0b94e
    // rewrite all of its cells anyway.
Packit f0b94e
    int *oldPrevRow = prevRow;
Packit f0b94e
    prevRow = currRow;
Packit f0b94e
    currRow = oldPrevRow;
Packit f0b94e
  }
Packit f0b94e
Packit f0b94e
  // The final result is the value of the last cell in the last row.
Packit f0b94e
  // Note that that's now in the "previous" row, since we just swapped them.
Packit f0b94e
  *_result = prevRow[sLen];
Packit f0b94e
  return SQLITE_OK;
Packit f0b94e
}
Packit f0b94e
Packit f0b94e
// This struct is used only by registerFunctions below, but ISO C++98 forbids
Packit f0b94e
// instantiating a template dependent on a locally-defined type.  Boo-urns!
Packit f0b94e
struct Functions {
Packit f0b94e
  const char *zName;
Packit f0b94e
  int nArg;
Packit f0b94e
  int enc;
Packit f0b94e
  void *pContext;
Packit f0b94e
  void (*xFunc)(::sqlite3_context *, int, sqlite3_value **);
Packit f0b94e
};
Packit f0b94e
Packit f0b94e
}  // namespace
Packit f0b94e
Packit f0b94e
////////////////////////////////////////////////////////////////////////////////
Packit f0b94e
//// Exposed Functions
Packit f0b94e
Packit f0b94e
int registerFunctions(sqlite3 *aDB) {
Packit f0b94e
  Functions functions[] = {
Packit f0b94e
      {"lower", 1, SQLITE_UTF16, 0, caseFunction},
Packit f0b94e
      {"lower", 1, SQLITE_UTF8, 0, caseFunction},
Packit f0b94e
      {"upper", 1, SQLITE_UTF16, (void *)1, caseFunction},
Packit f0b94e
      {"upper", 1, SQLITE_UTF8, (void *)1, caseFunction},
Packit f0b94e
Packit f0b94e
      {"like", 2, SQLITE_UTF16, 0, likeFunction},
Packit f0b94e
      {"like", 2, SQLITE_UTF8, 0, likeFunction},
Packit f0b94e
      {"like", 3, SQLITE_UTF16, 0, likeFunction},
Packit f0b94e
      {"like", 3, SQLITE_UTF8, 0, likeFunction},
Packit f0b94e
Packit f0b94e
      {"levenshteinDistance", 2, SQLITE_UTF16, 0, levenshteinDistanceFunction},
Packit f0b94e
      {"levenshteinDistance", 2, SQLITE_UTF8, 0, levenshteinDistanceFunction},
Packit f0b94e
  };
Packit f0b94e
Packit f0b94e
  int rv = SQLITE_OK;
Packit f0b94e
  for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
Packit f0b94e
    struct Functions *p = &functions[i];
Packit f0b94e
    rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
Packit f0b94e
                                   p->xFunc, nullptr, nullptr);
Packit f0b94e
  }
Packit f0b94e
Packit f0b94e
  return rv;
Packit f0b94e
}
Packit f0b94e
Packit f0b94e
////////////////////////////////////////////////////////////////////////////////
Packit f0b94e
//// SQL Functions
Packit f0b94e
Packit f0b94e
void caseFunction(sqlite3_context *aCtx, int aArgc, sqlite3_value **aArgv) {
Packit f0b94e
  NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
Packit f0b94e
Packit f0b94e
  nsAutoString data(
Packit f0b94e
      static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
Packit f0b94e
  bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
Packit f0b94e
Packit f0b94e
  if (toUpper)
Packit f0b94e
    ::ToUpperCase(data);
Packit f0b94e
  else
Packit f0b94e
    ::ToLowerCase(data);
Packit f0b94e
Packit f0b94e
  // Set the result.
Packit f0b94e
  ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
Packit f0b94e
}
Packit f0b94e
Packit f0b94e
/**
Packit f0b94e
 * This implements the like() SQL function.  This is used by the LIKE operator.
Packit f0b94e
 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
Packit f0b94e
 * an escape character, say E, it is implemented as 'like(B, A, E)'.
Packit f0b94e
 */
Packit f0b94e
void likeFunction(sqlite3_context *aCtx, int aArgc, sqlite3_value **aArgv) {
Packit f0b94e
  NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
Packit f0b94e
Packit f0b94e
  if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
Packit f0b94e
    ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
Packit f0b94e
                           SQLITE_TOOBIG);
Packit f0b94e
    return;
Packit f0b94e
  }
Packit f0b94e
Packit f0b94e
  if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
Packit f0b94e
    return;
Packit f0b94e
Packit f0b94e
  nsDependentString A(
Packit f0b94e
      static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])));
Packit f0b94e
  nsDependentString B(
Packit f0b94e
      static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
Packit f0b94e
  NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
Packit f0b94e
Packit f0b94e
  char16_t E = 0;
Packit f0b94e
  if (3 == aArgc)
Packit f0b94e
    E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0];
Packit f0b94e
Packit f0b94e
  nsAString::const_iterator itrString, endString;
Packit f0b94e
  A.BeginReading(itrString);
Packit f0b94e
  A.EndReading(endString);
Packit f0b94e
  nsAString::const_iterator itrPattern, endPattern;
Packit f0b94e
  B.BeginReading(itrPattern);
Packit f0b94e
  B.EndReading(endPattern);
Packit f0b94e
  ::sqlite3_result_int(
Packit f0b94e
      aCtx, likeCompare(itrPattern, endPattern, itrString, endString, E));
Packit f0b94e
}
Packit f0b94e
Packit f0b94e
void levenshteinDistanceFunction(sqlite3_context *aCtx, int aArgc,
Packit f0b94e
                                 sqlite3_value **aArgv) {
Packit f0b94e
  NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
Packit f0b94e
Packit f0b94e
  // If either argument is a SQL NULL, then return SQL NULL.
Packit f0b94e
  if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
Packit f0b94e
      ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
Packit f0b94e
    ::sqlite3_result_null(aCtx);
Packit f0b94e
    return;
Packit f0b94e
  }
Packit f0b94e
Packit f0b94e
  int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
Packit f0b94e
  const char16_t *a =
Packit f0b94e
      static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
Packit f0b94e
Packit f0b94e
  int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
Packit f0b94e
  const char16_t *b =
Packit f0b94e
      static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
Packit f0b94e
Packit f0b94e
  // Compute the Levenshtein Distance, and return the result (or error).
Packit f0b94e
  int distance = -1;
Packit f0b94e
  const nsDependentString A(a, aLen);
Packit f0b94e
  const nsDependentString B(b, bLen);
Packit f0b94e
  int status = levenshteinDistance(A, B, &distance);
Packit f0b94e
  if (status == SQLITE_OK) {
Packit f0b94e
    ::sqlite3_result_int(aCtx, distance);
Packit f0b94e
  } else if (status == SQLITE_NOMEM) {
Packit f0b94e
    ::sqlite3_result_error_nomem(aCtx);
Packit f0b94e
  } else {
Packit f0b94e
    ::sqlite3_result_error(aCtx, "User function returned error code", -1);
Packit f0b94e
  }
Packit f0b94e
}
Packit f0b94e
Packit f0b94e
}  // namespace storage
Packit f0b94e
}  // namespace mozilla