Blame src/idna.c

Packit Service e08953
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
Packit Service e08953
 *
Packit Service e08953
 * Permission to use, copy, modify, and/or distribute this software for any
Packit Service e08953
 * purpose with or without fee is hereby granted, provided that the above
Packit Service e08953
 * copyright notice and this permission notice appear in all copies.
Packit Service e08953
 *
Packit Service e08953
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
Packit Service e08953
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
Packit Service e08953
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
Packit Service e08953
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
Packit Service e08953
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
Packit Service e08953
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
Packit Service e08953
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Packit Service e08953
 */
Packit Service e08953
Packit Service e08953
/* Derived from https://github.com/bnoordhuis/punycode
Packit Service e08953
 * but updated to support IDNA 2008.
Packit Service e08953
 */
Packit Service e08953
Packit Service e08953
#include "uv.h"
Packit Service e08953
#include "idna.h"
Packit Service e08953
#include <string.h>
Packit Service e08953
Packit Service e08953
/* Decode the tail of a multi-byte UTF-8 sequence.
 *
 * |a| is the lead byte, which the caller has already consumed. |*p| points
 * at the first continuation byte and |pe| is one past the end of the input.
 *
 * On success the decoded code point is returned and |*p| is advanced past
 * the sequence. On failure (unsigned) -1 is returned. Failure cases are:
 *
 *  - |a| is not a valid lead byte (0x80-0xBF or 0xF8-0xFF); |*p| is untouched
 *  - fewer continuation bytes remain before |pe| than the lead byte calls
 *    for; |*p| is untouched and nothing at or beyond |pe| is read
 *  - a continuation byte is not of the form 10xxxxxx
 *  - the sequence is overlong, encodes a surrogate, or exceeds U+10FFFF
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned b;
  unsigned c;
  unsigned d;
  unsigned min;
  int need;  /* Number of continuation bytes the lead byte calls for. */

  if (a > 0xF7)
    return -1;  /* 0xF8-0xFF never start a sequence. */

  if (a > 0xEF)
    need = 3;
  else if (a > 0xDF)
    need = 2;
  else if (a > 0xBF)
    need = 1;
  else
    return -1;  /* Stray continuation byte. */

  /* Signed comparison on purpose: |*p| may already be past |pe|. */
  if (pe - *p < need)
    return -1;  /* Truncated sequence. */

  /* Shorter sequences are padded on the left with synthetic continuation
   * bytes (0x80 | payload) so that all lengths share the code below.
   */
  switch (need) {
  case 3:
    min = 0x10000;
    a = a & 7;
    b = (unsigned char) *(*p)++;
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    break;
  case 2:
    min = 0x800;
    b = 0x80 | (a & 15);
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  default:
    min = 0x80;
    b = 0x80;
    c = 0x80 | (a & 31);
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  }

  /* Check every byte on its own. XOR-ing the three bytes together would
   * let pairs of invalid bytes cancel each other out.
   */
  if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (d & 0xC0) != 0x80)
    return -1;  /* Invalid continuation byte. */

  b &= 63;
  c &= 63;
  d &= 63;
  a = (a << 18) | (b << 12) | (c << 6) | d;

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}
Packit Service e08953
Packit Service e08953
/* Decode one UTF-8 code point from the input [*p, pe) and advance |*p|.
 *
 * Returns the code point, or (unsigned) -1 when the bytes at |*p| do not
 * form a valid sequence or when no input is left (|*p| >= |pe|).
 *
 * |*p| is always advanced by at least one position, even when no input is
 * left: callers in this file detect the end of input by observing the
 * cursor move beyond |pe|. In that case nothing is read from memory.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned a;

  if (*p >= pe) {
    (*p)++;  /* Keep the cursor moving; see the comment above. */
    return -1;
  }

  a = (unsigned char) *(*p)++;

  if (a < 128)
    return a;  /* ASCII, common case. */

  return uv__utf8_decode1_slow(p, pe, a);
}
Packit Service e08953
Packit Service e08953
#define foreach_codepoint(c, p, pe) \
Packit Service e08953
  for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
Packit Service e08953
Packit Service e08953
static int uv__idna_toascii_label(const char* s, const char* se,
Packit Service e08953
                                  char** d, char* de) {
Packit Service e08953
  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
Packit Service e08953
  const char* ss;
Packit Service e08953
  unsigned c;
Packit Service e08953
  unsigned h;
Packit Service e08953
  unsigned k;
Packit Service e08953
  unsigned n;
Packit Service e08953
  unsigned m;
Packit Service e08953
  unsigned q;
Packit Service e08953
  unsigned t;
Packit Service e08953
  unsigned x;
Packit Service e08953
  unsigned y;
Packit Service e08953
  unsigned bias;
Packit Service e08953
  unsigned delta;
Packit Service e08953
  unsigned todo;
Packit Service e08953
  int first;
Packit Service e08953
Packit Service e08953
  h = 0;
Packit Service e08953
  ss = s;
Packit Service e08953
  todo = 0;
Packit Service e08953
Packit Service e08953
  foreach_codepoint(c, &s, se) {
Packit Service e08953
    if (c < 128)
Packit Service e08953
      h++;
Packit Service e08953
    else if (c == (unsigned) -1)
Packit Service e08953
      return UV_EINVAL;
Packit Service e08953
    else
Packit Service e08953
      todo++;
Packit Service e08953
  }
Packit Service e08953
Packit Service e08953
  if (todo > 0) {
Packit Service e08953
    if (*d < de) *(*d)++ = 'x';
Packit Service e08953
    if (*d < de) *(*d)++ = 'n';
Packit Service e08953
    if (*d < de) *(*d)++ = '-';
Packit Service e08953
    if (*d < de) *(*d)++ = '-';
Packit Service e08953
  }
Packit Service e08953
Packit Service e08953
  x = 0;
Packit Service e08953
  s = ss;
Packit Service e08953
  foreach_codepoint(c, &s, se) {
Packit Service e08953
    if (c > 127)
Packit Service e08953
      continue;
Packit Service e08953
Packit Service e08953
    if (*d < de)
Packit Service e08953
      *(*d)++ = c;
Packit Service e08953
Packit Service e08953
    if (++x == h)
Packit Service e08953
      break;  /* Visited all ASCII characters. */
Packit Service e08953
  }
Packit Service e08953
Packit Service e08953
  if (todo == 0)
Packit Service e08953
    return h;
Packit Service e08953
Packit Service e08953
  /* Only write separator when we've written ASCII characters first. */
Packit Service e08953
  if (h > 0)
Packit Service e08953
    if (*d < de)
Packit Service e08953
      *(*d)++ = '-';
Packit Service e08953
Packit Service e08953
  n = 128;
Packit Service e08953
  bias = 72;
Packit Service e08953
  delta = 0;
Packit Service e08953
  first = 1;
Packit Service e08953
Packit Service e08953
  while (todo > 0) {
Packit Service e08953
    m = -1;
Packit Service e08953
    s = ss;
Packit Service e08953
    foreach_codepoint(c, &s, se)
Packit Service e08953
      if (c >= n)
Packit Service e08953
        if (c < m)
Packit Service e08953
          m = c;
Packit Service e08953
Packit Service e08953
    x = m - n;
Packit Service e08953
    y = h + 1;
Packit Service e08953
Packit Service e08953
    if (x > ~delta / y)
Packit Service e08953
      return UV_E2BIG;  /* Overflow. */
Packit Service e08953
Packit Service e08953
    delta += x * y;
Packit Service e08953
    n = m;
Packit Service e08953
Packit Service e08953
    s = ss;
Packit Service e08953
    foreach_codepoint(c, &s, se) {
Packit Service e08953
      if (c < n)
Packit Service e08953
        if (++delta == 0)
Packit Service e08953
          return UV_E2BIG;  /* Overflow. */
Packit Service e08953
Packit Service e08953
      if (c != n)
Packit Service e08953
        continue;
Packit Service e08953
Packit Service e08953
      for (k = 36, q = delta; /* empty */; k += 36) {
Packit Service e08953
        t = 1;
Packit Service e08953
Packit Service e08953
        if (k > bias)
Packit Service e08953
          t = k - bias;
Packit Service e08953
Packit Service e08953
        if (t > 26)
Packit Service e08953
          t = 26;
Packit Service e08953
Packit Service e08953
        if (q < t)
Packit Service e08953
          break;
Packit Service e08953
Packit Service e08953
        /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
Packit Service e08953
         * 10 <= y <= 35, we can optimize the long division
Packit Service e08953
         * into a table-based reciprocal multiplication.
Packit Service e08953
         */
Packit Service e08953
        x = q - t;
Packit Service e08953
        y = 36 - t;  /* 10 <= y <= 35 since 1 <= t <= 26. */
Packit Service e08953
        q = x / y;
Packit Service e08953
        t = t + x % y;  /* 1 <= t <= 35 because of y. */
Packit Service e08953
Packit Service e08953
        if (*d < de)
Packit Service e08953
          *(*d)++ = alphabet[t];
Packit Service e08953
      }
Packit Service e08953
Packit Service e08953
      if (*d < de)
Packit Service e08953
        *(*d)++ = alphabet[q];
Packit Service e08953
Packit Service e08953
      delta /= 2;
Packit Service e08953
Packit Service e08953
      if (first) {
Packit Service e08953
        delta /= 350;
Packit Service e08953
        first = 0;
Packit Service e08953
      }
Packit Service e08953
Packit Service e08953
      /* No overflow check is needed because |delta| was just
Packit Service e08953
       * divided by 2 and |delta+delta >= delta + delta/h|.
Packit Service e08953
       */
Packit Service e08953
      h++;
Packit Service e08953
      delta += delta / h;
Packit Service e08953
Packit Service e08953
      for (bias = 0; delta > 35 * 26 / 2; bias += 36)
Packit Service e08953
        delta /= 35;
Packit Service e08953
Packit Service e08953
      bias += 36 * delta / (delta + 38);
Packit Service e08953
      delta = 0;
Packit Service e08953
      todo--;
Packit Service e08953
    }
Packit Service e08953
Packit Service e08953
    delta++;
Packit Service e08953
    n++;
Packit Service e08953
  }
Packit Service e08953
Packit Service e08953
  return 0;
Packit Service e08953
}
Packit Service e08953
Packit Service e08953
#undef foreach_codepoint
Packit Service e08953
Packit Service e08953
long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
Packit Service e08953
  const char* si;
Packit Service e08953
  const char* st;
Packit Service e08953
  unsigned c;
Packit Service e08953
  char* ds;
Packit Service e08953
  int rc;
Packit Service e08953
Packit Service e08953
  ds = d;
Packit Service e08953
Packit Service e08953
  for (si = s; si < se; /* empty */) {
Packit Service e08953
    st = si;
Packit Service e08953
    c = uv__utf8_decode1(&si, se);
Packit Service e08953
Packit Service e08953
    if (c != '.')
Packit Service e08953
      if (c != 0x3002)  /* 。 */
Packit Service e08953
        if (c != 0xFF0E)  /* . */
Packit Service e08953
          if (c != 0xFF61)  /* 。 */
Packit Service e08953
            continue;
Packit Service e08953
Packit Service e08953
    rc = uv__idna_toascii_label(s, st, &d, de);
Packit Service e08953
Packit Service e08953
    if (rc < 0)
Packit Service e08953
      return rc;
Packit Service e08953
Packit Service e08953
    if (d < de)
Packit Service e08953
      *d++ = '.';
Packit Service e08953
Packit Service e08953
    s = si;
Packit Service e08953
  }
Packit Service e08953
Packit Service e08953
  if (s < se) {
Packit Service e08953
    rc = uv__idna_toascii_label(s, se, &d, de);
Packit Service e08953
Packit Service e08953
    if (rc < 0)
Packit Service e08953
      return rc;
Packit Service e08953
  }
Packit Service e08953
Packit Service e08953
  if (d < de)
Packit Service e08953
    *d++ = '\0';
Packit Service e08953
Packit Service e08953
  return d - ds;  /* Number of bytes written. */
Packit Service e08953
}