Blame src/idna.c

Packit Service 7c31a4
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
Packit Service 7c31a4
 *
Packit Service 7c31a4
 * Permission to use, copy, modify, and/or distribute this software for any
Packit Service 7c31a4
 * purpose with or without fee is hereby granted, provided that the above
Packit Service 7c31a4
 * copyright notice and this permission notice appear in all copies.
Packit Service 7c31a4
 *
Packit Service 7c31a4
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
Packit Service 7c31a4
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
Packit Service 7c31a4
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
Packit Service 7c31a4
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
Packit Service 7c31a4
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
Packit Service 7c31a4
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
Packit Service 7c31a4
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Packit Service 7c31a4
 */
Packit Service 7c31a4
Packit Service 7c31a4
/* Derived from https://github.com/bnoordhuis/punycode
Packit Service 7c31a4
 * but updated to support IDNA 2008.
Packit Service 7c31a4
 */
Packit Service 7c31a4
Packit Service 7c31a4
#include "uv.h"
Packit Service 7c31a4
#include "idna.h"
Packit Service 7c31a4
#include <string.h>
Packit Service 7c31a4
Packit Service 7c31a4
/* Decodes the remainder of a multi-byte UTF-8 sequence.
 *
 * |a| is the lead byte, which the caller has already consumed. |*p| points
 * at the byte that follows it and |pe| points one past the last readable
 * byte of the input.
 *
 * On success, returns the decoded code point and advances |*p| past the
 * sequence. Returns (unsigned) -1 when:
 *
 * - |a| is not a valid lead byte (a continuation byte or > 0xF7),
 * - fewer continuation bytes remain before |pe| than |a| calls for,
 * - a continuation byte is not of the form 10xxxxxx,
 * - the sequence is overlong, encodes a surrogate, or exceeds U+10FFFF.
 *
 * No byte at or beyond |pe| is ever read and |*p| is never moved past |pe|.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned min;
  unsigned b;
  int need;

  if (a < 0xC0)
    return -1;  /* Stray continuation byte (or ASCII) in lead position. */

  if (a > 0xF7)
    return -1;  /* Not a lead byte of a 2, 3 or 4 byte sequence. */

  if (a > 0xEF) {
    need = 3;
    min = 0x10000;
    a &= 7;
  } else if (a > 0xDF) {
    need = 2;
    min = 0x800;
    a &= 15;
  } else {
    need = 1;
    min = 0x80;
    a &= 31;
  }

  /* Signed comparison on purpose: if a caller hands us |*p| > |pe| the
   * difference is negative and we bail out instead of reading anything.
   */
  if (pe - *p < need)
    return -1;  /* Truncated sequence. */

  for (; need > 0; need--) {
    b = (unsigned char) **p;

    if ((b & 0xC0) != 0x80)
      return -1;  /* Invalid continuation byte. */

    (*p)++;
    a = (a << 6) | (b & 63);
  }

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}
Packit Service 7c31a4
Packit Service 7c31a4
/* Decodes a single UTF-8 code point starting at |*p| and advances |*p|.
 *
 * The lead byte is read unconditionally, so the caller must make sure
 * that |*p| points at a readable byte (i.e. |*p| < |pe|). ASCII bytes are
 * returned directly; anything else is handed to uv__utf8_decode1_slow()
 * together with |pe|, and its result is returned as is.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned lead;

  lead = (unsigned char) **p;
  *p += 1;

  if (lead >= 128)
    return uv__utf8_decode1_slow(p, pe, lead);  /* Multi-byte sequence. */

  return lead;  /* Plain ASCII, the common case. */
}
Packit Service 7c31a4
Packit Service 7c31a4
#define foreach_codepoint(c, p, pe) \
Packit Service 7c31a4
  for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
Packit Service 7c31a4
Packit Service 7c31a4
static int uv__idna_toascii_label(const char* s, const char* se,
Packit Service 7c31a4
                                  char** d, char* de) {
Packit Service 7c31a4
  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
Packit Service 7c31a4
  const char* ss;
Packit Service 7c31a4
  unsigned c;
Packit Service 7c31a4
  unsigned h;
Packit Service 7c31a4
  unsigned k;
Packit Service 7c31a4
  unsigned n;
Packit Service 7c31a4
  unsigned m;
Packit Service 7c31a4
  unsigned q;
Packit Service 7c31a4
  unsigned t;
Packit Service 7c31a4
  unsigned x;
Packit Service 7c31a4
  unsigned y;
Packit Service 7c31a4
  unsigned bias;
Packit Service 7c31a4
  unsigned delta;
Packit Service 7c31a4
  unsigned todo;
Packit Service 7c31a4
  int first;
Packit Service 7c31a4
Packit Service 7c31a4
  h = 0;
Packit Service 7c31a4
  ss = s;
Packit Service 7c31a4
  todo = 0;
Packit Service 7c31a4
Packit Service 7c31a4
  foreach_codepoint(c, &s, se) {
Packit Service 7c31a4
    if (c < 128)
Packit Service 7c31a4
      h++;
Packit Service 7c31a4
    else if (c == (unsigned) -1)
Packit Service 7c31a4
      return UV_EINVAL;
Packit Service 7c31a4
    else
Packit Service 7c31a4
      todo++;
Packit Service 7c31a4
  }
Packit Service 7c31a4
Packit Service 7c31a4
  if (todo > 0) {
Packit Service 7c31a4
    if (*d < de) *(*d)++ = 'x';
Packit Service 7c31a4
    if (*d < de) *(*d)++ = 'n';
Packit Service 7c31a4
    if (*d < de) *(*d)++ = '-';
Packit Service 7c31a4
    if (*d < de) *(*d)++ = '-';
Packit Service 7c31a4
  }
Packit Service 7c31a4
Packit Service 7c31a4
  x = 0;
Packit Service 7c31a4
  s = ss;
Packit Service 7c31a4
  foreach_codepoint(c, &s, se) {
Packit Service 7c31a4
    if (c > 127)
Packit Service 7c31a4
      continue;
Packit Service 7c31a4
Packit Service 7c31a4
    if (*d < de)
Packit Service 7c31a4
      *(*d)++ = c;
Packit Service 7c31a4
Packit Service 7c31a4
    if (++x == h)
Packit Service 7c31a4
      break;  /* Visited all ASCII characters. */
Packit Service 7c31a4
  }
Packit Service 7c31a4
Packit Service 7c31a4
  if (todo == 0)
Packit Service 7c31a4
    return h;
Packit Service 7c31a4
Packit Service 7c31a4
  /* Only write separator when we've written ASCII characters first. */
Packit Service 7c31a4
  if (h > 0)
Packit Service 7c31a4
    if (*d < de)
Packit Service 7c31a4
      *(*d)++ = '-';
Packit Service 7c31a4
Packit Service 7c31a4
  n = 128;
Packit Service 7c31a4
  bias = 72;
Packit Service 7c31a4
  delta = 0;
Packit Service 7c31a4
  first = 1;
Packit Service 7c31a4
Packit Service 7c31a4
  while (todo > 0) {
Packit Service 7c31a4
    m = -1;
Packit Service 7c31a4
    s = ss;
Packit Service 7c31a4
    foreach_codepoint(c, &s, se)
Packit Service 7c31a4
      if (c >= n)
Packit Service 7c31a4
        if (c < m)
Packit Service 7c31a4
          m = c;
Packit Service 7c31a4
Packit Service 7c31a4
    x = m - n;
Packit Service 7c31a4
    y = h + 1;
Packit Service 7c31a4
Packit Service 7c31a4
    if (x > ~delta / y)
Packit Service 7c31a4
      return UV_E2BIG;  /* Overflow. */
Packit Service 7c31a4
Packit Service 7c31a4
    delta += x * y;
Packit Service 7c31a4
    n = m;
Packit Service 7c31a4
Packit Service 7c31a4
    s = ss;
Packit Service 7c31a4
    foreach_codepoint(c, &s, se) {
Packit Service 7c31a4
      if (c < n)
Packit Service 7c31a4
        if (++delta == 0)
Packit Service 7c31a4
          return UV_E2BIG;  /* Overflow. */
Packit Service 7c31a4
Packit Service 7c31a4
      if (c != n)
Packit Service 7c31a4
        continue;
Packit Service 7c31a4
Packit Service 7c31a4
      for (k = 36, q = delta; /* empty */; k += 36) {
Packit Service 7c31a4
        t = 1;
Packit Service 7c31a4
Packit Service 7c31a4
        if (k > bias)
Packit Service 7c31a4
          t = k - bias;
Packit Service 7c31a4
Packit Service 7c31a4
        if (t > 26)
Packit Service 7c31a4
          t = 26;
Packit Service 7c31a4
Packit Service 7c31a4
        if (q < t)
Packit Service 7c31a4
          break;
Packit Service 7c31a4
Packit Service 7c31a4
        /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
Packit Service 7c31a4
         * 10 <= y <= 35, we can optimize the long division
Packit Service 7c31a4
         * into a table-based reciprocal multiplication.
Packit Service 7c31a4
         */
Packit Service 7c31a4
        x = q - t;
Packit Service 7c31a4
        y = 36 - t;  /* 10 <= y <= 35 since 1 <= t <= 26. */
Packit Service 7c31a4
        q = x / y;
Packit Service 7c31a4
        t = t + x % y;  /* 1 <= t <= 35 because of y. */
Packit Service 7c31a4
Packit Service 7c31a4
        if (*d < de)
Packit Service 7c31a4
          *(*d)++ = alphabet[t];
Packit Service 7c31a4
      }
Packit Service 7c31a4
Packit Service 7c31a4
      if (*d < de)
Packit Service 7c31a4
        *(*d)++ = alphabet[q];
Packit Service 7c31a4
Packit Service 7c31a4
      delta /= 2;
Packit Service 7c31a4
Packit Service 7c31a4
      if (first) {
Packit Service 7c31a4
        delta /= 350;
Packit Service 7c31a4
        first = 0;
Packit Service 7c31a4
      }
Packit Service 7c31a4
Packit Service 7c31a4
      /* No overflow check is needed because |delta| was just
Packit Service 7c31a4
       * divided by 2 and |delta+delta >= delta + delta/h|.
Packit Service 7c31a4
       */
Packit Service 7c31a4
      h++;
Packit Service 7c31a4
      delta += delta / h;
Packit Service 7c31a4
Packit Service 7c31a4
      for (bias = 0; delta > 35 * 26 / 2; bias += 36)
Packit Service 7c31a4
        delta /= 35;
Packit Service 7c31a4
Packit Service 7c31a4
      bias += 36 * delta / (delta + 38);
Packit Service 7c31a4
      delta = 0;
Packit Service 7c31a4
      todo--;
Packit Service 7c31a4
    }
Packit Service 7c31a4
Packit Service 7c31a4
    delta++;
Packit Service 7c31a4
    n++;
Packit Service 7c31a4
  }
Packit Service 7c31a4
Packit Service 7c31a4
  return 0;
Packit Service 7c31a4
}
Packit Service 7c31a4
Packit Service 7c31a4
#undef foreach_codepoint
Packit Service 7c31a4
Packit Service 7c31a4
long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
Packit Service 7c31a4
  const char* si;
Packit Service 7c31a4
  const char* st;
Packit Service 7c31a4
  unsigned c;
Packit Service 7c31a4
  char* ds;
Packit Service 7c31a4
  int rc;
Packit Service 7c31a4
Packit Service 7c31a4
  ds = d;
Packit Service 7c31a4
Packit Service 7c31a4
  for (si = s; si < se; /* empty */) {
Packit Service 7c31a4
    st = si;
Packit Service 7c31a4
    c = uv__utf8_decode1(&si, se);
Packit Service 7c31a4
Packit Service 7c31a4
    if (c != '.')
Packit Service 7c31a4
      if (c != 0x3002)  /* 。 */
Packit Service 7c31a4
        if (c != 0xFF0E)  /* . */
Packit Service 7c31a4
          if (c != 0xFF61)  /* 。 */
Packit Service 7c31a4
            continue;
Packit Service 7c31a4
Packit Service 7c31a4
    rc = uv__idna_toascii_label(s, st, &d, de);
Packit Service 7c31a4
Packit Service 7c31a4
    if (rc < 0)
Packit Service 7c31a4
      return rc;
Packit Service 7c31a4
Packit Service 7c31a4
    if (d < de)
Packit Service 7c31a4
      *d++ = '.';
Packit Service 7c31a4
Packit Service 7c31a4
    s = si;
Packit Service 7c31a4
  }
Packit Service 7c31a4
Packit Service 7c31a4
  if (s < se) {
Packit Service 7c31a4
    rc = uv__idna_toascii_label(s, se, &d, de);
Packit Service 7c31a4
Packit Service 7c31a4
    if (rc < 0)
Packit Service 7c31a4
      return rc;
Packit Service 7c31a4
  }
Packit Service 7c31a4
Packit Service 7c31a4
  if (d < de)
Packit Service 7c31a4
    *d++ = '\0';
Packit Service 7c31a4
Packit Service 7c31a4
  return d - ds;  /* Number of bytes written. */
Packit Service 7c31a4
}