Blame lib/unistr/u8-mbtoucr.c

/* Look at first character in UTF-8 string, returning an error code.
   Copyright (C) 1999-2002, 2006-2007, 2009-2017 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2001.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

Packit 33f14e
#include <config.h>
Packit 33f14e
Packit 33f14e
/* Specification.  */
Packit 33f14e
#include "unistr.h"
Packit 33f14e
Packit 33f14e
int
Packit 33f14e
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
Packit 33f14e
{
Packit 33f14e
  uint8_t c = *s;
Packit 33f14e
Packit 33f14e
  if (c < 0x80)
Packit 33f14e
    {
Packit 33f14e
      *puc = c;
Packit 33f14e
      return 1;
Packit 33f14e
    }
Packit 33f14e
  else if (c >= 0xc2)
Packit 33f14e
    {
Packit 33f14e
      if (c < 0xe0)
Packit 33f14e
        {
Packit 33f14e
          if (n >= 2)
Packit 33f14e
            {
Packit 33f14e
              if ((s[1] ^ 0x80) < 0x40)
Packit 33f14e
                {
Packit 33f14e
                  *puc = ((unsigned int) (c & 0x1f) << 6)
Packit 33f14e
                         | (unsigned int) (s[1] ^ 0x80);
Packit 33f14e
                  return 2;
Packit 33f14e
                }
Packit 33f14e
              /* invalid multibyte character */
Packit 33f14e
            }
Packit 33f14e
          else
Packit 33f14e
            {
Packit 33f14e
              /* incomplete multibyte character */
Packit 33f14e
              *puc = 0xfffd;
Packit 33f14e
              return -2;
Packit 33f14e
            }
Packit 33f14e
        }
Packit 33f14e
      else if (c < 0xf0)
Packit 33f14e
        {
Packit 33f14e
          if (n >= 2)
Packit 33f14e
            {
Packit 33f14e
              if ((s[1] ^ 0x80) < 0x40
Packit 33f14e
                  && (c >= 0xe1 || s[1] >= 0xa0)
Packit 33f14e
                  && (c != 0xed || s[1] < 0xa0))
Packit 33f14e
                {
Packit 33f14e
                  if (n >= 3)
Packit 33f14e
                    {
Packit 33f14e
                      if ((s[2] ^ 0x80) < 0x40)
Packit 33f14e
                        {
Packit 33f14e
                          *puc = ((unsigned int) (c & 0x0f) << 12)
Packit 33f14e
                                 | ((unsigned int) (s[1] ^ 0x80) << 6)
Packit 33f14e
                                 | (unsigned int) (s[2] ^ 0x80);
Packit 33f14e
                          return 3;
Packit 33f14e
                        }
Packit 33f14e
                      /* invalid multibyte character */
Packit 33f14e
                    }
Packit 33f14e
                  else
Packit 33f14e
                    {
Packit 33f14e
                      /* incomplete multibyte character */
Packit 33f14e
                      *puc = 0xfffd;
Packit 33f14e
                      return -2;
Packit 33f14e
                    }
Packit 33f14e
                }
Packit 33f14e
              /* invalid multibyte character */
Packit 33f14e
            }
Packit 33f14e
          else
Packit 33f14e
            {
Packit 33f14e
              /* incomplete multibyte character */
Packit 33f14e
              *puc = 0xfffd;
Packit 33f14e
              return -2;
Packit 33f14e
            }
Packit 33f14e
        }
Packit 33f14e
      else if (c < 0xf8)
Packit 33f14e
        {
Packit 33f14e
          if (n >= 2)
Packit 33f14e
            {
Packit 33f14e
              if ((s[1] ^ 0x80) < 0x40
Packit 33f14e
                  && (c >= 0xf1 || s[1] >= 0x90)
Packit 33f14e
                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
Packit 33f14e
                {
Packit 33f14e
                  if (n >= 3)
Packit 33f14e
                    {
Packit 33f14e
                      if ((s[2] ^ 0x80) < 0x40)
Packit 33f14e
                        {
Packit 33f14e
                          if (n >= 4)
Packit 33f14e
                            {
Packit 33f14e
                              if ((s[3] ^ 0x80) < 0x40)
Packit 33f14e
                                {
Packit 33f14e
                                  *puc = ((unsigned int) (c & 0x07) << 18)
Packit 33f14e
                                         | ((unsigned int) (s[1] ^ 0x80) << 12)
Packit 33f14e
                                         | ((unsigned int) (s[2] ^ 0x80) << 6)
Packit 33f14e
                                         | (unsigned int) (s[3] ^ 0x80);
Packit 33f14e
                                  return 4;
Packit 33f14e
                                }
Packit 33f14e
                              /* invalid multibyte character */
Packit 33f14e
                            }
Packit 33f14e
                          else
Packit 33f14e
                            {
Packit 33f14e
                              /* incomplete multibyte character */
Packit 33f14e
                              *puc = 0xfffd;
Packit 33f14e
                              return -2;
Packit 33f14e
                            }
Packit 33f14e
                        }
Packit 33f14e
                      /* invalid multibyte character */
Packit 33f14e
                    }
Packit 33f14e
                  else
Packit 33f14e
                    {
Packit 33f14e
                      /* incomplete multibyte character */
Packit 33f14e
                      *puc = 0xfffd;
Packit 33f14e
                      return -2;
Packit 33f14e
                    }
Packit 33f14e
                }
Packit 33f14e
              /* invalid multibyte character */
Packit 33f14e
            }
Packit 33f14e
          else
Packit 33f14e
            {
Packit 33f14e
              /* incomplete multibyte character */
Packit 33f14e
              *puc = 0xfffd;
Packit 33f14e
              return -2;
Packit 33f14e
            }
Packit 33f14e
        }
Packit 33f14e
    }
Packit 33f14e
  /* invalid multibyte character */
Packit 33f14e
  *puc = 0xfffd;
Packit 33f14e
  return -1;
Packit 33f14e
}