Blame lib/unistr/u8-mbtoucr.c

Packit 709fb3
/* Look at first character in UTF-8 string, returning an error code.
Packit 709fb3
   Copyright (C) 1999-2002, 2006-2007, 2009-2017 Free Software Foundation, Inc.
Packit 709fb3
   Written by Bruno Haible <bruno@clisp.org>, 2001.
Packit 709fb3
Packit 709fb3
   This program is free software: you can redistribute it and/or modify it
Packit 709fb3
   under the terms of the GNU General Public License as published
Packit 709fb3
   by the Free Software Foundation; either version 3 of the License, or
Packit 709fb3
   (at your option) any later version.
Packit 709fb3
Packit 709fb3
   This program is distributed in the hope that it will be useful,
Packit 709fb3
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 709fb3
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 709fb3
   General Public License for more details.
Packit 709fb3
Packit 709fb3
   You should have received a copy of the GNU General Public License
Packit 709fb3
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
Packit 709fb3
Packit 709fb3
#include <config.h>
Packit 709fb3
Packit 709fb3
/* Specification.  */
Packit 709fb3
#include "unistr.h"
Packit 709fb3
Packit 709fb3
/* Decode the character at the start of the UTF-8 string S, looking at no
   more than the first N bytes, and store the result in *PUC.

   Return value:
     1..4  the number of bytes of the character stored in *PUC;
     -1    the bytes at S cannot start a character accepted by this decoder
           (stray continuation byte, overlong encoding, surrogate, value
           above U+10FFFF, or impossible lead byte); *PUC is set to 0xfffd;
     -2    the bytes seen so far are acceptable but more than N bytes are
           needed to finish the character; *PUC is set to 0xfffd.

   S[0] is read unconditionally, so this presumably requires N >= 1 --
   TODO confirm against the declaration in unistr.h.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Single byte (ASCII range).  */
      *puc = c;
      return 1;
    }
  else if (c >= 0xc2)
    {
      /* Bytes 0x80..0xC1 never reach this point: they are either
         continuation bytes or lead bytes of overlong 2-byte forms.  */
      if (c < 0xe0)
        {
          /* 2-byte sequence: 0xC2..0xDF followed by one continuation.  */
          if (n >= 2)
            {
              /* (x ^ 0x80) < 0x40 holds exactly for x in 0x80..0xBF.  */
              if ((s[1] ^ 0x80) < 0x40)
                {
                  *puc = ((unsigned int) (c & 0x1f) << 6)
                         | (unsigned int) (s[1] ^ 0x80);
                  return 2;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c < 0xf0)
        {
          /* 3-byte sequence: 0xE0..0xEF followed by two continuations.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* 0xE0 0x80..0x9F would be an overlong encoding.  */
                  && (c >= 0xe1 || s[1] >= 0xa0)
                  /* 0xED 0xA0..0xBF would encode U+D800..U+DFFF.  */
                  && (c != 0xed || s[1] < 0xa0))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          *puc = ((unsigned int) (c & 0x0f) << 12)
                                 | ((unsigned int) (s[1] ^ 0x80) << 6)
                                 | (unsigned int) (s[2] ^ 0x80);
                          return 3;
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c <= 0xf4)
        {
          /* 4-byte sequence: 0xF0..0xF4 followed by three continuations.
             Lead bytes 0xF5 and above are deliberately excluded here: no
             continuation could make them acceptable, so they must be
             reported as invalid (-1) below rather than as incomplete (-2)
             when N is 1.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* 0xF0 0x80..0x8F would be an overlong encoding.  */
                  && (c >= 0xf1 || s[1] >= 0x90)
                  /* 0xF4 0x90..0xBF would encode a value > 0x10FFFF.  */
                  && (c < 0xf4 || s[1] < 0x90))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          if (n >= 4)
                            {
                              if ((s[3] ^ 0x80) < 0x40)
                                {
                                  *puc = ((unsigned int) (c & 0x07) << 18)
                                         | ((unsigned int) (s[1] ^ 0x80) << 12)
                                         | ((unsigned int) (s[2] ^ 0x80) << 6)
                                         | (unsigned int) (s[3] ^ 0x80);
                                  return 4;
                                }
                              /* invalid multibyte character */
                            }
                          else
                            {
                              /* incomplete multibyte character */
                              *puc = 0xfffd;
                              return -2;
                            }
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
    }
  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;
}