/* Look at first character in UTF-8 string, returning an error code.
   Copyright (C) 1999-2002, 2006-2007, 2009-2018 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2001.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "unistr.h"

/* Decode the first character of the UTF-8 string S, of which N bytes are
   available, and store the result in *PUC.

   S[0] is read unconditionally, so N must be at least 1.  Bytes beyond
   S[N-1] are never accessed.

   Return value:
     1..4  the number of bytes of the well-formed character now in *PUC;
     -1    the bytes at S cannot begin a well-formed character, regardless
           of what might follow; *PUC is set to 0xfffd;
     -2    the N available bytes are a proper prefix of a well-formed
           character (more input is needed); *PUC is set to 0xfffd.

   Overlong encodings, surrogates (U+D800..U+DFFF) and values above
   U+10FFFF are rejected as invalid.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Single-byte (ASCII) character.  */
      *puc = c;
      return 1;
    }
  else if (c >= 0xc2)
    {
      /* Lead bytes 0x80..0xbf are stray continuation bytes and 0xc0..0xc1
         could only start overlong encodings; both fall through to the
         "invalid" exit at the bottom.  */
      if (c < 0xe0)
        {
          /* Two-byte sequence: 0xc2..0xdf, then one continuation byte.  */
          if (n >= 2)
            {
              /* (b ^ 0x80) < 0x40 holds exactly when b is 0x80..0xbf.  */
              if ((s[1] ^ 0x80) < 0x40)
                {
                  *puc = ((unsigned int) (c & 0x1f) << 6)
                         | (unsigned int) (s[1] ^ 0x80);
                  return 2;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c < 0xf0)
        {
          /* Three-byte sequence: 0xe0..0xef, then two continuation bytes.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* 0xe0 followed by 0x80..0x9f would be overlong.  */
                  && (c >= 0xe1 || s[1] >= 0xa0)
                  /* 0xed followed by 0xa0..0xbf would be a surrogate.  */
                  && (c != 0xed || s[1] < 0xa0))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          *puc = ((unsigned int) (c & 0x0f) << 12)
                                 | ((unsigned int) (s[1] ^ 0x80) << 6)
                                 | (unsigned int) (s[2] ^ 0x80);
                          return 3;
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c <= 0xf4)
        {
          /* Four-byte sequence: 0xf0..0xf4, then three continuation bytes.
             Lead bytes 0xf5 and above would encode values beyond U+10FFFF,
             so they are not accepted here; they are reported as invalid
             (-1) even when N == 1, never as incomplete.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* 0xf0 followed by 0x80..0x8f would be overlong.  */
                  && (c >= 0xf1 || s[1] >= 0x90)
                  /* 0xf4 followed by 0x90..0xbf would exceed U+10FFFF.  */
                  && (c < 0xf4 || s[1] < 0x90))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          if (n >= 4)
                            {
                              if ((s[3] ^ 0x80) < 0x40)
                                {
                                  *puc = ((unsigned int) (c & 0x07) << 18)
                                         | ((unsigned int) (s[1] ^ 0x80) << 12)
                                         | ((unsigned int) (s[2] ^ 0x80) << 6)
                                         | (unsigned int) (s[3] ^ 0x80);
                                  return 4;
                                }
                              /* invalid multibyte character */
                            }
                          else
                            {
                              /* incomplete multibyte character */
                              *puc = 0xfffd;
                              return -2;
                            }
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
    }
  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;
}