Blame lib/unistr/u8-mbtoucr.c

Packit Service a2489d
/* Look at first character in UTF-8 string, returning an error code.
   Copyright (C) 1999-2002, 2006-2007, 2009-2018 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2001.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
Packit Service a2489d
Packit Service a2489d
#include <config.h>
Packit Service a2489d
Packit Service a2489d
/* Specification.  */
Packit Service a2489d
#include "unistr.h"
Packit Service a2489d
Packit Service a2489d
/* Decode the first character of the UTF-8 string S, of which N units are
   available, and store its code point in *PUC.

   S[0] is read unconditionally, so the caller must pass N >= 1.

   Return value:
     1..4  the number of units consumed; *PUC holds the decoded character.
     -1    the units at S cannot start a valid character (stray continuation
           byte, overlong form, surrogate, value above U+10FFFF, or a lead
           byte that can never begin a character); *PUC is set to 0xfffd.
     -2    the N available units are a proper prefix of a valid character;
           *PUC is set to 0xfffd.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Single unit: ASCII.  */
      *puc = c;
      return 1;
    }
  else if (c >= 0xc2)
    {
      /* 0x80..0xbf are continuation bytes, and 0xc0/0xc1 could only start
         overlong two-unit forms; all of those fall through to "invalid".  */
      if (c < 0xe0)
        {
          /* Two-unit sequence: U+0080..U+07FF.  */
          if (n >= 2)
            {
              /* (x ^ 0x80) < 0x40 holds exactly when x is in 0x80..0xbf.  */
              if ((s[1] ^ 0x80) < 0x40)
                {
                  *puc = ((unsigned int) (c & 0x1f) << 6)
                         | (unsigned int) (s[1] ^ 0x80);
                  return 2;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c < 0xf0)
        {
          /* Three-unit sequence: U+0800..U+FFFF without the surrogates.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* 0xe0 followed by 0x80..0x9f would be overlong.  */
                  && (c >= 0xe1 || s[1] >= 0xa0)
                  /* 0xed followed by 0xa0..0xbf would be a surrogate.  */
                  && (c != 0xed || s[1] < 0xa0))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          *puc = ((unsigned int) (c & 0x0f) << 12)
                                 | ((unsigned int) (s[1] ^ 0x80) << 6)
                                 | (unsigned int) (s[2] ^ 0x80);
                          return 3;
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c <= 0xf4)
        {
          /* Four-unit sequence: U+10000..U+10FFFF.
             Lead bytes above 0xf4 are deliberately excluded here: they would
             encode values beyond U+10FFFF whatever follows, so they are
             invalid even when N == 1 rather than merely incomplete.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* 0xf0 followed by 0x80..0x8f would be overlong.  */
                  && (c >= 0xf1 || s[1] >= 0x90)
                  /* 0xf4 followed by 0x90..0xbf would exceed U+10FFFF.  */
                  && (c < 0xf4 || s[1] < 0x90))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          if (n >= 4)
                            {
                              if ((s[3] ^ 0x80) < 0x40)
                                {
                                  *puc = ((unsigned int) (c & 0x07) << 18)
                                         | ((unsigned int) (s[1] ^ 0x80) << 12)
                                         | ((unsigned int) (s[2] ^ 0x80) << 6)
                                         | (unsigned int) (s[3] ^ 0x80);
                                  return 4;
                                }
                              /* invalid multibyte character */
                            }
                          else
                            {
                              /* incomplete multibyte character */
                              *puc = 0xfffd;
                              return -2;
                            }
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
    }
  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;
}