/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2006, 2008-2017 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <http://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>.  */

#include <config.h>

/* Specification.  */
#include "localcharset.h"

#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
#endif

#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
# define WINDOWS_NATIVE
# include <locale.h>
#endif

#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
# ifndef OS2
#  define OS2
# endif
#endif

/* Platform headers: <unistd.h> (and <langinfo.h> when available) everywhere
   except native Windows; <windows.h> on native Windows and on Cygwin.  */
#if !defined WINDOWS_NATIVE
# include <unistd.h>
# if HAVE_LANGINFO_CODESET
#  include <langinfo.h>
# else
#  if 0 /* see comment below */
#   include <locale.h>
#  endif
# endif
# ifdef __CYGWIN__
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
# endif
#elif defined WINDOWS_NATIVE
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif

/* For MB_CUR_MAX_L */
#if defined DARWIN7
# include <xlocale.h>
#endif

/* Without relocation support, relocate and relocate2 return the pathname
   unchanged; relocate2 additionally reports that nothing was allocated.  */
#if ENABLE_RELOCATABLE
# include "relocatable.h"
#else
# define relocate(pathname) (pathname)
# define relocate2(pathname,allocatedp) (*(allocatedp) = NULL, (pathname))
#endif

/* Get LIBDIR.  */
#ifndef LIBDIR
# include "configmake.h"
#endif

/* Define O_NOFOLLOW to 0 on platforms where it does not exist.  */
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif

#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
  /* Native Windows, Cygwin, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif

#ifndef DIRECTORY_SEPARATOR
# define DIRECTORY_SEPARATOR '/'
#endif

/* Everywhere else, only DIRECTORY_SEPARATOR counts as a slash.  */
#ifndef ISSLASH
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
#endif

/* Use the unlocked variant of getc when it is declared.  */
#if HAVE_DECL_GETC_UNLOCKED
# undef getc
# define getc getc_unlocked
#endif

/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function get_charset_aliases.  If we
   are running in a threaded environment, and if two threads initialize
   'charset_aliases' simultaneously, both will produce the same value,
   and everything will be ok if the two assignments to 'charset_aliases'
   are atomic.  But I don't know what will happen if the two assignments
   mix.  */
/* When __STDC__ is not 1, the 'volatile' keyword is defined away.  */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
static const char * volatile charset_aliases;

/* Return a pointer to the contents of the charset.alias file.  */
Packit 33f14e
static const char *
Packit 33f14e
get_charset_aliases (void)
Packit 33f14e
{
Packit 33f14e
  const char *cp;
Packit 33f14e
Packit 33f14e
  cp = charset_aliases;
Packit 33f14e
  if (cp == NULL)
Packit 33f14e
    {
Packit 33f14e
#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
Packit 33f14e
      char *malloc_dir = NULL;
Packit 33f14e
      const char *dir;
Packit 33f14e
      const char *base = "charset.alias";
Packit 33f14e
      char *file_name;
Packit 33f14e
Packit 33f14e
      /* Make it possible to override the charset.alias location.  This is
Packit 33f14e
         necessary for running the testsuite before "make install".  */
Packit 33f14e
      dir = getenv ("CHARSETALIASDIR");
Packit 33f14e
      if (dir == NULL || dir[0] == '\0')
Packit 33f14e
        dir = relocate2 (LIBDIR, &malloc_dir);
Packit 33f14e
Packit 33f14e
      /* Concatenate dir and base into freshly allocated file_name.  */
Packit 33f14e
      {
Packit 33f14e
        size_t dir_len = strlen (dir);
Packit 33f14e
        size_t base_len = strlen (base);
Packit 33f14e
        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
Packit 33f14e
        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
Packit 33f14e
        if (file_name != NULL)
Packit 33f14e
          {
Packit 33f14e
            memcpy (file_name, dir, dir_len);
Packit 33f14e
            if (add_slash)
Packit 33f14e
              file_name[dir_len] = DIRECTORY_SEPARATOR;
Packit 33f14e
            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
Packit 33f14e
          }
Packit 33f14e
      }
Packit 33f14e
Packit 33f14e
      free (malloc_dir);
Packit 33f14e
Packit 33f14e
      if (file_name == NULL)
Packit 33f14e
        /* Out of memory.  Treat the file as empty.  */
Packit 33f14e
        cp = "";
Packit 33f14e
      else
Packit 33f14e
        {
Packit 33f14e
          int fd;
Packit 33f14e
Packit 33f14e
          /* Open the file.  Reject symbolic links on platforms that support
Packit 33f14e
             O_NOFOLLOW.  This is a security feature.  Without it, an attacker
Packit 33f14e
             could retrieve parts of the contents (namely, the tail of the
Packit 33f14e
             first line that starts with "* ") of an arbitrary file by placing
Packit 33f14e
             a symbolic link to that file under the name "charset.alias" in
Packit 33f14e
             some writable directory and defining the environment variable
Packit 33f14e
             CHARSETALIASDIR to point to that directory.  */
Packit 33f14e
          fd = open (file_name,
Packit 33f14e
                     O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
Packit 33f14e
          if (fd < 0)
Packit 33f14e
            /* File not found.  Treat it as empty.  */
Packit 33f14e
            cp = "";
Packit 33f14e
          else
Packit 33f14e
            {
Packit 33f14e
              FILE *fp;
Packit 33f14e
Packit 33f14e
              fp = fdopen (fd, "r");
Packit 33f14e
              if (fp == NULL)
Packit 33f14e
                {
Packit 33f14e
                  /* Out of memory.  Treat the file as empty.  */
Packit 33f14e
                  close (fd);
Packit 33f14e
                  cp = "";
Packit 33f14e
                }
Packit 33f14e
              else
Packit 33f14e
                {
Packit 33f14e
                  /* Parse the file's contents.  */
Packit 33f14e
                  char *res_ptr = NULL;
Packit 33f14e
                  size_t res_size = 0;
Packit 33f14e
Packit 33f14e
                  for (;;)
Packit 33f14e
                    {
Packit 33f14e
                      int c;
Packit 33f14e
                      char buf1[50+1];
Packit 33f14e
                      char buf2[50+1];
Packit 33f14e
                      size_t l1, l2;
Packit 33f14e
                      char *old_res_ptr;
Packit 33f14e
Packit 33f14e
                      c = getc (fp);
Packit 33f14e
                      if (c == EOF)
Packit 33f14e
                        break;
Packit 33f14e
                      if (c == '\n' || c == ' ' || c == '\t')
Packit 33f14e
                        continue;
Packit 33f14e
                      if (c == '#')
Packit 33f14e
                        {
Packit 33f14e
                          /* Skip comment, to end of line.  */
Packit 33f14e
                          do
Packit 33f14e
                            c = getc (fp);
Packit 33f14e
                          while (!(c == EOF || c == '\n'));
Packit 33f14e
                          if (c == EOF)
Packit 33f14e
                            break;
Packit 33f14e
                          continue;
Packit 33f14e
                        }
Packit 33f14e
                      ungetc (c, fp);
Packit 33f14e
                      if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
Packit 33f14e
                        break;
Packit 33f14e
                      l1 = strlen (buf1);
Packit 33f14e
                      l2 = strlen (buf2);
Packit 33f14e
                      old_res_ptr = res_ptr;
Packit 33f14e
                      if (res_size == 0)
Packit 33f14e
                        {
Packit 33f14e
                          res_size = l1 + 1 + l2 + 1;
Packit 33f14e
                          res_ptr = (char *) malloc (res_size + 1);
Packit 33f14e
                        }
Packit 33f14e
                      else
Packit 33f14e
                        {
Packit 33f14e
                          res_size += l1 + 1 + l2 + 1;
Packit 33f14e
                          res_ptr = (char *) realloc (res_ptr, res_size + 1);
Packit 33f14e
                        }
Packit 33f14e
                      if (res_ptr == NULL)
Packit 33f14e
                        {
Packit 33f14e
                          /* Out of memory. */
Packit 33f14e
                          res_size = 0;
Packit 33f14e
                          free (old_res_ptr);
Packit 33f14e
                          break;
Packit 33f14e
                        }
Packit 33f14e
                      strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
Packit 33f14e
                      strcpy (res_ptr + res_size - (l2 + 1), buf2);
Packit 33f14e
                    }
Packit 33f14e
                  fclose (fp);
Packit 33f14e
                  if (res_size == 0)
Packit 33f14e
                    cp = "";
Packit 33f14e
                  else
Packit 33f14e
                    {
Packit 33f14e
                      *(res_ptr + res_size) = '\0';
Packit 33f14e
                      cp = res_ptr;
Packit 33f14e
                    }
Packit 33f14e
                }
Packit 33f14e
            }
Packit 33f14e
Packit 33f14e
          free (file_name);
Packit 33f14e
        }
Packit 33f14e
Packit 33f14e
#else
Packit 33f14e
Packit 33f14e
# if defined DARWIN7
Packit 33f14e
      /* To avoid the trouble of installing a file that is shared by many
Packit 33f14e
         GNU packages -- many packaging systems have problems with this --,
Packit 33f14e
         simply inline the aliases here.  */
Packit 33f14e
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
Packit 33f14e
           "ISO8859-2" "\0" "ISO-8859-2" "\0"
Packit 33f14e
           "ISO8859-4" "\0" "ISO-8859-4" "\0"
Packit 33f14e
           "ISO8859-5" "\0" "ISO-8859-5" "\0"
Packit 33f14e
           "ISO8859-7" "\0" "ISO-8859-7" "\0"
Packit 33f14e
           "ISO8859-9" "\0" "ISO-8859-9" "\0"
Packit 33f14e
           "ISO8859-13" "\0" "ISO-8859-13" "\0"
Packit 33f14e
           "ISO8859-15" "\0" "ISO-8859-15" "\0"
Packit 33f14e
           "KOI8-R" "\0" "KOI8-R" "\0"
Packit 33f14e
           "KOI8-U" "\0" "KOI8-U" "\0"
Packit 33f14e
           "CP866" "\0" "CP866" "\0"
Packit 33f14e
           "CP949" "\0" "CP949" "\0"
Packit 33f14e
           "CP1131" "\0" "CP1131" "\0"
Packit 33f14e
           "CP1251" "\0" "CP1251" "\0"
Packit 33f14e
           "eucCN" "\0" "GB2312" "\0"
Packit 33f14e
           "GB2312" "\0" "GB2312" "\0"
Packit 33f14e
           "eucJP" "\0" "EUC-JP" "\0"
Packit 33f14e
           "eucKR" "\0" "EUC-KR" "\0"
Packit 33f14e
           "Big5" "\0" "BIG5" "\0"
Packit 33f14e
           "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
Packit 33f14e
           "GBK" "\0" "GBK" "\0"
Packit 33f14e
           "GB18030" "\0" "GB18030" "\0"
Packit 33f14e
           "SJIS" "\0" "SHIFT_JIS" "\0"
Packit 33f14e
           "ARMSCII-8" "\0" "ARMSCII-8" "\0"
Packit 33f14e
           "PT154" "\0" "PT154" "\0"
Packit 33f14e
         /*"ISCII-DEV" "\0" "?" "\0"*/
Packit 33f14e
           "*" "\0" "UTF-8" "\0";
Packit 33f14e
# endif
Packit 33f14e
Packit 33f14e
# if defined VMS
Packit 33f14e
      /* To avoid the troubles of an extra file charset.alias_vms in the
Packit 33f14e
         sources of many GNU packages, simply inline the aliases here.  */
Packit 33f14e
      /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
Packit 33f14e
         "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
Packit 33f14e
         section 10.7 "Handling Different Character Sets".  */
Packit 33f14e
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
Packit 33f14e
           "ISO8859-2" "\0" "ISO-8859-2" "\0"
Packit 33f14e
           "ISO8859-5" "\0" "ISO-8859-5" "\0"
Packit 33f14e
           "ISO8859-7" "\0" "ISO-8859-7" "\0"
Packit 33f14e
           "ISO8859-8" "\0" "ISO-8859-8" "\0"
Packit 33f14e
           "ISO8859-9" "\0" "ISO-8859-9" "\0"
Packit 33f14e
           /* Japanese */
Packit 33f14e
           "eucJP" "\0" "EUC-JP" "\0"
Packit 33f14e
           "SJIS" "\0" "SHIFT_JIS" "\0"
Packit 33f14e
           "DECKANJI" "\0" "DEC-KANJI" "\0"
Packit 33f14e
           "SDECKANJI" "\0" "EUC-JP" "\0"
Packit 33f14e
           /* Chinese */
Packit 33f14e
           "eucTW" "\0" "EUC-TW" "\0"
Packit 33f14e
           "DECHANYU" "\0" "DEC-HANYU" "\0"
Packit 33f14e
           "DECHANZI" "\0" "GB2312" "\0"
Packit 33f14e
           /* Korean */
Packit 33f14e
           "DECKOREAN" "\0" "EUC-KR" "\0";
Packit 33f14e
# endif
Packit 33f14e
Packit 33f14e
# if defined WINDOWS_NATIVE || defined __CYGWIN__
Packit 33f14e
      /* To avoid the troubles of installing a separate file in the same
Packit 33f14e
         directory as the DLL and of retrieving the DLL's directory at
Packit 33f14e
         runtime, simply inline the aliases here.  */
Packit 33f14e
Packit 33f14e
      cp = "CP936" "\0" "GBK" "\0"
Packit 33f14e
           "CP1361" "\0" "JOHAB" "\0"
Packit 33f14e
           "CP20127" "\0" "ASCII" "\0"
Packit 33f14e
           "CP20866" "\0" "KOI8-R" "\0"
Packit 33f14e
           "CP20936" "\0" "GB2312" "\0"
Packit 33f14e
           "CP21866" "\0" "KOI8-RU" "\0"
Packit 33f14e
           "CP28591" "\0" "ISO-8859-1" "\0"
Packit 33f14e
           "CP28592" "\0" "ISO-8859-2" "\0"
Packit 33f14e
           "CP28593" "\0" "ISO-8859-3" "\0"
Packit 33f14e
           "CP28594" "\0" "ISO-8859-4" "\0"
Packit 33f14e
           "CP28595" "\0" "ISO-8859-5" "\0"
Packit 33f14e
           "CP28596" "\0" "ISO-8859-6" "\0"
Packit 33f14e
           "CP28597" "\0" "ISO-8859-7" "\0"
Packit 33f14e
           "CP28598" "\0" "ISO-8859-8" "\0"
Packit 33f14e
           "CP28599" "\0" "ISO-8859-9" "\0"
Packit 33f14e
           "CP28605" "\0" "ISO-8859-15" "\0"
Packit 33f14e
           "CP38598" "\0" "ISO-8859-8" "\0"
Packit 33f14e
           "CP51932" "\0" "EUC-JP" "\0"
Packit 33f14e
           "CP51936" "\0" "GB2312" "\0"
Packit 33f14e
           "CP51949" "\0" "EUC-KR" "\0"
Packit 33f14e
           "CP51950" "\0" "EUC-TW" "\0"
Packit 33f14e
           "CP54936" "\0" "GB18030" "\0"
Packit 33f14e
           "CP65001" "\0" "UTF-8" "\0";
Packit 33f14e
# endif
Packit 33f14e
# if defined OS2
Packit 33f14e
      /* To avoid the troubles of installing a separate file in the same
Packit 33f14e
         directory as the DLL and of retrieving the DLL's directory at
Packit 33f14e
         runtime, simply inline the aliases here.  */
Packit 33f14e
Packit 33f14e
      /* The list of encodings is taken from "List of OS/2 Codepages"
Packit 33f14e
         by Alex Taylor:
Packit 33f14e
         <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
Packit 33f14e
         See also "IBM Globalization - Code page identifiers":
Packit 33f14e
         <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
Packit 33f14e
      cp = "CP813" "\0" "ISO-8859-7" "\0"
Packit 33f14e
           "CP878" "\0" "KOI8-R" "\0"
Packit 33f14e
           "CP819" "\0" "ISO-8859-1" "\0"
Packit 33f14e
           "CP912" "\0" "ISO-8859-2" "\0"
Packit 33f14e
           "CP913" "\0" "ISO-8859-3" "\0"
Packit 33f14e
           "CP914" "\0" "ISO-8859-4" "\0"
Packit 33f14e
           "CP915" "\0" "ISO-8859-5" "\0"
Packit 33f14e
           "CP916" "\0" "ISO-8859-8" "\0"
Packit 33f14e
           "CP920" "\0" "ISO-8859-9" "\0"
Packit 33f14e
           "CP921" "\0" "ISO-8859-13" "\0"
Packit 33f14e
           "CP923" "\0" "ISO-8859-15" "\0"
Packit 33f14e
           "CP954" "\0" "EUC-JP" "\0"
Packit 33f14e
           "CP964" "\0" "EUC-TW" "\0"
Packit 33f14e
           "CP970" "\0" "EUC-KR" "\0"
Packit 33f14e
           "CP1089" "\0" "ISO-8859-6" "\0"
Packit 33f14e
           "CP1208" "\0" "UTF-8" "\0"
Packit 33f14e
           "CP1381" "\0" "GB2312" "\0"
Packit 33f14e
           "CP1386" "\0" "GBK" "\0"
Packit 33f14e
           "CP3372" "\0" "EUC-JP" "\0";
Packit 33f14e
# endif
Packit 33f14e
#endif
Packit 33f14e
Packit 33f14e
      charset_aliases = cp;
Packit 33f14e
    }
Packit 33f14e
Packit 33f14e
  return cp;
Packit 33f14e
}
Packit 33f14e
Packit 33f14e
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.
   The result must not be freed; it is statically allocated.
   If the canonical name cannot be determined, the result is a non-canonical
   name.

   The raw codeset name is obtained in a platform-specific way
   (nl_langinfo (CODESET), the LC_ALL / LC_CTYPE / LANG environment
   variables, setlocale, GetACP or DosQueryCp) and then looked up in the
   table returned by get_charset_aliases.  The result is never an empty
   string: "ASCII" is returned instead.

   Note: on Cygwin and OS/2, when the environment's locale name contains an
   explicit ".ENCODING" part, that part is returned directly, without
   being looked up in the alias table.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WINDOWS_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      /* Room for "CP", up to 10 digits, and the terminating NUL.  */
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier points into the string that starts at dot, so the
                 difference is non-negative.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WINDOWS_NATIVE

  /* Room for "CP", up to 10 characters, and the terminating NUL.  */
  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (current_locale != NULL && strchr (current_locale, ';'))
    current_locale = setlocale (LC_CTYPE, NULL);

  /* Be defensive: if setlocale returned NULL, fall back to GetACP() below
     instead of dereferencing a null pointer.  */
  pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
        number: GetACP().
        When the output goes to a console window, it needs to be provided in
        GetOEMCP() encoding if the console is using a raster font, or in
        GetConsoleOutputCP() encoding if it is using a TrueType font.
        But in GUI programs and for output sent to files and pipes, GetACP()
        encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  /* A locale name may end in ".utf8" rather than in a numeric codepage
     (e.g. "French_France.utf8").  "CPutf8" is not in the alias table, so
     map it to the canonical name here.  */
  if (strcmp (buf + 2, "utf8") == 0)
    codeset = "UTF-8";
  else
    codeset = buf;

#elif defined OS2

  const char *locale;
  /* Room for "CP", up to 10 digits, and the terminating NUL.  */
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier points into the string that starts at dot, so the
             difference is non-negative.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* cp[0] is a ULONG; convert it explicitly so that the argument
             type matches the %u conversion.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table is a list of (alias, canonical) string pairs
     terminated by an empty string; the alias "*" matches any codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}