Blame gettext-runtime/gnulib-lib/localcharset.c

Packit 5b56b6
/* Determine a canonical name for the current locale's character encoding.
Packit 5b56b6
Packit 5b56b6
   Copyright (C) 2000-2006, 2008-2015 Free Software Foundation, Inc.
Packit 5b56b6
Packit 5b56b6
   This program is free software; you can redistribute it and/or modify
Packit 5b56b6
   it under the terms of the GNU General Public License as published by
Packit 5b56b6
   the Free Software Foundation; either version 3, or (at your option)
Packit 5b56b6
   any later version.
Packit 5b56b6
Packit 5b56b6
   This program is distributed in the hope that it will be useful,
Packit 5b56b6
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 5b56b6
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 5b56b6
   GNU General Public License for more details.
Packit 5b56b6
Packit 5b56b6
   You should have received a copy of the GNU General Public License along
Packit 5b56b6
   with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 5b56b6
Packit 5b56b6
/* Written by Bruno Haible <bruno@clisp.org>.  */
Packit 5b56b6
Packit 5b56b6
#include <config.h>
Packit 5b56b6
Packit 5b56b6
/* Specification.  */
Packit 5b56b6
#include "localcharset.h"
Packit 5b56b6
Packit 5b56b6
#include <fcntl.h>
Packit 5b56b6
#include <stddef.h>
Packit 5b56b6
#include <stdio.h>
Packit 5b56b6
#include <string.h>
Packit 5b56b6
#include <stdlib.h>
Packit 5b56b6
Packit 5b56b6
#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
Packit 5b56b6
# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if defined _WIN32 || defined __WIN32__
Packit 5b56b6
# define WINDOWS_NATIVE
Packit 5b56b6
# include <locale.h>
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if defined __EMX__
Packit 5b56b6
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
Packit 5b56b6
# ifndef OS2
Packit 5b56b6
#  define OS2
Packit 5b56b6
# endif
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if !defined WINDOWS_NATIVE
Packit 5b56b6
# include <unistd.h>
Packit 5b56b6
# if HAVE_LANGINFO_CODESET
Packit 5b56b6
#  include <langinfo.h>
Packit 5b56b6
# else
Packit 5b56b6
#  if 0 /* see comment below */
Packit 5b56b6
#   include <locale.h>
Packit 5b56b6
#  endif
Packit 5b56b6
# endif
Packit 5b56b6
# ifdef __CYGWIN__
Packit 5b56b6
#  define WIN32_LEAN_AND_MEAN
Packit 5b56b6
#  include <windows.h>
Packit 5b56b6
# endif
Packit 5b56b6
#elif defined WINDOWS_NATIVE
Packit 5b56b6
# define WIN32_LEAN_AND_MEAN
Packit 5b56b6
# include <windows.h>
Packit 5b56b6
#endif
Packit 5b56b6
#if defined OS2
Packit 5b56b6
# define INCL_DOS
Packit 5b56b6
# include <os2.h>
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
/* For MB_CUR_MAX_L */
Packit 5b56b6
#if defined DARWIN7
Packit 5b56b6
# include <xlocale.h>
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if ENABLE_RELOCATABLE
Packit 5b56b6
# include "relocatable.h"
Packit 5b56b6
#else
Packit 5b56b6
# define relocate(pathname) (pathname)
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
/* Get LIBDIR.  */
Packit 5b56b6
#ifndef LIBDIR
Packit 5b56b6
# include "configmake.h"
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
/* Define O_NOFOLLOW to 0 on platforms where it does not exist.  */
Packit 5b56b6
#ifndef O_NOFOLLOW
Packit 5b56b6
# define O_NOFOLLOW 0
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
Packit 5b56b6
  /* Native Windows, Cygwin, OS/2, DOS */
Packit 5b56b6
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#ifndef DIRECTORY_SEPARATOR
Packit 5b56b6
# define DIRECTORY_SEPARATOR '/'
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#ifndef ISSLASH
Packit 5b56b6
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
#if HAVE_DECL_GETC_UNLOCKED
Packit 5b56b6
# undef getc
Packit 5b56b6
# define getc getc_unlocked
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
/* The static variable below is declared 'volatile' to avoid a possible
   multithread problem in the function get_charset_aliases.  If two threads
   initialize 'charset_aliases' simultaneously, both will produce an
   equivalent value, and everything will be ok if the two assignments to
   'charset_aliases' are atomic.  What happens if the two assignments mix
   is not known.
   NOTE(review): 'volatile' by itself does not make the pointer assignment
   atomic -- confirm whether callers need real synchronization.  */
Packit 5b56b6
#if __STDC__ != 1
Packit 5b56b6
# define volatile /* empty: this compiler is not treated as supporting 'volatile' */
Packit 5b56b6
#endif
Packit 5b56b6
/* Cache for get_charset_aliases: NULL until that function has run once,
   afterwards a pointer to the alias table (the parsed contents of the
   charset.alias file, or a table compiled into get_charset_aliases on
   platforms that do not read the file).  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
Packit 5b56b6
static const char * volatile charset_aliases;
Packit 5b56b6
Packit 5b56b6
/* Return a pointer to the contents of the charset.alias file.  */
Packit 5b56b6
static const char *
Packit 5b56b6
get_charset_aliases (void)
Packit 5b56b6
{
Packit 5b56b6
  const char *cp;
Packit 5b56b6
Packit 5b56b6
  cp = charset_aliases;
Packit 5b56b6
  if (cp == NULL)
Packit 5b56b6
    {
Packit 5b56b6
#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
Packit 5b56b6
      const char *dir;
Packit 5b56b6
      const char *base = "charset.alias";
Packit 5b56b6
      char *file_name;
Packit 5b56b6
Packit 5b56b6
      /* Make it possible to override the charset.alias location.  This is
Packit 5b56b6
         necessary for running the testsuite before "make install".  */
Packit 5b56b6
      dir = getenv ("CHARSETALIASDIR");
Packit 5b56b6
      if (dir == NULL || dir[0] == '\0')
Packit 5b56b6
        dir = relocate (LIBDIR);
Packit 5b56b6
Packit 5b56b6
      /* Concatenate dir and base into freshly allocated file_name.  */
Packit 5b56b6
      {
Packit 5b56b6
        size_t dir_len = strlen (dir);
Packit 5b56b6
        size_t base_len = strlen (base);
Packit 5b56b6
        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
Packit 5b56b6
        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
Packit 5b56b6
        if (file_name != NULL)
Packit 5b56b6
          {
Packit 5b56b6
            memcpy (file_name, dir, dir_len);
Packit 5b56b6
            if (add_slash)
Packit 5b56b6
              file_name[dir_len] = DIRECTORY_SEPARATOR;
Packit 5b56b6
            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
Packit 5b56b6
          }
Packit 5b56b6
      }
Packit 5b56b6
Packit 5b56b6
      if (file_name == NULL)
Packit 5b56b6
        /* Out of memory.  Treat the file as empty.  */
Packit 5b56b6
        cp = "";
Packit 5b56b6
      else
Packit 5b56b6
        {
Packit 5b56b6
          int fd;
Packit 5b56b6
Packit 5b56b6
          /* Open the file.  Reject symbolic links on platforms that support
Packit 5b56b6
             O_NOFOLLOW.  This is a security feature.  Without it, an attacker
Packit 5b56b6
             could retrieve parts of the contents (namely, the tail of the
Packit 5b56b6
             first line that starts with "* ") of an arbitrary file by placing
Packit 5b56b6
             a symbolic link to that file under the name "charset.alias" in
Packit 5b56b6
             some writable directory and defining the environment variable
Packit 5b56b6
             CHARSETALIASDIR to point to that directory.  */
Packit 5b56b6
          fd = open (file_name,
Packit 5b56b6
                     O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
Packit 5b56b6
          if (fd < 0)
Packit 5b56b6
            /* File not found.  Treat it as empty.  */
Packit 5b56b6
            cp = "";
Packit 5b56b6
          else
Packit 5b56b6
            {
Packit 5b56b6
              FILE *fp;
Packit 5b56b6
Packit 5b56b6
              fp = fdopen (fd, "r");
Packit 5b56b6
              if (fp == NULL)
Packit 5b56b6
                {
Packit 5b56b6
                  /* Out of memory.  Treat the file as empty.  */
Packit 5b56b6
                  close (fd);
Packit 5b56b6
                  cp = "";
Packit 5b56b6
                }
Packit 5b56b6
              else
Packit 5b56b6
                {
Packit 5b56b6
                  /* Parse the file's contents.  */
Packit 5b56b6
                  char *res_ptr = NULL;
Packit 5b56b6
                  size_t res_size = 0;
Packit 5b56b6
Packit 5b56b6
                  for (;;)
Packit 5b56b6
                    {
Packit 5b56b6
                      int c;
Packit 5b56b6
                      char buf1[50+1];
Packit 5b56b6
                      char buf2[50+1];
Packit 5b56b6
                      size_t l1, l2;
Packit 5b56b6
                      char *old_res_ptr;
Packit 5b56b6
Packit 5b56b6
                      c = getc (fp);
Packit 5b56b6
                      if (c == EOF)
Packit 5b56b6
                        break;
Packit 5b56b6
                      if (c == '\n' || c == ' ' || c == '\t')
Packit 5b56b6
                        continue;
Packit 5b56b6
                      if (c == '#')
Packit 5b56b6
                        {
Packit 5b56b6
                          /* Skip comment, to end of line.  */
Packit 5b56b6
                          do
Packit 5b56b6
                            c = getc (fp);
Packit 5b56b6
                          while (!(c == EOF || c == '\n'));
Packit 5b56b6
                          if (c == EOF)
Packit 5b56b6
                            break;
Packit 5b56b6
                          continue;
Packit 5b56b6
                        }
Packit 5b56b6
                      ungetc (c, fp);
Packit 5b56b6
                      if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
Packit 5b56b6
                        break;
Packit 5b56b6
                      l1 = strlen (buf1);
Packit 5b56b6
                      l2 = strlen (buf2);
Packit 5b56b6
                      old_res_ptr = res_ptr;
Packit 5b56b6
                      if (res_size == 0)
Packit 5b56b6
                        {
Packit 5b56b6
                          res_size = l1 + 1 + l2 + 1;
Packit 5b56b6
                          res_ptr = (char *) malloc (res_size + 1);
Packit 5b56b6
                        }
Packit 5b56b6
                      else
Packit 5b56b6
                        {
Packit 5b56b6
                          res_size += l1 + 1 + l2 + 1;
Packit 5b56b6
                          res_ptr = (char *) realloc (res_ptr, res_size + 1);
Packit 5b56b6
                        }
Packit 5b56b6
                      if (res_ptr == NULL)
Packit 5b56b6
                        {
Packit 5b56b6
                          /* Out of memory. */
Packit 5b56b6
                          res_size = 0;
Packit 5b56b6
                          free (old_res_ptr);
Packit 5b56b6
                          break;
Packit 5b56b6
                        }
Packit 5b56b6
                      strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
Packit 5b56b6
                      strcpy (res_ptr + res_size - (l2 + 1), buf2);
Packit 5b56b6
                    }
Packit 5b56b6
                  fclose (fp);
Packit 5b56b6
                  if (res_size == 0)
Packit 5b56b6
                    cp = "";
Packit 5b56b6
                  else
Packit 5b56b6
                    {
Packit 5b56b6
                      *(res_ptr + res_size) = '\0';
Packit 5b56b6
                      cp = res_ptr;
Packit 5b56b6
                    }
Packit 5b56b6
                }
Packit 5b56b6
            }
Packit 5b56b6
Packit 5b56b6
          free (file_name);
Packit 5b56b6
        }
Packit 5b56b6
Packit 5b56b6
#else
Packit 5b56b6
Packit 5b56b6
# if defined DARWIN7
Packit 5b56b6
      /* To avoid the trouble of installing a file that is shared by many
Packit 5b56b6
         GNU packages -- many packaging systems have problems with this --,
Packit 5b56b6
         simply inline the aliases here.  */
Packit 5b56b6
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
Packit 5b56b6
           "ISO8859-2" "\0" "ISO-8859-2" "\0"
Packit 5b56b6
           "ISO8859-4" "\0" "ISO-8859-4" "\0"
Packit 5b56b6
           "ISO8859-5" "\0" "ISO-8859-5" "\0"
Packit 5b56b6
           "ISO8859-7" "\0" "ISO-8859-7" "\0"
Packit 5b56b6
           "ISO8859-9" "\0" "ISO-8859-9" "\0"
Packit 5b56b6
           "ISO8859-13" "\0" "ISO-8859-13" "\0"
Packit 5b56b6
           "ISO8859-15" "\0" "ISO-8859-15" "\0"
Packit 5b56b6
           "KOI8-R" "\0" "KOI8-R" "\0"
Packit 5b56b6
           "KOI8-U" "\0" "KOI8-U" "\0"
Packit 5b56b6
           "CP866" "\0" "CP866" "\0"
Packit 5b56b6
           "CP949" "\0" "CP949" "\0"
Packit 5b56b6
           "CP1131" "\0" "CP1131" "\0"
Packit 5b56b6
           "CP1251" "\0" "CP1251" "\0"
Packit 5b56b6
           "eucCN" "\0" "GB2312" "\0"
Packit 5b56b6
           "GB2312" "\0" "GB2312" "\0"
Packit 5b56b6
           "eucJP" "\0" "EUC-JP" "\0"
Packit 5b56b6
           "eucKR" "\0" "EUC-KR" "\0"
Packit 5b56b6
           "Big5" "\0" "BIG5" "\0"
Packit 5b56b6
           "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
Packit 5b56b6
           "GBK" "\0" "GBK" "\0"
Packit 5b56b6
           "GB18030" "\0" "GB18030" "\0"
Packit 5b56b6
           "SJIS" "\0" "SHIFT_JIS" "\0"
Packit 5b56b6
           "ARMSCII-8" "\0" "ARMSCII-8" "\0"
Packit 5b56b6
           "PT154" "\0" "PT154" "\0"
Packit 5b56b6
         /*"ISCII-DEV" "\0" "?" "\0"*/
Packit 5b56b6
           "*" "\0" "UTF-8" "\0";
Packit 5b56b6
# endif
Packit 5b56b6
Packit 5b56b6
# if defined VMS
Packit 5b56b6
      /* To avoid the troubles of an extra file charset.alias_vms in the
Packit 5b56b6
         sources of many GNU packages, simply inline the aliases here.  */
Packit 5b56b6
      /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
Packit 5b56b6
         "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
Packit 5b56b6
         section 10.7 "Handling Different Character Sets".  */
Packit 5b56b6
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
Packit 5b56b6
           "ISO8859-2" "\0" "ISO-8859-2" "\0"
Packit 5b56b6
           "ISO8859-5" "\0" "ISO-8859-5" "\0"
Packit 5b56b6
           "ISO8859-7" "\0" "ISO-8859-7" "\0"
Packit 5b56b6
           "ISO8859-8" "\0" "ISO-8859-8" "\0"
Packit 5b56b6
           "ISO8859-9" "\0" "ISO-8859-9" "\0"
Packit 5b56b6
           /* Japanese */
Packit 5b56b6
           "eucJP" "\0" "EUC-JP" "\0"
Packit 5b56b6
           "SJIS" "\0" "SHIFT_JIS" "\0"
Packit 5b56b6
           "DECKANJI" "\0" "DEC-KANJI" "\0"
Packit 5b56b6
           "SDECKANJI" "\0" "EUC-JP" "\0"
Packit 5b56b6
           /* Chinese */
Packit 5b56b6
           "eucTW" "\0" "EUC-TW" "\0"
Packit 5b56b6
           "DECHANYU" "\0" "DEC-HANYU" "\0"
Packit 5b56b6
           "DECHANZI" "\0" "GB2312" "\0"
Packit 5b56b6
           /* Korean */
Packit 5b56b6
           "DECKOREAN" "\0" "EUC-KR" "\0";
Packit 5b56b6
# endif
Packit 5b56b6
Packit 5b56b6
# if defined WINDOWS_NATIVE || defined __CYGWIN__
Packit 5b56b6
      /* To avoid the troubles of installing a separate file in the same
Packit 5b56b6
         directory as the DLL and of retrieving the DLL's directory at
Packit 5b56b6
         runtime, simply inline the aliases here.  */
Packit 5b56b6
Packit 5b56b6
      cp = "CP936" "\0" "GBK" "\0"
Packit 5b56b6
           "CP1361" "\0" "JOHAB" "\0"
Packit 5b56b6
           "CP20127" "\0" "ASCII" "\0"
Packit 5b56b6
           "CP20866" "\0" "KOI8-R" "\0"
Packit 5b56b6
           "CP20936" "\0" "GB2312" "\0"
Packit 5b56b6
           "CP21866" "\0" "KOI8-RU" "\0"
Packit 5b56b6
           "CP28591" "\0" "ISO-8859-1" "\0"
Packit 5b56b6
           "CP28592" "\0" "ISO-8859-2" "\0"
Packit 5b56b6
           "CP28593" "\0" "ISO-8859-3" "\0"
Packit 5b56b6
           "CP28594" "\0" "ISO-8859-4" "\0"
Packit 5b56b6
           "CP28595" "\0" "ISO-8859-5" "\0"
Packit 5b56b6
           "CP28596" "\0" "ISO-8859-6" "\0"
Packit 5b56b6
           "CP28597" "\0" "ISO-8859-7" "\0"
Packit 5b56b6
           "CP28598" "\0" "ISO-8859-8" "\0"
Packit 5b56b6
           "CP28599" "\0" "ISO-8859-9" "\0"
Packit 5b56b6
           "CP28605" "\0" "ISO-8859-15" "\0"
Packit 5b56b6
           "CP38598" "\0" "ISO-8859-8" "\0"
Packit 5b56b6
           "CP51932" "\0" "EUC-JP" "\0"
Packit 5b56b6
           "CP51936" "\0" "GB2312" "\0"
Packit 5b56b6
           "CP51949" "\0" "EUC-KR" "\0"
Packit 5b56b6
           "CP51950" "\0" "EUC-TW" "\0"
Packit 5b56b6
           "CP54936" "\0" "GB18030" "\0"
Packit 5b56b6
           "CP65001" "\0" "UTF-8" "\0";
Packit 5b56b6
# endif
Packit 5b56b6
# if defined OS2
Packit 5b56b6
      /* To avoid the troubles of installing a separate file in the same
Packit 5b56b6
         directory as the DLL and of retrieving the DLL's directory at
Packit 5b56b6
         runtime, simply inline the aliases here.  */
Packit 5b56b6
Packit 5b56b6
      /* The list of encodings is taken from "List of OS/2 Codepages"
Packit 5b56b6
         by Alex Taylor:
Packit 5b56b6
         <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
Packit 5b56b6
         See also "IBM Globalization - Code page identifiers":
Packit 5b56b6
         <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
Packit 5b56b6
      cp = "CP813" "\0" "ISO-8859-7" "\0"
Packit 5b56b6
           "CP878" "\0" "KOI8-R" "\0"
Packit 5b56b6
           "CP819" "\0" "ISO-8859-1" "\0"
Packit 5b56b6
           "CP912" "\0" "ISO-8859-2" "\0"
Packit 5b56b6
           "CP913" "\0" "ISO-8859-3" "\0"
Packit 5b56b6
           "CP914" "\0" "ISO-8859-4" "\0"
Packit 5b56b6
           "CP915" "\0" "ISO-8859-5" "\0"
Packit 5b56b6
           "CP916" "\0" "ISO-8859-8" "\0"
Packit 5b56b6
           "CP920" "\0" "ISO-8859-9" "\0"
Packit 5b56b6
           "CP921" "\0" "ISO-8859-13" "\0"
Packit 5b56b6
           "CP923" "\0" "ISO-8859-15" "\0"
Packit 5b56b6
           "CP954" "\0" "EUC-JP" "\0"
Packit 5b56b6
           "CP964" "\0" "EUC-TW" "\0"
Packit 5b56b6
           "CP970" "\0" "EUC-KR" "\0"
Packit 5b56b6
           "CP1089" "\0" "ISO-8859-6" "\0"
Packit 5b56b6
           "CP1208" "\0" "UTF-8" "\0"
Packit 5b56b6
           "CP1381" "\0" "GB2312" "\0"
Packit 5b56b6
           "CP1386" "\0" "GBK" "\0"
Packit 5b56b6
           "CP3372" "\0" "EUC-JP" "\0";
Packit 5b56b6
# endif
Packit 5b56b6
#endif
Packit 5b56b6
Packit 5b56b6
      charset_aliases = cp;
Packit 5b56b6
    }
Packit 5b56b6
Packit 5b56b6
  return cp;
Packit 5b56b6
}
Packit 5b56b6
Packit 5b56b6
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.

   The raw encoding name is obtained in a platform-specific way:
     - nl_langinfo (CODESET) where available (with a fallback for old
       Cygwin versions that always report "US-ASCII"),
     - the LC_ALL / LC_CTYPE / LANG environment variables on old systems
       that lack nl_langinfo,
     - setlocale and GetACP on native Windows,
     - the environment variables and DosQueryCp on OS/2.
   It is then mapped through the table returned by get_charset_aliases;
   an alias "*" in that table matches every name.

   The result is never NULL and never the empty string ("ASCII" is returned
   instead).  It must not be freed: it points to a string literal, to a
   static buffer that later calls overwrite, to the alias table, or (on
   Cygwin and OS/2) into the value of an environment variable.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WINDOWS_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot, so the difference is non-negative; only
                 copy the name when it fits together with its NUL.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WINDOWS_NATIVE

  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (current_locale != NULL && strchr (current_locale, ';'))
    current_locale = setlocale (LC_CTYPE, NULL);

  /* Guard against a NULL result from setlocale instead of dereferencing
     it.  */
  pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);

  /* Only use the suffix after the dot if "CP" + suffix + NUL fits into buf;
     an unchecked sprintf here would overflow the static buffer for long
     suffixes.  */
  if (pdot != NULL && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
        number: GetACP().
        When the output goes to a console window, it needs to be provided in
        GetOEMCP() encoding if the console is using a raster font, or in
        GetConsoleOutputCP() encoding if it is using a TrueType font.
        But in GUI programs and for output sent to files and pipes, GetACP()
        encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot, so the difference is non-negative; only copy
             the name when it fits together with its NUL.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* cp[0] is a ULONG; convert it explicitly so that the argument
             type matches the %u conversion.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table consists of (alias, canonical) string pairs,
     so each iteration skips two strings.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}