Blame gnu/localcharset.c

Packit 1ef1a9
/* Determine a canonical name for the current locale's character encoding.
Packit 1ef1a9
Packit 1ef1a9
   Copyright (C) 2000-2006, 2008-2015 Free Software Foundation, Inc.
Packit 1ef1a9
Packit 1ef1a9
   This program is free software; you can redistribute it and/or modify
Packit 1ef1a9
   it under the terms of the GNU General Public License as published by
Packit 1ef1a9
   the Free Software Foundation; either version 3, or (at your option)
Packit 1ef1a9
   any later version.
Packit 1ef1a9
Packit 1ef1a9
   This program is distributed in the hope that it will be useful,
Packit 1ef1a9
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 1ef1a9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 1ef1a9
   GNU General Public License for more details.
Packit 1ef1a9
Packit 1ef1a9
   You should have received a copy of the GNU General Public License along
Packit 1ef1a9
   with this program; if not, see <http://www.gnu.org/licenses/>.  */
Packit 1ef1a9
Packit 1ef1a9
/* Written by Bruno Haible <bruno@clisp.org>.  */
Packit 1ef1a9
Packit 1ef1a9
#include <config.h>
Packit 1ef1a9
Packit 1ef1a9
/* Specification.  */
Packit 1ef1a9
#include "localcharset.h"
Packit 1ef1a9
Packit 1ef1a9
#include <fcntl.h>
Packit 1ef1a9
#include <stddef.h>
Packit 1ef1a9
#include <stdio.h>
Packit 1ef1a9
#include <string.h>
Packit 1ef1a9
#include <stdlib.h>
Packit 1ef1a9
Packit 1ef1a9
#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
Packit 1ef1a9
# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#if defined _WIN32 || defined __WIN32__
Packit 1ef1a9
# define WINDOWS_NATIVE
Packit 1ef1a9
# include <locale.h>
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#if defined __EMX__
Packit 1ef1a9
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
Packit 1ef1a9
# ifndef OS2
Packit 1ef1a9
#  define OS2
Packit 1ef1a9
# endif
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#if !defined WINDOWS_NATIVE
Packit 1ef1a9
# include <unistd.h>
Packit 1ef1a9
# if HAVE_LANGINFO_CODESET
Packit 1ef1a9
#  include <langinfo.h>
Packit 1ef1a9
# else
Packit 1ef1a9
#  if 0 /* see comment below */
Packit 1ef1a9
#   include <locale.h>
Packit 1ef1a9
#  endif
Packit 1ef1a9
# endif
Packit 1ef1a9
# ifdef __CYGWIN__
Packit 1ef1a9
#  define WIN32_LEAN_AND_MEAN
Packit 1ef1a9
#  include <windows.h>
Packit 1ef1a9
# endif
Packit 1ef1a9
#elif defined WINDOWS_NATIVE
Packit 1ef1a9
# define WIN32_LEAN_AND_MEAN
Packit 1ef1a9
# include <windows.h>
Packit 1ef1a9
#endif
Packit 1ef1a9
#if defined OS2
Packit 1ef1a9
# define INCL_DOS
Packit 1ef1a9
# include <os2.h>
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
/* For MB_CUR_MAX_L */
Packit 1ef1a9
#if defined DARWIN7
Packit 1ef1a9
# include <xlocale.h>
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#if ENABLE_RELOCATABLE
Packit 1ef1a9
# include "relocatable.h"
Packit 1ef1a9
#else
Packit 1ef1a9
# define relocate(pathname) (pathname)
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
/* Get LIBDIR.  */
Packit 1ef1a9
#ifndef LIBDIR
Packit 1ef1a9
# include "configmake.h"
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
/* Define O_NOFOLLOW to 0 on platforms where it does not exist.  */
Packit 1ef1a9
#ifndef O_NOFOLLOW
Packit 1ef1a9
# define O_NOFOLLOW 0
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
Packit 1ef1a9
  /* Native Windows, Cygwin, OS/2, DOS */
Packit 1ef1a9
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#ifndef DIRECTORY_SEPARATOR
Packit 1ef1a9
# define DIRECTORY_SEPARATOR '/'
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#ifndef ISSLASH
Packit 1ef1a9
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
#if HAVE_DECL_GETC_UNLOCKED
Packit 1ef1a9
# undef getc
Packit 1ef1a9
# define getc getc_unlocked
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
/* The following static variable is declared 'volatile' to avoid a
Packit 1ef1a9
   possible multithread problem in the function get_charset_aliases. If we
Packit 1ef1a9
   are running in a threaded environment, and if two threads initialize
Packit 1ef1a9
   'charset_aliases' simultaneously, both will produce the same value,
Packit 1ef1a9
   and everything will be ok if the two assignments to 'charset_aliases'
Packit 1ef1a9
   are atomic. But I don't know what will happen if the two assignments mix.  */
Packit 1ef1a9
#if __STDC__ != 1
Packit 1ef1a9
# define volatile /* empty */
Packit 1ef1a9
#endif
Packit 1ef1a9
/* Pointer to the contents of the charset.alias file, if it has already been
Packit 1ef1a9
   read, else NULL.  Its format is:
Packit 1ef1a9
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
Packit 1ef1a9
static const char * volatile charset_aliases;
Packit 1ef1a9
Packit 1ef1a9
/* Return a pointer to the contents of the charset.alias file.  */
Packit 1ef1a9
static const char *
Packit 1ef1a9
get_charset_aliases (void)
Packit 1ef1a9
{
Packit 1ef1a9
  const char *cp;
Packit 1ef1a9
Packit 1ef1a9
  cp = charset_aliases;
Packit 1ef1a9
  if (cp == NULL)
Packit 1ef1a9
    {
Packit 1ef1a9
#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
Packit 1ef1a9
      const char *dir;
Packit 1ef1a9
      const char *base = "charset.alias";
Packit 1ef1a9
      char *file_name;
Packit 1ef1a9
Packit 1ef1a9
      /* Make it possible to override the charset.alias location.  This is
Packit 1ef1a9
         necessary for running the testsuite before "make install".  */
Packit 1ef1a9
      dir = getenv ("CHARSETALIASDIR");
Packit 1ef1a9
      if (dir == NULL || dir[0] == '\0')
Packit 1ef1a9
        dir = relocate (LIBDIR);
Packit 1ef1a9
Packit 1ef1a9
      /* Concatenate dir and base into freshly allocated file_name.  */
Packit 1ef1a9
      {
Packit 1ef1a9
        size_t dir_len = strlen (dir);
Packit 1ef1a9
        size_t base_len = strlen (base);
Packit 1ef1a9
        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
Packit 1ef1a9
        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
Packit 1ef1a9
        if (file_name != NULL)
Packit 1ef1a9
          {
Packit 1ef1a9
            memcpy (file_name, dir, dir_len);
Packit 1ef1a9
            if (add_slash)
Packit 1ef1a9
              file_name[dir_len] = DIRECTORY_SEPARATOR;
Packit 1ef1a9
            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
Packit 1ef1a9
          }
Packit 1ef1a9
      }
Packit 1ef1a9
Packit 1ef1a9
      if (file_name == NULL)
Packit 1ef1a9
        /* Out of memory.  Treat the file as empty.  */
Packit 1ef1a9
        cp = "";
Packit 1ef1a9
      else
Packit 1ef1a9
        {
Packit 1ef1a9
          int fd;
Packit 1ef1a9
Packit 1ef1a9
          /* Open the file.  Reject symbolic links on platforms that support
Packit 1ef1a9
             O_NOFOLLOW.  This is a security feature.  Without it, an attacker
Packit 1ef1a9
             could retrieve parts of the contents (namely, the tail of the
Packit 1ef1a9
             first line that starts with "* ") of an arbitrary file by placing
Packit 1ef1a9
             a symbolic link to that file under the name "charset.alias" in
Packit 1ef1a9
             some writable directory and defining the environment variable
Packit 1ef1a9
             CHARSETALIASDIR to point to that directory.  */
Packit 1ef1a9
          fd = open (file_name,
Packit 1ef1a9
                     O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
Packit 1ef1a9
          if (fd < 0)
Packit 1ef1a9
            /* File not found.  Treat it as empty.  */
Packit 1ef1a9
            cp = "";
Packit 1ef1a9
          else
Packit 1ef1a9
            {
Packit 1ef1a9
              FILE *fp;
Packit 1ef1a9
Packit 1ef1a9
              fp = fdopen (fd, "r");
Packit 1ef1a9
              if (fp == NULL)
Packit 1ef1a9
                {
Packit 1ef1a9
                  /* Out of memory.  Treat the file as empty.  */
Packit 1ef1a9
                  close (fd);
Packit 1ef1a9
                  cp = "";
Packit 1ef1a9
                }
Packit 1ef1a9
              else
Packit 1ef1a9
                {
Packit 1ef1a9
                  /* Parse the file's contents.  */
Packit 1ef1a9
                  char *res_ptr = NULL;
Packit 1ef1a9
                  size_t res_size = 0;
Packit 1ef1a9
Packit 1ef1a9
                  for (;;)
Packit 1ef1a9
                    {
Packit 1ef1a9
                      int c;
Packit 1ef1a9
                      char buf1[50+1];
Packit 1ef1a9
                      char buf2[50+1];
Packit 1ef1a9
                      size_t l1, l2;
Packit 1ef1a9
                      char *old_res_ptr;
Packit 1ef1a9
Packit 1ef1a9
                      c = getc (fp);
Packit 1ef1a9
                      if (c == EOF)
Packit 1ef1a9
                        break;
Packit 1ef1a9
                      if (c == '\n' || c == ' ' || c == '\t')
Packit 1ef1a9
                        continue;
Packit 1ef1a9
                      if (c == '#')
Packit 1ef1a9
                        {
Packit 1ef1a9
                          /* Skip comment, to end of line.  */
Packit 1ef1a9
                          do
Packit 1ef1a9
                            c = getc (fp);
Packit 1ef1a9
                          while (!(c == EOF || c == '\n'));
Packit 1ef1a9
                          if (c == EOF)
Packit 1ef1a9
                            break;
Packit 1ef1a9
                          continue;
Packit 1ef1a9
                        }
Packit 1ef1a9
                      ungetc (c, fp);
Packit 1ef1a9
                      if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
Packit 1ef1a9
                        break;
Packit 1ef1a9
                      l1 = strlen (buf1);
Packit 1ef1a9
                      l2 = strlen (buf2);
Packit 1ef1a9
                      old_res_ptr = res_ptr;
Packit 1ef1a9
                      if (res_size == 0)
Packit 1ef1a9
                        {
Packit 1ef1a9
                          res_size = l1 + 1 + l2 + 1;
Packit 1ef1a9
                          res_ptr = (char *) malloc (res_size + 1);
Packit 1ef1a9
                        }
Packit 1ef1a9
                      else
Packit 1ef1a9
                        {
Packit 1ef1a9
                          res_size += l1 + 1 + l2 + 1;
Packit 1ef1a9
                          res_ptr = (char *) realloc (res_ptr, res_size + 1);
Packit 1ef1a9
                        }
Packit 1ef1a9
                      if (res_ptr == NULL)
Packit 1ef1a9
                        {
Packit 1ef1a9
                          /* Out of memory. */
Packit 1ef1a9
                          res_size = 0;
Packit 1ef1a9
                          free (old_res_ptr);
Packit 1ef1a9
                          break;
Packit 1ef1a9
                        }
Packit 1ef1a9
                      strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
Packit 1ef1a9
                      strcpy (res_ptr + res_size - (l2 + 1), buf2);
Packit 1ef1a9
                    }
Packit 1ef1a9
                  fclose (fp);
Packit 1ef1a9
                  if (res_size == 0)
Packit 1ef1a9
                    cp = "";
Packit 1ef1a9
                  else
Packit 1ef1a9
                    {
Packit 1ef1a9
                      *(res_ptr + res_size) = '\0';
Packit 1ef1a9
                      cp = res_ptr;
Packit 1ef1a9
                    }
Packit 1ef1a9
                }
Packit 1ef1a9
            }
Packit 1ef1a9
Packit 1ef1a9
          free (file_name);
Packit 1ef1a9
        }
Packit 1ef1a9
Packit 1ef1a9
#else
Packit 1ef1a9
Packit 1ef1a9
# if defined DARWIN7
Packit 1ef1a9
      /* To avoid the trouble of installing a file that is shared by many
Packit 1ef1a9
         GNU packages -- many packaging systems have problems with this --,
Packit 1ef1a9
         simply inline the aliases here.  */
Packit 1ef1a9
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
Packit 1ef1a9
           "ISO8859-2" "\0" "ISO-8859-2" "\0"
Packit 1ef1a9
           "ISO8859-4" "\0" "ISO-8859-4" "\0"
Packit 1ef1a9
           "ISO8859-5" "\0" "ISO-8859-5" "\0"
Packit 1ef1a9
           "ISO8859-7" "\0" "ISO-8859-7" "\0"
Packit 1ef1a9
           "ISO8859-9" "\0" "ISO-8859-9" "\0"
Packit 1ef1a9
           "ISO8859-13" "\0" "ISO-8859-13" "\0"
Packit 1ef1a9
           "ISO8859-15" "\0" "ISO-8859-15" "\0"
Packit 1ef1a9
           "KOI8-R" "\0" "KOI8-R" "\0"
Packit 1ef1a9
           "KOI8-U" "\0" "KOI8-U" "\0"
Packit 1ef1a9
           "CP866" "\0" "CP866" "\0"
Packit 1ef1a9
           "CP949" "\0" "CP949" "\0"
Packit 1ef1a9
           "CP1131" "\0" "CP1131" "\0"
Packit 1ef1a9
           "CP1251" "\0" "CP1251" "\0"
Packit 1ef1a9
           "eucCN" "\0" "GB2312" "\0"
Packit 1ef1a9
           "GB2312" "\0" "GB2312" "\0"
Packit 1ef1a9
           "eucJP" "\0" "EUC-JP" "\0"
Packit 1ef1a9
           "eucKR" "\0" "EUC-KR" "\0"
Packit 1ef1a9
           "Big5" "\0" "BIG5" "\0"
Packit 1ef1a9
           "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
Packit 1ef1a9
           "GBK" "\0" "GBK" "\0"
Packit 1ef1a9
           "GB18030" "\0" "GB18030" "\0"
Packit 1ef1a9
           "SJIS" "\0" "SHIFT_JIS" "\0"
Packit 1ef1a9
           "ARMSCII-8" "\0" "ARMSCII-8" "\0"
Packit 1ef1a9
           "PT154" "\0" "PT154" "\0"
Packit 1ef1a9
         /*"ISCII-DEV" "\0" "?" "\0"*/
Packit 1ef1a9
           "*" "\0" "UTF-8" "\0";
Packit 1ef1a9
# endif
Packit 1ef1a9
Packit 1ef1a9
# if defined VMS
Packit 1ef1a9
      /* To avoid the troubles of an extra file charset.alias_vms in the
Packit 1ef1a9
         sources of many GNU packages, simply inline the aliases here.  */
Packit 1ef1a9
      /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
Packit 1ef1a9
         "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
Packit 1ef1a9
         section 10.7 "Handling Different Character Sets".  */
Packit 1ef1a9
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
Packit 1ef1a9
           "ISO8859-2" "\0" "ISO-8859-2" "\0"
Packit 1ef1a9
           "ISO8859-5" "\0" "ISO-8859-5" "\0"
Packit 1ef1a9
           "ISO8859-7" "\0" "ISO-8859-7" "\0"
Packit 1ef1a9
           "ISO8859-8" "\0" "ISO-8859-8" "\0"
Packit 1ef1a9
           "ISO8859-9" "\0" "ISO-8859-9" "\0"
Packit 1ef1a9
           /* Japanese */
Packit 1ef1a9
           "eucJP" "\0" "EUC-JP" "\0"
Packit 1ef1a9
           "SJIS" "\0" "SHIFT_JIS" "\0"
Packit 1ef1a9
           "DECKANJI" "\0" "DEC-KANJI" "\0"
Packit 1ef1a9
           "SDECKANJI" "\0" "EUC-JP" "\0"
Packit 1ef1a9
           /* Chinese */
Packit 1ef1a9
           "eucTW" "\0" "EUC-TW" "\0"
Packit 1ef1a9
           "DECHANYU" "\0" "DEC-HANYU" "\0"
Packit 1ef1a9
           "DECHANZI" "\0" "GB2312" "\0"
Packit 1ef1a9
           /* Korean */
Packit 1ef1a9
           "DECKOREAN" "\0" "EUC-KR" "\0";
Packit 1ef1a9
# endif
Packit 1ef1a9
Packit 1ef1a9
# if defined WINDOWS_NATIVE || defined __CYGWIN__
Packit 1ef1a9
      /* To avoid the troubles of installing a separate file in the same
Packit 1ef1a9
         directory as the DLL and of retrieving the DLL's directory at
Packit 1ef1a9
         runtime, simply inline the aliases here.  */
Packit 1ef1a9
Packit 1ef1a9
      cp = "CP936" "\0" "GBK" "\0"
Packit 1ef1a9
           "CP1361" "\0" "JOHAB" "\0"
Packit 1ef1a9
           "CP20127" "\0" "ASCII" "\0"
Packit 1ef1a9
           "CP20866" "\0" "KOI8-R" "\0"
Packit 1ef1a9
           "CP20936" "\0" "GB2312" "\0"
Packit 1ef1a9
           "CP21866" "\0" "KOI8-RU" "\0"
Packit 1ef1a9
           "CP28591" "\0" "ISO-8859-1" "\0"
Packit 1ef1a9
           "CP28592" "\0" "ISO-8859-2" "\0"
Packit 1ef1a9
           "CP28593" "\0" "ISO-8859-3" "\0"
Packit 1ef1a9
           "CP28594" "\0" "ISO-8859-4" "\0"
Packit 1ef1a9
           "CP28595" "\0" "ISO-8859-5" "\0"
Packit 1ef1a9
           "CP28596" "\0" "ISO-8859-6" "\0"
Packit 1ef1a9
           "CP28597" "\0" "ISO-8859-7" "\0"
Packit 1ef1a9
           "CP28598" "\0" "ISO-8859-8" "\0"
Packit 1ef1a9
           "CP28599" "\0" "ISO-8859-9" "\0"
Packit 1ef1a9
           "CP28605" "\0" "ISO-8859-15" "\0"
Packit 1ef1a9
           "CP38598" "\0" "ISO-8859-8" "\0"
Packit 1ef1a9
           "CP51932" "\0" "EUC-JP" "\0"
Packit 1ef1a9
           "CP51936" "\0" "GB2312" "\0"
Packit 1ef1a9
           "CP51949" "\0" "EUC-KR" "\0"
Packit 1ef1a9
           "CP51950" "\0" "EUC-TW" "\0"
Packit 1ef1a9
           "CP54936" "\0" "GB18030" "\0"
Packit 1ef1a9
           "CP65001" "\0" "UTF-8" "\0";
Packit 1ef1a9
# endif
Packit 1ef1a9
# if defined OS2
Packit 1ef1a9
      /* To avoid the troubles of installing a separate file in the same
Packit 1ef1a9
         directory as the DLL and of retrieving the DLL's directory at
Packit 1ef1a9
         runtime, simply inline the aliases here.  */
Packit 1ef1a9
Packit 1ef1a9
      /* The list of encodings is taken from "List of OS/2 Codepages"
Packit 1ef1a9
         by Alex Taylor:
Packit 1ef1a9
         <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
Packit 1ef1a9
         See also "IBM Globalization - Code page identifiers":
Packit 1ef1a9
         <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
Packit 1ef1a9
      cp = "CP813" "\0" "ISO-8859-7" "\0"
Packit 1ef1a9
           "CP878" "\0" "KOI8-R" "\0"
Packit 1ef1a9
           "CP819" "\0" "ISO-8859-1" "\0"
Packit 1ef1a9
           "CP912" "\0" "ISO-8859-2" "\0"
Packit 1ef1a9
           "CP913" "\0" "ISO-8859-3" "\0"
Packit 1ef1a9
           "CP914" "\0" "ISO-8859-4" "\0"
Packit 1ef1a9
           "CP915" "\0" "ISO-8859-5" "\0"
Packit 1ef1a9
           "CP916" "\0" "ISO-8859-8" "\0"
Packit 1ef1a9
           "CP920" "\0" "ISO-8859-9" "\0"
Packit 1ef1a9
           "CP921" "\0" "ISO-8859-13" "\0"
Packit 1ef1a9
           "CP923" "\0" "ISO-8859-15" "\0"
Packit 1ef1a9
           "CP954" "\0" "EUC-JP" "\0"
Packit 1ef1a9
           "CP964" "\0" "EUC-TW" "\0"
Packit 1ef1a9
           "CP970" "\0" "EUC-KR" "\0"
Packit 1ef1a9
           "CP1089" "\0" "ISO-8859-6" "\0"
Packit 1ef1a9
           "CP1208" "\0" "UTF-8" "\0"
Packit 1ef1a9
           "CP1381" "\0" "GB2312" "\0"
Packit 1ef1a9
           "CP1386" "\0" "GBK" "\0"
Packit 1ef1a9
           "CP3372" "\0" "EUC-JP" "\0";
Packit 1ef1a9
# endif
Packit 1ef1a9
#endif
Packit 1ef1a9
Packit 1ef1a9
      charset_aliases = cp;
Packit 1ef1a9
    }
Packit 1ef1a9
Packit 1ef1a9
  return cp;
Packit 1ef1a9
}
Packit 1ef1a9
Packit 1ef1a9
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.

   The result must not be freed or modified by the caller: it points to a
   string literal, to a static buffer, into the cached alias table, or
   into the environment.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  The result is never NULL and never the empty string.

   Note: on Cygwin (when nl_langinfo reports "US-ASCII") and on OS/2, an
   encoding named explicitly after the dot in LC_ALL / LC_CTYPE / LANG is
   returned as is, without being looked up in the alias table.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WINDOWS_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot here, so the difference is non-negative.
                 Only copy when the encoding plus its NUL fits in buf;
                 otherwise fall through to the codepage number.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WINDOWS_NATIVE

  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (current_locale != NULL && strchr (current_locale, ';'))
    current_locale = setlocale (LC_CTYPE, NULL);

  /* Guard against a NULL result from setlocale before searching it.  */
  pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);

  /* Use the suffix after the last dot only if "CP" + suffix + NUL fits
     into buf; an unchecked sprintf here would overflow the static buffer
     for suffixes longer than 10 characters.  */
  if (pdot != NULL && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
        number: GetACP().
        When the output goes to a console window, it needs to be provided in
        GetOEMCP() encoding if the console is using a raster font, or in
        GetConsoleOutputCP() encoding if it is using a TrueType font.
        But in GUI programs and for output sent to files and pipes, GetACP()
        encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative.
             Only copy when the encoding plus its NUL fits in buf.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* Cast so that the argument type matches the "%u" conversion
             whatever integer type ULONG is.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table is a sequence of NUL-terminated
     (alias, canonical) pairs ended by an empty string; an alias of "*"
     matches any codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}