/* locale.c -- locale module.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H15PRO112

   This file is part of the m17n library.

   The m17n library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   The m17n library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the m17n library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301 USA.  */

/***en
    @addtogroup m17nLocale
    @brief Locale objects and API for them.

    The m17n library represents locale related information as objects
    of type #MLocale.  */

/***ja
    @addtogroup m17nLocale
    @brief ロケールオブジェクトとそれに関する API.

    m17n ライブラリはロケール関連情報を #MLocale 型のオブジェクトで表現する。  */

/*=*/

#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
/*** @addtogroup m17nInternal
     @{ */

#define _GNU_SOURCE

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#include <time.h>

#include "m17n.h"
#include "m17n-misc.h"
#include "internal.h"
#include "symbol.h"
#include "coding.h"
#include "textprop.h"
#include "mlocale.h"

/* Managing key under which the symbol of a locale name stores the
   corresponding MLocale object (see make_locale () and
   mlocale_set ()).  Created in mlocale__init ().  */
static MSymbol M_locale;
/* Managing key of the text property that get_xfrm () attaches to an
   M-text to cache its MXfrm object.  Created in mlocale__init ().  */
static MSymbol M_xfrm;


/** Structure of locales.  Objects are created by make_locale (),
    which fills the fields by parsing the locale name.  */

struct MLocale
{
  /* Common header for a managed object.  */
  M17NObject control;
  /* Symbol of the whole locale name as given to make_locale ().  */
  MSymbol name;
  /* Symbol of the first field of the name.  */
  MSymbol language;
  /* Symbol of the field following '_' in the name, if any.  */
  MSymbol territory;
  /* Symbol of the field following '@' in the name, if any.  */
  MSymbol modifier;
  /* Symbol of the field following '.' in the name; replaced by the
     result of nl_langinfo (CODESET) when that is available.  */
  MSymbol codeset;
  /* Coding system resolved from <codeset>; Mcoding_us_ascii when
     there is no codeset or it cannot be resolved.  */
  MSymbol coding;
};


/** The current locales of each category.  They are initialized by
    mlocale__init (), replaced by mlocale_set () when a named locale
    is set for the category (or for LC_ALL), and released by
    mlocale__fini ().  */
MLocale *mlocale__collate, *mlocale__ctype;
MLocale *mlocale__messages, *mlocale__time;

/* These are currently not used (the declaration is compiled out).  */
#if 0
MLocale *mlocale_monetary, *mlocale_numeric, ;
#endif

/** Parse locale name NAME and return a newly created MLocale object.  */

static MLocale *
make_locale (const char *name)
{
  char *str;
  int len;
  MLocale *locale;
  char c;

  M17N_OBJECT (locale, NULL, MERROR_LOCALE);
  locale->name = msymbol (name);
  msymbol_put (locale->name, M_locale, (void *) locale);
  M17N_OBJECT_UNREF (locale);

  len = strlen (name) + 1;
  str = alloca (len);
  memcpy (str, name, len);

  c = '\0';
  while (1)
    {
      char c1;
      int i;

      for (i = 0; str[i]; i++)
	if (str[i] == '_' || str[i] == '.' || str[i] == '@')
	  break;
      c1 = str[i];
      str[i] = '\0';
      if (c == '\0')
	/* The first field is for language.  */
	locale->language = msymbol (str);
      else if (c == '_')
	/* The field following '_' is for territory.  */
	locale->territory = msymbol (str);
      else if (c == '.')
	/* The field following '.' is for codeset.  */
	locale->codeset = msymbol (str);
      else
	/* The other field is for modifier.  */
	locale->modifier = msymbol (str);
      if (! c1)
	break;
      c = c1;
      str += i + 1;
    }

#ifdef HAVE_NL_LANGINFO
#ifdef CODESET
  /* If we can use nl_langinfo () to retrieve a codeset name, respect
     it over the codeset name extracted from the locale name.  */
  locale->codeset = msymbol (nl_langinfo (CODESET));
#endif
#endif

  /* If the locale name specifies a codeset, get the corresponding
     coding system.  */
  if (locale->codeset != Mnil)
    {
      locale->coding = mconv_resolve_coding (locale->codeset);
      if (locale->coding == Mnil)
	locale->coding = Mcoding_us_ascii;
    }
  else
    locale->coding = Mcoding_us_ascii;
  return locale;
}


/** Return an M-text generated by decoding the SIZE bytes at BUF with
    the coding system stored in LOCALE.  The result is whatever
    mconv_decode_buffer () returns.  */

static MText *
decode_locale (unsigned char *buf, int size, MLocale *locale)
{
  MSymbol coding = locale->coding;

  return mconv_decode_buffer (coding, buf, size);
}


/** Encode the M-text MT by the coding system associated with LOCALE.

    BUF is a caller-supplied area of *SIZE bytes; one byte of it is
    reserved for a terminating NUL.  If the first attempt reports
    failure, BUF is abandoned and a heap area is used instead, which
    is doubled before every retry.

    On return, *SIZE holds the number of encoded bytes (not counting
    the NUL) and the return value is the address of those bytes.  When
    the returned address differs from BUF, it is heap memory that the
    caller must free ().  */

static unsigned char *
encode_locale (MText *mt, unsigned char *buf, int *size, MLocale *locale)
{
  int nbytes = mconv_encode_buffer (locale->coding, mt, buf, *size - 1);

  if (nbytes < 0)
    {
      /* NOTE(review): a negative result is treated as "area too
	 short", as this retry logic always has.  Confirm that
	 mconv_encode_buffer () cannot fail for another reason; such a
	 failure would keep growing the area until allocation fails.  */
      buf = NULL;
      do {
	/* Grow on every iteration: retrying with an unchanged size
	   could never succeed.  A non-positive size is bumped so that
	   doubling makes progress.  */
	*size = (*size > 0 ? *size * 2 : 2);
	MTABLE_REALLOC (buf, *size, MERROR_LOCALE);
	/* Retry with the coding system of LOCALE, the same one as
	   the first attempt, not that of the current LC_CTYPE.  */
	nbytes = mconv_encode_buffer (locale->coding, mt, buf, *size - 1);
      } while (nbytes < 0);
    }
  buf[nbytes] = '\0';
  *size = nbytes;
  return buf;
}


/** Structure of transformed strings.  get_xfrm (), which is called
    by mtext_coll (), caches this object in an M-text as a text
    property whose key is M_xfrm.  Objects are released through
    free_xfrm ().  */

typedef struct {
  /* Common header for a managed object.  */
  M17NObject control;

  /* Locale corresponding to <str>.  get_xfrm () compares it with
     mlocale__ctype to decide whether the cached <str> can be
     reused.  */
  MLocale *locale;

  /** Result of strxfrm.  Heap memory, released by free_xfrm ().  */
  char *str;
} MXfrm;


/* Freer of MXfrm objects (installed by M17N_OBJECT () in
   get_xfrm ()).  Release the reference to the locale, the
   transformed string, and the object OBJECT itself.  */

static void
free_xfrm (void *object)
{
  MXfrm *xfrm = (MXfrm *) object;

  M17N_OBJECT_UNREF (xfrm->locale);
  free (xfrm->str);
  /* A freer replaces the default free () of a managed object, so the
     structure itself must be released here as well; otherwise every
     MXfrm is leaked.  */
  free (xfrm);
}

/* Return the strxfrm ()-transformed form of the M-text MT.

   The result is cached on MT as a text property with key M_xfrm.  A
   cached value is reused only if the property still covers MT up to
   its end and was made for the current mlocale__ctype; otherwise the
   stale property is detached and a new one is computed.

   The returned string is owned by the cached MXfrm object; the
   caller must not free it.  */

static char *
get_xfrm (MText *mt)
{
  MTextProperty *prop = mtext_get_property (mt, 0, M_xfrm);
  MXfrm *xfrm;
  int size;
  unsigned char *buf, *newbuf;
  size_t capacity, request;

  if (prop)
    {
      if (prop->end == mt->nchars)
	{
	  xfrm = (MXfrm *) prop->val;
	  if (xfrm->locale == mlocale__ctype)
	    return xfrm->str;
	}
      mtext_detach_property (prop);
    }

  /* encode_locale () reserves one byte of the area for the NUL, so
     give it one byte more than the text itself occupies.  */
  size = mt->nbytes + 1;
  buf = alloca (size);
  newbuf = encode_locale (mt, buf, &size, mlocale__ctype);
  /* SIZE is now the length of the encoded string without the NUL.  */

  M17N_OBJECT (xfrm, free_xfrm, MERROR_MTEXT);
  /* Record the locale the string is made for; without this the cache
     test above can never succeed.  free_xfrm () drops this
     reference.  */
  xfrm->locale = mlocale__ctype;
  M17N_OBJECT_REF (xfrm->locale);

  capacity = (size_t) size + 1;
  xfrm->str = NULL;
  MTABLE_REALLOC (xfrm->str, capacity, MERROR_MTEXT);
  request = strxfrm (xfrm->str, (char *) newbuf, capacity);
  if (request >= capacity)
    {
      /* strxfrm () returns the needed length without the NUL, and the
	 contents of the destination are unusable when it does not
	 fit: enlarge to REQUEST + 1 and transform again with the new
	 capacity.  */
      capacity = request + 1;
      MTABLE_REALLOC (xfrm->str, capacity, MERROR_MTEXT);
      strxfrm (xfrm->str, (char *) newbuf, capacity);
    }
  if (buf != newbuf)
    free (newbuf);
  prop = mtext_property (M_xfrm, xfrm, MTEXTPROP_VOLATILE_WEAK);
  mtext_attach_property (mt, 0, mt->nchars, prop);
  M17N_OBJECT_UNREF (prop);
  /* NOTE(review): the reference to XFRM obtained from M17N_OBJECT ()
     is not released here; if the property takes its own reference
     (M_xfrm is a managing key), XFRM is never freed -- confirm.  */
  return xfrm->str;
}


/* Internal API */

int
mlocale__init ()
{
  M_locale = msymbol_as_managing_key ("  locale");

  Mterritory = msymbol ("territory");
  Mcodeset = msymbol ("codeset");

  mlocale__collate = mlocale_set (LC_COLLATE, NULL);
  M17N_OBJECT_REF (mlocale__collate);
  mlocale__ctype = mlocale_set (LC_CTYPE, NULL);
  M17N_OBJECT_REF (mlocale__ctype);
  mlocale__messages = mlocale_set (LC_MESSAGES, NULL);
  M17N_OBJECT_REF (mlocale__messages);
  mlocale__time = mlocale_set (LC_TIME, NULL);
  M17N_OBJECT_REF (mlocale__time);

  M_xfrm = msymbol_as_managing_key ("  xfrm");
  return 0;
}

void
mlocale__fini ()
{
  /* Drop the references to the per-category locales that this module
     holds.  The four releases are independent of each other.  */
  M17N_OBJECT_UNREF (mlocale__time);
  M17N_OBJECT_UNREF (mlocale__messages);
  M17N_OBJECT_UNREF (mlocale__ctype);
  M17N_OBJECT_UNREF (mlocale__collate);
}

/*** @} */
#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */


/* External API */
/*** @addtogroup m17nLocale */
/*** @{ */

/*=*/
/***en The symbol whose name is "territory".  */
/***ja  "territory" という名前を持つシンボル. */
MSymbol Mterritory;

/*=*/
/***en The symbol whose name is "modifier".  */
/***ja  "modifier" という名前を持つシンボル. */
MSymbol Mmodifier;

/*=*/
/***en The symbol whose name is "codeset".  */
/***ja  "codeset" という名前を持つシンボル. */
MSymbol Mcodeset;

/*=*/

/***en
    @brief Set the current locale.

    The mlocale_set () function sets or queries a part of the current
    locale.  The part is specified by $CATEGORY which must be a valid
    first argument to <tt>setlocale ()</tt>.

    If $LOCALE is not NULL, the locale of the specified part is set to
    $LOCALE.  If $LOCALE is not supported by the system, the current
    locale is not changed.

    If $LOCALE is NULL, the current locale of the specified part is
    queried.

    @return
    If the call is successful, mlocale_set () returns an opaque locale
    object that corresponds to the locale.  The name of the locale can
    be acquired by the function mlocale_get_prop ().
    Otherwise, it returns NULL.  */

/***ja
    @brief 現在のロケールを設定する.

    関数 mlocale_set () は現在のロケールの一部を設定したり問い合わせたりする。ここで一部とは 
    $CATEGORY で指定され、<tt>setlocale ()</tt> の有効な第一引数となるものでなくてはならない。

    $LOCALE が NULL でなければ、指定した部分のロケールが$LOCALE に設定される。
    $LOCALE がシステムにサポートされていなければ、設定は行われず、現在のロケールは変わらない。

    $LOCALE が NULL ならば、現在のロケールの指定した部分を問い合わせる。

    @return 

    呼び出しに成功すれば、mlocale_set () はロケールに対応する opaque 
    ロケールオブジェクトを返す。ロケールの名前は関数
    mlocale_get_prop () によって得ることができる。
    そうでなければ NULL を返す。
     */

/***
    @errors
    @c MERROR_LOCALE  */

MLocale *
mlocale_set (int category, const char *name)
{
  char *new_name;
  MLocale *locale;

  new_name = setlocale (category, name);
  if (! new_name)
    return NULL;

  /* Reuse the object already registered for this locale name, if
     any; otherwise create and register one.  */
  locale = (MLocale *) msymbol_get (msymbol (new_name), M_locale);
  if (! locale)
    locale = make_locale (new_name);
  if (! locale)
    return NULL;

  /* A NULL NAME is only a query: the recorded locales stay as they
     are.  */
  if (! name)
    return locale;

  /* Record the new locale for every category it covers.  The tests
     are independent of each other so that LC_ALL updates all four;
     formerly the LC_CTYPE test was chained with "else" to the
     LC_COLLATE one, so LC_ALL left mlocale__ctype stale.  */
  if (category == LC_ALL || category == LC_COLLATE)
    {
      M17N_OBJECT_REF (locale);
      M17N_OBJECT_UNREF (mlocale__collate);
      mlocale__collate = locale;
    }
  if (category == LC_ALL || category == LC_CTYPE)
    {
      M17N_OBJECT_REF (locale);
      M17N_OBJECT_UNREF (mlocale__ctype);
      mlocale__ctype = locale;
    }
  if (category == LC_ALL || category == LC_MESSAGES)
    {
      M17N_OBJECT_REF (locale);
      M17N_OBJECT_UNREF (mlocale__messages);
      mlocale__messages = locale;
    }
  if (category == LC_ALL || category == LC_TIME)
    {
      M17N_OBJECT_REF (locale);
      M17N_OBJECT_UNREF (mlocale__time);
      mlocale__time = locale;
    }
  return locale;
}

/*=*/

/***en
    @brief Get the value of a locale property.

    The mlocale_get_prop () function returns the value of a property
    $KEY of locale $LOCALE.  $KEY must be #Mname, #Mlanguage,
    #Mterritory, #Mcodeset, #Mmodifier, or #Mcoding.  */ 

/***ja
    @brief ロケールプロパティの値を得る.

    関数 mlocale_get_prop () は、ロケール $LOCALE の $KEY プロパティの値を返す。
    $KEY は #Mname, #Mlanguage, #Mterritory, #Mcodeset, #Mmodifier, 
    #Mcoding のいずれかである。 */ 

MSymbol
mlocale_get_prop (MLocale *locale, MSymbol key)
{
  /* Mnil is the answer for any key that is not recognized.  */
  MSymbol value = Mnil;

  /* Pick the member of LOCALE that KEY names.  The keys are tested
     in a fixed order and the first match wins.  */
  if (key == Mcoding)
    value = locale->coding;
  else if (key == Mname)
    value = locale->name;
  else if (key == Mlanguage)
    value = locale->language;
  else if (key == Mterritory)
    value = locale->territory;
  else if (key == Mcodeset)
    value = locale->codeset;
  else if (key == Mmodifier)
    value = locale->modifier;
  return value;
}

/*=*/
/***en
    @brief Format date and time.

    The mtext_ftime () function formats the broken-down time $TM
    according to the format specification $FORMAT and appends the
    result to the M-text $MT.  The formatting is done according to the
    locale $LOCALE (if not NULL) or the current locale (LC_TIME).

    The meaning of the arguments $TM and $FORMAT are the same as those
    of strftime ().

    @seealso
    strftime ().
*/
/***ja
    @brief 日付と時間をフォーマットする.

    関数 mtext_ftime () は時刻データ (broken-down time) $TM を$FORMAT 
    で指定された形式に清書し、結果をM-text $MT に付加する。フォーマットは 
    NULL でなければ ロケール $LOCALE に、または現在のロケール(LC_TIME) に従う。

    引数 $TM と $FORMAT の意味は strftime () の場合と同じ。

    @seealso
    strftime ().
*/

int
mtext_ftime (MText *mt, const char *format, const struct tm *tm,
	     MLocale *locale)
{
  char *saved_name = NULL;
  unsigned char *buf;
  size_t nbytes;
  size_t nchars = 0;
  int bufsize;

  if (locale)
    {
      /* Keep a private copy of the current LC_TIME locale name so
	 that it can be restored below, then switch to LOCALE.  */
      char *str = setlocale (LC_TIME, NULL);
      int len = strlen (str) + 1;

      saved_name = alloca (len);
      memcpy (saved_name, str, len);
      mlocale_set (LC_TIME, msymbol_name (locale->name));
    }

  /* Format into a buffer that is doubled until the result fits.  A
     zero return of strftime () is ambiguous, so the first byte is
     preset to non-zero: if it has been cleared, the result is taken
     to be a genuinely empty string.  */
  for (bufsize = 1024; ; bufsize *= 2)
    {
      MTABLE_ALLOCA (buf, bufsize, MERROR_MTEXT);
      buf[0] = 1;
      nbytes = strftime ((char *) buf, bufsize, format, tm);
      if (nbytes != 0 || buf[0] == '\0')
	break;
    }

  if (nbytes != 0)
    {
      /* Decode the bytes by the coding system of the LC_TIME locale
	 and append the text to MT.  */
      MText *work = decode_locale (buf, nbytes, mlocale__time);

      if (work)
	{
	  nchars = work->nchars;
	  mtext_cat (mt, work);
	  M17N_OBJECT_UNREF (work);
	}
    }

  /* Switch back to the locale that was in effect on entry.  */
  if (saved_name)
    mlocale_set (LC_TIME, saved_name);

  return nchars;
}
	  
/*=*/

/***en
    @brief Get an environment variable.

    The mtext_getenv () function searches the environment variable
    list for a string that matches the string pointed to by $NAME.

    If there is a match, the function decodes the value according to
    the current locale (LC_CTYPE) into an M-text, and returns that
    M-text.

    If there is no match, the function returns NULL.  */
/***ja
    @brief 環境変数を得る.

    関数 mtext_getenv () は $NAME 
    で指される文字列と合致する文字列を環境変数のリスト中から探す。

    見つかった場合には、その値を現在のロケール(LC_CTYPE) に従って 
    M-text にデコードし、そのM-text を返す。

    見つからなければ、NULL を返す。  */

MText *
mtext_getenv (const char *name)
{
  /* Look NAME up in the environment; a found value is decoded by the
     coding system of the current LC_CTYPE locale.  */
  char *value = getenv (name);

  return (value
	  ? decode_locale ((unsigned char *) value, strlen (value),
			   mlocale__ctype)
	  : NULL);
}

/*=*/

/***en
    @brief Change or add an environment variable.

    The mtext_putenv () function changes or adds the value of
    environment variables according to M-text $MT.  It calls the
    function <tt>putenv</tt> with an argument generated by encoding
    $MT according to the current locale (LC_CTYPE).

    @return
    This function returns zero on success, or -1 if an error
    occurs.  */
/***ja
    @brief 環境変数を変更/追加する.

    関数 mtext_putenv () は M-text $MT 
    に従って、環境変数の値を変更したり追加したりする。この関数は、現在のロケール
    (LC_CTYPE) に従って$MT をエンコードし、それを引数として関数 <tt>putenv</tt> を呼ぶ。

    @return
    この関数は、成功した場合には 0 を、エラーが起これば -1 を返す。
    */


int
mtext_putenv (MText *mt)
{
  unsigned char buf[1024];
  int size = 1024;
  unsigned char *newbuf;
  char *entry;
  int result;

  /* Encode MT by the coding system of the current LC_CTYPE locale.
     On return SIZE is the length of the NUL-terminated result.  */
  newbuf = encode_locale (mt, buf, &size, mlocale__ctype);

  /* putenv () does not copy its argument: the string itself becomes
     part of the environment.  It must therefore live in heap memory
     that stays allocated; handing over the stack buffer, or freeing
     the heap buffer afterwards, would leave a dangling pointer in
     the environment.  */
  if (newbuf != buf)
    /* encode_locale () already moved the result to the heap.  */
    entry = (char *) newbuf;
  else
    {
      entry = malloc (size + 1);
      if (! entry)
	return -1;
      memcpy (entry, newbuf, size + 1);
    }

  result = putenv (entry);
  /* On failure the string was not installed, so it is still ours to
     release.  */
  if (result != 0)
    free (entry);
  return result;
}

/*=*/

/***en
    @brief Compare two M-texts using the current locale.

    The mtext_coll () function compares the two M-texts $MT1 and $MT2.
    It returns an integer less than, equal to, or greater than zero if
    $MT1 is found, respectively, to be less than, to match, or to be
    greater than $MT2.  The comparison is based on texts as
    appropriate for the current locale (LC_COLLATE).

    This function makes use of information that is automatically
    cached in the M-texts as a text property.  So, the second call of
    this function with $MT1 or $MT2 finishes faster than the first
    call.  */
/***ja
    @brief 現在のロケールを用いて2つの M-text を比較する.

    関数 mtext_coll () は2つの M-text $MT1 と $MT2 
    を比較する。戻り値は負の整数値, 0, 正の整数値のいずれかであり、それぞれ 
    $MT1 が $MT2 より小さい、同じ、大きい場合に相当する。比較は現在のロケール 
    (LC_COLLATE) に基づいて行われる。

    この関数は M-text 
    のテキストプロパティとして自動的にキャッシュされる情報を利用するので、2度目以降の同じ比較は1度目より速く実行される。  */

int
mtext_coll (MText *mt1, MText *mt2)
{
  char *str1, *str2;

  /* An empty M-text sorts before any non-empty one; get_xfrm () is
     never called for an empty text.  */
  if (mt1->nchars == 0)
    return (mt2->nchars == 0 ? 0 : -1);
  else if (mt2->nchars == 0)
    return 1;

  str1 = get_xfrm (mt1);
  str2 = get_xfrm (mt2);
  /* The strings are already transformed by strxfrm (), whose output
     is meant to be compared bytewise.  Passing them to strcoll ()
     would apply the collation rules a second time and can give a
     wrong order.  */
  return strcmp (str1, str2);
}

/*** @} */

/*
  Local Variables:
  coding: euc-japan
  End:
*/