/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; coding: utf-8 -*- */ /* gtksourceencoding.c * This file is part of GtkSourceView * * Copyright (C) 2002-2005 - Paolo Maggi * Copyright (C) 2014, 2015 - Sébastien Wilmet * * GtkSourceView is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * GtkSourceView is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include #endif #include "gtksourceencoding.h" #include "gtksourceencoding-private.h" #include "gtksourceview-i18n.h" /** * SECTION:encoding * @Short_description: Character encoding * @Title: GtkSourceEncoding * @See_also: #GtkSourceFileSaver, #GtkSourceFileLoader * * The #GtkSourceEncoding boxed type represents a character encoding. It is used * for example by #GtkSourceFile. Note that the text in GTK+ widgets is always * encoded in UTF-8. */ struct _GtkSourceEncoding { gint index; const gchar *charset; const gchar *name; }; G_DEFINE_BOXED_TYPE (GtkSourceEncoding, gtk_source_encoding, gtk_source_encoding_copy, gtk_source_encoding_free) /* * The original versions of the following tables are taken from profterm * * Copyright (C) 2002 Red Hat, Inc. */ typedef enum _GtkSourceEncodingIndex { GTK_SOURCE_ENCODING_ISO_8859_1, GTK_SOURCE_ENCODING_ISO_8859_2, GTK_SOURCE_ENCODING_ISO_8859_3, GTK_SOURCE_ENCODING_ISO_8859_4, GTK_SOURCE_ENCODING_ISO_8859_5, GTK_SOURCE_ENCODING_ISO_8859_6, GTK_SOURCE_ENCODING_ISO_8859_7, GTK_SOURCE_ENCODING_ISO_8859_8, GTK_SOURCE_ENCODING_ISO_8859_9, GTK_SOURCE_ENCODING_ISO_8859_10, GTK_SOURCE_ENCODING_ISO_8859_13, GTK_SOURCE_ENCODING_ISO_8859_14, GTK_SOURCE_ENCODING_ISO_8859_15, GTK_SOURCE_ENCODING_ISO_8859_16, GTK_SOURCE_ENCODING_UTF_7, GTK_SOURCE_ENCODING_UTF_16, GTK_SOURCE_ENCODING_UTF_16_BE, GTK_SOURCE_ENCODING_UTF_16_LE, GTK_SOURCE_ENCODING_UTF_32, GTK_SOURCE_ENCODING_UCS_2, GTK_SOURCE_ENCODING_UCS_4, GTK_SOURCE_ENCODING_ARMSCII_8, GTK_SOURCE_ENCODING_BIG5, GTK_SOURCE_ENCODING_BIG5_HKSCS, GTK_SOURCE_ENCODING_CP_866, GTK_SOURCE_ENCODING_EUC_JP, GTK_SOURCE_ENCODING_EUC_JP_MS, GTK_SOURCE_ENCODING_CP932, GTK_SOURCE_ENCODING_EUC_KR, GTK_SOURCE_ENCODING_EUC_TW, GTK_SOURCE_ENCODING_GB18030, GTK_SOURCE_ENCODING_GB2312, GTK_SOURCE_ENCODING_GBK, GTK_SOURCE_ENCODING_GEOSTD8, GTK_SOURCE_ENCODING_IBM_850, GTK_SOURCE_ENCODING_IBM_852, GTK_SOURCE_ENCODING_IBM_855, GTK_SOURCE_ENCODING_IBM_857, GTK_SOURCE_ENCODING_IBM_862, GTK_SOURCE_ENCODING_IBM_864, GTK_SOURCE_ENCODING_ISO_2022_JP, GTK_SOURCE_ENCODING_ISO_2022_KR, GTK_SOURCE_ENCODING_ISO_IR_111, GTK_SOURCE_ENCODING_JOHAB, GTK_SOURCE_ENCODING_KOI8_R, GTK_SOURCE_ENCODING_KOI8__R, GTK_SOURCE_ENCODING_KOI8_U, GTK_SOURCE_ENCODING_SHIFT_JIS, GTK_SOURCE_ENCODING_TCVN, GTK_SOURCE_ENCODING_TIS_620, GTK_SOURCE_ENCODING_UHC, GTK_SOURCE_ENCODING_VISCII, GTK_SOURCE_ENCODING_WINDOWS_1250, GTK_SOURCE_ENCODING_WINDOWS_1251, GTK_SOURCE_ENCODING_WINDOWS_1252, GTK_SOURCE_ENCODING_WINDOWS_1253, GTK_SOURCE_ENCODING_WINDOWS_1254, GTK_SOURCE_ENCODING_WINDOWS_1255, GTK_SOURCE_ENCODING_WINDOWS_1256, GTK_SOURCE_ENCODING_WINDOWS_1257, GTK_SOURCE_ENCODING_WINDOWS_1258, GTK_SOURCE_ENCODING_LAST, GTK_SOURCE_ENCODING_UTF_8, GTK_SOURCE_ENCODING_UNKNOWN } GtkSourceEncodingIndex; static const GtkSourceEncoding utf8_encoding = { GTK_SOURCE_ENCODING_UTF_8, "UTF-8", N_("Unicode") }; /* Initialized in gtk_source_encoding_lazy_init(). */ static GtkSourceEncoding unknown_encoding = { GTK_SOURCE_ENCODING_UNKNOWN, NULL, NULL }; static const GtkSourceEncoding encodings[] = { { GTK_SOURCE_ENCODING_ISO_8859_1, "ISO-8859-1", N_("Western") }, { GTK_SOURCE_ENCODING_ISO_8859_2, "ISO-8859-2", N_("Central European") }, { GTK_SOURCE_ENCODING_ISO_8859_3, "ISO-8859-3", N_("South European") }, { GTK_SOURCE_ENCODING_ISO_8859_4, "ISO-8859-4", N_("Baltic") }, { GTK_SOURCE_ENCODING_ISO_8859_5, "ISO-8859-5", N_("Cyrillic") }, { GTK_SOURCE_ENCODING_ISO_8859_6, "ISO-8859-6", N_("Arabic") }, { GTK_SOURCE_ENCODING_ISO_8859_7, "ISO-8859-7", N_("Greek") }, { GTK_SOURCE_ENCODING_ISO_8859_8, "ISO-8859-8", N_("Hebrew Visual") }, { GTK_SOURCE_ENCODING_ISO_8859_9, "ISO-8859-9", N_("Turkish") }, { GTK_SOURCE_ENCODING_ISO_8859_10, "ISO-8859-10", N_("Nordic") }, { GTK_SOURCE_ENCODING_ISO_8859_13, "ISO-8859-13", N_("Baltic") }, { GTK_SOURCE_ENCODING_ISO_8859_14, "ISO-8859-14", N_("Celtic") }, { GTK_SOURCE_ENCODING_ISO_8859_15, "ISO-8859-15", N_("Western") }, { GTK_SOURCE_ENCODING_ISO_8859_16, "ISO-8859-16", N_("Romanian") }, { GTK_SOURCE_ENCODING_UTF_7, "UTF-7", N_("Unicode") }, { GTK_SOURCE_ENCODING_UTF_16, "UTF-16", N_("Unicode") }, { GTK_SOURCE_ENCODING_UTF_16_BE, "UTF-16BE", N_("Unicode") }, { GTK_SOURCE_ENCODING_UTF_16_LE, "UTF-16LE", N_("Unicode") }, { GTK_SOURCE_ENCODING_UTF_32, "UTF-32", N_("Unicode") }, { GTK_SOURCE_ENCODING_UCS_2, "UCS-2", N_("Unicode") }, { GTK_SOURCE_ENCODING_UCS_4, "UCS-4", N_("Unicode") }, { GTK_SOURCE_ENCODING_ARMSCII_8, "ARMSCII-8", N_("Armenian") }, { GTK_SOURCE_ENCODING_BIG5, "BIG5", N_("Chinese Traditional") }, { GTK_SOURCE_ENCODING_BIG5_HKSCS, "BIG5-HKSCS", N_("Chinese Traditional") }, { GTK_SOURCE_ENCODING_CP_866, "CP866", N_("Cyrillic/Russian") }, { GTK_SOURCE_ENCODING_EUC_JP, "EUC-JP", N_("Japanese") }, { GTK_SOURCE_ENCODING_EUC_JP_MS, "EUC-JP-MS", N_("Japanese") }, { GTK_SOURCE_ENCODING_CP932, "CP932", N_("Japanese") }, { GTK_SOURCE_ENCODING_EUC_KR, "EUC-KR", N_("Korean") }, { GTK_SOURCE_ENCODING_EUC_TW, "EUC-TW", N_("Chinese Traditional") }, { GTK_SOURCE_ENCODING_GB18030, "GB18030", N_("Chinese Simplified") }, { GTK_SOURCE_ENCODING_GB2312, "GB2312", N_("Chinese Simplified") }, { GTK_SOURCE_ENCODING_GBK, "GBK", N_("Chinese Simplified") }, { GTK_SOURCE_ENCODING_GEOSTD8, "GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */ { GTK_SOURCE_ENCODING_IBM_850, "IBM850", N_("Western") }, { GTK_SOURCE_ENCODING_IBM_852, "IBM852", N_("Central European") }, { GTK_SOURCE_ENCODING_IBM_855, "IBM855", N_("Cyrillic") }, { GTK_SOURCE_ENCODING_IBM_857, "IBM857", N_("Turkish") }, { GTK_SOURCE_ENCODING_IBM_862, "IBM862", N_("Hebrew") }, { GTK_SOURCE_ENCODING_IBM_864, "IBM864", N_("Arabic") }, { GTK_SOURCE_ENCODING_ISO_2022_JP, "ISO-2022-JP", N_("Japanese") }, { GTK_SOURCE_ENCODING_ISO_2022_KR, "ISO-2022-KR", N_("Korean") }, { GTK_SOURCE_ENCODING_ISO_IR_111, "ISO-IR-111", N_("Cyrillic") }, { GTK_SOURCE_ENCODING_JOHAB, "JOHAB", N_("Korean") }, { GTK_SOURCE_ENCODING_KOI8_R, "KOI8R", N_("Cyrillic") }, { GTK_SOURCE_ENCODING_KOI8__R, "KOI8-R", N_("Cyrillic") }, { GTK_SOURCE_ENCODING_KOI8_U, "KOI8U", N_("Cyrillic/Ukrainian") }, { GTK_SOURCE_ENCODING_SHIFT_JIS, "SHIFT_JIS", N_("Japanese") }, { GTK_SOURCE_ENCODING_TCVN, "TCVN", N_("Vietnamese") }, { GTK_SOURCE_ENCODING_TIS_620, "TIS-620", N_("Thai") }, { GTK_SOURCE_ENCODING_UHC, "UHC", N_("Korean") }, { GTK_SOURCE_ENCODING_VISCII, "VISCII", N_("Vietnamese") }, { GTK_SOURCE_ENCODING_WINDOWS_1250, "WINDOWS-1250", N_("Central European") }, { GTK_SOURCE_ENCODING_WINDOWS_1251, "WINDOWS-1251", N_("Cyrillic") }, { GTK_SOURCE_ENCODING_WINDOWS_1252, "WINDOWS-1252", N_("Western") }, { GTK_SOURCE_ENCODING_WINDOWS_1253, "WINDOWS-1253", N_("Greek") }, { GTK_SOURCE_ENCODING_WINDOWS_1254, "WINDOWS-1254", N_("Turkish") }, { GTK_SOURCE_ENCODING_WINDOWS_1255, "WINDOWS-1255", N_("Hebrew") }, { GTK_SOURCE_ENCODING_WINDOWS_1256, "WINDOWS-1256", N_("Arabic") }, { GTK_SOURCE_ENCODING_WINDOWS_1257, "WINDOWS-1257", N_("Baltic") }, { GTK_SOURCE_ENCODING_WINDOWS_1258, "WINDOWS-1258", N_("Vietnamese") } }; static void gtk_source_encoding_lazy_init (void) { static gboolean initialized = FALSE; const gchar *locale_charset; if (G_LIKELY (initialized)) { return; } if (g_get_charset (&locale_charset) == FALSE) { unknown_encoding.charset = g_strdup (locale_charset); } initialized = TRUE; } /** * gtk_source_encoding_get_from_charset: * @charset: a character set. * * Gets a #GtkSourceEncoding from a character set such as "UTF-8" or * "ISO-8859-1". * * Returns: (nullable): the corresponding #GtkSourceEncoding, or %NULL * if not found. * Since: 3.14 */ const GtkSourceEncoding * gtk_source_encoding_get_from_charset (const gchar *charset) { gint i; g_return_val_if_fail (charset != NULL, NULL); if (g_ascii_strcasecmp (charset, "UTF-8") == 0) { return gtk_source_encoding_get_utf8 (); } for (i = 0; i < GTK_SOURCE_ENCODING_LAST; i++) { if (g_ascii_strcasecmp (charset, encodings[i].charset) == 0) { return &encodings[i]; } } gtk_source_encoding_lazy_init (); if (unknown_encoding.charset != NULL && g_ascii_strcasecmp (charset, unknown_encoding.charset) == 0) { return &unknown_encoding; } return NULL; } /** * gtk_source_encoding_get_all: * * Gets all encodings. * * Returns: (transfer container) (element-type GtkSource.Encoding): a list of * all #GtkSourceEncoding's. Free with g_slist_free(). * Since: 3.14 */ GSList * gtk_source_encoding_get_all (void) { GSList *all = NULL; gint i; for (i = GTK_SOURCE_ENCODING_LAST - 1; i >= 0; i--) { all = g_slist_prepend (all, (gpointer) &encodings[i]); } all = g_slist_prepend (all, (gpointer) &utf8_encoding); return all; } /** * gtk_source_encoding_get_utf8: * * Returns: the UTF-8 encoding. * Since: 3.14 */ const GtkSourceEncoding * gtk_source_encoding_get_utf8 (void) { return &utf8_encoding; } /** * gtk_source_encoding_get_current: * * Gets the #GtkSourceEncoding for the current locale. See also g_get_charset(). * * Returns: the current locale encoding. * Since: 3.14 */ const GtkSourceEncoding * gtk_source_encoding_get_current (void) { static gboolean initialized = FALSE; static const GtkSourceEncoding *locale_encoding = NULL; const gchar *locale_charset; gtk_source_encoding_lazy_init (); if (G_LIKELY (initialized)) { return locale_encoding; } if (g_get_charset (&locale_charset)) { locale_encoding = &utf8_encoding; } else { locale_encoding = gtk_source_encoding_get_from_charset (locale_charset); } if (locale_encoding == NULL) { locale_encoding = &unknown_encoding; } initialized = TRUE; return locale_encoding; } /** * gtk_source_encoding_to_string: * @enc: a #GtkSourceEncoding. * * Returns: a string representation. Free with g_free() when no longer needed. * Since: 3.14 */ gchar * gtk_source_encoding_to_string (const GtkSourceEncoding* enc) { g_return_val_if_fail (enc != NULL, NULL); gtk_source_encoding_lazy_init (); g_return_val_if_fail (enc->charset != NULL, NULL); if (enc->name != NULL) { return g_strdup_printf ("%s (%s)", _(enc->name), enc->charset); } else if (g_ascii_strcasecmp (enc->charset, "ANSI_X3.4-1968") == 0) { return g_strdup_printf ("US-ASCII (%s)", enc->charset); } else { return g_strdup (enc->charset); } } /** * gtk_source_encoding_get_charset: * @enc: a #GtkSourceEncoding. * * Gets the character set of the #GtkSourceEncoding, such as "UTF-8" or * "ISO-8859-1". * * Returns: the character set of the #GtkSourceEncoding. * Since: 3.14 */ const gchar * gtk_source_encoding_get_charset (const GtkSourceEncoding* enc) { g_return_val_if_fail (enc != NULL, NULL); gtk_source_encoding_lazy_init (); g_return_val_if_fail (enc->charset != NULL, NULL); return enc->charset; } /** * gtk_source_encoding_get_name: * @enc: a #GtkSourceEncoding. * * Gets the name of the #GtkSourceEncoding such as "Unicode" or "Western". * * Returns: the name of the #GtkSourceEncoding. * Since: 3.14 */ const gchar * gtk_source_encoding_get_name (const GtkSourceEncoding* enc) { g_return_val_if_fail (enc != NULL, NULL); gtk_source_encoding_lazy_init (); return (enc->name == NULL) ? _("Unknown") : _(enc->name); } static GSList * strv_to_list (const gchar * const *enc_str) { GSList *res = NULL; gchar **p; for (p = (gchar **)enc_str; p != NULL && *p != NULL; p++) { const gchar *charset = *p; const GtkSourceEncoding *enc; if (g_str_equal (charset, "CURRENT")) { g_get_charset (&charset); } g_return_val_if_fail (charset != NULL, NULL); enc = gtk_source_encoding_get_from_charset (charset); if (enc != NULL && g_slist_find (res, enc) == NULL) { res = g_slist_prepend (res, (gpointer)enc); } } return g_slist_reverse (res); } static GSList * remove_duplicates_keep_first (GSList *list) { GSList *new_list = NULL; GSList *l; for (l = list; l != NULL; l = l->next) { gpointer cur_encoding = l->data; if (g_slist_find (new_list, cur_encoding) == NULL) { new_list = g_slist_prepend (new_list, cur_encoding); } } new_list = g_slist_reverse (new_list); g_slist_free (list); return new_list; } static GSList * remove_duplicates_keep_last (GSList *list) { GSList *new_list = NULL; GSList *l; list = g_slist_reverse (list); for (l = list; l != NULL; l = l->next) { gpointer cur_encoding = l->data; if (g_slist_find (new_list, cur_encoding) == NULL) { new_list = g_slist_prepend (new_list, cur_encoding); } } g_slist_free (list); return new_list; } /* * _gtk_source_encoding_remove_duplicates: * @list: (element-type GtkSource.Encoding): a list of #GtkSourceEncoding's. * @removal_type: the #GtkSourceEncodingDuplicates. * * A convenience function to remove duplicated encodings in a list. * * Returns: (transfer container) (element-type GtkSource.Encoding): the new * start of the #GSList. * Since: 3.14 */ GSList * _gtk_source_encoding_remove_duplicates (GSList *list, GtkSourceEncodingDuplicates removal_type) { switch (removal_type) { case GTK_SOURCE_ENCODING_DUPLICATES_KEEP_FIRST: return remove_duplicates_keep_first (list); case GTK_SOURCE_ENCODING_DUPLICATES_KEEP_LAST: return remove_duplicates_keep_last (list); default: break; } g_return_val_if_reached (list); } /** * gtk_source_encoding_get_default_candidates: * * Gets the list of default candidate encodings to try when loading a file. See * gtk_source_file_loader_set_candidate_encodings(). * * This function returns a different list depending on the current locale (i.e. * language, country and default encoding). The UTF-8 encoding and the current * locale encoding are guaranteed to be present in the returned list. * * Returns: (transfer container) (element-type GtkSource.Encoding): the list of * default candidate encodings. Free with g_slist_free(). * Since: 3.18 */ GSList * gtk_source_encoding_get_default_candidates (void) { const gchar *encodings_str; const gchar *encodings_str_translated; GVariant *encodings_variant; const gchar **encodings_strv; GSList *encodings_list; GError *error = NULL; /* Translators: This is the sorted list of encodings used by * GtkSourceView for automatic detection of the file encoding. You may * want to customize it adding encodings that are common in your * country, for instance the GB18030 encoding for the Chinese * translation. You may also want to remove the ISO-8859-15 encoding * (covering English and most Western European languages) if you think * people in your country will rarely use it. "CURRENT" is a magic * value used by GtkSourceView and it represents the encoding for the * current locale, so please don't translate the "CURRENT" term. Only * recognized encodings are used. See * https://git.gnome.org/browse/gtksourceview/tree/gtksourceview/gtksourceencoding.c#n147 * for a list of supported encodings. * Keep the same format: square brackets, single quotes, commas. */ encodings_str = N_("['UTF-8', 'CURRENT', 'ISO-8859-15', 'UTF-16']"); encodings_str_translated = _(encodings_str); encodings_variant = g_variant_parse (G_VARIANT_TYPE_STRING_ARRAY, encodings_str_translated, NULL, NULL, &error); if (error != NULL) { const gchar * const *language_names = g_get_language_names (); g_warning ("Error while parsing encodings list for locale %s:\n" "Translated list: %s\n" "Error message: %s", language_names[0], encodings_str_translated, error->message); g_clear_error (&error); encodings_variant = g_variant_parse (G_VARIANT_TYPE_STRING_ARRAY, encodings_str, NULL, NULL, &error); g_assert_no_error (error); } encodings_strv = g_variant_get_strv (encodings_variant, NULL); encodings_list = strv_to_list (encodings_strv); g_free ((gpointer) encodings_strv); /* Ensure that UTF-8 and CURRENT are present. */ encodings_list = g_slist_prepend (encodings_list, (gpointer) gtk_source_encoding_get_current ()); encodings_list = g_slist_prepend (encodings_list, (gpointer) &utf8_encoding); encodings_list = _gtk_source_encoding_remove_duplicates (encodings_list, GTK_SOURCE_ENCODING_DUPLICATES_KEEP_LAST); g_variant_unref (encodings_variant); return encodings_list; } /** * gtk_source_encoding_copy: * @enc: a #GtkSourceEncoding. * * Used by language bindings. * * Returns: a copy of @enc. * Since: 3.14 */ GtkSourceEncoding * gtk_source_encoding_copy (const GtkSourceEncoding *enc) { g_return_val_if_fail (enc != NULL, NULL); return (GtkSourceEncoding *) enc; } /** * gtk_source_encoding_free: * @enc: a #GtkSourceEncoding. * * Used by language bindings. * * Since: 3.14 */ void gtk_source_encoding_free (GtkSourceEncoding *enc) { g_return_if_fail (enc != NULL); }