/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil; -*- */ /* vim:set et sts=4: */ /* ibus - The Input Bus * Copyright (C) 2018 Takao Fujiwara * Copyright (C) 2018 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 * USA */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #ifdef HAVE_LOCALE_H #include #endif #include "ibusunicode.h" #define NAMES_LIST_SUBJECT "The Unicode Standard" #define BLOCKS_SUBJECT "Blocks-" /* This file has 21 lines about the license at the top of the file. */ #define LICENSE_LINES 21 typedef enum { UCD_NAMES_LIST, UCD_BLOCKS } UCDType; typedef struct _UnicodeData UnicodeData; typedef struct _UnicodeDataIndex UnicodeDataIndex; struct _UnicodeData{ gunichar code; gchar *name; gchar *alias; gunichar start; gunichar end; GSList *list; }; struct _UnicodeDataIndex { gchar *index; UnicodeData *data_list; }; static gchar *unicode_version; static void unicode_data_new_object (UnicodeData *data) { g_return_if_fail (data != NULL); if (!data->name) { g_warning ("No name in U+%04X", data->code); } IBusUnicodeData *unicode = ibus_unicode_data_new ("code", data->code, "name", data->name ? g_strdup (data->name) : g_strdup (""), "alias", data->alias ? g_strdup (data->alias) : g_strdup (""), NULL); data->list = g_slist_append (data->list, unicode); } static void unicode_block_new_object (UnicodeData *data) { g_return_if_fail (data != NULL); if (!data->name) { g_warning ("No name in U+%04X", data->start); } IBusUnicodeBlock *block = ibus_unicode_block_new ("start", data->start, "end", data->end, "name", data->name ? g_strdup (data->name) : g_strdup (""), NULL); data->list = g_slist_append (data->list, block); } static void unicode_data_reset (UnicodeData *data) { g_return_if_fail (data != NULL); data->code = 0; g_clear_pointer (&data->name, g_free); g_clear_pointer (&data->alias, g_free); data->start = 0; data->end = 0; } static gboolean ucd_names_list_parse_comment (const gchar *line) { static gboolean has_version = FALSE; if (has_version) return TRUE; if (strlen (line) > 4 && strncmp (line, "@@@", 3) == 0) { gchar **elements = g_strsplit (line, "\t", -1); if (strncmp (elements[1], NAMES_LIST_SUBJECT, strlen (NAMES_LIST_SUBJECT)) == 0) { unicode_version = g_strdup (elements[1] + strlen (NAMES_LIST_SUBJECT) + 1); has_version = TRUE; } g_strfreev (elements); } return TRUE; } static gboolean ucd_names_list_parse_alias (const gchar *line, UnicodeData *data) { g_return_val_if_fail (line != NULL, FALSE); g_return_val_if_fail (data != NULL, FALSE); if (*line == '\0') return FALSE; data->alias = g_strdup (line); return TRUE; } static gboolean ucd_names_list_parse_indent_line (const gchar *line, UnicodeData *data) { g_return_val_if_fail (line != NULL, FALSE); switch (*line) { case '\0': return FALSE; case '=': line++; while (*line == ' ') line++; return ucd_names_list_parse_alias (line, data); default:; } return TRUE; } static gboolean ucd_names_list_parse_line (const gchar *line, UnicodeData *data) { g_return_val_if_fail (line != NULL, FALSE); switch (*line) { case '\0': return TRUE; case ';': return TRUE; case '@': return ucd_names_list_parse_comment (line); case '\t': return ucd_names_list_parse_indent_line (line + 1, data); default:; } if (g_ascii_isxdigit (*line)) { gchar **elements = g_strsplit (line, "\t", -1); gunichar code; gchar *name; if (g_strv_length (elements) < 2) { g_strfreev (elements); return FALSE; } code = g_ascii_strtoull (elements[0], NULL, 16); name = g_strdup (elements[1]); if (data->name) { unicode_data_new_object (data); unicode_data_reset (data); } data->code = code; data->name = name; } return TRUE; } static gboolean ucd_blocks_parse_comment (const gchar *line) { static gboolean has_version = FALSE; g_return_val_if_fail (line != NULL, FALSE); if (has_version) return TRUE; while (*line == ' ') line++; if (strlen (line) > strlen (BLOCKS_SUBJECT) && strncmp (line, BLOCKS_SUBJECT, strlen (BLOCKS_SUBJECT)) == 0) { unicode_version = g_strdup (line + strlen (BLOCKS_SUBJECT) + 1); has_version = TRUE; } return TRUE; } static gboolean ucd_blocks_parse_line (const gchar *line, UnicodeData *data) { g_return_val_if_fail (line != NULL, FALSE); switch (*line) { case '\0': return TRUE; case '#': return ucd_blocks_parse_comment (line + 1); default:; } if (g_ascii_isxdigit (*line)) { gchar *endptr = NULL; gunichar start = g_ascii_strtoull (line, &endptr, 16); gunichar end; gchar *name = NULL; if (endptr == NULL || *endptr == '\0') return FALSE; while (*endptr == '.') endptr++; line = endptr; endptr = NULL; end = g_ascii_strtoull (line, &endptr, 16); if (endptr == NULL || *endptr == '\0') return FALSE; while (*endptr == ';') endptr++; while (*endptr == ' ') endptr++; if (*endptr == '\0') return FALSE; name = g_strdup (endptr); if (data->name) { unicode_block_new_object (data); unicode_data_reset (data); } data->start = start; data->end = end; data->name = name; } return TRUE; } static gboolean ucd_parse_file (const gchar *filename, GSList **list, UCDType type) { UnicodeData data = { 0, }; gchar *content = NULL; gsize length = 0; GError *error = NULL; gchar *head, *end, *line; int n = 1; g_return_val_if_fail (filename != NULL, FALSE); g_return_val_if_fail (list != NULL, FALSE); if (!g_file_get_contents (filename, &content, &length, &error)) { g_warning ("Failed to load %s: %s", filename, error ? error->message : ""); goto failed_to_parse_ucd_names_list; } head = end = content; while (*end == '\n' && end - content < length) { end++; n++; } head = end; while (end - content < length) { while (*end != '\n' && end - content < length) end++; if (end - content >= length) break; line = g_strndup (head, end - head); switch (type) { case UCD_NAMES_LIST: if (!ucd_names_list_parse_line (line, &data)) { g_warning ("parse error #%d in %s version %s: %s", n, filename, unicode_version ? unicode_version : "(null)", line); } break; case UCD_BLOCKS: if (!ucd_blocks_parse_line (line, &data)) { g_warning ("parse error #%d in %s version %s: %s", n, filename, unicode_version ? unicode_version : "(null)", line); } break; default: g_abort (); } while (*end == '\n' && end - content < length) { end++; n++; } g_free (line); head = end; } if (data.name != NULL) { switch (type) { case UCD_NAMES_LIST: unicode_data_new_object (&data); break; case UCD_BLOCKS: unicode_block_new_object (&data); break; default:; } unicode_data_reset (&data); } g_free (content); *list = data.list; return TRUE; failed_to_parse_ucd_names_list: if (error) g_error_free (error); g_clear_pointer (&content, g_free); *list = data.list; return FALSE; } static void block_list_dump (IBusUnicodeBlock *block, GString *buff) { g_return_if_fail (buff != NULL); g_string_append (buff, " /* TRANSLATORS: You might refer the " \ "translations from gucharmap with\n" \ " the following command:\n" \ " msgmerge -C gucharmap.po ibus.po " \ "ibus.pot */\n"); gchar *line = g_strdup_printf (" N_(\"%s\"),\n", ibus_unicode_block_get_name (block)); g_string_append (buff, line); } static void ucd_block_translatable_save (const gchar *filename, GSList *blocks_list) { gchar *content = NULL; gsize length = 0; GError *error = NULL; gchar *p; GString *buff = NULL; int i; GSList *list = blocks_list; g_return_if_fail (filename != NULL); g_return_if_fail (list != NULL); if (!g_file_get_contents (__FILE__, &content, &length, &error)) { g_warning ("Failed to load %s: %s", __FILE__, error->message); g_clear_pointer (&error, g_error_free); return; } buff = g_string_new (NULL); p = content; for (i = 0; i < LICENSE_LINES; i++, p++) { if ((p = strchr (p, '\n')) == NULL) break; } if (p != NULL) { g_string_append (buff, g_strndup (content, p - content)); g_string_append_c (buff, '\n'); } g_clear_pointer (&content, g_free); g_string_append (buff, g_strdup ("\n")); g_string_append (buff, g_strdup_printf ("/* This file is generated by %s. */", __FILE__)); g_string_append (buff, g_strdup ("\n")); g_string_append (buff, g_strdup ("include \n")); g_string_append (buff, g_strdup ("\n")); g_string_append (buff, g_strdup ("#ifndef __IBUS_UNICODE_GEN_H_\n")); g_string_append (buff, g_strdup ("#define __IBUS_UNICODE_GEN_H_\n")); g_string_append (buff, g_strdup ("const static char *unicode_blocks[] = {\n")); g_slist_foreach (list, (GFunc)block_list_dump, buff); g_string_append (buff, g_strdup ("};\n")); g_string_append (buff, g_strdup ("#endif\n")); if (!g_file_set_contents (filename, buff->str, -1, &error)) { g_warning ("Failed to save emoji category file %s: %s", filename, error->message); g_error_free (error); } g_string_free (buff, TRUE); } int main (int argc, char *argv[]) { gchar *prgname; gchar *input_names_list = NULL; gchar *input_blocks = NULL; gchar *output_names_list = NULL; gchar *output_blocks = NULL; gchar *output_blocks_trans = NULL; GOptionEntry entries[] = { { "input-names-list", 'n', 0, G_OPTION_ARG_STRING, &input_names_list, "Parse NamesList.txt FILE in unicode.org ", "FILE" }, { "input-blocks", 'b', 0, G_OPTION_ARG_STRING, &input_blocks, "Parse Blocks.txt FILE in unicode.org ", "FILE" }, { "output-names-list", 'o', 0, G_OPTION_ARG_STRING, &output_names_list, "Save the Unicode data as FILE", "FILE" }, { "output-blocks", 'B', 0, G_OPTION_ARG_STRING, &output_blocks, "Save the Unicode block list as FILE", "FILE" }, { "output-blocks-trans", 'C', 0, G_OPTION_ARG_STRING, &output_blocks_trans, "Save the translatable Unicode blocks as FILE", "FILE" }, { NULL } }; GOptionContext *context; GError *error = NULL; GSList *names_list = NULL; GSList *blocks_list = NULL; #ifdef HAVE_LOCALE_H /* To output emoji warnings. */ setlocale (LC_ALL, ""); #endif prgname = g_path_get_basename (argv[0]); g_set_prgname (prgname); g_free (prgname); context = g_option_context_new (NULL); g_option_context_add_main_entries (context, entries, NULL); if (argc < 3) { g_print ("%s", g_option_context_get_help (context, TRUE, NULL)); g_option_context_free (context); return -1; } if (!g_option_context_parse (context, &argc, &argv, &error)) { g_warning ("Failed options: %s", error->message); g_error_free (error); return -1; } g_option_context_free (context); if (input_names_list) { ucd_parse_file (input_names_list, &names_list, UCD_NAMES_LIST); g_free (input_names_list); } if (output_names_list && names_list) ibus_unicode_data_save (output_names_list, names_list); g_free (output_names_list); if (input_blocks) { ucd_parse_file (input_blocks, &blocks_list, UCD_BLOCKS); g_free (input_blocks); } if (output_blocks && blocks_list) ibus_unicode_block_save (output_blocks, blocks_list); if (output_blocks_trans && blocks_list) ucd_block_translatable_save (output_blocks_trans, blocks_list); g_free (output_blocks); g_free (unicode_version); return 0; }