/*
 * Copyright (C) 2010 Collabora Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Xavier Claessens
 */

#include "config.h"
#include "tpaw-string-parser.h"

#include <string.h>

#include "tpaw-utils.h"

/*
 * Building blocks of the URI regular expression.
 *
 * SCHEMES            - scheme name: ASCII letters and '+'.
 * INVALID_CHARS      - characters that can never be part of a URI here:
 *                      whitespace, double quote, '<' and '>'.
 * INVALID_CHARS_EXT  - INVALID_CHARS plus brackets and list punctuation.
 * INVALID_CHARS_FULL - INVALID_CHARS_EXT plus '?' and '\''.
 * BODY               - one char outside INVALID_CHARS_FULL followed by any
 *                      number of chars outside INVALID_CHARS_EXT.
 * BODY_END           - like BODY but the last char must additionally not be
 *                      '.', so trailing sentence punctuation is not swallowed.
 *
 * URI_REGEX matches either "scheme://...", "www."/"ftp." prefixed hosts, or
 * an (optionally "mailto:" prefixed) "user@host.tld" address.
 */
#define SCHEMES "([a-zA-Z\\+]+)"
#define INVALID_CHARS "\\s\"<>"
#define INVALID_CHARS_EXT INVALID_CHARS "\\[\\](){},;:"
#define INVALID_CHARS_FULL INVALID_CHARS_EXT "?'"
#define BODY "([^"INVALID_CHARS_FULL"])([^"INVALID_CHARS_EXT"]*)"
#define BODY_END "([^"INVALID_CHARS"]*)[^"INVALID_CHARS_FULL".]"
#define URI_REGEX "("SCHEMES"://"BODY_END")" \
    "|((www|ftp)\\."BODY_END")" \
    "|((mailto:)?"BODY"@"BODY"\\."BODY_END")"

/*
 * uri_regex_dup_singleton:
 *
 * Lazily compiles URI_REGEX on first use and caches it in a function-local
 * static; the cached reference is never released.
 *
 * Returns: a new reference to the shared regex (release with
 * g_regex_unref()), or NULL if compilation failed. A failure is logged with
 * g_warning() and compilation is attempted again on the next call.
 */
static GRegex *
uri_regex_dup_singleton (void)
{
  static GRegex *uri_regex = NULL;

  /* We intentionally leak the regex so it's not recomputed */
  if (!uri_regex)
    {
      GError *error = NULL;

      uri_regex = g_regex_new (URI_REGEX, 0, 0, &error);
      if (uri_regex == NULL)
        {
          g_warning ("Failed to create reg exp: %s", error->message);
          g_error_free (error);
          return NULL;
        }
    }

  return g_regex_ref (uri_regex);
}

/*
 * tpaw_string_parser_substr:
 * @text: start of the text fragment to process
 * @len: length of the fragment in bytes, or -1 if NUL-terminated
 * @parsers: array of parsers terminated by an entry whose match_func is
 *   NULL; may itself be NULL
 * @user_data: opaque pointer forwarded to the parser callbacks
 *
 * Hands the fragment to the first parser of @parsers, giving it the rest of
 * the array as its sub-parsers. Does nothing when @parsers is NULL or
 * already at its terminating entry.
 */
void
tpaw_string_parser_substr (const gchar *text,
    gssize len,
    TpawStringParser *parsers,
    gpointer user_data)
{
  if (parsers != NULL && parsers[0].match_func != NULL)
    {
      parsers[0].match_func (text, len,
          parsers[0].replace_func, parsers + 1,
          user_data);
    }
}

/*
 * tpaw_string_match_link:
 * @text: text to scan for URIs; must not be NULL
 * @len: length of @text in bytes, or a negative value if NUL-terminated
 * @replace_func: called for every URI found
 * @sub_parsers: parsers applied to the text between URIs
 * @user_data: opaque pointer forwarded to the callbacks
 *
 * Splits @text into URI and non-URI runs. Each URI is passed to
 * @replace_func; every other run (including the tail after the last URI) is
 * passed on to @sub_parsers. If the URI regex is unavailable the whole text
 * is passed to @sub_parsers.
 */
void
tpaw_string_match_link (const gchar *text,
    gssize len,
    TpawStringReplace replace_func,
    TpawStringParser *sub_parsers,
    gpointer user_data)
{
  GRegex *uri_regex;
  GMatchInfo *match_info = NULL;
  gboolean match;
  gint last = 0;

  g_return_if_fail (text != NULL);

  /* Resolve the "NUL-terminated" sentinel once, so that the remaining
   * length computed below (len - last) is always a real byte count rather
   * than an arbitrary negative number. */
  if (len < 0)
    len = strlen (text);

  uri_regex = uri_regex_dup_singleton ();
  if (uri_regex == NULL)
    {
      tpaw_string_parser_substr (text, len, sub_parsers, user_data);
      return;
    }

  match = g_regex_match_full (uri_regex, text, len, 0, 0, &match_info, NULL);
  if (match)
    {
      gint s = 0, e = 0;

      do
        {
          g_match_info_fetch_pos (match_info, 0, &s, &e);

          if (s > last)
            {
              /* Append the text between last link (or the
               * start of the message) and this link */
              tpaw_string_parser_substr (text + last,
                  s - last,
                  sub_parsers,
                  user_data);
            }

          replace_func (text + s, e - s, NULL, user_data);

          last = e;
        }
      while (g_match_info_next (match_info, NULL));
    }

  /* Whatever follows the last link (or the whole text if none matched) */
  tpaw_string_parser_substr (text + last, len - last,
      sub_parsers, user_data);

  g_match_info_free (match_info);
  g_regex_unref (uri_regex);
}

/*
 * tpaw_string_match_all:
 * @text: text fragment
 * @len: length of @text in bytes, or -1 if NUL-terminated
 * @replace_func: called once with the whole fragment
 * @sub_parsers: unused
 * @user_data: opaque pointer forwarded to @replace_func
 *
 * Catch-all matcher: treats the entire fragment as a single match.
 */
void
tpaw_string_match_all (const gchar *text,
    gssize len,
    TpawStringReplace replace_func,
    TpawStringParser *sub_parsers,
    gpointer user_data)
{
  replace_func (text, len, NULL, user_data);
}

/*
 * tpaw_string_replace_link:
 * @text: start of the URI
 * @len: length of the URI in bytes
 * @match_data: unused
 * @user_data: the GString to append to
 *
 * Appends an escaped HTML anchor for the URI to the GString: the href is the
 * result of tpaw_make_absolute_url_len() and the visible title is the URI
 * text as it appeared in the message.
 */
void
tpaw_string_replace_link (const gchar *text,
    gssize len,
    gpointer match_data,
    gpointer user_data)
{
  GString *string = user_data;
  gchar *real_url;
  gchar *title;
  gchar *markup;

  real_url = tpaw_make_absolute_url_len (text, len);

  /* Need to copy manually, because g_markup_printf_escaped does not work
   * with string precision pitfalls. */
  title = g_strndup (text, len);

  /* Append the link inside <a href=""></a> tag; both the URL and the
   * title are markup-escaped by g_markup_printf_escaped(). */
  markup = g_markup_printf_escaped ("<a href=\"%s\">%s</a>",
      real_url, title);

  g_string_append (string, markup);

  g_free (real_url);
  g_free (title);
  g_free (markup);
}

/*
 * tpaw_string_replace_escaped:
 * @text: text fragment
 * @len: length of @text in bytes, or -1 if NUL-terminated
 * @match_data: unused
 * @user_data: the GString to append to
 *
 * Appends the markup-escaped form of the fragment to the GString, dropping
 * every '\r' character.
 */
void
tpaw_string_replace_escaped (const gchar *text,
    gssize len,
    gpointer match_data,
    gpointer user_data)
{
  GString *string = user_data;
  gchar *escaped;
  gsize i;
  gsize escaped_len, old_len;

  escaped = g_markup_escape_text (text, len);
  escaped_len = strlen (escaped);

  /* Allocate more space to string (we really need a g_string_extend...):
   * growing then truncating reserves the capacity without changing the
   * visible content, so the appends below do not reallocate. */
  old_len = string->len;
  g_string_set_size (string, old_len + escaped_len);
  g_string_truncate (string, old_len);

  /* Remove '\r' */
  for (i = 0; i < escaped_len; i++)
    {
      if (escaped[i] != '\r')
        g_string_append_c (string, escaped[i]);
    }

  g_free (escaped);
}

/*
 * tpaw_add_link_markup:
 * @text: NUL-terminated text; must not be NULL
 *
 * Builds a copy of @text in which every detected URI is wrapped in an HTML
 * anchor and all remaining text is markup-escaped with '\r' removed.
 *
 * Returns: a newly allocated string owned by the caller (free with
 * g_free()), or NULL if @text is NULL.
 */
gchar *
tpaw_add_link_markup (const gchar *text)
{
  /* Links are extracted first; everything else falls through to the
   * escaping catch-all. */
  TpawStringParser parsers[] = {
    {tpaw_string_match_link, tpaw_string_replace_link},
    {tpaw_string_match_all, tpaw_string_replace_escaped},
    {NULL, NULL}
  };
  GString *string;

  g_return_val_if_fail (text != NULL, NULL);

  string = g_string_sized_new (strlen (text));
  tpaw_string_parser_substr (text, -1, parsers, string);

  return g_string_free (string, FALSE);
}