/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; coding: utf-8 -*- *
* gtksourcecompletionwordsutils.c
* This file is part of GtkSourceView
*
* Copyright (C) 2009 - Jesse van den Kieboom
* Copyright (C) 2013 - Sébastien Wilmet
*
* gtksourceview is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* gtksourceview is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "gtksourcecompletionwordsutils.h"
#include <string.h>
/* Here, we work on strings. It is more efficient than working with
* GtkTextIters to traverse the text (~3x faster). Both techniques are equally
* difficult to implement.
*/
static gboolean
valid_word_char (gunichar ch)
{
return g_unichar_isprint (ch) && (ch == '_' || g_unichar_isalnum (ch));
}
static gboolean
valid_start_char (gunichar ch)
{
return !g_unichar_isdigit (ch);
}
/* Find the next word in @text, beginning at the index @start_idx.
* Use only valid_word_char() to find the word boundaries.
* Store in @start_idx and @end_idx the word boundaries. The character at
* @start_idx is included in the word, but the character at @end_idx is not
* included in the word (it is the next char, or '\0').
*
* Returns %TRUE if a word has been found.
*/
static gboolean
find_next_word (gchar *text,
guint *start_idx,
guint *end_idx)
{
gchar *cur_char;
/* Find the start of the next word */
cur_char = text + *start_idx;
while (TRUE)
{
gunichar ch = g_utf8_get_char (cur_char);
if (ch == '\0')
{
return FALSE;
}
if (valid_word_char (ch))
{
*start_idx = cur_char - text;
break;
}
cur_char = g_utf8_next_char (cur_char);
}
/* Find the end of the word */
while (TRUE)
{
gunichar ch;
cur_char = g_utf8_next_char (cur_char);
ch = g_utf8_get_char (cur_char);
if (ch == '\0' ||
!valid_word_char (ch))
{
*end_idx = cur_char - text;
return TRUE;
}
}
}
/* Get the list of words in @text.
* You must free the data with g_free(), and free the list with
* g_slist_free().
*/
GSList *
_gtk_source_completion_words_utils_scan_words (gchar *text,
guint minimum_word_size)
{
GSList *words = NULL;
guint start_idx = 0;
guint end_idx = 0;
while (find_next_word (text, &start_idx, &end_idx))
{
guint word_size;
gunichar ch;
g_assert (end_idx >= start_idx);
word_size = end_idx - start_idx;
ch = g_utf8_get_char (text + start_idx);
if (word_size >= minimum_word_size &&
valid_start_char (ch))
{
gchar *new_word = g_strndup (text + start_idx, word_size);
words = g_slist_prepend (words, new_word);
}
start_idx = end_idx;
}
return words;
}
/* Get the word at the end of @text.
* Returns %NULL if not found.
* Free the return value with g_free().
*/
gchar *
_gtk_source_completion_words_utils_get_end_word (gchar *text)
{
gchar *cur_char = text + strlen (text);
gboolean word_found = FALSE;
gunichar ch;
while (TRUE)
{
gchar *prev_char = g_utf8_find_prev_char (text, cur_char);
if (prev_char == NULL)
{
break;
}
ch = g_utf8_get_char (prev_char);
if (!valid_word_char (ch))
{
break;
}
word_found = TRUE;
cur_char = prev_char;
}
if (!word_found)
{
return NULL;
}
ch = g_utf8_get_char (cur_char);
if (!valid_start_char (ch))
{
return NULL;
}
return g_strdup (cur_char);
}
/* Adjust @start and @end to word boundaries, if they touch or are inside a
* word. Uses only valid_word_char().
*/
void
_gtk_source_completion_words_utils_adjust_region (GtkTextIter *start,
GtkTextIter *end)
{
g_return_if_fail (gtk_text_iter_compare (start, end) <= 0);
while (TRUE)
{
GtkTextIter iter = *start;
if (!gtk_text_iter_backward_char (&iter))
{
break;
}
if (!valid_word_char (gtk_text_iter_get_char (&iter)))
{
break;
}
*start = iter;
}
while (valid_word_char (gtk_text_iter_get_char (end)))
{
gtk_text_iter_forward_char (end);
}
}
/* @iter here is a vertical bar between two characters, not the character
* pointed by @iter. So "inside word" means really "inside word", not the
* definition used by gtk_text_iter_inside_word().
*/
static gboolean
iter_inside_word (const GtkTextIter *iter)
{
GtkTextIter prev;
if (gtk_text_iter_is_start (iter) || gtk_text_iter_is_end (iter))
{
return FALSE;
}
prev = *iter;
gtk_text_iter_backward_char (&prev);
return (valid_word_char (gtk_text_iter_get_char (&prev)) &&
valid_word_char (gtk_text_iter_get_char (iter)));
}
/* Checks if @start and @end are well placed for scanning the region between the
* two iters.
* If an iter isn't well placed, then the library of words will maybe be
* inconsistent with the words present in the text buffer.
*/
void
_gtk_source_completion_words_utils_check_scan_region (const GtkTextIter *start,
const GtkTextIter *end)
{
g_return_if_fail (gtk_text_iter_compare (start, end) <= 0);
if (iter_inside_word (start))
{
g_warning ("Words completion: 'start' iter not well placed.");
}
if (iter_inside_word (end))
{
g_warning ("Words completion: 'end' iter not well placed.");
}
}