Blame glib/glib/gurifuncs.c

Packit db3073
/* GIO - GLib Input, Output and Streaming Library
Packit db3073
 * 
Packit db3073
 * Copyright (C) 2006-2007 Red Hat, Inc.
Packit db3073
 *
Packit db3073
 * This library is free software; you can redistribute it and/or
Packit db3073
 * modify it under the terms of the GNU Lesser General Public
Packit db3073
 * License as published by the Free Software Foundation; either
Packit db3073
 * version 2 of the License, or (at your option) any later version.
Packit db3073
 *
Packit db3073
 * This library is distributed in the hope that it will be useful,
Packit db3073
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit db3073
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit db3073
 * Lesser General Public License for more details.
Packit db3073
 *
Packit db3073
 * You should have received a copy of the GNU Lesser General
Packit db3073
 * Public License along with this library; if not, write to the
Packit db3073
 * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
Packit db3073
 * Boston, MA 02111-1307, USA.
Packit db3073
 *
Packit db3073
 * Author: Alexander Larsson <alexl@redhat.com>
Packit db3073
 */
Packit db3073
Packit db3073
#include "config.h"
Packit db3073
Packit db3073
#include "gurifuncs.h"
Packit db3073
Packit db3073
#include <glib/gstrfuncs.h>
Packit db3073
#include <glib/gmessages.h>
Packit db3073
#include <glib/gstring.h>
Packit db3073
#include <glib/gmem.h>
Packit db3073
Packit db3073
#include <string.h>
Packit db3073
Packit db3073
#include "config.h"
Packit db3073
Packit db3073
/**
Packit db3073
 * SECTION:gurifuncs
Packit db3073
 * @title: URI Functions
Packit db3073
 * @short_description: manipulating URIs
Packit db3073
 *
Packit db3073
 * Functions for manipulating Universal Resource Identifiers (URIs) as
Packit db3073
 * defined by <ulink url="http://www.ietf.org/rfc/rfc3986.txt">
Packit db3073
 * RFC 3986</ulink>. It is highly recommended that you read and
Packit db3073
 * understand RFC 3986 before using this API.
Packit db3073
 */
Packit db3073
Packit db3073
/* Decodes the two hexadecimal digits found at @scanner (the characters
 * that follow the '%' of an escape sequence) into the value they encode.
 *
 * Returns the decoded value, or -1 if either character is not a
 * hexadecimal digit. The second character is only examined when the
 * first one is valid.
 */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble < 0)
    return -1;

  low_nibble = g_ascii_xdigit_value (scanner[1]);
  if (low_nibble < 0)
    return -1;

  return (high_nibble << 4) | low_nibble;
}
Packit db3073
Packit db3073
/**
Packit db3073
 * g_uri_unescape_segment:
Packit db3073
 * @escaped_string: (allow-none): A string, may be %NULL
Packit db3073
 * @escaped_string_end: (allow-none): Pointer to end of @escaped_string, may be %NULL
Packit db3073
 * @illegal_characters: (allow-none): An optional string of illegal characters not to be allowed, may be %NULL
Packit db3073
 * 
Packit db3073
 * Unescapes a segment of an escaped string.
Packit db3073
 *
Packit db3073
 * If any of the characters in @illegal_characters or the character zero appears
Packit db3073
 * as an escaped character in @escaped_string then that is an error and %NULL
Packit db3073
 * will be returned. This is useful it you want to avoid for instance having a
Packit db3073
 * slash being expanded in an escaped path element, which might confuse pathname
Packit db3073
 * handling.
Packit db3073
 *
Packit db3073
 * Returns: an unescaped version of @escaped_string or %NULL on error.
Packit db3073
 * The returned string should be freed when no longer needed.  As a
Packit db3073
 * special case if %NULL is given for @escaped_string, this function
Packit db3073
 * will return %NULL.
Packit db3073
 *
Packit db3073
 * Since: 2.16
Packit db3073
 **/
Packit db3073
char *
Packit db3073
g_uri_unescape_segment (const char *escaped_string,
Packit db3073
			const char *escaped_string_end,
Packit db3073
			const char *illegal_characters)
Packit db3073
{
Packit db3073
  const char *in;
Packit db3073
  char *out, *result;
Packit db3073
  gint character;
Packit db3073
  
Packit db3073
  if (escaped_string == NULL)
Packit db3073
    return NULL;
Packit db3073
  
Packit db3073
  if (escaped_string_end == NULL)
Packit db3073
    escaped_string_end = escaped_string + strlen (escaped_string);
Packit db3073
  
Packit db3073
  result = g_malloc (escaped_string_end - escaped_string + 1);
Packit db3073
  
Packit db3073
  out = result;
Packit db3073
  for (in = escaped_string; in < escaped_string_end; in++)
Packit db3073
    {
Packit db3073
      character = *in;
Packit db3073
      
Packit db3073
      if (*in == '%')
Packit db3073
	{
Packit db3073
	  in++;
Packit db3073
	  
Packit db3073
	  if (escaped_string_end - in < 2)
Packit db3073
	    {
Packit db3073
	      /* Invalid escaped char (to short) */
Packit db3073
	      g_free (result);
Packit db3073
	      return NULL;
Packit db3073
	    }
Packit db3073
	  
Packit db3073
	  character = unescape_character (in);
Packit db3073
	  
Packit db3073
	  /* Check for an illegal character. We consider '\0' illegal here. */
Packit db3073
	  if (character <= 0 ||
Packit db3073
	      (illegal_characters != NULL &&
Packit db3073
	       strchr (illegal_characters, (char)character) != NULL))
Packit db3073
	    {
Packit db3073
	      g_free (result);
Packit db3073
	      return NULL;
Packit db3073
	    }
Packit db3073
	  
Packit db3073
	  in++; /* The other char will be eaten in the loop header */
Packit db3073
	}
Packit db3073
      *out++ = (char)character;
Packit db3073
    }
Packit db3073
  
Packit db3073
  *out = '\0';
Packit db3073
  
Packit db3073
  return result;
Packit db3073
}
Packit db3073
Packit db3073
/**
Packit db3073
 * g_uri_unescape_string:
Packit db3073
 * @escaped_string: an escaped string to be unescaped.
Packit db3073
 * @illegal_characters: an optional string of illegal characters not to be allowed.
Packit db3073
 * 
Packit db3073
 * Unescapes a whole escaped string.
Packit db3073
 * 
Packit db3073
 * If any of the characters in @illegal_characters or the character zero appears
Packit db3073
 * as an escaped character in @escaped_string then that is an error and %NULL
Packit db3073
 * will be returned. This is useful it you want to avoid for instance having a
Packit db3073
 * slash being expanded in an escaped path element, which might confuse pathname
Packit db3073
 * handling.
Packit db3073
 *
Packit db3073
 * Returns: an unescaped version of @escaped_string. The returned string 
Packit db3073
 * should be freed when no longer needed.
Packit db3073
 *
Packit db3073
 * Since: 2.16
Packit db3073
 **/
Packit db3073
char *
Packit db3073
g_uri_unescape_string (const char *escaped_string,
Packit db3073
		       const char *illegal_characters)
Packit db3073
{
Packit db3073
  return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
Packit db3073
}
Packit db3073
Packit db3073
/**
Packit db3073
 * g_uri_parse_scheme:
Packit db3073
 * @uri: a valid URI.
Packit db3073
 * 
Packit db3073
 * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
Packit db3073
 * <programlisting>
Packit db3073
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 
Packit db3073
 * </programlisting>
Packit db3073
 * Common schemes include "file", "http", "svn+ssh", etc.
Packit db3073
 * 
Packit db3073
 * Returns: The "Scheme" component of the URI, or %NULL on error. 
Packit db3073
 * The returned string should be freed when no longer needed.
Packit db3073
 *
Packit db3073
 * Since: 2.16
Packit db3073
 **/
Packit db3073
char *
Packit db3073
g_uri_parse_scheme (const char  *uri)
Packit db3073
{
Packit db3073
  const char *p;
Packit db3073
  char c;
Packit db3073
Packit db3073
  g_return_val_if_fail (uri != NULL, NULL);
Packit db3073
Packit db3073
  /* From RFC 3986 Decodes:
Packit db3073
   * URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
Packit db3073
   */ 
Packit db3073
Packit db3073
  p = uri;
Packit db3073
  
Packit db3073
  /* Decode scheme:
Packit db3073
     scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
Packit db3073
  */
Packit db3073
Packit db3073
  if (!g_ascii_isalpha (*p))
Packit db3073
    return NULL;
Packit db3073
  
Packit db3073
  while (1)
Packit db3073
    {
Packit db3073
      c = *p++;
Packit db3073
      
Packit db3073
      if (c == ':')
Packit db3073
	break;
Packit db3073
      
Packit db3073
      if (!(g_ascii_isalnum(c) ||
Packit db3073
	    c == '+' ||
Packit db3073
	    c == '-' ||
Packit db3073
	    c == '.'))
Packit db3073
	return NULL;
Packit db3073
    }
Packit db3073
  
Packit db3073
  return g_strndup (uri, p - uri - 1);
Packit db3073
}
Packit db3073
Packit db3073
/**
Packit db3073
 * g_uri_escape_string:
Packit db3073
 * @unescaped: the unescaped input string.
Packit db3073
 * @reserved_chars_allowed: a string of reserved characters that are
Packit db3073
 *      allowed to be used, or %NULL.
Packit db3073
 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
Packit db3073
 * 
Packit db3073
 * Escapes a string for use in a URI.
Packit db3073
 *
Packit db3073
 * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
Packit db3073
 * characters plus dash, dot, underscore and tilde) are escaped.
Packit db3073
 * But if you specify characters in @reserved_chars_allowed they are not
Packit db3073
 * escaped. This is useful for the "reserved" characters in the URI
Packit db3073
 * specification, since those are allowed unescaped in some portions of
Packit db3073
 * a URI. 
Packit db3073
 * 
Packit db3073
 * Returns: an escaped version of @unescaped. The returned string should be 
Packit db3073
 * freed when no longer needed.
Packit db3073
 *
Packit db3073
 * Since: 2.16
Packit db3073
 **/
Packit db3073
char *
Packit db3073
g_uri_escape_string (const char *unescaped,
Packit db3073
		     const char  *reserved_chars_allowed,
Packit db3073
		     gboolean     allow_utf8)
Packit db3073
{
Packit db3073
  GString *s;
Packit db3073
Packit db3073
  g_return_val_if_fail (unescaped != NULL, NULL);
Packit db3073
Packit db3073
  s = g_string_sized_new (strlen (unescaped) + 10);
Packit db3073
  
Packit db3073
  g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
Packit db3073
  
Packit db3073
  return g_string_free (s, FALSE);
Packit db3073
}