Blob Blame History Raw
/*
    Copyright (C) 2015  ABRT team
    Copyright (C) 2015  RedHat Inc

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "internal_libreport.h"

#include <regex.h>

/* Copies the text of regex sub-match @m (offsets into @uri) to *@dst and
 * advances *@dst past the copied bytes. No terminator is written; the caller
 * provides a zero-filled destination.
 *
 * If @output is not NULL, a separately allocated copy of the matched text is
 * stored in *@output. Nothing happens when the group did not take part in the
 * match (rm_so == -1).
 */
static void uri_append_match(const char *uri, const regmatch_t *m, char **dst, char **output)
{
    if (m->rm_so == -1)
        return;

    const size_t len = m->rm_eo - m->rm_so;

    if (output != NULL)
        *output = xstrndup(uri + m->rm_so, len);

    memcpy(*dst, uri + m->rm_so, len);
    *dst += len;
}

/* Splits @uri into its components and builds a copy without the userinfo
 * ("username[:password]@") part of the authority.
 *
 * @param uri       NUL-terminated URI to parse.
 * @param result    Mandatory. Receives a newly allocated copy of @uri with the
 *                  userinfo removed.
 * @param scheme    Optional (may be NULL). Receives the scheme INCLUDING the
 *                  trailing ':' (regex group 1, e.g. "ftp:").
 * @param hostname  Optional. Receives the authority without the userinfo (the
 *                  text after the last '@', or the whole authority if there
 *                  is no '@').
 * @param username  Optional. Receives the userinfo text before the first ':'
 *                  (or the whole userinfo if it contains no ':').
 * @param password  Optional. Receives the userinfo text after the first ':'.
 * @param location  Optional. Receives path + query + fragment (regex group 5).
 *
 * Every non-NULL optional output is first set to NULL and then filled only if
 * the corresponding component is present. The strings come from
 * xzalloc()/xstrndup(); the caller owns them.
 *
 * @return 0 on success, -EINVAL if @uri does not match the RFC 3986 pattern
 *         (in that case no output argument is written).
 */
int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
{
    /* https://www.ietf.org/rfc/rfc3986.txt
     * Appendix B.  Parsing a URI Reference with a Regular Expression
     *
     * scheme    = $2
     * authority = $4
     * location  = $5 <- introduced by jfilak
     * path      = $6
     * query     = $8
     * fragment  = $10
     *                         12            3  4          56       7   8        9 10 */
    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
    regex_t re;
    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
    assert(r == 0 || !"BUG: invalid regular expression");

    /* Only groups 1, 3, 4 and 5 are consumed below, so 10 slots (the whole
     * match plus groups 1-9) are sufficient. */
    regmatch_t matchptr[10];
    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);

    /* The match array holds plain offsets into @uri, so the compiled pattern
     * is no longer needed. Release it here to cover both the success and the
     * failure path. */
    regfree(&re);

    if (r != 0)
    {
        log_debug("URI does not match RFC3986 regular expression.");
        return -EINVAL;
    }

    /* The result is assembled from non-overlapping pieces of @uri, hence it
     * can never be longer than @uri. The buffer is zero-filled, which also
     * provides the terminating NUL. */
    char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char));
    *result = ptr;
    if (scheme != NULL)
        *scheme = NULL;
    if (hostname != NULL)
        *hostname = NULL;
    if (username != NULL)
        *username = NULL;
    if (password != NULL)
        *password = NULL;
    if (location != NULL)
        *location = NULL;

    /* https://www.ietf.org/rfc/rfc3986.txt
     * 5.3.  Component Recomposition
     *
      result = ""

      if defined(scheme) then
         append scheme to result;
         append ":" to result;
      endif;

      if defined(authority) then
         append "//" to result;
         append authority to result;
      endif;

      append path to result;

      if defined(query) then
         append "?" to result;
         append query to result;
      endif;

      if defined(fragment) then
         append "#" to result;
         append fragment to result;
      endif;

      return result;
    */

    /* Append "scheme:" if defined */
    uri_append_match(uri, &matchptr[1], &ptr, scheme);

    /* If authority is defined (group 3 = "//" + authority), append "//" */
    if (matchptr[3].rm_so != -1)
    {
        memcpy(ptr, "//", 2);
        ptr += 2;
    }

    /* If authority has address part (group 4), remove userinfo and add the
     * address */
    const regmatch_t *match_authority = &matchptr[4];
    if (match_authority->rm_so != -1)
    {
        size_t len = match_authority->rm_eo - match_authority->rm_so;
        const char *authority = uri + match_authority->rm_so;

        /* Find the last '@'. Just for the case some used @ in username or
         * password. If there is no '@', the loop ends with at == 0 and the
         * whole authority is treated as the address. */
        size_t at = len;
        while (at != 0)
        {
            if (authority[--at] != '@')
                continue;

            /* Find the first ':' before @. There should not be more ':' but this
             * is the most secure way -> avoid leaking an excerpt of a password
             * containing ':'.*/
            size_t colon = 0;
            while (colon < at)
            {
                if (authority[colon] != ':')
                {
                    ++colon;
                    continue;
                }

                if (password != NULL)
                    *password = xstrndup(authority + colon + 1, at - colon - 1);

                break;
            }

            /* Without a ':' the loop above leaves colon == at, i.e. the whole
             * userinfo is the username. */
            if (username != NULL)
                *username = xstrndup(authority, colon);

            /* Step over the '@' so that 'at' is the offset of the address */
            ++at;
            break;
        }

        len -= at;

        if (hostname != NULL)
            *hostname = xstrndup(authority + at, len);

        memcpy(ptr, authority + at, len);
        ptr += len;
    }

    /* Append path, query and fragment or "" */
    uri_append_match(uri, &matchptr[5], &ptr, location);

    return 0;
}