/*
    Copyright (C) 2015  ABRT team
    Copyright (C) 2015  RedHat Inc

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "internal_libreport.h"

#include <regex.h>

/* Indices of the capture groups of the RFC 3986 expression used below. */
enum
{
    URI_RX_SCHEME_COLON    = 1, /* "scheme:"                     */
    URI_RX_SLASH_AUTHORITY = 3, /* "//authority"                 */
    URI_RX_AUTHORITY       = 4, /* "authority"                   */
    URI_RX_LOCATION        = 5, /* path + "?query" + "#fragment" */

    /* Number of regmatch_t slots handed to regexec(): groups 0..9.
     * Group 10 (the bare fragment) is never read, so it is not captured. */
    URI_RX_NMATCH          = 10,
};

/*
 * Copies the text of a single regex match to *dst and advances *dst past it.
 * When output is not NULL, *output also receives an xstrndup() copy of the
 * matched text. Does nothing if the group did not participate in the match.
 */
static void uri_append_match(char **dst, const char *uri, const regmatch_t *match, char **output)
{
    if (match->rm_so == -1)
        return;

    const size_t len = match->rm_eo - match->rm_so;
    const char *const start = uri + match->rm_so;

    if (output != NULL)
        *output = xstrndup(start, len);

    /* The destination was zero-filled by its allocator and is large enough
     * for the whole URI, so a raw copy keeps it NUL-terminated. */
    memcpy(*dst, start, len);
    *dst += len;
}

/*
 * Splits @uri into its components and builds a copy of it with the userinfo
 * ("username[:password]@") part of the authority removed.
 *
 * @param uri      URI to process; must not be NULL.
 * @param result   Must not be NULL. On success receives a newly allocated
 *                 string holding @uri without userinfo.
 * @param scheme   Optional. Receives the scheme including its trailing ':'
 *                 (the text of regex group 1), or NULL if @uri has no scheme.
 * @param hostname Optional. Receives the authority with userinfo stripped
 *                 (a port, if present, stays part of it), or NULL if @uri
 *                 has no authority.
 * @param username Optional. Receives the text before the first ':' of the
 *                 userinfo (or the whole userinfo when it has no ':'), or
 *                 NULL if the authority contains no '@'.
 * @param password Optional. Receives the text between the first ':' and the
 *                 last '@' of the authority, or NULL if there is none.
 * @param location Optional. Receives path, query and fragment as one string
 *                 (possibly empty).
 *
 * All returned strings come from xzalloc()/xstrndup(); presumably the caller
 * is expected to free() them -- confirm against the callers.
 *
 * @return 0 on success, -EINVAL if @uri does not match the RFC 3986
 *         expression (in that case no output argument is written).
 */
int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
{
    /* https://www.ietf.org/rfc/rfc3986.txt
     * Appendix B.  Parsing a URI Reference with a Regular Expression
     *
     * scheme    = $2
     * authority = $4
     * location  = $5 <- introduced by jfilak
     * path      = $6
     * query     = $8
     * fragment  = $10
     */
    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
    regex_t re;
    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
    assert(r == 0 || !"BUG: invalid regular expression");

    regmatch_t matchptr[URI_RX_NMATCH];
    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);

    /* The match offsets refer to uri, not to the compiled pattern, so the
     * pattern can be released right away -- on success and failure alike. */
    regfree(&re);

    if (r != 0)
    {
        log_debug("URI does not match RFC3986 regular expression.");
        return -EINVAL;
    }

    /* Removing userinfo can only shrink the string, so strlen(uri) + 1
     * zeroed bytes are always enough for the recomposed URI. */
    char *ptr = xzalloc(strlen(uri) + 1);
    *result = ptr;

    if (scheme != NULL)
        *scheme = NULL;
    if (hostname != NULL)
        *hostname = NULL;
    if (username != NULL)
        *username = NULL;
    if (password != NULL)
        *password = NULL;
    if (location != NULL)
        *location = NULL;

    /* https://www.ietf.org/rfc/rfc3986.txt
     * 5.3.  Component Recomposition
     *
     *    result = ""
     *
     *    if defined(scheme) then
     *       append scheme to result;
     *       append ":" to result;
     *    endif;
     *
     *    if defined(authority) then
     *       append "//" to result;
     *       append authority to result;
     *    endif;
     *
     *    append path to result;
     *
     *    if defined(query) then
     *       append "?" to result;
     *       append query to result;
     *    endif;
     *
     *    if defined(fragment) then
     *       append "#" to result;
     *       append fragment to result;
     *    endif;
     *
     *    return result;
     */

    /* Append "scheme:" if defined */
    uri_append_match(&ptr, uri, &matchptr[URI_RX_SCHEME_COLON], scheme);

    /* If authority is defined, append "//" */
    if (matchptr[URI_RX_SLASH_AUTHORITY].rm_so != -1)
    {
        memcpy(ptr, "//", 2);
        ptr += 2;
    }

    /* If authority has address part, remove userinfo and add the address */
    const regmatch_t *const match_authority = &matchptr[URI_RX_AUTHORITY];
    if (match_authority->rm_so != -1)
    {
        size_t len = match_authority->rm_eo - match_authority->rm_so;
        const char *const authority = uri + match_authority->rm_so;

        /* Find the last '@'. Just for the case someone used '@' in username
         * or password. 'at' ends up as the offset of the first character
         * after userinfo, or 0 when there is no userinfo at all. */
        size_t at = len;
        while (at != 0)
        {
            if (authority[--at] != '@')
                continue;

            /* Split at the first ':' before '@'. There should not be more
             * ':' but this is the most secure way -> avoid leaking an
             * excerpt of a password containing ':'. */
            size_t user_len = at;
            const char *const colon = memchr(authority, ':', at);
            if (colon != NULL)
            {
                user_len = (size_t)(colon - authority);
                if (password != NULL)
                    *password = xstrndup(colon + 1, at - user_len - 1);
            }

            if (username != NULL)
                *username = xstrndup(authority, user_len);

            /* Step over the '@' itself. */
            ++at;
            break;
        }

        len -= at;

        if (hostname != NULL)
            *hostname = xstrndup(authority + at, len);

        memcpy(ptr, authority + at, len);
        ptr += len;
    }

    /* Append path, query and fragment or "" */
    uri_append_match(&ptr, uri, &matchptr[URI_RX_LOCATION], location);

    return 0;
}