From 86a02d690855267f1185e70c7b1cd73ab567f471 Mon Sep 17 00:00:00 2001 From: Packit Date: Oct 13 2020 09:46:55 +0000 Subject: Apply patch elfutils-0.178-debuginfod-timeoutprogress.patch patch_name: elfutils-0.178-debuginfod-timeoutprogress.patch present_in_specfile: true --- diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index ab7b4e1..e5a2e82 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -1,5 +1,5 @@ /* Retrieve ELF / DWARF / source files from the debuginfod. - Copyright (C) 2019 Red Hat, Inc. + Copyright (C) 2019-2020 Red Hat, Inc. This file is part of elfutils. This file is free software; you can redistribute it and/or modify @@ -40,6 +40,7 @@ #include "config.h" #include "debuginfod.h" +#include "system.h" #include #include #include @@ -57,6 +58,7 @@ #include #include #include +#include #include /* If fts.h is included before config.h, its indirect inclusions may not @@ -98,16 +100,15 @@ static const time_t cache_default_max_unused_age_s = 604800; /* 1 week */ static const char *cache_default_name = ".debuginfod_client_cache"; static const char *cache_path_envvar = DEBUGINFOD_CACHE_PATH_ENV_VAR; -/* URLs of debuginfods, separated by url_delim. - This env var must be set for debuginfod-client to run. */ +/* URLs of debuginfods, separated by url_delim. */ static const char *server_urls_envvar = DEBUGINFOD_URLS_ENV_VAR; static const char *url_delim = " "; static const char url_delim_char = ' '; -/* Timeout for debuginfods, in seconds. - This env var must be set for debuginfod-client to run. */ +/* Timeout for debuginfods, in seconds (to get at least 100K). */ static const char *server_timeout_envvar = DEBUGINFOD_TIMEOUT_ENV_VAR; -static int server_timeout = 5; +static const long default_timeout = 90; + /* Data associated with a particular CURL easy handle. Passed to the write callback. 
*/ @@ -278,6 +279,87 @@ debuginfod_clean_cache(debuginfod_client *c, #define MAX_BUILD_ID_BYTES 64 +static void +add_extra_headers(CURL *handle) +{ + /* Compute a User-Agent: string to send. The more accurately this + describes this host, the likelier that the debuginfod servers + might be able to locate debuginfo for us. */ + + char* utspart = NULL; + struct utsname uts; + int rc = 0; + rc = uname (&uts); + if (rc == 0) + rc = asprintf(& utspart, "%s/%s", uts.sysname, uts.machine); + if (rc < 0) + utspart = NULL; + + FILE *f = fopen ("/etc/os-release", "r"); + if (f == NULL) + f = fopen ("/usr/lib/os-release", "r"); + char *id = NULL; + char *version = NULL; + if (f != NULL) + { + while (id == NULL || version == NULL) + { + char buf[128]; + char *s = &buf[0]; + if (fgets (s, sizeof(buf), f) == NULL) + break; + + int len = strlen (s); + if (len < 3) + continue; + if (s[len - 1] == '\n') + { + s[len - 1] = '\0'; + len--; + } + + char *v = strchr (s, '='); + if (v == NULL || strlen (v) < 2) + continue; + + /* Split var and value. */ + *v = '\0'; + v++; + + /* Remove optional quotes around value string. */ + if (*v == '"' || *v == '\'') + { + v++; + s[len - 1] = '\0'; + } + if (strcmp (s, "ID") == 0) + id = strdup (v); + if (strcmp (s, "VERSION_ID") == 0) + version = strdup (v); + } + fclose (f); + } + + char *ua = NULL; + rc = asprintf(& ua, "%s/%s,%s,%s/%s", + PACKAGE_NAME, PACKAGE_VERSION, + utspart ?: "", + id ?: "", + version ?: ""); + if (rc < 0) + ua = NULL; + + if (ua) + curl_easy_setopt(handle, CURLOPT_USERAGENT, (void*) ua); /* implicit strdup */ + + free (ua); + free (id); + free (version); + free (utspart); +} + + + /* Query each of the server URLs found in $DEBUGINFOD_URLS for the file with the specified build-id, type (debuginfo, executable or source) and filename. filename may be NULL. 
If found, return a file @@ -400,8 +482,10 @@ debuginfod_query_server (debuginfod_client *c, return fd; } - if (getenv(server_timeout_envvar)) - server_timeout = atoi (getenv(server_timeout_envvar)); + long timeout = default_timeout; + const char* timeout_envvar = getenv(server_timeout_envvar); + if (timeout_envvar != NULL) + timeout = atoi (timeout_envvar); /* make a copy of the envvar so it can be safely modified. */ server_urls = strdup(urls_envvar); @@ -493,14 +577,22 @@ debuginfod_query_server (debuginfod_client *c, CURLOPT_WRITEFUNCTION, debuginfod_write_callback); curl_easy_setopt(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]); - curl_easy_setopt(data[i].handle, CURLOPT_TIMEOUT, (long) server_timeout); + if (timeout > 0) + { + /* Make sure there is at least some progress, + try to get at least 100K per timeout seconds. */ + curl_easy_setopt (data[i].handle, CURLOPT_LOW_SPEED_TIME, + timeout); + curl_easy_setopt (data[i].handle, CURLOPT_LOW_SPEED_LIMIT, + 100 * 1024L); + } curl_easy_setopt(data[i].handle, CURLOPT_FILETIME, (long) 1); curl_easy_setopt(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1); curl_easy_setopt(data[i].handle, CURLOPT_FAILONERROR, (long) 1); curl_easy_setopt(data[i].handle, CURLOPT_NOSIGNAL, (long) 1); curl_easy_setopt(data[i].handle, CURLOPT_AUTOREFERER, (long) 1); curl_easy_setopt(data[i].handle, CURLOPT_ACCEPT_ENCODING, ""); - curl_easy_setopt(data[i].handle, CURLOPT_USERAGENT, (void*) PACKAGE_STRING); + add_extra_headers(data[i].handle); curl_multi_add_handle(curlm, data[i].handle); server_url = strtok_r(NULL, url_delim, &strtok_saveptr); @@ -511,11 +603,32 @@ debuginfod_query_server (debuginfod_client *c, long loops = 0; do { + /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ + curl_multi_wait(curlm, NULL, 0, 1000, NULL); + + /* If the target file has been found, abort the other queries. 
*/ + if (target_handle != NULL) + for (int i = 0; i < num_urls; i++) + if (data[i].handle != target_handle) + curl_multi_remove_handle(curlm, data[i].handle); + + CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); + if (curlm_res != CURLM_OK) + { + switch (curlm_res) + { + case CURLM_CALL_MULTI_PERFORM: continue; + case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; + default: rc = -ENETUNREACH; break; + } + goto out1; + } + if (c->progressfn) /* inform/check progress callback */ { loops ++; long pa = loops; /* default params for progress callback */ - long pb = 0; + long pb = 0; /* transfer_timeout tempting, but loops != elapsed-time */ if (target_handle) /* we've committed to a server; report its download progress */ { CURLcode curl_res; @@ -535,6 +648,8 @@ debuginfod_query_server (debuginfod_client *c, pa = (dl > LONG_MAX ? LONG_MAX : (long)dl); #endif + /* NB: If going through deflate-compressing proxies, this + number is likely to be unavailable, so -1 may show. */ #ifdef CURLINFO_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T curl_off_t cl; curl_res = curl_easy_getinfo(target_handle, @@ -555,27 +670,6 @@ debuginfod_query_server (debuginfod_client *c, if ((*c->progressfn) (c, pa, pb)) break; } - - /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ - curl_multi_wait(curlm, NULL, 0, 1000, NULL); - - /* If the target file has been found, abort the other queries. */ - if (target_handle != NULL) - for (int i = 0; i < num_urls; i++) - if (data[i].handle != target_handle) - curl_multi_remove_handle(curlm, data[i].handle); - - CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); - if (curlm_res != CURLM_OK) - { - switch (curlm_res) - { - case CURLM_CALL_MULTI_PERFORM: continue; - case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; - default: rc = -ENETUNREACH; break; - } - goto out1; - } } while (still_running); /* Check whether a query was successful. 
If so, assign its handle @@ -674,9 +768,9 @@ debuginfod_query_server (debuginfod_client *c, curl_multi_cleanup(curlm); unlink (target_cache_tmppath); + close (fd); /* before the rmdir, otherwise it'll fail */ (void) rmdir (target_cache_dir); /* nop if not empty */ free(data); - close (fd); out0: free (server_urls); @@ -685,6 +779,22 @@ debuginfod_query_server (debuginfod_client *c, return rc; } + +/* Activate a basic form of progress tracing */ +static int +default_progressfn (debuginfod_client *c, long a, long b) +{ + (void) c; + + dprintf(STDERR_FILENO, + "Downloading from debuginfod %ld/%ld%s", a, b, + ((a == b) ? "\n" : "\r")); + /* XXX: include URL - stateful */ + + return 0; +} + + /* See debuginfod.h */ debuginfod_client * debuginfod_begin (void) @@ -693,7 +803,12 @@ debuginfod_begin (void) size_t size = sizeof (struct debuginfod_client); client = (debuginfod_client *) malloc (size); if (client != NULL) - client->progressfn = NULL; + { + if (getenv(DEBUGINFOD_PROGRESS_ENV_VAR)) + client->progressfn = default_progressfn; + else + client->progressfn = NULL; + } return client; } diff --git a/debuginfod/debuginfod-client.c.debuginfod-timeout-progress b/debuginfod/debuginfod-client.c.debuginfod-timeout-progress new file mode 100644 index 0000000..ab7b4e1 --- /dev/null +++ b/debuginfod/debuginfod-client.c.debuginfod-timeout-progress @@ -0,0 +1,755 @@ +/* Retrieve ELF / DWARF / source files from the debuginfod. + Copyright (C) 2019 Red Hat, Inc. + This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. 
+ + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + + +/* cargo-cult from libdwfl linux-kernel-modules.c */ +/* In case we have a bad fts we include this before config.h because it + can't handle _FILE_OFFSET_BITS. + Everything we need here is fine if its declarations just come first. + Also, include sys/types.h before fts. On some systems fts.h is not self + contained. */ +#ifdef BAD_FTS + #include + #include +#endif + +#include "config.h" +#include "debuginfod.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* If fts.h is included before config.h, its indirect inclusions may not + give us the right LFS aliases of these functions, so map them manually. */ +#ifdef BAD_FTS + #ifdef _FILE_OFFSET_BITS + #define open open64 + #define fopen fopen64 + #endif +#else + #include + #include +#endif + +struct debuginfod_client +{ + /* Progress/interrupt callback function. */ + debuginfod_progressfn_t progressfn; + + /* Can contain all other context, like cache_path, server_urls, + timeout or other info gotten from environment variables, the + handle data, etc. So those don't have to be reparsed and + recreated on each request. */ +}; + +/* The cache_clean_interval_s file within the debuginfod cache specifies + how frequently the cache should be cleaned. The file's st_mtime represents + the time of last cleaning. 
*/ +static const char *cache_clean_interval_filename = "cache_clean_interval_s"; +static const time_t cache_clean_default_interval_s = 86400; /* 1 day */ + +/* The cache_max_unused_age_s file within the debuginfod cache specifies the + maximum time since last access that a file will remain in the cache. */ +static const char *cache_max_unused_age_filename = "max_unused_age_s"; +static const time_t cache_default_max_unused_age_s = 604800; /* 1 week */ + +/* Location of the cache of files downloaded from debuginfods. + The default parent directory is $HOME, or '/' if $HOME doesn't exist. */ +static const char *cache_default_name = ".debuginfod_client_cache"; +static const char *cache_path_envvar = DEBUGINFOD_CACHE_PATH_ENV_VAR; + +/* URLs of debuginfods, separated by url_delim. + This env var must be set for debuginfod-client to run. */ +static const char *server_urls_envvar = DEBUGINFOD_URLS_ENV_VAR; +static const char *url_delim = " "; +static const char url_delim_char = ' '; + +/* Timeout for debuginfods, in seconds. + This env var must be set for debuginfod-client to run. */ +static const char *server_timeout_envvar = DEBUGINFOD_TIMEOUT_ENV_VAR; +static int server_timeout = 5; + +/* Data associated with a particular CURL easy handle. Passed to + the write callback. */ +struct handle_data +{ + /* Cache file to be written to in case query is successful. */ + int fd; + + /* URL queried by this handle. */ + char url[PATH_MAX]; + + /* This handle. */ + CURL *handle; + + /* Pointer to handle that should write to fd. Initially points to NULL, + then points to the first handle that begins writing the target file + to the cache. Used to ensure that a file is not downloaded from + multiple servers unnecessarily.
*/ + CURL **target_handle; +}; + +static size_t +debuginfod_write_callback (char *ptr, size_t size, size_t nmemb, void *data) +{ + ssize_t count = size * nmemb; + + struct handle_data *d = (struct handle_data*)data; + + /* Indicate to other handles that they can abort their transfer. */ + if (*d->target_handle == NULL) + *d->target_handle = d->handle; + + /* If this handle isn't the target handle, abort transfer. */ + if (*d->target_handle != d->handle) + return -1; + + return (size_t) write(d->fd, (void*)ptr, count); +} + +/* Create the cache and interval file if they do not already exist. + Return 0 if cache and config file are initialized, otherwise return + the appropriate error code. */ +static int +debuginfod_init_cache (char *cache_path, char *interval_path, char *maxage_path) +{ + struct stat st; + + /* If the cache and config file already exist then we are done. */ + if (stat(cache_path, &st) == 0 && stat(interval_path, &st) == 0) + return 0; + + /* Create the cache and config files as necessary. */ + if (stat(cache_path, &st) != 0 && mkdir(cache_path, 0777) < 0) + return -errno; + + int fd = -1; + + /* init cleaning interval config file. */ + fd = open(interval_path, O_CREAT | O_RDWR, 0666); + if (fd < 0) + return -errno; + + if (dprintf(fd, "%ld", cache_clean_default_interval_s) < 0) + return -errno; + + /* init max age config file. */ + if (stat(maxage_path, &st) != 0 + && (fd = open(maxage_path, O_CREAT | O_RDWR, 0666)) < 0) + return -errno; + + if (dprintf(fd, "%ld", cache_default_max_unused_age_s) < 0) + return -errno; + + return 0; +} + + +/* Delete any files that have been unmodified for a period + longer than $DEBUGINFOD_CACHE_CLEAN_INTERVAL_S. */ +static int +debuginfod_clean_cache(debuginfod_client *c, + char *cache_path, char *interval_path, + char *max_unused_path) +{ + struct stat st; + FILE *interval_file; + FILE *max_unused_file; + + if (stat(interval_path, &st) == -1) + { + /* Create new interval file.
*/ + interval_file = fopen(interval_path, "w"); + + if (interval_file == NULL) + return -errno; + + int rc = fprintf(interval_file, "%ld", cache_clean_default_interval_s); + fclose(interval_file); + + if (rc < 0) + return -errno; + } + + /* Check timestamp of interval file to see whether cleaning is necessary. */ + time_t clean_interval; + interval_file = fopen(interval_path, "r"); + if (fscanf(interval_file, "%ld", &clean_interval) != 1) + clean_interval = cache_clean_default_interval_s; + fclose(interval_file); + + if (time(NULL) - st.st_mtime < clean_interval) + /* Interval has not passed, skip cleaning. */ + return 0; + + /* Read max unused age value from config file. */ + time_t max_unused_age; + max_unused_file = fopen(max_unused_path, "r"); + if (max_unused_file) + { + if (fscanf(max_unused_file, "%ld", &max_unused_age) != 1) + max_unused_age = cache_default_max_unused_age_s; + fclose(max_unused_file); + } + else + max_unused_age = cache_default_max_unused_age_s; + + char * const dirs[] = { cache_path, NULL, }; + + FTS *fts = fts_open(dirs, 0, NULL); + if (fts == NULL) + return -errno; + + FTSENT *f; + long files = 0; + while ((f = fts_read(fts)) != NULL) + { + files++; + if (c->progressfn) /* inform/check progress callback */ + if ((c->progressfn) (c, files, 0)) + break; + + switch (f->fts_info) + { + case FTS_F: + /* delete file if max_unused_age has been met or exceeded. */ + /* XXX consider extra effort to clean up old tmp files */ + if (time(NULL) - f->fts_statp->st_atime >= max_unused_age) + unlink (f->fts_path); + break; + + case FTS_DP: + /* Remove if empty. */ + (void) rmdir (f->fts_path); + break; + + default: + ; + } + } + fts_close(fts); + + /* Update timestamp representing when the cache was last cleaned. 
*/ + utime (interval_path, NULL); + return 0; +} + + +#define MAX_BUILD_ID_BYTES 64 + + +/* Query each of the server URLs found in $DEBUGINFOD_URLS for the file + with the specified build-id, type (debuginfo, executable or source) + and filename. filename may be NULL. If found, return a file + descriptor for the target, otherwise return an error code. +*/ +static int +debuginfod_query_server (debuginfod_client *c, + const unsigned char *build_id, + int build_id_len, + const char *type, + const char *filename, + char **path) +{ + char *urls_envvar; + char *server_urls; + char cache_path[PATH_MAX]; + char maxage_path[PATH_MAX*3]; /* These *3 multipliers are to shut up gcc -Wformat-truncation */ + char interval_path[PATH_MAX*4]; + char target_cache_dir[PATH_MAX*2]; + char target_cache_path[PATH_MAX*4]; + char target_cache_tmppath[PATH_MAX*5]; + char suffix[PATH_MAX*2]; + char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; + int rc; + + /* Is there any server we can query? If not, don't do any work, + just return with ENOSYS. Don't even access the cache. */ + urls_envvar = getenv(server_urls_envvar); + if (urls_envvar == NULL || urls_envvar[0] == '\0') + { + rc = -ENOSYS; + goto out; + } + + /* Copy lowercase hex representation of build_id into buf. 
*/ + if ((build_id_len >= MAX_BUILD_ID_BYTES) || + (build_id_len == 0 && + sizeof(build_id_bytes) > MAX_BUILD_ID_BYTES*2 + 1)) + return -EINVAL; + if (build_id_len == 0) /* expect clean hexadecimal */ + strcpy (build_id_bytes, (const char *) build_id); + else + for (int i = 0; i < build_id_len; i++) + sprintf(build_id_bytes + (i * 2), "%02x", build_id[i]); + + if (filename != NULL) + { + if (filename[0] != '/') // must start with / + return -EINVAL; + + /* copy the filename to suffix, s,/,#,g */ + unsigned q = 0; + for (unsigned fi=0; q < PATH_MAX-1; fi++) + switch (filename[fi]) + { + case '\0': + suffix[q] = '\0'; + q = PATH_MAX-1; /* escape for loop too */ + break; + case '/': /* escape / to prevent dir escape */ + suffix[q++]='#'; + suffix[q++]='#'; + break; + case '#': /* escape # to prevent /# vs #/ collisions */ + suffix[q++]='#'; + suffix[q++]='_'; + break; + default: + suffix[q++]=filename[fi]; + } + suffix[q] = '\0'; + /* If the DWARF filenames are super long, this could exceed + PATH_MAX and truncate/collide. Oh well, that'll teach + them! */ + } + else + suffix[0] = '\0'; + + /* set paths needed to perform the query + + example format + cache_path: $HOME/.debuginfod_cache + target_cache_dir: $HOME/.debuginfod_cache/0123abcd + target_cache_path: $HOME/.debuginfod_cache/0123abcd/debuginfo + target_cache_path: $HOME/.debuginfod_cache/0123abcd/source#PATH#TO#SOURCE ? + */ + + if (getenv(cache_path_envvar)) + strcpy(cache_path, getenv(cache_path_envvar)); + else + { + if (getenv("HOME")) + sprintf(cache_path, "%s/%s", getenv("HOME"), cache_default_name); + else + sprintf(cache_path, "/%s", cache_default_name); + } + + /* avoid using snprintf here due to compiler warning. 
*/ + snprintf(target_cache_dir, sizeof(target_cache_dir), "%s/%s", cache_path, build_id_bytes); + snprintf(target_cache_path, sizeof(target_cache_path), "%s/%s%s", target_cache_dir, type, suffix); + snprintf(target_cache_tmppath, sizeof(target_cache_tmppath), "%s.XXXXXX", target_cache_path); + + /* XXX combine these */ + snprintf(interval_path, sizeof(interval_path), "%s/%s", cache_path, cache_clean_interval_filename); + snprintf(maxage_path, sizeof(maxage_path), "%s/%s", cache_path, cache_max_unused_age_filename); + rc = debuginfod_init_cache(cache_path, interval_path, maxage_path); + if (rc != 0) + goto out; + rc = debuginfod_clean_cache(c, cache_path, interval_path, maxage_path); + if (rc != 0) + goto out; + + /* If the target is already in the cache then we are done. */ + int fd = open (target_cache_path, O_RDONLY); + if (fd >= 0) + { + /* Success!!!! */ + if (path != NULL) + *path = strdup(target_cache_path); + return fd; + } + + if (getenv(server_timeout_envvar)) + server_timeout = atoi (getenv(server_timeout_envvar)); + + /* make a copy of the envvar so it can be safely modified. */ + server_urls = strdup(urls_envvar); + if (server_urls == NULL) + { + rc = -ENOMEM; + goto out; + } + /* thereafter, goto out0 on error*/ + + /* create target directory in cache if not found. */ + struct stat st; + if (stat(target_cache_dir, &st) == -1 && mkdir(target_cache_dir, 0700) < 0) + { + rc = -errno; + goto out0; + } + + /* NB: write to a temporary file first, to avoid race condition of + multiple clients checking the cache, while a partially-written or empty + file is in there, being written from libcurl. */ + fd = mkstemp (target_cache_tmppath); + if (fd < 0) + { + rc = -errno; + goto out0; + } + + /* Count number of URLs. */ + int num_urls = 0; + for (int i = 0; server_urls[i] != '\0'; i++) + if (server_urls[i] != url_delim_char + && (i == 0 || server_urls[i - 1] == url_delim_char)) + num_urls++; + + /* Tracks which handle should write to fd. 
Set to the first + handle that is ready to write the target file to the cache. */ + CURL *target_handle = NULL; + struct handle_data *data = malloc(sizeof(struct handle_data) * num_urls); + + /* Initalize handle_data with default values. */ + for (int i = 0; i < num_urls; i++) + { + data[i].handle = NULL; + data[i].fd = -1; + } + + CURLM *curlm = curl_multi_init(); + if (curlm == NULL) + { + rc = -ENETUNREACH; + goto out0; + } + /* thereafter, goto out1 on error. */ + + char *strtok_saveptr; + char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); + + /* Initialize each handle. */ + for (int i = 0; i < num_urls && server_url != NULL; i++) + { + data[i].fd = fd; + data[i].target_handle = &target_handle; + data[i].handle = curl_easy_init(); + + if (data[i].handle == NULL) + { + rc = -ENETUNREACH; + goto out1; + } + + /* Build handle url. Tolerate both http://foo:999 and + http://foo:999/ forms */ + char *slashbuildid; + if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') + slashbuildid = "buildid"; + else + slashbuildid = "/buildid"; + + if (filename) /* must start with / */ + snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url, + slashbuildid, build_id_bytes, type, filename); + else + snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url, + slashbuildid, build_id_bytes, type); + + curl_easy_setopt(data[i].handle, CURLOPT_URL, data[i].url); + curl_easy_setopt(data[i].handle, + CURLOPT_WRITEFUNCTION, + debuginfod_write_callback); + curl_easy_setopt(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]); + curl_easy_setopt(data[i].handle, CURLOPT_TIMEOUT, (long) server_timeout); + curl_easy_setopt(data[i].handle, CURLOPT_FILETIME, (long) 1); + curl_easy_setopt(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1); + curl_easy_setopt(data[i].handle, CURLOPT_FAILONERROR, (long) 1); + curl_easy_setopt(data[i].handle, CURLOPT_NOSIGNAL, (long) 1); + curl_easy_setopt(data[i].handle, CURLOPT_AUTOREFERER, (long) 1); + 
curl_easy_setopt(data[i].handle, CURLOPT_ACCEPT_ENCODING, ""); + curl_easy_setopt(data[i].handle, CURLOPT_USERAGENT, (void*) PACKAGE_STRING); + + curl_multi_add_handle(curlm, data[i].handle); + server_url = strtok_r(NULL, url_delim, &strtok_saveptr); + } + + /* Query servers in parallel. */ + int still_running; + long loops = 0; + do + { + if (c->progressfn) /* inform/check progress callback */ + { + loops ++; + long pa = loops; /* default params for progress callback */ + long pb = 0; + if (target_handle) /* we've committed to a server; report its download progress */ + { + CURLcode curl_res; +#ifdef CURLINFO_SIZE_DOWNLOAD_T + curl_off_t dl; + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_SIZE_DOWNLOAD_T, + &dl); + if (curl_res == 0 && dl >= 0) + pa = (dl > LONG_MAX ? LONG_MAX : (long)dl); +#else + double dl; + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_SIZE_DOWNLOAD, + &dl); + if (curl_res == 0) + pa = (dl > LONG_MAX ? LONG_MAX : (long)dl); +#endif + +#ifdef CURLINFO_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + curl_off_t cl; + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, + &cl); + if (curl_res == 0 && cl >= 0) + pb = (cl > LONG_MAX ? LONG_MAX : (long)cl); +#else + double cl; + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD, + &cl); + if (curl_res == 0) + pb = (cl > LONG_MAX ? LONG_MAX : (long)cl); +#endif + } + + if ((*c->progressfn) (c, pa, pb)) + break; + } + + /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ + curl_multi_wait(curlm, NULL, 0, 1000, NULL); + + /* If the target file has been found, abort the other queries. 
*/ + if (target_handle != NULL) + for (int i = 0; i < num_urls; i++) + if (data[i].handle != target_handle) + curl_multi_remove_handle(curlm, data[i].handle); + + CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); + if (curlm_res != CURLM_OK) + { + switch (curlm_res) + { + case CURLM_CALL_MULTI_PERFORM: continue; + case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; + default: rc = -ENETUNREACH; break; + } + goto out1; + } + } while (still_running); + + /* Check whether a query was successful. If so, assign its handle + to verified_handle. */ + int num_msg; + rc = -ENOENT; + CURL *verified_handle = NULL; + do + { + CURLMsg *msg; + + msg = curl_multi_info_read(curlm, &num_msg); + if (msg != NULL && msg->msg == CURLMSG_DONE) + { + if (msg->data.result != CURLE_OK) + { + /* Unsucessful query, determine error code. */ + switch (msg->data.result) + { + case CURLE_COULDNT_RESOLVE_HOST: rc = -EHOSTUNREACH; break; // no NXDOMAIN + case CURLE_URL_MALFORMAT: rc = -EINVAL; break; + case CURLE_COULDNT_CONNECT: rc = -ECONNREFUSED; break; + case CURLE_REMOTE_ACCESS_DENIED: rc = -EACCES; break; + case CURLE_WRITE_ERROR: rc = -EIO; break; + case CURLE_OUT_OF_MEMORY: rc = -ENOMEM; break; + case CURLE_TOO_MANY_REDIRECTS: rc = -EMLINK; break; + case CURLE_SEND_ERROR: rc = -ECONNRESET; break; + case CURLE_RECV_ERROR: rc = -ECONNRESET; break; + case CURLE_OPERATION_TIMEDOUT: rc = -ETIME; break; + default: rc = -ENOENT; break; + } + } + else + { + /* Query completed without an error. Confirm that the + response code is 200 and set verified_handle. */ + long resp_code = 500; + CURLcode curl_res; + + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_RESPONSE_CODE, + &resp_code); + + if (curl_res == CURLE_OK + && resp_code == 200 + && msg->easy_handle != NULL) + { + verified_handle = msg->easy_handle; + break; + } + } + } + } while (num_msg > 0); + + if (verified_handle == NULL) + goto out1; + + /* we've got one!!!! 
*/ + time_t mtime; + CURLcode curl_res = curl_easy_getinfo(verified_handle, CURLINFO_FILETIME, (void*) &mtime); + if (curl_res != CURLE_OK) + mtime = time(NULL); /* fall back to current time */ + + struct timeval tvs[2]; + tvs[0].tv_sec = tvs[1].tv_sec = mtime; + tvs[0].tv_usec = tvs[1].tv_usec = 0; + (void) futimes (fd, tvs); /* best effort */ + + /* rename tmp->real */ + rc = rename (target_cache_tmppath, target_cache_path); + if (rc < 0) + { + rc = -errno; + goto out1; + /* Perhaps we need not give up right away; could retry or something ... */ + } + + /* Success!!!! */ + for (int i = 0; i < num_urls; i++) + curl_easy_cleanup(data[i].handle); + + curl_multi_cleanup (curlm); + free (data); + free (server_urls); + /* don't close fd - we're returning it */ + /* don't unlink the tmppath; it's already been renamed. */ + if (path != NULL) + *path = strdup(target_cache_path); + + return fd; + +/* error exits */ + out1: + for (int i = 0; i < num_urls; i++) + curl_easy_cleanup(data[i].handle); + + curl_multi_cleanup(curlm); + unlink (target_cache_tmppath); + (void) rmdir (target_cache_dir); /* nop if not empty */ + free(data); + close (fd); + + out0: + free (server_urls); + + out: + return rc; +} + +/* See debuginfod.h */ +debuginfod_client * +debuginfod_begin (void) +{ + debuginfod_client *client; + size_t size = sizeof (struct debuginfod_client); + client = (debuginfod_client *) malloc (size); + if (client != NULL) + client->progressfn = NULL; + return client; +} + +void +debuginfod_end (debuginfod_client *client) +{ + free (client); +} + +int +debuginfod_find_debuginfo (debuginfod_client *client, + const unsigned char *build_id, int build_id_len, + char **path) +{ + return debuginfod_query_server(client, build_id, build_id_len, + "debuginfo", NULL, path); +} + + +/* See debuginfod.h */ +int +debuginfod_find_executable(debuginfod_client *client, + const unsigned char *build_id, int build_id_len, + char **path) +{ + return debuginfod_query_server(client, build_id, 
build_id_len, + "executable", NULL, path); +} + +/* See debuginfod.h */ +int debuginfod_find_source(debuginfod_client *client, + const unsigned char *build_id, int build_id_len, + const char *filename, char **path) +{ + return debuginfod_query_server(client, build_id, build_id_len, + "source", filename, path); +} + + +void +debuginfod_set_progressfn(debuginfod_client *client, + debuginfod_progressfn_t fn) +{ + client->progressfn = fn; +} + + +/* NB: these are thread-unsafe. */ +__attribute__((constructor)) attribute_hidden void libdebuginfod_ctor(void) +{ + curl_global_init(CURL_GLOBAL_DEFAULT); +} + +/* NB: this is very thread-unsafe: it breaks other threads that are still in libcurl */ +__attribute__((destructor)) attribute_hidden void libdebuginfod_dtor(void) +{ + /* ... so don't do this: */ + /* curl_global_cleanup(); */ +} diff --git a/debuginfod/debuginfod.h b/debuginfod/debuginfod.h index 6b1b1cc..33fae86 100644 --- a/debuginfod/debuginfod.h +++ b/debuginfod/debuginfod.h @@ -33,6 +33,7 @@ #define DEBUGINFOD_URLS_ENV_VAR "DEBUGINFOD_URLS" #define DEBUGINFOD_CACHE_PATH_ENV_VAR "DEBUGINFOD_CACHE_PATH" #define DEBUGINFOD_TIMEOUT_ENV_VAR "DEBUGINFOD_TIMEOUT" +#define DEBUGINFOD_PROGRESS_ENV_VAR "DEBUGINFOD_PROGRESS" /* Handle for debuginfod-client connection. */ typedef struct debuginfod_client debuginfod_client; diff --git a/debuginfod/debuginfod.h.debuginfod-timeout-progress b/debuginfod/debuginfod.h.debuginfod-timeout-progress new file mode 100644 index 0000000..6b1b1cc --- /dev/null +++ b/debuginfod/debuginfod.h.debuginfod-timeout-progress @@ -0,0 +1,85 @@ +/* External declarations for the libdebuginfod client library. + Copyright (C) 2019 Red Hat, Inc. + This file is part of elfutils. 
+ + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + +#ifndef _DEBUGINFOD_CLIENT_H +#define _DEBUGINFOD_CLIENT_H 1 + +/* Names of environment variables that control the client logic. */ +#define DEBUGINFOD_URLS_ENV_VAR "DEBUGINFOD_URLS" +#define DEBUGINFOD_CACHE_PATH_ENV_VAR "DEBUGINFOD_CACHE_PATH" +#define DEBUGINFOD_TIMEOUT_ENV_VAR "DEBUGINFOD_TIMEOUT" + +/* Handle for debuginfod-client connection. */ +typedef struct debuginfod_client debuginfod_client; + +#ifdef __cplusplus +extern "C" { +#endif + +/* Create a handle for a new debuginfod-client session. */ +debuginfod_client *debuginfod_begin (void); + +/* Query the urls contained in $DEBUGINFOD_URLS for a file with + the specified type and build id. If build_id_len == 0, the + build_id is supplied as a lowercase hexadecimal string; otherwise + it is a binary blob of given length. + + If successful, return a file descriptor to the target, otherwise + return a posix error code. If successful, set *path to a + strdup'd copy of the name of the same file in the cache. + Caller must free() it later.
*/ + +int debuginfod_find_debuginfo (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + char **path); + +int debuginfod_find_executable (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + char **path); + +int debuginfod_find_source (debuginfod_client *client, + const unsigned char *build_id, + int build_id_len, + const char *filename, + char **path); + +typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b); +void debuginfod_set_progressfn(debuginfod_client *c, + debuginfod_progressfn_t fn); + +/* Release debuginfod client connection context handle. */ +void debuginfod_end (debuginfod_client *client); + +#ifdef __cplusplus +} +#endif + + +#endif /* _DEBUGINFOD_CLIENT_H */ diff --git a/doc/debuginfod-find.1 b/doc/debuginfod-find.1 index a759ecb..e71ca29 100644 --- a/doc/debuginfod-find.1 +++ b/doc/debuginfod-find.1 @@ -120,8 +120,9 @@ debuginfod instances. Alternate URL prefixes are separated by space. .TP 21 .B DEBUGINFOD_TIMEOUT This environment variable governs the timeout for each debuginfod HTTP -connection. A server that fails to respond within this many seconds -is skipped. The default is 5. +connection. A server that fails to provide at least 100K of data +within this many seconds is skipped. The default is 90 seconds. (Zero +or negative means "no timeout".) .TP 21 .B DEBUGINFOD_CACHE_PATH diff --git a/doc/debuginfod-find.1.debuginfod-timeout-progress b/doc/debuginfod-find.1.debuginfod-timeout-progress new file mode 100644 index 0000000..a759ecb --- /dev/null +++ b/doc/debuginfod-find.1.debuginfod-timeout-progress @@ -0,0 +1,144 @@ +'\"! tbl | nroff \-man +'\" t macro stdmacro + +.de SAMPLE +.br +.RS 0 +.nf +.nh +.. +.de ESAMPLE +.hy +.fi +.RE +.. + +.TH DEBUGINFOD-FIND 1 +.SH NAME +debuginfod-find \- request debuginfo-related data + +.SH SYNOPSIS +.B debuginfod-find [\fIOPTION\fP]... debuginfo \fIBUILDID\fP + +.B debuginfod-find [\fIOPTION\fP]... 
executable \fIBUILDID\fP + +.B debuginfod-find [\fIOPTION\fP]... source \fIBUILDID\fP \fI/FILENAME\fP + +.SH DESCRIPTION +\fBdebuginfod-find\fP queries one or more \fBdebuginfod\fP servers for +debuginfo-related data. In case of a match, it saves the +requested file into a local cache, prints the file name to standard +output, and exits with a success status of 0. In case of any error, +it exits with a failure status and an error message to standard error. + +.\" Much of the following text is duplicated with debuginfod.8 + +The debuginfod system uses buildids to identify debuginfo-related data. +These are stored as binary notes in ELF/DWARF files, and are +represented as lowercase hexadecimal. For example, for a program +/bin/ls, look at the ELF note GNU_BUILD_ID: + +.SAMPLE +% readelf -n /bin/ls | grep -A4 build.id +Note section [ 4] '.note.gnu.buildid' of 36 bytes at offset 0x340: +Owner Data size Type +GNU 20 GNU_BUILD_ID +Build ID: 8713b9c3fb8a720137a4a08b325905c7aaf8429d +.ESAMPLE + +Then the hexadecimal BUILDID is simply: + +.SAMPLE +8713b9c3fb8a720137a4a08b325905c7aaf8429d +.ESAMPLE + +.SS debuginfo \fIBUILDID\fP + +If the given buildid is known to a server, this request will result +in a binary object that contains the customary \fB.*debug_*\fP +sections. This may be a split debuginfo file as created by +\fBstrip\fP, or it may be an original unstripped executable. + +.SS executable \fIBUILDID\fP + +If the given buildid is known to the server, this request will result +in a binary object that contains the normal executable segments. This +may be an executable stripped by \fBstrip\fP, or it may be an original +unstripped executable. \fBET_DYN\fP shared libraries are considered +to be a type of executable. + +.SS source \fIBUILDID\fP \fI/SOURCE/FILE\fP + +If the given buildid is known to the server, this request will result +in a binary object that contains the source file mentioned. The path +should be absolute.
Relative path names commonly appear in the DWARF +file's source directory, but these paths are relative to +individual compilation unit AT_comp_dir paths, and yet an executable +is made up of multiple CUs. Therefore, to disambiguate, debuginfod +expects source queries to prefix relative path names with the CU +compilation-directory, followed by a mandatory "/". + +Note: the user should not elide \fB../\fP or \fB/./\fP or extraneous +\fB///\fP sorts of path components in the directory names, because if +this is how those names appear in the DWARF files, that is what +debuginfod needs to see too. + +For example: +.TS +l l. +#include source BUILDID /usr/include/stdio.h +/path/to/foo.c source BUILDID /path/to/foo.c +\../bar/foo.c AT_comp_dir=/zoo/ source BUILDID /zoo//../bar/foo.c +.TE + +.SH "OPTIONS" + +.TP +.B "\-v" +Increase verbosity, including printing frequent download-progress messages. + + +.SH "SECURITY" + +debuginfod-find \fBdoes not\fP include any particular security +features. It trusts that the binaries returned by the debuginfod(s) +are accurate. Therefore, the list of servers should include only +trustworthy ones. If accessed across HTTP rather than HTTPS, the +network should be trustworthy. Authentication information through +the internal \fIlibcurl\fP library is not currently enabled, except +for the basic plaintext \%\fIhttp[s]://userid:password@hostname/\fP style. +(The debuginfod server does not perform authentication, but a front-end +proxy server could.) + +.SH "ENVIRONMENT VARIABLES" + +.TP 21 +.B DEBUGINFOD_URLS +This environment variable contains a list of URL prefixes for trusted +debuginfod instances. Alternate URL prefixes are separated by space. + +.TP 21 +.B DEBUGINFOD_TIMEOUT +This environment variable governs the timeout for each debuginfod HTTP +connection. A server that fails to respond within this many seconds +is skipped. The default is 5. 
+ +.TP 21 +.B DEBUGINFOD_CACHE_PATH +This environment variable governs the location of the cache where +downloaded files are kept. It is cleaned periodically as this program +is reexecuted. Cache management parameters may be set by files under +this directory: see the \fBdebuginfod_find_debuginfo(3)\fP man page +for details. The default is $HOME/.debuginfod_client_cache. + +.SH "FILES" +.LP +.PD .1v +.TP 20 +.B $HOME/.debuginfod_client_cache +Default cache directory. +.PD + +.SH "SEE ALSO" +.I "debuginfod(8)" +.I "debuginfod_find_debuginfo(3)" diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 index 210550e..c55673c 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -357,8 +357,10 @@ or indirectly - the results would be hilarious. .TP 21 .B DEBUGINFOD_TIMEOUT This environment variable governs the timeout for each debuginfod HTTP -connection. A server that fails to respond within this many seconds -is skipped. The default is 5. +connection. A server that fails to provide at least 100K of data +within this many seconds is skipped. The default is 90 seconds. (Zero +or negative means "no timeout".) + .TP 21 .B DEBUGINFOD_CACHE_PATH diff --git a/doc/debuginfod.8.debuginfod-timeout-progress b/doc/debuginfod.8.debuginfod-timeout-progress new file mode 100644 index 0000000..210550e --- /dev/null +++ b/doc/debuginfod.8.debuginfod-timeout-progress @@ -0,0 +1,387 @@ +'\"! tbl | nroff \-man +'\" t macro stdmacro + +.de SAMPLE +.br +.RS 0 +.nf +.nh +.. +.de ESAMPLE +.hy +.fi +.RE +.. + +.TH DEBUGINFOD 8 +.SH NAME +debuginfod \- debuginfo-related http file-server daemon + +.SH SYNOPSIS +.B debuginfod +[\fIOPTION\fP]... [\fIPATH\fP]... + +.SH DESCRIPTION +\fBdebuginfod\fP serves debuginfo-related artifacts over HTTP. It +periodically scans a set of directories for ELF/DWARF files and their +associated source code, as well as RPM files containing the above, to +build an index by their buildid.
This index is used when remote +clients use the HTTP webapi, to fetch these files by the same buildid. + +If a debuginfod cannot service a given buildid artifact request +itself, and it is configured with information about upstream +debuginfod servers, it queries them for the same information, just as +\fBdebuginfod-find\fP would. If successful, it locally caches then +relays the file content to the original requester. + +If the \fB\-F\fP option is given, each listed PATH creates a thread to +scan for matching ELF/DWARF/source files under the given physical +directory. Source files are matched with DWARF files based on the +AT_comp_dir (compilation directory) attributes inside it. Duplicate +directories are ignored. You may use a file name for a PATH, but +source code indexing may be incomplete; prefer using a directory that +contains the binaries. Caution: source files listed in the DWARF may +be a path \fIanywhere\fP in the file system, and debuginfod will +readily serve their content on demand. (Imagine a doctored DWARF file +that lists \fI/etc/passwd\fP as a source file.) If this is a concern, +audit your binaries with tools such as: + +.SAMPLE +% eu-readelf -wline BINARY | sed -n '/^Directory.table/,/^File.name.table/p' +or +% eu-readelf -wline BINARY | sed -n '/^Directory.table/,/^Line.number/p' +or even use debuginfod itself: +% debuginfod -vvv -d :memory: -F BINARY 2>&1 | grep 'recorded.*source' +^C +.ESAMPLE + +If the \fB\-R\fP option is given each listed PATH creates a thread to +scan for ELF/DWARF/source files contained in matching RPMs under the +given physical directory. Duplicate directories are ignored. You may +use a file name for a PATH, but source code indexing may be +incomplete; prefer using a directory that contains normal RPMs +alongside debuginfo/debugsource RPMs. Because of complications such +as DWZ-compressed debuginfo, may require \fItwo\fP scan passes to +identify all source code. 
Source files for RPMs are only served +from other RPMs, so the caution for \-F does not apply. + +If no PATH is listed, or neither \-F nor \-R option is given, then +\fBdebuginfod\fP will simply serve content that it scanned into its +index in previous runs: the data is cumulative. + +File names must match extended regular expressions given by the \-I +option and not the \-X option (if any) in order to be considered. + + +.SH OPTIONS + +.TP +.B "\-F" +Activate ELF/DWARF file scanning threads. The default is off. + +.TP +.B "\-R" +Activate RPM file scanning threads. The default is off. + +.TP +.B "\-d FILE" "\-\-database=FILE" +Set the path of the sqlite database used to store the index. This +file is disposable in the sense that a later rescan will repopulate +data. It will contain absolute file path names, so it may not be +portable across machines. It may be frequently read/written, so it +should be on a fast filesystem. It should not be shared across +machines or users, to maximize sqlite locking performance. The +default database file is $HOME/.debuginfod.sqlite. + +.TP +.B "\-D SQL" "\-\-ddl=SQL" +Execute given sqlite statement after the database is opened and +initialized as extra DDL (SQL data definition language). This may be +useful to tune performance-related pragmas or indexes. May be +repeated. The default is nothing extra. + +.TP +.B "\-p NUM" "\-\-port=NUM" +Set the TCP port number on which debuginfod should listen, to service +HTTP requests. Both IPv4 and IPv6 sockets are opened, if possible. +The webapi is documented below. The default port number is 8002. + +.TP +.B "\-I REGEX" "\-\-include=REGEX" "\-X REGEX" "\-\-exclude=REGEX" +Govern the inclusion and exclusion of file names under the search +paths. The regular expressions are interpreted as unanchored POSIX +extended REs, thus may include alternation. They are evaluated +against the full path of each file, based on its \fBrealpath(3)\fP +canonicalization.
By default, all files are included and none are +excluded. A file that matches both include and exclude REGEX is +excluded. (The \fIcontents\fP of RPM files are not subject to +inclusion or exclusion filtering: they are all processed.) + +.TP +.B "\-t SECONDS" "\-\-rescan\-time=SECONDS" +Set the rescan time for the file and RPM directories. This is the +amount of time the scanning threads will wait after finishing a scan, +before doing it again. A rescan for unchanged files is fast (because +the index also stores the file mtimes). A time of zero is acceptable, +and means that only one initial scan should be performed. The default +rescan time is 300 seconds. Receiving a SIGUSR1 signal triggers a new +scan, independent of the rescan time (including if it was zero). + +.TP +.B "\-g SECONDS" "\-\-groom\-time=SECONDS" +Set the groom time for the index database. This is the amount of time +the grooming thread will wait after finishing a grooming pass before +doing it again. A groom operation quickly rescans all previously +scanned files, only to see if they are still present and current, so +it can deindex obsolete files. See also the \fIDATA MANAGEMENT\fP +section. The default groom time is 86400 seconds (1 day). A time of +zero is acceptable, and means that only one initial groom should be +performed. Receiving a SIGUSR2 signal triggers a new grooming pass, +independent of the groom time (including if it was zero). + +.TP +.B "\-G" +Run an extraordinary maximal-grooming pass at debuginfod startup. +This pass can take considerable time, because it tries to remove any +debuginfo-unrelated content from the RPM-related parts of the index. +It should not be run if any recent RPM-related indexing operations +were aborted early. It can take considerable space, because it +finishes up with an sqlite "vacuum" operation, which repacks the +database file by triplicating it temporarily. The default is not to +do maximal-grooming. See also the \fIDATA MANAGEMENT\fP section.
+ +.TP +.B "\-c NUM" "\-\-concurrency=NUM" +Set the concurrency limit for all the scanning threads. While many +threads may be spawned to cover all the given PATHs, only NUM may +concurrently do CPU-intensive operations like parsing an ELF file +or an RPM. The default is the number of processors on the system; +the minimum is 1. + +.TP +.B "\-L" +Traverse symbolic links encountered during traversal of the PATHs, +including across devices - as in \fIfind\ -L\fP. The default is to +traverse the physical directory structure only, stay on the same +device, and ignore symlinks - as in \fIfind\ -P\ -xdev\fP. Caution: a +loops in the symbolic directory tree might lead to \fIinfinite +traversal\fP. + +.TP +.B "\-v" +Increase verbosity of logging to the standard error file descriptor. +May be repeated to increase details. The default verbosity is 0. + +.SH WEBAPI + +.\" Much of the following text is duplicated with debuginfod-find.1 + +debuginfod's webapi resembles ordinary file service, where a GET +request with a path containing a known buildid results in a file. +Unknown buildid / request combinations result in HTTP error codes. +This file service resemblance is intentional, so that an installation +can take advantage of standard HTTP management infrastructure. + +There are three requests. In each case, the buildid is encoded as a +lowercase hexadecimal string. For example, for a program \fI/bin/ls\fP, +look at the ELF note GNU_BUILD_ID: + +.SAMPLE +% readelf -n /bin/ls | grep -A4 build.id +Note section [ 4] '.note.gnu.buildid' of 36 bytes at offset 0x340: +Owner Data size Type +GNU 20 GNU_BUILD_ID +Build ID: 8713b9c3fb8a720137a4a08b325905c7aaf8429d +.ESAMPLE + +Then the hexadecimal BUILDID is simply: + +.SAMPLE +8713b9c3fb8a720137a4a08b325905c7aaf8429d +.ESAMPLE + +.SS /buildid/\fIBUILDID\fP/debuginfo + +If the given buildid is known to the server, this request will result +in a binary object that contains the customary \fB.*debug_*\fP +sections. 
This may be a split debuginfo file as created by +\fBstrip\fP, or it may be an original unstripped executable. + +.SS /buildid/\fIBUILDID\fP/executable + +If the given buildid is known to the server, this request will result +in a binary object that contains the normal executable segments. This +may be a executable stripped by \fBstrip\fP, or it may be an original +unstripped executable. \fBET_DYN\fP shared libraries are considered +to be a type of executable. + +.SS /buildid/\fIBUILDID\fP/source\fI/SOURCE/FILE\fP + +If the given buildid is known to the server, this request will result +in a binary object that contains the source file mentioned. The path +should be absolute. Relative path names commonly appear in the DWARF +file's source directory, but these paths are relative to +individual compilation unit AT_comp_dir paths, and yet an executable +is made up of multiple CUs. Therefore, to disambiguate, debuginfod +expects source queries to prefix relative path names with the CU +compilation-directory, followed by a mandatory "/". + +Note: contrary to RFC 3986, the client should not elide \fB../\fP or +\fB/./\fP or extraneous \fB///\fP sorts of path components in the +directory names, because if this is how those names appear in the +DWARF files, that is what debuginfod needs to see too. + +For example: +.TS +l l. +#include /buildid/BUILDID/source/usr/include/stdio.h +/path/to/foo.c /buildid/BUILDID/source/path/to/foo.c +\../bar/foo.c AT_comp_dir=/zoo/ /buildid/BUILDID/source/zoo//../bar/foo.c +.TE + +.SS /metrics + +This endpoint returns a Prometheus formatted text/plain dump of a +variety of statistics about the operation of the debuginfod server. +The exact set of metrics and their meanings may change in future +versions. Caution: configuration information (path names, versions) +may be disclosed. + +.SH DATA MANAGEMENT + +debuginfod stores its index in an sqlite database in a densely packed +set of interlinked tables. 
While the representation is as efficient +as we have been able to make it, it still takes a considerable amount +of data to record all debuginfo-related data of potentially a great +many files. This section offers some advice about the implications. + +As a general explanation for size, consider that debuginfod indexes +ELF/DWARF files, it stores their names and referenced source file +names, and buildids will be stored. When indexing RPMs, it stores +every file name \fIof or in\fP an RPM, every buildid, plus every +source file name referenced from a DWARF file. (Indexing RPMs takes +more space because the source files often reside in separate +subpackages that may not be indexed at the same pass, so extra +metadata has to be kept.) + +Getting down to numbers, in the case of Fedora RPMs (essentially, +gzip-compressed cpio files), the sqlite index database tends to be +from 0.5% to 3% of their size. It's larger for binaries that are +assembled out of a great many source files, or packages that carry +much debuginfo-unrelated content. It may be even larger during the +indexing phase due to temporary sqlite write-ahead-logging files; +these are checkpointed (cleaned out and removed) at shutdown. It may +be helpful to apply tight \-I or \-X regular-expression constraints to +exclude files from scanning that you know have no debuginfo-relevant +content. + +As debuginfod runs, it periodically rescans its target directories, +and any new content found is added to the database. Old content, such +as data for files that have disappeared or that have been replaced +with newer versions is removed at a periodic \fIgrooming\fP pass. +This means that the sqlite files grow fast during initial indexing, +slowly during index rescans, and periodically shrink during grooming. +There is also an optional one-shot \fImaximal grooming\fP pass is +available. 
It removes debuginfo-unrelated data from the +RPM content index such as file names found in RPMs ("rpm sdef" +records) that are not referred to as source files from any binaries +found in RPMs ("rpm sref" records). This can save considerable disk +space. However, it is slow and temporarily requires up to twice the +database size as free space. Worse: it may result in missing +source-code info if the RPM traversals were interrupted, so that not +all source file references were known. Use it rarely to polish a +complete index. + +You should ensure that ample disk space remains available. (The flood +of error messages on -ENOSPC is ugly and nagging. But, like for most +other errors, debuginfod will resume when resources permit.) If +necessary, debuginfod can be stopped, the database file moved or +removed, and debuginfod restarted. + +sqlite offers several performance-related options in the form of +pragmas. Some may be useful to fine-tune the defaults plus the +debuginfod extras. The \-D option may be useful to tell debuginfod to +execute the given bits of SQL after the basic schema creation +commands. For example, the "synchronous", "cache_size", +"auto_vacuum", "threads", "journal_mode" pragmas may be fun to tweak +via \-D, if you're searching for peak performance. The "optimize", +"wal_checkpoint" pragmas may be useful to run periodically, outside +debuginfod. The default settings are performance- rather than +reliability-oriented, so a hardware crash might corrupt the database. +In these cases, it may be necessary to manually delete the sqlite +database and start over. + +As debuginfod changes in the future, we may have no choice but to +change the database schema in an incompatible manner. If this +happens, new versions of debuginfod will issue SQL statements to +\fIdrop\fP all prior schema & data, and start over. So, disk space +will not be wasted for retaining a no-longer-useable dataset.
+ +In summary, if your system can bear a 0.5%-3% index-to-RPM-dataset +size ratio, and slow growth afterwards, you should not need to +worry about disk space. If a system crash corrupts the database, +or you want to force debuginfod to reset and start over, simply +erase the sqlite file before restarting debuginfod. + + +.SH SECURITY + +debuginfod \fBdoes not\fP include any particular security features. +While it is robust with respect to inputs, some abuse is possible. It +forks a new thread for each incoming HTTP request, which could lead to +a denial-of-service in terms of RAM, CPU, disk I/O, or network I/O. +If this is a problem, users are advised to install debuginfod with a +HTTPS reverse-proxy front-end that enforces site policies for +firewalling, authentication, integrity, authorization, and load +control. The \fI/metrics\fP webapi endpoint is probably not +appropriate for disclosure to the public. + +When relaying queries to upstream debuginfods, debuginfod \fBdoes not\fP +include any particular security features. It trusts that the binaries +returned by the debuginfods are accurate. Therefore, the list of +servers should include only trustworthy ones. If accessed across HTTP +rather than HTTPS, the network should be trustworthy. Authentication +information through the internal \fIlibcurl\fP library is not currently +enabled. + + +.SH "ENVIRONMENT VARIABLES" + +.TP 21 +.B DEBUGINFOD_URLS +This environment variable contains a list of URL prefixes for trusted +debuginfod instances. Alternate URL prefixes are separated by space. +Avoid referential loops that cause a server to contact itself, directly +or indirectly - the results would be hilarious. + +.TP 21 +.B DEBUGINFOD_TIMEOUT +This environment variable governs the timeout for each debuginfod HTTP +connection. A server that fails to respond within this many seconds +is skipped. The default is 5. 
+ +.TP 21 +.B DEBUGINFOD_CACHE_PATH +This environment variable governs the location of the cache where +downloaded files are kept. It is cleaned periodically as this +program is reexecuted. The default is $HOME/.debuginfod_client_cache. +.\" XXX describe cache eviction policy + +.SH FILES +.LP +.PD .1v +.TP 20 +.B $HOME/.debuginfod.sqlite +Default database file. +.PD + +.TP 20 +.B $HOME/.debuginfod_client_cache +Default cache directory for content from upstream debuginfods. +.PD + + +.SH "SEE ALSO" +.I "debuginfod-find(1)" +.I "sqlite3(1)" +.I \%https://prometheus.io/docs/instrumenting/exporters/ diff --git a/doc/debuginfod_find_debuginfo.3 b/doc/debuginfod_find_debuginfo.3 index be8eed0..7e5060f 100644 --- a/doc/debuginfod_find_debuginfo.3 +++ b/doc/debuginfod_find_debuginfo.3 @@ -164,8 +164,17 @@ debuginfod instances. Alternate URL prefixes are separated by space. .TP 21 .B DEBUGINFOD_TIMEOUT This environment variable governs the timeout for each debuginfod HTTP -connection. A server that fails to respond within this many seconds -is skipped. The default is 5. +connection. A server that fails to provide at least 100K of data +within this many seconds is skipped. The default is 90 seconds. (Zero +or negative means "no timeout".) + +.TP 21 +.B DEBUGINFOD_PROGRESS +This environment variable governs the default progress function. If +set, and if a progressfn is not explicitly set, then the library will +configure a default progressfn. This function will append a simple +progress message periodically to stderr. The default is no progress +function output. .TP 21 .B DEBUGINFOD_CACHE_PATH diff --git a/doc/debuginfod_find_debuginfo.3.debuginfod-timeout-progress b/doc/debuginfod_find_debuginfo.3.debuginfod-timeout-progress new file mode 100644 index 0000000..be8eed0 --- /dev/null +++ b/doc/debuginfod_find_debuginfo.3.debuginfod-timeout-progress @@ -0,0 +1,242 @@ +'\"! tbl | nroff \-man +'\" t macro stdmacro + +.de SAMPLE +.br +.RS 0 +.nf +.nh +.. 
+.de ESAMPLE +.hy +.fi +.RE +.. + +.TH DEBUGINFOD_FIND_* 3 +.SH NAME +debuginfod_find_debuginfo \- request debuginfo from debuginfod + +.SH SYNOPSIS +.nf +.B #include <elfutils/debuginfod.h> +.PP +.BI "debuginfod_client *debuginfod_begin(void);" +.BI "void debuginfod_end(debuginfod_client *" client ");" + +.BI "int debuginfod_find_debuginfo(debuginfod_client *" client "," +.BI " const unsigned char *" build_id "," +.BI " int " build_id_len "," +.BI " char ** " path ");" +.BI "int debuginfod_find_executable(debuginfod_client *" client "," +.BI " const unsigned char *" build_id "," +.BI " int " build_id_len "," +.BI " char ** " path ");" +.BI "int debuginfod_find_source(debuginfod_client *" client "," +.BI " const unsigned char *" build_id "," +.BI " int " build_id_len "," +.BI " const char *" filename "," +.BI " char ** " path ");" + +.BI "typedef int (*debuginfod_progressfn_t)(debuginfod_client *" client "," +.BI " long a, long b);" +.BI "void debuginfod_set_progressfn(debuginfod_client *" client "," +.BI " debuginfod_progressfn_t " progressfn ");" + +Link with \fB-ldebuginfod\fP. + +.SH DESCRIPTION + +.BR debuginfod_begin () +creates a \fBdebuginfod_client\fP connection handle that should be used +with all other calls. +.BR debuginfod_end () +should be called on the \fBclient\fP handle to release all state and +storage when done. + +.BR debuginfod_find_debuginfo (), +.BR debuginfod_find_executable (), +and +.BR debuginfod_find_source () +query the debuginfod server URLs contained in +.BR $DEBUGINFOD_URLS +(see below) for the debuginfo, executable or source file with the +given \fIbuild_id\fP. \fIbuild_id\fP should be a pointer to either +a null-terminated, lowercase hex string or a binary blob. If +\fIbuild_id\fP is given as a hex string, \fIbuild_id_len\fP should +be 0. Otherwise \fIbuild_id_len\fP should be the number of bytes in +the binary blob. + +.BR debuginfod_find_source () +also requires a \fIfilename\fP in order to specify a particular +source file.
\fIfilename\fP should be an absolute path that includes +the compilation directory of the CU associated with the source file. +Relative path names commonly appear in the DWARF file's source directory, +but these paths are relative to individual compilation unit AT_comp_dir +paths, and yet an executable is made up of multiple CUs. Therefore, to +disambiguate, debuginfod expects source queries to prefix relative path +names with the CU compilation-directory, followed by a mandatory "/". + +Note: the caller should not elide \fB../\fP or \fB/./\fP or extraneous +\fB///\fP sorts of path components in the directory names, because if +this is how those names appear in the DWARF files, that is what +debuginfod needs to see too. + +If \fIpath\fP is not NULL and the query is successful, \fIpath\fP is set +to the path of the file in the cache. The caller must \fBfree\fP() this value. + +The URLs in \fB$DEBUGINFOD_URLS\fP may be queried in parallel. As soon +as a debuginfod server begins transferring the target file all of the +connections to the other servers are closed. + +A \fBclient\fP handle should be used from only one thread at a time. + +.SH "RETURN VALUE" + +\fBdebuginfod_begin\fP returns the \fBdebuginfod_client\fP handle to +use with all other calls. On error \fBNULL\fP will be returned and +\fBerrno\fP will be set. + +If a find family function is successful, the resulting file is saved +to the client cache and a file descriptor to that file is returned. +The caller needs to \fBclose\fP() this descriptor. Otherwise, a +negative error code is returned. + +.SH "PROGRESS CALLBACK" + +As the \fBdebuginfod_find_*\fP() functions may block for seconds or +longer, a progress callback function is called periodically, if +configured with +.BR debuginfod_set_progressfn (). +This function sets a new progress callback function (or NULL) for the +client handle. 
+ +The given callback function is called from the context of each thread +that is invoking any of the other lookup functions. It is given two +numeric parameters that, if thought of as a numerator \fIa\fP and +denominator \fIb\fP, together represent a completion fraction +\fIa/b\fP. The denominator may be zero initially, until a quantity +such as an exact download size becomes known. + +The progress callback function is also the supported way to +\fIinterrupt\fP the download operation. (The library does \fInot\fP +modify or trigger signals.) The progress callback must return 0 to +continue the work, or any other value to stop work as soon as +possible. Consequently, the \fBdebuginfod_find_*\fP() function will +likely return with an error, but might still succeed. + + +.SH "CACHE" +If the query is successful, the \fBdebuginfod_find_*\fP() functions save +the target file to a local cache. The location of the cache is controlled +by the \fB$DEBUGINFOD_CACHE_PATH\fP environment variable (see below). +Cleaning of the cache is controlled by the \fIcache_clean_interval_s\fP +and \fImax_unused_age_s\fP files, which are found in the +\fB$DEBUGINFOD_CACHE_PATH\fP directory. \fIcache_clean_interval_s\fP controls +how frequently the cache is traversed for cleaning and \fImax_unused_age_s\fP +controls how long a file can go unused (fstat(2) atime) before it's +removed from the cache during cleaning. These files should contain only an +ASCII decimal integer representing the interval or max unused age in seconds. +The default is one day and one week, respectively. Values of zero mean "immediately". + +.SH "SECURITY" +.BR debuginfod_find_debuginfo (), +.BR debuginfod_find_executable (), +and +.BR debuginfod_find_source () +\fBdo not\fP include any particular security +features. They trust that the binaries returned by the debuginfod(s) +are accurate. Therefore, the list of servers should include only +trustworthy ones. 
If accessed across HTTP rather than HTTPS, the +network should be trustworthy. Passing user authentication information +through the internal \fIlibcurl\fP library is not currently enabled, except +for the basic plaintext \%\fIhttp[s]://userid:password@hostname/\fP style. +(The debuginfod server does not perform authentication, but a front-end +proxy server could.) + +.SH "ENVIRONMENT VARIABLES" + +.TP 21 +.B DEBUGINFOD_URLS +This environment variable contains a list of URL prefixes for trusted +debuginfod instances. Alternate URL prefixes are separated by space. + +.TP 21 +.B DEBUGINFOD_TIMEOUT +This environment variable governs the timeout for each debuginfod HTTP +connection. A server that fails to respond within this many seconds +is skipped. The default is 5. + +.TP 21 +.B DEBUGINFOD_CACHE_PATH +This environment variable governs the location of the cache where +downloaded files are kept. It is cleaned periodically as this +program is reexecuted. The default is $HOME/.debuginfod_client_cache. + +.SH "ERRORS" +The following list is not comprehensive. Error codes may also +originate from calls to various C Library functions. + +.TP +.BR EACCES +Denied access to resource located at the URL. + +.TP +.BR ECONNREFUSED +Unable to connect to remote host. + +.TP +.BR ECONNRESET +Unable to either send or receive network data. + +.TP +.BR EHOSTUNREACH +Unable to resolve remote host. + +.TP +.BR EINVAL +One or more arguments are incorrectly formatted. \fIbuild_id\fP may +be too long (greater than 256 characters), \fIfilename\fP may not +be an absolute path or a debuginfod URL is malformed. + +.TP +.BR EIO +Unable to write data received from server to local file. + +.TP +.BR EMLINK +Too many HTTP redirects. + +.TP +.BR ENETUNREACH +Unable to initialize network connection. + +.TP +.BR ENOENT +Could not find the resource located at URL. Often this error code +indicates that a debuginfod server was successfully contacted but +the server could not find the target file.
+ +.TP +.BR ENOMEM +System is unable to allocate resources. + +.TP +.BR ENOSYS +\fB$DEBUGINFOD_URLS\fP is not defined. + +.TP +.BR ETIME +Query failed due to timeout. \fB$DEBUGINFOD_TIMEOUT\fP controls +the timeout duration. See debuginfod(8) for more information. + +.SH "FILES" +.LP +.PD .1v +.TP 20 +.B $HOME/.debuginfod_client_cache +Default cache directory. +.PD + +.SH "SEE ALSO" +.I "debuginfod(8)" diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 0ade03b..c8979e9 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -153,8 +153,11 @@ cmp $filename F/prog2 cat vlog grep -q Progress vlog tempfiles vlog -filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2` +filename=`testrun env DEBUGINFOD_PROGRESS=1 ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2 2>vlog2` cmp $filename F/prog2 +cat vlog2 +grep -q Downloading vlog2 +tempfiles vlog2 filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID2 ${PWD}/prog2.c` cmp $filename ${PWD}/prog2.c diff --git a/tests/run-debuginfod-find.sh.debuginfod-timeout-progress b/tests/run-debuginfod-find.sh.debuginfod-timeout-progress new file mode 100755 index 0000000..0ade03b --- /dev/null +++ b/tests/run-debuginfod-find.sh.debuginfod-timeout-progress @@ -0,0 +1,324 @@ +#!/bin/bash +# +# Copyright (C) 2019 Red Hat, Inc. +# This file is part of elfutils. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# elfutils is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. $srcdir/test-subr.sh # includes set -e + +DB=${PWD}/.debuginfod_tmp.sqlite +tempfiles $DB +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache + +PID1=0 +PID2=0 + +cleanup() # kill the background processes recorded in PID1/PID2 (0 means none), remove F R L and the client caches, then call exit_cleanup +{ + if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi + if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi + + rm -rf F R L ${PWD}/.client_cache* + exit_cleanup +} + +# clean up trash on exit (0) or if we were aborted early; NOTE(review): signal 9 (SIGKILL) cannot be caught, so listing it presumably has no effect +trap cleanup 0 1 2 3 5 9 15 + +# find an unused port number +while true; do + PORT1=`expr '(' $RANDOM % 1000 ')' + 9000` + ss -atn | fgrep ":$PORT1" || break +done + +# We want to run debuginfod in the background. We also want to start +# it with the same check/installcheck-sensitive LD_LIBRARY_PATH stuff +# that the testrun alias sets. But if we just use +# testrun .../debuginfod +# it runs in a subshell, with different pid, so not helpful. +# +# So we gather the LD_LIBRARY_PATH with this cunning trick: +ldpath=`testrun sh -c 'echo $LD_LIBRARY_PATH'` + +mkdir F R L +# not tempfiles F R L - they are directories which we clean up manually +ln -s ${abs_builddir}/dwfllines L/foo # any program not used elsewhere in this test + +wait_ready() # usage: wait_ready PORT METRIC VALUE - poll http://127.0.0.1:PORT/metrics until the last field of the METRIC line equals VALUE; exit 1 if the poll budget runs out +{ + port=$1; + what=$2; + value=$3; + timeout=20; # poll budget: each iteration sleeps 0.5s, so this is about 10s of sleeping rather than 20s + + echo "Wait $timeout seconds on $port for metric $what to change to $value" + while [ $timeout -gt 0 ]; do + mvalue="$(curl -s http://127.0.0.1:$port/metrics \ + | grep "$what" | awk '{print $NF}')" + if [ -z "$mvalue" ]; then mvalue=0; fi + echo "metric $what: $mvalue" + if [ "$mvalue" -eq "$value" ]; then + break; + fi + sleep 0.5; + ((timeout--)); + done; + + if [ $timeout -eq 0 ]; then + echo "metric $what never changed to $value on port $port" + exit 1; + fi +} + +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod -F -R -d $DB -p $PORT1 -t0 -g0 R F L & +PID1=$!
+# Server must become ready +wait_ready $PORT1 'ready' 1 +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/ # or without trailing / + +# Be patient: when run on a busy machine, things might take a bit. +export DEBUGINFOD_TIMEOUT=10 + +# We use -t0 and -g0 here to turn off time-based scanning & grooming. +# For testing purposes, we just sic SIGUSR1 / SIGUSR2 at the process. + +######################################################################## + +# Compile a simple program, strip its debuginfo and save the build-id. +# Also move the debuginfo into another directory so that elfutils +# cannot find it without debuginfod. +echo "int main() { return 0; }" > ${PWD}/prog.c +tempfiles prog.c +gcc -g -o prog ${PWD}/prog.c + ${abs_top_builddir}/src/strip -g -f prog.debug ${PWD}/prog +BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a prog | grep 'Build ID' | cut -d ' ' -f 7` + +mv prog F +mv prog.debug F +kill -USR1 $PID1 # SIGUSR1 substitutes for the time-based scan that -t0 turned off +# Wait till both files are in the index. +wait_ready $PORT1 'thread_work_total{file="F"}' 2 + +######################################################################## + +# Test whether elfutils, via the debuginfod client library dlopen hooks, +# is able to fetch debuginfo from the local debuginfod. +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1 + +######################################################################## + +# Test whether debuginfod-find is able to fetch those files.
+rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID` +cmp $filename F/prog.debug + +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID` +cmp $filename F/prog + +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/prog.c` +cmp $filename ${PWD}/prog.c + +######################################################################## + +# Add artifacts to the search paths and test whether debuginfod finds them while already running. + +# Build another, non-stripped binary +echo "int main() { return 0; }" > ${PWD}/prog2.c +tempfiles prog2.c +gcc -g -o prog2 ${PWD}/prog2.c +BUILDID2=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a prog2 | grep 'Build ID' | cut -d ' ' -f 7` + +mv prog2 F +kill -USR1 $PID1 +# Now there should be 3 files in the index +wait_ready $PORT1 'thread_work_total{file="F"}' 3 + +# Rerun same tests for the prog2 binary +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v debuginfo $BUILDID2 2>vlog` +cmp $filename F/prog2 +cat vlog +grep -q Progress vlog +tempfiles vlog +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2` +cmp $filename F/prog2 +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID2 ${PWD}/prog2.c` +cmp $filename ${PWD}/prog2.c + +cp -rp ${abs_srcdir}/debuginfod-rpms R +kill -USR1 $PID1 +# All rpms need to be in the index +rpms=$(find R -name \*rpm | wc -l) +wait_ready $PORT1 'scanned_total{source="rpm"}' $rpms + +kill -USR1 $PID1 # two hits of SIGUSR1 may be needed to resolve .debug->dwz->srefs +# Expect all source files found in the rpms (they are all called hello.c :) +# We will need to extract all rpms (in their own directory) and count all +# sources referenced in the .debug files.
+mkdir extracted +cd extracted +subdir=0; +newrpms=$(find ../R -name \*\.rpm) +for i in $newrpms; do + subdir=$[$subdir+1]; + mkdir $subdir; + cd $subdir; + ls -lah ../$i + rpm2cpio ../$i | cpio -id; + cd ..; +done +sourcefiles=$(find -name \*\\.debug \ + | env LD_LIBRARY_PATH=$ldpath xargs \ + ${abs_top_builddir}/src/readelf --debug-dump=decodedline \ + | grep mtime: | wc --lines) +cd .. +rm -rf extracted + +wait_ready $PORT1 'found_sourcerefs_total{source="rpm"}' $sourcefiles + +# Run a bank of queries against the debuginfod-rpms test cases. Usage: rpm_test BUILDID SOURCEPATH SOURCESHA1 - fetches the executable and the debuginfo and checks that readelf reports BUILDID for each, then fetches SOURCEPATH and checks that its sha1sum equals SOURCESHA1 + +rpm_test() { + __BUILDID=$1 # build-id handed to every debuginfod-find query below + __SOURCEPATH=$2 # source file path handed to the source query + __SOURCESHA1=$3 # expected sha1sum of the fetched source file + + filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $__BUILDID` + buildid=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a $filename | grep 'Build ID' | cut -d ' ' -f 7` + test $__BUILDID = $buildid + + filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $__BUILDID` + buildid=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a $filename | grep 'Build ID' | cut -d ' ' -f 7` + test $__BUILDID = $buildid + + filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $__BUILDID $__SOURCEPATH` + hash=`cat $filename | sha1sum | awk '{print $1}'` + test $__SOURCESHA1 = $hash +} + + +# common source file sha1 +SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1 +# fedora30 +rpm_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA +rpm_test 41a236eb667c362a1c4196018cc4581e09722b1b /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA +# rhel7 +rpm_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA +rpm_test f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2 /usr/src/debug/hello-1.0/hello.c $SHA +# rhel6 +rpm_test bbbf92ebee5228310e398609c23c2d7d53f6e2f9 /usr/src/debug/hello-1.0/hello.c $SHA +rpm_test d44d42cbd7d915bc938c81333a21e355a6022fb7 /usr/src/debug/hello-1.0/hello.c $SHA +
+RPM_BUILDID=d44d42cbd7d915bc938c81333a21e355a6022fb7 # in rhel6/ subdir, for a later test + + +######################################################################## + +# Drop some of the artifacts, run a groom cycle; confirm that +# debuginfod has forgotten them, but remembers others. + +rm -r R/debuginfod-rpms/rhel6/* +kill -USR2 $PID1 # groom cycle +# Expect 3 rpms to be deleted by the groom +wait_ready $PORT1 'groom{statistic="file d/e"}' 3 + +rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests + +testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $RPM_BUILDID && false || true # NOTE(review): `cmd && false || true` yields success whether or not cmd fails, so the expected failure is not actually enforced + +testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2 + +######################################################################## + +# Federation mode + +# find another unused port +while true; do + PORT2=`expr '(' $RANDOM % 1000 ')' + 9000` + ss -atn | fgrep ":$PORT2" || break +done + +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache2 +mkdir -p $DEBUGINFOD_CACHE_PATH +# NB: inherits the DEBUGINFOD_URLS to the first server +# NB: run in -L symlink-following mode for the L subdir +env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod -F -d ${DB}_2 -p $PORT2 -L L & +PID2=$!
+tempfiles ${DB}_2 +wait_ready $PORT2 'ready' 1 + +# have clients contact the new server +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT2 +rm -rf $DEBUGINFOD_CACHE_PATH +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID + +# confirm that first server can't resolve symlinked info in L/ but second can +BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a L/foo | grep 'Build ID' | cut -d ' ' -f 7` +file L/foo +file -L L/foo +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1 +rm -rf $DEBUGINFOD_CACHE_PATH +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID && false || true # NOTE(review): `cmd && false || true` yields success whether or not cmd fails, so the expected failure is not actually enforced +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT2 +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID + + +# test parallel queries in client +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache3 +mkdir -p $DEBUGINFOD_CACHE_PATH +export DEBUGINFOD_URLS="BAD http://127.0.0.1:$PORT1 127.0.0.1:$PORT1 http://127.0.0.1:$PORT2 DNE" + +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog2 1 + +######################################################################## + +# Fetch some metrics, if curl program is installed +if type curl 2>/dev/null; then + curl http://127.0.0.1:$PORT1/badapi + curl http://127.0.0.1:$PORT1/metrics + curl http://127.0.0.1:$PORT2/metrics + curl http://127.0.0.1:$PORT1/metrics | grep -q 'http_responses_total.*result.*error' + curl http://127.0.0.1:$PORT2/metrics | grep -q 'http_responses_total.*result.*upstream' +fi + +######################################################################## + +# Run the tests again without the servers running. The target file should +# be found in the cache. + +kill -INT $PID1 $PID2 +wait $PID1 $PID2 +PID1=0 # reset to 0 so the cleanup trap does not try to kill the already-reaped servers +PID2=0 +tempfiles .debuginfod_* + +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog2 1 + +######################################################################## + +# Trigger a cache clean and run the tests again.
The clients should be unable to +# find the target. +echo 0 > $DEBUGINFOD_CACHE_PATH/cache_clean_interval_s # zero presumably means clean immediately, i.e. on the next client run +echo 0 > $DEBUGINFOD_CACHE_PATH/max_unused_age_s # zero presumably means every cached file counts as expired + +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1 + +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID2 && false || true # NOTE(review): `cmd && false || true` yields success whether or not cmd fails, so the expected failure is not actually enforced + +exit 0