Blame tools/psl.c

Packit 0af36a
/*
Packit 0af36a
 * Copyright(c) 2014-2018 Tim Ruehsen
Packit 0af36a
 *
Packit 0af36a
 * Permission is hereby granted, free of charge, to any person obtaining a
Packit 0af36a
 * copy of this software and associated documentation files (the "Software"),
Packit 0af36a
 * to deal in the Software without restriction, including without limitation
Packit 0af36a
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
Packit 0af36a
 * and/or sell copies of the Software, and to permit persons to whom the
Packit 0af36a
 * Software is furnished to do so, subject to the following conditions:
Packit 0af36a
 *
Packit 0af36a
 * The above copyright notice and this permission notice shall be included in
Packit 0af36a
 * all copies or substantial portions of the Software.
Packit 0af36a
 *
Packit 0af36a
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
Packit 0af36a
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
Packit 0af36a
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
Packit 0af36a
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
Packit 0af36a
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
Packit 0af36a
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
Packit 0af36a
 * DEALINGS IN THE SOFTWARE.
Packit 0af36a
 *
Packit 0af36a
 * This file is part of libpsl.
Packit 0af36a
 *
Packit 0af36a
 * Using the libpsl functions via command line
Packit 0af36a
 *
Packit 0af36a
 * Changelog
Packit 0af36a
 * 11.04.2014  Tim Ruehsen  created
Packit 0af36a
 *
Packit 0af36a
 */
Packit 0af36a
Packit 0af36a
#if HAVE_CONFIG_H
Packit 0af36a
# include <config.h>
Packit 0af36a
#endif
Packit 0af36a
Packit 0af36a
#ifdef HAVE_UNISTD_H
Packit 0af36a
# include <unistd.h>
Packit 0af36a
#endif
Packit 0af36a
Packit 0af36a
#include <stdlib.h>
Packit 0af36a
#include <string.h>
Packit 0af36a
#include <ctype.h>
Packit 0af36a
#include <locale.h>
Packit 0af36a
Packit 0af36a
#include <libpsl.h>
Packit 0af36a
Packit 0af36a
/*
 * Write the command line synopsis and the option summary to stream f,
 * then terminate the process with exit status err.
 * This function does not return.
 */
static void usage(int err, FILE* f)
{
	/* one entry per output line, emitted in order */
	static const char *const help_lines[] = {
		"Usage: psl [options] <domains...>\n",
		"\n",
		"Options:\n",
		"  --version                    show library version information\n",
		"  --use-latest-data            use the latest PSL data available [default]\n",
		"  --use-builtin-data           use the builtin PSL data\n",
		"  --no-star-rule               do not apply the prevailing star rule\n",
		"                                 (only applies to --is-public-suffix)\n",
		"  --load-psl-file <filename>   load PSL data from file\n",
		"  --is-public-suffix           check if domains are public suffixes [default]\n",
		"  --is-cookie-domain-acceptable <cookie-domain>\n",
		"                               check if cookie-domain is acceptable for domains\n",
		"  --print-unreg-domain         print the longest public suffix part\n",
		"  --print-reg-domain           print the shortest private suffix part\n",
		"  --print-info                 print info about library builtin data\n",
		"\n"
	};
	size_t idx;

	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
		fputs(help_lines[idx], f);

	exit(err);
}
Packit 0af36a
Packit 0af36a
/*
 * Format t as local time in an RFC 2822-like layout:
 *   <weekday>, <day> <month> <year> <HH:MM:SS> <time zone name>
 * The weekday/month names and the zone name come from strftime() and so
 * follow the current locale; this is not a strict RFC 2822 date.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call (not reentrant). Returns an empty string if t cannot be converted.
 */
static const char *time2str(time_t t)
{
	static char buf[64];
	struct tm *tp = localtime(&t);

	/*
	 * localtime() returns NULL for times it cannot represent, and
	 * strftime() leaves buf unspecified when it returns 0.
	 * Fall back to an empty string in both cases.
	 */
	if (!tp || !strftime(buf, sizeof(buf), "%a, %d %b %Y %H:%M:%S %Z", tp))
		buf[0] = 0;

	return buf;
}
Packit 0af36a
Packit 0af36a
int main(int argc, const char *const *argv)
Packit 0af36a
{
Packit 0af36a
	int mode = 1, no_star_rule = 0;
Packit 0af36a
	const char *const *arg, *psl_file = NULL, *cookie_domain = NULL;
Packit 0af36a
	psl_ctx_t *psl = (psl_ctx_t *) psl_latest(NULL);
Packit 0af36a
Packit 0af36a
	/* set current locale according to the environment variables */
Packit 0af36a
	setlocale(LC_ALL, "");
Packit 0af36a
Packit 0af36a
	for (arg = argv + 1; arg < argv + argc; arg++) {
Packit 0af36a
		if (!strncmp(*arg, "--", 2)) {
Packit 0af36a
			if (!strcmp(*arg, "--is-public-suffix"))
Packit 0af36a
				mode = 1;
Packit 0af36a
			else if (!strcmp(*arg, "--print-unreg-domain"))
Packit 0af36a
				mode = 2;
Packit 0af36a
			else if (!strcmp(*arg, "--print-reg-domain"))
Packit 0af36a
				mode = 3;
Packit 0af36a
			else if (!strcmp(*arg, "--print-info"))
Packit 0af36a
				mode = 99;
Packit 0af36a
			else if (!strcmp(*arg, "--is-cookie-domain-acceptable") && arg < argv + argc - 1) {
Packit 0af36a
				mode = 4;
Packit 0af36a
				cookie_domain = *(++arg);
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--use-latest-data")) {
Packit 0af36a
				psl_free(psl);
Packit 0af36a
				if (psl_file) {
Packit 0af36a
					fprintf(stderr, "Dropped data from %s\n", psl_file);
Packit 0af36a
					psl_file = NULL;
Packit 0af36a
				}
Packit 0af36a
				if (!(psl = (psl_ctx_t *) psl_latest(NULL)))
Packit 0af36a
					printf("No PSL data available\n");
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--use-builtin-data")) {
Packit 0af36a
				psl_free(psl);
Packit 0af36a
				if (psl_file) {
Packit 0af36a
					fprintf(stderr, "Dropped data from %s\n", psl_file);
Packit 0af36a
					psl_file = NULL;
Packit 0af36a
				}
Packit 0af36a
				if (!(psl = (psl_ctx_t *) psl_builtin()))
Packit 0af36a
					printf("No builtin PSL data available\n");
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--no-star-rule")) {
Packit 0af36a
				no_star_rule = 1;
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--load-psl-file") && arg < argv + argc - 1) {
Packit 0af36a
				psl_free(psl);
Packit 0af36a
				if (psl_file) {
Packit 0af36a
					fprintf(stderr, "Dropped data from %s\n", psl_file);
Packit 0af36a
					psl_file = NULL;
Packit 0af36a
				}
Packit 0af36a
				if (!(psl = psl_load_file(psl_file = *(++arg)))) {
Packit 0af36a
					fprintf(stderr, "Failed to load PSL data from %s\n\n", psl_file);
Packit 0af36a
					psl_file = NULL;
Packit 0af36a
				}
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--help")) {
Packit 0af36a
				fprintf(stdout, "`psl' explores the Public Suffix List\n\n");
Packit 0af36a
				usage(0, stdout);
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--version")) {
Packit 0af36a
				printf("psl %s (0x%06x)\n", PACKAGE_VERSION, psl_check_version_number(0));
Packit 0af36a
				printf("libpsl %s\n", psl_get_version());
Packit 0af36a
				printf("\n");
Packit 0af36a
				printf("Copyright (C) 2014-2018 Tim Ruehsen\n");
Packit 0af36a
				printf("License: MIT\n");
Packit 0af36a
				exit(0);
Packit 0af36a
			}
Packit 0af36a
			else if (!strcmp(*arg, "--")) {
Packit 0af36a
				arg++;
Packit 0af36a
				break;
Packit 0af36a
			}
Packit 0af36a
			else {
Packit 0af36a
				fprintf(stderr, "Unknown option '%s'\n", *arg);
Packit 0af36a
				usage(1, stderr);
Packit 0af36a
			}
Packit 0af36a
		} else
Packit 0af36a
			break;
Packit 0af36a
	}
Packit 0af36a
Packit 0af36a
	if (mode != 99) {
Packit 0af36a
		if (mode != 1 && no_star_rule) {
Packit 0af36a
			fprintf(stderr, "--no-star-rule only combines with --is-public-suffix\n");
Packit 0af36a
			usage(1, stderr);
Packit 0af36a
		}
Packit 0af36a
		if (!psl) {
Packit 0af36a
			fprintf(stderr, "No PSL data available - aborting\n");
Packit 0af36a
			exit(2);
Packit 0af36a
		}
Packit 0af36a
		if (arg >= argv + argc) {
Packit 0af36a
			char buf[256], *domain, *lower;
Packit 0af36a
			size_t len;
Packit 0af36a
			psl_error_t rc;
Packit 0af36a
Packit 0af36a
			/* read URLs from STDIN */
Packit 0af36a
			while (fgets(buf, sizeof(buf), stdin)) {
Packit 0af36a
				for (domain = buf; isspace(*domain); domain++); /* skip leading spaces */
Packit 0af36a
				if (*domain == '#' || !*domain) continue; /* skip empty lines and comments */
Packit 0af36a
				for (len = strlen(domain); len && isspace(domain[len - 1]); len--); /* skip trailing spaces */
Packit 0af36a
				domain[len] = 0;
Packit 0af36a
Packit 0af36a
				if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != PSL_SUCCESS)
Packit 0af36a
					fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc);
Packit 0af36a
				else if (mode == 1) {
Packit 0af36a
					if (no_star_rule)
Packit 0af36a
						printf("%s: %d (%s)\n", domain, psl_is_public_suffix2(psl, lower, PSL_TYPE_ANY|PSL_TYPE_NO_STAR_RULE), lower);
Packit 0af36a
					else
Packit 0af36a
						printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower);
Packit 0af36a
				}
Packit 0af36a
				else if (mode == 2)
Packit 0af36a
					printf("%s: %s\n", domain, psl_unregistrable_domain(psl, lower));
Packit 0af36a
				else if (mode == 3)
Packit 0af36a
					printf("%s: %s\n", domain, psl_registrable_domain(psl, lower));
Packit 0af36a
				else if (mode == 4) {
Packit 0af36a
					char *cookie_domain_lower;
Packit 0af36a
Packit 0af36a
					if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) == PSL_SUCCESS) {
Packit 0af36a
						printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain));
Packit 0af36a
						free(cookie_domain_lower);
Packit 0af36a
					} else
Packit 0af36a
						fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc);
Packit 0af36a
				}
Packit 0af36a
Packit 0af36a
				if (rc == PSL_SUCCESS)
Packit 0af36a
					psl_free_string(lower);
Packit 0af36a
			}
Packit 0af36a
Packit 0af36a
			psl_free(psl);
Packit 0af36a
			exit(0);
Packit 0af36a
		}
Packit 0af36a
	}
Packit 0af36a
Packit 0af36a
	if (mode == 1) {
Packit 0af36a
		for (; arg < argv + argc; arg++) {
Packit 0af36a
			if (no_star_rule)
Packit 0af36a
				printf("%s: %d\n", *arg, psl_is_public_suffix2(psl, *arg, PSL_TYPE_ANY|PSL_TYPE_NO_STAR_RULE));
Packit 0af36a
			else
Packit 0af36a
				printf("%s: %d\n", *arg, psl_is_public_suffix(psl, *arg));
Packit 0af36a
		}
Packit 0af36a
	}
Packit 0af36a
	else if (mode == 2) {
Packit 0af36a
		for (; arg < argv + argc; arg++)
Packit 0af36a
			printf("%s: %s\n", *arg, psl_unregistrable_domain(psl, *arg));
Packit 0af36a
	}
Packit 0af36a
	else if (mode == 3) {
Packit 0af36a
		for (; arg < argv + argc; arg++)
Packit 0af36a
			printf("%s: %s\n", *arg, psl_registrable_domain(psl, *arg));
Packit 0af36a
	}
Packit 0af36a
	else if (mode == 4) {
Packit 0af36a
		for (; arg < argv + argc; arg++)
Packit 0af36a
			printf("%s: %d\n", *arg, psl_is_cookie_domain_acceptable(psl, *arg, cookie_domain));
Packit 0af36a
	}
Packit 0af36a
	else if (mode == 99) {
Packit 0af36a
		printf("dist filename: %s\n", psl_dist_filename());
Packit 0af36a
Packit 0af36a
		if (psl && psl != psl_builtin()) {
Packit 0af36a
			static char not_avail[] = "- information not available -";
Packit 0af36a
			int n;
Packit 0af36a
Packit 0af36a
			if ((n = psl_suffix_count(psl)) >= 0)
Packit 0af36a
				printf("suffixes: %d\n", n);
Packit 0af36a
			else
Packit 0af36a
				printf("suffixes: %s\n", not_avail);
Packit 0af36a
Packit 0af36a
			if ((n = psl_suffix_exception_count(psl)) >= 0)
Packit 0af36a
				printf("exceptions: %d\n", n);
Packit 0af36a
			else
Packit 0af36a
				printf("exceptions: %s\n", not_avail);
Packit 0af36a
Packit 0af36a
			if ((n = psl_suffix_wildcard_count(psl)) >= 0)
Packit 0af36a
				printf("wildcards: %d\n", n);
Packit 0af36a
			else
Packit 0af36a
				printf("wildcards: %s\n", not_avail);
Packit 0af36a
		}
Packit 0af36a
Packit 0af36a
		psl_free(psl);
Packit 0af36a
		psl = (psl_ctx_t *) psl_builtin();
Packit 0af36a
Packit 0af36a
		if (psl) {
Packit 0af36a
			printf("builtin suffixes: %d\n", psl_suffix_count(psl));
Packit 0af36a
			printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl));
Packit 0af36a
			printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl));
Packit 0af36a
			printf("builtin filename: %s\n", psl_builtin_filename());
Packit 0af36a
			printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time()));
Packit 0af36a
			printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
Packit 0af36a
			printf("builtin outdated: %d\n", psl_builtin_outdated());
Packit 0af36a
		} else
Packit 0af36a
			printf("No builtin PSL data available\n");
Packit 0af36a
	}
Packit 0af36a
Packit 0af36a
	psl_free(psl);
Packit 0af36a
Packit 0af36a
	return 0;
Packit 0af36a
}