Blame org_fedora_oscap/data_fetch.py

Packit Service 39273c
"""
Packit Service 39273c
Module for fetching files via HTTP and FTP. Directly or over SSL (HTTPS) with
Packit Service 39273c
server certificate validation.
Packit Service 39273c
Packit Service 39273c
"""
Packit Service 39273c
Packit Service 39273c
import re
Packit Service 39273c
import os
Packit Service 39273c
import os.path
Packit Service 39273c
import pycurl
Packit Service 39273c
Packit Service 39273c
from pyanaconda.core.configuration.anaconda import conf
Packit Service 39273c
Packit Service 39273c
from org_fedora_oscap import utils
Packit Service 39273c
Packit Service 39273c
import logging
Packit Service 39273c
log = logging.getLogger("anaconda")
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
# public API of this module -- everything else should be private
__all__ = ["fetch_data", "can_fetch_from"]
Packit Service 39273c
Packit Service 39273c
# prefixes of the URLs that need network connection
NET_URL_PREFIXES = ("http", "https", "ftp")

# prefixes of the URLs that may not need network connection
LOCAL_URL_PREFIXES = ("file",)

# Patterns splitting a URL into two groups: the protocol and the rest of the
# URL (everything following "://").
# TODO: needs improvements
HTTP_URL_RE_STR = r"(https?)://(.*)"
HTTP_URL_RE = re.compile(HTTP_URL_RE_STR)

FTP_URL_RE_STR = r"(ftp)://(.*)"
FTP_URL_RE = re.compile(FTP_URL_RE_STR)

FILE_URL_RE_STR = r"(file)://(.*)"
FILE_URL_RE = re.compile(FILE_URL_RE_STR)
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
class DataFetchError(Exception):
    """
    Parent class for the exception classes defined in this module.

    Catching this class handles every error this module raises on its own
    (CertificateValidationError, WrongRequestError, UnknownURLformatError
    and FetchError).

    """
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
class CertificateValidationError(DataFetchError):
    """
    Class for the certificate validation related errors.

    Raised when the server certificate cannot be validated while fetching
    data over HTTPS.

    """
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
class WrongRequestError(DataFetchError):
    """
    Class for the wrong combination of parameters errors.

    Raised for a URL that doesn't match the pattern of its protocol, for an
    empty output file path and for CA certificates given together with a
    non-HTTPS URL.

    """
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
class UnknownURLformatError(DataFetchError):
    """
    Class for invalid URL cases.

    Raised when the URL doesn't start with any of the supported prefixes.

    """
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
class FetchError(DataFetchError):
    """
    Class for the errors when fetching data. Usually due to I/O errors.

    """
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
def can_fetch_from(url):
    """
    Function telling whether the fetch_data function understands the type of
    given URL or not.

    Only the beginning of the URL is compared with the known prefixes
    (NET_URL_PREFIXES and LOCAL_URL_PREFIXES), the rest of the URL is not
    validated here.

    :param url: URL
    :type url: str
    :return: whether the type of the URL is supported or not
    :rtype: bool

    """

    # str.startswith() accepts a tuple of alternatives and returns True if
    # any of them matches
    return url.startswith(NET_URL_PREFIXES + LOCAL_URL_PREFIXES)
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
def fetch_data(url, out_file, ca_certs=None):
    """
    Fetch data from a given URL. If the URL starts with https://, ca_certs can
    be a path to PEM file with CA certificate chain to validate server
    certificate.

    :param url: URL of the data
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs: path to a PEM file with CA certificate chain
    :type ca_certs: str
    :raise UnknownURLformatError: if the URL doesn't start with any of the
                                  supported prefixes
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs file path given and url starting with
                              http://) or arguments don't have required format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors)

    """

    # reject unsupported URLs first so that nothing is created on the file
    # system for a request that cannot be served anyway
    if not can_fetch_from(url):
        msg = "Cannot fetch data from '%s': unknown URL format" % url
        raise UnknownURLformatError(msg)

    # create the directory for the out_file if it doesn't exist
    out_dir = os.path.dirname(out_file)
    utils.ensure_dir_exists(out_dir)

    _curl_fetch(url, out_file, ca_certs)
Packit Service 39273c
Packit Service 39273c
Packit Service 39273c
def _curl_fetch(url, out_file, ca_certs=None):
    """
    Function that fetches data and writes it out to the given file path. If a
    path to the file with CA certificates is given and the url starts with
    'https', the server certificate is validated.

    :param url: url of the data that has to start with 'http://', 'https://',
                'ftp://' or 'file://'
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs: path to the file with CA certificates for server
                     certificate validation
    :type ca_certs: str
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs file path given and url starting with
                              http://) or arguments don't have required format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors or
                       an HTTP error response from the server)

    """

    # choose the pattern the URL has to match based on its prefix
    if url.startswith("ftp"):
        url_re, url_re_str = FTP_URL_RE, FTP_URL_RE_STR
    elif url.startswith("file"):
        url_re, url_re_str = FILE_URL_RE, FILE_URL_RE_STR
    else:
        url_re, url_re_str = HTTP_URL_RE, HTTP_URL_RE_STR

    match = url_re.match(url)
    if not match:
        msg = "Wrong url not matching '%s'" % url_re_str
        raise WrongRequestError(msg)

    # the first group contains the protocol, the second one the rest
    protocol, rest = match.groups()

    if protocol == "ftp" and '@' not in rest:
        # no user:pass given -> use anonymous login to the FTP server
        url = protocol + "://anonymous:@" + rest

    if not out_file:
        raise WrongRequestError("out_file cannot be an empty string")

    if ca_certs and protocol != "https":
        msg = "Cannot verify server certificate when using plain HTTP"
        raise WrongRequestError(msg)

    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)

        # make HTTP error responses (status >= 400) fail the transfer instead
        # of storing the server's error page as if it was the requested data
        curl.setopt(pycurl.FAILONERROR, 1)

        # ca_certs can only be set for HTTPS here (checked above)
        if ca_certs:
            # the strictest verification
            curl.setopt(pycurl.SSL_VERIFYHOST, 2)
            curl.setopt(pycurl.SSL_VERIFYPEER, 1)
            curl.setopt(pycurl.CAINFO, ca_certs)

        # may be turned off by flags (specified on command line, take
        # precedence)
        if not conf.payload.verify_ssl:
            log.warning("Disabling SSL verification due to the noverifyssl flag")
            curl.setopt(pycurl.SSL_VERIFYHOST, 0)
            curl.setopt(pycurl.SSL_VERIFYPEER, 0)

        try:
            with open(out_file, "wb") as fobj:
                curl.setopt(pycurl.WRITEDATA, fobj)
                curl.perform()
        except pycurl.error as err:
            # first arg is the error code
            if err.args[0] == pycurl.E_SSL_CACERT:
                msg = "Failed to connect to server and validate its "\
                      "certificate: %s" % err
                raise CertificateValidationError(msg) from err

            msg = "Failed to fetch data: %s" % err
            raise FetchError(msg) from err
    finally:
        # release the handle (and its connections) on every code path
        curl.close()