Blame org_fedora_oscap/data_fetch.py

Packit 792a06
"""
Packit 792a06
Module for fetching files via HTTP, FTP and from local file:// URLs. HTTP
transfers are done either directly or over SSL (HTTPS) with server
certificate validation.
Packit 792a06
Packit 792a06
"""
Packit 792a06
Packit 792a06
import re
Packit 792a06
import os
Packit 792a06
import os.path
Packit 792a06
import pycurl
Packit 792a06
Packit 792a06
from pyanaconda.core.configuration.anaconda import conf
Packit 792a06
Packit 792a06
from org_fedora_oscap import utils
Packit 792a06
Packit 792a06
import logging
Packit 792a06
log = logging.getLogger("anaconda")
Packit 792a06
Packit 792a06
Packit 792a06
# everything else should be private
Packit 792a06
__all__ = ["fetch_data", "can_fetch_from"]
Packit 792a06
Packit 792a06
# URL prefixes for resources that need a network connection to be fetched
NET_URL_PREFIXES = ("http", "https", "ftp")

# URL prefixes for resources that may be fetched without a network connection
LOCAL_URL_PREFIXES = ("file",)

# TODO: needs improvements
# Every pattern captures the protocol as the first group and the rest of the
# URL (everything after '://') as the second group.
HTTP_URL_RE_STR = r"(https?)://(.*)"
FTP_URL_RE_STR = r"(ftp)://(.*)"
FILE_URL_RE_STR = r"(file)://(.*)"

HTTP_URL_RE = re.compile(HTTP_URL_RE_STR)
FTP_URL_RE = re.compile(FTP_URL_RE_STR)
FILE_URL_RE = re.compile(FILE_URL_RE_STR)
Packit 792a06
Packit 792a06
Packit 792a06
class DataFetchError(Exception):
    """Common base of all the exception classes defined in this module."""
Packit 792a06
Packit 792a06
Packit 792a06
class CertificateValidationError(DataFetchError):
    """Exception for errors related to the certificate validation."""
Packit 792a06
Packit 792a06
Packit 792a06
class WrongRequestError(DataFetchError):
    """Exception for requests made with a wrong combination of parameters."""
Packit 792a06
Packit 792a06
Packit 792a06
class UnknownURLformatError(DataFetchError):
    """Exception for the cases when an invalid URL is given."""
Packit 792a06
Packit 792a06
Packit 792a06
class FetchError(DataFetchError):
    """
    Exception for failures that happen while the data is being fetched.
    Usually caused by I/O errors.

    """
Packit 792a06
Packit 792a06
Packit 792a06
def can_fetch_from(url):
    """
    Tell whether the fetch_data function understands the type of the given
    URL or not.

    The decision is a plain prefix check of the URL against the known network
    and local URL prefixes.

    :param url: URL
    :type url: str
    :return: whether the type of the URL is supported or not
    :rtype: bool

    """
    # str.startswith accepts a tuple of prefixes and succeeds on any of them
    supported_prefixes = NET_URL_PREFIXES + LOCAL_URL_PREFIXES
    return url.startswith(supported_prefixes)
Packit 792a06
Packit 792a06
Packit 792a06
def fetch_data(url, out_file, ca_certs=None):
    """
    Fetch data from a given URL and store it in the given output file. If the
    URL starts with https://, ca_certs can be a path to PEM file with CA
    certificate chain to validate server certificate.

    :param url: URL of the data
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs: path to a PEM file with CA certificate chain
    :type ca_certs: str
    :raise UnknownURLformatError: if the type of the URL is not supported
                                  (see can_fetch_from)
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs file path given and url starting with
                              http://) or arguments don't have required format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors)

    """

    # the directory for the out_file is created (if it doesn't exist) before
    # the URL is checked
    utils.ensure_dir_exists(os.path.dirname(out_file))

    if not can_fetch_from(url):
        raise UnknownURLformatError(
            "Cannot fetch data from '%s': unknown URL format" % url
        )

    _curl_fetch(url, out_file, ca_certs)
Packit 792a06
Packit 792a06
Packit 792a06
def _curl_fetch(url, out_file, ca_certs=None):
    """
    Function that fetches data with pycurl and writes it out to the given file
    path. If a path to the file with CA certificates is given and the url
    starts with 'https', the server certificate is validated against it.

    An FTP URL with no '@' in the part following 'ftp://' (i.e. with no
    user:pass given) is rewritten to log in as the anonymous user.

    :param url: url of the data that has to start with 'http://', 'https://',
                'ftp://' or 'file://'
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs: path to the file with CA certificates for server
                     certificate validation
    :type ca_certs: str
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs file path given and url not starting
                              with https://) or arguments don't have required
                              format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors or
                       the server answering with an HTTP error code)

    """

    # choose the pattern the URL has to match according to its prefix
    if url.startswith("ftp"):
        url_re, url_re_str = FTP_URL_RE, FTP_URL_RE_STR
    elif url.startswith("file"):
        url_re, url_re_str = FILE_URL_RE, FILE_URL_RE_STR
    else:
        url_re, url_re_str = HTTP_URL_RE, HTTP_URL_RE_STR

    match = url_re.match(url)
    if not match:
        msg = "Wrong url not matching '%s'" % url_re_str
        raise WrongRequestError(msg)

    # the first group contains the protocol, the second one the rest
    protocol, rest = match.groups()

    if protocol == "ftp" and '@' not in rest:
        # no user:pass given -> use anonymous login to the FTP server
        url = protocol + "://anonymous:@" + rest

    if not out_file:
        raise WrongRequestError("out_file cannot be an empty string")

    if ca_certs and protocol != "https":
        msg = "Cannot verify server certificate when using plain HTTP"
        raise WrongRequestError(msg)

    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)

        # make HTTP responses with codes >= 400 fail the transfer instead of
        # silently storing the server's error page as the fetched data
        curl.setopt(pycurl.FAILONERROR, 1)

        if ca_certs and protocol == "https":
            # the strictest verification
            curl.setopt(pycurl.SSL_VERIFYHOST, 2)
            curl.setopt(pycurl.SSL_VERIFYPEER, 1)
            curl.setopt(pycurl.CAINFO, ca_certs)

        # may be turned off by flags (specified on command line, take
        # precedence)
        if not conf.payload.verify_ssl:
            log.warning("Disabling SSL verification due to the noverifyssl flag")
            curl.setopt(pycurl.SSL_VERIFYHOST, 0)
            curl.setopt(pycurl.SSL_VERIFYPEER, 0)

        try:
            with open(out_file, "wb") as fobj:
                curl.setopt(pycurl.WRITEDATA, fobj)
                curl.perform()
        except pycurl.error as err:
            # first arg is the error code
            if err.args[0] == pycurl.E_SSL_CACERT:
                msg = "Failed to connect to server and validate its "\
                      "certificate: %s" % err
                raise CertificateValidationError(msg) from err

            msg = "Failed to fetch data: %s" % err
            raise FetchError(msg) from err
    finally:
        # always release the curl handle, no matter how the transfer ended
        curl.close()