|
Packit Service |
39273c |
"""
|
|
Packit Service |
39273c |
Module for fetching files via HTTP, FTP and file:// URLs, either directly or
over SSL (HTTPS) with server certificate validation.
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
"""
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
import re
|
|
Packit Service |
39273c |
import os
|
|
Packit Service |
39273c |
import os.path
|
|
Packit Service |
39273c |
import pycurl
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
from pyanaconda.core.configuration.anaconda import conf
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
from org_fedora_oscap import utils
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
import logging
|
|
Packit Service |
39273c |
log = logging.getLogger("anaconda")
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
# public interface of the module; every other name should be treated as private
__all__ = [
    "fetch_data",
    "can_fetch_from",
]
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
# URL prefixes whose retrieval needs a network connection
NET_URL_PREFIXES = ("http", "https", "ftp")

# URL prefixes that may work without a network connection
LOCAL_URL_PREFIXES = ("file",)

# TODO: needs improvements
# Every pattern below captures the protocol in the first group and the rest of
# the URL (everything following "://") in the second group.
HTTP_URL_RE_STR = r"(https?)://(.*)"
HTTP_URL_RE = re.compile(HTTP_URL_RE_STR)

FTP_URL_RE_STR = r"(ftp)://(.*)"
FTP_URL_RE = re.compile(FTP_URL_RE_STR)

FILE_URL_RE_STR = r"(file)://(.*)"
FILE_URL_RE = re.compile(FILE_URL_RE_STR)
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
class DataFetchError(Exception):
    """Common base class of all the exceptions defined in this module."""
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
class CertificateValidationError(DataFetchError):
    """Error related to the validation of a certificate."""
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
class WrongRequestError(DataFetchError):
    """Error signalling a wrong combination of request parameters."""
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
class UnknownURLformatError(DataFetchError):
    """Error signalling an invalid URL."""
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
class FetchError(DataFetchError):
    """Error that occurred while fetching data, usually due to I/O errors."""
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
def can_fetch_from(url):
    """
    Tell whether the fetch_data function understands the type of the given
    URL.

    Only the beginning of the URL is compared with the known network and local
    URL prefixes; the rest of the URL is not validated here.

    :param url: URL
    :type url: str
    :return: True if the URL starts with one of the supported prefixes,
             False otherwise
    :rtype: bool

    """
    # str.startswith accepts a tuple and is true if any of its items matches
    return url.startswith(NET_URL_PREFIXES + LOCAL_URL_PREFIXES)
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
def fetch_data(url, out_file, ca_certs=None):
    """
    Fetch data from a given URL. If the URL starts with https://, ca_certs can
    be a path to PEM file with CA certificate chain to validate server
    certificate.

    The URL is checked before anything is written, so an unsupported URL does
    not cause the output directory to be created.

    :param url: URL of the data
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs: path to a PEM file with CA certificate chain
    :type ca_certs: str
    :raise UnknownURLformatError: if the type of the URL is not supported
                                  (see can_fetch_from)
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs file path given and url starting with
                              http://) or arguments don't have required format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors)

    """

    # reject unsupported URLs first so that a bad request has no side effects
    # on the file system
    if not can_fetch_from(url):
        msg = "Cannot fetch data from '%s': unknown URL format" % url
        raise UnknownURLformatError(msg)

    # create the directory for the out_file if it doesn't exist; a bare file
    # name has an empty directory component, in which case there is nothing
    # to create
    out_dir = os.path.dirname(out_file)
    if out_dir:
        utils.ensure_dir_exists(out_dir)

    _curl_fetch(url, out_file, ca_certs)
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
|
|
Packit Service |
39273c |
def _curl_fetch(url, out_file, ca_certs=None):
    """
    Function that fetches data with pycurl and writes it out to the given file
    path. If a path to the file with CA certificates is given and the url
    starts with 'https', the server certificate is validated.

    FTP URLs that contain no '@' after the 'ftp://' part are rewritten to log
    in as the anonymous user. SSL verification is switched off when
    conf.payload.verify_ssl is false, regardless of ca_certs.

    :param url: url of the data that has to match one of the HTTP(S), FTP or
                file URL patterns defined in this module
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs: path to the file with CA certificates for server
                     certificate validation
    :type ca_certs: str
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs file path given and url not using
                              https) or arguments don't have required format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors)

    """

    if url.startswith("ftp"):
        match = FTP_URL_RE.match(url)
        if not match:
            msg = "Wrong url not matching '%s'" % FTP_URL_RE_STR
            raise WrongRequestError(msg)

        protocol, path = match.groups()
        if '@' not in path:
            # no user:pass given -> use anonymous login to the FTP server
            url = protocol + "://anonymous:@" + path
    elif url.startswith("file"):
        match = FILE_URL_RE.match(url)
        if not match:
            msg = "Wrong url not matching '%s'" % FILE_URL_RE_STR
            raise WrongRequestError(msg)
    else:
        match = HTTP_URL_RE.match(url)
        if not match:
            msg = "Wrong url not matching '%s'" % HTTP_URL_RE_STR
            raise WrongRequestError(msg)

    # the first group contains the protocol, the second one the rest
    protocol = match.groups()[0]

    if not out_file:
        raise WrongRequestError("out_file cannot be an empty string")

    if ca_certs and protocol != "https":
        msg = "Cannot verify server certificate when using plain HTTP"
        raise WrongRequestError(msg)

    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)

        if ca_certs and protocol == "https":
            # the strictest verification
            curl.setopt(pycurl.SSL_VERIFYHOST, 2)
            curl.setopt(pycurl.SSL_VERIFYPEER, 1)
            curl.setopt(pycurl.CAINFO, ca_certs)

        # may be turned off by flags (specified on command line, take
        # precedence)
        if not conf.payload.verify_ssl:
            log.warning("Disabling SSL verification due to the noverifyssl flag")
            curl.setopt(pycurl.SSL_VERIFYHOST, 0)
            curl.setopt(pycurl.SSL_VERIFYPEER, 0)

        # only errors of the transfer itself are translated to the exceptions
        # of this module; errors raised while configuring the handle above
        # propagate unchanged
        try:
            with open(out_file, "wb") as fobj:
                curl.setopt(pycurl.WRITEDATA, fobj)
                curl.perform()
        except pycurl.error as err:
            # first arg is the error code
            if err.args[0] == pycurl.E_SSL_CACERT:
                msg = ("Failed to connect to server and validate its "
                       "certificate: %s" % err)
                raise CertificateValidationError(msg) from err

            msg = "Failed to fetch data: %s" % err
            raise FetchError(msg) from err
    finally:
        # release the curl handle on success as well as on any failure
        curl.close()
|