import os, re, sys, subprocess
import tarfile
from distutils import log, version
from contextlib import closing
from ftplib import FTP
try:
from urlparse import urljoin, unquote, urlparse
from urllib import urlretrieve, urlopen, urlcleanup
except ImportError:
from urllib.parse import urljoin, unquote, urlparse
from urllib.request import urlretrieve, urlopen, urlcleanup
# Default "-jN" option list passed to make; stays empty when parallel builds
# are not applicable (single CPU, or the CPU count cannot be determined).
multi_make_options = []
try:
    import multiprocessing
    cpus = multiprocessing.cpu_count()
    if cpus > 1:
        # cap the considered CPU count at 5, i.e. at most "-j6"
        if cpus > 5:
            cpus = 5
        multi_make_options = ['-j%d' % (cpus+1)]
except Exception:
    # Best effort only: fall back to a serial build.  Unlike a bare
    # "except:", this lets KeyboardInterrupt and SystemExit propagate.
    pass
# use pre-built libraries on Windows
def download_and_extract_windows_binaries(destdir):
    """Download and unpack the pre-built Windows library archives.

    The newest release listed on the releases page is located, the zip
    archive of each required library is downloaded into *destdir* (unless a
    "<archive>.keep" marker file exists next to it) and then unpacked there.

    Returns a dict mapping each library name to its unpacked directory.
    """
    releases_url = "https://github.com/mhils/libxml2-win-binaries/releases"
    release_links = list(_list_dir_urllib(releases_url))

    tag_pattern = re.compile(r"/releases/tag/([0-9.]+[0-9])$")
    newest_release = find_max_version("library release", release_links, tag_pattern)
    release_path = "/download/%s/" % newest_release
    download_url = releases_url + release_path

    # keep only the bare file names of links that belong to that release
    archive_names = []
    for link in release_links:
        if release_path in link:
            archive_names.append(link.rsplit('/', 1)[1])

    if sys.maxsize > 2**32:
        arch = "win64"
    else:
        arch = "win32"
    if sys.version_info < (3, 5):
        arch = 'vs2008.' + arch

    archives = {}
    for libname in ['libxml2', 'libxslt', 'zlib', 'iconv']:
        newest = find_max_version(libname, archive_names)
        archives[libname] = "%s-%s.%s.zip" % (libname, newest, arch)

    if not os.path.exists(destdir):
        os.makedirs(destdir)

    unpacked = {}
    for libname, libfn in archives.items():
        srcfile = urljoin(download_url, libfn)
        destfile = os.path.join(destdir, libfn)
        if not os.path.exists(destfile + ".keep"):
            print('Retrieving "%s" to "%s"' % (srcfile, destfile))
            urlcleanup()  # work around FTP bug 27973 in Py2.7.12+
            urlretrieve(srcfile, destfile)
        else:
            print('Using local copy of "{}"'.format(srcfile))
        unpacked[libname] = unpack_zipfile(destfile, destdir)

    return unpacked
def find_top_dir_of_zipfile(zipfile):
    """Return the single top-level directory that contains all zip members.

    *zipfile* is an open ``zipfile.ZipFile``.  The directory is derived from
    the member names themselves, so archives that do not store explicit
    directory entries are handled as well.

    Raises AssertionError if the archive is empty or its members do not all
    live below one common top-level directory.
    """
    names = [info.filename for info in zipfile.filelist]
    topdir = None
    if names:
        head, slash, _ = names[0].partition('/')
        if slash:
            # a member without any "/" is a top-level file, not a directory
            candidate = head + slash
            if all(name.startswith(candidate) for name in names):
                topdir = candidate
    if not topdir:
        # raised explicitly so that the check also holds under "python -O"
        raise AssertionError(
            "cannot determine single top-level directory in zip file %s" %
            zipfile.filename)
    return topdir.rstrip('/')
def unpack_zipfile(zipfn, destdir):
    """Extract the zip archive *zipfn* into *destdir*.

    Returns the path of the archive's top-level directory below *destdir*.
    """
    assert zipfn.endswith('.zip')
    import zipfile
    print('Unpacking %s into %s' % (os.path.basename(zipfn), destdir))
    with zipfile.ZipFile(zipfn) as archive:
        top_name = find_top_dir_of_zipfile(archive)
        extracted_dir = os.path.join(destdir, top_name)
        archive.extractall(path=destdir)
    assert os.path.exists(extracted_dir), 'missing: %s' % extracted_dir
    return extracted_dir
def get_prebuilt_libxml2xslt(download_dir, static_include_dirs, static_library_dirs):
    """Fetch the pre-built Windows libraries and register their directories.

    The "include" and "lib" directory of every unpacked library is appended
    to *static_include_dirs* and *static_library_dirs* respectively.
    """
    assert sys.platform.startswith('win')
    unpacked = download_and_extract_windows_binaries(download_dir)
    for lib_root in unpacked.values():
        include_dir = os.path.join(lib_root, 'include')
        library_dir = os.path.join(lib_root, 'lib')
        # both directories must exist before either one is registered
        for required in (include_dir, library_dir):
            assert os.path.exists(required), 'does not exist: %s' % required
        static_include_dirs.append(include_dir)
        static_library_dirs.append(library_dir)
## Routines to download and build libxml2/xslt from sources:

# Base URLs whose listings are searched for the source tarballs.
# download_libxslt() uses LIBXML2_LOCATION as well.
LIBXML2_LOCATION = 'ftp://xmlsoft.org/libxml2/'
LIBICONV_LOCATION = 'ftp://ftp.gnu.org/pub/gnu/libiconv/'
ZLIB_LOCATION = 'http://zlib.net/'
# Matcher for "<name>-<version>.<rest>" file names; group 1 captures the
# version part (digits, dots and dashes).
match_libfile_version = re.compile('^[^-]*-([.0-9-]+)[.].*').match
def _find_content_encoding(response, default='iso8859-1'):
from email.message import Message
content_type = response.headers.get('Content-Type')
if content_type:
msg = Message()
msg.add_header('Content-Type', content_type)
charset = msg.get_content_charset(default)
else:
charset = default
return charset
def remote_listdir(url):
    """List the files at *url*, preferring urllib and falling back to ftplib.

    The ftplib fallback is only attempted for "ftp://" URLs.  For any other
    URL the IOError raised by the urllib request is re-raised unchanged, so
    that the real network failure is reported to the caller.
    """
    try:
        return _list_dir_urllib(url)
    except IOError:
        if not url.lower().startswith('ftp://'):
            # nothing to fall back to - do not hide the original error
            raise
        print("Requesting with urllib failed. Falling back to ftplib. Proxy argument will be ignored")
        return _list_dir_ftplib(url)
def _list_dir_ftplib(url):
    """List the non-directory entries of an FTP directory using ftplib."""
    location = urlparse(url)
    listing = []
    connection = FTP(location.netloc)
    try:
        connection.login()
        connection.cwd(location.path)
        # collect the raw listing line by line
        connection.dir(listing.append)
    finally:
        connection.quit()
    return parse_text_ftplist("\n".join(listing))
def _list_dir_urllib(url):
    """Fetch *url* with urllib and return an iterable of the listed files.

    Responses with a "text/html" content type are parsed as an HTML link
    list, everything else as a plain text FTP directory listing.
    """
    with closing(urlopen(url)) as response:
        charset = _find_content_encoding(response)
        content_type = response.headers.get('Content-Type')
        raw = response.read()

    text = raw.decode(charset)
    is_html = bool(content_type) and content_type.startswith('text/html')
    parser = parse_html_filelist if is_html else parse_text_ftplist
    return parser(text)
def http_listfiles(url, re_pattern):
    """Fetch *url* and return all matches of *re_pattern* in its content."""
    with closing(urlopen(url)) as response:
        charset = _find_content_encoding(response)
        raw = response.read()
    return re.findall(re_pattern, raw.decode(charset))
def parse_text_ftplist(s):
    """Yield the file names from a text FTP directory listing.

    Lines starting with 'd' (directories) are skipped, and so are blank or
    whitespace-only lines, which have no name field to extract.
    """
    for line in s.splitlines():
        if line.startswith('d'):
            continue
        # -rw-r--r--   1 ftp      ftp           476 Sep  1  2011 md5sum.txt
        # Last (9th) element is 'md5sum.txt' in the above example, but there
        # may be variations, so we discard only the first 8 entries.
        fields = line.split(None, 8)
        if fields:
            yield fields[-1]
def parse_html_filelist(s):
    """Yield the unquoted targets of all distinct links in the HTML text *s*.

    Link targets that end in '/' are left out.
    """
    href_pattern = re.compile(
        r'<a\s+(?:[^>]*\s+)?href=["\']([^;?"\']+?)[;?"\']',
        re.I|re.M)
    for target in set(href_pattern.findall(s)):
        if target.endswith('/'):
            continue
        yield unquote(target)
def tryint(s):
    """Return *s* converted to an int, or *s* itself if int() rejects its value."""
    try:
        number = int(s)
    except ValueError:
        return s
    else:
        return number
def download_libxml2(dest_dir, version=None):
    """Download the libxml2 source tarball into dest_dir and return its file name."""
    return download_library(
        dest_dir,
        LIBXML2_LOCATION,
        'libxml2',
        re.compile(r'LATEST_LIBXML2_IS_([0-9.]+[0-9])'),
        'libxml2-%s.tar.gz',
        version=version)
def download_libxslt(dest_dir, version=None):
    """Download the libxslt source tarball into dest_dir and return its file name."""
    # libxslt is listed at the same location as libxml2
    return download_library(
        dest_dir,
        LIBXML2_LOCATION,
        'libxslt',
        re.compile(r'LATEST_LIBXSLT_IS_([0-9.]+[0-9])'),
        'libxslt-%s.tar.gz',
        version=version)
def download_libiconv(dest_dir, version=None):
    """Download the libiconv source tarball into dest_dir and return its file name."""
    return download_library(
        dest_dir,
        LIBICONV_LOCATION,
        'libiconv',
        re.compile(r'^libiconv-([0-9.]+[0-9]).tar.gz$'),
        'libiconv-%s.tar.gz',
        version=version)
def download_zlib(dest_dir, version=None):
    """Downloads zlib, returning the filename where the library was downloaded

    *version* is optional, as in the other download_* functions: when it is
    None, download_library() looks up the latest available version.
    """
    version_re = re.compile(r'zlib-([0-9.]+[0-9]).tar.gz')
    filename = 'zlib-%s.tar.gz'
    return download_library(dest_dir, ZLIB_LOCATION, 'zlib',
                            version_re, filename, version=version)
def find_max_version(libname, filenames, version_re=None):
    """Return the highest version string found among *filenames*.

    Without *version_re*, names are searched for "<libname>-<version>".
    Versions are compared component-wise on their dot-separated parts.
    Raises Exception if no file name carries a version.
    """
    pattern = version_re
    if pattern is None:
        pattern = re.compile(r'%s-([0-9.]+[0-9])' % libname)

    found = []
    for candidate in filenames:
        hit = pattern.search(candidate)
        if hit is None:
            continue
        text = hit.group(1)
        sort_key = tuple(tryint(part) for part in text.split('.'))
        found.append((sort_key, text))

    if not found:
        raise Exception(
            "Could not find the most current version of %s from the files: %s" % (
                libname, filenames))

    newest = sorted(found)[-1][-1]
    print('Latest version of %s is %s' % (libname, newest))
    return newest
def download_library(dest_dir, location, name, version_re, filename, version=None):
    """Make sure the source tarball of library *name* is present in *dest_dir*.

    Without an explicit *version*, the listing at *location* is searched for
    the latest one.  If that lookup fails with an IOError, the newest
    matching tarball that already exists in *dest_dir* is used instead; the
    IOError is re-raised when there is none.

    *filename* is a template with a '%s' placeholder for the version.
    Returns the path of the tarball, which is downloaded only if missing.
    """
    if version is None:
        try:
            if location.startswith('ftp://'):
                remote_names = remote_listdir(location)
            else:
                listing_pattern = filename.replace('%s', '(?:[0-9.]+[0-9])')
                remote_names = http_listfiles(location, listing_pattern)
            version = find_max_version(name, remote_names, version_re)
        except IOError:
            # network failure - maybe we have the files already?
            newest = (0,0,0)
            prefix = name + '-'
            for local_name in os.listdir(dest_dir):
                if not local_name.startswith(prefix):
                    continue
                found = match_libfile_version(local_name)
                if not found:
                    continue
                candidate = tuple(map(tryint, found.group(1).split('.')))
                if candidate > newest:
                    newest = candidate
                    # use the local file name as is, no version to fill in
                    filename = local_name
                    version = None
            if newest == (0,0,0):
                raise

    if version:
        filename = filename % version
    full_url = urljoin(location, filename)
    dest_filename = os.path.join(dest_dir, filename)

    if not os.path.exists(dest_filename):
        print('Downloading %s into %s from %s' % (name, dest_filename, full_url))
        urlcleanup()  # work around FTP bug 27973 in Py2.7.12
        urlretrieve(full_url, dest_filename)
        return dest_filename

    print(('Using existing %s downloaded into %s '
           '(delete this file if you want to re-download the package)') % (
        name, dest_filename))
    return dest_filename
def unpack_tarball(tar_filename, dest):
    """Extract the tarball *tar_filename* into the directory *dest*.

    Returns the path below *dest* of the first path component of the first
    archive member.  Members that live below a different first path
    component are reported on stdout but are extracted all the same.
    """
    print('Unpacking %s into %s' % (os.path.basename(tar_filename), dest))
    base_dir = None
    # the "with" block closes the archive even if reading or extracting fails
    with tarfile.open(tar_filename) as tar:
        for member in tar:
            base_name = member.name.split('/')[0]
            if base_dir is None:
                base_dir = base_name
            elif base_dir != base_name:
                print('Unexpected path in %s: %s' % (tar_filename, base_name))
        # NOTE(review): extractall() is called without an extraction filter,
        # so member paths of the downloaded archive are trusted as they are;
        # see the tarfile documentation on extraction filters.
        tar.extractall(dest)
    return os.path.join(dest, base_dir)
def call_subprocess(cmd, **kw):
    """Run the command list *cmd* and raise Exception on a non-zero exit code.

    Keyword arguments are passed on to subprocess.call(); 'cwd' is also used
    for the log message.
    """
    from subprocess import call
    working_dir = kw['cwd'] if 'cwd' in kw else '.'
    description = ' '.join(cmd)
    log.info('Running "%s" in %s' % (description, working_dir))
    status = call(cmd, **kw)
    if not status:
        return
    raise Exception('Command "%s" returned code %s' % (description, status))
def safe_mkdir(dir):
    """Create the directory *dir* including parents, unless the path exists."""
    if os.path.exists(dir):
        return
    os.makedirs(dir)
def cmmi(configure_cmd, build_dir, multicore=None, **call_setup):
    """Run configure, "make" and "make install" inside *build_dir*.

    A false *multicore* selects the module default make options, a value
    above 1 is passed to make as "-j<multicore>", anything else builds
    without a "-j" option.  *call_setup* is forwarded to call_subprocess().
    """
    print('Starting build in %s' % build_dir)
    call_subprocess(configure_cmd, cwd=build_dir, **call_setup)

    make_cmd = ['make']
    if not multicore:
        make_cmd += multi_make_options
    elif int(multicore) > 1:
        make_cmd.append('-j%s' % multicore)

    # first the plain build, then the install step with the same options
    for extra_args in ([], ['install']):
        call_subprocess(make_cmd + extra_args, cwd=build_dir, **call_setup)
def configure_darwin_env(env_setup):
    """Store a build environment for macOS in env_setup['env'].

    The environment consists of default CFLAGS, LDFLAGS and
    MACOSX_DEPLOYMENT_TARGET values, overridden by whatever is set in
    os.environ.  Nothing is stored unless the first component of the
    version reported by platform.mac_ver() is greater than 7.
    """
    import platform
    # check target architectures on MacOS-X (ppc, i386, x86_64)
    major_version, minor_version = tuple(map(int, platform.mac_ver()[0].split('.')[:2]))
    if major_version > 7:
        # Check to see if ppc is supported (XCode4 drops ppc support)
        include_ppc = True
        if os.path.exists('/usr/bin/xcodebuild'):
            pipe = subprocess.Popen(['/usr/bin/xcodebuild', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, _ = pipe.communicate()
            # first output line, or '' if there was no output at all
            xcode_version = (out.decode('utf8').splitlines() or [''])[0]
            # Also parse only first digit, because 3.2.1 can't be parsed nicely
            if (xcode_version.startswith('Xcode') and
                    version.StrictVersion(xcode_version.split()[1]) >= version.StrictVersion('4.0')):
                include_ppc = False
        arch_string = ""
        if include_ppc:
            arch_string = "-arch ppc "
        if minor_version < 6:
            env_default = {
                'CFLAGS': arch_string + "-arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2",
                'LDFLAGS': arch_string + "-arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk",
                'MACOSX_DEPLOYMENT_TARGET': "10.3"
            }
        else:
            env_default = {
                'CFLAGS': arch_string + "-arch i386 -arch x86_64 -O2",
                'LDFLAGS': arch_string + "-arch i386 -arch x86_64",
                'MACOSX_DEPLOYMENT_TARGET': "10.6"
            }
        env = os.environ.copy()
        # variables of the current environment take precedence over the defaults
        env_default.update(env)
        env_setup['env'] = env_default
def build_libxml2xslt(download_dir, build_dir,
                      static_include_dirs, static_library_dirs,
                      static_cflags, static_binaries,
                      libxml2_version=None,
                      libxslt_version=None,
                      libiconv_version=None,
                      zlib_version=None,
                      multicore=None):
    """Download and build static zlib, libiconv, libxml2 and libxslt.

    The tarballs are downloaded into download_dir and unpacked into
    build_dir.  All four libraries are installed into the common prefix
    "<build_dir>/libxml2".

    static_include_dirs, static_library_dirs and static_binaries are lists
    that get extended in place with the include directories, the library
    directory and the matching "*.a" files of that prefix.  static_cflags
    is not used by this function.

    The *_version arguments select specific library versions and are passed
    to the respective download functions.  multicore is passed on to cmmi().

    Returns the tuple (xml2_config, xslt_config) with the paths of the
    "xml2-config" and "xslt-config" scripts below the prefix.
    """
    safe_mkdir(download_dir)
    safe_mkdir(build_dir)
    zlib_dir = unpack_tarball(download_zlib(download_dir, zlib_version), build_dir)
    libiconv_dir = unpack_tarball(download_libiconv(download_dir, libiconv_version), build_dir)
    libxml2_dir = unpack_tarball(download_libxml2(download_dir, libxml2_version), build_dir)
    libxslt_dir = unpack_tarball(download_libxslt(download_dir, libxslt_version), build_dir)
    # common install prefix of all four libraries
    prefix = os.path.join(os.path.abspath(build_dir), 'libxml2')
    safe_mkdir(prefix)

    # extra keyword arguments for the subprocess calls (e.g. 'env' on macOS)
    call_setup = {}
    if sys.platform == 'darwin':
        configure_darwin_env(call_setup)

    # configure options shared by libiconv, libxml2 and libxslt
    configure_cmd = ['./configure',
                     '--disable-dependency-tracking',
                     '--disable-shared',
                     '--prefix=%s' % prefix,
                     ]

    # build zlib
    zlib_configure_cmd = [
        './configure',
        '--prefix=%s' % prefix,
    ]
    cmmi(zlib_configure_cmd, zlib_dir, multicore, **call_setup)

    # build libiconv
    cmmi(configure_cmd, libiconv_dir, multicore, **call_setup)

    # build libxml2
    libxml2_configure_cmd = configure_cmd + [
        '--without-python',
        '--with-iconv=%s' % prefix,
        '--with-zlib=%s' % prefix,
    ]

    if not libxml2_version:
        # derive the version from the unpacked directory name (text after the first '-')
        libxml2_version = os.path.basename(libxml2_dir).split('-', 1)[-1]

    # NOTE: unlike the check below, this version comparison is not guarded
    # by a try block, so an error raised here propagates to the caller.
    if tuple(map(tryint, libxml2_version.split('-', 1)[0].split('.'))) >= (2, 9, 5):
        libxml2_configure_cmd.append('--without-lzma')  # can't currently build that

    try:
        if tuple(map(tryint, libxml2_version.split('-', 1)[0].split('.'))) >= (2, 7, 3):
            libxml2_configure_cmd.append('--enable-rebuild-docs=no')
    except Exception:
        pass # this isn't required, so ignore any errors
    cmmi(libxml2_configure_cmd, libxml2_dir, multicore, **call_setup)

    # build libxslt
    libxslt_configure_cmd = configure_cmd + [
        '--without-python',
        '--with-libxml-prefix=%s' % prefix,
    ]
    if sys.platform in ('darwin',):
        libxslt_configure_cmd += [
            '--without-crypto',
        ]
    cmmi(libxslt_configure_cmd, libxslt_dir, multicore, **call_setup)

    # collect build setup for lxml
    xslt_config = os.path.join(prefix, 'bin', 'xslt-config')
    xml2_config = os.path.join(prefix, 'bin', 'xml2-config')

    lib_dir = os.path.join(prefix, 'lib')
    static_include_dirs.extend([
        os.path.join(prefix, 'include'),
        os.path.join(prefix, 'include', 'libxml2'),
        os.path.join(prefix, 'include', 'libxslt'),
        os.path.join(prefix, 'include', 'libexslt')])
    static_library_dirs.append(lib_dir)

    # pick up every "*.a" file in lib_dir whose name contains one of the
    # library names; "+=" extends the caller's list in place
    listdir = os.listdir(lib_dir)
    static_binaries += [os.path.join(lib_dir, filename)
                        for lib in ['libxml2', 'libexslt', 'libxslt', 'iconv', 'libz']
                        for filename in listdir
                        if lib in filename and filename.endswith('.a')]

    return (xml2_config, xslt_config)