You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

940 lines
29 KiB

from __future__ import absolute_import
import contextlib
import errno
import io
import locale
# we have a submodule named 'logging' which would shadow this if we used the
# regular name:
import logging as std_logging
import os
import posixpath
import re
import shutil
import stat
import subprocess
import sys
import tarfile
import zipfile
from collections import deque
from pip._vendor import pkg_resources
# NOTE: retrying is not annotated in typeshed as on 2017-07-17, which is
# why we ignore the type on this import.
from pip._vendor.retrying import retry # type: ignore
from pip._vendor.six import PY2
from pip._vendor.six.moves import input
from pip._vendor.six.moves.urllib import parse as urllib_parse
from pip._internal.exceptions import CommandError, InstallationError
from pip._internal.locations import (
running_under_virtualenv, site_packages, user_site, virtualenv_no_global,
write_delete_marker_file,
)
from pip._internal.utils.compat import (
WINDOWS, console_to_str, expanduser, stdlib_pkgs,
)
if PY2:
from io import BytesIO as StringIO
else:
from io import StringIO
__all__ = ['rmtree', 'display_path', 'backup_dir',
'ask', 'splitext',
'format_size', 'is_installable_dir',
'is_svn_page', 'file_contents',
'split_leading_dir', 'has_leading_dir',
'normalize_path',
'renames', 'get_prog',
'unzip_file', 'untar_file', 'unpack_file', 'call_subprocess',
'captured_stdout', 'ensure_dir',
'ARCHIVE_EXTENSIONS', 'SUPPORTED_EXTENSIONS',
'get_installed_version', 'remove_auth_from_url']
logger = std_logging.getLogger(__name__)
BZ2_EXTENSIONS = ('.tar.bz2', '.tbz')
XZ_EXTENSIONS = ('.tar.xz', '.txz', '.tlz', '.tar.lz', '.tar.lzma')
ZIP_EXTENSIONS = ('.zip', '.whl')
TAR_EXTENSIONS = ('.tar.gz', '.tgz', '.tar')
ARCHIVE_EXTENSIONS = (
ZIP_EXTENSIONS + BZ2_EXTENSIONS + TAR_EXTENSIONS + XZ_EXTENSIONS)
SUPPORTED_EXTENSIONS = ZIP_EXTENSIONS + TAR_EXTENSIONS
try:
import bz2 # noqa
SUPPORTED_EXTENSIONS += BZ2_EXTENSIONS
except ImportError:
logger.debug('bz2 module is not available')
try:
# Only for Python 3.3+
import lzma # noqa
SUPPORTED_EXTENSIONS += XZ_EXTENSIONS
except ImportError:
logger.debug('lzma module is not available')
def import_or_raise(pkg_or_module_string, ExceptionType, *args, **kwargs):
try:
return __import__(pkg_or_module_string)
except ImportError:
raise ExceptionType(*args, **kwargs)
def ensure_dir(path):
"""os.path.makedirs without EEXIST."""
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def get_prog():
try:
prog = os.path.basename(sys.argv[0])
if prog in ('__main__.py', '-c'):
return "%s -m pip" % sys.executable
else:
return prog
except (AttributeError, TypeError, IndexError):
pass
return 'pip'
# Retry every half second for up to 3 seconds
@retry(stop_max_delay=3000, wait_fixed=500)
def rmtree(dir, ignore_errors=False):
shutil.rmtree(dir, ignore_errors=ignore_errors,
onerror=rmtree_errorhandler)
def rmtree_errorhandler(func, path, exc_info):
"""On Windows, the files in .svn are read-only, so when rmtree() tries to
remove them, an exception is thrown. We catch that here, remove the
read-only attribute, and hopefully continue without problems."""
# if file type currently read only
if os.stat(path).st_mode & stat.S_IREAD:
# convert to read/write
os.chmod(path, stat.S_IWRITE)
# use the original function to repeat the operation
func(path)
return
else:
raise
def display_path(path):
"""Gives the display value for a given path, making it relative to cwd
if possible."""
path = os.path.normcase(os.path.abspath(path))
if sys.version_info[0] == 2:
path = path.decode(sys.getfilesystemencoding(), 'replace')
path = path.encode(sys.getdefaultencoding(), 'replace')
if path.startswith(os.getcwd() + os.path.sep):
path = '.' + path[len(os.getcwd()):]
return path
def backup_dir(dir, ext='.bak'):
"""Figure out the name of a directory to back up the given dir to
(adding .bak, .bak2, etc)"""
n = 1
extension = ext
while os.path.exists(dir + extension):
n += 1
extension = ext + str(n)
return dir + extension
def ask_path_exists(message, options):
for action in os.environ.get('PIP_EXISTS_ACTION', '').split():
if action in options:
return action
return ask(message, options)
def ask(message, options):
"""Ask the message interactively, with the given possible responses"""
while 1:
if os.environ.get('PIP_NO_INPUT'):
raise Exception(
'No input was expected ($PIP_NO_INPUT set); question: %s' %
message
)
response = input(message)
response = response.strip().lower()
if response not in options:
print(
'Your response (%r) was not one of the expected responses: '
'%s' % (response, ', '.join(options))
)
else:
return response
def format_size(bytes):
if bytes > 1000 * 1000:
return '%.1fMB' % (bytes / 1000.0 / 1000)
elif bytes > 10 * 1000:
return '%ikB' % (bytes / 1000)
elif bytes > 1000:
return '%.1fkB' % (bytes / 1000.0)
else:
return '%ibytes' % bytes
def is_installable_dir(path):
"""Is path is a directory containing setup.py or pyproject.toml?
"""
if not os.path.isdir(path):
return False
setup_py = os.path.join(path, 'setup.py')
if os.path.isfile(setup_py):
return True
pyproject_toml = os.path.join(path, 'pyproject.toml')
if os.path.isfile(pyproject_toml):
return True
return False
def is_svn_page(html):
"""
Returns true if the page appears to be the index page of an svn repository
"""
return (re.search(r'<title>[^<]*Revision \d+:', html) and
re.search(r'Powered by (?:<a[^>]*?>)?Subversion', html, re.I))
def file_contents(filename):
with open(filename, 'rb') as fp:
return fp.read().decode('utf-8')
def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
"""Yield pieces of data from a file-like object until EOF."""
while True:
chunk = file.read(size)
if not chunk:
break
yield chunk
def split_leading_dir(path):
path = path.lstrip('/').lstrip('\\')
if '/' in path and (('\\' in path and path.find('/') < path.find('\\')) or
'\\' not in path):
return path.split('/', 1)
elif '\\' in path:
return path.split('\\', 1)
else:
return path, ''
def has_leading_dir(paths):
"""Returns true if all the paths have the same leading path name
(i.e., everything is in one subdirectory in an archive)"""
common_prefix = None
for path in paths:
prefix, rest = split_leading_dir(path)
if not prefix:
return False
elif common_prefix is None:
common_prefix = prefix
elif prefix != common_prefix:
return False
return True
def normalize_path(path, resolve_symlinks=True):
"""
Convert a path to its canonical, case-normalized, absolute version.
"""
path = expanduser(path)
if resolve_symlinks:
path = os.path.realpath(path)
else:
path = os.path.abspath(path)
return os.path.normcase(path)
def splitext(path):
"""Like os.path.splitext, but take off .tar too"""
base, ext = posixpath.splitext(path)
if base.lower().endswith('.tar'):
ext = base[-4:] + ext
base = base[:-4]
return base, ext
def renames(old, new):
"""Like os.renames(), but handles renaming across devices."""
# Implementation borrowed from os.renames().
head, tail = os.path.split(new)
if head and tail and not os.path.exists(head):
os.makedirs(head)
shutil.move(old, new)
head, tail = os.path.split(old)
if head and tail:
try:
os.removedirs(head)
except OSError:
pass
def is_local(path):
"""
Return True if path is within sys.prefix, if we're running in a virtualenv.
If we're not in a virtualenv, all paths are considered "local."
"""
if not running_under_virtualenv():
return True
return normalize_path(path).startswith(normalize_path(sys.prefix))
def dist_is_local(dist):
"""
Return True if given Distribution object is installed locally
(i.e. within current virtualenv).
Always True if we're not in a virtualenv.
"""
return is_local(dist_location(dist))
def dist_in_usersite(dist):
"""
Return True if given Distribution is installed in user site.
"""
norm_path = normalize_path(dist_location(dist))
return norm_path.startswith(normalize_path(user_site))
def dist_in_site_packages(dist):
"""
Return True if given Distribution is installed in
sysconfig.get_python_lib().
"""
return normalize_path(
dist_location(dist)
).startswith(normalize_path(site_packages))
def dist_is_editable(dist):
"""Is distribution an editable install?"""
for path_item in sys.path:
egg_link = os.path.join(path_item, dist.project_name + '.egg-link')
if os.path.isfile(egg_link):
return True
return False
def get_installed_distributions(local_only=True,
skip=stdlib_pkgs,
include_editables=True,
editables_only=False,
user_only=False):
"""
Return a list of installed Distribution objects.
If ``local_only`` is True (default), only return installations
local to the current virtualenv, if in a virtualenv.
``skip`` argument is an iterable of lower-case project names to
ignore; defaults to stdlib_pkgs
If ``include_editables`` is False, don't report editables.
If ``editables_only`` is True , only report editables.
If ``user_only`` is True , only report installations in the user
site directory.
"""
if local_only:
local_test = dist_is_local
else:
def local_test(d):
return True
if include_editables:
def editable_test(d):
return True
else:
def editable_test(d):
return not dist_is_editable(d)
if editables_only:
def editables_only_test(d):
return dist_is_editable(d)
else:
def editables_only_test(d):
return True
if user_only:
user_test = dist_in_usersite
else:
def user_test(d):
return True
return [d for d in pkg_resources.working_set
if local_test(d) and
d.key not in skip and
editable_test(d) and
editables_only_test(d) and
user_test(d)
]
def egg_link_path(dist):
"""
Return the path for the .egg-link file if it exists, otherwise, None.
There's 3 scenarios:
1) not in a virtualenv
try to find in site.USER_SITE, then site_packages
2) in a no-global virtualenv
try to find in site_packages
3) in a yes-global virtualenv
try to find in site_packages, then site.USER_SITE
(don't look in global location)
For #1 and #3, there could be odd cases, where there's an egg-link in 2
locations.
This method will just return the first one found.
"""
sites = []
if running_under_virtualenv():
if virtualenv_no_global():
sites.append(site_packages)
else:
sites.append(site_packages)
if user_site:
sites.append(user_site)
else:
if user_site:
sites.append(user_site)
sites.append(site_packages)
for site in sites:
egglink = os.path.join(site, dist.project_name) + '.egg-link'
if os.path.isfile(egglink):
return egglink
def dist_location(dist):
"""
Get the site-packages location of this distribution. Generally
this is dist.location, except in the case of develop-installed
packages, where dist.location is the source code location, and we
want to know where the egg-link file is.
"""
egg_link = egg_link_path(dist)
if egg_link:
return egg_link
return dist.location
def current_umask():
"""Get the current umask which involves having to set it temporarily."""
mask = os.umask(0)
os.umask(mask)
return mask
def unzip_file(filename, location, flatten=True):
"""
Unzip the file (with path `filename`) to the destination `location`. All
files are written based on system defaults and umask (i.e. permissions are
not preserved), except that regular file members with any execute
permissions (user, group, or world) have "chmod +x" applied after being
written. Note that for windows, any execute changes using os.chmod are
no-ops per the python docs.
"""
ensure_dir(location)
zipfp = open(filename, 'rb')
try:
zip = zipfile.ZipFile(zipfp, allowZip64=True)
leading = has_leading_dir(zip.namelist()) and flatten
for info in zip.infolist():
name = info.filename
data = zip.read(name)
fn = name
if leading:
fn = split_leading_dir(name)[1]
fn = os.path.join(location, fn)
dir = os.path.dirname(fn)
if fn.endswith('/') or fn.endswith('\\'):
# A directory
ensure_dir(fn)
else:
ensure_dir(dir)
fp = open(fn, 'wb')
try:
fp.write(data)
finally:
fp.close()
mode = info.external_attr >> 16
# if mode and regular file and any execute permissions for
# user/group/world?
if mode and stat.S_ISREG(mode) and mode & 0o111:
# make dest file have execute for user/group/world
# (chmod +x) no-op on windows per python docs
os.chmod(fn, (0o777 - current_umask() | 0o111))
finally:
zipfp.close()
def untar_file(filename, location):
"""
Untar the file (with path `filename`) to the destination `location`.
All files are written based on system defaults and umask (i.e. permissions
are not preserved), except that regular file members with any execute
permissions (user, group, or world) have "chmod +x" applied after being
written. Note that for windows, any execute changes using os.chmod are
no-ops per the python docs.
"""
ensure_dir(location)
if filename.lower().endswith('.gz') or filename.lower().endswith('.tgz'):
mode = 'r:gz'
elif filename.lower().endswith(BZ2_EXTENSIONS):
mode = 'r:bz2'
elif filename.lower().endswith(XZ_EXTENSIONS):
mode = 'r:xz'
elif filename.lower().endswith('.tar'):
mode = 'r'
else:
logger.warning(
'Cannot determine compression type for file %s', filename,
)
mode = 'r:*'
tar = tarfile.open(filename, mode)
try:
# note: python<=2.5 doesn't seem to know about pax headers, filter them
leading = has_leading_dir([
member.name for member in tar.getmembers()
if member.name != 'pax_global_header'
])
for member in tar.getmembers():
fn = member.name
if fn == 'pax_global_header':
continue
if leading:
fn = split_leading_dir(fn)[1]
path = os.path.join(location, fn)
if member.isdir():
ensure_dir(path)
elif member.issym():
try:
tar._extract_member(member, path)
except Exception as exc:
# Some corrupt tar files seem to produce this
# (specifically bad symlinks)
logger.warning(
'In the tar file %s the member %s is invalid: %s',
filename, member.name, exc,
)
continue
else:
try:
fp = tar.extractfile(member)
except (KeyError, AttributeError) as exc:
# Some corrupt tar files seem to produce this
# (specifically bad symlinks)
logger.warning(
'In the tar file %s the member %s is invalid: %s',
filename, member.name, exc,
)
continue
ensure_dir(os.path.dirname(path))
with open(path, 'wb') as destfp:
shutil.copyfileobj(fp, destfp)
fp.close()
# Update the timestamp (useful for cython compiled files)
tar.utime(member, path)
# member have any execute permissions for user/group/world?
if member.mode & 0o111:
# make dest file have execute for user/group/world
# no-op on windows per python docs
os.chmod(path, (0o777 - current_umask() | 0o111))
finally:
tar.close()
def unpack_file(filename, location, content_type, link):
filename = os.path.realpath(filename)
if (content_type == 'application/zip' or
filename.lower().endswith(ZIP_EXTENSIONS) or
zipfile.is_zipfile(filename)):
unzip_file(
filename,
location,
flatten=not filename.endswith('.whl')
)
elif (content_type == 'application/x-gzip' or
tarfile.is_tarfile(filename) or
filename.lower().endswith(
TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS)):
untar_file(filename, location)
elif (content_type and content_type.startswith('text/html') and
is_svn_page(file_contents(filename))):
# We don't really care about this
from pip._internal.vcs.subversion import Subversion
Subversion('svn+' + link.url).unpack(location)
else:
# FIXME: handle?
# FIXME: magic signatures?
logger.critical(
'Cannot unpack file %s (downloaded from %s, content-type: %s); '
'cannot detect archive format',
filename, location, content_type,
)
raise InstallationError(
'Cannot determine archive format of %s' % location
)
def call_subprocess(cmd, show_stdout=True, cwd=None,
on_returncode='raise',
command_desc=None,
extra_environ=None, unset_environ=None, spinner=None):
"""
Args:
unset_environ: an iterable of environment variable names to unset
prior to calling subprocess.Popen().
"""
if unset_environ is None:
unset_environ = []
# This function's handling of subprocess output is confusing and I
# previously broke it terribly, so as penance I will write a long comment
# explaining things.
#
# The obvious thing that affects output is the show_stdout=
# kwarg. show_stdout=True means, let the subprocess write directly to our
# stdout. Even though it is nominally the default, it is almost never used
# inside pip (and should not be used in new code without a very good
# reason); as of 2016-02-22 it is only used in a few places inside the VCS
# wrapper code. Ideally we should get rid of it entirely, because it
# creates a lot of complexity here for a rarely used feature.
#
# Most places in pip set show_stdout=False. What this means is:
# - We connect the child stdout to a pipe, which we read.
# - By default, we hide the output but show a spinner -- unless the
# subprocess exits with an error, in which case we show the output.
# - If the --verbose option was passed (= loglevel is DEBUG), then we show
# the output unconditionally. (But in this case we don't want to show
# the output a second time if it turns out that there was an error.)
#
# stderr is always merged with stdout (even if show_stdout=True).
if show_stdout:
stdout = None
else:
stdout = subprocess.PIPE
if command_desc is None:
cmd_parts = []
for part in cmd:
if ' ' in part or '\n' in part or '"' in part or "'" in part:
part = '"%s"' % part.replace('"', '\\"')
cmd_parts.append(part)
command_desc = ' '.join(cmd_parts)
logger.debug("Running command %s", command_desc)
env = os.environ.copy()
if extra_environ:
env.update(extra_environ)
for name in unset_environ:
env.pop(name, None)
try:
proc = subprocess.Popen(
cmd, stderr=subprocess.STDOUT, stdin=subprocess.PIPE,
stdout=stdout, cwd=cwd, env=env,
)
proc.stdin.close()
except Exception as exc:
logger.critical(
"Error %s while executing command %s", exc, command_desc,
)
raise
all_output = []
if stdout is not None:
while True:
line = console_to_str(proc.stdout.readline())
if not line:
break
line = line.rstrip()
all_output.append(line + '\n')
if logger.getEffectiveLevel() <= std_logging.DEBUG:
# Show the line immediately
logger.debug(line)
else:
# Update the spinner
if spinner is not None:
spinner.spin()
try:
proc.wait()
finally:
if proc.stdout:
proc.stdout.close()
if spinner is not None:
if proc.returncode:
spinner.finish("error")
else:
spinner.finish("done")
if proc.returncode:
if on_returncode == 'raise':
if (logger.getEffectiveLevel() > std_logging.DEBUG and
not show_stdout):
logger.info(
'Complete output from command %s:', command_desc,
)
logger.info(
''.join(all_output) +
'\n----------------------------------------'
)
raise InstallationError(
'Command "%s" failed with error code %s in %s'
% (command_desc, proc.returncode, cwd))
elif on_returncode == 'warn':
logger.warning(
'Command "%s" had error code %s in %s',
command_desc, proc.returncode, cwd,
)
elif on_returncode == 'ignore':
pass
else:
raise ValueError('Invalid value: on_returncode=%s' %
repr(on_returncode))
if not show_stdout:
return ''.join(all_output)
def read_text_file(filename):
"""Return the contents of *filename*.
Try to decode the file contents with utf-8, the preferred system encoding
(e.g., cp1252 on some Windows machines), and latin1, in that order.
Decoding a byte string with latin1 will never raise an error. In the worst
case, the returned string will contain some garbage characters.
"""
with open(filename, 'rb') as fp:
data = fp.read()
encodings = ['utf-8', locale.getpreferredencoding(False), 'latin1']
for enc in encodings:
try:
data = data.decode(enc)
except UnicodeDecodeError:
continue
break
assert type(data) != bytes # Latin1 should have worked.
return data
def _make_build_dir(build_dir):
os.makedirs(build_dir)
write_delete_marker_file(build_dir)
class FakeFile(object):
"""Wrap a list of lines in an object with readline() to make
ConfigParser happy."""
def __init__(self, lines):
self._gen = (l for l in lines)
def readline(self):
try:
try:
return next(self._gen)
except NameError:
return self._gen.next()
except StopIteration:
return ''
def __iter__(self):
return self._gen
class StreamWrapper(StringIO):
@classmethod
def from_stream(cls, orig_stream):
cls.orig_stream = orig_stream
return cls()
# compileall.compile_dir() needs stdout.encoding to print to stdout
@property
def encoding(self):
return self.orig_stream.encoding
@contextlib.contextmanager
def captured_output(stream_name):
"""Return a context manager used by captured_stdout/stdin/stderr
that temporarily replaces the sys stream *stream_name* with a StringIO.
Taken from Lib/support/__init__.py in the CPython repo.
"""
orig_stdout = getattr(sys, stream_name)
setattr(sys, stream_name, StreamWrapper.from_stream(orig_stdout))
try:
yield getattr(sys, stream_name)
finally:
setattr(sys, stream_name, orig_stdout)
def captured_stdout():
"""Capture the output of sys.stdout:
with captured_stdout() as stdout:
print('hello')
self.assertEqual(stdout.getvalue(), 'hello\n')
Taken from Lib/support/__init__.py in the CPython repo.
"""
return captured_output('stdout')
class cached_property(object):
"""A property that is only computed once per instance and then replaces
itself with an ordinary attribute. Deleting the attribute resets the
property.
Source: https://github.com/bottlepy/bottle/blob/0.11.5/bottle.py#L175
"""
def __init__(self, func):
self.__doc__ = getattr(func, '__doc__')
self.func = func
def __get__(self, obj, cls):
if obj is None:
# We're being accessed from the class itself, not from an object
return self
value = obj.__dict__[self.func.__name__] = self.func(obj)
return value
def get_installed_version(dist_name, working_set=None):
"""Get the installed version of dist_name avoiding pkg_resources cache"""
# Create a requirement that we'll look for inside of setuptools.
req = pkg_resources.Requirement.parse(dist_name)
if working_set is None:
# We want to avoid having this cached, so we need to construct a new
# working set each time.
working_set = pkg_resources.WorkingSet()
# Get the installed distribution from our working set
dist = working_set.find(req)
# Check to see if we got an installed distribution or not, if we did
# we want to return it's version.
return dist.version if dist else None
def consume(iterator):
"""Consume an iterable at C speed."""
deque(iterator, maxlen=0)
# Simulates an enum
def enum(*sequential, **named):
enums = dict(zip(sequential, range(len(sequential))), **named)
reverse = {value: key for key, value in enums.items()}
enums['reverse_mapping'] = reverse
return type('Enum', (), enums)
def make_vcs_requirement_url(repo_url, rev, egg_project_name, subdir=None):
"""
Return the URL for a VCS requirement.
Args:
repo_url: the remote VCS url, with any needed VCS prefix (e.g. "git+").
"""
req = '{}@{}#egg={}'.format(repo_url, rev, egg_project_name)
if subdir:
req += '&subdirectory={}'.format(subdir)
return req
def split_auth_from_netloc(netloc):
"""
Parse out and remove the auth information from a netloc.
Returns: (netloc, (username, password)).
"""
if '@' not in netloc:
return netloc, (None, None)
# Split from the right because that's how urllib.parse.urlsplit()
# behaves if more than one @ is present (which can be checked using
# the password attribute of urlsplit()'s return value).
auth, netloc = netloc.rsplit('@', 1)
if ':' in auth:
# Split from the left because that's how urllib.parse.urlsplit()
# behaves if more than one : is present (which again can be checked
# using the password attribute of the return value)
user_pass = tuple(auth.split(':', 1))
else:
user_pass = auth, None
return netloc, user_pass
def remove_auth_from_url(url):
# Return a copy of url with 'username:password@' removed.
# username/pass params are passed to subversion through flags
# and are not recognized in the url.
# parsed url
purl = urllib_parse.urlsplit(url)
netloc, user_pass = split_auth_from_netloc(purl.netloc)
# stripped url
url_pieces = (
purl.scheme, netloc, purl.path, purl.query, purl.fragment
)
surl = urllib_parse.urlunsplit(url_pieces)
return surl
def protect_pip_from_modification_on_windows(modifying_pip):
"""Protection of pip.exe from modification on Windows
On Windows, any operation modifying pip should be run as:
python -m pip ...
"""
pip_names = [
"pip.exe",
"pip{}.exe".format(sys.version_info[0]),
"pip{}.{}.exe".format(*sys.version_info[:2])
]
# See https://github.com/pypa/pip/issues/1299 for more discussion
should_show_use_python_msg = (
modifying_pip and
WINDOWS and
os.path.basename(sys.argv[0]) in pip_names
)
if should_show_use_python_msg:
new_command = [
sys.executable, "-m", "pip"
] + sys.argv[1:]
raise CommandError(
'To modify pip, please run the following command:\n{}'
.format(" ".join(new_command))
)