mirror of https://github.com/zulip/zulip.git
Optimize incremental virtualenv creation.
This adds a new system for copying packages from old virtualenvs that are sufficiently similar to the new virtualenv required. In practice, this results in a huge performance improvement for re-provisioning Zulip development environments when the requirements files have changed (which is the dominant performance problem with provision today). Fixes: #1507.
This commit is contained in:
parent
27e4ed126f
commit
e3078b226a
|
@ -156,3 +156,6 @@ polib==1.0.7
|
|||
# We're using this version from git to ensure compatibility with
|
||||
# Jinja2==2.8 (see https://github.com/jorgebastida/glue/pull/211).
|
||||
git+https://github.com/lorenzogil/glue@01c00cd33b9b78ea868300c266c16acd59a81bfc#egg=glue==0.11.1
|
||||
|
||||
# Needed for cloning virtual environments
|
||||
git+https://github.com/umairwaheed/virtualenv-clone.git@short-version#egg=virtualenv-clone==0.2.6
|
||||
|
|
|
@ -5,6 +5,7 @@ import sys
|
|||
from os.path import dirname, abspath
|
||||
import subprocess
|
||||
from scripts.lib.zulip_tools import run
|
||||
from scripts.lib.hash_reqs import expand_reqs
|
||||
|
||||
ZULIP_PATH = dirname(dirname(dirname(abspath(__file__))))
|
||||
VENV_CACHE_PATH = "/srv/zulip-venv-cache"
|
||||
|
@ -15,7 +16,7 @@ if 'TRAVIS' in os.environ:
|
|||
|
||||
if False:
|
||||
# Don't add a runtime dependency on typing
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Tuple, Set
|
||||
|
||||
VENV_DEPENDENCIES = [
|
||||
"build-essential",
|
||||
|
@ -34,6 +35,136 @@ VENV_DEPENDENCIES = [
|
|||
"libpq-dev", # Needed by psycopg2
|
||||
]
|
||||
|
||||
def get_index_filename(venv_path):
|
||||
# type: (str) -> str
|
||||
return os.path.join(venv_path, 'package_index')
|
||||
|
||||
def get_package_names(requirements_file):
|
||||
# type: (str) -> List[str]
|
||||
packages = expand_reqs(requirements_file)
|
||||
cleaned = []
|
||||
operators = ['~=', '==', '!=', '<', '>']
|
||||
for package in packages:
|
||||
for operator in operators:
|
||||
if operator in package:
|
||||
package = package.split(operator)[0]
|
||||
|
||||
package = package.strip()
|
||||
if package:
|
||||
cleaned.append(package.lower())
|
||||
|
||||
return sorted(cleaned)
|
||||
|
||||
def create_requirements_index_file(venv_path, requirements_file):
|
||||
# type: (str, str) -> str
|
||||
"""
|
||||
Creates a file, called package_index, in the virtual environment
|
||||
directory that contains all the PIP packages installed in the
|
||||
virtual environment. This file is used to determine the packages
|
||||
that can be copied to a new virtual environment.
|
||||
"""
|
||||
index_filename = get_index_filename(venv_path)
|
||||
packages = get_package_names(requirements_file)
|
||||
with open(index_filename, 'w') as writer:
|
||||
writer.write('\n'.join(packages))
|
||||
writer.write('\n')
|
||||
|
||||
return index_filename
|
||||
|
||||
def get_venv_packages(venv_path):
|
||||
# type: (str) -> Set[str]
|
||||
"""
|
||||
Returns the packages installed in the virtual environment using the
|
||||
package index file.
|
||||
"""
|
||||
with open(get_index_filename(venv_path)) as reader:
|
||||
return set(p.strip() for p in reader.read().split('\n') if p.strip())
|
||||
|
||||
def try_to_copy_venv(venv_path, new_packages):
|
||||
# type: (str, Set[str]) -> bool
|
||||
"""
|
||||
Tries to copy packages from an old virtual environment in the cache
|
||||
to the new virtual environment. The algorithm works as follows:
|
||||
1. Find a virtual environment, v, from the cache that has the
|
||||
highest overlap with the new requirements such that:
|
||||
a. The new requirements only add to the packages of v.
|
||||
b. The new requirements only upgrade packages of v.
|
||||
2. Copy the contents of v to the new virtual environment using
|
||||
virtualenv-clone.
|
||||
3. Delete all .pyc files in the new virtual environment.
|
||||
"""
|
||||
venv_name = os.path.basename(venv_path)
|
||||
|
||||
overlaps = [] # type: List[Tuple[int, str, Set[str]]]
|
||||
old_packages = set() # type: Set[str]
|
||||
for sha1sum in os.listdir(VENV_CACHE_PATH):
|
||||
curr_venv_path = os.path.join(VENV_CACHE_PATH, sha1sum, venv_name)
|
||||
if (curr_venv_path == venv_path or
|
||||
not os.path.exists(get_index_filename(curr_venv_path))):
|
||||
continue
|
||||
|
||||
old_packages = get_venv_packages(curr_venv_path)
|
||||
# We only consider using using old virtualenvs that only
|
||||
# contain packages that we want in our new virtualenv.
|
||||
if not (old_packages - new_packages):
|
||||
overlap = new_packages & old_packages
|
||||
overlaps.append((len(overlap), curr_venv_path, overlap))
|
||||
|
||||
target_log = get_logfile_name(venv_path)
|
||||
source_venv_path = None
|
||||
if overlaps:
|
||||
# Here, we select the old virtualenv with the largest overlap
|
||||
overlaps = sorted(overlaps)
|
||||
_, source_venv_path, copied_packages = overlaps[-1]
|
||||
print('Copying packages from {}'.format(source_venv_path))
|
||||
clone_ve = "{}/bin/virtualenv-clone".format(source_venv_path)
|
||||
cmd = "sudo {exe} {source} {target}".format(exe=clone_ve,
|
||||
source=source_venv_path,
|
||||
target=venv_path).split()
|
||||
try:
|
||||
run(cmd)
|
||||
except Exception:
|
||||
# Virtualenv-clone is not installed. Install it and try running
|
||||
# the command again.
|
||||
run("{}/bin/pip install --no-deps virtualenv-clone".format(
|
||||
source_venv_path).split())
|
||||
run(cmd)
|
||||
|
||||
run(["sudo", "chown", "-R",
|
||||
"{}:{}".format(os.getuid(), os.getgid()), venv_path])
|
||||
source_log = get_logfile_name(source_venv_path)
|
||||
copy_parent_log(source_log, target_log)
|
||||
create_log_entry(target_log, source_venv_path, copied_packages,
|
||||
new_packages - copied_packages)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_logfile_name(venv_path):
|
||||
# type: (str) -> str
|
||||
return "{}/setup-venv.log".format(venv_path)
|
||||
|
||||
def create_log_entry(target_log, parent, copied_packages, new_packages):
|
||||
# type: (str, str, Set[str], Set[str]) -> None
|
||||
|
||||
venv_path = dirname(target_log)
|
||||
with open(target_log, 'a') as writer:
|
||||
writer.write("{}\n".format(venv_path))
|
||||
if copied_packages:
|
||||
writer.write(
|
||||
"Copied from {}:\n".format(parent))
|
||||
writer.write("\n".join('- {}'.format(p) for p in sorted(copied_packages)))
|
||||
writer.write("\n")
|
||||
|
||||
writer.write("New packages:\n")
|
||||
writer.write("\n".join('- {}'.format(p) for p in sorted(new_packages)))
|
||||
writer.write("\n\n")
|
||||
|
||||
def copy_parent_log(source_log, target_log):
|
||||
# type: (str, str) -> None
|
||||
if os.path.exists(source_log):
|
||||
run('cp {} {}'.format(source_log, target_log).split())
|
||||
|
||||
def do_patch_activate_script(venv_path):
|
||||
# type: (str) -> None
|
||||
"""
|
||||
|
@ -83,11 +214,18 @@ def do_setup_virtualenv(venv_path, requirements_file, virtualenv_args):
|
|||
# type: (str, str, List[str]) -> None
|
||||
|
||||
# Setup Python virtualenv
|
||||
run(["sudo", "rm", "-rf", venv_path])
|
||||
run(["sudo", "mkdir", "-p", venv_path])
|
||||
run(["sudo", "chown", "{}:{}".format(os.getuid(), os.getgid()), venv_path])
|
||||
run(["virtualenv"] + virtualenv_args + [venv_path])
|
||||
new_packages = set(get_package_names(requirements_file))
|
||||
|
||||
run(["sudo", "rm", "-rf", venv_path])
|
||||
if not try_to_copy_venv(venv_path, new_packages):
|
||||
# Create new virtualenv.
|
||||
run(["sudo", "mkdir", "-p", venv_path])
|
||||
run(["sudo", "virtualenv"] + virtualenv_args + [venv_path])
|
||||
run(["sudo", "chown", "-R",
|
||||
"{}:{}".format(os.getuid(), os.getgid()), venv_path])
|
||||
create_log_entry(get_logfile_name(venv_path), "", set(), new_packages)
|
||||
|
||||
create_requirements_index_file(venv_path, requirements_file)
|
||||
# Switch current Python context to the virtualenv.
|
||||
activate_this = os.path.join(venv_path, "bin", "activate_this.py")
|
||||
exec(open(activate_this).read(), {}, dict(__file__=activate_this)) # type: ignore # https://github.com/python/mypy/issues/1577
|
||||
|
|
Loading…
Reference in New Issue