tools: Rewrite `test-locked-requirements` to be more performant.

This commit modifies `test-locked-requirements` to use some caching so that we don't need to use the `update-locked-requirements` tool everytime for checking the validity of locked requirements as it is slow. Fixes: #6969.
2017-10-28 17:12:45 +00:00 · 2017-10-28 17:12:45 +00:00 · 5111aeac41
parent 40fe6239c5
commit 5111aeac41
3 changed files with 93 additions and 27 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -33,6 +33,7 @@ cache:
    - $HOME/zulip-npm-cache
    - $HOME/zulip-emoji-cache
    - $HOME/node
+    - $HOME/misc
 env:
  global:
    - BOTO_CONFIG=/tmp/nowhere
--- a/tools/test-locked-requirements
+++ b/tools/test-locked-requirements
@ -1,24 +1,27 @@
 #!/usr/bin/env python3
 import difflib
 import filecmp
+import glob
+import hashlib
 import os
 import subprocess
 import sys
+import ujson

-from typing import Text
+from typing import Optional, List, Tuple

-TOOLS_DIR = os.path.dirname(__file__)
+TOOLS_DIR = os.path.abspath(os.path.dirname(__file__))
 ZULIP_PATH = os.path.dirname(TOOLS_DIR)
-sys.path.append(ZULIP_PATH)
-from scripts.lib.zulip_tools import run
-
-PROD_LOCK_FILE = os.path.join(ZULIP_PATH, 'requirements', 'prod.txt')
-DEV_LOCK_FILE = os.path.join(ZULIP_PATH, 'requirements', 'dev.txt')
-TEST_PROD_LOCK_FILE = '/var/tmp/test_prod.txt'
-TEST_DEV_LOCK_FILE = '/var/tmp/test_dev.txt'
+REQS_DIR = os.path.join(ZULIP_PATH, 'requirements')
+CACHE_DIR = os.path.join(ZULIP_PATH, 'var', 'tmp')
+TMP_DIR = '/var/tmp'
+if 'TRAVIS' in os.environ:
+    CACHE_DIR = os.path.join(os.environ['HOME'], 'misc')
+CACHE_FILE = os.path.join(CACHE_DIR, 'requirements_hashes')
+LOCKED_REQS_FILE_NAMES = ['dev.txt', 'docs.txt', 'mypy.txt', 'prod.txt', 'thumbor.txt']

 def print_diff(path_file1, path_file2):
-    # type: (Text, Text) -> None
+    # type: (str, str) -> None
    with open(path_file1) as file1:
        with open(path_file2) as file2:
            diff = difflib.unified_diff(
@ -30,30 +33,92 @@ def print_diff(path_file1, path_file2):
    for line in diff:
        print(line)

-def main():
-    # type: () -> None
+def test_locked_requirements():
+    # type: () -> bool
    # `pip-compile` tries to avoid unnecessarily updating recursive dependencies
    # if lock files are present already. If we don't copy these files to the tmp
    # dir then recursive dependencies will get updated to their latest version
    # without any change in the input requirements file and the test will not pass.
-    run(['cp', PROD_LOCK_FILE, TEST_PROD_LOCK_FILE])
-    run(['cp', DEV_LOCK_FILE, TEST_DEV_LOCK_FILE])
-    subprocess.check_call([os.path.join(TOOLS_DIR, 'update-locked-requirements'),
-                           '--prod', TEST_PROD_LOCK_FILE, '--dev', TEST_DEV_LOCK_FILE,
-                           ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    for fn in LOCKED_REQS_FILE_NAMES:
+        locked_file = os.path.join(REQS_DIR, fn)
+        test_locked_file = os.path.join(TMP_DIR, fn)
+        subprocess.check_call(['cp', locked_file, test_locked_file])
+    subprocess.check_call([os.path.join(TOOLS_DIR, 'update-locked-requirements'), '--output-dir', TMP_DIR],
+                          stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

-    same = filecmp.cmp(TEST_PROD_LOCK_FILE, PROD_LOCK_FILE, shallow=False)
-    same = same or filecmp.cmp(TEST_DEV_LOCK_FILE, DEV_LOCK_FILE, shallow=False)
+    same = True
+    for fn in LOCKED_REQS_FILE_NAMES:
+        locked_file = os.path.join(REQS_DIR, fn)
+        test_locked_file = os.path.join(TMP_DIR, fn)
+        same = same and filecmp.cmp(test_locked_file, locked_file, shallow=False)

-    if not same:
-        print_diff(TEST_PROD_LOCK_FILE, PROD_LOCK_FILE)
-        print_diff(TEST_DEV_LOCK_FILE, DEV_LOCK_FILE)
+    return same
+
+def get_requirements_hash(use_test_lock_files=False):
+    # type: (Optional[bool]) -> str
+    sha1 = hashlib.sha1()
+    reqs_files = glob.glob(os.path.join(ZULIP_PATH, "requirements", "*.in"))
+    lock_files_path = REQS_DIR
+    if use_test_lock_files:
+        lock_files_path = TMP_DIR
+    reqs_files.extend([os.path.join(lock_files_path, fn) for fn in LOCKED_REQS_FILE_NAMES])
+    for file_path in reqs_files:
+        with open(file_path) as fp:
+            sha1.update(fp.read().encode("utf-8"))
+    return sha1.hexdigest()
+
+def may_be_setup_cache():
+    # type: () -> None
+    if not os.path.exists(CACHE_DIR):
+        subprocess.check_call(['mkdir', CACHE_DIR])
+    if not os.path.exists(CACHE_FILE):
+        with open(CACHE_FILE, 'w') as fp:
+            ujson.dump([], fp)
+
+def load_cache():
+    # type: () -> List[str]
+    with open(CACHE_FILE) as fp:
+        hash_list = ujson.load(fp)
+    return hash_list
+
+def update_cache(hash_list):
+    # type: (List[str]) -> None
+    # We store last 100 hash entries. Aggressive caching is
+    # not a problem as it is cheap to do.
+    if len(hash_list) > 100:
+        hash_list = hash_list[-100:]
+    with open(CACHE_FILE, 'w') as fp:
+        ujson.dump(hash_list, fp)
+
+def main():
+    # type: () -> None
+    may_be_setup_cache()
+    hash_list = load_cache()
+    curr_hash = get_requirements_hash()
+
+    if curr_hash in hash_list:
+        # We have already checked this set of requirements and they
+        # were consistent so no need to check again.
+        return
+
+    requirements_are_consistent = test_locked_requirements()
+
+    # Cache the hash so that we need not to run the `update_locked_requirements`
+    # tool again for checking this set of requirements.
+    valid_hash = get_requirements_hash(use_test_lock_files=True)
+    hash_list.append(valid_hash)
+    update_cache(hash_list)
+    if not requirements_are_consistent:
+        for fn in LOCKED_REQS_FILE_NAMES:
+            locked_file = os.path.join(REQS_DIR, fn)
+            test_locked_file = os.path.join(TMP_DIR, fn)
+            print_diff(test_locked_file, locked_file)
        # Flush the output to ensure we print the error at the end.
        sys.stdout.flush()
-        raise Exception("Seems like you have updated some python dependencies but haven't "
-                        "run updated requirements lock files. Please update them by running "
-                        "`tools/update-locked-requirements`. For more information please refer "
-                        "to `requirements/README.md`.")
+        raise Exception("It looks like you have updated some python dependencies but haven't "
+                        "updated locked requirements files. Please update them by running "
+                        "`tools/update-locked-requirements`. For more information please "
+                        "refer to `requirements/README.md`.")

 if __name__ == '__main__':
    main()
--- a/tools/travis/backend
+++ b/tools/travis/backend
@ -20,7 +20,7 @@ set -x
 ./tools/test-documentation
 ./tools/test-help-documentation
 ./tools/test-api
-#./tools/test-locked-requirements  # Disabled in CI because slow
+./tools/test-locked-requirements
 #./tools/test-run-dev  # Disabled in CI because flaky.
 #./tools/test-queue-worker-reload  # Disabled in CI because flaky.