zulip/tools/test-locked-requirements

#!/usr/bin/env python3
import difflib
import filecmp
import glob
import hashlib
import os
import shutil
import subprocess
import sys
import tempfile

import orjson

# Absolute path of the directory containing this script.
TOOLS_DIR = os.path.dirname(os.path.abspath(__file__))
# The directory one level above the tools directory.
ZULIP_PATH = os.path.dirname(TOOLS_DIR)
# Directory holding the `*.in` input files and the `*.txt` lock files.
REQS_DIR = os.path.join(ZULIP_PATH, "requirements")
# Location of the JSON file remembering hashes of already-checked requirements.
CACHE_DIR = os.path.join(ZULIP_PATH, "var", "tmp")
CACHE_FILE = os.path.join(CACHE_DIR, "requirements_hashes")


def print_diff(path_file1: str, path_file2: str) -> None:
    """Write a unified diff between two text files to standard output.

    Args:
        path_file1: Path of the "from" file; also used as its label in the diff.
        path_file2: Path of the "to" file; also used as its label in the diff.
    """
    # Read both files completely before closing them; the diff itself is
    # generated lazily from the in-memory line lists.
    with open(path_file1) as before, open(path_file2) as after:
        before_lines = before.readlines()
        after_lines = after.readlines()

    delta = difflib.unified_diff(
        before_lines,
        after_lines,
        fromfile=path_file1,
        tofile=path_file2,
    )
    sys.stdout.writelines(delta)


def test_locked_requirements(tmp_dir: str) -> bool:
    """Regenerate the lock files inside ``tmp_dir`` and compare them to REQS_DIR.

    Args:
        tmp_dir: Scratch directory that receives copies of the current lock
            files and is then passed to `update-locked-requirements` as its
            output directory.

    Returns:
        True if every `*.txt` file found in ``tmp_dir`` afterwards has the same
        contents as the file of the same name in REQS_DIR, False otherwise.

    Raises:
        subprocess.CalledProcessError: if `update-locked-requirements` exits
            with a non-zero status.
    """
    # `pip-compile` tries to avoid unnecessarily updating recursive dependencies
    # if lock files are present already. Seed the scratch directory with the
    # current lock files; otherwise recursive dependencies would be bumped to
    # their latest versions even though the input requirements did not change,
    # and the comparison below would fail.
    for existing in glob.glob(os.path.join(REQS_DIR, "*.txt")):
        name = os.path.basename(existing)
        shutil.copyfile(os.path.join(REQS_DIR, name), os.path.join(tmp_dir, name))

    updater = os.path.join(TOOLS_DIR, "update-locked-requirements")
    subprocess.check_call([updater, "--output-dir", tmp_dir])

    # Content comparison (shallow=False); stops comparing at the first mismatch.
    # NOTE(review): a regenerated file with no counterpart in REQS_DIR is
    # presumably an error from filecmp.cmp rather than a False result - confirm.
    return all(
        filecmp.cmp(
            regenerated,
            os.path.join(REQS_DIR, os.path.basename(regenerated)),
            shallow=False,
        )
        for regenerated in glob.glob(os.path.join(tmp_dir, "*.txt"))
    )


def get_requirements_hash(tmp_dir: str, use_test_lock_files: bool = False) -> str:
    """Return a SHA-1 hex digest over the requirements inputs and lock files.

    The digest covers the contents of every `*.in` file in REQS_DIR (in sorted
    path order) followed by every `*.txt` lock file (in sorted path order).

    Args:
        tmp_dir: Directory holding regenerated lock files; only read when
            ``use_test_lock_files`` is true.
        use_test_lock_files: Hash the lock files from ``tmp_dir`` instead of
            the ones in REQS_DIR.
    """
    lock_dir = tmp_dir if use_test_lock_files else REQS_DIR
    input_paths = sorted(glob.glob(os.path.join(REQS_DIR, "*.in")))
    lock_paths = sorted(glob.glob(os.path.join(lock_dir, "*.txt")))

    digest = hashlib.sha1()
    for path in [*input_paths, *lock_paths]:
        with open(path, "rb") as stream:
            digest.update(stream.read())
    return digest.hexdigest()


def maybe_set_up_cache() -> None:
    """Create CACHE_DIR if needed and initialize CACHE_FILE when it is absent.

    A newly created cache file contains an empty JSON list; an existing file
    is left untouched.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    if os.path.exists(CACHE_FILE):
        return
    with open(CACHE_FILE, "wb") as cache:
        cache.write(orjson.dumps([]))


def load_cache() -> list[str]:
    """Return the list of cached requirements hashes stored in CACHE_FILE.

    The cache is only an optimization, so an empty, truncated or otherwise
    malformed cache file is treated as an empty cache instead of aborting the
    whole check. Because `update_cache` overwrites the file, a damaged cache
    is replaced the next time a hash is recorded.

    Returns:
        The stored hashes, or an empty list if the file does not contain a
        valid JSON list.
    """
    with open(CACHE_FILE, "rb") as fp:
        raw = fp.read()
    try:
        hash_list = orjson.loads(raw)
    except orjson.JSONDecodeError:
        # Unparseable cache (e.g. an interrupted write): start over.
        return []
    if not isinstance(hash_list, list):
        # Valid JSON but not the list this tool writes: ignore it.
        return []
    return hash_list


def update_cache(hash_list: list[str]) -> None:
    """Overwrite CACHE_FILE with the JSON encoding of the newest hashes.

    Args:
        hash_list: Hashes ordered oldest to newest; only the trailing 100
            entries are written.
    """
    # Keep just the last 100 entries. Caching this aggressively is fine
    # because it is cheap to do.
    newest = hash_list[-100:]
    with open(CACHE_FILE, "wb") as cache:
        cache.write(orjson.dumps(newest))


def main() -> None:
    """Verify that the locked requirements files are up to date.

    If the hash of the current requirements files is already in the cache,
    nothing is done. Otherwise the lock files are regenerated into a temporary
    directory, the hash of the regenerated set is added to the cache, and the
    regenerated files are compared with the ones in REQS_DIR. On a mismatch a
    unified diff of each lock file is printed before raising.

    Raises:
        Exception: if the regenerated lock files differ from those in REQS_DIR.
    """
    maybe_set_up_cache()
    hash_list = load_cache()

    # Use the temporary directory as a context manager so it is removed on
    # every exit path (cache hit, success, or failure) instead of relying on
    # the object's finalizer.
    with tempfile.TemporaryDirectory() as tmp_dir:
        curr_hash = get_requirements_hash(tmp_dir)

        if curr_hash in hash_list:
            # We have already checked this set of requirements and they
            # were consistent so no need to check again.
            return

        requirements_are_consistent = test_locked_requirements(tmp_dir)

        # Cache the hash so that we do not need to run the
        # `update-locked-requirements` tool again to check this set of
        # requirements. The hash is taken over the regenerated lock files;
        # any previous occurrence is dropped so it ends up as the newest entry.
        valid_hash = get_requirements_hash(tmp_dir, use_test_lock_files=True)
        update_cache([*(h for h in hash_list if h != valid_hash), valid_hash])

        if not requirements_are_consistent:
            for test_locked_file in glob.glob(os.path.join(tmp_dir, "*.txt")):
                fn = os.path.basename(test_locked_file)
                locked_file = os.path.join(REQS_DIR, fn)
                print_diff(locked_file, test_locked_file)
            # Flush the output to ensure we print the error at the end.
            sys.stdout.flush()
            raise Exception(
                "It looks like you have updated some python dependencies but haven't "
                "updated locked requirements files. Please update them by running "
                "`tools/update-locked-requirements`. For more information please "
                "refer to `requirements/README.md`."
            )


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()