node_cache: Serialize to structured data before hashing.

Appending the inputs back-to-back without serializing them loses the
information about where the boundaries between them lie, which can lead
to different inputs having the same hash (e.g. "ab" followed by "c"
hashes the same as "a" followed by "bc").
This commit is contained in:
Alex Vandiver 2021-05-28 01:51:42 +00:00 committed by Tim Abbott
parent ff9126ac1e
commit e080a05b05
1 changed file with 14 additions and 11 deletions

View File

@@ -3,7 +3,7 @@ import json
import os import os
import shutil import shutil
import subprocess import subprocess
from typing import List, Optional from typing import Dict, List, Optional
from scripts.lib.zulip_tools import run from scripts.lib.zulip_tools import run
@@ -33,19 +33,22 @@ def generate_sha1sum_node_modules(
setup_dir = os.path.realpath(os.getcwd()) setup_dir = os.path.realpath(os.getcwd())
PACKAGE_JSON_FILE_PATH = os.path.join(setup_dir, "package.json") PACKAGE_JSON_FILE_PATH = os.path.join(setup_dir, "package.json")
YARN_LOCK_FILE_PATH = os.path.join(setup_dir, "yarn.lock") YARN_LOCK_FILE_PATH = os.path.join(setup_dir, "yarn.lock")
sha1sum = hashlib.sha1() data: Dict[str, object] = {}
with open(PACKAGE_JSON_FILE_PATH, "rb") as fb: with open(PACKAGE_JSON_FILE_PATH, "r") as f:
sha1sum.update(fb.read().strip()) data[PACKAGE_JSON_FILE_PATH] = f.read().strip()
if os.path.exists(YARN_LOCK_FILE_PATH): if os.path.exists(YARN_LOCK_FILE_PATH):
# For backwards compatibility, we can't assume yarn.lock exists # For backwards compatibility, we can't assume yarn.lock exists
with open(YARN_LOCK_FILE_PATH, "rb") as fb: with open(YARN_LOCK_FILE_PATH, "r") as f:
sha1sum.update(fb.read().strip()) data[YARN_LOCK_FILE_PATH] = f.read().strip()
with open(YARN_PACKAGE_JSON) as f: with open(YARN_PACKAGE_JSON) as f:
yarn_version = json.load(f)["version"] data["yarn-package-version"] = json.load(f)["version"]
sha1sum.update(yarn_version.encode("utf8")) data["node-version"] = subprocess.check_output(
sha1sum.update(subprocess.check_output(["node", "--version"]).strip()) ["node", "--version"], universal_newlines=True
yarn_args = get_yarn_args(production=production) ).strip()
sha1sum.update("".join(sorted(yarn_args)).encode("utf8")) data["yarn-args"] = get_yarn_args(production=production)
sha1sum = hashlib.sha1()
sha1sum.update(json.dumps(data, sort_keys=True).encode("utf-8"))
return sha1sum.hexdigest() return sha1sum.hexdigest()