#!/usr/bin/env python3
"""Check the docs/THIRDPARTY file against the DEP-5 copyright format.

https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/

Disclaimer: This script is not a lawyer.  It cannot validate that the
claimed licenses are correct.  It can only check for basic syntactic
issues.

The script prints one line per problem found and exits with status 1 if
any check failed, 0 otherwise.  The checks are:

* the header declares a format known to python-debian;
* every glob in a ``Files:`` field matches at least one known path;
* every short license name used by a ``Files:`` paragraph has its full
  text defined in a stand-alone ``License:`` paragraph;
* the file is already in the canonical form python-debian would write.
"""

import difflib
import io
import os
import subprocess
import sys

from debian import copyright

COPYRIGHT_FILENAME = "docs/THIRDPARTY"

# All paths below (COPYRIGHT_FILENAME, `git ls-files` output) are relative
# to the parent of the directory containing this script.
os.chdir(os.path.join(os.path.dirname(__file__), ".."))

# Becomes 1 as soon as any check fails; used as the process exit code.
status = 0

# `-z` makes git terminate (not separate) every path with NUL, so the
# final element of the split is always an empty string.
*files, empty = subprocess.check_output(
    ["git", "ls-files", "-z"],
    encoding="utf-8",
).split("\0")
assert empty == ""

# Extra path treated as existing even though git does not list it.
# NOTE(review): presumably a stand-in for generated, untracked emoji
# assets so that globs covering them are not reported -- confirm.
files += [
    "static/generated/emoji/images/emoji/unicode/ignore-this-path",
]

# DEP-5 requires the copyright file to be UTF-8; decode it explicitly
# rather than depending on the locale's preferred encoding.
with open(COPYRIGHT_FILENAME, encoding="utf-8") as f:
    lines = list(f)

c = copyright.Copyright(lines)

if not c.header.known_format():
    print(f"{COPYRIGHT_FILENAME}: Unknown header format {c.header.format}")
    status = 1

# A stand-alone License paragraph whose value spans several lines defines
# the full text for the short name on its first line.
license_texts = (p.license.to_str() for p in c.all_license_paragraphs())
defined_licenses = {text.split("\n", 1)[0] for text in license_texts if "\n" in text}

for paragraph in c.all_files_paragraphs():
    # Every glob must match at least one known path in full.
    for glob in paragraph.files:
        matches = copyright.globs_to_re([glob]).fullmatch
        if not any(map(matches, files)):
            print(f"{COPYRIGHT_FILENAME}: No such file {glob}")
            status = 1

    # A single-line license value is only a short name; its text must be
    # defined by one of the stand-alone License paragraphs.
    license_text = paragraph.license.to_str()
    if "\n" not in license_text and license_text not in defined_licenses:
        print(f"{COPYRIGHT_FILENAME}: Missing license text for {license_text}")
        status = 1

# Re-serialize the parsed file; any difference from the input means the
# file is not in the form python-debian would emit, so show the diff.
dumped = c.dump()
if dumped != "".join(lines):
    print(f"{COPYRIGHT_FILENAME}: Changes expected:")
    sys.stdout.writelines(
        difflib.unified_diff(
            lines,
            # StringIO splits on "\n" only, matching how `lines` was read.
            io.StringIO(dumped).readlines(),
            COPYRIGHT_FILENAME,
            COPYRIGHT_FILENAME,
        ),
    )
    status = 1

sys.exit(status)