emoji: Remove old broken CSV import/export scripts.

These have been broken at least since commit
e331a758c3 (#12787), so clearly nobody
cares.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg 2022-12-04 02:04:18 -08:00 committed by Tim Abbott
parent 35913a2297
commit 84bdd6371d
3 changed files with 0 additions and 403 deletions

View File

@@ -1,147 +0,0 @@
#!/usr/bin/env python3
#
# This exports the emoji_names.py data set to a CSV file in the same
# format used as input for import_emoji_names_from_csv. We use this
# as part of a test for the correctness of the import process (one can
# compare the exported CSV file to the original CSV file, and if the
# data round-tripped with no changes, we know everything is working
# correctly).
import argparse
import csv
import os
import re
from typing import Any, Dict, List
import orjson
from emoji_setup_utils import get_emoji_code
# This script appears to live three directory levels below tools/ (the
# defaults below join TOOLS_DIR_PATH with "setup"/"emoji"); one more
# dirname() gives the repository root.
TOOLS_DIR_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ZULIP_PATH = os.path.dirname(TOOLS_DIR_PATH)

# `emoji.json` file is same in all four emoji-datasource packages.
EMOJI_DATA_PATH = os.path.join(ZULIP_PATH, "node_modules", "emoji-datasource-google", "emoji.json")

# Maps emoji code -> {"category": ..., "sort_order": ...}; populated by
# prepare_sorting_info() before any lookups are made.
sorting_info: Dict[str, Any] = {}

# Header row written as the first line of the exported CSV.
column_names = [
    "Codepoint",
    "New sorting info",
    "zulip (main)",
    "zulip (alternates)",
    "explanation",
]

# Numeric prefix assigned to each emoji category, used to build
# lexicographically sortable "<index>-<category>" labels.
category_index = {
    "Smileys & People": "1",
    "Animals & Nature": "2",
    "Food & Drink": "3",
    "Activities": "4",
    "Travel & Places": "5",
    "Objects": "6",
    "Symbols": "7",
    "Flags": "8",
    "Skin Tones": "9",
}

# Matches one EMOJI_NAME_MAPS entry in emoji_names.py, e.g.
#   '1f600': {'canonical_name': 'grinning', 'aliases': ['happy']},
# capturing the emoji code, the canonical name, and the raw alias list.
name_entry_regex = re.compile(
    r"'(?P<emoji_code>[a-z0-9-]+)': "
    r"{'canonical_name': '(?P<canonical_name>[+-]?[a-z0-9_X-]+)',[\n ]+"
    r"'aliases': \[(?P<aliases>('([+-]?[a-z0-9_, X-]+)'[, ]{0,2})*)\]},"
)
# Matches a "# ..." comment line; its text is collected into the
# "explanation" CSV column of the following entry.
explanation_regex = re.compile(r" # (?P<explanation_line>[^\r\n\t]+)")
def prepare_sorting_info() -> None:
    """Populate the module-level ``sorting_info`` map from emoji.json.

    Each emoji code is mapped to its category name and a zero-padded
    (width 3) sort order so entries can be ordered lexicographically.
    """
    with open(EMOJI_DATA_PATH, "rb") as fp:
        emoji_data: List[Dict[str, Any]] = orjson.loads(fp.read())
    for emoji_dict in emoji_data:
        code = get_emoji_code(emoji_dict)
        padded_order = str(emoji_dict["sort_order"]).strip().rjust(3, "0")
        sorting_info[code] = {
            "category": emoji_dict["category"],
            "sort_order": padded_order,
        }
def get_sorting_info(emoji_code: str) -> str:
    """Return the "<category-index>-<category> <sort-order>" label for a code."""
    info = sorting_info[emoji_code]
    labeled_category = category_index[info["category"]] + "-" + info["category"]
    return f"{labeled_category} {info['sort_order']}"
def prepare_explanation(explanation_lines: List[str]) -> str:
    """Collapse the collected explanation comment lines into one
    space-separated string (empty list yields the empty string)."""
    joined = " ".join(explanation_lines)
    return joined
def prepare_aliases(captured_aliases: str) -> str:
    """Turn the regex-captured alias list (e.g. "'a', 'b'") into "a, b"."""
    return ", ".join(
        quoted.strip("'") for quoted in captured_aliases.split(", ")
    )
def main() -> None:
    """Export emoji_names.py to a CSV file for round-trip testing.

    Parses the EMOJI_NAME_MAPS entries (and the "#" explanation comments
    preceding them) out of emoji_names.py with regexes, then writes them
    as CSV rows in the column layout that import_emoji_names_from_csv
    expects.
    """
    description = (
        "This script is used for exporting `emoji_names.py` to comma separated file. It "
        "takes the path of output csv file and path to `emoji_names.py` as arguments."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.py"),
        help="Path to the file from which data is to be read.",
    )
    parser.add_argument(
        "--output-file",
        dest="output_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.csv"),
        help="Path to the output csv file.",
    )
    args = parser.parse_args()

    prepare_sorting_info()
    output_data = [column_names]
    explanation_lines: List[str] = []
    with open(args.input_file_path) as fp:
        for line in fp:
            match = name_entry_regex.search(line)
            if match is not None:
                # Entry line: emit one CSV row, consuming any explanation
                # comment lines collected so far.
                emoji_code = match.group("emoji_code")
                sort_info = get_sorting_info(emoji_code)
                canonical_name = match.group("canonical_name")
                aliases = prepare_aliases(match.group("aliases"))
                explanation = prepare_explanation(explanation_lines)
                output_data.append(
                    [
                        emoji_code,
                        sort_info,
                        canonical_name,
                        aliases,
                        explanation,
                    ]
                )
                explanation_lines = []
                continue
            match = explanation_regex.search(line)
            if match is not None:
                explanation_lines.append(match.group("explanation_line").strip())

    # newline="" lets the csv module control row terminators ("\r\n" for
    # the excel dialect) consistently on every platform.
    with open(args.output_file_path, "w", newline="") as f:
        writer = csv.writer(f, dialect="excel")
        writer.writerows(output_data)
        # The CSV file exported by Google Sheets doesn't have a newline
        # character at the end, so strip the final row terminator so that
        # the round-trip conversion test passes.  (The old code truncated
        # `fp` — the *input* handle, already closed by its `with` block —
        # and measured os.linesep instead of the dialect's "\r\n".)
        f.flush()
        f.truncate(f.tell() - len("\r\n"))


if __name__ == "__main__":
    main()

View File

@@ -1,145 +0,0 @@
#!/usr/bin/env python3
# This tool generates emoji_names.py from a CSV file passed in on the command line.
#
# The CSV files come from a Google Sheets document, because that's a
# better format for reviewing all the emoji and thinking about what
# names and aliases make the most sense; this script converts the
# easily exported CSV one can get from Google Sheets into the
# emoji_names.py format for consumption by the rest of our emoji
# tooling. We check in emoji_names.py (not the CSV) whenever we rerun
# this tool to update the data.
import argparse
import csv
import os
import re
import textwrap
from typing import Any, Dict, List, Set
# Directory containing this script; the default input/output paths below
# resolve relative to it.
EMOJI_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

# Template for one active entry of the generated EMOJI_NAME_MAPS dict;
# {explanation} is a pre-wrapped block of "#" comment lines (or empty).
ACTIVE_ENTRY = (
    "{explanation}"
    "\n {emoji_code!r}: {{'canonical_name': {canonical_name!r}, 'aliases': {aliases!r}}},"
)
# Same shape as ACTIVE_ENTRY but emitted as a comment; used for rows
# whose canonical name is "X" (deactivated entries).
INACTIVE_ENTRY = (
    "{explanation}"
    "\n # {emoji_code!r}: {{'canonical_name': {canonical_name!r}, 'aliases': {aliases!r}}},"
)
# Overall skeleton of the generated emoji_names.py module.
FILE_TEMPLATE = (
    "from typing import Any, Dict\n\n"
    "EMOJI_NAME_MAPS: Dict[str, Dict[str, Any]] = {{"
    "{emoji_entries}\n"
    "}}\n"
)

# Every canonical name and alias accepted so far, for duplicate detection.
emoji_names: Set[str] = set()
def load_data(data_file: str) -> List[List[str]]:
    """Read the CSV file and return its data rows, skipping the header row."""
    with open(data_file, newline="") as fp:
        rows = list(csv.reader(fp))
    return rows[1:]
def check_uniqueness(emoji_name: str) -> None:
    """Record `emoji_name` in the module-level set, raising on a repeat."""
    if emoji_name not in emoji_names:
        emoji_names.add(emoji_name)
        return
    raise Exception(f"Duplicate emoji name: {emoji_name}")
def check_valid_emoji_name(emoji_name: str) -> None:
    """Raise unless the name is lowercase alphanumerics, underscores, and
    hyphens, optionally prefixed with a single "+" or "-"."""
    if not re.fullmatch("[+-]?[a-z0-9_-]+", emoji_name):
        raise Exception(f"Invalid emoji name: {emoji_name}")
def check_emoji_names(canonical_name: str, aliases: List[str]) -> None:
    """Validate the canonical name and every alias for format and
    uniqueness; a canonical name of "X" marks an inactive row and is
    skipped entirely."""
    if canonical_name == "X":
        return
    for candidate in [canonical_name, *aliases]:
        check_valid_emoji_name(candidate)
        check_uniqueness(candidate)
def prepare_explanation(explanation: str) -> str:
    """Wrap the explanation into 80-column "#"-comment lines, each
    preceded by a newline; empty input yields the empty string."""
    if not explanation:
        return ""
    comment_indent = "\n # "
    wrapped = textwrap.wrap(
        explanation.strip(),
        width=80,
        break_long_words=False,
        break_on_hyphens=False,
        initial_indent=comment_indent,
        subsequent_indent=comment_indent,
    )
    return "".join(wrapped)
def prepare_aliases(aliases: str) -> List[str]:
    """Split the comma-separated alias cell into a list of trimmed names;
    an empty cell yields an empty list."""
    if not aliases:
        return []
    return [piece.strip() for piece in aliases.split(",")]
def main() -> None:
    """Regenerate emoji_names.py from the reviewed CSV spreadsheet export.

    Reads the CSV rows, validates names/aliases, formats each row with
    the ACTIVE_ENTRY or INACTIVE_ENTRY template, and writes the result
    into the FILE_TEMPLATE skeleton.
    """
    description = (
        "This script is used for generating `emoji_names.py`. It takes the "
        "path of a csv file containing the required data and optional output "
        "file path."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(EMOJI_DIR_PATH, "emoji_names.csv"),
        help="Path to the csv file from which data is to be read.",
    )
    parser.add_argument(
        "--output-file",
        dest="output_file_path",
        metavar="<path>",
        default=os.path.join(EMOJI_DIR_PATH, "emoji_names.py"),
        help="Path to the output file.",
    )
    args = parser.parse_args()

    emoji_name_data = load_data(args.input_file_path)
    emoji_entries = ""
    for row in emoji_name_data:
        # Column layout: 0=codepoint, 2=canonical name, 3=aliases,
        # 4=explanation (column 1 holds sorting info, unused here).
        emoji_code = row[0]
        canonical_name = row[2]
        aliases = row[3]
        explanation = row[4]

        formatted_explanation = prepare_explanation(explanation)
        extracted_aliases = prepare_aliases(aliases)
        check_emoji_names(canonical_name, extracted_aliases)

        context = {
            "emoji_code": emoji_code,
            "canonical_name": canonical_name,
            "aliases": extracted_aliases,
            "explanation": formatted_explanation,
        }
        # A canonical name of "X" marks an entry that must stay
        # commented out in the generated file.
        template = INACTIVE_ENTRY if canonical_name == "X" else ACTIVE_ENTRY
        emoji_entries += template.format(**context)

    with open(args.output_file_path, "w") as fp:
        fp.write(FILE_TEMPLATE.format(emoji_entries=emoji_entries))


if __name__ == "__main__":
    main()

View File

@@ -1,111 +0,0 @@
#!/usr/bin/env python3
import argparse
import difflib
import filecmp
import os
import shutil
import subprocess
import tempfile
# Three dirname() calls up from this file; presumably the repo's tools/
# directory, given the tool paths joined onto it below — TODO confirm.
TOOLS_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
def generate_files(source_file: str, tmp_dir: str) -> None:
    """Run the CSV -> name map -> CSV -> name map conversion chain,
    leaving CSV_A, NAME_MAP_A, CSV_B, and NAME_MAP_B in tmp_dir."""

    def run_conversion(tool: str, input_path: str, output_path: str) -> None:
        # Invoke one of the import/export scripts, discarding its stdout.
        subprocess.check_call(
            [
                os.path.join(TOOLS_DIR, "setup", "emoji", tool),
                f"--input-file={input_path}",
                f"--output-file={output_path}",
            ],
            stdout=subprocess.DEVNULL,
        )

    # Copy the source CSV file into the temporary test directory.
    csv_a = os.path.join(tmp_dir, "CSV_A")
    shutil.copyfile(source_file, csv_a)
    # Generate the name map file in the temporary directory.
    name_map_a = os.path.join(tmp_dir, "NAME_MAP_A")
    run_conversion("import_emoji_names_from_csv", csv_a, name_map_a)
    # Regenerate the CSV file from the name map.
    csv_b = os.path.join(tmp_dir, "CSV_B")
    run_conversion("export_emoji_names_to_csv", name_map_a, csv_b)
    # Regenerate the name map file from the regenerated CSV file.
    name_map_b = os.path.join(tmp_dir, "NAME_MAP_B")
    run_conversion("import_emoji_names_from_csv", csv_b, name_map_b)
def print_diff(path_file1: str, path_file2: str) -> None:
    """Print a unified diff of the two files' contents to stdout."""
    with open(path_file1) as file1, open(path_file2) as file2:
        lines1 = file1.readlines()
        lines2 = file2.readlines()
    diff_lines = difflib.unified_diff(
        lines1,
        lines2,
        fromfile=path_file1,
        tofile=path_file2,
    )
    for diff_line in diff_lines:
        print(diff_line)
def compare_files(first_file: str, second_file: str) -> None:
    """Raise (after printing a diff) unless the two files are identical.

    shallow=False forces a byte-for-byte content comparison rather than
    an os.stat() metadata check.  (The old `same = True; same = same and
    ...` dance was a pointless boolean chain — reduced to the direct
    condition.)
    """
    if not filecmp.cmp(first_file, second_file, shallow=False):
        print_diff(first_file, second_file)
        raise Exception("Round trip conversion failed!!")
def check_files(tmp_dir: str) -> None:
    """Verify both round-trip outputs match their originals: the original
    vs regenerated CSV, then the two name map files."""
    pairs = (
        ("CSV_A", "CSV_B"),
        ("NAME_MAP_A", "NAME_MAP_B"),
    )
    for original_name, regenerated_name in pairs:
        compare_files(
            os.path.join(tmp_dir, original_name),
            os.path.join(tmp_dir, regenerated_name),
        )
def main() -> None:
    """Round-trip the emoji names CSV through the import/export tools and
    fail loudly if any data changes in the process."""
    # The old description string was garbled ("is used test", "conversion
    # of and then verifying") — repaired here.
    description = (
        "This tool is used to test the emoji tooling that we use to import/export "
        "naming related data. This works by doing a round-trip conversion of "
        "the data and then verifying that no changes were made in the process."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR, "setup", "emoji", "emoji_names.csv"),
        help="Path to the CSV file to be used for round-trip conversion.",
    )
    args = parser.parse_args()

    # TemporaryDirectory cleans up every generated artifact on exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        generate_files(args.input_file_path, tmp_dir)
        check_files(tmp_dir)


if __name__ == "__main__":
    main()