diff --git a/tools/setup/emoji/export_emoji_names_to_csv b/tools/setup/emoji/export_emoji_names_to_csv deleted file mode 100755 index ca67d27813..0000000000 --- a/tools/setup/emoji/export_emoji_names_to_csv +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -# This exports the emoji_names.py data set to a CSV file in the same -# format used as input for import_emoji_names_from_csv. We use this -# as part of a test for the correctness of the import process (one can -# compare the exported CSV file to the original CSV file, and if the -# data round-tripped with no changes, we know everything is working -# correctly). -import argparse -import csv -import os -import re -from typing import Any, Dict, List - -import orjson - -from emoji_setup_utils import get_emoji_code - -TOOLS_DIR_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -ZULIP_PATH = os.path.dirname(TOOLS_DIR_PATH) -# `emoji.json` file is same in all four emoji-datasource packages. -EMOJI_DATA_PATH = os.path.join(ZULIP_PATH, "node_modules", "emoji-datasource-google", "emoji.json") - -sorting_info: Dict[str, Any] = {} -column_names = [ - "Codepoint", - "New sorting info", - "zulip (main)", - "zulip (alternates)", - "explanation", -] -category_index = { - "Smileys & People": "1", - "Animals & Nature": "2", - "Food & Drink": "3", - "Activities": "4", - "Travel & Places": "5", - "Objects": "6", - "Symbols": "7", - "Flags": "8", - "Skin Tones": "9", -} - -name_entry_regex = re.compile( - r"'(?P[a-z0-9-]+)': " - r"{'canonical_name': '(?P[+-]?[a-z0-9_X-]+)',[\n ]+" - r"'aliases': \[(?P('([+-]?[a-z0-9_, X-]+)'[, ]{0,2})*)\]}," -) -explanation_regex = re.compile(r" # (?P[^\r\n\t]+)") - - -def prepare_sorting_info() -> None: - emoji_data: List[Dict[str, Any]] = [] - with open(EMOJI_DATA_PATH, "rb") as fp: - emoji_data = orjson.loads(fp.read()) - - for emoji_dict in emoji_data: - emoji_code = get_emoji_code(emoji_dict) - sort_order = str(emoji_dict["sort_order"]).strip() - sorting_info[emoji_code] = { - "category": emoji_dict["category"], - "sort_order": sort_order.rjust(3, "0"), - } - - -def get_sorting_info(emoji_code: str) -> str: - category = sorting_info[emoji_code]["category"] - category = category_index[category] + "-" + category - sort_order = sorting_info[emoji_code]["sort_order"] - return " ".join([category, sort_order]) - - -def prepare_explanation(explanation_lines: List[str]) -> str: - return " ".join(explanation_lines) - - -def prepare_aliases(captured_aliases: str) -> str: - aliases = [] - for alias in captured_aliases.split(", "): - aliases.append(alias.strip("'")) - return ", ".join(aliases) - - -def main() -> None: - description = ( - "This script is used for exporting `emoji_names.py` to comma separated file. It " - "takes the path of output csv file and path to `emoji_names.py` as arguments." - ) - parser = argparse.ArgumentParser(description=description) - parser.add_argument( - "--input-file", - dest="input_file_path", - metavar="", - default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.py"), - help="Path to the file from which data is to be read.", - ) - parser.add_argument( - "--output-file", - dest="output_file_path", - metavar="", - default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.csv"), - help="Path to the output csv file.", - ) - - args = parser.parse_args() - prepare_sorting_info() - output_data = [column_names] - explanation_lines: List[str] = [] - with open(args.input_file_path) as fp: - for line in fp.readlines(): - match = name_entry_regex.search(line) - if match is not None: - emoji_code = match.group("emoji_code") - sort_info = get_sorting_info(emoji_code) - canonical_name = match.group("canonical_name") - aliases = prepare_aliases(match.group("aliases")) - explanation = prepare_explanation(explanation_lines) - output_data.append( - [ - emoji_code, - sort_info, - canonical_name, - aliases, - explanation, - ] - ) - explanation_lines = [] - continue - - match = explanation_regex.search(line) - if match is not None: - explanation_line = match.group("explanation_line").strip() - explanation_lines.append(explanation_line) - - with open(args.output_file_path, "w") as f: - writer = csv.writer(f, dialect="excel") - writer.writerows(output_data) - # The CSV file exported by Google Sheets doesn't have a newline - # character in the end. So we also strip the last newline character - # so that round-trip conversion test passes. - line_sep_len = len(os.linesep) - fp.truncate(fp.tell() - line_sep_len) - fp.close() - - -if __name__ == "__main__": - main() diff --git a/tools/setup/emoji/import_emoji_names_from_csv b/tools/setup/emoji/import_emoji_names_from_csv deleted file mode 100755 index db8b3c6cac..0000000000 --- a/tools/setup/emoji/import_emoji_names_from_csv +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# This tool generates emoji_names.py from a CSV file passed in on the command line. -# -# The CSV files come from a Google Sheets document, because that's a -# better format for reviewing all the emoji and thinking about what -# names and aliases make the most sense; this script converts the -# easily exported CSV one can get from Google Sheets into the -# emoji_names.py format for consumption by the rest of our emoji -# tooling. We check in emoji_names.py (not the CSV) whenever we rerun -# this tool to update the data. -import argparse -import csv -import os -import re -import textwrap -from typing import Any, Dict, List, Set - -EMOJI_DIR_PATH = os.path.dirname(os.path.abspath(__file__)) - -ACTIVE_ENTRY = ( - "{explanation}" - "\n {emoji_code!r}: {{'canonical_name': {canonical_name!r}, 'aliases': {aliases!r}}}," -) - -INACTIVE_ENTRY = ( - "{explanation}" - "\n # {emoji_code!r}: {{'canonical_name': {canonical_name!r}, 'aliases': {aliases!r}}}," -) - -FILE_TEMPLATE = ( - "from typing import Any, Dict\n\n" - "EMOJI_NAME_MAPS: Dict[str, Dict[str, Any]] = {{" - "{emoji_entries}\n" - "}}\n" -) - -emoji_names: Set[str] = set() - - -def load_data(data_file: str) -> List[List[str]]: - emoji_name_data: List[List[str]] = [] - with open(data_file, newline="") as fp: - data = csv.reader(fp) - for row in data: - emoji_name_data.append(row) - return emoji_name_data[1:] - - -def check_uniqueness(emoji_name: str) -> None: - if emoji_name in emoji_names: - raise Exception(f"Duplicate emoji name: {emoji_name}") - emoji_names.add(emoji_name) - - -def check_valid_emoji_name(emoji_name: str) -> None: - if re.fullmatch("[+-]?[a-z0-9_-]+", emoji_name) is None: - raise Exception(f"Invalid emoji name: {emoji_name}") - - -def check_emoji_names(canonical_name: str, aliases: List[str]) -> None: - if canonical_name == "X": - return - names_to_check = [canonical_name, *aliases] - for name in names_to_check: - check_valid_emoji_name(name) - check_uniqueness(name) - - -def prepare_explanation(explanation: str) -> str: - if explanation == "": - return "" - - wrapper_config: Dict[str, Any] = { - "width": 80, - "break_long_words": False, - "break_on_hyphens": False, - "initial_indent": "\n # ", - "subsequent_indent": "\n # ", - } - wrapped_lines = textwrap.wrap(explanation.strip(), **wrapper_config) - return "".join(wrapped_lines) - - -def prepare_aliases(aliases: str) -> List[str]: - if aliases == "": - return [] - return [alias.strip() for alias in aliases.split(",")] - - -def main() -> None: - description = ( - "This script is used for generating `emoji_names.py`. It takes the " - "path of an csv file containing the required data and optional output " - "file path." - ) - parser = argparse.ArgumentParser(description=description) - parser.add_argument( - "--input-file", - dest="input_file_path", - metavar="", - default=os.path.join(EMOJI_DIR_PATH, "emoji_names.csv"), - help="Path to the csv file from which data is to be read.", - ) - parser.add_argument( - "--output-file", - dest="output_file_path", - metavar="", - default=os.path.join(EMOJI_DIR_PATH, "emoji_names.py"), - help="Path to the output file.", - ) - - args = parser.parse_args() - emoji_name_data = load_data(args.input_file_path) - emoji_entry = "" - emoji_entries = "" - - for row in emoji_name_data: - emoji_code = row[0] - canonical_name = row[2] - aliases = row[3] - explanation = row[4] - - formatted_explanation = prepare_explanation(explanation) - extracted_aliases = prepare_aliases(aliases) - check_emoji_names(canonical_name, extracted_aliases) - - context = { - "emoji_code": emoji_code, - "canonical_name": canonical_name, - "aliases": extracted_aliases, - "explanation": formatted_explanation, - } - if canonical_name == "X": - emoji_entry = INACTIVE_ENTRY.format(**context) - else: - emoji_entry = ACTIVE_ENTRY.format(**context) - - emoji_entries += emoji_entry - - with open(args.output_file_path, "w") as fp: - fp.write(FILE_TEMPLATE.format(emoji_entries=emoji_entries)) - - -if __name__ == "__main__": - main() diff --git a/tools/setup/emoji/test-emoji-name-scripts b/tools/setup/emoji/test-emoji-name-scripts deleted file mode 100755 index bf7ad01d6d..0000000000 --- a/tools/setup/emoji/test-emoji-name-scripts +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import difflib -import filecmp -import os -import shutil -import subprocess -import tempfile - -TOOLS_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) - - -def generate_files(source_file: str, tmp_dir: str) -> None: - # Copy the source CSV file to out temporary test directory. - input_file_path = source_file - output_file_path = os.path.join(tmp_dir, "CSV_A") - shutil.copyfile(input_file_path, output_file_path) - - # Generate the name map file in the temporary directory. - input_file_path = output_file_path - output_file_path = os.path.join(tmp_dir, "NAME_MAP_A") - subprocess.check_call( - [ - os.path.join(TOOLS_DIR, "setup", "emoji", "import_emoji_names_from_csv"), - f"--input-file={input_file_path}", - f"--output-file={output_file_path}", - ], - stdout=subprocess.DEVNULL, - ) - - # Regenerate the CSV file from name map. - input_file_path = output_file_path - output_file_path = os.path.join(tmp_dir, "CSV_B") - subprocess.check_call( - [ - os.path.join(TOOLS_DIR, "setup", "emoji", "export_emoji_names_to_csv"), - f"--input-file={input_file_path}", - f"--output-file={output_file_path}", - ], - stdout=subprocess.DEVNULL, - ) - - # Regenerate the name map file from the regenerated CSV file. - input_file_path = output_file_path - output_file_path = os.path.join(tmp_dir, "NAME_MAP_B") - subprocess.check_call( - [ - os.path.join(TOOLS_DIR, "setup", "emoji", "import_emoji_names_from_csv"), - f"--input-file={input_file_path}", - f"--output-file={output_file_path}", - ], - stdout=subprocess.DEVNULL, - ) - - -def print_diff(path_file1: str, path_file2: str) -> None: - with open(path_file1) as file1: - with open(path_file2) as file2: - diff = difflib.unified_diff( - file1.readlines(), - file2.readlines(), - fromfile=path_file1, - tofile=path_file2, - ) - for line in diff: - print(line) - - -def compare_files(first_file: str, second_file: str) -> None: - same = True - same = same and filecmp.cmp(first_file, second_file, shallow=False) - if not same: - print_diff(first_file, second_file) - raise Exception("Round trip conversion failed!!") - - -def check_files(tmp_dir: str) -> None: - # Compare the original and regenerated CSV files. - first_file = os.path.join(tmp_dir, "CSV_A") - second_file = os.path.join(tmp_dir, "CSV_B") - compare_files(first_file, second_file) - - # Compare the name map files. - first_file = os.path.join(tmp_dir, "NAME_MAP_A") - second_file = os.path.join(tmp_dir, "NAME_MAP_B") - compare_files(first_file, second_file) - - -def main() -> None: - description = ( - "This tool is used test the emoji tooling that we use to import/export " - "naming related data. This works by doing a round-trip conversion of " - "and then verifying that no changes were made in the process." - ) - parser = argparse.ArgumentParser(description=description) - parser.add_argument( - "--input-file", - dest="input_file_path", - metavar="", - default=os.path.join(TOOLS_DIR, "setup", "emoji", "emoji_names.csv"), - help="Path to the CSV file to be used for round-trip conversion.", - ) - - args = parser.parse_args() - with tempfile.TemporaryDirectory() as tmp_dir: - generate_files(args.input_file_path, tmp_dir) - check_files(tmp_dir) - - -if __name__ == "__main__": - main()