mirror of https://github.com/zulip/zulip.git
emoji: Remove old broken CSV import/export scripts.
These have been broken at least since commit
e331a758c3
(#12787), so clearly nobody
cares.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
35913a2297
commit
84bdd6371d
|
@ -1,147 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
#
|
|
||||||
# This exports the emoji_names.py data set to a CSV file in the same
|
|
||||||
# format used as input for import_emoji_names_from_csv. We use this
|
|
||||||
# as part of a test for the correctness of the import process (one can
|
|
||||||
# compare the exported CSV file to the original CSV file, and if the
|
|
||||||
# data round-tripped with no changes, we know everything is working
|
|
||||||
# correctly).
|
|
||||||
import argparse
|
|
||||||
import csv
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from typing import Any, Dict, List
|
|
||||||
|
|
||||||
import orjson
|
|
||||||
|
|
||||||
from emoji_setup_utils import get_emoji_code
|
|
||||||
|
|
||||||
# Path layout: this script lives in tools/setup/emoji/, so three dirname()
# calls up from this file is the tools/ directory, and one more is the
# repository root.
TOOLS_DIR_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ZULIP_PATH = os.path.dirname(TOOLS_DIR_PATH)
# `emoji.json` file is same in all four emoji-datasource packages.
EMOJI_DATA_PATH = os.path.join(ZULIP_PATH, "node_modules", "emoji-datasource-google", "emoji.json")

# Maps emoji code -> {"category": ..., "sort_order": ...}; populated by
# prepare_sorting_info() and read by get_sorting_info().
sorting_info: Dict[str, Any] = {}
# Header row of the exported CSV file; column order must match the rows
# appended in main().
column_names = [
    "Codepoint",
    "New sorting info",
    "zulip (main)",
    "zulip (alternates)",
    "explanation",
]
# Numeric prefix per category so that categories sort in display order
# when the "New sorting info" column is sorted lexicographically.
category_index = {
    "Smileys & People": "1",
    "Animals & Nature": "2",
    "Food & Drink": "3",
    "Activities": "4",
    "Travel & Places": "5",
    "Objects": "6",
    "Symbols": "7",
    "Flags": "8",
    "Skin Tones": "9",
}

# Matches one entry line of emoji_names.py, capturing the emoji code, the
# canonical name, and the raw (still-quoted) alias list.
name_entry_regex = re.compile(
    r"'(?P<emoji_code>[a-z0-9-]+)': "
    r"{'canonical_name': '(?P<canonical_name>[+-]?[a-z0-9_X-]+)',[\n ]+"
    r"'aliases': \[(?P<aliases>('([+-]?[a-z0-9_, X-]+)'[, ]{0,2})*)\]},"
)
# Matches a "# ..." comment line in emoji_names.py, capturing the comment text.
explanation_regex = re.compile(r" # (?P<explanation_line>[^\r\n\t]+)")
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_sorting_info() -> None:
    """Populate the module-level ``sorting_info`` map from emoji.json.

    Each entry maps an emoji code to its category and a zero-padded sort
    order string (padded so lexicographic sorting matches numeric order).
    """
    with open(EMOJI_DATA_PATH, "rb") as data_file:
        records: List[Dict[str, Any]] = orjson.loads(data_file.read())

    for record in records:
        padded_order = str(record["sort_order"]).strip().rjust(3, "0")
        sorting_info[get_emoji_code(record)] = {
            "category": record["category"],
            "sort_order": padded_order,
        }
|
|
||||||
|
|
||||||
|
|
||||||
def get_sorting_info(emoji_code: str) -> str:
    """Return the "New sorting info" cell for an emoji code.

    Format: "<category_index>-<category> <sort_order>", built from the
    module-level ``sorting_info`` and ``category_index`` maps.
    """
    info = sorting_info[emoji_code]
    category = info["category"]
    indexed_category = category_index[category] + "-" + category
    return f"{indexed_category} {info['sort_order']}"
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_explanation(explanation_lines: List[str]) -> str:
    """Collapse accumulated explanation comment lines into one
    space-separated string for the CSV "explanation" column."""
    joined = " ".join(explanation_lines)
    return joined
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_aliases(captured_aliases: str) -> str:
    """Strip the single quotes from each regex-captured alias and rejoin
    them with ", " for the CSV aliases column."""
    return ", ".join(token.strip("'") for token in captured_aliases.split(", "))
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Parse emoji_names.py and export it back to CSV.

    Reads the name-map file line by line: entry lines (matched by
    ``name_entry_regex``) become CSV rows, and any preceding comment lines
    (matched by ``explanation_regex``) are accumulated into that row's
    explanation column.
    """
    description = (
        "This script is used for exporting `emoji_names.py` to comma separated file. It "
        "takes the path of output csv file and path to `emoji_names.py` as arguments."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.py"),
        help="Path to the file from which data is to be read.",
    )
    parser.add_argument(
        "--output-file",
        dest="output_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.csv"),
        help="Path to the output csv file.",
    )

    args = parser.parse_args()
    prepare_sorting_info()
    output_data = [column_names]
    explanation_lines: List[str] = []
    with open(args.input_file_path) as fp:
        for line in fp:
            match = name_entry_regex.search(line)
            if match is not None:
                emoji_code = match.group("emoji_code")
                output_data.append(
                    [
                        emoji_code,
                        get_sorting_info(emoji_code),
                        match.group("canonical_name"),
                        prepare_aliases(match.group("aliases")),
                        prepare_explanation(explanation_lines),
                    ]
                )
                # Explanation comments belong to the entry just emitted.
                explanation_lines = []
                continue

            match = explanation_regex.search(line)
            if match is not None:
                explanation_lines.append(match.group("explanation_line").strip())

    # Open with newline="" as the csv docs recommend, so the excel dialect's
    # "\r\n" row terminators are written verbatim on every platform.
    with open(args.output_file_path, "w", newline="") as f:
        writer = csv.writer(f, dialect="excel")
        writer.writerows(output_data)
        # The CSV file exported by Google Sheets doesn't have a newline
        # character at the end, so strip the final "\r\n" terminator so the
        # round-trip conversion test passes.  (Bug fix: the old code called
        # truncate()/close() on the already-closed *input* handle ``fp``
        # instead of the output handle, which raised ValueError.)
        f.truncate(f.tell() - len("\r\n"))
|
|
||||||
|
|
||||||
|
|
||||||
# Run only when executed as a script, so the module can be imported
# without side effects.
if __name__ == "__main__":
    main()
|
|
|
@ -1,145 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# This tool generates emoji_names.py from a CSV file passed in on the command line.
|
|
||||||
#
|
|
||||||
# The CSV files come from a Google Sheets document, because that's a
|
|
||||||
# better format for reviewing all the emoji and thinking about what
|
|
||||||
# names and aliases make the most sense; this script converts the
|
|
||||||
# easily exported CSV one can get from Google Sheets into the
|
|
||||||
# emoji_names.py format for consumption by the rest of our emoji
|
|
||||||
# tooling. We check in emoji_names.py (not the CSV) whenever we rerun
|
|
||||||
# this tool to update the data.
|
|
||||||
import argparse
|
|
||||||
import csv
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import textwrap
|
|
||||||
from typing import Any, Dict, List, Set
|
|
||||||
|
|
||||||
# Directory containing this script (tools/setup/emoji/); used to build the
# default input/output file paths.
EMOJI_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

# Template for a live entry in the generated EMOJI_NAME_MAPS dict.
# {explanation} is the pre-formatted comment block (possibly empty) produced
# by prepare_explanation().
ACTIVE_ENTRY = (
    "{explanation}"
    "\n {emoji_code!r}: {{'canonical_name': {canonical_name!r}, 'aliases': {aliases!r}}},"
)

# Same shape as ACTIVE_ENTRY, but the entry line itself is commented out;
# used for rows whose canonical name is the removal marker "X".
INACTIVE_ENTRY = (
    "{explanation}"
    "\n # {emoji_code!r}: {{'canonical_name': {canonical_name!r}, 'aliases': {aliases!r}}},"
)

# Overall shape of the generated emoji_names.py file; {emoji_entries} is the
# concatenation of ACTIVE_ENTRY/INACTIVE_ENTRY renderings.
FILE_TEMPLATE = (
    "from typing import Any, Dict\n\n"
    "EMOJI_NAME_MAPS: Dict[str, Dict[str, Any]] = {{"
    "{emoji_entries}\n"
    "}}\n"
)

# All names (canonical + aliases) seen so far; used by check_uniqueness()
# to enforce global uniqueness across the whole data set.
emoji_names: Set[str] = set()
|
|
||||||
|
|
||||||
|
|
||||||
def load_data(data_file: str) -> List[List[str]]:
    """Read the CSV file at *data_file* and return all rows except the
    header row."""
    with open(data_file, newline="") as csv_file:
        all_rows = list(csv.reader(csv_file))
    # Drop the header row; callers only want the data rows.
    return all_rows[1:]
|
|
||||||
|
|
||||||
|
|
||||||
def check_uniqueness(emoji_name: str) -> None:
    """Record *emoji_name* in the module-level ``emoji_names`` set.

    Raises:
        Exception: if the name was already registered (duplicate name).
    """
    if emoji_name not in emoji_names:
        emoji_names.add(emoji_name)
        return
    raise Exception(f"Duplicate emoji name: {emoji_name}")
|
|
||||||
|
|
||||||
|
|
||||||
def check_valid_emoji_name(emoji_name: str) -> None:
    """Reject names that are not lowercase alphanumeric/underscore/hyphen
    (with an optional leading + or -).

    Raises:
        Exception: if the name does not match the allowed pattern.
    """
    if re.fullmatch("[+-]?[a-z0-9_-]+", emoji_name) is not None:
        return
    raise Exception(f"Invalid emoji name: {emoji_name}")
|
|
||||||
|
|
||||||
|
|
||||||
def check_emoji_names(canonical_name: str, aliases: List[str]) -> None:
    """Validate and register the canonical name and every alias.

    Rows whose canonical name is the removal marker "X" are skipped
    entirely (they produce commented-out entries and need no validation).
    """
    if canonical_name == "X":
        return
    for name in [canonical_name, *aliases]:
        check_valid_emoji_name(name)
        check_uniqueness(name)
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_explanation(explanation: str) -> str:
    """Format a CSV explanation cell as "# "-prefixed comment lines.

    Returns "" for an empty cell; otherwise each wrapped line is prefixed
    with a newline plus comment marker, ready to prepend to an entry.
    """
    if explanation == "":
        return ""

    comment_prefix = "\n # "
    wrapped_lines = textwrap.wrap(
        explanation.strip(),
        width=80,
        break_long_words=False,
        break_on_hyphens=False,
        initial_indent=comment_prefix,
        subsequent_indent=comment_prefix,
    )
    return "".join(wrapped_lines)
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_aliases(aliases: str) -> List[str]:
    """Split a comma-separated aliases cell into a list of stripped names;
    an empty cell yields an empty list."""
    if not aliases:
        return []
    return [name.strip() for name in aliases.split(",")]
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Read the emoji CSV and generate emoji_names.py from it.

    Each data row is validated, rendered through ACTIVE_ENTRY (or
    INACTIVE_ENTRY when the canonical name is the removal marker "X"),
    and the concatenated entries are written via FILE_TEMPLATE.
    """
    parser = argparse.ArgumentParser(
        description=(
            "This script is used for generating `emoji_names.py`. It takes the "
            "path of an csv file containing the required data and optional output "
            "file path."
        )
    )
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(EMOJI_DIR_PATH, "emoji_names.csv"),
        help="Path to the csv file from which data is to be read.",
    )
    parser.add_argument(
        "--output-file",
        dest="output_file_path",
        metavar="<path>",
        default=os.path.join(EMOJI_DIR_PATH, "emoji_names.py"),
        help="Path to the output file.",
    )

    args = parser.parse_args()

    emoji_entries = ""
    for row in load_data(args.input_file_path):
        # Column layout: 0=codepoint, 2=canonical name, 3=aliases,
        # 4=explanation (column 1 is sorting info, unused here).
        emoji_code = row[0]
        canonical_name = row[2]
        extracted_aliases = prepare_aliases(row[3])
        check_emoji_names(canonical_name, extracted_aliases)

        template = INACTIVE_ENTRY if canonical_name == "X" else ACTIVE_ENTRY
        emoji_entries += template.format(
            emoji_code=emoji_code,
            canonical_name=canonical_name,
            aliases=extracted_aliases,
            explanation=prepare_explanation(row[4]),
        )

    with open(args.output_file_path, "w") as fp:
        fp.write(FILE_TEMPLATE.format(emoji_entries=emoji_entries))
|
|
||||||
|
|
||||||
|
|
||||||
# Run only when executed as a script, so the module can be imported
# without side effects.
if __name__ == "__main__":
    main()
|
|
|
@ -1,111 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
import argparse
|
|
||||||
import difflib
|
|
||||||
import filecmp
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
import subprocess
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
# Absolute path to the tools/ directory (three levels up from this script).
TOOLS_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
|
||||||
|
|
||||||
|
|
||||||
def generate_files(source_file: str, tmp_dir: str) -> None:
    """Run the round-trip conversion chain inside *tmp_dir*.

    Produces CSV_A (copy of the source), NAME_MAP_A (imported from CSV_A),
    CSV_B (exported from NAME_MAP_A), and NAME_MAP_B (imported from CSV_B),
    so check_files() can compare the A/B pairs.
    """
    # Seed the temporary test directory with a copy of the source CSV file.
    shutil.copyfile(source_file, os.path.join(tmp_dir, "CSV_A"))

    # Each step runs one emoji tool, reading the previous artifact and
    # writing the next one.
    conversion_steps = [
        ("import_emoji_names_from_csv", "CSV_A", "NAME_MAP_A"),
        ("export_emoji_names_to_csv", "NAME_MAP_A", "CSV_B"),
        ("import_emoji_names_from_csv", "CSV_B", "NAME_MAP_B"),
    ]
    for tool_name, src_name, dst_name in conversion_steps:
        input_file_path = os.path.join(tmp_dir, src_name)
        output_file_path = os.path.join(tmp_dir, dst_name)
        subprocess.check_call(
            [
                os.path.join(TOOLS_DIR, "setup", "emoji", tool_name),
                f"--input-file={input_file_path}",
                f"--output-file={output_file_path}",
            ],
            stdout=subprocess.DEVNULL,
        )
|
|
||||||
|
|
||||||
|
|
||||||
def print_diff(path_file1: str, path_file2: str) -> None:
    """Print a unified diff between the two files, one diff line per
    print() call."""
    with open(path_file1) as file1, open(path_file2) as file2:
        delta = difflib.unified_diff(
            file1.readlines(),
            file2.readlines(),
            fromfile=path_file1,
            tofile=path_file2,
        )
        for diff_line in delta:
            print(diff_line)
|
|
||||||
|
|
||||||
|
|
||||||
def compare_files(first_file: str, second_file: str) -> None:
    """Verify that two files are byte-identical.

    Prints a unified diff and raises if they differ.  (Simplified from the
    original's redundant ``same = True; same = same and ...`` accumulator.)

    Raises:
        Exception: if the file contents differ.
    """
    # shallow=False forces a full content comparison instead of an
    # os.stat()-only check.
    if not filecmp.cmp(first_file, second_file, shallow=False):
        print_diff(first_file, second_file)
        raise Exception("Round trip conversion failed!!")
|
|
||||||
|
|
||||||
|
|
||||||
def check_files(tmp_dir: str) -> None:
    """Compare both round-trip artifact pairs (original vs regenerated CSV,
    and the two generated name maps), raising on any mismatch."""
    artifact_pairs = (
        ("CSV_A", "CSV_B"),
        ("NAME_MAP_A", "NAME_MAP_B"),
    )
    for first_name, second_name in artifact_pairs:
        compare_files(
            os.path.join(tmp_dir, first_name),
            os.path.join(tmp_dir, second_name),
        )
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Round-trip the emoji CSV through the import/export tools in a
    temporary directory and verify that no data changed."""
    parser = argparse.ArgumentParser(
        description=(
            "This tool is used test the emoji tooling that we use to import/export "
            "naming related data. This works by doing a round-trip conversion of "
            "and then verifying that no changes were made in the process."
        )
    )
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR, "setup", "emoji", "emoji_names.csv"),
        help="Path to the CSV file to be used for round-trip conversion.",
    )

    args = parser.parse_args()
    # TemporaryDirectory cleans up all generated artifacts on exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        generate_files(args.input_file_path, tmp_dir)
        check_files(tmp_dir)
|
|
||||||
|
|
||||||
|
|
||||||
# Run only when executed as a script, so the module can be imported
# without side effects.
if __name__ == "__main__":
    main()
|
|
Loading…
Reference in New Issue