tools: Add import_emoji_names_from_csv tool.

This commit is contained in:
Harshit Bansal 2017-11-11 22:01:04 +00:00 committed by Tim Abbott
parent 5ae90d60d7
commit 8ada7cfe5b
1 changed file with 134 additions and 0 deletions

View File

@ -0,0 +1,134 @@
#!/usr/bin/env python3
# This tool generates emoji_names.py from a CSV file passed in on the command line.
#
# The CSV files come from a Google Sheets document, because that's a
# better format for reviewing all the emoji and thinking about what
# names and aliases make the most sense; this script converts the
# easily exported CSV one can get from Google Sheets into the
# emoji_names.py format for consumption by the rest of our emoji
# tooling. We check in emoji_names.py (not the CSV) whenever we rerun
# this tool to update the data.
import argparse
import csv
import os
import re
import textwrap
from typing import Any, Dict, List, Set
# Directory that contains this script; main() uses it to build the default
# input (emoji_names.csv) and output (emoji_names.py) paths.
TOOLS_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

# The dict-entry text shared by active and inactive rows.  All templates here
# are filled in with the `%` operator and a mapping.
_ENTRY_LINE = "'%(emoji_code)s': {'canonical_name': '%(canonical_name)s', 'aliases': %(aliases)s},"

# Template for a row whose canonical name is a real name: the (possibly empty)
# explanation comment followed by the dict entry on a new line.
ACTIVE_ENTRY = "%(explanation)s\n " + _ENTRY_LINE

# Template for a row whose canonical name is 'X': identical to ACTIVE_ENTRY
# except that the dict entry is emitted behind a `#`, i.e. commented out.
INACTIVE_ENTRY = "%(explanation)s\n # " + _ENTRY_LINE

# Skeleton of the generated module; `emoji_entries` is the concatenation of
# every rendered entry.
FILE_TEMPLATE = (
    "from typing import Any, Dict\n\n"
    "EMOJI_NAME_MAPS = {%(emoji_entries)s\n"
    "} # type: Dict[str, Dict[str, Any]]\n"
)

# Every name accepted so far by check_uniqueness(); used to detect duplicates.
emoji_names = set()  # type: Set[str]
def load_data(data_file: str) -> List[List[str]]:
    """Read the CSV file at `data_file` and return all rows except the first.

    Each returned row is the list of string fields produced by `csv.reader`.
    The first row is always discarded (presumably the spreadsheet's column
    headers -- confirm against the exported CSV).  An empty file yields `[]`.

    NOTE(review): no `encoding` is passed to `open`, so the file is decoded
    with the platform's default text encoding.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(data_file, newline='') as csv_file:
        rows = list(csv.reader(csv_file))
    return rows[1:]
def check_uniqueness(emoji_name: str) -> None:
    """Record `emoji_name` in the module-level `emoji_names` set.

    Raises:
        Exception: if the name has already been recorded by an earlier call.
    """
    if emoji_name not in emoji_names:
        emoji_names.add(emoji_name)
        return
    raise Exception("Duplicate emoji name: %s" % (emoji_name,))
def check_valid_emoji_name(emoji_name: str) -> None:
    """Verify that `emoji_name` uses only the permitted characters.

    The whole name must match: an optional leading `+` or `-`, followed by
    one or more of lowercase ASCII letters, digits, `_` or `-`.

    Raises:
        Exception: if the name does not match that pattern.
    """
    is_valid = re.fullmatch("[+-]?[a-z0-9_-]+", emoji_name) is not None
    if not is_valid:
        raise Exception("Invalid emoji name: %s" % (emoji_name,))
def check_emoji_names(canonical_name: str, aliases: List[str]) -> None:
    """Validate the canonical name and every alias of one CSV row.

    Each name, canonical first and then the aliases in order, is passed to
    `check_valid_emoji_name` and then `check_uniqueness`; whatever those
    raise propagates to the caller.

    Rows whose canonical name is the literal 'X' are skipped entirely: main()
    emits those rows as commented-out entries, and neither their canonical
    name nor their aliases are checked.
    """
    if canonical_name != 'X':
        for candidate in [canonical_name] + aliases:
            check_valid_emoji_name(candidate)
            check_uniqueness(candidate)
def prepare_explanation(explanation: str) -> str:
    """Turn a free-text explanation into comment lines for the generated file.

    The stripped text is wrapped with a width of 80, and every wrapped line
    is prefixed with a newline plus a `#` comment marker (the prefix counts
    toward the width).  Long words and hyphenated words are never split, so
    a single over-long word stays on one line.

    Returns the comment lines concatenated into one string, or '' when the
    explanation is empty or contains only whitespace.
    """
    if explanation == '':
        return ''

    comment_prefix = '\n # '
    wrapper = textwrap.TextWrapper(
        width=80,
        initial_indent=comment_prefix,
        subsequent_indent=comment_prefix,
        break_long_words=False,
        break_on_hyphens=False,
    )
    # Each wrapped line already starts with its own newline, so a plain
    # concatenation yields one comment line per wrapped line.
    return ''.join(wrapper.wrap(explanation.strip()))
def prepare_aliases(aliases: str) -> List[str]:
    """Split a comma-separated alias cell into a list of aliases.

    Surrounding whitespace is removed from each alias.  An empty cell yields
    an empty list; empty pieces between commas are kept as '' rather than
    dropped.
    """
    pieces = aliases.split(',') if aliases != '' else []
    return [piece.strip() for piece in pieces]
def main() -> None:
    """Generate the emoji names module from a CSV file.

    Command-line options:
        --input-file   CSV to read (default: emoji_names.csv next to this script).
        --output-file  File to write (default: emoji_names.py next to this script).

    For every data row, column 0 is the emoji code, column 2 the canonical
    name, column 3 the comma-separated aliases and column 4 the explanation.
    Names are validated via `check_emoji_names`, whose exceptions propagate
    and abort the run before the output file is opened.  A row with fewer
    than five columns raises IndexError.
    """
    usage_text = ("This script is used for generating `emoji_names.py`. It takes the "
                  "path of an csv file containing the required data and optional output "
                  "file path.")
    arg_parser = argparse.ArgumentParser(description=usage_text)
    arg_parser.add_argument(
        "--input-file",
        dest="input_file_path",
        type=str,
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "emoji_names.csv"),
        help="Path to the csv file from which data is to be read.",
    )
    arg_parser.add_argument(
        "--output-file",
        dest="output_file_path",
        type=str,
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "emoji_names.py"),
        help="Path to the output file.",
    )
    options = arg_parser.parse_args()

    rendered_entries = []  # type: List[str]
    for row in load_data(options.input_file_path):
        emoji_code = row[0]
        canonical_name = row[2]
        raw_aliases = row[3]
        raw_explanation = row[4]

        comment_text = prepare_explanation(raw_explanation)
        alias_list = prepare_aliases(raw_aliases)
        check_emoji_names(canonical_name, alias_list)

        # A canonical name of 'X' marks the row as inactive: it is still
        # written out, but as a commented-out entry.
        template = INACTIVE_ENTRY if canonical_name == 'X' else ACTIVE_ENTRY
        entry = template % {
            'emoji_code': emoji_code,
            'canonical_name': canonical_name,
            'aliases': alias_list,
            'explanation': comment_text,
        }

        # The last line of the entry is the dict entry itself.  When it is
        # 110 characters or longer, tag it with ` # ignorelongline` so the
        # generated file does not trigger long-line lint errors.
        if len(entry.rsplit('\n', 1)[-1]) >= 110:
            entry += ' # ignorelongline'
        rendered_entries.append(entry)

    with open(options.output_file_path, 'w') as out_file:
        out_file.write(FILE_TEMPLATE % {'emoji_entries': ''.join(rendered_entries)})
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()