mirror of https://github.com/zulip/zulip.git
tools: Add `export_emoji_names_to_csv`.
This tool is used for exporting `emoji_names.py` to a CSV file.
parent
05f85eb94d
commit
628e868d1e
@ -0,0 +1,127 @@
#!/usr/bin/env python3
#
# This exports the emoji_names.py data set to a CSV file in the same
# format used as input for import_emoji_names_from_csv. We use this
# as part of a test for the correctness of the import process (one can
# compare the exported CSV file to the original CSV file, and if the
# data round-tripped with no changes, we know everything is working
# correctly).
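#
# A typical invocation might look like the following (illustrative;
# both flags are optional and default to the paths shown):
#
#     ./tools/setup/emoji/export_emoji_names_to_csv \
#         --input-file tools/setup/emoji/emoji_names.py \
#         --output-file tools/setup/emoji/emoji_names.csv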
import argparse
import csv
import os
import re
import ujson

from typing import Any, Dict, List

TOOLS_DIR_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ZULIP_PATH = os.path.dirname(TOOLS_DIR_PATH)
# The `emoji.json` file is the same in all four emoji-datasource packages.
EMOJI_DATA_PATH = os.path.join(ZULIP_PATH, 'node_modules', 'emoji-datasource-google', 'emoji.json')

sorting_info = {}  # type: Dict[str, Any]
column_names = [
    'Codepoint',
    'New sorting info',
    'zulip (main)',
    'zulip (alternates)',
    'explanation',
]
category_index = {
    'People': '1',
    'Nature': '2',
    'Foods': '3',
    'Activity': '4',
    'Places': '5',
    'Objects': '6',
    'Symbols': '7',
    'Flags': '8',
    'Skin Tones': '9',
}

name_entry_regex = re.compile(r"'(?P<emoji_code>[a-z0-9-]+)': "
                              r"{'canonical_name': '(?P<canonical_name>[+-]?[a-z0-9_X-]+)',[\n ]+"
                              r"'aliases': \[(?P<aliases>('([+-]?[a-z0-9_, X-]+)'[, ]{0,2})*)\]},")
explanation_regex = re.compile(r" # (?P<explanation_line>[^\r\n\t]+)")

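# For reference, name_entry_regex is written to match single-line
# entries of the following shape (a made-up example, not a line from
# the real data set):
#
#     '1f604': {'canonical_name': 'smile', 'aliases': ['happy', 'grinning']},
#
# while explanation_regex picks up the `# ...` comment lines that
# precede such entries.
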
def prepare_sorting_info() -> None:
    emoji_data = []  # type: List[Dict[str, Any]]
    with open(EMOJI_DATA_PATH) as fp:
        emoji_data = ujson.load(fp)

    for emoji_dict in emoji_data:
        emoji_code = emoji_dict['unified'].lower()
        sort_order = str(emoji_dict['sort_order']).strip()
        sorting_info[emoji_code] = {
            'category': emoji_dict['category'],
            'sort_order': sort_order.rjust(3, '0'),
        }

def get_sorting_info(emoji_code: str) -> str:
    category = sorting_info[emoji_code]['category']
    category = category_index[category] + '-' + category
    sort_order = sorting_info[emoji_code]['sort_order']
    return ' '.join([category, sort_order])

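# For example, given the data prepared above, get_sorting_info for an
# emoji in the 'People' category with sort_order 3 returns
# '1-People 003' (an illustrative value: category_index['People'] is
# '1', and the sort order is zero-padded to three digits).
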
def prepare_explanation(explanation_lines: List[str]) -> str:
    return ' '.join(explanation_lines)

def prepare_aliases(captured_aliases: str) -> str:
    aliases = []
    for alias in captured_aliases.split(', '):
        aliases.append(alias.strip("'"))
    return ', '.join(aliases)

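# For example, prepare_aliases("'happy', 'grinning'") returns the
# plain comma-separated string "happy, grinning" (illustrative input,
# matching the quoted list captured by name_entry_regex).
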
def main() -> None:
    description = ("This script is used for exporting `emoji_names.py` to a comma-separated "
                   "file. It takes the path of the output CSV file and the path to "
                   "`emoji_names.py` as arguments.")
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file", dest="input_file_path", type=str, metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.py"),
        help="Path to the file from which data is to be read.")
    parser.add_argument(
        "--output-file", dest="output_file_path", type=str, metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.csv"),
        help="Path to the output CSV file.")

    args = parser.parse_args()
    prepare_sorting_info()
    output_data = [column_names, ]
    explanation_lines = []  # type: List[str]
    with open(args.input_file_path) as fp:
        for line in fp:
            match = name_entry_regex.search(line)
            if match is not None:
                emoji_code = match.group('emoji_code')
                sort_info = get_sorting_info(emoji_code)
                canonical_name = match.group('canonical_name')
                aliases = prepare_aliases(match.group('aliases'))
                explanation = prepare_explanation(explanation_lines)
                output_data.append([
                    emoji_code,
                    sort_info,
                    canonical_name,
                    aliases,
                    explanation,
                ])
                explanation_lines = []
                continue

            match = explanation_regex.search(line)
            if match is not None:
                explanation_line = match.group('explanation_line').strip()
                explanation_lines.append(explanation_line)

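    # At this point output_data holds the header row followed by one
    # row per matched entry; an illustrative (made-up) data row would
    # be ['1f604', '1-People 003', 'smile', 'happy, grinning', ''].
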
    # The CSV file exported by Google Sheets doesn't have a newline
    # character at the end, so we strip the trailing line terminator
    # here so that the round-trip conversion test passes. Note that
    # the 'excel' dialect terminates rows with '\r\n', hence we open
    # with newline='' and truncate two characters.
    with open(args.output_file_path, 'w', newline='') as fp:
        writer = csv.writer(fp, dialect='excel')
        writer.writerows(output_data)
        fp.truncate(fp.tell() - len('\r\n'))

if __name__ == "__main__":
    main()