2017-11-08 19:40:43 +01:00
|
|
|
# This file contains various helper functions used by `build_emoji` tool.
|
|
|
|
# See docs/subsystems/emoji.md for details on how this system works.
|
2017-01-26 08:35:23 +01:00
|
|
|
from collections import defaultdict
|
2017-11-08 17:14:52 +01:00
|
|
|
from typing import Any, Dict, List
|
2017-01-26 08:35:23 +01:00
|
|
|
|
2021-09-08 21:21:51 +02:00
|
|
|
# Emoji sets that we currently support.
|
2021-02-12 08:20:45 +01:00
|
|
|
EMOJISETS = ["google", "twitter"]
|
2017-04-01 17:20:32 +02:00
|
|
|
|
2018-04-23 06:50:11 +02:00
|
|
|
# Some image files in the old emoji farm had a different name than in the new emoji
|
|
|
|
# farm. `remapped_emojis` is a map that contains a mapping of their name in the old
|
|
|
|
# emoji farm to their name in the new emoji farm.
|
2018-08-09 21:27:23 +02:00
|
|
|
REMAPPED_EMOJIS = {
|
2021-02-12 08:19:30 +01:00
|
|
|
"0023": "0023-20e3", # Hash
|
|
|
|
"0030": "0030-20e3", # Zero
|
|
|
|
"0031": "0031-20e3", # One
|
|
|
|
"0032": "0032-20e3", # Two
|
|
|
|
"0033": "0033-20e3", # Three
|
|
|
|
"0034": "0034-20e3", # Four
|
|
|
|
"0035": "0035-20e3", # Five
|
|
|
|
"0036": "0036-20e3", # Six
|
|
|
|
"0037": "0037-20e3", # Seven
|
|
|
|
"0038": "0038-20e3", # Eight
|
|
|
|
"0039": "0039-20e3", # Nine
|
|
|
|
"1f1e8": "1f1e8-1f1f3", # cn
|
|
|
|
"1f1e9": "1f1e9-1f1ea", # de
|
|
|
|
"1f1ea": "1f1ea-1f1f8", # es
|
|
|
|
"1f1eb": "1f1eb-1f1f7", # fr
|
|
|
|
"1f1ec": "1f1ec-1f1e7", # gb/us
|
|
|
|
"1f1ee": "1f1ee-1f1f9", # it
|
|
|
|
"1f1ef": "1f1ef-1f1f5", # jp
|
|
|
|
"1f1f0": "1f1f0-1f1f7", # kr
|
|
|
|
"1f1f7": "1f1f7-1f1fa", # ru
|
|
|
|
"1f1fa": "1f1fa-1f1f8", # us
|
2018-04-23 06:50:11 +02:00
|
|
|
}
|
|
|
|
|
2018-07-20 11:37:39 +02:00
|
|
|
# Emoticons and which emoji they should become. Duplicate emoji are allowed.
|
2023-03-07 16:10:59 +01:00
|
|
|
# Changes here should be mimicked in `help/configure-emoticon-translations.md`.
|
2018-07-20 11:37:39 +02:00
|
|
|
EMOTICON_CONVERSIONS = {
|
2021-02-12 08:20:45 +01:00
|
|
|
":)": ":smile:",
|
|
|
|
"(:": ":smile:",
|
|
|
|
":(": ":frown:",
|
|
|
|
"<3": ":heart:",
|
|
|
|
":|": ":neutral:",
|
|
|
|
":/": ":confused:",
|
|
|
|
";)": ":wink:",
|
|
|
|
":D": ":grinning:",
|
|
|
|
":o": ":open_mouth:",
|
|
|
|
":O": ":open_mouth:",
|
|
|
|
":p": ":stuck_out_tongue:",
|
|
|
|
":P": ":stuck_out_tongue:",
|
2018-07-20 11:37:39 +02:00
|
|
|
}
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-22 22:03:24 +01:00
|
|
|
def emoji_names_for_picker(emoji_name_maps: Dict[str, Dict[str, Any]]) -> List[str]:
|
2020-04-22 01:09:50 +02:00
|
|
|
emoji_names: List[str] = []
|
2017-11-08 19:40:43 +01:00
|
|
|
for emoji_code, name_info in emoji_name_maps.items():
|
|
|
|
emoji_names.append(name_info["canonical_name"])
|
|
|
|
emoji_names.extend(name_info["aliases"])
|
2017-01-26 08:35:23 +01:00
|
|
|
|
2017-11-08 19:40:43 +01:00
|
|
|
return sorted(emoji_names)
|
2017-03-19 09:41:24 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 15:03:38 +02:00
|
|
|
def get_emoji_code(emoji_dict: Dict[str, Any]) -> str:
|
2018-04-23 07:48:19 +02:00
|
|
|
# Starting from version 4.0.0, `emoji_datasource` package has started to
|
|
|
|
# add an emoji presentation variation selector for certain emojis which
|
|
|
|
# have defined variation sequences. Since in informal environments(like
|
|
|
|
# texting and chat), it is more appropriate for an emoji to have a colorful
|
|
|
|
# display so until emoji characters have a text presentation selector, it
|
|
|
|
# should have a colorful display. Hence we can continue using emoji characters
|
|
|
|
# without appending emoji presentation selector.
|
|
|
|
# (http://unicode.org/reports/tr51/index.html#Presentation_Style)
|
2018-08-26 14:26:18 +02:00
|
|
|
# If `non_qualified` field is present and not None return it otherwise
|
|
|
|
# return `unified` field.
|
|
|
|
emoji_code = emoji_dict.get("non_qualified") or emoji_dict["unified"]
|
2018-04-23 15:03:38 +02:00
|
|
|
return emoji_code.lower()
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-03-19 09:41:24 +01:00
|
|
|
# Returns a dict from categories to list of codepoints. The list of
|
|
|
|
# codepoints are sorted according to the `sort_order` as defined in
|
|
|
|
# `emoji_data`.
|
2021-02-12 08:19:30 +01:00
|
|
|
def generate_emoji_catalog(
|
|
|
|
emoji_data: List[Dict[str, Any]], emoji_name_maps: Dict[str, Dict[str, Any]]
|
|
|
|
) -> Dict[str, List[str]]:
|
2020-04-22 01:09:50 +02:00
|
|
|
sort_order: Dict[str, int] = {}
|
|
|
|
emoji_catalog: Dict[str, List[str]] = defaultdict(list)
|
2017-11-08 19:40:43 +01:00
|
|
|
|
|
|
|
for emoji_dict in emoji_data:
|
2018-04-23 15:03:38 +02:00
|
|
|
emoji_code = get_emoji_code(emoji_dict)
|
2017-11-08 19:40:43 +01:00
|
|
|
if not emoji_is_universal(emoji_dict) or emoji_code not in emoji_name_maps:
|
2017-03-21 04:58:21 +01:00
|
|
|
continue
|
2017-11-08 19:40:43 +01:00
|
|
|
category = emoji_dict["category"]
|
|
|
|
sort_order[emoji_code] = emoji_dict["sort_order"]
|
|
|
|
emoji_catalog[category].append(emoji_code)
|
|
|
|
|
|
|
|
# Sort the emojis according to iamcal's sort order. This sorting determines the
|
|
|
|
# order in which emojis will be displayed in emoji picker.
|
2017-03-19 09:41:24 +01:00
|
|
|
for category in emoji_catalog:
|
2017-11-08 19:40:43 +01:00
|
|
|
emoji_catalog[category].sort(key=lambda emoji_code: sort_order[emoji_code])
|
|
|
|
|
|
|
|
return dict(emoji_catalog)
|
2017-03-21 04:58:21 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-03-21 04:58:21 +01:00
|
|
|
# Use only those names for which images are present in all
|
2020-03-28 01:25:56 +01:00
|
|
|
# the emoji sets so that we can switch emoji sets seamlessly.
|
2017-11-22 22:03:24 +01:00
|
|
|
def emoji_is_universal(emoji_dict: Dict[str, Any]) -> bool:
|
2023-01-05 07:22:44 +01:00
|
|
|
return all(emoji_dict["has_img_" + emoji_set] for emoji_set in EMOJISETS)
|
2017-05-23 17:15:26 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-22 22:03:24 +01:00
|
|
|
def generate_codepoint_to_name_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
|
2020-04-22 01:09:50 +02:00
|
|
|
codepoint_to_name: Dict[str, str] = {}
|
2017-11-08 19:40:43 +01:00
|
|
|
for emoji_code, name_info in emoji_name_maps.items():
|
|
|
|
codepoint_to_name[emoji_code] = name_info["canonical_name"]
|
2017-05-23 17:15:26 +02:00
|
|
|
return codepoint_to_name
|
2017-10-01 15:19:58 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2022-06-21 23:56:52 +02:00
|
|
|
def generate_codepoint_to_names_map(
|
|
|
|
emoji_name_maps: Dict[str, Dict[str, Any]]
|
|
|
|
) -> Dict[str, List[str]]:
|
|
|
|
codepoint_to_names: Dict[str, List[str]] = {}
|
|
|
|
for emoji_code, name_info in emoji_name_maps.items():
|
|
|
|
# The first element of the names list is always the canonical name.
|
|
|
|
names = [name_info["canonical_name"]]
|
|
|
|
for alias in name_info["aliases"]:
|
|
|
|
names.append(alias)
|
|
|
|
codepoint_to_names[emoji_code] = names
|
|
|
|
return codepoint_to_names
|
|
|
|
|
|
|
|
|
2017-11-22 22:03:24 +01:00
|
|
|
def generate_name_to_codepoint_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
|
2017-11-08 19:40:43 +01:00
|
|
|
name_to_codepoint = {}
|
|
|
|
for emoji_code, name_info in emoji_name_maps.items():
|
|
|
|
canonical_name = name_info["canonical_name"]
|
|
|
|
aliases = name_info["aliases"]
|
|
|
|
name_to_codepoint[canonical_name] = emoji_code
|
|
|
|
for alias in aliases:
|
|
|
|
name_to_codepoint[alias] = emoji_code
|
|
|
|
return name_to_codepoint
|