# zulip/tools/setup/emoji/emoji_setup_utils.py

# This file contains various helper functions used by `build_emoji` tool.
# See docs/subsystems/emoji.md for details on how this system works.

from collections import defaultdict

from typing import Any, Dict, List

# Emoji sets that we currently support. `emoji_is_universal()` checks the
# `has_img_<set>` flag of an emoji for every name listed here.
EMOJISETS = ['google', 'twitter']

# Some image files in the old emoji farm had a different name than in the new
# emoji farm. `REMAPPED_EMOJIS` maps the name used in the old emoji farm (key)
# to the name used in the new emoji farm (value). Every key here is a single
# codepoint and every value is that codepoint followed by a second one:
# `20e3` for the keycap entries, a second regional-indicator codepoint for
# the flag entries.
REMAPPED_EMOJIS = {
    "0023": "0023-20e3",         # Hash
    "0030": "0030-20e3",         # Zero
    "0031": "0031-20e3",         # One
    "0032": "0032-20e3",         # Two
    "0033": "0033-20e3",         # Three
    "0034": "0034-20e3",         # Four
    "0035": "0035-20e3",         # Five
    "0036": "0036-20e3",         # Six
    "0037": "0037-20e3",         # Seven
    "0038": "0038-20e3",         # Eight
    "0039": "0039-20e3",         # Nine
    "1f1e8": "1f1e8-1f1f3",      # cn
    "1f1e9": "1f1e9-1f1ea",      # de
    "1f1ea": "1f1ea-1f1f8",      # es
    "1f1eb": "1f1eb-1f1f7",      # fr
    "1f1ec": "1f1ec-1f1e7",      # gb (NOTE(review): was labelled "gb/us"; presumably meant gb/uk -- confirm)
    "1f1ee": "1f1ee-1f1f9",      # it
    "1f1ef": "1f1ef-1f1f5",      # jp
    "1f1f0": "1f1f0-1f1f7",      # kr
    "1f1f7": "1f1f7-1f1fa",      # ru
    "1f1fa": "1f1fa-1f1f8",      # us
}

# Emoticons and which emoji they should become. Keys are the literal emoticon
# text; values are emoji names wrapped in colons. Duplicate emoji are allowed,
# i.e. several emoticons may map to the same emoji (as `:)` and `(:` do).
# Changes here should be mimicked in `templates/zerver/help/enable-emoticon-translations.md`.
EMOTICON_CONVERSIONS = {
    ':)': ':slight_smile:',
    '(:': ':slight_smile:',
    ':(': ':frown:',
    '<3': ':heart:',
    ':|': ':expressionless:',
    ':/': ':confused:',
}

def emoji_names_for_picker(emoji_name_maps: Dict[str, Dict[str, Any]]) -> List[str]:
    """Return all emoji names known to `emoji_name_maps`, sorted.

    For every entry, both its "canonical_name" and each of its "aliases"
    are included. The emoji codes (the keys of the map) are not used.
    """
    all_names = []  # type: List[str]
    for info in emoji_name_maps.values():
        all_names += [info["canonical_name"]]
        all_names += info["aliases"]

    all_names.sort()
    return all_names

def get_emoji_code(emoji_dict: Dict[str, Any]) -> str:
    """Return the lowercased emoji code to use for `emoji_dict`.

    The "non_qualified" field is preferred when it is present and truthy;
    otherwise the "unified" field is used (and must exist).
    """
    # Since version 4.0.0 the `emoji_datasource` package appends an emoji
    # presentation variation selector to emojis that have defined variation
    # sequences. In informal settings such as texting and chat a colorful
    # rendering is the appropriate default, so unless a character carries a
    # text presentation selector it should be shown in color. We can
    # therefore keep using the emoji characters without the presentation
    # selector appended.
    # (http://unicode.org/reports/tr51/index.html#Presentation_Style)
    non_qualified = emoji_dict.get("non_qualified")
    if non_qualified:
        return non_qualified.lower()
    # Old datasources have no (or an empty) `non_qualified` field.
    return emoji_dict["unified"].lower()

def generate_emoji_catalog(emoji_data: List[Dict[str, Any]],
                           emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, List[str]]:
    """Return a dict mapping each category to its list of emoji codes.

    Only entries of `emoji_data` that are universal (see
    `emoji_is_universal`) and whose code is a key of `emoji_name_maps`
    are included. Within a category the codes are ordered by the
    "sort_order" value of their `emoji_data` entry.
    """
    position = {}  # type: Dict[str, int]
    by_category = defaultdict(list)  # type: Dict[str, List[str]]

    for entry in emoji_data:
        code = get_emoji_code(entry)
        if emoji_is_universal(entry) and code in emoji_name_maps:
            category = entry["category"]
            position[code] = entry["sort_order"]
            by_category[category].append(code)

    # Order the emojis by iamcal's sort order; this is the order in which
    # they will be displayed in the emoji picker.
    return {
        category: sorted(codes, key=position.__getitem__)
        for category, codes in by_category.items()
    }

def emoji_is_universal(emoji_dict: Dict[str, Any]) -> bool:
    """Return True if `emoji_dict` has an image in every set in EMOJISETS.

    Only such emoji are used, so that we can switch emoji sets seamlessly.
    """
    return all(emoji_dict['has_img_' + set_name] for set_name in EMOJISETS)

def generate_codepoint_to_name_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
    """Return a dict mapping each emoji code to its "canonical_name"."""
    return {
        code: info["canonical_name"]
        for code, info in emoji_name_maps.items()
    }

def generate_name_to_codepoint_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
    """Return a dict mapping every emoji name to its emoji code.

    Both the "canonical_name" and each of the "aliases" of an entry map to
    that entry's code. If the same name occurs more than once, the entry
    seen last while iterating `emoji_name_maps` wins.
    """
    code_for_name = {}  # type: Dict[str, str]
    for code, info in emoji_name_maps.items():
        # Canonical name first, then the aliases, matching insertion order.
        for name in (info["canonical_name"], *info["aliases"]):
            code_for_name[name] = code
    return code_for_name
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			# This file contains various helper functions used by `build_emoji` tool.
			`# See docs/subsystems/emoji.md for details on how this system works.`
emoji: Remove duplicates from autocomplete and emoji picker. Previously, if you searched for ':offi..' you would see both :office: and :office_building: as possible completions, both of which are shortcodes for the same unicode codepoint (and hence which have the same image). Also, we sort the emoji in our emoji pickers alphabetically by shortcode, and so the images for :office: and :office_building: show up next to each other, which looks like a bug. This removes :office_building: as a shortcode, along with several hundred other duplicates. It leaves some duplicates in that won't give autocomplete or alphabetical ordering a problem, like (:car:, :automobile:). 2017-01-26 08:35:23 +01:00
			`from collections import defaultdict`

build_emoji: Migrate to python3. 2017-11-08 17:14:52 +01:00			`from typing import Any, Dict, List`
emoji: Remove duplicates from autocomplete and emoji picker. Previously, if you searched for ':offi..' you would see both :office: and :office_building: as possible completions, both of which are shortcodes for the same unicode codepoint (and hence which have the same image). Also, we sort the emoji in our emoji pickers alphabetically by shortcode, and so the images for :office: and :office_building: show up next to each other, which looks like a bug. This removes :office_building: as a shortcode, along with several hundred other duplicates. It leaves some duplicates in that won't give autocomplete or alphabetical ordering a problem, like (:car:, :automobile:). 2017-01-26 08:35:23 +01:00
build_emoji: Generate CSS files for all the emoji sets. Modify the `build_emoji` tool to copy spritesheets for all the emojisets to emoji cache and generate CSS files for them. 2017-04-01 17:20:32 +02:00			`# Emojisets that we currently support.`
emoji: Finish removing leftover code from banned Emoji sets. Signed-off-by: Anders Kaseorg <anders@zulipchat.com> 2019-09-18 22:33:00 +02:00			`EMOJISETS = ['google', 'twitter']`
build_emoji: Generate CSS files for all the emoji sets. Modify the `build_emoji` tool to copy spritesheets for all the emojisets to emoji cache and generate CSS files for them. 2017-04-01 17:20:32 +02:00
emoji: Move `remapped_emojis` list to emoji_setup_utils.py. 2018-04-23 06:50:11 +02:00			`# Some image files in the old emoji farm had a different name than in the new emoji`
			# farm. `remapped_emojis` is a map that contains a mapping of their name in the old
			`# emoji farm to their name in the new emoji farm.`
emoji: Remove now-unnecessary `get_remapped_emojis_map()`. 2018-08-09 21:27:23 +02:00			`REMAPPED_EMOJIS = {`
emoji: Fix some emoji images not loading in missed message emails. `emoji-datasource` package v4.0.4 introduced the concept of qualified and non-qualified emoji codes. As chat programs don't need to use emoji representation selector, so we used migrated our infrastructure to use non-qualified emoji codes. But we missed the fact that the emoji file names in emoji farm are based on emoji data's 'unified' field and the value of this field has changed. Consequently the image file names must also have been changed. We used `emoji_code` while converting the span tags to img tags while processing notifications. But since now `emoji_code` refers to non-qualified code while image file names are based on qualified code, we need to rename images to correctly do the conversion. This commit just fixes this. 2018-08-09 21:06:21 +02:00			`"0023": "0023-20e3", # Hash`
			`"0030": "0030-20e3", # Zero`
			`"0031": "0031-20e3", # One`
			`"0032": "0032-20e3", # Two`
			`"0033": "0033-20e3", # Three`
			`"0034": "0034-20e3", # Four`
			`"0035": "0035-20e3", # Five`
			`"0036": "0036-20e3", # Six`
			`"0037": "0037-20e3", # Seven`
			`"0038": "0038-20e3", # Eight`
			`"0039": "0039-20e3", # Nine`
emoji: Update `emoji-datasource` packages. This commit updates the `emoji-datasource` packages to version 4.0.4. This update brings following changes to emoji infra: 1: Fix for the bleeding sprite sheets. 2: The category of some emojis has been changed. Categorywise breakup of net gain or loss is as follows: Travel & Places: 58 (gain) Symbols: 47 (loss) Smileys & People: 52 (gain) Objects: 11 (loss) Food & Drink: 3 (gain) Animals and Nature: 46 (gain) Activities: 9 (loss) 3: There were some changes in the image farm of the package which were breaking our old emoji farm. I fixed them by modifying the remapped emoji map. Fixes: #8235. 2018-04-23 07:48:19 +02:00			`"1f1e8": "1f1e8-1f1f3", # cn`
			`"1f1e9": "1f1e9-1f1ea", # de`
			`"1f1ea": "1f1ea-1f1f8", # es`
			`"1f1eb": "1f1eb-1f1f7", # fr`
			`"1f1ec": "1f1ec-1f1e7", # gb/us`
			`"1f1ee": "1f1ee-1f1f9", # it`
			`"1f1ef": "1f1ef-1f1f5", # jp`
			`"1f1f0": "1f1f0-1f1f7", # kr`
			`"1f1f7": "1f1f7-1f1fa", # ru`
			`"1f1fa": "1f1fa-1f1f8", # us`
emoji: Move `remapped_emojis` list to emoji_setup_utils.py. 2018-04-23 06:50:11 +02:00			`}`

emoji: Move `EMOTICON_CONVERSIONS` mapping to build_emoji infra. This commit closes a long pending issue which involved moving the `EMOTICON_CONVERSION` mapping to build_emoji infrastructure so that there is only one source of truth. This was pending from the time when this feature was implemented. 2018-07-20 11:37:39 +02:00			`# Emoticons and which emoji they should become. Duplicate emoji are allowed.`
			# Changes here should be mimicked in `templates/zerver/help/enable-emoticon-translations.md`.
			`EMOTICON_CONVERSIONS = {`
emoji: Change emoticon mapping for `:)`, `(:` and `:(`. See discussion on CZO: https://chat.zulip.org/#narrow/stream/101-design/subject/emoji.20picker/near/617811 2018-07-26 19:35:28 +02:00			`':)': ':slight_smile:',`
			`'(:': ':slight_smile:',`
			`':(': ':frown:',`
emoji: Move `EMOTICON_CONVERSIONS` mapping to build_emoji infra. This commit closes a long pending issue which involved moving the `EMOTICON_CONVERSION` mapping to build_emoji infrastructure so that there is only one source of truth. This was pending from the time when this feature was implemented. 2018-07-20 11:37:39 +02:00			`'<3': ':heart:',`
			`':\|': ':expressionless:',`
			`':/': ':confused:',`
			`}`

emoji_setup_utils: Migrate to python3 type annotations. 2017-11-22 22:03:24 +01:00			`def emoji_names_for_picker(emoji_name_maps: Dict[str, Dict[str, Any]]) -> List[str]:`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`emoji_names = [] # type: List[str]`
			`for emoji_code, name_info in emoji_name_maps.items():`
			`emoji_names.append(name_info["canonical_name"])`
			`emoji_names.extend(name_info["aliases"])`
emoji: Remove duplicates from autocomplete and emoji picker. Previously, if you searched for ':offi..' you would see both :office: and :office_building: as possible completions, both of which are shortcodes for the same unicode codepoint (and hence which have the same image). Also, we sort the emoji in our emoji pickers alphabetically by shortcode, and so the images for :office: and :office_building: show up next to each other, which looks like a bug. This removes :office_building: as a shortcode, along with several hundred other duplicates. It leaves some duplicates in that won't give autocomplete or alphabetical ordering a problem, like (:car:, :automobile:). 2017-01-26 08:35:23 +01:00
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`return sorted(emoji_names)`
build_emoji: Add `emoji_catalog` to emoji_code.js Use `emoji.json` to create a emoji catalog and add it to `emoji_code.js` file. This catalog contains the unicode codepoints of all the emojis grouped according to their category. Emojis are sorted according to the `sort_order` defined in the iamcal's dataset. 2017-03-19 09:41:24 +01:00
emoji: Extract `get_emoji_code()`. 2018-04-23 15:03:38 +02:00			`def get_emoji_code(emoji_dict: Dict[str, Any]) -> str:`
emoji: Update `emoji-datasource` packages. This commit updates the `emoji-datasource` packages to version 4.0.4. This update brings following changes to emoji infra: 1: Fix for the bleeding sprite sheets. 2: The category of some emojis has been changed. Categorywise breakup of net gain or loss is as follows: Travel & Places: 58 (gain) Symbols: 47 (loss) Smileys & People: 52 (gain) Objects: 11 (loss) Food & Drink: 3 (gain) Animals and Nature: 46 (gain) Activities: 9 (loss) 3: There were some changes in the image farm of the package which were breaking our old emoji farm. I fixed them by modifying the remapped emoji map. Fixes: #8235. 2018-04-23 07:48:19 +02:00			# Starting from version 4.0.0, `emoji_datasource` package has started to
			`# add an emoji presentation variation selector for certain emojis which`
			`# have defined variation sequences. Since in informal environments(like`
			`# texting and chat), it is more appropriate for an emoji to have a colorful`
			`# display so until emoji characters have a text presentation selector, it`
			`# should have a colorful display. Hence we can continue using emoji characters`
			`# without appending emoji presentation selector.`
			`# (http://unicode.org/reports/tr51/index.html#Presentation_Style)`
build_emoji: Refactor `get_emoji_code()` to not fail for old datasources. If `non_qualified` field is not present then return the value of `unified` field instead of failing. 2018-08-26 14:26:18 +02:00			# If `non_qualified` field is present and not None return it otherwise
			# return `unified` field.
			`emoji_code = emoji_dict.get("non_qualified") or emoji_dict["unified"]`
emoji: Extract `get_emoji_code()`. 2018-04-23 15:03:38 +02:00			`return emoji_code.lower()`

build_emoji: Add `emoji_catalog` to emoji_code.js Use `emoji.json` to create a emoji catalog and add it to `emoji_code.js` file. This catalog contains the unicode codepoints of all the emojis grouped according to their category. Emojis are sorted according to the `sort_order` defined in the iamcal's dataset. 2017-03-19 09:41:24 +01:00			`# Returns a dict from categories to list of codepoints. The list of`
			# codepoints are sorted according to the `sort_order` as defined in
			# `emoji_data`.
emoji_setup_utils: Migrate to python3 type annotations. 2017-11-22 22:03:24 +01:00			`def generate_emoji_catalog(emoji_data: List[Dict[str, Any]],`
			`emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, List[str]]:`
pep8: Add compliance with rule E261 emoji_setup_utils.py. 2017-05-31 23:35:57 +02:00			`sort_order = {} # type: Dict[str, int]`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`emoji_catalog = defaultdict(list) # type: Dict[str, List[str]]`

			`for emoji_dict in emoji_data:`
emoji: Extract `get_emoji_code()`. 2018-04-23 15:03:38 +02:00			`emoji_code = get_emoji_code(emoji_dict)`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`if not emoji_is_universal(emoji_dict) or emoji_code not in emoji_name_maps:`
emoji: Only include universal emoji in catalog. Extracted from "Interrelated emoji infrastructure changes." by tabbott. 2017-03-21 04:58:21 +01:00			`continue`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`category = emoji_dict["category"]`
			`sort_order[emoji_code] = emoji_dict["sort_order"]`
			`emoji_catalog[category].append(emoji_code)`

			`# Sort the emojis according to iamcal's sort order. This sorting determines the`
			`# order in which emojis will be displayed in emoji picker.`
build_emoji: Add `emoji_catalog` to emoji_code.js Use `emoji.json` to create a emoji catalog and add it to `emoji_code.js` file. This catalog contains the unicode codepoints of all the emojis grouped according to their category. Emojis are sorted according to the `sort_order` defined in the iamcal's dataset. 2017-03-19 09:41:24 +01:00			`for category in emoji_catalog:`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`emoji_catalog[category].sort(key=lambda emoji_code: sort_order[emoji_code])`

			`return dict(emoji_catalog)`
emoji: Only include universal emoji in catalog. Extracted from "Interrelated emoji infrastructure changes." by tabbott. 2017-03-21 04:58:21 +01:00
			`# Use only those names for which images are present in all`
			`# the emoji sets so that we can switch emoji sets seemlessly.`
emoji_setup_utils: Migrate to python3 type annotations. 2017-11-22 22:03:24 +01:00			`def emoji_is_universal(emoji_dict: Dict[str, Any]) -> bool:`
build_emoji: Generate CSS files for all the emoji sets. Modify the `build_emoji` tool to copy spritesheets for all the emojisets to emoji cache and generate CSS files for them. 2017-04-01 17:20:32 +02:00			`for emoji_set in EMOJISETS:`
emoji: Only include universal emoji in catalog. Extracted from "Interrelated emoji infrastructure changes." by tabbott. 2017-03-21 04:58:21 +01:00			`if not emoji_dict['has_img_' + emoji_set]:`
			`return False`
			`return True`
build_emoji: Generate `codepoint_to_name.json` file. Store the `codepoint_to_name` map in a JSON file which can be used by the rest of codebase(zerver) to convert emoji codepoints to canonical names. 2017-05-23 17:15:26 +02:00
emoji_setup_utils: Migrate to python3 type annotations. 2017-11-22 22:03:24 +01:00			`def generate_codepoint_to_name_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:`
build_emoji: Generate `codepoint_to_name.json` file. Store the `codepoint_to_name` map in a JSON file which can be used by the rest of codebase(zerver) to convert emoji codepoints to canonical names. 2017-05-23 17:15:26 +02:00			`codepoint_to_name = {} # type: Dict[str, str]`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`for emoji_code, name_info in emoji_name_maps.items():`
			`codepoint_to_name[emoji_code] = name_info["canonical_name"]`
build_emoji: Generate `codepoint_to_name.json` file. Store the `codepoint_to_name` map in a JSON file which can be used by the rest of codebase(zerver) to convert emoji codepoints to canonical names. 2017-05-23 17:15:26 +02:00			`return codepoint_to_name`
emoji_setup_utils.py: Add `emoji_can_be_included()` function. 2017-10-01 15:19:58 +02:00
emoji_setup_utils: Migrate to python3 type annotations. 2017-11-22 22:03:24 +01:00			`def generate_name_to_codepoint_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:`
build_emoji: Migrate to use `emoji_names.py` file. This migrates Zulip to use a dramatically better set of names and aliases for our emoji set, defined in emoji_names.py (which is in turn manually generated from our hand-curated CSV file). This should significantly improve the experience of using Zulip's emoji picker and emoji typeahead for finding what one is looking for. 2017-11-08 19:40:43 +01:00			`name_to_codepoint = {}`
			`for emoji_code, name_info in emoji_name_maps.items():`
			`canonical_name = name_info["canonical_name"]`
			`aliases = name_info["aliases"]`
			`name_to_codepoint[canonical_name] = emoji_code`
			`for alias in aliases:`
			`name_to_codepoint[alias] = emoji_code`
			`return name_to_codepoint`