zulip/tools/setup/emoji/generate_emoji_names

#!/usr/bin/env python3
import os
import re
import sys
from collections import defaultdict

import orjson

# Directory three levels above the one containing this script. The project-local
# `tools.` and `zerver.` imports below are resolved relative to it, which is
# presumably why it is appended to sys.path before they run.
ZULIP_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../")
sys.path.append(ZULIP_PATH)

from tools.setup.emoji.custom_emoji_names import CUSTOM_EMOJI_NAME_MAPS
from tools.setup.emoji.emoji_setup_utils import get_emoji_code
from zerver.lib.emoji_utils import hex_codepoint_to_emoji, unqualify_emoji

# Input files, all read from packages under ZULIP_PATH/node_modules:
# * CLDR_DATA_FILE / CLDR_DERIVED_DATA_FILE: English CLDR annotations and derived annotations.
# * EMOJI_DATA_FILE: the emoji-datasource-google `emoji.json` list that main() iterates over.
CLDR_DATA_FILE = os.path.join(
    ZULIP_PATH, "node_modules", "cldr-annotations-modern", "annotations", "en", "annotations.json"
)
CLDR_DERIVED_DATA_FILE = os.path.join(
    ZULIP_PATH,
    "node_modules",
    "cldr-annotations-derived-modern",
    "annotationsDerived",
    "en",
    "annotations.json",
)
EMOJI_DATA_FILE = os.path.join(ZULIP_PATH, "node_modules", "emoji-datasource-google", "emoji.json")
# Output file that main() (re)generates.
OUT_EMOJI_FILE = os.path.join(ZULIP_PATH, "tools", "setup", "emoji", "emoji_names.py")

# The data files are loaded at import time, so importing this module requires them to exist.
with open(EMOJI_DATA_FILE, "rb") as fp:
    EMOJI_DATA = orjson.loads(fp.read())
with open(CLDR_DATA_FILE, "rb") as fp:
    CLDR_DATA = orjson.loads(fp.read())["annotations"]["annotations"]
# Merge the derived annotations into the same dict; for any key present in both files,
# the derived entry replaces the base one.
with open(CLDR_DERIVED_DATA_FILE, "rb") as fp:
    CLDR_DATA.update(orjson.loads(fp.read())["annotationsDerived"]["annotations"])

# We don't include most clock emojis. See `custom_emoji_names` for more context.
# Emoji whose code (as returned by get_emoji_code) is in this list are skipped in main().
# Note that 1f557 is not listed, so it is the one code in the 1f550-1f567 range that is kept.
SKIPPED_CLOCK_EMOJI_CODES = [
    "1f550",
    "1f551",
    "1f552",
    "1f553",
    "1f554",
    "1f555",
    "1f556",
    "1f558",
    "1f559",
    "1f55a",
    "1f55b",
    "1f55c",
    "1f55d",
    "1f55e",
    "1f55f",
    "1f560",
    "1f561",
    "1f562",
    "1f563",
    "1f564",
    "1f565",
    "1f566",
    "1f567",
]

# We don't include the skin tones as emojis that one can search for on their own.
# Emoji whose code (as returned by get_emoji_code) is in this list are skipped in main().
SKIN_TONE_EMOJI_CODES = [
    "1f3fb",
    "1f3fc",
    "1f3fd",
    "1f3fe",
    "1f3ff",
]


def cleanup_name(name: str) -> str:
    """Turn a human-readable emoji name into a lowercase, underscore-separated name.

    Spaces, hyphens and en dashes become underscores; quotes, colons, periods,
    commas, parentheses and apostrophes are dropped; "&" is spelled out as
    "and". Runs of underscores are then collapsed to a single underscore and
    the result is lowercased. Non-ASCII letters are left untouched.
    """
    # Every key is a single character and no replacement text contains a key,
    # so one translate() pass applies all substitutions at once.
    char_map = str.maketrans(
        {
            " ": "_",
            "-": "_",
            "–": "_",
            "“": "",
            "”": "",
            ":": "",
            ".": "",
            ",": "",
            "(": "",
            ")": "",
            "&": "and",
            "‘": "",
            "’": "",
            "'": "",
        }
    )
    substituted = name.translate(char_map)
    collapsed = re.sub(r"_{2,}", "_", substituted)
    return collapsed.lower()


def convert_non_ascii_chars(name: str) -> str:
    """Return `name` with the known accented letters replaced by ASCII letters.

    Only the characters listed in `replacements` are converted; everything
    else is left as is.

    Raises:
        AssertionError: if the converted name still contains a non-ASCII
            character, meaning a new entry needs to be added to `replacements`.
    """
    replacements = {
        "è": "e",
        "ǐ": "i",
        "ó": "o",
        "ā": "a",
        "ō": "o",
        "ñ": "n",
        "ô": "o",
        "ç": "c",
        "é": "e",
        "ã": "a",
        "í": "i",
        "å": "a",
        "ü": "u",
    }
    for before, after in replacements.items():
        name = name.replace(before, after)
    # Raised explicitly instead of via an `assert` statement so that the check is
    # not stripped (and bad names silently returned) when running under `python -O`.
    if not name.isascii():
        raise AssertionError(
            f"{name} still contains non-ascii characters. Add them to convert_non_ascii_chars."
        )
    return name


def main() -> None:
    """Regenerate OUT_EMOJI_FILE from the custom name maps and the CLDR annotations.

    For every entry in EMOJI_DATA (except the skipped clock and skin-tone
    codes), a canonical name and a list of aliases are taken from
    CUSTOM_EMOJI_NAME_MAPS when the emoji has an entry there, and from
    CLDR_DATA otherwise. Aliases that collide with canonical names or with
    each other are then removed, ASCII-only aliases are added for names with
    non-ASCII characters, and the result is written out as a Python module
    defining EMOJI_NAME_MAPS.

    Raises:
        Exception: if two emoji would get the same canonical name.
    """
    all_emojis = {}
    all_canonical_names = set()

    # alias -> codes of the emoji that listed it, as recorded in STEP 2.
    alias_to_emoji_code = defaultdict(list)

    # STEP 1: Generate first draft of all_emojis.
    for emoji_dict in EMOJI_DATA:
        emoji_code = get_emoji_code(emoji_dict)
        if emoji_code in SKIPPED_CLOCK_EMOJI_CODES or emoji_code in SKIN_TONE_EMOJI_CODES:
            continue

        if emoji_code in CUSTOM_EMOJI_NAME_MAPS:
            canonical_name = cleanup_name(CUSTOM_EMOJI_NAME_MAPS[emoji_code]["canonical_name"])
            if canonical_name in all_canonical_names:
                raise Exception(
                    f"{canonical_name} was already added with a different codepoint. "
                    f"Rename it in `custom_emoji_names` or add an entry for {emoji_code}."
                )
            all_canonical_names.add(canonical_name)
            all_emojis[emoji_code] = CUSTOM_EMOJI_NAME_MAPS[emoji_code]
        else:
            # create the unicode character(s) for the emoji, since this is the key into the CLDR data
            # We can't just use emoji_dict["non_qualified"] because of this upstream bug:
            # https://github.com/iamcal/emoji-data/pull/217
            emoji = unqualify_emoji(hex_codepoint_to_emoji(emoji_dict["unified"]))
            if emoji not in CLDR_DATA:
                print(
                    f"{emoji} not found in custom emoji name maps, but also not found in CLDR data. Skipping."
                )
                continue
            # CLDR_DATA[emoji] is of the form {'default': [...], 'tts': [...]}
            # * "tts" is what's used for text-to-speech and always has one item, so we use that
            #    as the canonical name.
            # * "default" has several items in it that we use as aliases.
            # See also: https://www.unicode.org/reports/tr35/tr35-general.html#14-annotations-and-labels
            assert len(CLDR_DATA[emoji]["tts"]) == 1
            canonical_name = cleanup_name(CLDR_DATA[emoji]["tts"][0].strip())
            if canonical_name in all_canonical_names:
                raise Exception(
                    f"{canonical_name} was already added with a different codepoint. "
                    f"Rename it in `custom_emoji_names` or add an entry for {emoji_code}."
                )
            aliases = [cleanup_name(alias.strip()) for alias in CLDR_DATA[emoji]["default"]]
            all_emojis[emoji_code] = {"canonical_name": canonical_name, "aliases": aliases}
            all_canonical_names.add(canonical_name)

    # STEP 2: We don't support having aliases that collide with canonical names for emoji, so remove them.
    for emoji_code, emoji_names in all_emojis.items():
        # Copy the list to not iterate while elements are being deleted.
        aliases = emoji_names["aliases"][:]
        for alias in aliases:
            if alias in all_canonical_names:
                emoji_names["aliases"].remove(alias)
            else:
                alias_to_emoji_code[alias].append(emoji_code)  # This is used in STEP 3.

    # STEP 3: We don't support multiple emoji sharing the same alias, but the CLDR data
    # doesn't have that same restriction, so we have to fix this up to have unique aliases.
    # If the alias was specifically specified in custom_emoji_names, then we can keep just
    # that one, but otherwise there's no particular emoji that is an obvious candidate
    # for the alias so just remove the alias for all relevant emoji.
    for alias in alias_to_emoji_code:
        if len(alias_to_emoji_code[alias]) > 1:
            for emoji_code in alias_to_emoji_code[alias]:
                if emoji_code not in CUSTOM_EMOJI_NAME_MAPS:
                    all_emojis[emoji_code]["aliases"].remove(alias)

    # STEP 4: We keep non-ascii (non-"English") characters in some emoji names if that's the correct
    # way to spell that word, but always add an alias for an ascii-only version of the word.
    for emoji_code, emoji_names in all_emojis.items():
        # The concatenation builds a new list, so appending to "aliases" below is safe.
        for name in [emoji_names["canonical_name"]] + emoji_names["aliases"]:
            # These are known names where we don't have an ascii-only version and there are ascii aliases
            # that a user can still enter instead to get the same emoji.
            if name in ["ココ", "サ", "指", "空"]:
                assert any(alias.isascii() for alias in emoji_names["aliases"])
                continue
            if not name.isascii():
                ascii_alias = convert_non_ascii_chars(name)
                # Now no other emoji can use this alias.
                # alias_to_emoji_code reflects STEP 2 and is not updated afterwards, so an
                # emoji listed there may already have lost this alias (in STEP 3 or earlier
                # in this loop). Only remove it where it is still present; an unconditional
                # remove() would raise ValueError.
                for code in alias_to_emoji_code[ascii_alias]:
                    if ascii_alias in all_emojis[code]["aliases"]:
                        all_emojis[code]["aliases"].remove(ascii_alias)
                all_emojis[emoji_code]["aliases"].append(ascii_alias)

    # STEP 5: Write final dictionary to `emoji_names.py`.
    # Names can contain non-ASCII characters (see STEP 4), so write UTF-8 explicitly
    # instead of relying on the locale's default encoding.
    with open(OUT_EMOJI_FILE, "w", encoding="utf-8") as f:
        f.write(
            "from typing import Any\n\n"
            "# Generated with `generate_emoji_names`.\n\n"
            "EMOJI_NAME_MAPS: dict[str, dict[str, Any]] = {\n"
        )
        for key, emoji_names in all_emojis.items():
            f.write(f"    {key!r}: {emoji_names!r},\n")
        f.write("}\n")

    print(
        "\n\nDone! You should run the linter to format emoji_names.py with `./tools/lint --fix -m --only ruff-format`"
    )


# Regenerate the output file only when run as a script. Note that the input data
# files are still read at module level, i.e. also when this module is imported.
if __name__ == "__main__":
    main()
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								#!/usr/bin/env python3
 								import os
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								import re
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								import sys
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								from collections import defaultdict
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
 								import orjson
 								ZULIP_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../")
 								sys.path.append(ZULIP_PATH)
-												emoji: Import modules relative to ZULIP_PATH.

Newer mypy is less confused this way.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2022-12-04 10:59:47 +01:00
 								from tools.setup.emoji.custom_emoji_names import CUSTOM_EMOJI_NAME_MAPS
 								from tools.setup.emoji.emoji_setup_utils import get_emoji_code
-												generate emoji: Use hex_codepoint_to_emoji.

This new util function helps make the
code here a little more readable.

											
										
										
											2023-08-18 21:49:03 +02:00
+								from zerver.lib.emoji_utils import hex_codepoint_to_emoji, unqualify_emoji
-												emoji: Import modules relative to ZULIP_PATH.

Newer mypy is less confused this way.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2022-12-04 10:59:47 +01:00
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								CLDR_DATA_FILE = os.path.join(
 								    ZULIP_PATH, "node_modules", "cldr-annotations-modern", "annotations", "en", "annotations.json"
 								)
 								CLDR_DERIVED_DATA_FILE = os.path.join(
 								    ZULIP_PATH,
 								    "node_modules",
 								    "cldr-annotations-derived-modern",
 								    "annotationsDerived",
 								    "en",
 								    "annotations.json",
 								)
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								EMOJI_DATA_FILE = os.path.join(ZULIP_PATH, "node_modules", "emoji-datasource-google", "emoji.json")
 								OUT_EMOJI_FILE = os.path.join(ZULIP_PATH, "tools", "setup", "emoji", "emoji_names.py")
 								with open(EMOJI_DATA_FILE, "rb") as fp:
 								    EMOJI_DATA = orjson.loads(fp.read())
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								with open(CLDR_DATA_FILE, "rb") as fp:
 								    CLDR_DATA = orjson.loads(fp.read())["annotations"]["annotations"]
 								with open(CLDR_DERIVED_DATA_FILE, "rb") as fp:
 								    CLDR_DATA.update(orjson.loads(fp.read())["annotationsDerived"]["annotations"])
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
 								# We don't include most clock emojis. See `custom_emoji_names` for more context.
 								SKIPPED_CLOCK_EMOJI_CODES = [
 								    "1f550",
 								    "1f551",
 								    "1f552",
 								    "1f553",
 								    "1f554",
 								    "1f555",
 								    "1f556",
 								    "1f558",
 								    "1f559",
 								    "1f55a",
 								    "1f55b",
 								    "1f55c",
 								    "1f55d",
 								    "1f55e",
 								    "1f55f",
 								    "1f560",
 								    "1f561",
 								    "1f562",
 								    "1f563",
 								    "1f564",
 								    "1f565",
 								    "1f566",
 								    "1f567",
 								]
 								# We don't include the skin tones as emojis that one can search for on their own.
 								SKIN_TONE_EMOJI_CODES = [
 								    "1f3fb",
 								    "1f3fc",
 								    "1f3fd",
 								    "1f3fe",
 								    "1f3ff",
 								]
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								def cleanup_name(name: str) -> str:
 								    replacements = {
 								        " ": "_",
 								        "-": "_",
 								        "–": "_",
 								        "“": "",
 								        "”": "",
 								        ":": "",
 								        ".": "",
-												emoji: Remove commas from emoji names.

Fixes part of #23626.

This was preventing emoji like `:family_man,_woman,_girl,_boy:`
from displaying.

											
										
										
											2022-11-24 21:51:56 +01:00
+								        ",": "",
-												emoji: Remove parens and apostrophes from emoji names.

This was breaking rendering of the emoji with these names.
More context on CZO: https://chat.zulip.org/#narrow/stream/9-issues/topic/issue.20with.20built-in.20emoji.20name/near/1517735

											
										
										
											2023-03-23 22:48:32 +01:00
+								        "(": "",
 								        ")": "",
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								        "&": "and",
-												emoji: Remove parens and apostrophes from emoji names.

This was breaking rendering of the emoji with these names.
More context on CZO: https://chat.zulip.org/#narrow/stream/9-issues/topic/issue.20with.20built-in.20emoji.20name/near/1517735

											
										
										
											2023-03-23 22:48:32 +01:00
+								        "‘": "",
 								        "’": "",
 								        "'": "",
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								    }
 								    for before, after in replacements.items():
 								        name = name.replace(before, after)
-												python: Mark regexes as raw strings.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2024-04-26 20:30:22 +02:00
+								    name = re.sub(r"_{2,}", "_", name)
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								    return name.lower()
 								def convert_non_ascii_chars(name: str) -> str:
 								    replacements = {
 								        "è": "e",
 								        "ǐ": "i",
 								        "ó": "o",
 								        "ā": "a",
 								        "ō": "o",
 								        "ñ": "n",
 								        "ô": "o",
 								        "ç": "c",
 								        "é": "e",
 								        "ã": "a",
 								        "í": "i",
 								        "å": "a",
-												emoji: Run generate_emoji_names with up-to-date data.

											
										
										
											2023-07-27 02:00:41 +02:00
+								        "ü": "u",
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								    }
 								    for before, after in replacements.items():
 								        name = name.replace(before, after)
 								    assert (
 								        name.isascii()
 								    ), f"{name} still contains non-ascii characters. Add them to convert_non_ascii_chars."
 								    return name
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								def main() -> None:
 								    all_emojis = {}
 								    all_canonical_names = set()
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								    alias_to_emoji_code = defaultdict(list)
 								    # STEP 1: Generate first draft of all_emojis.
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								    for emoji_dict in EMOJI_DATA:
 								        emoji_code = get_emoji_code(emoji_dict)
 								        if emoji_code in SKIPPED_CLOCK_EMOJI_CODES or emoji_code in SKIN_TONE_EMOJI_CODES:
 								            continue
 								        if emoji_code in CUSTOM_EMOJI_NAME_MAPS:
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								            canonical_name = cleanup_name(CUSTOM_EMOJI_NAME_MAPS[emoji_code]["canonical_name"])
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								            if canonical_name in all_canonical_names:
 								                raise Exception(
 								                    f"{canonical_name} was already added with a different codepoint. "
 								                    f"Rename it in `custom_emoji_names` or add an entry for {emoji_code}."
 								                )
 								            all_canonical_names.add(canonical_name)
 								            all_emojis[emoji_code] = CUSTOM_EMOJI_NAME_MAPS[emoji_code]
 								        else:
 								            # create the unicode character(s) for the emoji, since this is the key into the CLDR data
-												generate emoji: Use hex_codepoint_to_emoji.

This new util function helps make the
code here a little more readable.

											
										
										
											2023-08-18 21:49:03 +02:00
+								            # We can't just use emoji_dict["non_qualified"] because of this upstream bug:
 								            # https://github.com/iamcal/emoji-data/pull/217
 								            emoji = unqualify_emoji(hex_codepoint_to_emoji(emoji_dict["unified"]))
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								            if emoji not in CLDR_DATA:
 								                print(
 								                    f"{emoji} not found in custom emoji name maps, but also not found in CLDR data. Skipping."
 								                )
 								                continue
 								            # CLDR_DATA[emoji] is of the form {'default': [...], 'tts': [...]}
 								            # * "tts" is what's used for text-to-speech and always has one item, so we use that
 								            #    as the canonical name.
 								            # * "default" has several items in it that we use as aliases.
 								            # See also: https://www.unicode.org/reports/tr35/tr35-general.html#14-annotations-and-labels
 								            assert len(CLDR_DATA[emoji]["tts"]) == 1
 								            canonical_name = cleanup_name(CLDR_DATA[emoji]["tts"][0].strip())
 								            if canonical_name in all_canonical_names:
 								                raise Exception(
 								                    f"{canonical_name} was already added with a different codepoint. "
 								                    f"Rename it in `custom_emoji_names` or add an entry for {emoji_code}."
 								                )
 								            aliases = [cleanup_name(alias.strip()) for alias in CLDR_DATA[emoji]["default"]]
 								            all_emojis[emoji_code] = {"canonical_name": canonical_name, "aliases": aliases}
 								            all_canonical_names.add(canonical_name)
 								    # STEP 2: We don't support having aliases that collide with canonical names for emoji, so remove them.
-												black: Reformat with Black 23.

Black 23 enforces some slightly more specific rules about empty line
counts and redundant parenthesis removal, but the result is still
compatible with Black 22.

(This does not actually upgrade our Python environment to Black 23
yet.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2023-02-02 04:35:24 +01:00
+								    for emoji_code, emoji_names in all_emojis.items():
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								        # Copy the list to not iterate while elements are being deleted.
 								        aliases = emoji_names["aliases"][:]
 								        for alias in aliases:
 								            if alias in all_canonical_names:
 								                emoji_names["aliases"].remove(alias)
 								            else:
 								                alias_to_emoji_code[alias].append(emoji_code)  # This is used in STEP 3.
 								    # STEP 3: We don't support multiple emoji sharing the same alias, but the CLDR data
 								    # doesn't have that same restriction, so we have to fix this up to have unique aliases.
 								    # If the alias was specifically specified in custom_emoji_names, then we can keep just
 								    # that one, but otherwise there's no particular emoji that is an obvious candidate
 								    # for the alias so just remove the alias for all relevant emoji.
-												ruff: Fix SIM118 Use `key in dict` instead of `key in dict.keys()`.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2022-12-12 03:39:16 +01:00
+								    for alias in alias_to_emoji_code:
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								        if len(alias_to_emoji_code[alias]) > 1:
 								            for emoji_code in alias_to_emoji_code[alias]:
 								                if emoji_code not in CUSTOM_EMOJI_NAME_MAPS:
 								                    all_emojis[emoji_code]["aliases"].remove(alias)
 								    # STEP 4: We keep non-ascii (non-"English") characters in some emoji names if that's the correct
 								    # way to spell that word, but always add an alias for an ascii-only version of the word.
-												black: Reformat with Black 23.

Black 23 enforces some slightly more specific rules about empty line
counts and redundant parenthesis removal, but the result is still
compatible with Black 22.

(This does not actually upgrade our Python environment to Black 23
yet.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2023-02-02 04:35:24 +01:00
+								    for emoji_code, emoji_names in all_emojis.items():
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								        for name in [emoji_names["canonical_name"]] + emoji_names["aliases"]:
 								            # These are known names where we don't have an ascii-only version and there are ascii aliases
 								            # that a user can still enter instead to get the same emoji.
 								            if name in ["ココ", "サ", "指", "空"]:
-												generate_emoji_names: Fix use of stale aliases variable.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2023-08-10 06:17:53 +02:00
+								                assert any(alias.isascii() for alias in emoji_names["aliases"])
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								                continue
 								            if not name.isascii():
 								                ascii_alias = convert_non_ascii_chars(name)
 								                # Now no other emoji can use this alias.
 								                for code in alias_to_emoji_code[ascii_alias]:
 								                    all_emojis[code]["aliases"].remove(ascii_alias)
 								                all_emojis[emoji_code]["aliases"].append(ascii_alias)
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								    # STEP 5: Write final dictionary to `emoji_names.py`.
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								    with open(OUT_EMOJI_FILE, "w") as f:
 								        f.write(
-												emoji_names: Rebuild with CLDR 45, emoji-data 15.1.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2024-07-18 03:13:54 +02:00
+								            "from typing import Any\n\n"
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								            "# Generated with `generate_emoji_names`.\n\n"
-												emoji_names: Rebuild with CLDR 45, emoji-data 15.1.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2024-07-18 03:13:54 +02:00
+								            "EMOJI_NAME_MAPS: dict[str, dict[str, Any]] = {\n"
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								        )
-												black: Reformat with Black 23.

Black 23 enforces some slightly more specific rules about empty line
counts and redundant parenthesis removal, but the result is still
compatible with Black 22.

(This does not actually upgrade our Python environment to Black 23
yet.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2023-02-02 04:35:24 +01:00
+								        for key, emoji_names in all_emojis.items():
-												emoji: Finish script to generate emoji_names.py with CLDR data.

This script pulls from our previously custom-written emoji strings
and fills in the rest from CLDR. It also removes 4 custom emoji which
collide with some of the new CLDR names (they will now just be called
by their CLDR name).

											
										
										
											2022-06-10 22:40:31 +02:00
+								            f.write(f"    {key!r}: {emoji_names!r},\n")
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								        f.write("}\n")
 								    print(
-												docs: Update Black and isort references to Ruff.

Signed-off-by: Anders Kaseorg <anders@zulip.com>

											
										
										
											2024-07-18 03:06:49 +02:00
+								        "\n\nDone! You should run the linter to format emoji_names.py with `./tools/lint --fix -m --only ruff-format`"
-												emoji: Add script to generate emoji_names.py, only with custom names.

This script pulls from our previously custom-written emoji strings
to prepare to fill in the rest from CLDR.

This commit has no user-facing changes.

											
										
										
											2022-06-10 22:43:59 +02:00
+								    )
 								if __name__ == "__main__":
 								    main()