mirror of https://github.com/zulip/zulip.git
484 lines
20 KiB
Python
Executable File
484 lines
20 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
#
|
||
# See docs/subsystems/emoji.md for a high-level explanation of how this system
|
||
# works.
|
||
import os
|
||
import shutil
|
||
import subprocess
|
||
import sys
|
||
from collections.abc import Iterator, Sequence
|
||
from typing import Any
|
||
|
||
import orjson
|
||
|
||
ZULIP_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../")
|
||
sys.path.append(ZULIP_PATH)
|
||
|
||
from scripts.lib.zulip_tools import generate_sha1sum_emoji, run_as_root
|
||
from tools.setup.emoji.emoji_names import EMOJI_NAME_MAPS
|
||
from tools.setup.emoji.emoji_setup_utils import (
|
||
EMOTICON_CONVERSIONS,
|
||
REMAPPED_EMOJIS,
|
||
emoji_is_supported,
|
||
emoji_names_for_picker,
|
||
generate_codepoint_to_name_map,
|
||
generate_codepoint_to_names_map,
|
||
generate_emoji_catalog,
|
||
generate_name_to_codepoint_map,
|
||
get_emoji_code,
|
||
)
|
||
|
||
# Root directory for generated emoji output; main() keeps one subdirectory
# per digest returned by generate_sha1sum_emoji underneath it.
EMOJI_CACHE_BASE_PATH = "/srv/zulip-emoji-cache"
# Directory holding this script's data files (e.g. 1f419.png); the module
# also chdirs here when it is loaded.
EMOJI_SCRIPT_DIR_PATH = os.path.join(ZULIP_PATH, "tools", "setup", "emoji")
# Location of the emoji-datasource-* npm packages read by this script.
NODE_MODULES_PATH = os.path.join(ZULIP_PATH, "node_modules")
|
||
|
||
|
||
# The CSS for emoji spritesheet has somewhat tricky requirements. One
|
||
# is that we want to be able to use the same emoji CSS classes for
|
||
# different display sizes of our emoji (e.g. reactions are smaller
|
||
# than inline message emoji, which are smaller than those in the emoji
|
||
# picker) while only downloading 1 copy of the spritesheet, having
|
||
# good browser rendering performance, and reusing as much common CSS
|
||
# as is possible.
|
||
|
||
# Our solution to those problems is to use the `background-size` (which
|
||
# is e.g. 5700%) and background-position attributes to select the
|
||
# region of the spritesheet corresponding to the target sprite and
|
||
# display it properly scaled in an emoji span.
|
||
SPRITE_CSS_FILE_TEMPLATE = """\
|
||
div.emoji,
|
||
span.emoji
|
||
{{
|
||
display: inline-block;
|
||
background-image: url(../emoji/{emojiset}.webp);
|
||
background-size: {background_size};
|
||
background-repeat: no-repeat;
|
||
|
||
/* Hide the text. */
|
||
text-indent: 100%;
|
||
white-space: nowrap;
|
||
overflow: hidden;
|
||
}}
|
||
|
||
.emoji-1f419
|
||
{{
|
||
background-image: url(../../../static/generated/emoji/images-google-64/1f419.png) !important;
|
||
background-position: 0% 0% !important;
|
||
background-size: contain !important;
|
||
}}
|
||
|
||
{emoji_positions}
|
||
"""
|
||
|
||
EMOJI_POS_INFO_TEMPLATE = """\
|
||
.emoji-{codepoint} {{
|
||
background-position: {pos_x} {pos_y};
|
||
}}
|
||
"""
|
||
|
||
EMOJI_OVERRIDE_TEMPLATE = """\
|
||
.emoji-{codepoint} {{
|
||
background-image: url(../../../static/generated/emoji/images-google-64/{codepoint}.png) !important;
|
||
background-position: 0% 0% !important;
|
||
background-size: contain !important;
|
||
}}
|
||
"""
|
||
|
||
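# Run from the emoji script directory so that relative paths such as
# "emoji_map.json" (opened in dump_emojis) resolve.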
|
||
os.chdir(EMOJI_SCRIPT_DIR_PATH)
|
||
|
||
|
||
def main() -> None:
    """Build the emoji cache if needed, then publish it into the source tree.

    The cache lives in a subdirectory of EMOJI_CACHE_BASE_PATH named after
    the digest returned by generate_sha1sum_emoji. A ``.success-stamp`` file
    inside it marks a completed dump; when the stamp is missing the cache is
    regenerated via dump_emojis.
    """
    cache_root_writable = os.access(EMOJI_CACHE_BASE_PATH, os.W_OK)
    if not cache_root_writable:
        # Note: In production, this block will fail, since we don't
        # assume sudo access; but it should never run in production
        # anyway, because EMOJI_CACHE_BASE_PATH is created by Puppet before
        # build_emoji would be run.
        owner = f"{os.getuid()}:{os.getgid()}"
        run_as_root(["mkdir", "-p", EMOJI_CACHE_BASE_PATH])
        run_as_root(["chown", owner, EMOJI_CACHE_BASE_PATH])

    digest = generate_sha1sum_emoji(ZULIP_PATH)
    cache_dir = os.path.join(EMOJI_CACHE_BASE_PATH, digest)
    stamp_path = os.path.join(cache_dir, ".success-stamp")

    if not os.path.exists(stamp_path):
        print("Dumping emojis ...")
        dump_emojis(cache_dir)
        # Create the (empty) stamp only after the dump finished.
        with open(stamp_path, "w"):
            pass

    print(f"build_emoji: Using cached emojis from {cache_dir}")

    # /srv/zulip-emoji-cache/*/static gets symlinked to ZULIP_PATH/static/generated/emoji
    static_link = os.path.join(ZULIP_PATH, "static", "generated", "emoji")
    if os.path.lexists(static_link):
        os.remove(static_link)
    os.symlink(os.path.join(cache_dir, "static"), static_link)

    # /srv/zulip-emoji-cache/*/web gets copied to ZULIP_PATH/web/generated
    # These must not be symlinked so webpack can resolve module references.
    for subdir in ("emoji", "emoji-styles"):
        dest_dir = os.path.join(ZULIP_PATH, "web", "generated", subdir)
        os.makedirs(dest_dir, exist_ok=True)
        previously_present = set(os.listdir(dest_dir))

        src_dir = os.path.join(cache_dir, "web", subdir)
        fresh_names = os.listdir(src_dir)
        for name in fresh_names:
            shutil.copy2(os.path.join(src_dir, name), dest_dir)

        # Anything that was there before but is not in the cache is stale.
        for name in previously_present.difference(fresh_names):
            os.remove(os.path.join(dest_dir, name))
|
||
|
||
|
||
def percent(f: float) -> str:
    """Format the ratio ``f`` as a percentage string with three decimals.

    For example, ``0.5`` becomes ``"50.000%"``.
    """
    scaled = f * 100
    return format(scaled, "0.3f") + "%"
|
||
|
||
|
||
def get_square_size(emoji_data: Sequence[dict[str, Any]]) -> int:
    """Return N for an NxN spritesheet.

    N is inferred as one more than the largest ``sheet_x``/``sheet_y``
    value found on any emoji or on any of its ``skin_variations`` entries.
    """

    def sheet_coordinates(entries: Sequence[dict[str, Any]]) -> Iterator[int]:
        # Yield every x/y offset: the base image first, then each variant.
        for entry in entries:
            yield from (entry["sheet_x"], entry["sheet_y"])
            if "skin_variations" not in entry:
                continue
            for variant in entry["skin_variations"].values():
                yield from (variant["sheet_x"], variant["sheet_y"])

    largest_offset = max(sheet_coordinates(emoji_data))
    return largest_offset + 1
|
||
|
||
|
||
def generate_sprite_css_files(
    cache_path: str,
    emoji_data: list[dict[str, Any]],
    emojiset: str,
    alt_name: str,
    fallback_emoji_data: Sequence[dict[str, Any]],
) -> None:
    """Write ``<emojiset>-sprite.css`` under ``<cache_path>/web/emoji-styles``.

    Args:
        cache_path: Root of the emoji cache being populated.
        emoji_data: Emoji entries for this emoji set; each provides
            ``sheet_x``/``sheet_y`` and the ``has_img_*`` flags read below.
        emojiset: Name of the emoji set; used for the CSS file name and the
            spritesheet URL.
        alt_name: Passed through to the template formatting call; the
            template itself has no ``{alt_name}`` placeholder.
        fallback_emoji_data: Emoji entries consulted only for the
            "google-blob" set, to find emoji that need a Google image
            override.
    """
    # Spritesheets are usually NxN squares.
    n = get_square_size(emoji_data)

    # Each single emoji is 64x64, with 1px gutters on every border.
    # We just consider the gutters to be part of the image for
    # simplicity reasons, so you can think of the spritesheet as
    # an NxN square of 66x66 pre-padded emojis. The CSS
    # background-size parameter below says to size the background
    # element as N times the size of the element that you're drawing.
    #
    # Note that we use percentages here, instead of absolute
    # pixel values, because when we render emojis as actual elements,
    # their size will vary depending on which part of the UI we're
    # in (message emojis, emoji reactions, emoji popup, emoji
    # popup showcase, etc.).
    #
    # (The next step is to offset the image; that is done just below.)
    background_size = percent(n)

    # For background-position we need to use percentages.
    # Absolute pixel values won't work, because the size
    # of the background sprite image is proportional to
    # the size of the element we're rendering, and we render
    # elements in multiple sizes.
    #
    # The way that CSS background-position works is linear
    # interpolation. When you tell CSS background-position
    # is "42% 37%", then in the `x` dimension it will align
    # the image such that 42% of the background image is to
    # the left of the 42% mark in the element itself.
    #
    # For simplicity assume we render the emoji as 66px
    # (and everything will scale appropriately for other
    # size images as long as we use percentages).
    #
    # The image size will be 66n.
    # The left offset of the x-th emoji (including its
    # padding) will be 66x. And the element's width
    # will be 66.
    #
    # So, solve this equation for `p`, where p is
    # the ratio that we'll later express as a
    # percentage:
    #
    #     <image offset> = <offset of p% mark of element>
    #     (p * 66n) = 66x + p66
    #     p * n = x + p
    #     p * n - p = x
    #     p * (n - 1) = x
    #     p = x / (n - 1)
    #
    # If you ever want to change the code so that the
    # gutters don't show up in the element, the algebra
    # will get more complicated.
    #
    # A 1x1 sheet only has offset 0, for which any `p` satisfies the
    # equation; clamp the divisor so that case yields 0% instead of
    # dividing by zero.
    last_index = max(n - 1, 1)
    emoji_positions = "".join(
        EMOJI_POS_INFO_TEMPLATE.format(
            codepoint=get_emoji_code(emoji),
            pos_x=percent(emoji["sheet_x"] / last_index),
            pos_y=percent(emoji["sheet_y"] / last_index),
        )
        for emoji in emoji_data
        if emoji_is_supported(emoji)
    )

    extra_emoji_positions = ""

    # Google Classic stopped being supported in 2017. To be able to use other emoji, we
    # fallback to Google Modern for any emoji not covered by Google Classic.
    if emojiset == "google-blob":
        blob_covered_emoji_codes = {
            get_emoji_code(emoji) for emoji in emoji_data if emoji["has_img_google"]
        }
        extra_emoji_positions += _google_override_css(
            fallback_emoji_data, blob_covered_emoji_codes
        )

    # The Twitter emoji team was laid off in 2022, so new emoji aren't supported.
    # https://github.com/twitter/twemoji/issues/570#issuecomment-1303422143.
    # The "twitter" sprite sheet we’re using does have images in those locations,
    # but they’re fallback images that emoji-datasource fills in from the Apple
    # sprite sheet, which has unclear licensing implications.
    # To be able to support newer emoji, we fallback to Google Modern for any emoji
    # not covered by Twemoji.
    if emojiset == "twitter":
        twitter_covered_emoji_codes = {
            get_emoji_code(emoji) for emoji in emoji_data if emoji["has_img_twitter"]
        }
        extra_emoji_positions += _google_override_css(emoji_data, twitter_covered_emoji_codes)

    SPRITE_STYLES_DIRECTORY = os.path.join(cache_path, "web", "emoji-styles")
    os.makedirs(SPRITE_STYLES_DIRECTORY, exist_ok=True)
    SPRITE_CSS_PATH = os.path.join(SPRITE_STYLES_DIRECTORY, f"{emojiset}-sprite.css")
    with open(SPRITE_CSS_PATH, "w") as f:
        f.write(
            SPRITE_CSS_FILE_TEMPLATE.format(
                emojiset=emojiset,
                alt_name=alt_name,
                emoji_positions=emoji_positions,
                background_size=background_size,
            ),
        )
        # Overrides go after the generated positions, in the same file.
        f.write(extra_emoji_positions)


def _google_override_css(
    candidates: Sequence[dict[str, Any]], covered_codes: set[str]
) -> str:
    """Return override rules for candidates that have a Google image but whose
    code is not in ``covered_codes``."""
    overrides = []
    for emoji in candidates:
        code = get_emoji_code(emoji)
        if emoji["has_img_google"] and code not in covered_codes:
            overrides.append(EMOJI_OVERRIDE_TEMPLATE.format(codepoint=code))
    return "".join(overrides)
|
||
|
||
|
||
def setup_emoji_farms(cache_path: str, emoji_data: list[dict[str, Any]]) -> None:
    """Populate per-emoji-set image farms, sprite CSS and webp spritesheets.

    Sets up the "google" and "twitter" sets from ``emoji_data``, then the
    "google-blob" set from that package's own ``emoji.json``, using
    ``emoji_data`` as its fallback data.

    Args:
        cache_path: Root of the emoji cache being populated.
        emoji_data: Emoji entries for the standard emoji sets.
    """

    def ensure_emoji_image(
        emoji_dict: dict[str, Any], src_emoji_farm: str, target_emoji_farm: str
    ) -> None:
        # We use individual images from emoji farm for rendering emojis
        # in notification messages. We have a custom emoji formatter in
        # notifications processing code that converts `span` tags to
        # `img` and that logic requires us to have non-qualified
        # `emoji_code` as file name for emoji.
        emoji_code = get_emoji_code(emoji_dict)
        img_file_name = emoji_code + ".png"
        src_file = os.path.join(src_emoji_farm, emoji_dict["image"])
        dst_file = os.path.join(target_emoji_farm, img_file_name)
        shutil.copy2(src_file, dst_file)

    def setup_emoji_farm(
        emojiset: str,
        emoji_data: list[dict[str, Any]],
        alt_name: str | None = None,
        # Immutable default: avoids sharing one mutable list across calls.
        fallback_emoji_data: Sequence[dict[str, Any]] = (),
    ) -> None:
        # `alt_name` is an optional parameter that we use to avoid duplicating below
        # code. It is only used while setting up google-blob emoji set as it is just
        # a wrapper for an older version of emoji-datasource package due to which we
        # need to use 'google' at some places in this code. It has no meaning for other
        # emoji sets and is just equivalent to `emojiset`.
        alt_name = alt_name or emojiset

        # Copy individual emoji images from npm packages.
        src_emoji_farm = os.path.join(
            NODE_MODULES_PATH, "emoji-datasource-" + emojiset, "img", alt_name, "64"
        )
        target_emoji_farm = os.path.join(cache_path, "static", "images-" + emojiset + "-64")
        os.makedirs(target_emoji_farm, exist_ok=True)
        print(f"Copying individual {emojiset} image files...")
        for emoji_dict in emoji_data:
            if emoji_dict["has_img_" + alt_name]:
                ensure_emoji_image(emoji_dict, src_emoji_farm, target_emoji_farm)
                # NOTE(review): skin variations are only visited when the base
                # emoji has an image for this set -- confirm nesting against
                # the canonical source.
                skin_variations = emoji_dict.get("skin_variations", {})
                for img_info in skin_variations.values():
                    if img_info["has_img_" + alt_name]:
                        ensure_emoji_image(img_info, src_emoji_farm, target_emoji_farm)

        # Copy every file in the zulip-emoji image directory to the emoji farm.
        zulip_image = os.path.join(ZULIP_PATH, "web", "images", "zulip-emoji")
        for f in os.listdir(zulip_image):
            shutil.copy2(os.path.join(zulip_image, f), target_emoji_farm, follow_symlinks=False)

        # We hardcode octopus emoji image to Google emoji set's old
        # "cute octopus" image. Copy it to the emoji farms.
        input_img_file = os.path.join(EMOJI_SCRIPT_DIR_PATH, "1f419.png")
        output_img_file = os.path.join(target_emoji_farm, "1f419.png")
        shutil.copyfile(input_img_file, output_img_file)

        generate_sprite_css_files(cache_path, emoji_data, emojiset, alt_name, fallback_emoji_data)

        print(f"Converting {emojiset} sheet to webp...")
        TARGET_EMOJI_SHEETS = os.path.join(cache_path, "web", "emoji")
        os.makedirs(TARGET_EMOJI_SHEETS, exist_ok=True)

        sheet_src = os.path.join(
            NODE_MODULES_PATH,
            f"emoji-datasource-{emojiset}",
            "img",
            alt_name,
            "sheets-256",
            "64.png",
        )
        sheet_dst = os.path.join(TARGET_EMOJI_SHEETS, f"{emojiset}.webp")
        # From libwebp: [Q is] between 0 and 100. For lossy, 0 gives
        # the smallest size and 100 the largest. For lossless, this
        # parameter is the amount of effort put into the
        # compression: 0 is the fastest but gives larger files
        # compared to the slowest, but best, 100.
        subprocess.check_call(["vips", "copy", sheet_src, f"{sheet_dst}[lossless=true,Q=100]"])

    # Set up standard emoji sets.
    for emojiset in ["google", "twitter"]:
        setup_emoji_farm(emojiset, emoji_data)

    # Set up old Google "blobs" emoji set.
    GOOGLE_BLOB_EMOJI_DATA_PATH = os.path.join(
        NODE_MODULES_PATH, "emoji-datasource-google-blob", "emoji.json"
    )
    with open(GOOGLE_BLOB_EMOJI_DATA_PATH, "rb") as fp:
        blob_emoji_data = orjson.loads(fp.read())
    setup_emoji_farm("google-blob", blob_emoji_data, "google", emoji_data)
|
||
|
||
|
||
def setup_old_emoji_farm(
    cache_path: str, emoji_map: dict[str, str], emoji_data: list[dict[str, Any]]
) -> None:
    """Create the legacy name-based and codepoint-based symlink farms.

    Links are created under ``<cache_path>/static/images/emoji`` (one per
    emoji name) and its ``unicode`` subdirectory (one per codepoint), all
    pointing into the ``images-google-64`` farm. ``emoji_data`` is accepted
    but not read here.
    """
    static_root = os.path.join(cache_path, "static")
    google_farm = os.path.join(static_root, "images-google-64")
    by_name_dir = os.path.join(static_root, "images", "emoji")
    by_code_dir = os.path.join(static_root, "images", "emoji", "unicode")
    os.makedirs(by_name_dir, exist_ok=True)
    os.makedirs(by_code_dir, exist_ok=True)

    # zulip.png is linked into both directories.
    zulip_png = os.path.join(google_farm, "zulip.png")
    for link_dir in (by_name_dir, by_code_dir):
        os.symlink(zulip_png, os.path.join(link_dir, "zulip.png"))

    for name, codepoint in emoji_map.items():
        image_codepoint = REMAPPED_EMOJIS.get(codepoint, codepoint)
        target = os.path.join(google_farm, f"{image_codepoint}.png")
        os.symlink(target, os.path.join(by_name_dir, f"{name}.png"))

        code_link = os.path.join(by_code_dir, f"{codepoint}.png")
        try:
            # `emoji_map` contains duplicate entries for the same codepoint with
            # different names, so the <codepoint>.png link may already exist.
            os.symlink(target, code_link)
        except FileExistsError:
            pass
|
||
|
||
|
||
def generate_map_files(cache_path: str, emoji_catalog: dict[str, list[str]]) -> None:
    """Write ``emoji_codes.json`` and ``emoji_api.json`` into ``<cache_path>/static``.

    These are the main data files about emoji that are consumed by the
    web app, mobile apps, Markdown processor, etc.
    """
    picker_names = emoji_names_for_picker(EMOJI_NAME_MAPS)
    code_to_name = generate_codepoint_to_name_map(EMOJI_NAME_MAPS)
    name_to_code = generate_name_to_codepoint_map(EMOJI_NAME_MAPS)

    codes_path = os.path.join(cache_path, "static", "emoji_codes.json")
    with open(codes_path, "wb") as codes_out:
        codes_payload = {
            "names": picker_names,
            "name_to_codepoint": name_to_code,
            "codepoint_to_name": code_to_name,
            "emoji_catalog": emoji_catalog,
            "emoticon_conversions": EMOTICON_CONVERSIONS,
        }
        codes_out.write(orjson.dumps(codes_payload))

    # This is the more official API for mobile to fetch data about emoji.
    # emoji_codes.json has a lot of goo, and we're creating this new file
    # as a cleaner data format to move towards. We could add the rest of
    # the data into this API-described data, and then the web client could
    # switch to that which would allow us to drop the existing file. But
    # we'll probably instead do #18121 which will make this file obsolete.
    # So this is a temporary solution. CZO discussion:
    # https://chat.zulip.org/#narrow/channel/378-api-design/topic/currently.20supported.20emoji/near/1394598
    api_path = os.path.join(cache_path, "static", "emoji_api.json")
    with open(api_path, "wb") as api_out:
        api_payload = {
            "code_to_names": generate_codepoint_to_names_map(EMOJI_NAME_MAPS),
        }
        api_out.write(orjson.dumps(api_payload))
|
||
|
||
|
||
def dump_emojis(cache_path: str) -> None:
    """Regenerate the whole emoji cache at ``cache_path`` from scratch.

    Any existing directory at ``cache_path`` is removed first.

    Args:
        cache_path: Directory to (re)create and fill with emoji farms,
            spritesheets, CSS and JSON map files.

    Raises:
        AssertionError: If a code in EMOJI_NAME_MAPS is not among the codes
            that emoji_is_supported accepts in the Google emoji data.
    """
    # Relative path: resolved against the current working directory.
    with open("emoji_map.json", "rb") as emoji_map_file:
        emoji_map = orjson.loads(emoji_map_file.read())

    # `emoji.json` or any other data file can be sourced from any of the supported
    # emoji set packages, they all contain the same data files.
    EMOJI_DATA_FILE_PATH = os.path.join(NODE_MODULES_PATH, "emoji-datasource-google", "emoji.json")
    with open(EMOJI_DATA_FILE_PATH, "rb") as emoji_data_file:
        emoji_data = orjson.loads(emoji_data_file.read())

    # These are the codes that we'll be able to show Google Modern
    # emoji for. (For other emoji sets, we fall back to Google Modern.)
    supported_codes = {get_emoji_code(emoji) for emoji in emoji_data if emoji_is_supported(emoji)}

    # These are in the emoji dropdown so we should make sure they're supported.
    # If this check fails, we either need to find an image for Google Modern
    # to display for that emoji, or we need to remove that emoji as an option for users,
    # by removing it from emoji_names.py through a change to `generate_emoji_names`.
    #
    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O` and the failure names every offending code.
    unsupported_codes = [code for code in EMOJI_NAME_MAPS if code not in supported_codes]
    if unsupported_codes:
        raise AssertionError(
            f"Emoji codes in EMOJI_NAME_MAPS are not supported: {unsupported_codes}"
        )

    emoji_catalog = generate_emoji_catalog(emoji_data, EMOJI_NAME_MAPS)

    # Set up emoji farms.
    if os.path.exists(cache_path):
        shutil.rmtree(cache_path)
    setup_emoji_farms(cache_path, emoji_data)
    setup_old_emoji_farm(cache_path, emoji_map, emoji_data)

    # This file is needed to translate emoji when importing data from Slack.
    shutil.copyfile(
        EMOJI_DATA_FILE_PATH,
        os.path.join(cache_path, "static", "emoji-datasource-google-emoji.json"),
    )

    # Generate various map files.
    generate_map_files(cache_path, emoji_catalog)
|
||
|
||
|
||
# Run the build only when this file is executed as a script.
if __name__ == "__main__":
    main()
|