data_import: Import custom emoji from Rocket.Chat.

This commit is contained in:
Priyansh Garg 2021-08-02 22:41:32 +05:30 committed by Tim Abbott
parent a7890f046b
commit 65e28907cb
7 changed files with 209 additions and 22 deletions

View File

@ -83,7 +83,6 @@ root domain. Replace the last line above with the following, after replacing
This import tool is currently beta and does not support importing
the following data:
- Custom emoji
- User avatars
- Uploaded files
- Default channels for new users

View File

@ -15,6 +15,7 @@ from zerver.data_import.import_util import (
build_message,
build_personal_subscriptions,
build_realm,
build_realm_emoji,
build_recipients,
build_stream,
build_stream_subscriptions,
@ -28,7 +29,7 @@ from zerver.data_import.sequencer import NEXT_ID, IdMapper
from zerver.data_import.user_handler import UserHandler
from zerver.lib.emoji import name_to_codepoint
from zerver.lib.utils import process_list_in_batches
from zerver.models import Reaction, Recipient, UserProfile
from zerver.models import Reaction, RealmEmoji, Recipient, UserProfile
def make_realm(
@ -238,18 +239,92 @@ def convert_huddle_data(
return zerver_huddle
def build_custom_emoji(
    realm_id: int, custom_emoji_data: Dict[str, List[Dict[str, Any]]], output_dir: str
) -> List[ZerverFieldsT]:
    """Convert Rocket.Chat custom emoji into realm emoji rows and image files.

    For every entry in ``custom_emoji_data["emoji"]`` the image bytes are
    reassembled from its file chunks and written below ``<output_dir>/emoji``.
    One record and one realm emoji row are then produced for the emoji's
    name and for each of its aliases, all pointing at the same image file.
    The records are finally written out through
    ``create_converted_data_files`` as ``/emoji/records.json``.

    Args:
        realm_id: ID of the realm the emoji are imported into.
        custom_emoji_data: Dict with the keys ``"emoji"``, ``"file"`` and
            ``"chunk"``, each holding a list of decoded documents.
        output_dir: Root directory of the converted export.

    Returns:
        The list of realm emoji rows built with ``build_realm_emoji``.

    Raises:
        KeyError: If a chunk references a file id that is not present in
            ``custom_emoji_data["file"]``, or an emoji has no matching file.
    """
    logging.info("Starting to process custom emoji")

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)

    zerver_realmemoji: List[ZerverFieldsT] = []
    emoji_records: List[ZerverFieldsT] = []

    # Map emoji file_id to emoji file data
    emoji_file_data: Dict[Any, Dict[str, Any]] = {}
    for emoji_file in custom_emoji_data["file"]:
        emoji_file_data[emoji_file["_id"]] = {"filename": emoji_file["filename"], "chunks": []}

    # GridFS chunk documents carry a sequence number "n"; concatenating the
    # chunks in dump order would corrupt a multi-chunk image whenever the
    # dump is not already sorted.  The sort is stable and falls back to 0,
    # so data without "n" keeps its original order.
    ordered_chunks = sorted(custom_emoji_data["chunk"], key=lambda chunk: chunk.get("n", 0))
    for emoji_chunk in ordered_chunks:
        emoji_file_data[emoji_chunk["files_id"]]["chunks"].append(emoji_chunk["data"])

    # Build custom emoji
    for rc_emoji in custom_emoji_data["emoji"]:
        # Subject to change with changes in database
        emoji_file_id = ".".join([rc_emoji["name"], rc_emoji["extension"]])

        emoji_file_info = emoji_file_data[emoji_file_id]

        emoji_filename = emoji_file_info["filename"]
        emoji_data = b"".join(emoji_file_info["chunks"])

        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=emoji_filename,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, "wb") as e_file:
            e_file.write(emoji_data)

        # The primary name and every alias each become their own realm
        # emoji, all sharing the image file written above.
        emoji_aliases = [rc_emoji["name"]]
        emoji_aliases.extend(rc_emoji["aliases"])

        for alias in emoji_aliases:
            emoji_record = dict(
                path=target_path,
                s3_path=target_path,
                file_name=emoji_filename,
                realm_id=realm_id,
                name=alias,
            )
            emoji_records.append(emoji_record)

            realmemoji = build_realm_emoji(
                realm_id=realm_id,
                name=alias,
                id=NEXT_ID("realmemoji"),
                file_name=emoji_filename,
            )
            zerver_realmemoji.append(realmemoji)

    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")
    logging.info("Done processing emoji")

    return zerver_realmemoji
def build_reactions(
total_reactions: List[ZerverFieldsT],
reactions: List[Dict[str, Any]],
message_id: int,
zerver_realmemoji: List[ZerverFieldsT],
) -> None:
realmemoji = {}
for emoji in zerver_realmemoji:
realmemoji[emoji["name"]] = emoji["id"]
# For the Unicode emoji codes, we use equivalent of
# function 'emoji_name_to_emoji_code' in 'zerver/lib/emoji' here
for reaction in reactions:
emoji_name = reaction["name"]
user_id = reaction["user_id"]
# Check in realm emoji
if emoji_name in realmemoji:
emoji_code = realmemoji[emoji_name]
reaction_type = Reaction.REALM_EMOJI
# Check in Unicode emoji
if emoji_name in name_to_codepoint:
elif emoji_name in name_to_codepoint:
emoji_code = name_to_codepoint[emoji_name]
reaction_type = Reaction.UNICODE_EMOJI
else: # nocoverage
@ -276,6 +351,7 @@ def process_raw_message_batch(
user_handler: UserHandler,
is_pm_data: bool,
output_dir: str,
zerver_realmemoji: List[ZerverFieldsT],
total_reactions: List[ZerverFieldsT],
) -> None:
def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
@ -331,6 +407,7 @@ def process_raw_message_batch(
total_reactions=total_reactions,
reactions=raw_message["reactions"],
message_id=message_id,
zerver_realmemoji=zerver_realmemoji,
)
zerver_usermessage = make_user_messages(
@ -366,6 +443,7 @@ def process_messages(
dsc_id_to_dsc_map: Dict[str, Dict[str, Any]],
direct_id_to_direct_map: Dict[str, Dict[str, Any]],
huddle_id_to_huddle_map: Dict[str, Dict[str, Any]],
zerver_realmemoji: List[ZerverFieldsT],
total_reactions: List[ZerverFieldsT],
output_dir: str,
) -> None:
@ -458,6 +536,7 @@ def process_messages(
user_handler=user_handler,
is_pm_data=is_pm_data,
output_dir=output_dir,
zerver_realmemoji=zerver_realmemoji,
total_reactions=total_reactions,
)
@ -550,6 +629,7 @@ def rocketchat_data_to_dict(rocketchat_data_dir: str) -> Dict[str, Any]:
rocketchat_data["avatar"] = {"avatar": [], "file": [], "chunk": []}
rocketchat_data["room"] = []
rocketchat_data["message"] = []
rocketchat_data["custom_emoji"] = {"emoji": [], "file": [], "chunk": []}
# Get instance
with open(os.path.join(rocketchat_data_dir, "instances.bson"), "rb") as fcache:
@ -563,11 +643,16 @@ def rocketchat_data_to_dict(rocketchat_data_dir: str) -> Dict[str, Any]:
with open(os.path.join(rocketchat_data_dir, "rocketchat_avatars.bson"), "rb") as fcache:
rocketchat_data["avatar"]["avatar"] = bson.decode_all(fcache.read())
with open(os.path.join(rocketchat_data_dir, "rocketchat_avatars.chunks.bson"), "rb") as fcache:
rocketchat_data["avatar"]["chunk"] = bson.decode_all(fcache.read())
if rocketchat_data["avatar"]["avatar"]:
with open(
os.path.join(rocketchat_data_dir, "rocketchat_avatars.files.bson"), "rb"
) as fcache:
rocketchat_data["avatar"]["file"] = bson.decode_all(fcache.read())
with open(os.path.join(rocketchat_data_dir, "rocketchat_avatars.files.bson"), "rb") as fcache:
rocketchat_data["avatar"]["file"] = bson.decode_all(fcache.read())
with open(
os.path.join(rocketchat_data_dir, "rocketchat_avatars.chunks.bson"), "rb"
) as fcache:
rocketchat_data["avatar"]["chunk"] = bson.decode_all(fcache.read())
# Get room
with open(os.path.join(rocketchat_data_dir, "rocketchat_room.bson"), "rb") as fcache:
@ -577,6 +662,17 @@ def rocketchat_data_to_dict(rocketchat_data_dir: str) -> Dict[str, Any]:
with open(os.path.join(rocketchat_data_dir, "rocketchat_message.bson"), "rb") as fcache:
rocketchat_data["message"] = bson.decode_all(fcache.read())
# Get custom emoji
with open(os.path.join(rocketchat_data_dir, "rocketchat_custom_emoji.bson"), "rb") as fcache:
rocketchat_data["custom_emoji"]["emoji"] = bson.decode_all(fcache.read())
if rocketchat_data["custom_emoji"]["emoji"]:
with open(os.path.join(rocketchat_data_dir, "custom_emoji.files.bson"), "rb") as fcache:
rocketchat_data["custom_emoji"]["file"] = bson.decode_all(fcache.read())
with open(os.path.join(rocketchat_data_dir, "custom_emoji.chunks.bson"), "rb") as fcache:
rocketchat_data["custom_emoji"]["chunk"] = bson.decode_all(fcache.read())
return rocketchat_data
@ -677,6 +773,13 @@ def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
realm["zerver_subscription"] = zerver_subscription
zerver_realmemoji = build_custom_emoji(
realm_id=realm_id,
custom_emoji_data=rocketchat_data["custom_emoji"],
output_dir=output_dir,
)
realm["zerver_realmemoji"] = zerver_realmemoji
subscriber_map = make_subscriber_map(
zerver_subscription=zerver_subscription,
)
@ -722,6 +825,7 @@ def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
dsc_id_to_dsc_map=dsc_id_to_dsc_map,
direct_id_to_direct_map=direct_id_to_direct_map,
huddle_id_to_huddle_map=huddle_id_to_huddle_map,
zerver_realmemoji=zerver_realmemoji,
total_reactions=total_reactions,
output_dir=output_dir,
)
@ -742,6 +846,7 @@ def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
dsc_id_to_dsc_map=dsc_id_to_dsc_map,
direct_id_to_direct_map=direct_id_to_direct_map,
huddle_id_to_huddle_map=huddle_id_to_huddle_map,
zerver_realmemoji=zerver_realmemoji,
total_reactions=total_reactions,
output_dir=output_dir,
)

Binary file not shown.

Binary file not shown.

View File

@ -6,6 +6,7 @@ import orjson
from zerver.data_import.import_util import SubscriberHandler, ZerverFieldsT, build_recipients
from zerver.data_import.rocketchat import (
build_custom_emoji,
build_reactions,
categorize_channels_and_map_with_id,
convert_channel_data,
@ -31,7 +32,7 @@ class RocketChatImporter(ZulipTestCase):
def test_rocketchat_data_to_dict(self) -> None:
fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures")
rocketchat_data = rocketchat_data_to_dict(fixture_dir_name)
self.assert_length(rocketchat_data, 5)
self.assert_length(rocketchat_data, 6)
self.assert_length(rocketchat_data["user"], 6)
self.assertEqual(rocketchat_data["user"][2]["username"], "harry.potter")
@ -41,11 +42,14 @@ class RocketChatImporter(ZulipTestCase):
self.assertEqual(rocketchat_data["room"][0]["_id"], "GENERAL")
self.assertEqual(rocketchat_data["room"][0]["name"], "general")
self.assert_length(rocketchat_data["message"], 52)
self.assert_length(rocketchat_data["message"], 58)
self.assertEqual(rocketchat_data["message"][1]["msg"], "Hey everyone, how's it going??")
self.assertEqual(rocketchat_data["message"][1]["rid"], "GENERAL")
self.assertEqual(rocketchat_data["message"][1]["u"]["username"], "priyansh3133")
self.assert_length(rocketchat_data["custom_emoji"]["emoji"], 3)
self.assertEqual(rocketchat_data["custom_emoji"]["emoji"][0]["name"], "tick")
def test_map_user_id_to_user(self) -> None:
fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures")
rocketchat_data = rocketchat_data_to_dict(fixture_dir_name)
@ -438,6 +442,51 @@ class RocketChatImporter(ZulipTestCase):
huddle_id = huddle_id_mapper.get(rc_huddle_id)
self.assertEqual(subscriber_handler.get_users(huddle_id=huddle_id), {3, 4, 5})
def test_write_emoticon_data(self) -> None:
    """Check the realm emoji rows, records.json and image files from build_custom_emoji."""
    fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures")
    rocketchat_data = rocketchat_data_to_dict(fixture_dir_name)
    output_dir = self.make_import_output_dir("rocketchat")

    with self.assertLogs(level="INFO"):
        zerver_realmemoji = build_custom_emoji(
            realm_id=3,
            custom_emoji_data=rocketchat_data["custom_emoji"],
            output_dir=output_dir,
        )

    self.assert_length(zerver_realmemoji, 5)

    # (name, file_name) expected at positions 0, 1 and 2 of both outputs;
    # "check" is an alias of "tick" and therefore shares its image file.
    leading_emoji = [
        ("tick", "tick.png"),
        ("check", "tick.png"),
        ("zulip", "zulip.png"),
    ]

    for position, (expected_name, expected_file) in enumerate(leading_emoji):
        emoji_row = zerver_realmemoji[position]
        self.assertEqual(emoji_row["name"], expected_name)
        self.assertEqual(emoji_row["file_name"], expected_file)
        self.assertEqual(emoji_row["realm"], 3)
        self.assertEqual(emoji_row["deactivated"], False)

    records_file = os.path.join(output_dir, "emoji", "records.json")
    with open(records_file, "rb") as f:
        records_json = orjson.loads(f.read())

    for position, (expected_name, expected_file) in enumerate(leading_emoji):
        record = records_json[position]
        self.assertEqual(record["name"], expected_name)
        self.assertEqual(record["file_name"], expected_file)
        self.assertEqual(record["realm_id"], 3)

    # One existence check per distinct image file (tick.png and zulip.png).
    for position in (0, 2):
        self.assertTrue(os.path.isfile(records_json[position]["path"]))
def test_map_receiver_id_to_recipient_id(self) -> None:
fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures")
rocketchat_data = rocketchat_data_to_dict(fixture_dir_name)
@ -564,8 +613,8 @@ class RocketChatImporter(ZulipTestCase):
private_messages=private_messages,
)
self.assert_length(rocketchat_data["message"], 52)
self.assert_length(channel_messages, 47)
self.assert_length(rocketchat_data["message"], 58)
self.assert_length(channel_messages, 53)
self.assert_length(private_messages, 5)
self.assertIn(rocketchat_data["message"][0], channel_messages)
@ -612,10 +661,21 @@ class RocketChatImporter(ZulipTestCase):
)
# No new message added to channel or private messages
self.assert_length(channel_messages, 47)
self.assert_length(channel_messages, 53)
self.assert_length(private_messages, 5)
def test_build_reactions(self) -> None:
fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures")
rocketchat_data = rocketchat_data_to_dict(fixture_dir_name)
output_dir = self.make_import_output_dir("rocketchat")
with self.assertLogs(level="INFO"):
zerver_realmemoji = build_custom_emoji(
realm_id=3,
custom_emoji_data=rocketchat_data["custom_emoji"],
output_dir=output_dir,
)
total_reactions: List[ZerverFieldsT] = []
reactions = [
@ -625,32 +685,53 @@ class RocketChatImporter(ZulipTestCase):
{"name": "star_struck", "user_id": 4},
{"name": "heart", "user_id": 3},
{"name": "rocket", "user_id": 4},
{"name": "check", "user_id": 2},
{"name": "zulip", "user_id": 3},
{"name": "harry-ron", "user_id": 4},
]
build_reactions(total_reactions=total_reactions, reactions=reactions, message_id=3)
build_reactions(
total_reactions=total_reactions,
reactions=reactions,
message_id=3,
zerver_realmemoji=zerver_realmemoji,
)
# :grin: and :star_struck: are not present in Zulip's default
# emoji set, or in Reaction.UNICODE_EMOJI reaction type.
self.assert_length(total_reactions, 4)
self.assert_length(total_reactions, 7)
grinning_emoji_code = name_to_codepoint["grinning"]
innocent_emoji_code = name_to_codepoint["innocent"]
heart_emoji_code = name_to_codepoint["heart"]
rocket_emoji_code = name_to_codepoint["rocket"]
realmemoji_code = {}
for emoji in zerver_realmemoji:
realmemoji_code[emoji["name"]] = emoji["id"]
self.assertEqual(
self.get_set(total_reactions, "reaction_type"),
{Reaction.UNICODE_EMOJI},
{Reaction.UNICODE_EMOJI, Reaction.REALM_EMOJI},
)
self.assertEqual(
self.get_set(total_reactions, "emoji_name"), {"grinning", "innocent", "heart", "rocket"}
self.get_set(total_reactions, "emoji_name"),
{"grinning", "innocent", "heart", "rocket", "check", "zulip", "harry-ron"},
)
self.assertEqual(
self.get_set(total_reactions, "emoji_code"),
{grinning_emoji_code, innocent_emoji_code, heart_emoji_code, rocket_emoji_code},
{
grinning_emoji_code,
innocent_emoji_code,
heart_emoji_code,
rocket_emoji_code,
realmemoji_code["check"],
realmemoji_code["zulip"],
realmemoji_code["harry-ron"],
},
)
self.assertEqual(self.get_set(total_reactions, "user_profile"), {2, 3, 4})
self.assert_length(self.get_set(total_reactions, "id"), 4)
self.assert_length(self.get_set(total_reactions, "id"), 7)
self.assert_length(self.get_set(total_reactions, "message"), 1)
def read_file(self, team_output_dir: str, output_file: str) -> Any:
@ -670,13 +751,15 @@ class RocketChatImporter(ZulipTestCase):
self.assertEqual(
info_log.output,
[
"INFO:root:Starting to process custom emoji",
"INFO:root:Done processing emoji",
"INFO:root:Start making tarball",
"INFO:root:Done making tarball",
],
)
self.assertEqual(os.path.exists(os.path.join(output_dir, "avatars")), True)
self.assertEqual(os.path.exists(os.path.join(output_dir, "emoji")), False)
self.assertEqual(os.path.exists(os.path.join(output_dir, "emoji")), True)
self.assertEqual(os.path.exists(os.path.join(output_dir, "attachment.json")), True)
realm = self.read_file(output_dir, "realm.json")
@ -784,12 +867,12 @@ class RocketChatImporter(ZulipTestCase):
for message in messages:
self.assertIsNotNone(message.rendered_content)
# After removing user_joined, added_user, discussion_created, etc.
# messages. (Total messages were 44.)
self.assert_length(messages, 27)
# messages. (Total messages were 58.)
self.assert_length(messages, 31)
stream_messages = messages.filter(recipient__type=Recipient.STREAM).order_by("date_sent")
stream_recipients = stream_messages.values_list("recipient", flat=True)
self.assert_length(stream_messages, 22)
self.assert_length(stream_messages, 26)
self.assert_length(set(stream_recipients), 5)
self.assertEqual(stream_messages[0].sender.email, "priyansh3133@email.com")
self.assertEqual(stream_messages[0].content, "Hey everyone, how's it going??")