mirror of https://github.com/zulip/zulip.git
import: Avoid unnecessary sender lookups.
This commit speeds up the import by avoiding sender lookups and instead using the data for users that we already have in memory. This avoids a few DB hops, many hops to memcached, plus some object construction. We now call do_render_markdown() directly. This also makes it more explicit that the import has never rendered alert words.
This commit is contained in:
parent
06f8d0af7e
commit
1e12b13a56
|
@ -19,7 +19,7 @@ from zerver.lib.bulk_create import bulk_create_users
|
||||||
from zerver.lib.timestamp import datetime_to_timestamp
|
from zerver.lib.timestamp import datetime_to_timestamp
|
||||||
from zerver.lib.export import DATE_FIELDS, realm_tables, \
|
from zerver.lib.export import DATE_FIELDS, realm_tables, \
|
||||||
Record, TableData, TableName, Field, Path
|
Record, TableData, TableName, Field, Path
|
||||||
from zerver.lib.message import render_markdown
|
from zerver.lib.message import do_render_markdown, RealmAlertWords
|
||||||
from zerver.lib.bugdown import version as bugdown_version
|
from zerver.lib.bugdown import version as bugdown_version
|
||||||
from zerver.lib.upload import random_name, sanitize_name, \
|
from zerver.lib.upload import random_name, sanitize_name, \
|
||||||
S3UploadBackend, LocalUploadBackend, guess_type
|
S3UploadBackend, LocalUploadBackend, guess_type
|
||||||
|
@ -207,7 +207,10 @@ def fix_customprofilefield(data: TableData) -> None:
|
||||||
old_id_list=old_user_id_list)
|
old_id_list=old_user_id_list)
|
||||||
item['value'] = ujson.dumps(new_id_list)
|
item['value'] = ujson.dumps(new_id_list)
|
||||||
|
|
||||||
def fix_message_rendered_content(realm: Realm, data: TableData, field: TableName) -> None:
|
def fix_message_rendered_content(realm: Realm,
|
||||||
|
sender_map: Dict[int, Record],
|
||||||
|
data: TableData,
|
||||||
|
field: TableName) -> None:
|
||||||
"""
|
"""
|
||||||
This function sets the rendered_content of all the messages
|
This function sets the rendered_content of all the messages
|
||||||
after the messages have been imported from a non-Zulip platform.
|
after the messages have been imported from a non-Zulip platform.
|
||||||
|
@ -220,10 +223,26 @@ def fix_message_rendered_content(realm: Realm, data: TableData, field: TableName
|
||||||
|
|
||||||
try:
|
try:
|
||||||
content = message['content']
|
content = message['content']
|
||||||
rendered_content = render_markdown(
|
|
||||||
|
sender_id = message['sender_id']
|
||||||
|
sender = sender_map[sender_id]
|
||||||
|
sent_by_bot = sender['is_bot']
|
||||||
|
translate_emoticons = sender['translate_emoticons']
|
||||||
|
|
||||||
|
# We don't handle alert words on import from third-party
|
||||||
|
# platforms, since they generally don't have an "alert
|
||||||
|
# words" type feature, and notifications aren't important anyway.
|
||||||
|
realm_alert_words = dict() # type: RealmAlertWords
|
||||||
|
message_user_ids = set() # type: Set[int]
|
||||||
|
|
||||||
|
rendered_content = do_render_markdown(
|
||||||
message=message_object,
|
message=message_object,
|
||||||
content=content,
|
content=content,
|
||||||
realm=realm,
|
realm=realm,
|
||||||
|
realm_alert_words=realm_alert_words,
|
||||||
|
message_user_ids=message_user_ids,
|
||||||
|
sent_by_bot=sent_by_bot,
|
||||||
|
translate_emoticons=translate_emoticons,
|
||||||
)
|
)
|
||||||
assert(rendered_content is not None)
|
assert(rendered_content is not None)
|
||||||
message_object.rendered_content = rendered_content
|
message_object.rendered_content = rendered_content
|
||||||
|
@ -901,8 +920,13 @@ def do_import_realm(import_dir: Path, subdomain: str) -> Realm:
|
||||||
if os.path.exists(os.path.join(import_dir, "emoji")):
|
if os.path.exists(os.path.join(import_dir, "emoji")):
|
||||||
import_uploads(os.path.join(import_dir, "emoji"), processing_emojis=True)
|
import_uploads(os.path.join(import_dir, "emoji"), processing_emojis=True)
|
||||||
|
|
||||||
|
sender_map = {
|
||||||
|
user['id']: user
|
||||||
|
for user in data['zerver_userprofile']
|
||||||
|
}
|
||||||
|
|
||||||
# Import zerver_message and zerver_usermessage
|
# Import zerver_message and zerver_usermessage
|
||||||
import_message_data(realm=realm, import_dir=import_dir)
|
import_message_data(realm=realm, sender_map=sender_map, import_dir=import_dir)
|
||||||
|
|
||||||
re_map_foreign_keys(data, 'zerver_reaction', 'message', related_table="message")
|
re_map_foreign_keys(data, 'zerver_reaction', 'message', related_table="message")
|
||||||
re_map_foreign_keys(data, 'zerver_reaction', 'user_profile', related_table="user_profile")
|
re_map_foreign_keys(data, 'zerver_reaction', 'user_profile', related_table="user_profile")
|
||||||
|
@ -1023,7 +1047,9 @@ def get_incoming_message_ids(import_dir: Path,
|
||||||
|
|
||||||
return message_ids
|
return message_ids
|
||||||
|
|
||||||
def import_message_data(realm: Realm, import_dir: Path) -> None:
|
def import_message_data(realm: Realm,
|
||||||
|
sender_map: Dict[int, Record],
|
||||||
|
import_dir: Path) -> None:
|
||||||
dump_file_id = 1
|
dump_file_id = 1
|
||||||
while True:
|
while True:
|
||||||
message_filename = os.path.join(import_dir, "messages-%06d.json" % (dump_file_id,))
|
message_filename = os.path.join(import_dir, "messages-%06d.json" % (dump_file_id,))
|
||||||
|
@ -1055,7 +1081,12 @@ def import_message_data(realm: Realm, import_dir: Path) -> None:
|
||||||
# This is where we actually import the message data.
|
# This is where we actually import the message data.
|
||||||
bulk_import_model(data, Message)
|
bulk_import_model(data, Message)
|
||||||
|
|
||||||
fix_message_rendered_content(realm, data, 'zerver_message')
|
fix_message_rendered_content(
|
||||||
|
realm=realm,
|
||||||
|
sender_map=sender_map,
|
||||||
|
data=data,
|
||||||
|
field='zerver_message',
|
||||||
|
)
|
||||||
logging.info("Successfully rendered markdown for message batch")
|
logging.info("Successfully rendered markdown for message batch")
|
||||||
|
|
||||||
# Due to the structure of these message chunks, we're
|
# Due to the structure of these message chunks, we're
|
||||||
|
|
Loading…
Reference in New Issue