2016-10-24 18:32:09 +02:00
|
|
|
import os
|
2017-11-02 06:58:36 +01:00
|
|
|
from typing import Any, Iterator
|
2016-10-24 18:32:09 +02:00
|
|
|
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2024-05-24 16:49:56 +02:00
|
|
|
from django.core.management.base import CommandParser
|
2016-10-24 18:32:09 +02:00
|
|
|
from django.db.models import QuerySet
|
2023-10-12 19:43:45 +02:00
|
|
|
from typing_extensions import override
|
2016-10-24 18:32:09 +02:00
|
|
|
|
2024-05-24 16:49:56 +02:00
|
|
|
from zerver.lib.management import ZulipBaseCommand
|
2023-10-03 03:22:59 +02:00
|
|
|
from zerver.lib.markdown import render_message_markdown
|
2016-10-24 18:32:09 +02:00
|
|
|
from zerver.models import Message
|
|
|
|
|
2020-01-14 21:59:46 +01:00
|
|
|
|
2022-06-15 23:58:40 +02:00
|
|
|
def queryset_iterator(queryset: QuerySet[Message], chunksize: int = 5000) -> Iterator[Message]:
    """Yield every row of ``queryset`` in ascending ``id`` order, chunk by chunk.

    Rows are fetched by slicing the queryset to at most ``chunksize`` rows at
    a time; after each chunk the queryset is narrowed to ids greater than the
    last id yielded, so the whole result set is never held at once.

    Args:
        queryset: The messages to iterate over; any existing ordering is
            replaced by ordering on ``id``.
        chunksize: Maximum number of rows fetched per chunk. Must be positive.

    Yields:
        Each row of ``queryset``, ordered by ``id``.

    Raises:
        ValueError: If ``chunksize`` is not positive (raised when iteration
            starts, since this is a generator).
    """
    if chunksize <= 0:
        raise ValueError("chunksize must be a positive integer")

    remaining = queryset.order_by("id")
    while True:
        # Track the last id seen in this chunk; None means the chunk was
        # empty, which is the termination signal.  This replaces a separate
        # exists() check per chunk and cannot leave the id unbound if rows
        # disappear between the check and the fetch.
        last_id = None
        for row in remaining[:chunksize]:
            last_id = row.id
            yield row
        if last_id is None:
            return
        remaining = remaining.filter(id__gt=last_id)
|
|
|
|
|
|
|
|
|
2024-05-24 16:49:56 +02:00
|
|
|
class Command(ZulipBaseCommand):
    """Management command that writes rendered message content to a file."""

    help = """
    Render messages to a file.
    Usage: ./manage.py render_messages <destination> [--amount=10000]
    """

    @override
    def add_arguments(self, parser: CommandParser) -> None:
        """Register the destination path and the optional id-window options."""
        parser.add_argument("destination", help="Destination file path")
        # type=int lets argparse reject non-numeric values with a usage error
        # instead of a traceback from int() inside handle().
        parser.add_argument(
            "--amount", type=int, default=100000, help="Number of messages to render"
        )
        parser.add_argument(
            "--latest_id", type=int, default=0, help="Last message id to render"
        )

    @override
    def handle(self, *args: Any, **options: Any) -> None:
        """Render a window of messages and write them to the destination file.

        The window covers message ids in ``(latest - amount, latest]``, where
        ``latest`` is ``--latest_id`` or, when that is 0, the id of the newest
        message.  Each message is written as one JSON object per line with the
        keys ``id`` and ``content``.
        """
        destination = options["destination"]
        dest_dir = os.path.realpath(os.path.dirname(destination))
        # Keep the int() conversions: options supplied programmatically may
        # not have passed through the parser's type conversion.
        amount = int(options["amount"])
        latest = int(options["latest_id"]) or Message.objects.latest("id").id
        self.stdout.write(f"Latest message id: {latest}")

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(dest_dir, exist_ok=True)

        with open(destination, "wb") as result:
            # Note that ``amount`` bounds the range of ids, not the row count:
            # gaps in the id sequence mean fewer messages are rendered.
            messages = Message.objects.filter(id__gt=latest - amount, id__lte=latest).order_by("id")
            for message in queryset_iterator(messages):
                content = message.content
                # In order to ensure that the output of this tool is
                # consistent over time, even if messages are edited, we
                # always render the original content version, extracting
                # it from the edit history if necessary.
                if message.edit_history:
                    history = orjson.loads(message.edit_history)
                    history = sorted(history, key=lambda i: i["timestamp"])
                    # The earliest entry that carries "prev_content" holds
                    # the content as it was before the first content edit.
                    for entry in history:
                        if "prev_content" in entry:
                            content = entry["prev_content"]
                            break
                result.write(
                    orjson.dumps(
                        {
                            "id": message.id,
                            "content": render_message_markdown(message, content),
                        },
                        option=orjson.OPT_APPEND_NEWLINE,
                    )
                )
|