mirror of https://github.com/zulip/zulip.git
Add tools for dumping and comparing markdown renderings.
This adds a couple of new tools that can be used to determine whether a particular change in Zulip's backend markdown processor would impact the rendering of historical messages, without a human actually looking at the message content. This is a useful way to verify whether a change to our markdown syntax is likely to create problems. [commit message and code tweaked by tabbott]
This commit is contained in:
parent
6107c877e8
commit
93965a8e89
|
@ -162,3 +162,6 @@ git+https://github.com/lorenzogil/glue@01c00cd33b9b78ea868300c266c16acd59a81bfc#
|
||||||
|
|
||||||
# Needed for cloning virtual environments
|
# Needed for cloning virtual environments
|
||||||
git+https://github.com/umairwaheed/virtualenv-clone.git@short-version#egg=virtualenv-clone==0.2.6
|
git+https://github.com/umairwaheed/virtualenv-clone.git@short-version#egg=virtualenv-clone==0.2.6
|
||||||
|
|
||||||
|
# Needed for reading JSON as a stream
|
||||||
|
ijson==2.3
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
from six.moves import zip
|
||||||
|
|
||||||
|
import ijson
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand, CommandParser
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Compare two JSON dumps of rendered messages and report differences."""

    help = """
    Compare two dumps of rendered messages.
    Usage: python manage.py compare_messages <dump1> <dump2>
    """

    def add_arguments(self, parser):
        # type: (CommandParser) -> None
        """Register the two positional dump-file paths."""
        parser.add_argument('dump1', help='First file to compare')
        parser.add_argument('dump2', help='Second file to compare')

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Walk both dumps in lockstep and print the ids whose content differs.

        Each dump is read as a top-level JSON array whose elements have
        'id' and 'content' keys.  Comparison stops with an error message
        on stderr as soon as the ids at the same position differ.
        """
        total_count = 0
        changed_count = 0
        with open(options['dump1'], 'r') as dump1, open(options['dump2'], 'r') as dump2:
            # ijson yields the array elements one by one; zip stops at the
            # end of the shorter dump, so extra trailing entries in the
            # longer file are not examined.
            for m1, m2 in zip(ijson.items(dump1, 'item'), ijson.items(dump2, 'item')):
                total_count += 1
                if m1['id'] != m2['id']:
                    # The dumps no longer line up; further pairs would be
                    # meaningless, so give up here.
                    self.stderr.write('Inconsistent messages dump')
                    break
                if m1['content'] != m2['content']:
                    changed_count += 1
                    self.stdout.write('Changed message id: {id}'.format(id=m1['id']))
        self.stdout.write('Total messages: {count}'.format(count=total_count))
        self.stdout.write('Changed messages: {count}'.format(count=changed_count))
|
|
@ -0,0 +1,56 @@
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
import ujson
|
||||||
|
from typing import Any, Generator
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand, CommandParser
|
||||||
|
from django.db.models import QuerySet
|
||||||
|
|
||||||
|
from zerver.lib.message import render_markdown
|
||||||
|
from zerver.models import Message
|
||||||
|
|
||||||
|
|
||||||
|
def queryset_iterator(queryset, chunksize=5000):
    # type: (QuerySet, int) -> Generator
    """Yield every row of `queryset` in ascending id order, chunk by chunk.

    The queryset is ordered by 'id' and sliced `chunksize` rows at a time;
    after each chunk it is narrowed to rows whose id is greater than the
    last id yielded.  Iteration ends when a chunk comes back empty.
    """
    queryset = queryset.order_by('id')
    while True:
        last_id = None
        for row in queryset[:chunksize]:
            last_id = row.id
            yield row
        if last_id is None:
            # Empty chunk: nothing left past the previous boundary.  Using
            # this as the stop condition avoids a separate exists() query
            # per chunk and never reads an unset boundary id.
            break
        queryset = queryset.filter(id__gt=last_id)
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Dump the rendered markdown of a range of messages to a JSON file."""

    help = """
    Render messages to a file.
    Usage: python manage.py render_messages <destination> [--amount=100000] [--latest_id=<id>]
    """

    def add_arguments(self, parser):
        # type: (CommandParser) -> None
        """Register the destination path and the optional range options."""
        parser.add_argument('destination', help='Destination file path')
        parser.add_argument('--amount', default=100000, help='Number of messages to render')
        parser.add_argument('--latest_id', default=0, help="Last message id to render")

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Write a JSON array of {'id', 'content'} objects to the destination.

        The range covers message ids in (latest - amount, latest], where
        `latest` is --latest_id or, when that is 0, the id of the newest
        message.  The destination's parent directory is created if missing.
        """
        dest_dir = os.path.realpath(os.path.dirname(options['destination']))
        amount = int(options['amount'])
        # A latest_id of 0 (the default) means "use the newest message".
        latest = int(options['latest_id']) or Message.objects.latest('id').id
        self.stdout.write('Latest message id: {latest}'.format(latest=latest))
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        with open(options['destination'], 'w') as result:
            result.write('[')
            messages = Message.objects.filter(id__gt=latest - amount, id__lte=latest).order_by('id')
            is_first = True
            for message in queryset_iterator(messages):
                # Emit the separator before every element but the first.
                # Keying it on `message.id != latest` left a trailing comma
                # (invalid JSON) whenever no message with id `latest` was
                # in the range.
                if not is_first:
                    result.write(',')
                is_first = False
                result.write(ujson.dumps({
                    'id': message.id,
                    'content': render_markdown(message, message.content)
                }))
            result.write(']')
|
|
@ -1,40 +0,0 @@
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
|
||||||
|
|
||||||
import zerver.lib.bugdown as bugdown
|
|
||||||
from zerver.lib.message import re_render_content_for_management_command
|
|
||||||
from zerver.models import Message
|
|
||||||
import datetime
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
class Command(BaseCommand):
    """Disabled command for re-rendering messages with an outdated rendering version."""

    help = """Render all historical messages that haven't been rendered yet.

Usage: python manage.py render_old_messages"""

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Print a warning that the command is unsupported and exit with status 1."""
        warning = '''
This command is currently not supported, and it can be somewhat
dangerous to run on large instances. Before upgrading messages
to a new version, you should make sure that the old renderings
are actually invalid; it could be quite the opposite (we might
not want to render V1 messages with V2).
'''
        print(warning)
        sys.exit(1)

        # Everything below is unreachable because of the sys.exit(1) above;
        # it is the batch re-rendering loop the command used to run.
        total_rendered = 0
        while True:
            # Fetch up to 100 messages whose stored rendering version
            # differs from the current bugdown version.
            batch = Message.objects.exclude(rendered_content_version=bugdown.version)[0:100]
            if len(batch) == 0:
                break
            for stale_message in batch:
                re_render_content_for_management_command(stale_message)
            total_rendered += len(batch)
            print(datetime.datetime.now(), total_rendered)
            # Put in some sleep so this can run safely on low resource machines
            time.sleep(0.25)
|
|
Loading…
Reference in New Issue