mirror of https://github.com/zulip/zulip.git
Add tools for dumping and comparing markdown renderings.
This adds a couple new tools that can be used to determine whether a particular change in Zulip's backend markdown processor would impact the rendering of historical messages, without a human actually looking at the message content. This is a useful way to verify whether a change to our markdown syntax is likely to create problems. [commit message and code tweaked by tabbott]
This commit is contained in:
parent
6107c877e8
commit
93965a8e89
|
@ -162,3 +162,6 @@ git+https://github.com/lorenzogil/glue@01c00cd33b9b78ea868300c266c16acd59a81bfc#
|
|||
|
||||
# Needed for cloning virtual environments
|
||||
git+https://github.com/umairwaheed/virtualenv-clone.git@short-version#egg=virtualenv-clone==0.2.6
|
||||
|
||||
# Needed for reading json as stream
|
||||
ijson==2.3
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
from typing import Any
|
||||
from six.moves import zip
|
||||
|
||||
import ijson
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandParser
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that diffs two dumps of rendered messages.

    Each dump is read as a stream with ``ijson.items(f, 'item')``, i.e. it is
    expected to be a top-level JSON array whose elements are objects with
    ``id`` and ``content`` keys.  The two dumps are walked in lockstep, so
    they must list the same message ids in the same order.
    """

    help = """
    Compare two dumps of rendered messages and report which messages changed.

    Arguments: <dump1> <dump2>

    Each argument is the path of a JSON file holding an array of objects
    with "id" and "content" keys, listed in the same order in both files.
    """

    def add_arguments(self, parser):
        # type: (CommandParser) -> None
        """Register the two positional dump-file paths."""
        parser.add_argument('dump1', help='First file to compare')
        parser.add_argument('dump2', help='Second file to compare')

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Stream both dumps, print the id of every message whose content
        differs, then print the totals.

        Comparison stops with 'Inconsistent messages dump' on stderr as soon
        as the two dumps disagree on a message id or one dump runs out of
        entries before the other.
        """
        total_count = 0
        changed_count = 0
        # Sentinel marking an exhausted dump; a plain zip() would silently
        # stop at the shorter dump and hide a truncated file.
        exhausted = object()
        with open(options['dump1'], 'r') as dump1, open(options['dump2'], 'r') as dump2:
            items1 = ijson.items(dump1, 'item')
            items2 = ijson.items(dump2, 'item')
            while True:
                m1 = next(items1, exhausted)
                m2 = next(items2, exhausted)
                if m1 is exhausted and m2 is exhausted:
                    break
                # As before, the pair that reveals an inconsistency is still
                # counted as examined.
                total_count += 1
                if m1 is exhausted or m2 is exhausted or m1['id'] != m2['id']:
                    self.stderr.write('Inconsistent messages dump')
                    break
                if m1['content'] != m2['content']:
                    changed_count += 1
                    self.stdout.write('Changed message id: {id}'.format(id=m1['id']))
        self.stdout.write('Total messages: {count}'.format(count=total_count))
        self.stdout.write('Changed messages: {count}'.format(count=changed_count))
|
|
@ -0,0 +1,56 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import ujson
|
||||
from typing import Any, Generator
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandParser
|
||||
from django.db.models import QuerySet
|
||||
|
||||
from zerver.lib.message import render_markdown
|
||||
from zerver.models import Message
|
||||
|
||||
|
||||
def queryset_iterator(queryset, chunksize=5000):
    # type: (QuerySet, int) -> Generator
    """Yield every row of ``queryset`` in ascending ``id`` order, chunk by chunk.

    The queryset is re-ordered by ``id`` and consumed through successive
    ``queryset[:chunksize]`` slices; after each slice the queryset is narrowed
    to rows whose ``id`` is greater than the last one yielded.  Iteration ends
    when a slice produces no rows.

    Args:
        queryset: Object offering ``order_by``, slicing and
            ``filter(id__gt=...)``; its rows must expose an ``id`` attribute.
        chunksize: Maximum number of rows taken per slice.

    Yields:
        The rows of ``queryset``, one at a time.
    """
    queryset = queryset.order_by('id')
    while True:
        # Reset for every chunk so an empty slice is detected reliably rather
        # than re-using the id left over from a previous chunk.
        last_id = None
        for row in queryset[:chunksize]:
            last_id = row.id
            yield row
        if last_id is None:
            # Empty chunk: nothing left.  This replaces a separate exists()
            # probe before each slice.
            return
        queryset = queryset.filter(id__gt=last_id)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that dumps freshly rendered messages as a JSON array.

    The output file holds one ``{"id": ..., "content": ...}`` object per
    message, in ascending id order, where ``content`` is the result of
    ``render_markdown(message, message.content)``.
    """

    help = """
    Render messages to a file.
    Usage: python manage.py render_messages <destination> [--amount=100000] [--latest_id=<id>]
    """

    def add_arguments(self, parser):
        # type: (CommandParser) -> None
        """Register the destination path and the two optional range limits."""
        parser.add_argument('destination', help='Destination file path')
        parser.add_argument('--amount', default=100000, type=int,
                            help='Number of messages to render')
        parser.add_argument('--latest_id', default=0, type=int,
                            help="Last message id to render")

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Render the messages with ids in ``(latest - amount, latest]``.

        ``latest`` is ``--latest_id`` when that is non-zero, otherwise the id
        of the newest message.  The directory of the destination file is
        created when it does not exist yet.
        """
        dest_dir = os.path.realpath(os.path.dirname(options['destination']))
        # int() is kept so string values passed programmatically still work.
        amount = int(options['amount'])
        latest = int(options['latest_id']) or Message.objects.latest('id').id
        self.stdout.write('Latest message id: {latest}'.format(latest=latest))
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        with open(options['destination'], 'w') as result:
            result.write('[')
            messages = Message.objects.filter(id__gt=latest - amount, id__lte=latest).order_by('id')
            is_first = True
            for message in queryset_iterator(messages):
                # Separate entries by writing the comma *before* every entry
                # but the first.  Keying the comma on `message.id != latest`
                # leaves a trailing comma (invalid JSON) whenever no message
                # with id == latest exists.
                if not is_first:
                    result.write(',')
                is_first = False
                result.write(ujson.dumps({
                    'id': message.id,
                    'content': render_markdown(message, message.content)
                }))
            result.write(']')
|
|
@ -1,40 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
from typing import Any
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
import zerver.lib.bugdown as bugdown
|
||||
from zerver.lib.message import re_render_content_for_management_command
|
||||
from zerver.models import Message
|
||||
import datetime
|
||||
import sys
|
||||
import time
|
||||
|
||||
class Command(BaseCommand):
    """Disabled command for re-rendering messages with an outdated rendering.

    ``handle`` prints a warning and exits with status 1 before any work is
    done; the batch loop below the exit is kept for reference only.
    """

    help = """Render all historical messages that haven't been rendered yet.

Usage: python manage.py render_old_messages"""

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Print the 'not supported' notice and terminate with exit code 1."""
        print('''
            This command is currently not supported, and it can be somewhat
            dangerous to run on large instances. Before upgrading messages
            to a new version, you should make sure that the old renderings
            are actually invalid; it could be quite the opposite (we might
            not want to render V1 messages with V2).
            ''')
        sys.exit(1)

        # Everything below is unreachable while the exit above is in place.
        rendered_so_far = 0
        while True:
            # Take up to 100 messages whose stored rendering version differs
            # from the current bugdown version.
            batch = Message.objects.exclude(rendered_content_version=bugdown.version)[0:100]
            batch_size = len(batch)
            if not batch_size:
                break
            for stale_message in batch:
                re_render_content_for_management_command(stale_message)
            rendered_so_far += batch_size
            print(datetime.datetime.now(), rendered_so_far)
            # Put in some sleep so this can run safely on low resource machines
            time.sleep(0.25)
|
Loading…
Reference in New Issue