Add tools for dumping and comparing markdown renderings.

This adds a couple of new tools that can be used to determine whether a
particular change in Zulip's backend markdown processor would impact
the rendering of historical messages, without a human actually looking
at the message content.  This is a useful way to verify whether a
change to our markdown syntax is likely to create problems.

[commit message and code tweaked by tabbott]
This commit is contained in:
paxapy 2016-10-24 19:32:09 +03:00 committed by Tim Abbott
parent 6107c877e8
commit 93965a8e89
4 changed files with 96 additions and 40 deletions

View File

@ -162,3 +162,6 @@ git+https://github.com/lorenzogil/glue@01c00cd33b9b78ea868300c266c16acd59a81bfc#
# Needed for cloning virtual environments
git+https://github.com/umairwaheed/virtualenv-clone.git@short-version#egg=virtualenv-clone==0.2.6
# Needed for reading json as stream
ijson==2.3

View File

@ -0,0 +1,37 @@
from __future__ import absolute_import
from __future__ import print_function
from typing import Any
from six.moves import zip
import ijson
from django.core.management.base import BaseCommand, CommandParser
class Command(BaseCommand):
    """Management command that compares two rendered-message dump files."""

    # NOTE(review): the usage line assumes this file is installed as the
    # `compare_messages` command -- confirm against the file name.
    help = """
    Compare two rendered-message dumps and report the messages that differ.
    Usage: python manage.py compare_messages <dump1> <dump2>
    """

    def add_arguments(self, parser):
        # type: (CommandParser) -> None
        """Register the two positional dump-file paths."""
        parser.add_argument('dump1', help='First file to compare')
        parser.add_argument('dump2', help='Second file to compare')

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Stream both dumps in lockstep and report content differences.

        Each dump is read as a JSON array of objects with 'id' and
        'content' keys.  The ids of changed messages are written to stdout,
        followed by the total and changed counts.  If the dumps disagree on
        a message id, or one dump has more entries than the other, an error
        is written to stderr and the comparison stops early.
        """
        total_count = 0
        changed_count = 0
        # Unique marker for "this dump has no more items"; a plain zip()
        # would silently stop at the shorter dump and hide the mismatch.
        exhausted = object()
        with open(options['dump1'], 'r') as dump1, open(options['dump2'], 'r') as dump2:
            items1 = iter(ijson.items(dump1, 'item'))
            items2 = iter(ijson.items(dump2, 'item'))
            while True:
                m1 = next(items1, exhausted)
                m2 = next(items2, exhausted)
                if m1 is exhausted and m2 is exhausted:
                    break
                if m1 is exhausted or m2 is exhausted:
                    # One dump contains messages the other lacks.
                    self.stderr.write('Inconsistent messages dump')
                    break
                total_count += 1
                if m1['id'] != m2['id']:
                    self.stderr.write('Inconsistent messages dump')
                    break
                if m1['content'] != m2['content']:
                    changed_count += 1
                    self.stdout.write('Changed message id: {id}'.format(id=m1['id']))
        self.stdout.write('Total messages: {count}'.format(count=total_count))
        self.stdout.write('Changed messages: {count}'.format(count=changed_count))

View File

@ -0,0 +1,56 @@
from __future__ import absolute_import
from __future__ import print_function
import os
import ujson
from typing import Any, Generator
from django.core.management.base import BaseCommand, CommandParser
from django.db.models import QuerySet
from zerver.lib.message import render_markdown
from zerver.models import Message
def queryset_iterator(queryset, chunksize=5000):
    # type: (QuerySet, int) -> Generator
    """Yield every row of `queryset` in ascending id order, chunk by chunk.

    The queryset is ordered by 'id' and then consumed in slices of at most
    `chunksize` rows; after each slice the queryset is narrowed to rows whose
    id is greater than the last one yielded, until nothing is left.
    """
    remaining = queryset.order_by('id')
    while True:
        if not remaining.exists():
            return
        for record in remaining[:chunksize]:
            last_seen_id = record.id
            yield record
        # Continue strictly after the last row handed out in this chunk.
        remaining = remaining.filter(id__gt=last_seen_id)
class Command(BaseCommand):
    """Management command that dumps rendered message content to a JSON file."""

    help = """
    Render messages to a file.
    Usage: python manage.py render_messages <destination> [--amount=100000] [--latest_id=<id>]
    """

    def add_arguments(self, parser):
        # type: (CommandParser) -> None
        """Register the destination path and the optional range arguments."""
        parser.add_argument('destination', help='Destination file path')
        parser.add_argument('--amount', default=100000, help='Number of messages to render')
        parser.add_argument('--latest_id', default=0, help="Last message id to render")

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Render a range of messages and write them as a JSON array.

        The range covers ids in (latest - amount, latest], where `latest` is
        --latest_id, or the id of the newest message when that option is 0.
        Each array element is an object with the message 'id' and its
        rendered 'content'.  The destination directory is created if it does
        not exist yet.
        """
        dest_dir = os.path.realpath(os.path.dirname(options['destination']))
        amount = int(options['amount'])
        # A latest_id of 0 (the default) means "use the newest message".
        latest = int(options['latest_id']) or Message.objects.latest('id').id
        self.stdout.write('Latest message id: {latest}'.format(latest=latest))
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        with open(options['destination'], 'w') as result:
            result.write('[')
            messages = Message.objects.filter(id__gt=latest - amount, id__lte=latest).order_by('id')
            # Emit the separator before every element except the first, so
            # the output stays valid JSON even when no message with id
            # `latest` exists in the selected range.
            is_first = True
            for message in queryset_iterator(messages):
                if not is_first:
                    result.write(',')
                is_first = False
                result.write(ujson.dumps({
                    'id': message.id,
                    'content': render_markdown(message, message.content)
                }))
            result.write(']')

View File

@ -1,40 +0,0 @@
from __future__ import absolute_import
from __future__ import print_function
from typing import Any
from django.core.management.base import BaseCommand
import zerver.lib.bugdown as bugdown
from zerver.lib.message import re_render_content_for_management_command
from zerver.models import Message
import datetime
import sys
import time
class Command(BaseCommand):
    """Disabled command for re-rendering messages with an outdated rendering version."""

    help = """Render all historical messages that haven't been rendered yet.
Usage: python manage.py render_old_messages"""

    def handle(self, *args, **options):
        # type: (*Any, **Any) -> None
        """Print a warning that the command is unsupported and exit with status 1.

        The re-rendering loop below the exit call is intentionally kept but
        is never reached.
        """
        unsupported_notice = '''
This command is currently not supported, and it can be somewhat
dangerous to run on large instances. Before upgrading messages
to a new version, you should make sure that the old renderings
are actually invalid; it could be quite the opposite (we might
not want to render V1 messages with V2).
'''
        print(unsupported_notice)
        sys.exit(1)

        # Unreachable: retained implementation of the batch re-render.
        rendered_so_far = 0
        while True:
            batch = Message.objects.exclude(rendered_content_version=bugdown.version)[0:100]
            if not batch:
                break
            for stale_message in batch:
                re_render_content_for_management_command(stale_message)
            rendered_so_far += len(batch)
            print(datetime.datetime.now(), rendered_so_far)
            # Put in some sleep so this can run safely on low resource machines
            time.sleep(0.25)