mirror of https://github.com/zulip/zulip.git
realm_export: Handle hard head-of-queue failures.
Realm exports may OOM on deployments with low memory. To ensure forward progress, record the export's start time in its RealmAuditLog entry, and use the presence of that timestamp to avoid re-attempting an export that has already been tried once; such an export is marked as failed instead.
This commit is contained in:
parent
4a43856ba7
commit
7811e99548
|
@ -9,6 +9,7 @@ from django.utils.timezone import now as timezone_now
|
|||
|
||||
from analytics.models import RealmCount
|
||||
from zerver.lib.exceptions import JsonableError
|
||||
from zerver.lib.queue import queue_json_publish
|
||||
from zerver.lib.test_classes import ZulipTestCase
|
||||
from zerver.lib.test_helpers import (
|
||||
HostRequestMock,
|
||||
|
@ -243,6 +244,31 @@ class RealmExportTest(ZulipTestCase):
|
|||
result = self.client_delete(f"/json/export/realm/{export_id}")
|
||||
self.assert_json_error(result, "Export failed, nothing to delete")
|
||||
|
||||
# If the queue worker sees the same export-id again, it aborts
|
||||
# instead of retrying
|
||||
with patch("zerver.lib.export.do_export_realm") as mock_export:
|
||||
with self.assertLogs(level="INFO") as info_logs:
|
||||
queue_json_publish(
|
||||
"deferred_work",
|
||||
{
|
||||
"type": "realm_export",
|
||||
"time": 42,
|
||||
"realm_id": admin.realm.id,
|
||||
"user_profile_id": admin.id,
|
||||
"id": export_id,
|
||||
},
|
||||
)
|
||||
mock_export.assert_not_called()
|
||||
self.assertEqual(
|
||||
info_logs.output,
|
||||
[
|
||||
(
|
||||
"ERROR:zerver.worker.queue_processors:Marking export for realm zulip "
|
||||
"as failed due to retry -- possible OOM during export?"
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
def test_realm_export_rate_limited(self) -> None:
|
||||
admin = self.example_user("iago")
|
||||
self.login_user(admin)
|
||||
|
|
|
@ -1066,6 +1066,20 @@ class DeferredWorker(QueueProcessingWorker):
|
|||
extra_data = {}
|
||||
if export_event.extra_data is not None:
|
||||
extra_data = orjson.loads(export_event.extra_data)
|
||||
if extra_data.get("started_timestamp") is not None:
|
||||
logger.error(
|
||||
"Marking export for realm %s as failed due to retry -- possible OOM during export?",
|
||||
realm.string_id,
|
||||
)
|
||||
extra_data["failed_timestamp"] = timezone_now().timestamp()
|
||||
export_event.extra_data = orjson.dumps(extra_data).decode()
|
||||
export_event.save(update_fields=["extra_data"])
|
||||
notify_realm_export(user_profile)
|
||||
return
|
||||
|
||||
extra_data["started_timestamp"] = timezone_now().timestamp()
|
||||
export_event.extra_data = orjson.dumps(extra_data).decode()
|
||||
export_event.save(update_fields=["extra_data"])
|
||||
|
||||
logger.info(
|
||||
"Starting realm export for realm %s into %s, initiated by user_profile_id %s",
|
||||
|
|
Loading…
Reference in New Issue