zulip/zerver/lib/attachments.py

196 lines
7.1 KiB
Python

from datetime import timedelta
from typing import Any
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.db.models import Exists, OuterRef, QuerySet
from django.utils.timezone import now as timezone_now
from django.utils.translation import gettext as _
from zerver.lib.exceptions import JsonableError, RateLimitedError
from zerver.lib.upload import delete_message_attachment
from zerver.models import (
ArchivedAttachment,
Attachment,
Message,
Realm,
Recipient,
Stream,
Subscription,
UserMessage,
UserProfile,
)
def user_attachments(user_profile: UserProfile) -> list[dict[str, Any]]:
    """Return the ``to_dict()`` form of every attachment owned by ``user_profile``.

    The ``messages`` relation is prefetched before the rows are serialized.
    """
    owned = Attachment.objects.filter(owner=user_profile)
    owned_with_messages = owned.prefetch_related("messages")
    return [attachment.to_dict() for attachment in owned_with_messages]
def access_attachment_by_id(
    user_profile: UserProfile, attachment_id: int, needs_owner: bool = False
) -> Attachment:
    """Look up an attachment by its id.

    When ``needs_owner`` is true, the lookup is additionally restricted to
    attachments whose owner is ``user_profile``; otherwise ``user_profile``
    is not consulted.

    Raises:
        JsonableError: if no attachment matches the lookup.
    """
    matching = Attachment.objects.filter(id=attachment_id)
    if needs_owner:
        matching = matching.filter(owner=user_profile)

    found = matching.first()
    if found is not None:
        return found
    raise JsonableError(_("Invalid attachment"))
def remove_attachment(user_profile: UserProfile, attachment: Attachment) -> None:
    """Delete an attachment's uploaded data and then its database row.

    Calls ``delete_message_attachment`` with the attachment's ``path_id``
    and, only if that succeeds, deletes the ``Attachment`` row itself.
    ``user_profile`` is part of the interface but is not consulted here.

    Raises:
        JsonableError: if ``delete_message_attachment`` raises any
            ``Exception``. The database row is left untouched in that case.
    """
    try:
        delete_message_attachment(attachment.path_id)
    except Exception as e:
        # Chain the underlying failure explicitly so the root cause stays
        # attached to the user-facing error in tracebacks.
        raise JsonableError(
            _("An error occurred while deleting the attachment. Please try again later.")
        ) from e
    attachment.delete()
def validate_attachment_request_for_spectator_access(realm: Realm, attachment: Attachment) -> bool:
    """Decide whether an anonymous request made in ``realm`` may fetch ``attachment``.

    Access is granted only if all of the following hold:

    * the attachment belongs to ``realm``;
    * its ``is_web_public`` flag is true (the flag is computed and stored
      first if it is still ``None``);
    * when ``settings.RATE_LIMITING`` is enabled, the per-file spectator
      rate limit has not been exceeded.

    Returns ``True`` if access is allowed and ``False`` otherwise; a rate
    limit hit is reported as ``False`` rather than raised.
    """
    if attachment.realm != realm:
        return False

    # Update cached is_web_public property, if necessary.
    if attachment.is_web_public is None:
        # Only rows whose flag is still unset are touched, and the value is
        # computed inside the same UPDATE statement. Doing it in a single
        # query avoids a race between checking and setting, where the
        # attachment could have been moved again.
        still_uncached = Attachment.objects.filter(id=attachment.id, is_web_public__isnull=True)
        # Uses index: zerver_attachment_messages_attachment_id_message_id_key
        in_web_public_stream = Exists(
            Message.objects.filter(
                realm_id=realm.id,
                attachment=OuterRef("id"),
                recipient__stream__invite_only=False,
                recipient__stream__is_web_public=True,
            ),
        )
        still_uncached.update(is_web_public=in_web_public_stream)
        attachment.refresh_from_db()

    if not attachment.is_web_public:
        return False

    if not settings.RATE_LIMITING:
        return True

    try:
        from zerver.lib.rate_limiter import rate_limit_spectator_attachment_access_by_file

        rate_limit_spectator_attachment_access_by_file(attachment.path_id)
    except RateLimitedError:
        return False
    return True
def validate_attachment_request(
    maybe_user_profile: UserProfile | AnonymousUser,
    path_id: str,
    realm: Realm | None = None,
) -> tuple[bool, Attachment | None]:
    """Check whether the requester may access the attachment stored at ``path_id``.

    Returns an ``(is_authorized, attachment)`` pair. ``attachment`` is
    ``None`` only when no ``Attachment`` row has that ``path_id``; otherwise
    it is the row that was found, whether or not access is granted.

    Anonymous requesters are delegated to
    ``validate_attachment_request_for_spectator_access``, and ``realm`` must
    be provided for them. A logged-in user is authorized if any of the
    following holds, checked in this order:

    1. they own the attachment;
    2. the attachment is realm-public, belongs to their realm, and they can
       access public streams;
    3. they have a ``UserMessage`` row for a message linking the attachment;
    4. they have an active subscription to a stream that one of those
       messages was sent to, and that stream's history is public to
       subscribers.
    """
    try:
        attachment = Attachment.objects.get(path_id=path_id)
    except Attachment.DoesNotExist:
        return False, None

    if isinstance(maybe_user_profile, AnonymousUser):
        assert realm is not None
        spectator_allowed = validate_attachment_request_for_spectator_access(realm, attachment)
        return spectator_allowed, attachment

    user_profile = maybe_user_profile
    assert isinstance(user_profile, UserProfile)

    # Update cached is_realm_public property, if necessary.
    if attachment.is_realm_public is None:
        # Only rows whose flag is still unset are touched, and the value is
        # computed inside the same UPDATE statement. Doing it in a single
        # query avoids a race between checking and setting, where the
        # attachment could have been moved again.
        still_uncached = Attachment.objects.filter(id=attachment.id, is_realm_public__isnull=True)
        # Uses index: zerver_attachment_messages_attachment_id_message_id_key
        # NOTE(review): the subquery is scoped to the requesting user's realm,
        # not the attachment's; presumably callers guarantee these match --
        # confirm.
        in_public_stream = Exists(
            Message.objects.filter(
                realm_id=user_profile.realm_id,
                attachment=OuterRef("id"),
                recipient__stream__invite_only=False,
            ),
        )
        still_uncached.update(is_realm_public=in_public_stream)
        attachment.refresh_from_db()

    # If you own the file, you can access it.
    if user_profile == attachment.owner:
        return True, attachment

    # Any user in the realm can access realm-public files
    if (
        attachment.is_realm_public
        and attachment.realm == user_profile.realm
        and user_profile.can_access_public_streams()
    ):
        return True, attachment

    # If it was sent in a direct message or private stream message, then
    # anyone who received that message can access it.
    linked_messages = attachment.messages.all()
    received_a_linking_message = UserMessage.objects.filter(
        user_profile=user_profile, message__in=linked_messages
    ).exists()
    if received_a_linking_message:
        return True, attachment

    # The user didn't receive any of the messages that included this
    # attachment. But they might still have access to it, if it was
    # sent to a stream they are on where history is public to
    # subscribers.

    # These are subscriptions to a stream one of the messages was sent to
    message_recipient_ids = [message.recipient_id for message in linked_messages]
    subscribed_stream_ids = Subscription.objects.filter(
        user_profile=user_profile,
        active=True,
        recipient__type=Recipient.STREAM,
        recipient__in=message_recipient_ids,
    ).values_list("recipient__type_id", flat=True)
    if not subscribed_stream_ids:
        return False, attachment

    history_is_visible = Stream.objects.filter(
        id__in=subscribed_stream_ids, history_public_to_subscribers=True
    ).exists()
    return history_is_visible, attachment
def get_old_unclaimed_attachments(
    weeks_ago: int,
) -> tuple[QuerySet[Attachment], QuerySet[ArchivedAttachment]]:
    """
    Build the querysets of uploaded files that are old enough and no longer
    referenced, returned as ``(old_attachments, old_archived_attachments)``.

    The logic in this function is fairly tricky. The essence is that
    a file should be cleaned up if and only if it is not referenced by any
    Message, ScheduledMessage or ArchivedMessage. The way to find that out
    is through the Attachment and ArchivedAttachment tables.

    The queries are complicated by the fact that an uploaded file
    may have either only an Attachment row, only an ArchivedAttachment row,
    or both - depending on whether some, all or none of the messages
    linking to it have been archived. Each query therefore also checks the
    row with the same id in the other table.

    Only rows created more than ``weeks_ago`` weeks before now are included.
    """
    cutoff = timezone_now() - timedelta(weeks=weeks_ago)

    # The Attachment vs ArchivedAttachment queries are asymmetric because only
    # Attachment has the scheduled_messages relation.

    # True when the ArchivedAttachment row with the same id still has messages.
    archived_row_in_use = Exists(
        ArchivedAttachment.objects.filter(id=OuterRef("id")).exclude(messages=None)
    )
    # True when the Attachment row with the same id still has a message or a
    # scheduled message.
    live_row_in_use = Exists(
        Attachment.objects.filter(id=OuterRef("id")).exclude(
            messages=None, scheduled_messages=None
        )
    )

    old_attachments = Attachment.objects.alias(has_other_messages=archived_row_in_use).filter(
        messages=None,
        scheduled_messages=None,
        create_time__lt=cutoff,
        has_other_messages=False,
    )
    old_archived_attachments = ArchivedAttachment.objects.alias(
        has_other_messages=live_row_in_use
    ).filter(messages=None, create_time__lt=cutoff, has_other_messages=False)

    return old_attachments, old_archived_attachments