2024-07-12 02:30:25 +02:00
|
|
|
from collections.abc import Callable
|
2020-12-28 11:30:07 +01:00
|
|
|
from datetime import datetime
|
2024-07-12 02:30:25 +02:00
|
|
|
from typing import Any
|
2018-11-01 15:16:26 +01:00
|
|
|
|
2020-12-28 11:30:07 +01:00
|
|
|
import orjson
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.db import connection
|
2023-09-26 17:48:34 +02:00
|
|
|
from django.db.models import F, Func, JSONField, Q, QuerySet, Subquery, TextField, Value
|
|
|
|
from django.db.models.functions import Cast
|
2018-11-01 21:48:49 +01:00
|
|
|
|
2022-03-02 00:49:18 +01:00
|
|
|
from zerver.lib.types import EditHistoryEvent
|
2023-09-26 17:48:34 +02:00
|
|
|
from zerver.lib.utils import assert_is_not_none
|
2023-05-31 16:56:18 +02:00
|
|
|
from zerver.models import Message, Reaction, Stream, UserMessage, UserProfile
|
2018-11-01 15:16:26 +01:00
|
|
|
|
2018-11-01 18:26:20 +01:00
|
|
|
# Only use these constants for events.
|
|
|
|
ORIG_TOPIC = "orig_subject"
|
|
|
|
TOPIC_NAME = "subject"
|
2020-02-07 13:09:17 +01:00
|
|
|
TOPIC_LINKS = "topic_links"
|
2018-11-09 17:25:57 +01:00
|
|
|
MATCH_TOPIC = "match_subject"
|
2018-11-01 18:26:20 +01:00
|
|
|
|
2021-07-13 09:37:57 +02:00
|
|
|
# Prefix used to mark a topic as resolved.
|
|
|
|
RESOLVED_TOPIC_PREFIX = "✔ "
|
|
|
|
|
2018-11-10 17:10:45 +01:00
|
|
|
# This constant is pretty closely coupled to the
|
|
|
|
# database, but it's the JSON field.
|
|
|
|
EXPORT_TOPIC_NAME = "subject"
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
"""
|
2018-11-10 22:50:28 +01:00
|
|
|
The following functions are for user-facing APIs
|
|
|
|
where we'll want to support "subject" for a while.
|
2021-02-12 08:20:45 +01:00
|
|
|
"""
|
2018-11-10 22:50:28 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def get_topic_from_message_info(message_info: dict[str, Any]) -> str:
    """
    Extract the topic from a message dict that may originate outside
    of our own code, especially from third-party APIs and bots.

    The "topic" key wins over the legacy "subject" key when both are
    present.  At least one of the two is expected to exist; when
    neither does, the "subject" lookup raises KeyError, which the
    caller must be prepared to handle.
    """
    key = "topic" if "topic" in message_info else "subject"
    return message_info[key]
|
2018-11-10 22:50:28 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
"""
|
2018-11-10 23:01:45 +01:00
|
|
|
TRY TO KEEP THIS DIVIDING LINE.
|
|
|
|
|
|
|
|
Below this line we want to make it so that functions are only
|
|
|
|
using "subject" in the DB sense, and nothing customer facing.
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
"""
|
2018-11-10 23:01:45 +01:00
|
|
|
|
|
|
|
# This is used in low-level message functions in
|
|
|
|
# zerver/lib/message.py, and it's not user facing.
|
|
|
|
DB_TOPIC_NAME = "subject"
|
2021-02-12 08:20:45 +01:00
|
|
|
MESSAGE__TOPIC = "message__subject"
|
2018-11-10 23:01:45 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2022-06-23 20:01:38 +02:00
|
|
|
def filter_by_topic_name_via_message(
    query: QuerySet[UserMessage], topic_name: str
) -> QuerySet[UserMessage]:
    """
    Narrow a UserMessage queryset to the rows whose related message
    has the given topic, compared case-insensitively.
    """
    topic_lookup = {"message__subject__iexact": topic_name}
    return query.filter(**topic_lookup)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-08-30 21:19:37 +02:00
|
|
|
def messages_for_topic(
    realm_id: int, stream_recipient_id: int, topic_name: str
) -> QuerySet[Message]:
    """
    Build the queryset of messages in the given realm that were sent
    to the given recipient under the given topic.  The topic is
    matched case-insensitively.
    """
    # Uses index: zerver_message_realm_recipient_upper_subject
    topic_lookup = dict(
        realm_id=realm_id,
        recipient_id=stream_recipient_id,
        subject__iexact=topic_name,
    )
    return Message.objects.filter(**topic_lookup)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-11-01 20:12:59 +01:00
|
|
|
def save_message_for_edit_use_case(message: Message) -> None:
    """
    Save a message after an edit, writing only the listed columns
    rather than the whole row.
    """
    edited_columns = [
        TOPIC_NAME,
        "content",
        "rendered_content",
        "rendered_content_version",
        "last_edit_time",
        "edit_history",
        "has_attachment",
        "has_image",
        "has_link",
        "recipient_id",
    ]
    message.save(update_fields=edited_columns)
|
2019-09-24 21:10:56 +02:00
|
|
|
|
2018-11-01 20:12:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def user_message_exists_for_topic(
    user_profile: UserProfile, recipient_id: int, topic_name: str
) -> bool:
    """
    Report whether the user has at least one UserMessage row for a
    message sent to the given recipient under the given topic.  The
    topic is matched case-insensitively.
    """
    matching_rows = UserMessage.objects.filter(
        user_profile=user_profile,
        message__recipient_id=recipient_id,
        message__subject__iexact=topic_name,
    )
    return matching_rows.exists()
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-04-22 07:02:04 +02:00
|
|
|
def update_edit_history(
    message: Message, last_edit_time: datetime, edit_history_event: EditHistoryEvent
) -> None:
    """
    Record an edit on the in-memory message object.

    Sets message.last_edit_time and places edit_history_event at the
    front of the JSON-encoded message.edit_history list, creating the
    list when there is no history yet.  The message is not saved here.
    """
    message.last_edit_time = last_edit_time

    events: list[EditHistoryEvent]
    if message.edit_history is None:
        # First edit: the history consists of just this event.
        events = [edit_history_event]
    else:
        events = orjson.loads(message.edit_history)
        # Newest event goes first.
        events.insert(0, edit_history_event)

    message.edit_history = orjson.dumps(events).decode()
|
|
|
|
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def update_messages_for_topic_edit(
    acting_user: UserProfile,
    edited_message: Message,
    propagate_mode: str,
    orig_topic_name: str,
    topic_name: str | None,
    new_stream: Stream | None,
    old_stream: Stream,
    edit_history_event: EditHistoryEvent,
    last_edit_time: datetime,
) -> tuple[QuerySet[Message], Callable[[], QuerySet[Message]]]:
    """
    Prepare the bulk update that propagates a topic and/or stream
    change from edited_message to other messages of its original topic.

    Returns a pair of:

    - The queryset of the other messages that will be changed.  It is
      narrowed according to propagate_mode and, when new_stream is
      given, passed through bulk_access_stream_messages_query for
      acting_user.
    - A zero-argument callable that runs the UPDATE on that queryset
      and returns a fresh queryset, selected by id, covering
      edited_message plus the messages matched when this function ran.

    The UPDATE itself is not issued until the callable is invoked.
    """
    # Uses index: zerver_message_realm_recipient_upper_subject
    topic_messages = Message.objects.filter(
        realm_id=old_stream.realm_id,
        recipient_id=assert_is_not_none(old_stream.recipient_id),
        subject__iexact=orig_topic_name,
    )
    if propagate_mode == "change_all":
        topic_messages = topic_messages.exclude(id=edited_message.id)
    elif propagate_mode == "change_later":
        topic_messages = topic_messages.filter(id__gt=edited_message.id)

    if new_stream is not None:
        # If we're moving the messages between streams, only move
        # messages that the acting user can access, so that one cannot
        # gain access to messages through moving them.
        from zerver.lib.message import bulk_access_stream_messages_query

        topic_messages = bulk_access_stream_messages_query(
            acting_user, topic_messages, old_stream
        )
    # Otherwise (single-message edits or topic moves within a stream),
    # we allow moving history the user may not have access to, in order
    # to keep topics together.

    # We cast the `edit_history` column to jsonb (defaulting NULL to
    # `[]`), use the `||` array concatenation operator to put the new
    # event in front of it, and cast the result back to text.  See
    # #26496 for making this column itself jsonb, which is a
    # complicated migration.
    #
    # This equates to:
    #    "edit_history" = (
    #      ( '[{ ..json event.. }]' )::jsonb
    #      ||
    #      (COALESCE("zerver_message"."edit_history", '[]'))::jsonb
    #     )::text
    new_event_json = Cast(
        Value(orjson.dumps([edit_history_event]).decode()),
        JSONField(),
    )
    stored_history_json = Cast(
        Func(
            F("edit_history"),
            Value("[]"),
            function="COALESCE",
        ),
        JSONField(),
    )
    field_changes: dict[str, object] = {
        "last_edit_time": last_edit_time,
        "edit_history": Cast(
            Func(
                new_event_json,
                stored_history_json,
                function="",
                arg_joiner=" || ",
            ),
            TextField(),
        ),
    }
    if new_stream is not None:
        field_changes["recipient"] = new_stream.recipient
    if topic_name is not None:
        field_changes["subject"] = topic_name

    # The update will cause the 'topic_messages' query to no longer
    # match any rows; we capture the set of matching ids first, do the
    # update, and then return a fresh collection -- so we know their
    # metadata has been updated for the UPDATE command, and the caller
    # can update the remote cache with that.
    affected_ids = [edited_message.id, *topic_messages.values_list("id", flat=True)]

    def propagate() -> QuerySet[Message]:
        topic_messages.update(**field_changes)
        refreshed = Message.objects.filter(id__in=affected_ids)
        return refreshed.select_related(*Message.DEFAULT_SELECT_RELATED)

    return topic_messages, propagate
|
2018-11-01 19:55:14 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def generate_topic_history_from_db_rows(rows: list[tuple[str, int]]) -> list[dict[str, Any]]:
    """
    Collapse (topic_name, max_message_id) rows into one entry per
    topic, treating names that differ only by case as the same topic.

    Each returned dict has a "name" and a "max_id" key; the list is
    ordered by "max_id", largest first.  The input list is not modified.
    """
    # Walking the rows in ascending max_message_id order means that,
    # for a topic stored with several casings, the most recent row is
    # the last one written under its lowercased key, so it wins.
    latest_by_canonical_name: dict[str, tuple[int, str]] = {
        name.lower(): (max_id, name)
        for name, max_id in sorted(rows, key=lambda row: row[1])
    }

    entries = [
        dict(name=name, max_id=max_id)
        for max_id, name in latest_by_canonical_name.values()
    ]
    entries.sort(key=lambda entry: -entry["max_id"])
    return entries
|
2018-11-01 15:16:26 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def get_topic_history_for_public_stream(realm_id: int, recipient_id: int) -> list[dict[str, Any]]:
    """
    Return the topic history for a recipient, considering every
    message in the realm sent to that recipient.

    The rows fetched are (topic, max message id) pairs grouped by the
    exact stored topic name; they are handed to
    generate_topic_history_from_db_rows, whose result is returned.
    """
    # Uses index: zerver_message_realm_recipient_subject
    # Note that this is *case-sensitive*, so that we can display the
    # most recently-used case (in generate_topic_history_from_db_rows)
    query = """
    SELECT
        "zerver_message"."subject" as topic,
        max("zerver_message".id) as max_message_id
    FROM "zerver_message"
    WHERE (
        "zerver_message"."realm_id" = %s AND
        "zerver_message"."recipient_id" = %s
    )
    GROUP BY (
        "zerver_message"."subject"
    )
    ORDER BY max("zerver_message".id) DESC
    """
    # Using the cursor as a context manager closes it even when
    # execute() or fetchall() raises.
    with connection.cursor() as cursor:
        cursor.execute(query, [realm_id, recipient_id])
        rows = cursor.fetchall()

    return generate_topic_history_from_db_rows(rows)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_topic_history_for_stream(
    user_profile: UserProfile, recipient_id: int, public_history: bool
) -> list[dict[str, Any]]:
    """
    Return the topic history of a recipient as seen by user_profile.

    When public_history is true, this delegates to
    get_topic_history_for_public_stream for the user's realm.
    Otherwise only messages for which the user has a UserMessage row
    are considered.  In both cases the rows are post-processed by
    generate_topic_history_from_db_rows.
    """
    if public_history:
        return get_topic_history_for_public_stream(user_profile.realm_id, recipient_id)

    # Uses index: zerver_message_realm_recipient_subject
    # Note that this is *case-sensitive*, so that we can display the
    # most recently-used case (in generate_topic_history_from_db_rows)
    query = """
    SELECT
        "zerver_message"."subject" as topic,
        max("zerver_message".id) as max_message_id
    FROM "zerver_message"
    INNER JOIN "zerver_usermessage" ON (
        "zerver_usermessage"."message_id" = "zerver_message"."id"
    )
    WHERE (
        "zerver_usermessage"."user_profile_id" = %s AND
        "zerver_message"."realm_id" = %s AND
        "zerver_message"."recipient_id" = %s
    )
    GROUP BY (
        "zerver_message"."subject"
    )
    ORDER BY max("zerver_message".id) DESC
    """
    # Using the cursor as a context manager closes it even when
    # execute() or fetchall() raises.
    with connection.cursor() as cursor:
        cursor.execute(query, [user_profile.id, user_profile.realm_id, recipient_id])
        rows = cursor.fetchall()

    return generate_topic_history_from_db_rows(rows)
|
2022-10-28 00:25:31 +02:00
|
|
|
|
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def get_topic_resolution_and_bare_name(stored_name: str) -> tuple[bool, str]:
    """
    Split a stored topic name into its resolved flag and its bare name.

    Whether a topic is resolved is not tracked by a boolean database
    column; it is expressed only by the topic name starting with
    RESOLVED_TOPIC_PREFIX.  This returns a tuple of:

    - Whether stored_name starts with the resolution prefix
    - stored_name with that prefix, if present, removed
    """
    is_resolved = stored_name.startswith(RESOLVED_TOPIC_PREFIX)
    bare_name = stored_name.removeprefix(RESOLVED_TOPIC_PREFIX) if is_resolved else stored_name
    return (is_resolved, bare_name)
|
2023-05-31 16:56:18 +02:00
|
|
|
|
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def participants_for_topic(realm_id: int, recipient_id: int, topic_name: str) -> set[int]:
    """
    Return the ids of the users who either sent or reacted to a
    message in the topic (matched case-insensitively).

    The function is expensive for large numbers of messages in the topic.
    """
    # Uses index: zerver_message_realm_recipient_upper_subject
    topic_messages = Message.objects.filter(
        realm_id=realm_id,
        recipient_id=recipient_id,
        subject__iexact=topic_name,
    )

    sender_ids = Subquery(topic_messages.values("sender_id"))
    reactor_ids = Subquery(
        Reaction.objects.filter(message__in=topic_messages).values("user_profile_id")
    )

    participant_ids = UserProfile.objects.filter(
        Q(id__in=sender_ids) | Q(id__in=reactor_ids)
    ).values_list("id", flat=True)
    return set(participant_ids)
|