message_fetch: Move limit_query_to_range to zerver.lib.narrow.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2022-10-21 13:29:15 -04:00 · 2022-10-21 13:29:15 -04:00 · 0a0a70b33d
parent 1095efeb52
commit 0a0a70b33d
3 changed files with 159 additions and 166 deletions
--- a/zerver/lib/narrow.py
+++ b/zerver/lib/narrow.py
@ -20,7 +20,7 @@ from django.core.exceptions import ValidationError
 from django.db import connection
 from django.utils.translation import gettext as _
 from sqlalchemy.dialects import postgresql
-from sqlalchemy.engine import Connection
+from sqlalchemy.engine import Connection, Row
 from sqlalchemy.sql import (
    ClauseElement,
    ColumnElement,
@ -35,7 +35,9 @@ from sqlalchemy.sql import (
    or_,
    select,
    table,
+    union_all,
 )
+from sqlalchemy.sql.selectable import SelectBase
 from sqlalchemy.types import ARRAY, Boolean, Integer, Text

 from zerver.lib.addressee import get_user_profiles, get_user_profiles_by_ids
@ -980,3 +982,153 @@ def parse_anchor_value(anchor_val: Optional[str], use_first_unread_anchor: bool)
        return anchor
    except ValueError:
        raise JsonableError(_("Invalid anchor"))
+
+
+def limit_query_to_range(
+    query: Select,
+    num_before: int,
+    num_after: int,
+    anchor: int,
+    anchored_to_left: bool,
+    anchored_to_right: bool,
+    id_col: ColumnElement[Integer],
+    first_visible_message_id: int,
+) -> SelectBase:
+    """
+    This code is actually generic enough that we could move it to a
+    library, but our only caller for now is message search.
+    """
+    need_before_query = (not anchored_to_left) and (num_before > 0)
+    need_after_query = (not anchored_to_right) and (num_after > 0)
+
+    need_both_sides = need_before_query and need_after_query
+
+    # The semantics of our flags are as follows:
+    #
+    # num_before = number of rows < anchor
+    # num_after = number of rows > anchor
+    #
+    # But we also want the row where id == anchor (if it exists),
+    # and we don't want to union up to 3 queries.  So in some cases
+    # we do things like `after_limit = num_after + 1` to grab the
+    # anchor row in the "after" query.
+    #
+    # Note that in some cases, if the anchor row isn't found, we
+    # actually may fetch an extra row at one of the extremes.
+    if need_both_sides:
+        before_anchor = anchor - 1
+        after_anchor = max(anchor, first_visible_message_id)
+        before_limit = num_before
+        after_limit = num_after + 1
+    elif need_before_query:
+        before_anchor = anchor
+        before_limit = num_before
+        if not anchored_to_right:
+            before_limit += 1
+    elif need_after_query:
+        after_anchor = max(anchor, first_visible_message_id)
+        after_limit = num_after + 1
+
+    if need_before_query:
+        before_query = query
+
+        if not anchored_to_right:
+            before_query = before_query.where(id_col <= before_anchor)
+
+        before_query = before_query.order_by(id_col.desc())
+        before_query = before_query.limit(before_limit)
+
+    if need_after_query:
+        after_query = query
+
+        if not anchored_to_left:
+            after_query = after_query.where(id_col >= after_anchor)
+
+        after_query = after_query.order_by(id_col.asc())
+        after_query = after_query.limit(after_limit)
+
+    if need_both_sides:
+        return union_all(before_query.self_group(), after_query.self_group())
+    elif need_before_query:
+        return before_query
+    elif need_after_query:
+        return after_query
+    else:
+        # If we don't have either a before_query or after_query, it's because
+        # some combination of num_before/num_after/anchor are zero or
+        # use_first_unread_anchor logic found no unread messages.
+        #
+        # The most likely reason is somebody is doing an id search, so searching
+        # for something like `message_id = 42` is exactly what we want.  In other
+        # cases, which could possibly be buggy API clients, at least we will
+        # return at most one row here.
+        return query.where(id_col == anchor)
+
+
+def post_process_limited_query(
+    rows: Sequence[Union[Row, Sequence[Any]]],
+    num_before: int,
+    num_after: int,
+    anchor: int,
+    anchored_to_left: bool,
+    anchored_to_right: bool,
+    first_visible_message_id: int,
+) -> Dict[str, Any]:
+    # Our queries may have fetched extra rows if they added
+    # "headroom" to the limits, but we want to truncate those
+    # rows.
+    #
+    # Also, in cases where we had non-zero values of num_before or
+    # num_after, we want to know found_oldest and found_newest, so
+    # that the clients will know that they got complete results.
+
+    if first_visible_message_id > 0:
+        visible_rows: Sequence[Union[Row, Sequence[Any]]] = [
+            r for r in rows if r[0] >= first_visible_message_id
+        ]
+    else:
+        visible_rows = rows
+
+    rows_limited = len(visible_rows) != len(rows)
+
+    if anchored_to_right:
+        num_after = 0
+        before_rows = visible_rows[:]
+        anchor_rows = []
+        after_rows = []
+    else:
+        before_rows = [r for r in visible_rows if r[0] < anchor]
+        anchor_rows = [r for r in visible_rows if r[0] == anchor]
+        after_rows = [r for r in visible_rows if r[0] > anchor]
+
+    if num_before:
+        before_rows = before_rows[-1 * num_before :]
+
+    if num_after:
+        after_rows = after_rows[:num_after]
+
+    visible_rows = [*before_rows, *anchor_rows, *after_rows]
+
+    found_anchor = len(anchor_rows) == 1
+    found_oldest = anchored_to_left or (len(before_rows) < num_before)
+    found_newest = anchored_to_right or (len(after_rows) < num_after)
+    # BUG: history_limited is incorrectly False in the event that we had
+    # to bump `anchor` up due to first_visible_message_id, and there
+    # were actually older messages.  This may be a rare event in the
+    # context where history_limited is relevant, because it can only
+    # happen in one-sided queries with no num_before (see tests tagged
+    # BUG in PostProcessTest for examples), and we don't generally do
+    # those from the UI, so this might be OK for now.
+    #
+    # The correct fix for this probably involves e.g. making a
+    # `before_query` when we increase `anchor` just to confirm whether
+    # messages were hidden.
+    history_limited = rows_limited and found_oldest
+
+    return dict(
+        rows=visible_rows,
+        found_anchor=found_anchor,
+        found_newest=found_newest,
+        found_oldest=found_oldest,
+        history_limited=history_limited,
+    )
--- a/zerver/tests/test_message_fetch.py
+++ b/zerver/tests/test_message_fetch.py
@ -35,6 +35,7 @@ from zerver.lib.narrow import (
    find_first_unread_anchor,
    is_spectator_compatible,
    ok_to_include_history,
+    post_process_limited_query,
 )
 from zerver.lib.sqlalchemy_utils import get_sqlalchemy_connection
 from zerver.lib.streams import StreamDict, create_streams_if_needed, get_public_streams_queryset
@ -56,7 +57,7 @@ from zerver.models import (
    get_realm,
    get_stream,
 )
-from zerver.views.message_fetch import get_messages_backend, post_process_limited_query
+from zerver.views.message_fetch import get_messages_backend

 if TYPE_CHECKING:
    from django.test.client import _MonkeyPatchedWSGIResponse as TestHttpResponse
--- a/zerver/views/message_fetch.py
+++ b/zerver/views/message_fetch.py
@ -1,22 +1,10 @@
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Dict, Iterable, List, Optional, Tuple, Union

 from django.contrib.auth.models import AnonymousUser
 from django.http import HttpRequest, HttpResponse
 from django.utils.html import escape as escape_html
 from django.utils.translation import gettext as _
-from sqlalchemy.engine import Row
-from sqlalchemy.sql import (
-    ColumnElement,
-    Select,
-    and_,
-    column,
-    join,
-    literal,
-    literal_column,
-    select,
-    table,
-    union_all,
-)
+from sqlalchemy.sql import and_, column, join, literal, literal_column, select, table
 from sqlalchemy.sql.selectable import SelectBase
 from sqlalchemy.types import Integer, Text

@ -32,9 +20,11 @@ from zerver.lib.narrow import (
    get_base_query_for_search,
    is_spectator_compatible,
    is_web_public_narrow,
+    limit_query_to_range,
    narrow_parameter,
    ok_to_include_history,
    parse_anchor_value,
+    post_process_limited_query,
 )
 from zerver.lib.request import REQ, RequestNotes, has_request_variables
 from zerver.lib.response import json_success
@ -327,156 +317,6 @@ def get_messages_backend(
    return json_success(request, data=ret)


-def limit_query_to_range(
-    query: Select,
-    num_before: int,
-    num_after: int,
-    anchor: int,
-    anchored_to_left: bool,
-    anchored_to_right: bool,
-    id_col: ColumnElement[Integer],
-    first_visible_message_id: int,
-) -> SelectBase:
-    """
-    This code is actually generic enough that we could move it to a
-    library, but our only caller for now is message search.
-    """
-    need_before_query = (not anchored_to_left) and (num_before > 0)
-    need_after_query = (not anchored_to_right) and (num_after > 0)
-
-    need_both_sides = need_before_query and need_after_query
-
-    # The semantics of our flags are as follows:
-    #
-    # num_after = number of rows < anchor
-    # num_after = number of rows > anchor
-    #
-    # But we also want the row where id == anchor (if it exists),
-    # and we don't want to union up to 3 queries.  So in some cases
-    # we do things like `after_limit = num_after + 1` to grab the
-    # anchor row in the "after" query.
-    #
-    # Note that in some cases, if the anchor row isn't found, we
-    # actually may fetch an extra row at one of the extremes.
-    if need_both_sides:
-        before_anchor = anchor - 1
-        after_anchor = max(anchor, first_visible_message_id)
-        before_limit = num_before
-        after_limit = num_after + 1
-    elif need_before_query:
-        before_anchor = anchor
-        before_limit = num_before
-        if not anchored_to_right:
-            before_limit += 1
-    elif need_after_query:
-        after_anchor = max(anchor, first_visible_message_id)
-        after_limit = num_after + 1
-
-    if need_before_query:
-        before_query = query
-
-        if not anchored_to_right:
-            before_query = before_query.where(id_col <= before_anchor)
-
-        before_query = before_query.order_by(id_col.desc())
-        before_query = before_query.limit(before_limit)
-
-    if need_after_query:
-        after_query = query
-
-        if not anchored_to_left:
-            after_query = after_query.where(id_col >= after_anchor)
-
-        after_query = after_query.order_by(id_col.asc())
-        after_query = after_query.limit(after_limit)
-
-    if need_both_sides:
-        return union_all(before_query.self_group(), after_query.self_group())
-    elif need_before_query:
-        return before_query
-    elif need_after_query:
-        return after_query
-    else:
-        # If we don't have either a before_query or after_query, it's because
-        # some combination of num_before/num_after/anchor are zero or
-        # use_first_unread_anchor logic found no unread messages.
-        #
-        # The most likely reason is somebody is doing an id search, so searching
-        # for something like `message_id = 42` is exactly what we want.  In other
-        # cases, which could possibly be buggy API clients, at least we will
-        # return at most one row here.
-        return query.where(id_col == anchor)
-
-
-def post_process_limited_query(
-    rows: Sequence[Union[Row, Sequence[Any]]],
-    num_before: int,
-    num_after: int,
-    anchor: int,
-    anchored_to_left: bool,
-    anchored_to_right: bool,
-    first_visible_message_id: int,
-) -> Dict[str, Any]:
-    # Our queries may have fetched extra rows if they added
-    # "headroom" to the limits, but we want to truncate those
-    # rows.
-    #
-    # Also, in cases where we had non-zero values of num_before or
-    # num_after, we want to know found_oldest and found_newest, so
-    # that the clients will know that they got complete results.
-
-    if first_visible_message_id > 0:
-        visible_rows: Sequence[Union[Row, Sequence[Any]]] = [
-            r for r in rows if r[0] >= first_visible_message_id
-        ]
-    else:
-        visible_rows = rows
-
-    rows_limited = len(visible_rows) != len(rows)
-
-    if anchored_to_right:
-        num_after = 0
-        before_rows = visible_rows[:]
-        anchor_rows = []
-        after_rows = []
-    else:
-        before_rows = [r for r in visible_rows if r[0] < anchor]
-        anchor_rows = [r for r in visible_rows if r[0] == anchor]
-        after_rows = [r for r in visible_rows if r[0] > anchor]
-
-    if num_before:
-        before_rows = before_rows[-1 * num_before :]
-
-    if num_after:
-        after_rows = after_rows[:num_after]
-
-    visible_rows = [*before_rows, *anchor_rows, *after_rows]
-
-    found_anchor = len(anchor_rows) == 1
-    found_oldest = anchored_to_left or (len(before_rows) < num_before)
-    found_newest = anchored_to_right or (len(after_rows) < num_after)
-    # BUG: history_limited is incorrect False in the event that we had
-    # to bump `anchor` up due to first_visible_message_id, and there
-    # were actually older messages.  This may be a rare event in the
-    # context where history_limited is relevant, because it can only
-    # happen in one-sided queries with no num_before (see tests tagged
-    # BUG in PostProcessTest for examples), and we don't generally do
-    # those from the UI, so this might be OK for now.
-    #
-    # The correct fix for this probably involves e.g. making a
-    # `before_query` when we increase `anchor` just to confirm whether
-    # messages were hidden.
-    history_limited = rows_limited and found_oldest
-
-    return dict(
-        rows=visible_rows,
-        found_anchor=found_anchor,
-        found_newest=found_newest,
-        found_oldest=found_oldest,
-        history_limited=history_limited,
-    )
-
-
@has_request_variables
 def messages_in_narrow_backend(
    request: HttpRequest,