diff --git a/zerver/tests/test_narrow.py b/zerver/tests/test_narrow.py
index 0a3380b818..5a9ab670f0 100644
--- a/zerver/tests/test_narrow.py
+++ b/zerver/tests/test_narrow.py
@@ -340,13 +340,13 @@ class NarrowBuilderTest(ZulipTestCase):
@override_settings(USING_PGROONGA=False)
def test_add_term_using_search_operator(self) -> None:
term = dict(operator='search', operand='"french fries"')
- self._do_add_term_test(term, 'WHERE (content ILIKE %(content_1)s OR subject ILIKE %(subject_1)s) AND (search_tsvector @@ plainto_tsquery(%(param_2)s, %(param_3)s))')
+ self._do_add_term_test(term, 'WHERE (content ILIKE %(content_1)s OR subject ILIKE %(subject_1)s) AND (search_tsvector @@ plainto_tsquery(%(param_4)s, %(param_5)s))')
@override_settings(USING_PGROONGA=False)
def test_add_term_using_search_operator_and_negated(
self) -> None: # NEGATED
term = dict(operator='search', operand='"french fries"', negated=True)
- self._do_add_term_test(term, 'WHERE NOT (content ILIKE %(content_1)s OR subject ILIKE %(subject_1)s) AND NOT (search_tsvector @@ plainto_tsquery(%(param_2)s, %(param_3)s))')
+ self._do_add_term_test(term, 'WHERE NOT (content ILIKE %(content_1)s OR subject ILIKE %(subject_1)s) AND NOT (search_tsvector @@ plainto_tsquery(%(param_4)s, %(param_5)s))')
@override_settings(USING_PGROONGA=True)
def test_add_term_using_search_operator_pgroonga(self) -> None:
@@ -2749,19 +2749,49 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
def test_get_messages_with_search_queries(self) -> None:
query_ids = self.get_query_ids()
- sql_template = "SELECT anon_1.message_id, anon_1.flags, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.topic_matches \nFROM (SELECT message_id, flags, subject, rendered_content, ts_match_locs_array('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', 'jumping')) AS content_matches, ts_match_locs_array('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping')) AS topic_matches \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" # type: str
+        sql_template = """\
+SELECT anon_1.message_id, anon_1.flags, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.topic_matches \n\
+FROM (SELECT message_id, flags, subject, rendered_content, array((SELECT ARRAY[sum(length(anon_3) - 11) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) + 11, strpos(anon_3, '</ts-match>') - 1] AS anon_2 \n\
+FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_3 \n\
+ LIMIT ALL OFFSET 1)) AS content_matches, array((SELECT ARRAY[sum(length(anon_5) - 11) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) + 11, strpos(anon_5, '</ts-match>') - 1] AS anon_4 \n\
+FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5 \n\
+ LIMIT ALL OFFSET 1)) AS topic_matches \n\
+FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\
+WHERE user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n\
+ LIMIT 10) AS anon_1 ORDER BY message_id ASC\
+"""
sql = sql_template.format(**query_ids)
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
'narrow': '[["search", "jumping"]]'},
sql)
- sql_template = "SELECT anon_1.message_id, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.topic_matches \nFROM (SELECT id AS message_id, subject, rendered_content, ts_match_locs_array('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', 'jumping')) AS content_matches, ts_match_locs_array('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping')) AS topic_matches \nFROM zerver_message \nWHERE recipient_id = {scotland_recipient} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY zerver_message.id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
+        sql_template = """\
+SELECT anon_1.message_id, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.topic_matches \n\
+FROM (SELECT id AS message_id, subject, rendered_content, array((SELECT ARRAY[sum(length(anon_3) - 11) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) + 11, strpos(anon_3, '</ts-match>') - 1] AS anon_2 \n\
+FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_3 \n\
+ LIMIT ALL OFFSET 1)) AS content_matches, array((SELECT ARRAY[sum(length(anon_5) - 11) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) + 11, strpos(anon_5, '</ts-match>') - 1] AS anon_4 \n\
+FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5 \n\
+ LIMIT ALL OFFSET 1)) AS topic_matches \n\
+FROM zerver_message \n\
+WHERE recipient_id = {scotland_recipient} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY zerver_message.id ASC \n\
+ LIMIT 10) AS anon_1 ORDER BY message_id ASC\
+"""
sql = sql_template.format(**query_ids)
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
'narrow': '[["stream", "Scotland"], ["search", "jumping"]]'},
sql)
- sql_template = 'SELECT anon_1.message_id, anon_1.flags, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.topic_matches \nFROM (SELECT message_id, flags, subject, rendered_content, ts_match_locs_array(\'zulip.english_us_search\', rendered_content, plainto_tsquery(\'zulip.english_us_search\', \'"jumping" quickly\')) AS content_matches, ts_match_locs_array(\'zulip.english_us_search\', escape_html(subject), plainto_tsquery(\'zulip.english_us_search\', \'"jumping" quickly\')) AS topic_matches \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (content ILIKE \'%jumping%\' OR subject ILIKE \'%jumping%\') AND (search_tsvector @@ plainto_tsquery(\'zulip.english_us_search\', \'"jumping" quickly\')) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
+        sql_template = """\
+SELECT anon_1.message_id, anon_1.flags, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.topic_matches \n\
+FROM (SELECT message_id, flags, subject, rendered_content, array((SELECT ARRAY[sum(length(anon_3) - 11) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) + 11, strpos(anon_3, '</ts-match>') - 1] AS anon_2 \n\
+FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', '"jumping" quickly'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_3 \n\
+ LIMIT ALL OFFSET 1)) AS content_matches, array((SELECT ARRAY[sum(length(anon_5) - 11) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) + 11, strpos(anon_5, '</ts-match>') - 1] AS anon_4 \n\
+FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', '"jumping" quickly'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5 \n\
+ LIMIT ALL OFFSET 1)) AS topic_matches \n\
+FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\
+WHERE user_profile_id = {hamlet_id} AND (content ILIKE '%jumping%' OR subject ILIKE '%jumping%') AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', '"jumping" quickly')) ORDER BY message_id ASC \n\
+ LIMIT 10) AS anon_1 ORDER BY message_id ASC\
+"""
sql = sql_template.format(**query_ids)
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
'narrow': '[["search", "\\"jumping\\" quickly"]]'},
diff --git a/zerver/views/messages.py b/zerver/views/messages.py
index b81508c5a2..12f6eab149 100644
--- a/zerver/views/messages.py
+++ b/zerver/views/messages.py
@@ -56,6 +56,7 @@ from zerver.models import Message, UserProfile, Stream, Subscription, Client,\
get_user_by_id_in_realm_including_cross_realm, get_stream_recipient
from sqlalchemy import func
+from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import select, join, column, literal_column, literal, and_, \
or_, not_, union_all, alias, Selectable, ColumnElement, table
@@ -86,6 +87,27 @@ ConditionTransform = Any
OptionalNarrowListT = Optional[List[Dict[str, Any]]]
+# These delimiters will not appear in rendered messages or HTML-escaped topics.
+TS_START = "<ts-match>"
+TS_STOP = "</ts-match>"
+
+def ts_locs_array(config: ColumnElement, text: ColumnElement, tsquery: ColumnElement) -> ColumnElement:
+    """Build a SQL array of [offset, length] pairs, one per tsquery match that ts_headline marks in text."""
+    # Splitting the marked-up text on TS_START leaves the text before the first match as
+    # part one; every later part is "<match>" + TS_STOP + "<text up to the next match>".
+    options = "HighlightAll = TRUE, StartSel = %s, StopSel = %s" % (TS_START, TS_STOP)
+    delimited = func.ts_headline(config, text, tsquery, options)
+    parts = func.unnest(func.string_to_array(delimited, TS_START)).alias()
+    part = column(parts.name)
+    # Length a part occupies in the unmarked text: discount the TS_STOP it carries.
+    part_len = func.length(part) - len(TS_STOP)
+    # Running total over all earlier parts; the first part holds no TS_STOP, so add one back.
+    match_pos = func.sum(part_len).over(rows=(None, -1)) + len(TS_STOP)
+    match_len = func.strpos(part, TS_STOP) - 1
+    # OFFSET 1 drops the first part, which precedes every match and so locates nothing.
+    located = select([postgresql.array([match_pos, match_len])]).select_from(parts).offset(1)
+    return func.array(located.as_scalar())
+
# When you add a new operator to this, also update zerver/lib/narrow.py
class NarrowBuilder:
'''
@@ -430,7 +452,6 @@ class NarrowBuilder:
def _by_search_tsearch(self, query: Query, operand: str,
maybe_negate: ConditionTransform) -> Query:
tsquery = func.plainto_tsquery(literal("zulip.english_us_search"), literal(operand))
- ts_locs_array = func.ts_match_locs_array
query = query.column(ts_locs_array(literal("zulip.english_us_search"),
column("rendered_content"),
tsquery).label("content_matches"))
@@ -454,9 +475,6 @@ class NarrowBuilder:
cond = column("search_tsvector").op("@@")(tsquery)
return query.where(maybe_negate(cond))
-# The offsets we get from PGroonga are counted in characters
-# whereas the offsets from tsearch_extras are in bytes, so we
-# have to account for both cases in the logic below.
def highlight_string(text: str, locs: Iterable[Tuple[int, int]]) -> str:
highlight_start = ''
highlight_stop = ''
@@ -464,24 +482,16 @@ def highlight_string(text: str, locs: Iterable[Tuple[int, int]]) -> str:
result = ''
in_tag = False
- text_utf8 = text.encode('utf8')
-
for loc in locs:
(offset, length) = loc
- # These indexes are in byte space for tsearch,
- # and they are in string space for pgroonga.
prefix_start = pos
prefix_end = offset
match_start = offset
match_end = offset + length
- if settings.USING_PGROONGA:
- prefix = text[prefix_start:prefix_end]
- match = text[match_start:match_end]
- else:
- prefix = text_utf8[prefix_start:prefix_end].decode()
- match = text_utf8[match_start:match_end].decode()
+ prefix = text[prefix_start:prefix_end]
+ match = text[match_start:match_end]
for character in (prefix + match):
if character == '<':
@@ -498,12 +508,7 @@ def highlight_string(text: str, locs: Iterable[Tuple[int, int]]) -> str:
result += highlight_stop
pos = match_end
- if settings.USING_PGROONGA:
- final_frag = text[pos:]
- else:
- final_frag = text_utf8[pos:].decode()
-
- result += final_frag
+ result += text[pos:]
return result
def get_search_fields(rendered_content: str, topic_name: str, content_matches: Iterable[Tuple[int, int]],