diff --git a/pgroonga/migrations/0002_html_escape_subject.py b/pgroonga/migrations/0002_html_escape_subject.py new file mode 100644 index 0000000000..eecf8969fb --- /dev/null +++ b/pgroonga/migrations/0002_html_escape_subject.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +from django.db import models, migrations, connection +from django.contrib.postgres import operations +from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor +from django.db.migrations.state import StateApps + +def rebuild_pgroonga_index(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: + BATCH_SIZE = 10000 + + Message = apps.get_model("zerver", "Message") + message_ids = Message.objects.values_list('id', flat=True) + with connection.cursor() as cursor: + for i in range(0, len(message_ids), BATCH_SIZE): + batch_ids = ', '.join(str(id) for id in message_ids[i:i+BATCH_SIZE]) + cursor.execute("UPDATE zerver_message SET " + "search_pgroonga = " + "escape_html(subject) || ' ' || rendered_content " + "WHERE id IN (%s)" % (batch_ids,)) + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ('pgroonga', '0001_enable'), + ] + + operations = [ + migrations.RunPython(rebuild_pgroonga_index, + reverse_code=migrations.RunPython.noop) + ] diff --git a/puppet/zulip/files/postgresql/process_fts_updates b/puppet/zulip/files/postgresql/process_fts_updates index 2ac4d2b84a..66b6e640da 100755 --- a/puppet/zulip/files/postgresql/process_fts_updates +++ b/puppet/zulip/files/postgresql/process_fts_updates @@ -37,7 +37,7 @@ def update_fts_columns(cursor): if USING_PGROONGA: cursor.execute("UPDATE zerver_message SET " "search_pgroonga = " - "subject || ' ' || rendered_content " + "escape_html(subject) || ' ' || rendered_content " "WHERE id = %s", (message_id,)) cursor.execute("UPDATE zerver_message SET " "search_tsvector = to_tsvector('zulip.english_us_search', " diff --git a/zerver/tests/test_narrow.py b/zerver/tests/test_narrow.py index dcf567b930..d8056343cf 100644 --- a/zerver/tests/test_narrow.py +++ b/zerver/tests/test_narrow.py @@ -262,13 +262,13 @@ class NarrowBuilderTest(ZulipTestCase): @override_settings(USING_PGROONGA=True) def test_add_term_using_search_operator_pgroonga(self) -> None: term = dict(operator='search', operand='"french fries"') - self._do_add_term_test(term, 'WHERE search_pgroonga @@ :search_pgroonga_1') + self._do_add_term_test(term, 'WHERE search_pgroonga @@ escape_html(:escape_html_1)') @override_settings(USING_PGROONGA=True) def test_add_term_using_search_operator_and_negated_pgroonga( self) -> None: # NEGATED term = dict(operator='search', operand='"french fries"', negated=True) - self._do_add_term_test(term, 'WHERE NOT (search_pgroonga @@ :search_pgroonga_1)') + self._do_add_term_test(term, 'WHERE NOT (search_pgroonga @@ escape_html(:escape_html_1))') def test_add_term_using_has_operator_and_attachment_operand(self) -> None: term = dict(operator='has', operand='attachment') @@ -1377,6 +1377,7 @@ class GetOldMessagesTest(ZulipTestCase): ('english', u'I want to go to 日本!'), ('english', 'Can you speak https://en.wikipedia.org/wiki/Japanese?'), ('english', 'https://google.com'), + ('bread & butter', 'chalk & cheese'), ] for topic, content in messages_to_search: @@ -1394,7 +1395,7 @@ class GetOldMessagesTest(ZulipTestCase): with connection.cursor() as cursor: cursor.execute(""" UPDATE zerver_message SET - search_pgroonga = subject || ' ' || rendered_content + search_pgroonga = escape_html(subject) || ' ' || rendered_content """) narrow = [ @@ -1472,6 +1473,35 @@ class GetOldMessagesTest(ZulipTestCase): self.assertEqual(link_search_result['messages'][0]['match_content'], '
') + # Search operands with HTML Special Characters + special_search_narrow = [ + dict(operator='search', operand='butter'), + ] + special_search_result = self.get_and_check_messages(dict( + narrow=ujson.dumps(special_search_narrow), + anchor=next_message_id, + num_after=10, + num_before=0, + )) # type: Dict[str, Any] + self.assertEqual(len(special_search_result['messages']), 1) + self.assertEqual(special_search_result['messages'][0]['match_subject'], + 'bread & butter') + + special_search_narrow = [ + dict(operator='search', operand='&'), + ] + special_search_result = self.get_and_check_messages(dict( + narrow=ujson.dumps(special_search_narrow), + anchor=next_message_id, + num_after=10, + num_before=0, + )) + self.assertEqual(len(special_search_result['messages']), 1) + self.assertEqual(special_search_result['messages'][0]['match_subject'], + 'bread & butter') + self.assertEqual(special_search_result['messages'][0]['match_content'], + 'chalk & cheese
') + def test_messages_in_narrow_for_non_search(self) -> None: email = self.example_email("cordelia") self.login(email) diff --git a/zerver/views/messages.py b/zerver/views/messages.py index e903544d2e..aae431219d 100644 --- a/zerver/views/messages.py +++ b/zerver/views/messages.py @@ -359,12 +359,13 @@ class NarrowBuilder: maybe_negate: ConditionTransform) -> Query: match_positions_character = func.pgroonga.match_positions_character query_extract_keywords = func.pgroonga.query_extract_keywords - keywords = query_extract_keywords(operand) + operand_escaped = func.escape_html(operand) + keywords = query_extract_keywords(operand_escaped) query = query.column(match_positions_character(column("rendered_content"), keywords).label("content_matches")) - query = query.column(match_positions_character(column("subject"), + query = query.column(match_positions_character(func.escape_html(column("subject")), keywords).label("subject_matches")) - condition = column("search_pgroonga").op("@@")(operand) + condition = column("search_pgroonga").op("@@")(operand_escaped) return query.where(maybe_negate(condition)) def _by_search_tsearch(self, query: Query, operand: str,