mirror of https://github.com/zulip/zulip.git
pgroonga: Fix issues with HTML escaping in queries.
This commit is contained in:
parent
0bc272fc86
commit
250015a5d5
|
@ -0,0 +1,30 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from django.db import models, migrations, connection
|
||||||
|
from django.contrib.postgres import operations
|
||||||
|
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
|
||||||
|
from django.db.migrations.state import StateApps
|
||||||
|
|
||||||
|
def rebuild_pgroonga_index(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
|
||||||
|
BATCH_SIZE = 10000
|
||||||
|
|
||||||
|
Message = apps.get_model("zerver", "Message")
|
||||||
|
message_ids = Message.objects.values_list('id', flat=True)
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
for i in range(0, len(message_ids), BATCH_SIZE):
|
||||||
|
batch_ids = ', '.join(str(id) for id in message_ids[i:i+BATCH_SIZE])
|
||||||
|
cursor.execute("UPDATE zerver_message SET "
|
||||||
|
"search_pgroonga = "
|
||||||
|
"escape_html(subject) || ' ' || rendered_content "
|
||||||
|
"WHERE id IN (%s)" % (batch_ids,))
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
atomic = False
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('pgroonga', '0001_enable'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.RunPython(rebuild_pgroonga_index,
|
||||||
|
reverse_code=migrations.RunPython.noop)
|
||||||
|
]
|
|
@ -37,7 +37,7 @@ def update_fts_columns(cursor):
|
||||||
if USING_PGROONGA:
|
if USING_PGROONGA:
|
||||||
cursor.execute("UPDATE zerver_message SET "
|
cursor.execute("UPDATE zerver_message SET "
|
||||||
"search_pgroonga = "
|
"search_pgroonga = "
|
||||||
"subject || ' ' || rendered_content "
|
"escape_html(subject) || ' ' || rendered_content "
|
||||||
"WHERE id = %s", (message_id,))
|
"WHERE id = %s", (message_id,))
|
||||||
cursor.execute("UPDATE zerver_message SET "
|
cursor.execute("UPDATE zerver_message SET "
|
||||||
"search_tsvector = to_tsvector('zulip.english_us_search', "
|
"search_tsvector = to_tsvector('zulip.english_us_search', "
|
||||||
|
|
|
@ -262,13 +262,13 @@ class NarrowBuilderTest(ZulipTestCase):
|
||||||
@override_settings(USING_PGROONGA=True)
|
@override_settings(USING_PGROONGA=True)
|
||||||
def test_add_term_using_search_operator_pgroonga(self) -> None:
|
def test_add_term_using_search_operator_pgroonga(self) -> None:
|
||||||
term = dict(operator='search', operand='"french fries"')
|
term = dict(operator='search', operand='"french fries"')
|
||||||
self._do_add_term_test(term, 'WHERE search_pgroonga @@ :search_pgroonga_1')
|
self._do_add_term_test(term, 'WHERE search_pgroonga @@ escape_html(:escape_html_1)')
|
||||||
|
|
||||||
@override_settings(USING_PGROONGA=True)
|
@override_settings(USING_PGROONGA=True)
|
||||||
def test_add_term_using_search_operator_and_negated_pgroonga(
|
def test_add_term_using_search_operator_and_negated_pgroonga(
|
||||||
self) -> None: # NEGATED
|
self) -> None: # NEGATED
|
||||||
term = dict(operator='search', operand='"french fries"', negated=True)
|
term = dict(operator='search', operand='"french fries"', negated=True)
|
||||||
self._do_add_term_test(term, 'WHERE NOT (search_pgroonga @@ :search_pgroonga_1)')
|
self._do_add_term_test(term, 'WHERE NOT (search_pgroonga @@ escape_html(:escape_html_1))')
|
||||||
|
|
||||||
def test_add_term_using_has_operator_and_attachment_operand(self) -> None:
|
def test_add_term_using_has_operator_and_attachment_operand(self) -> None:
|
||||||
term = dict(operator='has', operand='attachment')
|
term = dict(operator='has', operand='attachment')
|
||||||
|
@ -1377,6 +1377,7 @@ class GetOldMessagesTest(ZulipTestCase):
|
||||||
('english', u'I want to go to 日本!'),
|
('english', u'I want to go to 日本!'),
|
||||||
('english', 'Can you speak https://en.wikipedia.org/wiki/Japanese?'),
|
('english', 'Can you speak https://en.wikipedia.org/wiki/Japanese?'),
|
||||||
('english', 'https://google.com'),
|
('english', 'https://google.com'),
|
||||||
|
('bread & butter', 'chalk & cheese'),
|
||||||
]
|
]
|
||||||
|
|
||||||
for topic, content in messages_to_search:
|
for topic, content in messages_to_search:
|
||||||
|
@ -1394,7 +1395,7 @@ class GetOldMessagesTest(ZulipTestCase):
|
||||||
with connection.cursor() as cursor:
|
with connection.cursor() as cursor:
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
UPDATE zerver_message SET
|
UPDATE zerver_message SET
|
||||||
search_pgroonga = subject || ' ' || rendered_content
|
search_pgroonga = escape_html(subject) || ' ' || rendered_content
|
||||||
""")
|
""")
|
||||||
|
|
||||||
narrow = [
|
narrow = [
|
||||||
|
@ -1472,6 +1473,35 @@ class GetOldMessagesTest(ZulipTestCase):
|
||||||
self.assertEqual(link_search_result['messages'][0]['match_content'],
|
self.assertEqual(link_search_result['messages'][0]['match_content'],
|
||||||
'<p><a href="https://google.com" target="_blank" title="https://google.com"><span class="highlight">https://google.com</span></a></p>')
|
'<p><a href="https://google.com" target="_blank" title="https://google.com"><span class="highlight">https://google.com</span></a></p>')
|
||||||
|
|
||||||
|
# Search operands with HTML Special Characters
|
||||||
|
special_search_narrow = [
|
||||||
|
dict(operator='search', operand='butter'),
|
||||||
|
]
|
||||||
|
special_search_result = self.get_and_check_messages(dict(
|
||||||
|
narrow=ujson.dumps(special_search_narrow),
|
||||||
|
anchor=next_message_id,
|
||||||
|
num_after=10,
|
||||||
|
num_before=0,
|
||||||
|
)) # type: Dict[str, Any]
|
||||||
|
self.assertEqual(len(special_search_result['messages']), 1)
|
||||||
|
self.assertEqual(special_search_result['messages'][0]['match_subject'],
|
||||||
|
'bread & <span class="highlight">butter</span>')
|
||||||
|
|
||||||
|
special_search_narrow = [
|
||||||
|
dict(operator='search', operand='&'),
|
||||||
|
]
|
||||||
|
special_search_result = self.get_and_check_messages(dict(
|
||||||
|
narrow=ujson.dumps(special_search_narrow),
|
||||||
|
anchor=next_message_id,
|
||||||
|
num_after=10,
|
||||||
|
num_before=0,
|
||||||
|
))
|
||||||
|
self.assertEqual(len(special_search_result['messages']), 1)
|
||||||
|
self.assertEqual(special_search_result['messages'][0]['match_subject'],
|
||||||
|
'bread <span class="highlight">&</span> butter')
|
||||||
|
self.assertEqual(special_search_result['messages'][0]['match_content'],
|
||||||
|
'<p>chalk <span class="highlight">&</span> cheese</p>')
|
||||||
|
|
||||||
def test_messages_in_narrow_for_non_search(self) -> None:
|
def test_messages_in_narrow_for_non_search(self) -> None:
|
||||||
email = self.example_email("cordelia")
|
email = self.example_email("cordelia")
|
||||||
self.login(email)
|
self.login(email)
|
||||||
|
|
|
@ -359,12 +359,13 @@ class NarrowBuilder:
|
||||||
maybe_negate: ConditionTransform) -> Query:
|
maybe_negate: ConditionTransform) -> Query:
|
||||||
match_positions_character = func.pgroonga.match_positions_character
|
match_positions_character = func.pgroonga.match_positions_character
|
||||||
query_extract_keywords = func.pgroonga.query_extract_keywords
|
query_extract_keywords = func.pgroonga.query_extract_keywords
|
||||||
keywords = query_extract_keywords(operand)
|
operand_escaped = func.escape_html(operand)
|
||||||
|
keywords = query_extract_keywords(operand_escaped)
|
||||||
query = query.column(match_positions_character(column("rendered_content"),
|
query = query.column(match_positions_character(column("rendered_content"),
|
||||||
keywords).label("content_matches"))
|
keywords).label("content_matches"))
|
||||||
query = query.column(match_positions_character(column("subject"),
|
query = query.column(match_positions_character(func.escape_html(column("subject")),
|
||||||
keywords).label("subject_matches"))
|
keywords).label("subject_matches"))
|
||||||
condition = column("search_pgroonga").op("@@")(operand)
|
condition = column("search_pgroonga").op("@@")(operand_escaped)
|
||||||
return query.where(maybe_negate(condition))
|
return query.where(maybe_negate(condition))
|
||||||
|
|
||||||
def _by_search_tsearch(self, query: Query, operand: str,
|
def _by_search_tsearch(self, query: Query, operand: str,
|
||||||
|
|
Loading…
Reference in New Issue