pgroonga: Fix issues with HTML escaping in queries.

This commit is contained in:
Sampriti Panda 2018-05-19 09:09:13 +05:30 committed by Tim Abbott
parent 0bc272fc86
commit 250015a5d5
4 changed files with 68 additions and 7 deletions

View File

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
from django.db import models, migrations, connection
from django.contrib.postgres import operations
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
def rebuild_pgroonga_index(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
BATCH_SIZE = 10000
Message = apps.get_model("zerver", "Message")
message_ids = Message.objects.values_list('id', flat=True)
with connection.cursor() as cursor:
for i in range(0, len(message_ids), BATCH_SIZE):
batch_ids = ', '.join(str(id) for id in message_ids[i:i+BATCH_SIZE])
cursor.execute("UPDATE zerver_message SET "
"search_pgroonga = "
"escape_html(subject) || ' ' || rendered_content "
"WHERE id IN (%s)" % (batch_ids,))
class Migration(migrations.Migration):
atomic = False
dependencies = [
('pgroonga', '0001_enable'),
]
operations = [
migrations.RunPython(rebuild_pgroonga_index,
reverse_code=migrations.RunPython.noop)
]

View File

@ -37,7 +37,7 @@ def update_fts_columns(cursor):
if USING_PGROONGA:
cursor.execute("UPDATE zerver_message SET "
"search_pgroonga = "
"subject || ' ' || rendered_content "
"escape_html(subject) || ' ' || rendered_content "
"WHERE id = %s", (message_id,))
cursor.execute("UPDATE zerver_message SET "
"search_tsvector = to_tsvector('zulip.english_us_search', "

View File

@ -262,13 +262,13 @@ class NarrowBuilderTest(ZulipTestCase):
@override_settings(USING_PGROONGA=True)
def test_add_term_using_search_operator_pgroonga(self) -> None:
term = dict(operator='search', operand='"french fries"')
self._do_add_term_test(term, 'WHERE search_pgroonga @@ :search_pgroonga_1')
self._do_add_term_test(term, 'WHERE search_pgroonga @@ escape_html(:escape_html_1)')
@override_settings(USING_PGROONGA=True)
def test_add_term_using_search_operator_and_negated_pgroonga(
self) -> None: # NEGATED
term = dict(operator='search', operand='"french fries"', negated=True)
self._do_add_term_test(term, 'WHERE NOT (search_pgroonga @@ :search_pgroonga_1)')
self._do_add_term_test(term, 'WHERE NOT (search_pgroonga @@ escape_html(:escape_html_1))')
def test_add_term_using_has_operator_and_attachment_operand(self) -> None:
term = dict(operator='has', operand='attachment')
@ -1377,6 +1377,7 @@ class GetOldMessagesTest(ZulipTestCase):
('english', u'I want to go to 日本!'),
('english', 'Can you speak https://en.wikipedia.org/wiki/Japanese?'),
('english', 'https://google.com'),
('bread & butter', 'chalk & cheese'),
]
for topic, content in messages_to_search:
@ -1394,7 +1395,7 @@ class GetOldMessagesTest(ZulipTestCase):
with connection.cursor() as cursor:
cursor.execute("""
UPDATE zerver_message SET
search_pgroonga = subject || ' ' || rendered_content
search_pgroonga = escape_html(subject) || ' ' || rendered_content
""")
narrow = [
@ -1472,6 +1473,35 @@ class GetOldMessagesTest(ZulipTestCase):
self.assertEqual(link_search_result['messages'][0]['match_content'],
'<p><a href="https://google.com" target="_blank" title="https://google.com"><span class="highlight">https://google.com</span></a></p>')
# Search operands with HTML Special Characters
special_search_narrow = [
dict(operator='search', operand='butter'),
]
special_search_result = self.get_and_check_messages(dict(
narrow=ujson.dumps(special_search_narrow),
anchor=next_message_id,
num_after=10,
num_before=0,
)) # type: Dict[str, Any]
self.assertEqual(len(special_search_result['messages']), 1)
self.assertEqual(special_search_result['messages'][0]['match_subject'],
'bread &amp; <span class="highlight">butter</span>')
special_search_narrow = [
dict(operator='search', operand='&'),
]
special_search_result = self.get_and_check_messages(dict(
narrow=ujson.dumps(special_search_narrow),
anchor=next_message_id,
num_after=10,
num_before=0,
))
self.assertEqual(len(special_search_result['messages']), 1)
self.assertEqual(special_search_result['messages'][0]['match_subject'],
'bread <span class="highlight">&amp;</span> butter')
self.assertEqual(special_search_result['messages'][0]['match_content'],
'<p>chalk <span class="highlight">&amp;</span> cheese</p>')
def test_messages_in_narrow_for_non_search(self) -> None:
email = self.example_email("cordelia")
self.login(email)

View File

@ -359,12 +359,13 @@ class NarrowBuilder:
maybe_negate: ConditionTransform) -> Query:
match_positions_character = func.pgroonga.match_positions_character
query_extract_keywords = func.pgroonga.query_extract_keywords
keywords = query_extract_keywords(operand)
operand_escaped = func.escape_html(operand)
keywords = query_extract_keywords(operand_escaped)
query = query.column(match_positions_character(column("rendered_content"),
keywords).label("content_matches"))
query = query.column(match_positions_character(column("subject"),
query = query.column(match_positions_character(func.escape_html(column("subject")),
keywords).label("subject_matches"))
condition = column("search_pgroonga").op("@@")(operand)
condition = column("search_pgroonga").op("@@")(operand_escaped)
return query.where(maybe_negate(condition))
def _by_search_tsearch(self, query: Query, operand: str,