remove_single_newlines: Fix regex to parse list syntax correctly.

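Earlier, the 'remove_single_newlines' function wasn't working
correctly when '\n' was followed by an asterisk, a hyphen, or a digit.
Specifically, the regex matched those bare characters rather than the
full Markdown syntax for ordered or unordered lists, so a line that
merely began with one of them (for example "**bold**", "3.5", or
"-C-C-") was never joined to the previous line.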

For example, see the workaround fix in 330439a83b.

This commit updates the function to replace '\n' with ' ', when
"\n" is not preceded by "\n" and not followed by:
* Another newline (\n)
* A hyphen followed by a space
* An asterisk followed by a space
* A number followed by a dot and a space

We won't have to do fixes like 330439a83b in the future.
This commit is contained in:
Prakhar Pratyush 2024-05-31 00:02:52 +05:30 committed by GitHub
parent 50e0f336f0
commit f9fca5b469
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 20 additions and 1 deletions

View File

@ -1435,4 +1435,4 @@ def set_visibility_policy_possible(user_profile: UserProfile, message: Message)
def remove_single_newlines(content: str) -> str:
    """Join soft-wrapped lines with spaces while keeping Markdown structure.

    Leading and trailing newlines are stripped first. Each remaining
    newline is then replaced by a single space unless it is:

    * preceded by another newline,
    * followed by another newline, or
    * followed by a list marker: "- ", "* ", or one or more digits
      followed by ". ".

    Returns the rewritten string.
    """
    content = content.strip("\n")
    # Only complete list markers (marker character(s) plus the trailing
    # space) block the join. A line that merely starts with '*', '-' or a
    # digit -- e.g. "**bold**", "-C-C-" or "3.5" -- is ordinary text and
    # must be merged into the previous line.
    return re.sub(r"(?<!\n)\n(?!\n|[-*] |[0-9]+\. )", " ", content)

View File

@ -313,6 +313,25 @@ class ZulipUpdateAnnouncementsTest(ZulipTestCase):
expected_output = "- This is a bullet.\n- This is another bullet.\n\n1. This is a list\n1. This is more list."
self.assertEqual(remove_single_newlines(input_text), expected_output)
# Asterisks after newline for bold.
input_text = "* This is a bullet.\n**word in bold** on the same line.\n* Another bullet."
expected_output = (
"* This is a bullet. **word in bold** on the same line.\n* Another bullet."
)
self.assertEqual(remove_single_newlines(input_text), expected_output)
# Digit after newline.
input_text = "1. This is a numbered list.\n2. Second list element.\n3.5 is a decimal.\n3. Third list element."
expected_output = "1. This is a numbered list.\n2. Second list element. 3.5 is a decimal.\n3. Third list element."
self.assertEqual(remove_single_newlines(input_text), expected_output)
# Hyphen after newline.
input_text = "- This is a list.\n-C-C- organic molecule structure.\n- Another list element."
expected_output = (
"- This is a list. -C-C- organic molecule structure.\n- Another list element."
)
self.assertEqual(remove_single_newlines(input_text), expected_output)
def test_zulip_updates_for_realm_imported_from_other_product(self) -> None:
with mock.patch(
"zerver.lib.zulip_update_announcements.zulip_update_announcements",