mirror of https://github.com/zulip/zulip.git
capitalization: Avoid bs4.MarkupResemblesLocatorWarning.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
ec437fb770
commit
14db6e8c14
|
@@ -1,4 +1,5 @@
|
||||||
import re
|
import re
|
||||||
|
from io import StringIO
|
||||||
from re import Match
|
from re import Match
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
@@ -182,7 +183,8 @@ IGNORED_PHRASES.sort(key=len, reverse=True)
|
||||||
# text using BeautifulSoup and then removes extra whitespaces from
|
# text using BeautifulSoup and then removes extra whitespaces from
|
||||||
# it. This step enables us to add HTML in our regexes directly.
|
# it. This step enables us to add HTML in our regexes directly.
|
||||||
COMPILED_IGNORED_PHRASES = [
|
COMPILED_IGNORED_PHRASES = [
|
||||||
re.compile(r" ".join(BeautifulSoup(regex, "lxml").text.split())) for regex in IGNORED_PHRASES
|
re.compile(r" ".join(BeautifulSoup(StringIO(regex), "lxml").text.split()))
|
||||||
|
for regex in IGNORED_PHRASES
|
||||||
]
|
]
|
||||||
|
|
||||||
SPLIT_BOUNDARY = r"?.!" # Used to split string into sentences.
|
SPLIT_BOUNDARY = r"?.!" # Used to split string into sentences.
|
||||||
|
@@ -241,7 +243,7 @@ def get_safe_text(text: str) -> str:
|
||||||
This returns text which is rendered by BeautifulSoup and is in the
|
This returns text which is rendered by BeautifulSoup and is in the
|
||||||
form that can be split easily and has all IGNORED_PHRASES processed.
|
form that can be split easily and has all IGNORED_PHRASES processed.
|
||||||
"""
|
"""
|
||||||
soup = BeautifulSoup(text, "lxml")
|
soup = BeautifulSoup(StringIO(text), "lxml")
|
||||||
text = " ".join(soup.text.split()) # Remove extra whitespaces.
|
text = " ".join(soup.text.split()) # Remove extra whitespaces.
|
||||||
for phrase_regex in COMPILED_IGNORED_PHRASES:
|
for phrase_regex in COMPILED_IGNORED_PHRASES:
|
||||||
text = phrase_regex.sub(replace_with_safe_phrase, text)
|
text = phrase_regex.sub(replace_with_safe_phrase, text)
|
||||||
|
|
Loading…
Reference in New Issue