2017-03-03 12:42:07 +01:00
|
|
|
import re
|
2020-06-11 00:54:34 +02:00
|
|
|
from typing import List, Match, Tuple
|
2017-03-03 12:42:07 +01:00
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
2017-03-10 11:47:06 +01:00
|
|
|
# The phrases in this list will be ignored. The longest phrase is
|
|
|
|
# tried first; this removes the chance of smaller phrases changing
|
|
|
|
# the text before longer phrases are tried.
|
|
|
|
# The errors shown by `tools/check-capitalization` can be added to
|
|
|
|
# this list without any modification.
|
|
|
|
IGNORED_PHRASES = [
    # NOTE: every entry is treated as a regular expression.  Before being
    # compiled, each entry is rendered to plain text with BeautifulSoup and
    # its whitespace is normalized, so HTML markup may be written directly
    # and leading/trailing spaces in an entry are dropped.  Characters such
    # as "." and "?" keep their regex meaning unless escaped.
    #
    # Proper nouns and acronyms
    r"Android",
    r"API",
    r"APNS",
    r"App Store",
    r"Botserver",
    r"Cookie Bot",
    r"DevAuthBackend",
    r"Dropbox",
    r"GCM",
    r"GitHub",
    r"G Suite",
    r"Google",
    r"Gravatar",
    r"Hamlet",
    r"Help Center",
    r"HTTP",
    r"ID",
    r"IDs",
    r"IP",
    r"JSON",
    r"Kerberos",
    r"LDAP",
    r"Mac",
    r"macOS",
    r"Markdown",
    r"MiB",
    r"OAuth",
    r"OTP",
    r"Pivotal",
    r"Play Store",
    r"PM",
    r"PMs",
    r"REMOTE_USER",
    r"Slack",
    r"SSO",
    r"Terms of Service",
    r"Tuesday",
    r"URL",
    r"Ubuntu",
    r"Updown",
    r"V5",
    r"Webathena",
    r"Windows",
    r"WordPress",
    r"XML",
    r"Zephyr",
    r"Zoom",
    r"Zulip",
    r"Zulip Account Security",
    r"Zulip Security",
    r"Zulip Standard",
    r"Zulip Team",
    r"iPhone",
    r"iOS",
    r"Emoji One",
    r"mailinator.com",
    r"HQ",
    r"BigBlueButton",
    # Code things
    r".zuliprc",
    r"__\w+\.\w+__",
    # Things using "I"
    r"I say",
    r"I want",
    r"I'm",
    r"I've",
    # Specific short words
    r"beta",
    r"and",
    r"bot",
    r"e.g.",
    r"etc.",
    r"images",
    r"enabled",
    r"disabled",
    r"zulip_org_id",
    r"admins",
    r"members",
    r"signups",
    # Placeholders
    r"keyword",
    r"streamname",
    r"user@example.com",
    # Fragments of larger strings
    (r"your subscriptions on your Streams page"),
    (
        r"Change notification settings for individual streams on your "
        '<a href="/#streams">Streams page</a>.'
    ),
    (
        r"Looking for our "
        '<a href="/integrations" target="_blank">Integrations</a> or '
        '<a href="/api" target="_blank">API</a> documentation?'
    ),
    r'Most stream administration is done on the <a href="/#streams">Streams page</a>.',
    r"one or more people...",
    r"confirmation email",
    r"invites remaining",
    r"was too large; the maximum file size is 25MiB.",
    r"selected message",
    r"a-z",
    r"organization administrator",
    r"user",
    r"an unknown operating system",
    r"Go to Settings",
    r"Like Organization logo",
    # SPECIAL CASES
    # "Enter" (the key) is usually capitalized.
    r"Press Enter to send",
    r"Send message on pressing Enter",
    # Because topics are usually lower-case, this would look weird if it were capitalized.
    r"more topics",
    # For consistency with "more topics".
    r"more conversations",
    # A capital "I" looks weird in the reminders popover.
    r"in 1 hour",
    r"in 20 minutes",
    r"in 3 hours",
    # We should probably just delete this string from translations.
    r"activation key",
    # These are used as topics; the anchors restrict them to whole-string matches.
    r"^new streams$",
    r"^stream events$",
    # These are used as example short names (i.e. an uncapitalized context):
    r"^marketing$",
    r"^cookie$",
    r"^new_emoji$",
    # Used to refer to custom time limits.
    r"\bN\b",
    # A capital "C" feels obtrusive in the clear status option.
    r"clear",
    r"group private messages with {recipient}",
    r"private messages with {recipient}",
    r"private messages with yourself",
    # TO CLEAN UP
    # Just want to avoid churning login.html right now.
    r"or Choose a user",
    # This is a parsing bug in the tool.
    r"argument ",
    # I can't find this one.
    r"text",
    r"GIF",
    # Emoji name placeholder
    r"leafy green vegetable",
    # Subdomain placeholder
    r"your-organization-url",
    # Used in invite modal
    r"or",
    # Used in GIPHY popover.
    r"GIFs",
    r"GIPHY",
    # Used in our case studies
    r"Technical University of Munich",
    r"University of California San Diego",
]
|
|
|
|
|
|
|
|
# Order the phrases from longest to shortest so that a longer phrase is
# always substituted before any shorter phrase it might contain.
IGNORED_PHRASES.sort(key=len, reverse=True)
|
|
|
|
|
|
|
|
# Pre-compile every ignored phrase once, up front.  Each phrase is first
# rendered to plain text with BeautifulSoup and has its whitespace
# collapsed to single spaces, which is what lets entries in
# IGNORED_PHRASES contain HTML markup directly.
COMPILED_IGNORED_PHRASES = [
    re.compile(" ".join(rendered.split()))
    for rendered in (BeautifulSoup(phrase, "lxml").text for phrase in IGNORED_PHRASES)
]
|
2017-03-03 12:42:07 +01:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
# Characters that end a sentence; used to split a string into sentences.
SPLIT_BOUNDARY = "?.!"
# Matches any single character from SPLIT_BOUNDARY.  The characters are
# passed through re.escape() so the character class stays well-formed even
# if a character that is special inside [...] (such as "]", "^", "-" or a
# backslash) is ever added to SPLIT_BOUNDARY.
SPLIT_BOUNDARY_REGEX = re.compile(f"[{re.escape(SPLIT_BOUNDARY)}]")
|
2017-03-03 12:42:07 +01:00
|
|
|
|
|
|
|
# Patterns that flag a sentence as incorrectly capitalized.
DISALLOWED = [
    # The sentence starts with a lower-case character (unless that
    # character is immediately followed by a closing brace).
    r"^[a-z](?!\})",
    # The sentence starts with an upper-case character, and another
    # upper-case character appears later, after lower-case text.
    r"^[A-Z][a-z]+[\sa-z0-9]+[A-Z]",
]
# A single alternation, so that one search tests every pattern at once.
DISALLOWED_REGEX = re.compile(r"|".join(DISALLOWED))
|
2017-03-03 12:42:07 +01:00
|
|
|
|
2018-03-11 06:06:54 +01:00
|
|
|
# Maps each banned word (written in lower case, because it is looked up in
# lower-cased text) to the explanation included in the reported error.
BANNED_WORDS = {
    "realm": "The term realm should not appear in user-facing strings. Use organization instead.",
}
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
python: Convert function type annotations to Python 3 style.
Generated by com2ann (slightly patched to avoid also converting
assignment type annotations, which require Python 3.6), followed by
some manual whitespace adjustment, and six fixes for runtime issues:
- def __init__(self, token: Token, parent: Optional[Node]) -> None:
+ def __init__(self, token: Token, parent: "Optional[Node]") -> None:
-def main(options: argparse.Namespace) -> NoReturn:
+def main(options: argparse.Namespace) -> "NoReturn":
-def fetch_request(url: str, callback: Any, **kwargs: Any) -> Generator[Callable[..., Any], Any, None]:
+def fetch_request(url: str, callback: Any, **kwargs: Any) -> "Generator[Callable[..., Any], Any, None]":
-def assert_server_running(server: subprocess.Popen[bytes], log_file: Optional[str]) -> None:
+def assert_server_running(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> None:
-def server_is_up(server: subprocess.Popen[bytes], log_file: Optional[str]) -> bool:
+def server_is_up(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> bool:
- method_kwarg_pairs: List[FuncKwargPair],
+ method_kwarg_pairs: "List[FuncKwargPair]",
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-19 03:48:37 +02:00
|
|
|
def get_safe_phrase(phrase: str) -> str:
    """
    Return the "safe" form of a phrase: every character that could be
    mistaken for a split boundary is replaced with an underscore (_),
    and the result is converted to lower case.
    """
    return SPLIT_BOUNDARY_REGEX.sub("_", phrase).lower()
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
python: Convert function type annotations to Python 3 style.
Generated by com2ann (slightly patched to avoid also converting
assignment type annotations, which require Python 3.6), followed by
some manual whitespace adjustment, and six fixes for runtime issues:
- def __init__(self, token: Token, parent: Optional[Node]) -> None:
+ def __init__(self, token: Token, parent: "Optional[Node]") -> None:
-def main(options: argparse.Namespace) -> NoReturn:
+def main(options: argparse.Namespace) -> "NoReturn":
-def fetch_request(url: str, callback: Any, **kwargs: Any) -> Generator[Callable[..., Any], Any, None]:
+def fetch_request(url: str, callback: Any, **kwargs: Any) -> "Generator[Callable[..., Any], Any, None]":
-def assert_server_running(server: subprocess.Popen[bytes], log_file: Optional[str]) -> None:
+def assert_server_running(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> None:
-def server_is_up(server: subprocess.Popen[bytes], log_file: Optional[str]) -> bool:
+def server_is_up(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> bool:
- method_kwarg_pairs: List[FuncKwargPair],
+ method_kwarg_pairs: "List[FuncKwargPair]",
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-19 03:48:37 +02:00
|
|
|
def replace_with_safe_phrase(matchobj: Match[str]) -> str:
    """
    Substitution callback that swaps a matched ignored phrase for its
    safe form (see `get_safe_phrase()`).

    The safe form is normally all lower case.  When the match sits at
    the very start of the text, or directly follows a split boundary,
    its first letter is upper-cased instead so that it still reads as
    the start of a sentence.
    """
    full_text = matchobj.string
    offset = matchobj.start()
    safe_phrase = get_safe_phrase(matchobj.group(0))

    # A split boundary is expected to be followed by exactly one space,
    # so the boundary character (if any) sits two characters before the
    # match.  The index is clamped at 0 for matches near the start.
    lookbehind = full_text[max(offset - 2, 0)]
    starts_sentence = offset == 0 or lookbehind in SPLIT_BOUNDARY

    return safe_phrase.capitalize() if starts_sentence else safe_phrase
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
python: Convert function type annotations to Python 3 style.
Generated by com2ann (slightly patched to avoid also converting
assignment type annotations, which require Python 3.6), followed by
some manual whitespace adjustment, and six fixes for runtime issues:
- def __init__(self, token: Token, parent: Optional[Node]) -> None:
+ def __init__(self, token: Token, parent: "Optional[Node]") -> None:
-def main(options: argparse.Namespace) -> NoReturn:
+def main(options: argparse.Namespace) -> "NoReturn":
-def fetch_request(url: str, callback: Any, **kwargs: Any) -> Generator[Callable[..., Any], Any, None]:
+def fetch_request(url: str, callback: Any, **kwargs: Any) -> "Generator[Callable[..., Any], Any, None]":
-def assert_server_running(server: subprocess.Popen[bytes], log_file: Optional[str]) -> None:
+def assert_server_running(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> None:
-def server_is_up(server: subprocess.Popen[bytes], log_file: Optional[str]) -> bool:
+def server_is_up(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> bool:
- method_kwarg_pairs: List[FuncKwargPair],
+ method_kwarg_pairs: "List[FuncKwargPair]",
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-19 03:48:37 +02:00
|
|
|
def get_safe_text(text: str) -> str:
    """
    Render the text with BeautifulSoup, collapse its whitespace, and
    replace every occurrence of an ignored phrase with its safe form,
    yielding text that can be split into sentences and checked.
    """
    rendered = BeautifulSoup(text, "lxml").text
    safe_text = " ".join(rendered.split())  # Collapse runs of whitespace.
    for pattern in COMPILED_IGNORED_PHRASES:
        safe_text = pattern.sub(replace_with_safe_phrase, safe_text)
    return safe_text
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
python: Convert function type annotations to Python 3 style.
Generated by com2ann (slightly patched to avoid also converting
assignment type annotations, which require Python 3.6), followed by
some manual whitespace adjustment, and six fixes for runtime issues:
- def __init__(self, token: Token, parent: Optional[Node]) -> None:
+ def __init__(self, token: Token, parent: "Optional[Node]") -> None:
-def main(options: argparse.Namespace) -> NoReturn:
+def main(options: argparse.Namespace) -> "NoReturn":
-def fetch_request(url: str, callback: Any, **kwargs: Any) -> Generator[Callable[..., Any], Any, None]:
+def fetch_request(url: str, callback: Any, **kwargs: Any) -> "Generator[Callable[..., Any], Any, None]":
-def assert_server_running(server: subprocess.Popen[bytes], log_file: Optional[str]) -> None:
+def assert_server_running(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> None:
-def server_is_up(server: subprocess.Popen[bytes], log_file: Optional[str]) -> bool:
+def server_is_up(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> bool:
- method_kwarg_pairs: List[FuncKwargPair],
+ method_kwarg_pairs: "List[FuncKwargPair]",
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-19 03:48:37 +02:00
|
|
|
def is_capitalized(safe_text: str) -> bool:
    """
    Return True when no sentence of the safe text matches a disallowed
    capitalization pattern.  Sentences are obtained by splitting on the
    split-boundary characters and are stripped before being checked.
    """
    for sentence in SPLIT_BOUNDARY_REGEX.split(safe_text):
        if DISALLOWED_REGEX.search(sentence.strip()) is not None:
            return False
    return True
|
2017-03-03 12:42:07 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-03-11 06:06:54 +01:00
|
|
|
def check_banned_words(text: str) -> List[str]:
    """
    Return one error message for each banned word that occurs in the
    text.  The comparison is case-insensitive and substring-based.
    """
    lowered = text.lower()
    # Hack: Should move this into BANNED_WORDS framework; for now, the
    # skip is hand-coded: text mentioning "realm_name" is never reported.
    is_exempt = "realm_name" in lowered
    return [
        "{word} found in '{text}'. {reason}".format(word=word, text=text, reason=reason)
        for word, reason in BANNED_WORDS.items()
        if word in lowered and not is_exempt
    ]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
python: Convert function type annotations to Python 3 style.
Generated by com2ann (slightly patched to avoid also converting
assignment type annotations, which require Python 3.6), followed by
some manual whitespace adjustment, and six fixes for runtime issues:
- def __init__(self, token: Token, parent: Optional[Node]) -> None:
+ def __init__(self, token: Token, parent: "Optional[Node]") -> None:
-def main(options: argparse.Namespace) -> NoReturn:
+def main(options: argparse.Namespace) -> "NoReturn":
-def fetch_request(url: str, callback: Any, **kwargs: Any) -> Generator[Callable[..., Any], Any, None]:
+def fetch_request(url: str, callback: Any, **kwargs: Any) -> "Generator[Callable[..., Any], Any, None]":
-def assert_server_running(server: subprocess.Popen[bytes], log_file: Optional[str]) -> None:
+def assert_server_running(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> None:
-def server_is_up(server: subprocess.Popen[bytes], log_file: Optional[str]) -> bool:
+def server_is_up(server: "subprocess.Popen[bytes]", log_file: Optional[str]) -> bool:
- method_kwarg_pairs: List[FuncKwargPair],
+ method_kwarg_pairs: "List[FuncKwargPair]",
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-19 03:48:37 +02:00
|
|
|
def check_capitalization(strings: List[str]) -> Tuple[List[str], List[str], List[str]]:
    """
    Check each string's capitalization and scan it for banned words.

    Returns three sorted lists: the strings that failed the
    capitalization check, the strings that passed but whose safe text
    differs from the whitespace-normalized original, and the
    banned-word error messages.
    """
    errors: List[str] = []
    ignored: List[str] = []
    banned_word_errors: List[str] = []

    for raw in strings:
        text = " ".join(raw.split())  # Collapse runs of whitespace.
        safe_text = get_safe_text(text)

        if not is_capitalized(safe_text):
            errors.append(text)
        elif text != safe_text:
            # Passed the check, but only after the text was altered
            # while being converted to its safe form.
            ignored.append(text)

        banned_word_errors.extend(check_banned_words(text))

    return sorted(errors), sorted(ignored), sorted(banned_word_errors)
|