capitalization: Prune unused phrases from IGNORED_PHRASES.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg 2022-02-22 16:57:12 -08:00 committed by Tim Abbott
parent cb9630e0db
commit 076b0f06a2
2 changed files with 0 additions and 101 deletions

View File

@@ -10,19 +10,14 @@ from bs4 import BeautifulSoup
# this list without any modification.
IGNORED_PHRASES = [
# Proper nouns and acronyms
r"Android",
r"API",
r"APNS",
r"App Store",
r"Botserver",
r"Cookie Bot",
r"DevAuthBackend",
r"Dropbox",
r"GCM",
r"GitHub",
r"Google",
r"Gravatar",
r"Hamlet",
r"Help Center",
r"HTTP",
r"ID",
@@ -31,50 +26,29 @@ IGNORED_PHRASES = [
r"JSON",
r"Kerberos",
r"LDAP",
r"Mac",
r"macOS",
r"Markdown",
r"MiB",
r"OAuth",
r"OTP",
r"Pivotal",
r"Play Store",
r"PM",
r"PMs",
r"REMOTE_USER",
r"Slack",
r"SSO",
r"Terms of Service",
r"Tuesday",
r"URL",
r"Ubuntu",
r"Updown",
r"UUID",
r"V5",
r"Webathena",
r"Windows",
r"WordPress",
r"XML",
r"Zephyr",
r"Zoom",
r"Zulip",
r"Zulip Account Security",
r"Zulip Security",
r"Zulip Cloud Standard",
r"Zulip Team",
r"iPhone",
r"iOS",
r"Emoji One",
r"mailinator\.com",
r"HQ",
r"BigBlueButton",
# Code things
r"\.zuliprc",
r"__\w+\.\w+__",
# Things using "I"
r"I understand",
r"I say",
r"I want",
r"I'm",
r"I've",
# Specific short words
@@ -82,13 +56,7 @@ IGNORED_PHRASES = [
r"and",
r"bot",
r"e\.g\.",
r"etc\.",
r"images",
r"enabled",
r"disabled",
r"zulip_org_id",
r"admins",
r"members",
r"signups",
# Placeholders
r"keyword",
@@ -96,49 +64,23 @@ IGNORED_PHRASES = [
r"user@example\.com",
# Fragments of larger strings
(r"your subscriptions on your Streams page"),
(
r"Change notification settings for individual streams on your "
r'<a href="/#streams">Streams page</a>\.'
),
(
r"Looking for our "
r'<a href="/integrations" target="_blank">Integrations</a> or '
r'<a href="/api" target="_blank">API</a> documentation\?'
),
r'Most stream administration is done on the <a href="/#streams">Streams page</a>\.',
r"Add global time<br />Everyone sees global times in their own time zone\.",
r"one or more people\.\.\.",
r"confirmation email",
r"invites remaining",
r"was too large; the maximum file size is 25MiB\.",
r"selected message",
r"a-z",
r"organization administrator",
r"user",
r"an unknown operating system",
r"Go to Settings",
r"Like Organization logo",
# SPECIAL CASES
# Enter is usually capitalized
r"Press Enter to send",
r"Send message on pressing Enter",
# Because topics usually are lower-case, this would look weird if it were capitalized
r"more topics",
# For consistency with "more topics"
r"more conversations",
# Capital 'i' looks weird in reminders popover
r"in 1 hour",
r"in 20 minutes",
r"in 3 hours",
# We should probably just delete this string from translations
r"activation key",
# These are used as topics
r"^new streams$",
r"^stream events$",
# These are used as example short names (e.g. an uncapitalized context):
r"^marketing$",
r"^cookie$",
r"^new_emoji$",
# Used to refer to custom time limits
r"\bN\b",
# Capital c feels obtrusive in clear status option
@@ -146,13 +88,6 @@ IGNORED_PHRASES = [
r"group private messages with \{recipient\}",
r"private messages with \{recipient\}",
r"private messages with yourself",
# TO CLEAN UP
# Just want to avoid churning login.html right now
r"or Choose a user",
# This is a parsing bug in the tool
r"argument ",
# I can't find this one
r"text",
r"GIF",
# Emoji name placeholder
r"leafy green vegetable",

View File

@@ -7,12 +7,6 @@ from tools.lib.capitalization import check_capitalization, get_safe_text, is_cap
class GetSafeTextTestCase(TestCase):
def test_get_safe_text(self) -> None:
string = "Messages in __page_params.product_name__ go to a stream and have a topic."
safe_text = get_safe_text(string)
self.assertEqual(
safe_text, "Messages in __page_params_product_name__ go to a stream and have a topic."
)
string = "Zulip Zulip. Zulip some text!"
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "Zulip zulip. Zulip some text!")
@@ -29,10 +23,6 @@ class GetSafeTextTestCase(TestCase):
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "Zulip zulip, zulip some text!")
string = "Some text 25MiB"
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "Some text 25mib")
string = "Not Ignored Phrase"
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "Not Ignored Phrase")
@@ -58,18 +48,6 @@ class GetSafeTextTestCase(TestCase):
safe_text = get_safe_text(string)
self.assertEqual(safe_text, string)
string = "iPhone application"
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "Iphone application")
string = "One two etc. three"
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "One two etc_ three")
string = "One two etc. three. four"
safe_text = get_safe_text(string)
self.assertEqual(safe_text, "One two etc_ three. four")
class IsCapitalizedTestCase(TestCase):
def test_process_text(self) -> None:
@@ -129,17 +107,10 @@ class CheckCapitalizationTestCase(TestCase):
"Zulip Zulip? Zulip some text!",
"Zulip Zulip! Zulip some text!",
"Zulip Zulip, Zulip some text!",
"Some number 25MiB",
"Not Ignored Phrase",
"Not ignored phrase",
"Some text with realm in it",
"Realm in capital case",
(
'<p class="bot-settings-note padded-container"> Looking for our '
'<a href="/integrations" target="_blank">Integrations</a> or '
'<a href="/api" target="_blank">API</a> '
"documentation? </p>"
),
]
errored, ignored, banned = check_capitalization(strings)
self.assertEqual(errored, ["Not Ignored Phrase"])
@@ -151,13 +122,6 @@ class CheckCapitalizationTestCase(TestCase):
"Zulip Zulip? Zulip some text!",
"Zulip Zulip! Zulip some text!",
"Zulip Zulip, Zulip some text!",
"Some number 25MiB",
(
'<p class="bot-settings-note padded-container"> Looking '
'for our <a href="/integrations" target="_blank">'
'Integrations</a> or <a href="/api" '
'target="_blank">API</a> documentation? </p>'
),
]
),
)