From 076b0f06a2f3f84e0effd4bbc18a6318215f079c Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Tue, 22 Feb 2022 16:57:12 -0800 Subject: [PATCH] capitalization: Prune unused phrases from IGNORED_PHRASES. Signed-off-by: Anders Kaseorg --- tools/lib/capitalization.py | 65 ---------------------- tools/tests/test_capitalization_checker.py | 36 ------------ 2 files changed, 101 deletions(-) diff --git a/tools/lib/capitalization.py b/tools/lib/capitalization.py index ea06b12b28..f0784acfab 100644 --- a/tools/lib/capitalization.py +++ b/tools/lib/capitalization.py @@ -10,19 +10,14 @@ from bs4 import BeautifulSoup # this list without any modification. IGNORED_PHRASES = [ # Proper nouns and acronyms - r"Android", r"API", r"APNS", - r"App Store", r"Botserver", r"Cookie Bot", r"DevAuthBackend", - r"Dropbox", r"GCM", r"GitHub", - r"Google", r"Gravatar", - r"Hamlet", r"Help Center", r"HTTP", r"ID", @@ -31,50 +26,29 @@ IGNORED_PHRASES = [ r"JSON", r"Kerberos", r"LDAP", - r"Mac", - r"macOS", r"Markdown", - r"MiB", - r"OAuth", r"OTP", r"Pivotal", - r"Play Store", r"PM", r"PMs", - r"REMOTE_USER", r"Slack", - r"SSO", r"Terms of Service", r"Tuesday", r"URL", - r"Ubuntu", - r"Updown", r"UUID", - r"V5", r"Webathena", - r"Windows", r"WordPress", - r"XML", r"Zephyr", r"Zoom", r"Zulip", r"Zulip Account Security", r"Zulip Security", r"Zulip Cloud Standard", - r"Zulip Team", - r"iPhone", - r"iOS", - r"Emoji One", - r"mailinator\.com", - r"HQ", r"BigBlueButton", # Code things r"\.zuliprc", - r"__\w+\.\w+__", # Things using "I" r"I understand", - r"I say", - r"I want", r"I'm", r"I've", # Specific short words @@ -82,13 +56,7 @@ IGNORED_PHRASES = [ r"and", r"bot", r"e\.g\.", - r"etc\.", - r"images", r"enabled", - r"disabled", - r"zulip_org_id", - r"admins", - r"members", r"signups", # Placeholders r"keyword", @@ -96,49 +64,23 @@ IGNORED_PHRASES = [ r"user@example\.com", # Fragments of larger strings (r"your subscriptions on your Streams page"), - ( - r"Change notification settings for individual streams on your " - r'Streams page\.' - ), - ( - r"Looking for our " - r'Integrations or ' - r'API documentation\?' - ), - r'Most stream administration is done on the Streams page\.', r"Add global time
Everyone sees global times in their own time zone\.", - r"one or more people\.\.\.", - r"confirmation email", - r"invites remaining", - r"was too large; the maximum file size is 25MiB\.", - r"selected message", - r"a-z", - r"organization administrator", r"user", r"an unknown operating system", r"Go to Settings", - r"Like Organization logo", # SPECIAL CASES - # Enter is usually capitalized - r"Press Enter to send", - r"Send message on pressing Enter", # Because topics usually are lower-case, this would look weird if it were capitalized r"more topics", - # For consistency with "more topics" - r"more conversations", # Capital 'i' looks weird in reminders popover r"in 1 hour", r"in 20 minutes", r"in 3 hours", - # We should probably just delete this string from translations - r"activation key", # these are used as topics r"^new streams$", r"^stream events$", # These are used as example short names (e.g. an uncapitalized context): r"^marketing$", r"^cookie$", - r"^new_emoji$", # Used to refer custom time limits r"\bN\b", # Capital c feels obtrusive in clear status option @@ -146,13 +88,6 @@ IGNORED_PHRASES = [ r"group private messages with \{recipient\}", r"private messages with \{recipient\}", r"private messages with yourself", - # TO CLEAN UP - # Just want to avoid churning login.html right now - r"or Choose a user", - # This is a parsing bug in the tool - r"argument ", - # I can't find this one - r"text", r"GIF", # Emoji name placeholder r"leafy green vegetable", diff --git a/tools/tests/test_capitalization_checker.py b/tools/tests/test_capitalization_checker.py index 02bb7b6e6e..9e593ed02c 100644 --- a/tools/tests/test_capitalization_checker.py +++ b/tools/tests/test_capitalization_checker.py @@ -7,12 +7,6 @@ from tools.lib.capitalization import check_capitalization, get_safe_text, is_cap class GetSafeTextTestCase(TestCase): def test_get_safe_text(self) -> None: - string = "Messages in __page_params.product_name__ go to a stream and have a topic." - safe_text = get_safe_text(string) - self.assertEqual( - safe_text, "Messages in __page_params_product_name__ go to a stream and have a topic." - ) - string = "Zulip Zulip. Zulip some text!" safe_text = get_safe_text(string) self.assertEqual(safe_text, "Zulip zulip. Zulip some text!") @@ -29,10 +23,6 @@ class GetSafeTextTestCase(TestCase): safe_text = get_safe_text(string) self.assertEqual(safe_text, "Zulip zulip, zulip some text!") - string = "Some text 25MiB" - safe_text = get_safe_text(string) - self.assertEqual(safe_text, "Some text 25mib") - string = "Not Ignored Phrase" safe_text = get_safe_text(string) self.assertEqual(safe_text, "Not Ignored Phrase") @@ -58,18 +48,6 @@ class GetSafeTextTestCase(TestCase): safe_text = get_safe_text(string) self.assertEqual(safe_text, string) - string = "iPhone application" - safe_text = get_safe_text(string) - self.assertEqual(safe_text, "Iphone application") - - string = "One two etc. three" - safe_text = get_safe_text(string) - self.assertEqual(safe_text, "One two etc_ three") - - string = "One two etc. three. four" - safe_text = get_safe_text(string) - self.assertEqual(safe_text, "One two etc_ three. four") - class IsCapitalizedTestCase(TestCase): def test_process_text(self) -> None: @@ -129,17 +107,10 @@ class CheckCapitalizationTestCase(TestCase): "Zulip Zulip? Zulip some text!", "Zulip Zulip! Zulip some text!", "Zulip Zulip, Zulip some text!", - "Some number 25MiB", "Not Ignored Phrase", "Not ignored phrase", "Some text with realm in it", "Realm in capital case", - ( - '

Looking for our ' - 'Integrations or ' - 'API ' - "documentation?

" - ), ] errored, ignored, banned = check_capitalization(strings) self.assertEqual(errored, ["Not Ignored Phrase"]) @@ -151,13 +122,6 @@ class CheckCapitalizationTestCase(TestCase): "Zulip Zulip? Zulip some text!", "Zulip Zulip! Zulip some text!", "Zulip Zulip, Zulip some text!", - "Some number 25MiB", - ( - '

Looking ' - 'for our ' - 'Integrations or API documentation?

' - ), ] ), )