mirror of https://github.com/zulip/zulip.git
capitalization: Prune unused phrases from IGNORED_PHRASES.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
cb9630e0db
commit
076b0f06a2
|
@ -10,19 +10,14 @@ from bs4 import BeautifulSoup
|
|||
# this list without any modification.
|
||||
IGNORED_PHRASES = [
|
||||
# Proper nouns and acronyms
|
||||
r"Android",
|
||||
r"API",
|
||||
r"APNS",
|
||||
r"App Store",
|
||||
r"Botserver",
|
||||
r"Cookie Bot",
|
||||
r"DevAuthBackend",
|
||||
r"Dropbox",
|
||||
r"GCM",
|
||||
r"GitHub",
|
||||
r"Google",
|
||||
r"Gravatar",
|
||||
r"Hamlet",
|
||||
r"Help Center",
|
||||
r"HTTP",
|
||||
r"ID",
|
||||
|
@ -31,50 +26,29 @@ IGNORED_PHRASES = [
|
|||
r"JSON",
|
||||
r"Kerberos",
|
||||
r"LDAP",
|
||||
r"Mac",
|
||||
r"macOS",
|
||||
r"Markdown",
|
||||
r"MiB",
|
||||
r"OAuth",
|
||||
r"OTP",
|
||||
r"Pivotal",
|
||||
r"Play Store",
|
||||
r"PM",
|
||||
r"PMs",
|
||||
r"REMOTE_USER",
|
||||
r"Slack",
|
||||
r"SSO",
|
||||
r"Terms of Service",
|
||||
r"Tuesday",
|
||||
r"URL",
|
||||
r"Ubuntu",
|
||||
r"Updown",
|
||||
r"UUID",
|
||||
r"V5",
|
||||
r"Webathena",
|
||||
r"Windows",
|
||||
r"WordPress",
|
||||
r"XML",
|
||||
r"Zephyr",
|
||||
r"Zoom",
|
||||
r"Zulip",
|
||||
r"Zulip Account Security",
|
||||
r"Zulip Security",
|
||||
r"Zulip Cloud Standard",
|
||||
r"Zulip Team",
|
||||
r"iPhone",
|
||||
r"iOS",
|
||||
r"Emoji One",
|
||||
r"mailinator\.com",
|
||||
r"HQ",
|
||||
r"BigBlueButton",
|
||||
# Code things
|
||||
r"\.zuliprc",
|
||||
r"__\w+\.\w+__",
|
||||
# Things using "I"
|
||||
r"I understand",
|
||||
r"I say",
|
||||
r"I want",
|
||||
r"I'm",
|
||||
r"I've",
|
||||
# Specific short words
|
||||
|
@ -82,13 +56,7 @@ IGNORED_PHRASES = [
|
|||
r"and",
|
||||
r"bot",
|
||||
r"e\.g\.",
|
||||
r"etc\.",
|
||||
r"images",
|
||||
r"enabled",
|
||||
r"disabled",
|
||||
r"zulip_org_id",
|
||||
r"admins",
|
||||
r"members",
|
||||
r"signups",
|
||||
# Placeholders
|
||||
r"keyword",
|
||||
|
@ -96,49 +64,23 @@ IGNORED_PHRASES = [
|
|||
r"user@example\.com",
|
||||
# Fragments of larger strings
|
||||
(r"your subscriptions on your Streams page"),
|
||||
(
|
||||
r"Change notification settings for individual streams on your "
|
||||
r'<a href="/#streams">Streams page</a>\.'
|
||||
),
|
||||
(
|
||||
r"Looking for our "
|
||||
r'<a href="/integrations" target="_blank">Integrations</a> or '
|
||||
r'<a href="/api" target="_blank">API</a> documentation\?'
|
||||
),
|
||||
r'Most stream administration is done on the <a href="/#streams">Streams page</a>\.',
|
||||
r"Add global time<br />Everyone sees global times in their own time zone\.",
|
||||
r"one or more people\.\.\.",
|
||||
r"confirmation email",
|
||||
r"invites remaining",
|
||||
r"was too large; the maximum file size is 25MiB\.",
|
||||
r"selected message",
|
||||
r"a-z",
|
||||
r"organization administrator",
|
||||
r"user",
|
||||
r"an unknown operating system",
|
||||
r"Go to Settings",
|
||||
r"Like Organization logo",
|
||||
# SPECIAL CASES
|
||||
# Enter is usually capitalized
|
||||
r"Press Enter to send",
|
||||
r"Send message on pressing Enter",
|
||||
# Because topics usually are lower-case, this would look weird if it were capitalized
|
||||
r"more topics",
|
||||
# For consistency with "more topics"
|
||||
r"more conversations",
|
||||
# Capital 'i' looks weird in reminders popover
|
||||
r"in 1 hour",
|
||||
r"in 20 minutes",
|
||||
r"in 3 hours",
|
||||
# We should probably just delete this string from translations
|
||||
r"activation key",
|
||||
# These are used as topics
|
||||
r"^new streams$",
|
||||
r"^stream events$",
|
||||
# These are used as example short names (e.g. an uncapitalized context):
|
||||
r"^marketing$",
|
||||
r"^cookie$",
|
||||
r"^new_emoji$",
|
||||
# Used to refer to custom time limits
|
||||
r"\bN\b",
|
||||
# Capital c feels obtrusive in clear status option
|
||||
|
@ -146,13 +88,6 @@ IGNORED_PHRASES = [
|
|||
r"group private messages with \{recipient\}",
|
||||
r"private messages with \{recipient\}",
|
||||
r"private messages with yourself",
|
||||
# TO CLEAN UP
|
||||
# Just want to avoid churning login.html right now
|
||||
r"or Choose a user",
|
||||
# This is a parsing bug in the tool
|
||||
r"argument ",
|
||||
# I can't find this one
|
||||
r"text",
|
||||
r"GIF",
|
||||
# Emoji name placeholder
|
||||
r"leafy green vegetable",
|
||||
|
|
|
@ -7,12 +7,6 @@ from tools.lib.capitalization import check_capitalization, get_safe_text, is_cap
|
|||
|
||||
class GetSafeTextTestCase(TestCase):
|
||||
def test_get_safe_text(self) -> None:
|
||||
string = "Messages in __page_params.product_name__ go to a stream and have a topic."
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(
|
||||
safe_text, "Messages in __page_params_product_name__ go to a stream and have a topic."
|
||||
)
|
||||
|
||||
string = "Zulip Zulip. Zulip some text!"
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "Zulip zulip. Zulip some text!")
|
||||
|
@ -29,10 +23,6 @@ class GetSafeTextTestCase(TestCase):
|
|||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "Zulip zulip, zulip some text!")
|
||||
|
||||
string = "Some text 25MiB"
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "Some text 25mib")
|
||||
|
||||
string = "Not Ignored Phrase"
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "Not Ignored Phrase")
|
||||
|
@ -58,18 +48,6 @@ class GetSafeTextTestCase(TestCase):
|
|||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, string)
|
||||
|
||||
string = "iPhone application"
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "Iphone application")
|
||||
|
||||
string = "One two etc. three"
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "One two etc_ three")
|
||||
|
||||
string = "One two etc. three. four"
|
||||
safe_text = get_safe_text(string)
|
||||
self.assertEqual(safe_text, "One two etc_ three. four")
|
||||
|
||||
|
||||
class IsCapitalizedTestCase(TestCase):
|
||||
def test_process_text(self) -> None:
|
||||
|
@ -129,17 +107,10 @@ class CheckCapitalizationTestCase(TestCase):
|
|||
"Zulip Zulip? Zulip some text!",
|
||||
"Zulip Zulip! Zulip some text!",
|
||||
"Zulip Zulip, Zulip some text!",
|
||||
"Some number 25MiB",
|
||||
"Not Ignored Phrase",
|
||||
"Not ignored phrase",
|
||||
"Some text with realm in it",
|
||||
"Realm in capital case",
|
||||
(
|
||||
'<p class="bot-settings-note padded-container"> Looking for our '
|
||||
'<a href="/integrations" target="_blank">Integrations</a> or '
|
||||
'<a href="/api" target="_blank">API</a> '
|
||||
"documentation? </p>"
|
||||
),
|
||||
]
|
||||
errored, ignored, banned = check_capitalization(strings)
|
||||
self.assertEqual(errored, ["Not Ignored Phrase"])
|
||||
|
@ -151,13 +122,6 @@ class CheckCapitalizationTestCase(TestCase):
|
|||
"Zulip Zulip? Zulip some text!",
|
||||
"Zulip Zulip! Zulip some text!",
|
||||
"Zulip Zulip, Zulip some text!",
|
||||
"Some number 25MiB",
|
||||
(
|
||||
'<p class="bot-settings-note padded-container"> Looking '
|
||||
'for our <a href="/integrations" target="_blank">'
|
||||
'Integrations</a> or <a href="/api" '
|
||||
'target="_blank">API</a> documentation? </p>'
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue