makemessages: Write with orjson.

orjson writes non-ASCII characters as literal UTF-8 rather than \uXXXX
escape sequences, which is more consistent with what we get from
Transifex.  (We could alternatively use json’s ensure_ascii=False flag.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg 2024-11-13 16:30:36 -08:00 committed by Tim Abbott
parent 06a9600aa7
commit 2de648df02
5 changed files with 56 additions and 33 deletions

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import json
import os import os
import re import re
from subprocess import check_output from subprocess import check_output
import orjson
def get_json_filename(locale: str) -> str: def get_json_filename(locale: str) -> str:
return f"locale/{locale}/mobile.json" return f"locale/{locale}/mobile.json"
@ -23,8 +24,8 @@ def get_locales() -> list[str]:
def get_translation_stats(resource_path: str) -> dict[str, int]: def get_translation_stats(resource_path: str) -> dict[str, int]:
with open(resource_path) as raw_resource_file: with open(resource_path, "rb") as raw_resource_file:
raw_info = json.load(raw_resource_file) raw_info = orjson.loads(raw_resource_file.read())
total = len(raw_info) total = len(raw_info)
not_translated = len([i for i in raw_info.items() if i[1] == ""]) not_translated = len([i for i in raw_info.items() if i[1] == ""])
@ -41,8 +42,12 @@ for locale in get_locales():
locale_paths.append(path) locale_paths.append(path)
stats_path = os.path.join("locale", "mobile_info.json") stats_path = os.path.join("locale", "mobile_info.json")
with open(stats_path, "w") as f: with open(stats_path, "wb") as f:
json.dump(translation_stats, f, indent=2, sort_keys=True) f.write(
f.write("\n") orjson.dumps(
translation_stats,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)
print("Mobile stats file created at: " + stats_path) print("Mobile stats file created at: " + stats_path)

View File

@ -2,10 +2,11 @@
import argparse import argparse
import html import html
import json
import sys import sys
from typing import NamedTuple from typing import NamedTuple
import orjson
class CLIArgs(NamedTuple): class CLIArgs(NamedTuple):
unescape_html: bool unescape_html: bool
@ -38,8 +39,8 @@ if __name__ == "__main__":
json_data: dict[str, str] = {} json_data: dict[str, str] = {}
with open(args.filename) as source: with open(args.filename, "rb") as source:
json_data = json.load(source) json_data = orjson.loads(source.read())
if args.unescape_html: if args.unescape_html:
for key, value in json_data.items(): for key, value in json_data.items():
@ -51,11 +52,13 @@ if __name__ == "__main__":
file=sys.stderr, file=sys.stderr,
) )
with open(args.filename, mode="w") as dest: with open(args.filename, mode="wb") as dest:
# At least on Linux systems with LANG=en_US.UTF-8, ensure_ascii=False # orjson ensures our output uses real UTF-8 codepoints for
# ensures our output uses real UTF-8 codepoints for human readability, # human readability, rather than \u0000 style escape
# rather than \u0000 style escape sequences, providing us a # sequences, providing us a somewhat-implicit JSON unescape.
# somewhat-implicit JSON unescape. This may behave in unexpected ways dest.write(
# on other OSes or system encodings. orjson.dumps(
json.dump(json_data, dest, ensure_ascii=False, indent=args.indent_level) json_data,
dest.write("\n") option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import json
import os import os
import re import re
from subprocess import check_output from subprocess import check_output
import orjson
LEGACY_STRINGS_MAP = { LEGACY_STRINGS_MAP = {
"<p>You are searching for messages that belong to more than one channel, which is not possible.</p>": "<p>You are searching for messages that belong to more than one stream, which is not possible.</p>", "<p>You are searching for messages that belong to more than one channel, which is not possible.</p>": "<p>You are searching for messages that belong to more than one stream, which is not possible.</p>",
"<strong>{name}</strong> <i>(guest)</i> is not subscribed to this channel. They will not be notified if you mention them.": "<strong>{name}</strong> <i>(guest)</i> is not subscribed to this stream. They will not be notified if you mention them.", "<strong>{name}</strong> <i>(guest)</i> is not subscribed to this channel. They will not be notified if you mention them.": "<strong>{name}</strong> <i>(guest)</i> is not subscribed to this stream. They will not be notified if you mention them.",
@ -200,8 +201,8 @@ def get_locales() -> list[str]:
def get_translations(path: str) -> dict[str, str]: def get_translations(path: str) -> dict[str, str]:
with open(path) as raw_resource_file: with open(path, "rb") as raw_resource_file:
translations = json.load(raw_resource_file) translations = orjson.loads(raw_resource_file.read())
return translations return translations
@ -226,9 +227,13 @@ def update_for_legacy_stream_translations(
# Only replace file content if we've made any updates for legacy # Only replace file content if we've made any updates for legacy
# translated strings. # translated strings.
if number_of_updates > 0: if number_of_updates > 0:
with open(path, "w") as f: with open(path, "wb") as f:
json.dump(updated_translations, f, ensure_ascii=False, indent=2, sort_keys=True) f.write(
f.write("\n") orjson.dumps(
updated_translations,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)
print(f"Updated {number_of_updates} strings in: {path}") print(f"Updated {number_of_updates} strings in: {path}")

View File

@ -1,4 +1,3 @@
import json
import os import os
import re import re
import unicodedata import unicodedata
@ -137,9 +136,13 @@ class Command(compilemessages.Command):
info["percent_translated"] = percentage info["percent_translated"] = percentage
data["languages"].append(info) data["languages"].append(info)
with open(output_path, "w") as writer: with open(output_path, "wb") as writer:
json.dump(data, writer, indent=2, sort_keys=True) writer.write(
writer.write("\n") orjson.dumps(
data,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)
def get_translation_percentage(self, locale_path: str, locale: str) -> int: def get_translation_percentage(self, locale_path: str, locale: str) -> int:
# backend stats # backend stats

View File

@ -34,13 +34,13 @@ https://stackoverflow.com/questions/2090717
import glob import glob
import itertools import itertools
import json
import os import os
import re import re
import subprocess import subprocess
from collections.abc import Collection, Iterator, Mapping from collections.abc import Collection, Iterator, Mapping
from typing import Any from typing import Any
import orjson
from django.core.management.base import CommandParser from django.core.management.base import CommandParser
from django.core.management.commands import makemessages from django.core.management.commands import makemessages
from django.template.base import BLOCK_TAG_END, BLOCK_TAG_START from django.template.base import BLOCK_TAG_END, BLOCK_TAG_START
@ -215,7 +215,7 @@ class Command(makemessages.Command):
"web/src/**/*.ts", "web/src/**/*.ts",
] ]
) )
translation_strings.extend(json.loads(extracted).values()) translation_strings.extend(orjson.loads(extracted).values())
return list(set(translation_strings)) return list(set(translation_strings))
@ -276,11 +276,18 @@ class Command(makemessages.Command):
for locale, output_path in zip(self.get_locales(), self.get_output_paths(), strict=False): for locale, output_path in zip(self.get_locales(), self.get_output_paths(), strict=False):
self.stdout.write(f"[frontend] processing locale {locale}") self.stdout.write(f"[frontend] processing locale {locale}")
try: try:
with open(output_path) as reader: with open(output_path, "rb") as reader:
old_strings = json.load(reader) old_strings = orjson.loads(reader.read())
except (OSError, ValueError): except (OSError, ValueError):
old_strings = {} old_strings = {}
new_strings = self.get_new_strings(old_strings, translation_strings, locale) new_strings = self.get_new_strings(old_strings, translation_strings, locale)
with open(output_path, "w") as writer: with open(output_path, "wb") as writer:
json.dump(new_strings, writer, indent=2, sort_keys=True) writer.write(
orjson.dumps(
new_strings,
option=orjson.OPT_APPEND_NEWLINE
| orjson.OPT_INDENT_2
| orjson.OPT_SORT_KEYS,
)
)