makemessages: Write with orjson.

orjson’s use of Unicode is more consistent with what we get from
Transifex.  (We could alternatively use json’s ensure_ascii=False
flag.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg 2024-11-13 16:30:36 -08:00 committed by Tim Abbott
parent 06a9600aa7
commit 2de648df02
5 changed files with 56 additions and 33 deletions

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3
import json
import os
import re
from subprocess import check_output
import orjson
def get_json_filename(locale: str) -> str:
    """Return the repo-relative path of the mobile translation file for ``locale``."""
    return "locale/{}/mobile.json".format(locale)
@ -23,8 +24,8 @@ def get_locales() -> list[str]:
def get_translation_stats(resource_path: str) -> dict[str, int]:
with open(resource_path) as raw_resource_file:
raw_info = json.load(raw_resource_file)
with open(resource_path, "rb") as raw_resource_file:
raw_info = orjson.loads(raw_resource_file.read())
total = len(raw_info)
not_translated = len([i for i in raw_info.items() if i[1] == ""])
@ -41,8 +42,12 @@ for locale in get_locales():
locale_paths.append(path)
stats_path = os.path.join("locale", "mobile_info.json")
with open(stats_path, "w") as f:
json.dump(translation_stats, f, indent=2, sort_keys=True)
f.write("\n")
with open(stats_path, "wb") as f:
f.write(
orjson.dumps(
translation_stats,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)
print("Mobile stats file created at: " + stats_path)

View File

@ -2,10 +2,11 @@
import argparse
import html
import json
import sys
from typing import NamedTuple
import orjson
class CLIArgs(NamedTuple):
unescape_html: bool
@ -38,8 +39,8 @@ if __name__ == "__main__":
json_data: dict[str, str] = {}
with open(args.filename) as source:
json_data = json.load(source)
with open(args.filename, "rb") as source:
json_data = orjson.loads(source.read())
if args.unescape_html:
for key, value in json_data.items():
@ -51,11 +52,13 @@ if __name__ == "__main__":
file=sys.stderr,
)
with open(args.filename, mode="w") as dest:
# At least on Linux systems with LANG=en_US.UTF-8, ensure_ascii=False
# ensures our output uses real UTF-8 codepoints for human readability,
# rather than \u0000 style escape sequences, providing us a
# somewhat-implicit JSON unescape. This may behave in unexpected ways
# on other OSes or system encodings.
json.dump(json_data, dest, ensure_ascii=False, indent=args.indent_level)
dest.write("\n")
with open(args.filename, mode="wb") as dest:
# orjson ensures our output uses real UTF-8 codepoints for
# human readability, rather than \u0000 style escape
# sequences, providing us a somewhat-implicit JSON unescape.
dest.write(
orjson.dumps(
json_data,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3
import json
import os
import re
from subprocess import check_output
import orjson
LEGACY_STRINGS_MAP = {
"<p>You are searching for messages that belong to more than one channel, which is not possible.</p>": "<p>You are searching for messages that belong to more than one stream, which is not possible.</p>",
"<strong>{name}</strong> <i>(guest)</i> is not subscribed to this channel. They will not be notified if you mention them.": "<strong>{name}</strong> <i>(guest)</i> is not subscribed to this stream. They will not be notified if you mention them.",
@ -200,8 +201,8 @@ def get_locales() -> list[str]:
def get_translations(path: str) -> dict[str, str]:
    """Load the JSON translation file at ``path``.

    The file is opened in binary mode and handed to orjson as raw bytes, so
    parsing does not depend on the process's locale encoding.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The parsed JSON document (annotated as a mapping of source string to
        translated string; the shape is not validated here).

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the content is not valid JSON (orjson's decode error
            is a ``ValueError`` subclass).
    """
    with open(path, "rb") as raw_resource_file:
        # orjson parses the UTF-8 bytes directly; no text-mode decoding step.
        translations = orjson.loads(raw_resource_file.read())
    return translations
@ -226,9 +227,13 @@ def update_for_legacy_stream_translations(
# Only replace file content if we've made any updates for legacy
# translated strings.
if number_of_updates > 0:
with open(path, "w") as f:
json.dump(updated_translations, f, ensure_ascii=False, indent=2, sort_keys=True)
f.write("\n")
with open(path, "wb") as f:
f.write(
orjson.dumps(
updated_translations,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)
print(f"Updated {number_of_updates} strings in: {path}")

View File

@ -1,4 +1,3 @@
import json
import os
import re
import unicodedata
@ -137,9 +136,13 @@ class Command(compilemessages.Command):
info["percent_translated"] = percentage
data["languages"].append(info)
with open(output_path, "w") as writer:
json.dump(data, writer, indent=2, sort_keys=True)
writer.write("\n")
with open(output_path, "wb") as writer:
writer.write(
orjson.dumps(
data,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
)
)
def get_translation_percentage(self, locale_path: str, locale: str) -> int:
# backend stats

View File

@ -34,13 +34,13 @@ https://stackoverflow.com/questions/2090717
import glob
import itertools
import json
import os
import re
import subprocess
from collections.abc import Collection, Iterator, Mapping
from typing import Any
import orjson
from django.core.management.base import CommandParser
from django.core.management.commands import makemessages
from django.template.base import BLOCK_TAG_END, BLOCK_TAG_START
@ -215,7 +215,7 @@ class Command(makemessages.Command):
"web/src/**/*.ts",
]
)
translation_strings.extend(json.loads(extracted).values())
translation_strings.extend(orjson.loads(extracted).values())
return list(set(translation_strings))
@ -276,11 +276,18 @@ class Command(makemessages.Command):
for locale, output_path in zip(self.get_locales(), self.get_output_paths(), strict=False):
self.stdout.write(f"[frontend] processing locale {locale}")
try:
with open(output_path) as reader:
old_strings = json.load(reader)
with open(output_path, "rb") as reader:
old_strings = orjson.loads(reader.read())
except (OSError, ValueError):
old_strings = {}
new_strings = self.get_new_strings(old_strings, translation_strings, locale)
with open(output_path, "w") as writer:
json.dump(new_strings, writer, indent=2, sort_keys=True)
with open(output_path, "wb") as writer:
writer.write(
orjson.dumps(
new_strings,
option=orjson.OPT_APPEND_NEWLINE
| orjson.OPT_INDENT_2
| orjson.OPT_SORT_KEYS,
)
)