"""
See https://zulip.readthedocs.io/en/latest/translating/internationalization.html
for background.

The contents of this file are taken from
https://github.com/niwinz/django-jinja/blob/master/django_jinja/management/commands/makemessages.py

Jinja2's i18n functionality is not exactly the same as Django's.
In particular, the tag names and their syntax are different:

  1. The Django ``trans`` tag is replaced by a _() global.
  2. The Django ``blocktrans`` tag is called ``trans``.

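For example (an illustrative snippet, not taken from a real template), a
string that a Django template would mark up as

    {% blocktrans %}Hello, {{ user }}!{% endblocktrans %}

is written in a Jinja2 template as

    {% trans %}Hello, {{ user }}!{% endtrans %}
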
(1) isn't an issue, since the whole ``makemessages`` process is based on
converting the template tags to ``_()`` calls. However, (2) means that
those Jinja2 ``trans`` tags will not be picked up by Django's
``makemessages`` command.

There aren't any nice solutions here. While Jinja2's i18n extension does
come with extraction capabilities built in, the code behind ``makemessages``
unfortunately isn't extensible, so we can:

  * Duplicate the command + code behind it.
  * Offer a separate command for Jinja2 extraction.
  * Try to get Django to offer hooks into makemessages().
  * Monkey-patch.

We are currently doing that last thing. It turns out we are lucky for once:
it's simply a matter of extending two regular expressions. Credit for the
approach goes to:
https://stackoverflow.com/questions/2090717

"""

import glob
import itertools
import os
import re
import subprocess
from collections.abc import Collection, Iterator, Mapping
from typing import Any

import orjson
from django.core.management.base import CommandParser
from django.core.management.commands import makemessages
from django.template.base import BLOCK_TAG_END, BLOCK_TAG_START
from django.utils.translation import template
from typing_extensions import override

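# Jinja2 block tags support whitespace control (e.g. ``{%- trans -%}``).
# These patterns match the whitespace next to the ``-`` side of
# trans/endtrans/pluralize tags so that strip_whitespaces() below can drop
# it before extraction, matching what Jinja2 strips at render time.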
strip_whitespace_right = re.compile(
    rf"({BLOCK_TAG_START}-?\s*(trans|pluralize).*?-{BLOCK_TAG_END})\s+"
)
strip_whitespace_left = re.compile(
    rf"\s+({BLOCK_TAG_START}-\s*(endtrans|pluralize).*?-?{BLOCK_TAG_END})"
)

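# Legacy regexes for extracting translatable strings from Handlebars
# templates and JS/TS sources; roughly, they match {{#tr}}...{{/tr}} blocks,
# {{t "..."}} / {{t '...'}} helper calls, and (t "...") subexpressions.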
regexes = [
    r"{{~?#tr}}([\s\S]*?)(?:~?{{/tr}}|{{~?#\*inline )",  # '.' doesn't match '\n' by default
    r'{{~?\s*t "([\s\S]*?)"\W*~?}}',
    r"{{~?\s*t '([\s\S]*?)'\W*~?}}",
    r'\(t "([\s\S]*?)"\)',
    r'=\(t "([\s\S]*?)"\)(?=[^{]*}})',
    r"=\(t '([\s\S]*?)'\)(?=[^{]*}})",
]
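# Each (function, comment tag) pair below becomes an xgettext ``--keyword``
# option on the Command class, so that a backend call written as err_("...")
# would be extracted with an "error" comment attached to the message.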
tags = [
    ("err_", "error"),
]

frontend_compiled_regexes = [re.compile(regex) for regex in regexes]
multiline_js_comment = re.compile(r"/\*.*?\*/", re.DOTALL)
singleline_js_comment = re.compile(r"//.*?\n")


def strip_whitespaces(src: str) -> str:
    src = strip_whitespace_left.sub("\\1", src)
    src = strip_whitespace_right.sub("\\1", src)
    return src


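# Extends Django's built-in makemessages command: backend strings in
# Django/Jinja2 templates and Python code are handled by the (monkey-patched)
# upstream implementation, while frontend strings from Handlebars/JS/TS are
# additionally collected into per-locale translations.json files.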
class Command(makemessages.Command):
    xgettext_options = makemessages.Command.xgettext_options
    for func, tag in tags:
        xgettext_options += [f'--keyword={func}:1,"{tag}"']

    @override
    def add_arguments(self, parser: CommandParser) -> None:
        super().add_arguments(parser)
        parser.add_argument(
            "--frontend-source",
            default="web/templates",
            help="Name of the Handlebars template directory",
        )
        parser.add_argument(
            "--frontend-output",
            default="locale",
            help="Name of the frontend messages output directory",
        )
        parser.add_argument(
            "--frontend-namespace",
            default="translations.json",
            help="Namespace of the frontend locale file",
        )

    @override
    def handle(self, *args: Any, **options: Any) -> None:
        self.handle_django_locales(*args, **options)
        self.handle_frontend_locales(**options)

    def handle_frontend_locales(
        self,
        *,
        frontend_source: str,
        frontend_output: str,
        frontend_namespace: str,
        locale: list[str],
        exclude: list[str],
        all: bool,
        **options: Any,
    ) -> None:
        self.frontend_source = frontend_source
        self.frontend_output = frontend_output
        self.frontend_namespace = frontend_namespace
        self.frontend_locale = locale
        self.frontend_exclude = exclude
        self.frontend_all = all

        translation_strings = self.get_translation_strings()
        self.write_translation_strings(translation_strings)

    def handle_django_locales(self, *args: Any, **options: Any) -> None:
        old_endblock_re = template.endblock_re
        old_block_re = template.block_re
        old_constant_re = template.constant_re

        old_templatize = template.templatize
        # Extend the regular expressions that are used to detect
        # translation blocks with an "OR jinja-syntax" clause.
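        # The added alternatives recognize Jinja2-style blocks such as
        # ``{% trans %}`` / ``{%- trans -%}``, ``{% endtrans %}``, and
        # ``{% pluralize %}``, alongside Django's own tags.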
        template.endblock_re = re.compile(
            template.endblock_re.pattern + r"|" + r"""^-?\s*endtrans\s*-?$"""
        )
        template.block_re = re.compile(
            template.block_re.pattern + r"|" + r"""^-?\s*trans(?:\s+(?!'|")(?=.*?=.*?)|\s*-?$)"""
        )
        template.plural_re = re.compile(
            template.plural_re.pattern + r"|" + r"""^-?\s*pluralize(?:\s+.+|-?$)"""
        )
        template.constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?')).*\)""")

        def my_templatize(src: str, *args: Any, **kwargs: Any) -> str:
            new_src = strip_whitespaces(src)
            return old_templatize(new_src, *args, **kwargs)

        template.templatize = my_templatize

        try:
            ignore_patterns = options.get("ignore_patterns", [])
            ignore_patterns.append("docs/*")
            ignore_patterns.append("templates/zerver/emails/custom/*")
            ignore_patterns.append("var/*")
            options["ignore_patterns"] = ignore_patterns
            super().handle(*args, **options)
        finally:
            template.endblock_re = old_endblock_re
            template.block_re = old_block_re
            template.templatize = old_templatize
            template.constant_re = old_constant_re

    def extract_strings(self, data: str) -> list[str]:
        translation_strings: list[str] = []
        for regex in frontend_compiled_regexes:
            for match in regex.findall(data):
                match = match.strip()
                match = " ".join(line.strip() for line in match.splitlines())
                translation_strings.append(match)

        return translation_strings

    def ignore_javascript_comments(self, data: str) -> str:
        # Removes multi-line (/* ... */) comments.
        data = multiline_js_comment.sub("", data)
        # Removes single-line (//) comments.
        data = singleline_js_comment.sub("", data)
        return data

    def get_translation_strings(self) -> list[str]:
        translation_strings: list[str] = []
        dirname = self.get_template_dir()

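        # Translation strings are gathered in three passes: the legacy
        # regexes over Handlebars templates (.hbs), the same regexes over
        # JS/TS sources (with comments stripped first), and a formatjs
        # extraction run over web/src for $t / $t_html calls.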
        for dirpath, dirnames, filenames in os.walk(dirname):
            for filename in [f for f in filenames if f.endswith(".hbs")]:
                if filename.startswith("."):
                    continue
                with open(os.path.join(dirpath, filename)) as reader:
                    data = reader.read()
                    translation_strings.extend(self.extract_strings(data))
        for dirpath, dirnames, filenames in itertools.chain(
            os.walk("web/src"), os.walk("web/shared/src")
        ):
            for filename in [f for f in filenames if f.endswith((".js", ".ts"))]:
                if filename.startswith("."):
                    continue
                with open(os.path.join(dirpath, filename)) as reader:
                    data = reader.read()
                    data = self.ignore_javascript_comments(data)
                    translation_strings.extend(self.extract_strings(data))

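        # formatjs with --format=simple emits a JSON object mapping message
        # id to default message, so .values() gives us the raw strings.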
        extracted = subprocess.check_output(
            [
                "node_modules/.bin/formatjs",
                "extract",
                "--additional-function-names=$t,$t_html",
                "--format=simple",
                "--ignore=**/*.d.ts",
                "web/src/**/*.js",
                "web/src/**/*.ts",
            ]
        )
        translation_strings.extend(orjson.loads(extracted).values())

        return list(set(translation_strings))

    def get_template_dir(self) -> str:
        return self.frontend_source

    def get_namespace(self) -> str:
        return self.frontend_namespace

    def get_locales(self) -> Collection[str]:
        locale = self.frontend_locale
        exclude = self.frontend_exclude
        process_all = self.frontend_all

        # After calling super().handle(), default_locale_path gets set on self
        # so that we can reuse it here.
        default_locale_path = self.default_locale_path
        paths = glob.glob(f"{default_locale_path}/*")
        all_locales = [os.path.basename(path) for path in paths if os.path.isdir(path)]

        # Account for excluded locales
        if process_all:
            return all_locales
        else:
            locales = locale or all_locales
            return set(locales) - set(exclude)

    def get_base_path(self) -> str:
        return self.frontend_output

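    # Yields one output file path per locale, creating the per-locale
    # directory if needed; with the default options this is
    # locale/<locale>/translations.json.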
    def get_output_paths(self) -> Iterator[str]:
        base_path = self.get_base_path()
        locales = self.get_locales()
        for path in [os.path.join(base_path, locale) for locale in locales]:
            if not os.path.exists(path):
                os.makedirs(path)

            yield os.path.join(path, self.get_namespace())

    def get_new_strings(
        self, old_strings: Mapping[str, str], translation_strings: list[str], locale: str
    ) -> dict[str, str]:
        """
        Missing strings are removed, new strings are added, and already
        translated strings are not touched.
        """
        new_strings: dict[str, str] = {}
        for k in translation_strings:
            if locale == "en":
                # For the English language, the translation is equal to the key.
                new_strings[k] = old_strings.get(k, k)
            else:
                new_strings[k] = old_strings.get(k, "")

        return new_strings

    def write_translation_strings(self, translation_strings: list[str]) -> None:
        for locale, output_path in zip(self.get_locales(), self.get_output_paths(), strict=False):
            self.stdout.write(f"[frontend] processing locale {locale}")
            try:
                with open(output_path, "rb") as reader:
                    old_strings = orjson.loads(reader.read())
            except (OSError, ValueError):
                old_strings = {}

            new_strings = self.get_new_strings(old_strings, translation_strings, locale)
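            # Sorted keys, 2-space indentation, and a trailing newline keep
            # the generated translations.json files stable and diff-friendly.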
            with open(output_path, "wb") as writer:
                writer.write(
                    orjson.dumps(
                        new_strings,
                        option=orjson.OPT_APPEND_NEWLINE
                        | orjson.OPT_INDENT_2
                        | orjson.OPT_SORT_KEYS,
                    )
                )