From afa218fa2a9516caf68a6e24dbb0728bf20a3a7f Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Mon, 20 Mar 2023 23:10:20 -0700 Subject: [PATCH] semgrep: Detect some unsafe uses of markupsafe.Markup. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the built-in HTML escaping of Markup("…{var}…").format(), in order to allow Semgrep to detect mistakes like Markup("…{var}…".format()) and Markup(f"…{var}…"). Signed-off-by: Anders Kaseorg --- analytics/views/activity_common.py | 21 ++++++++------------- analytics/views/installation_activity.py | 17 ++++++++++------- tools/semgrep.yml | 13 +++++++++++++ zerver/forms.py | 4 ++-- zerver/lib/upload/base.py | 3 +-- zerver/views/auth.py | 6 ++---- 6 files changed, 36 insertions(+), 28 deletions(-) diff --git a/analytics/views/activity_common.py b/analytics/views/activity_common.py index 6b46f225d9..4aae757e93 100644 --- a/analytics/views/activity_common.py +++ b/analytics/views/activity_common.py @@ -1,7 +1,6 @@ import re import sys from datetime import datetime -from html import escape from typing import Any, Collection, Dict, List, Optional, Sequence from urllib.parse import urlencode @@ -63,46 +62,42 @@ def user_activity_link(email: str, user_profile_id: int) -> Markup: from analytics.views.user_activity import get_user_activity url = reverse(get_user_activity, kwargs=dict(user_profile_id=user_profile_id)) - email_link = f'{escape(email)}' - return Markup(email_link) + return Markup('{email}').format(url=url, email=email) def realm_activity_link(realm_str: str) -> Markup: from analytics.views.realm_activity import get_realm_activity url = reverse(get_realm_activity, kwargs=dict(realm_str=realm_str)) - realm_link = f'{escape(realm_str)}' - return Markup(realm_link) + return Markup('{realm_str}').format(url=url, realm_str=realm_str) def realm_stats_link(realm_str: str) -> Markup: from analytics.views.stats import stats_for_realm url = reverse(stats_for_realm, kwargs=dict(realm_str=realm_str)) - stats_link = f'' - return Markup(stats_link) + return Markup('').format(url=url) def realm_support_link(realm_str: str) -> Markup: support_url = reverse("support") query = urlencode({"q": realm_str}) url = append_url_query_string(support_url, query) - support_link = f'{escape(realm_str)}' - return Markup(support_link) + return Markup('{realm_str}').format(url=url, realm_str=realm_str) def realm_url_link(realm_str: str) -> Markup: url = get_realm(realm_str).uri - realm_link = f'' - return Markup(realm_link) + return Markup('').format(url=url) def remote_installation_stats_link(server_id: int, hostname: str) -> Markup: from analytics.views.stats import stats_for_remote_installation url = reverse(stats_for_remote_installation, kwargs=dict(remote_server_id=server_id)) - stats_link = f'{escape(hostname)}' - return Markup(stats_link) + return Markup('{hostname}').format( + url=url, hostname=hostname + ) def get_user_activity_summary(records: Collection[UserActivity]) -> Dict[str, Any]: diff --git a/analytics/views/installation_activity.py b/analytics/views/installation_activity.py index 437211d6fc..e9217076c0 100644 --- a/analytics/views/installation_activity.py +++ b/analytics/views/installation_activity.py @@ -38,7 +38,7 @@ if settings.BILLING_ENABLED: ) -def get_realm_day_counts() -> Dict[str, Dict[str, str]]: +def get_realm_day_counts() -> Dict[str, Dict[str, Markup]]: query = SQL( """ select @@ -78,7 +78,7 @@ def get_realm_day_counts() -> Dict[str, Dict[str, str]]: min_cnt = min(raw_cnts[1:]) max_cnt = max(raw_cnts[1:]) - def format_count(cnt: int, style: Optional[str] = None) -> str: + def format_count(cnt: int, style: Optional[str] = None) -> Markup: if style is not None: good_bad = style elif cnt == min_cnt: @@ -88,9 +88,11 @@ def get_realm_day_counts() -> Dict[str, Dict[str, str]]: else: good_bad = "neutral" - return f'{cnt}' + return Markup('{cnt}').format( + good_bad=good_bad, cnt=cnt + ) - cnts = format_count(raw_cnts[0], "neutral") + "".join(map(format_count, raw_cnts[1:])) + cnts = format_count(raw_cnts[0], "neutral") + Markup().join(map(format_count, raw_cnts[1:])) result[string_id] = dict(cnts=cnts) return result @@ -304,7 +306,8 @@ def user_activity_intervals() -> Tuple[Markup, Dict[str, float]]: day_end = timestamp_to_datetime(time.time()) day_start = day_end - timedelta(hours=24) - output = "Per-user online duration for the last 24 hours:\n" + output = Markup() + output += "Per-user online duration for the last 24 hours:\n" total_duration = timedelta(0) all_intervals = ( @@ -335,7 +338,7 @@ def user_activity_intervals() -> Tuple[Markup, Dict[str, float]]: for string_id, realm_intervals in itertools.groupby(all_intervals, by_string_id): realm_duration = timedelta(0) - output += f"
{string_id}\n" + output += Markup("
") + f"{string_id}\n" for email, intervals in itertools.groupby(realm_intervals, by_email): duration = timedelta(0) for interval in intervals: @@ -352,7 +355,7 @@ def user_activity_intervals() -> Tuple[Markup, Dict[str, float]]: output += f"\nTotal duration: {total_duration}\n" output += f"\nTotal duration in minutes: {total_duration.total_seconds() / 60.}\n" output += f"Total duration amortized to a month: {total_duration.total_seconds() * 30. / 60.}" - content = Markup("
" + output + "
") + content = Markup("
{}
").format(output) return content, realm_minutes diff --git a/tools/semgrep.yml b/tools/semgrep.yml index fc5c83e2f2..53379ba92c 100644 --- a/tools/semgrep.yml +++ b/tools/semgrep.yml @@ -42,17 +42,30 @@ rules: - zerver/migrations/0387_reupload_realmemoji_again.py - pgroonga/migrations/0002_html_escape_subject.py + - id: html-format + languages: [python] + pattern-either: + - pattern: markupsafe.Markup(... .format(...)) + - pattern: markupsafe.Markup(f"...") + - pattern: markupsafe.Markup(... + ...) + severity: ERROR + message: "Do not write an HTML injection vulnerability please" + - id: sql-format languages: [python] pattern-either: - pattern: ... .execute("...".format(...)) - pattern: ... .execute(f"...") + - pattern: ... .execute(... + ...) - pattern: psycopg2.sql.SQL(... .format(...)) - pattern: psycopg2.sql.SQL(f"...") + - pattern: psycopg2.sql.SQL(... + ...) - pattern: django.db.migrations.RunSQL(..., "..." .format(...), ...) - pattern: django.db.migrations.RunSQL(..., f"...", ...) + - pattern: django.db.migrations.RunSQL(..., ... + ..., ...) - pattern: django.db.migrations.RunSQL(..., [..., "..." .format(...), ...], ...) - pattern: django.db.migrations.RunSQL(..., [..., f"...", ...], ...) + - pattern: django.db.migrations.RunSQL(..., [..., ... + ..., ...], ...) severity: ERROR message: "Do not write a SQL injection vulnerability please" diff --git a/zerver/forms.py b/zerver/forms.py index 37562f1c36..11fc3e0bbf 100644 --- a/zerver/forms.py +++ b/zerver/forms.py @@ -51,7 +51,7 @@ if settings.BILLING_ENABLED: # We don't mark this error for translation, because it's displayed # only to MIT users. -MIT_VALIDATION_ERROR = ( +MIT_VALIDATION_ERROR = Markup( "That user does not exist at MIT or is a" ' mailing list.' " If you want to sign up an alias for Zulip," @@ -76,7 +76,7 @@ def email_is_not_mit_mailing_list(email: str) -> None: if e.rcode == DNS.Status.NXDOMAIN: # This error is Markup only because 1. it needs to render HTML # 2. It's not formatted with any user input. - raise ValidationError(Markup(MIT_VALIDATION_ERROR)) + raise ValidationError(MIT_VALIDATION_ERROR) else: raise AssertionError("Unexpected DNS error") diff --git a/zerver/lib/upload/base.py b/zerver/lib/upload/base.py index 33853e2e4d..77c3383352 100644 --- a/zerver/lib/upload/base.py +++ b/zerver/lib/upload/base.py @@ -6,7 +6,6 @@ from datetime import datetime from typing import IO, Any, Callable, Iterator, List, Optional, Tuple from django.utils.translation import gettext as _ -from markupsafe import Markup from PIL import GifImagePlugin, Image, ImageOps, PngImagePlugin from PIL.Image import DecompressionBombError @@ -52,7 +51,7 @@ def sanitize_name(value: str) -> str: value = re.sub(r"[^\w\s.-]", "", value).strip() value = re.sub(r"[-\s]+", "-", value) assert value not in {"", ".", ".."} - return Markup(value) + return value class BadImageError(JsonableError): diff --git a/zerver/views/auth.py b/zerver/views/auth.py index 6613bc1aa7..9f09c1555d 100644 --- a/zerver/views/auth.py +++ b/zerver/views/auth.py @@ -19,12 +19,10 @@ from django.http import HttpRequest, HttpResponse, HttpResponseRedirect, HttpRes from django.shortcuts import redirect, render from django.template.response import SimpleTemplateResponse, TemplateResponse from django.urls import reverse -from django.utils.html import escape from django.utils.http import url_has_allowed_host_and_scheme from django.utils.translation import gettext as _ from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_safe -from markupsafe import Markup from social_django.utils import load_backend, load_strategy from two_factor.forms import BackupTokenForm from two_factor.views import LoginView as BaseTwoFactorLoginView @@ -719,8 +717,8 @@ def update_login_page_context(request: HttpRequest, context: Dict[str, Any]) -> return try: validate_email(deactivated_email) - context["deactivated_account_error"] = Markup( - DEACTIVATED_ACCOUNT_ERROR.format(username=escape(deactivated_email)) + context["deactivated_account_error"] = DEACTIVATED_ACCOUNT_ERROR.format( + username=deactivated_email ) except ValidationError: logging.info("Invalid email in is_deactivated param to login page: %s", deactivated_email)