mirror of https://github.com/zulip/zulip.git
pysa: Introduce sanitizers, models, and inline marking safe.
This commit adds three `.pysa` model files: `false_positives.pysa` for ruling out false positive flows with `Sanitize` annotations, `req_lib.pysa` for educating pysa about Zulip's `REQ()` pattern for extracting user input, and `redirects.pysa` for capturing the risk of open redirects within Zulip code. Additionally, this commit introduces `mark_sanitized`, an identity function which can be used to selectively clear taint in cases where `Sanitize` models will not work. This commit also puts `mark_sanitized` to work removing known false postive flows.
This commit is contained in:
parent
89131bfcbb
commit
461d5b1a3e
|
@ -0,0 +1,59 @@
|
|||
# This function ensures that a redirect is only within the specified domain.
|
||||
# Assuming that the domain isn't attacker controllable, the result is safe to
|
||||
# redirect to
|
||||
def zerver.views.auth.get_safe_redirect_to(url, redirect_host) -> Sanitize: ...
|
||||
|
||||
# This function was previously the source of an open redirect, but has now been
|
||||
# reviewed and patched, so the output should now be safe to redirect to,
|
||||
# regardless of the value of the specified 'path'.
|
||||
def zerver.lib.thumbnail.generate_thumbnail_url(
|
||||
path,
|
||||
size=...,
|
||||
is_camo_url=...
|
||||
) -> Sanitize: ...
|
||||
|
||||
# This function returns a version of name that only contains word and space
|
||||
# characters, or ., -, _ characters. This should be safe to put into URLs and
|
||||
# filesystem operations.
|
||||
def zerver.lib.upload.sanitize_name(value) -> Sanitize: ...
|
||||
|
||||
# This function accepts two integers and then concatenates them into a path
|
||||
# segment. The result should be safe for use in filesystem and other operations.
|
||||
def zerver.lib.avatar_hash.user_avatar_path_from_ids(user_profile_id, realm_id) -> Sanitize: ...
|
||||
|
||||
# This function creates a list of 'UserMessageLite' objects, which contain only
|
||||
# integral IDs and flags. These should safe for use with SQL and other
|
||||
# operations.
|
||||
def zerver.lib.actions.create_user_messages(
|
||||
message,
|
||||
um_eligible_user_ids,
|
||||
long_term_idle_user_ids,
|
||||
stream_push_user_ids,
|
||||
stream_email_user_ids,
|
||||
mentioned_user_ids,
|
||||
mark_as_read
|
||||
) -> Sanitize: ...
|
||||
|
||||
# This function is an identity function used for removing taint from variables
|
||||
# when there is no convenient way to do it by annotating existing functions.
|
||||
def zerver.lib.pysa.mark_sanitized(arg) -> Sanitize: ...
|
||||
|
||||
############################
|
||||
# Overbroad Approximations #
|
||||
############################
|
||||
|
||||
# Note that the below functions are overbroad approximations of Sanitizers and
|
||||
# could lead to false negatives. They should be replaced with more specific
|
||||
# feature-based filtering when that is available through SAPP.
|
||||
|
||||
# This function generates a URL pointing to a valid Django endpoint, with
|
||||
# arguments properly URL encoded. The resulting URL can usually be used as a
|
||||
# part of a redirect or HTTP request without fear of open redirect or SSRF
|
||||
# vulnerabilities respectively.
|
||||
def django.urls.base.reverse(
|
||||
viewname,
|
||||
urlconf=...,
|
||||
args=...,
|
||||
kwargs=...,
|
||||
current_app=...
|
||||
) -> Sanitize: ...
|
|
@ -0,0 +1,4 @@
|
|||
# One of the ways user-controlled data enters the application is through the
|
||||
# request variables framework. This model teaches Pysa that every instance of
|
||||
# 'REQ()' in a view function is a source of UserControlled taint.
|
||||
class zerver.lib.request._REQ(TaintSource[UserControlled]): ...
|
|
@ -50,6 +50,7 @@ from zerver.lib.message import (
|
|||
truncate_body,
|
||||
truncate_topic,
|
||||
)
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.realm_icon import realm_icon_url
|
||||
from zerver.lib.realm_logo import get_realm_logo_data
|
||||
from zerver.lib.retention import move_messages_to_archive
|
||||
|
@ -5146,6 +5147,11 @@ def check_add_realm_emoji(realm: Realm,
|
|||
realm_emoji.save()
|
||||
|
||||
emoji_file_name = get_emoji_file_name(image_file.name, realm_emoji.id)
|
||||
|
||||
# The only user-controlled portion of 'emoji_file_name' is an extension,
|
||||
# which can not contain '..' or '/' or '\', making it difficult to exploit
|
||||
emoji_file_name = mark_sanitized(emoji_file_name)
|
||||
|
||||
emoji_uploaded_successfully = False
|
||||
try:
|
||||
upload_emoji_image(image_file, emoji_file_name, author)
|
||||
|
|
|
@ -31,6 +31,8 @@ from zerver.models import UserProfile, Realm, Client, Huddle, Stream, \
|
|||
CustomProfileFieldValue, get_display_recipient, Attachment, get_system_bot, \
|
||||
RealmAuditLog, UserHotspot, MutedTopic, Service, UserGroup, \
|
||||
UserGroupMembership, BotStorageData, BotConfigData
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
|
||||
import zerver.lib.upload
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, \
|
||||
Union
|
||||
|
@ -1239,7 +1241,11 @@ def _save_s3_object_to_file(key: ServiceResource, output_dir: str, processing_av
|
|||
if "../" in filename:
|
||||
raise AssertionError(f"Suspicious file with invalid format {filename}")
|
||||
|
||||
dirname = os.path.dirname(filename)
|
||||
# Use 'mark_sanitized' to cause Pysa to ignore the flow of user controlled
|
||||
# data into the filesystem sink, because we've already prevented directory
|
||||
# traversal with our assertion above.
|
||||
dirname = mark_sanitized(os.path.dirname(filename))
|
||||
|
||||
if not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
key.download_file(filename)
|
||||
|
@ -1303,16 +1309,22 @@ def export_uploads_from_local(realm: Realm, local_dir: Path, output_dir: Path) -
|
|||
count = 0
|
||||
records = []
|
||||
for attachment in Attachment.objects.filter(realm_id=realm.id):
|
||||
local_path = os.path.join(local_dir, attachment.path_id)
|
||||
output_path = os.path.join(output_dir, attachment.path_id)
|
||||
# Use 'mark_sanitized' to work around false positive caused by Pysa
|
||||
# thinking that 'realm' (and thus 'attachment' and 'attachment.path_id')
|
||||
# are user controlled
|
||||
path_id = mark_sanitized(attachment.path_id)
|
||||
|
||||
local_path = os.path.join(local_dir, path_id)
|
||||
output_path = os.path.join(output_dir, path_id)
|
||||
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
shutil.copy2(local_path, output_path)
|
||||
stat = os.stat(local_path)
|
||||
record = dict(realm_id=attachment.realm_id,
|
||||
user_profile_id=attachment.owner.id,
|
||||
user_profile_email=attachment.owner.email,
|
||||
s3_path=attachment.path_id,
|
||||
path=attachment.path_id,
|
||||
s3_path=path_id,
|
||||
path=path_id,
|
||||
size=stat.st_size,
|
||||
last_modified=stat.st_mtime,
|
||||
content_type=None)
|
||||
|
@ -1398,8 +1410,15 @@ def export_emoji_from_local(realm: Realm, local_dir: Path, output_dir: Path) ->
|
|||
realm_id=realm.id,
|
||||
emoji_file_name=realm_emoji.file_name
|
||||
)
|
||||
|
||||
# Use 'mark_sanitized' to work around false positive caused by Pysa
|
||||
# thinking that 'realm' (and thus 'attachment' and 'attachment.path_id')
|
||||
# are user controlled
|
||||
emoji_path = mark_sanitized(emoji_path)
|
||||
|
||||
local_path = os.path.join(local_dir, emoji_path)
|
||||
output_path = os.path.join(output_dir, emoji_path)
|
||||
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
shutil.copy2(local_path, output_path)
|
||||
# Realm Emoji author is optional.
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
from typing import TypeVar
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def mark_sanitized(arg: T) -> T:
|
||||
return arg
|
|
@ -4,6 +4,8 @@ import sourcemap
|
|||
|
||||
from typing import Dict, List
|
||||
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
|
||||
|
||||
class SourceMap:
|
||||
'''Map (line, column) pairs from generated to source file.'''
|
||||
|
@ -15,11 +17,25 @@ class SourceMap:
|
|||
def _index_for(self, minified_src: str) -> sourcemap.SourceMapDecoder:
|
||||
'''Return the source map index for minified_src, loading it if not
|
||||
already loaded.'''
|
||||
|
||||
# Prevent path traversal
|
||||
assert ".." not in minified_src and "/" not in minified_src
|
||||
|
||||
if minified_src not in self._indices:
|
||||
for source_dir in self._dirs:
|
||||
filename = os.path.join(source_dir, minified_src + '.map')
|
||||
if os.path.isfile(filename):
|
||||
with open(filename) as fp:
|
||||
# Use 'mark_sanitized' to force Pysa to ignore the fact that
|
||||
# 'filename' is user controlled. While putting user
|
||||
# controlled data into a filesystem operation is bad, in
|
||||
# this case it's benign because 'filename' can't traverse
|
||||
# directories outside of the pre-configured 'sourcemap_dirs'
|
||||
# (due to the above assertions) and will always end in
|
||||
# '.map'. Additionally, the result of this function is used
|
||||
# for error logging and not returned to the user, so
|
||||
# controlling the loaded file would not be useful to an
|
||||
# attacker.
|
||||
with open(mark_sanitized(filename)) as fp:
|
||||
self._indices[minified_src] = sourcemap.load(fp)
|
||||
break
|
||||
|
||||
|
|
|
@ -98,7 +98,9 @@ def sanitize_name(value: str) -> str:
|
|||
"""
|
||||
value = unicodedata.normalize('NFKC', value)
|
||||
value = re.sub(r'[^\w\s._-]', '', value, flags=re.U).strip()
|
||||
return mark_safe(re.sub(r'[-\s]+', '-', value, flags=re.U))
|
||||
value = re.sub(r'[-\s]+', '-', value, flags=re.U)
|
||||
assert value not in {'', '.', '..'}
|
||||
return mark_safe(value)
|
||||
|
||||
def random_name(bytes: int=60) -> str:
|
||||
return base64.urlsafe_b64encode(os.urandom(bytes)).decode('utf-8')
|
||||
|
@ -653,6 +655,7 @@ class S3UploadBackend(ZulipUploadBackend):
|
|||
|
||||
def write_local_file(type: str, path: str, file_data: bytes) -> None:
|
||||
file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, type, path)
|
||||
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
with open(file_path, 'wb') as f:
|
||||
f.write(file_data)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import urllib
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.topic import get_topic_from_message_info
|
||||
from zerver.models import Realm, Stream, UserProfile
|
||||
|
||||
|
@ -97,8 +98,12 @@ def near_pm_message_url(realm: Realm,
|
|||
return full_url
|
||||
|
||||
def add_query_to_redirect_url(original_url: str, query: str) -> str:
|
||||
return original_url + "?" + query
|
||||
# Using 'mark_sanitized' because user-controlled data after the '?' is
|
||||
# not relevant for open redirects
|
||||
return original_url + "?" + mark_sanitized(query)
|
||||
|
||||
def add_query_arg_to_redirect_url(original_url: str, query_arg: str) -> str:
|
||||
assert '?' in original_url
|
||||
return original_url + "&" + query_arg
|
||||
# Using 'mark_sanitized' because user-controlled data after the '?' is
|
||||
# not relevant for open redirects
|
||||
return original_url + "&" + mark_sanitized(query_arg)
|
||||
|
|
|
@ -9,6 +9,7 @@ from typing.re import Match
|
|||
|
||||
from version import ZULIP_VERSION
|
||||
from zerver.lib.cache import cache_with_key, get_cache_with_key, preview_url_cache_key
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.url_preview.oembed import get_oembed_data
|
||||
from zerver.lib.url_preview.parsers import OpenGraphParser, GenericParser
|
||||
|
||||
|
@ -88,7 +89,7 @@ def get_link_embed_data(url: str,
|
|||
if data.get('oembed'):
|
||||
return data
|
||||
|
||||
response = requests.get(url, stream=True, headers=HEADERS, timeout=TIMEOUT)
|
||||
response = requests.get(mark_sanitized(url), stream=True, headers=HEADERS, timeout=TIMEOUT)
|
||||
if response.ok:
|
||||
og_data = OpenGraphParser(response.text).extract_data()
|
||||
for key in ['title', 'description', 'image']:
|
||||
|
|
|
@ -28,6 +28,7 @@ from zerver.lib.timestamp import datetime_to_timestamp
|
|||
from django.db.models.signals import post_save, post_delete
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from zerver.lib import cache
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.validator import check_int, \
|
||||
check_short_string, check_long_string, validate_choice_field, check_date, \
|
||||
check_url, check_list
|
||||
|
@ -518,7 +519,9 @@ class Realm(models.Model):
|
|||
|
||||
@property
|
||||
def host(self) -> str:
|
||||
return self.host_for_subdomain(self.subdomain)
|
||||
# Use mark sanitized to prevent false positives from Pysa thinking that
|
||||
# the host is user controlled.
|
||||
return mark_sanitized(self.host_for_subdomain(self.subdomain))
|
||||
|
||||
@staticmethod
|
||||
def host_for_subdomain(subdomain: str) -> str:
|
||||
|
|
|
@ -30,6 +30,7 @@ from zerver.forms import HomepageForm, OurAuthenticationForm, \
|
|||
AuthenticationTokenForm
|
||||
from zerver.lib.mobile_auth_otp import otp_encrypt_api_key
|
||||
from zerver.lib.push_notifications import push_notifications_enabled
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.realm_icon import realm_icon_url
|
||||
from zerver.lib.request import REQ, has_request_variables, JsonableError
|
||||
from zerver.lib.response import json_success, json_error
|
||||
|
@ -64,7 +65,10 @@ ExtraContext = Optional[Dict[str, Any]]
|
|||
def get_safe_redirect_to(url: str, redirect_host: str) -> str:
|
||||
is_url_safe = is_safe_url(url=url, allowed_hosts=None)
|
||||
if is_url_safe:
|
||||
return urllib.parse.urljoin(redirect_host, url)
|
||||
# Mark as safe to prevent Pysa from surfacing false positives for
|
||||
# open redirects. In this branch, we have already checked that the URL
|
||||
# points to the specified 'redirect_host', or is relative.
|
||||
return urllib.parse.urljoin(redirect_host, mark_sanitized(url))
|
||||
else:
|
||||
return redirect_host
|
||||
|
||||
|
@ -186,7 +190,10 @@ def maybe_send_to_registration(request: HttpRequest, email: str, full_name: str=
|
|||
host = realm.host
|
||||
else:
|
||||
host = request.get_host()
|
||||
confirmation_link = create_confirmation_link(prereg_user, host,
|
||||
# Mark 'host' as safe for use in a redirect. It's pulled from the
|
||||
# current request or realm, both of which only allow a limited set of
|
||||
# trusted hosts.
|
||||
confirmation_link = create_confirmation_link(prereg_user, mark_sanitized(host),
|
||||
Confirmation.USER_REGISTRATION)
|
||||
if is_signup:
|
||||
return redirect(confirmation_link)
|
||||
|
|
|
@ -27,6 +27,7 @@ from zerver.decorator import require_post, \
|
|||
from zerver.lib.create_user import get_role_for_new_user
|
||||
from zerver.lib.onboarding import send_initial_realm_messages, setup_realm_internal_bots
|
||||
from zerver.lib.sessions import get_expirable_session_var
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.subdomains import get_subdomain, is_root_domain_available
|
||||
from zerver.lib.timezone import get_all_timezones
|
||||
from zerver.lib.url_encoding import add_query_to_redirect_url
|
||||
|
@ -408,7 +409,9 @@ def login_and_go_to_home(request: HttpRequest, user_profile: UserProfile) -> Htt
|
|||
return finish_desktop_flow(request, user_profile, desktop_flow_otp)
|
||||
|
||||
do_login(request, user_profile)
|
||||
return HttpResponseRedirect(user_profile.realm.uri + reverse('zerver.views.home.home'))
|
||||
# Using 'mark_sanitized' to work around false positive where Pysa thinks
|
||||
# that 'user_profile' is user-controlled
|
||||
return HttpResponseRedirect(mark_sanitized(user_profile.realm.uri) + reverse('zerver.views.home.home'))
|
||||
|
||||
def prepare_activation_url(email: str, request: HttpRequest,
|
||||
realm_creation: bool=False,
|
||||
|
|
|
@ -19,6 +19,7 @@ from requests_oauthlib import OAuth2Session
|
|||
from zerver.decorator import REQ, has_request_variables, zulip_login_required
|
||||
from zerver.lib.actions import do_set_zoom_token
|
||||
from zerver.lib.exceptions import ErrorCode, JsonableError
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.response import json_success
|
||||
from zerver.lib.subdomains import get_subdomain
|
||||
from zerver.lib.validator import check_dict, check_string
|
||||
|
@ -57,7 +58,10 @@ def get_zoom_sid(request: HttpRequest) -> str:
|
|||
# token directly to the Zoom server.
|
||||
|
||||
csrf.get_token(request)
|
||||
return (
|
||||
# Use 'mark_sanitized' to cause Pysa to ignore the flow of user controlled
|
||||
# data out of this function. 'request.META' is indeed user controlled, but
|
||||
# post-HMAC ouptut is no longer meaningfully controllable.
|
||||
return mark_sanitized(
|
||||
""
|
||||
if getattr(request, "_dont_enforce_csrf_checks", False)
|
||||
else salted_hmac("Zulip Zoom sid", request.META["CSRF_COOKIE"]).hexdigest()
|
||||
|
|
|
@ -3,6 +3,7 @@ from django.http import HttpResponse, HttpRequest
|
|||
from django.utils.translation import ugettext as _
|
||||
from zerver.decorator import authenticated_json_view
|
||||
from zerver.lib.ccache import make_ccache
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
from zerver.lib.request import has_request_variables, REQ
|
||||
from zerver.lib.response import json_success, json_error
|
||||
from zerver.lib.users import get_api_key
|
||||
|
@ -43,6 +44,14 @@ def webathena_kerberos_login(request: HttpRequest, user_profile: UserProfile,
|
|||
# This is important for security since DNS is not secure.
|
||||
assert(re.match(r'^[a-z0-9_.-]+$', user) is not None)
|
||||
ccache = make_ccache(parsed_cred)
|
||||
|
||||
# 'user' has been verified to contain only benign characters that won't
|
||||
# help with shell injection.
|
||||
user = mark_sanitized(user)
|
||||
|
||||
# 'ccache' is only written to disk by the script and used as a kerberos
|
||||
# credential cache file.
|
||||
ccache = mark_sanitized(ccache)
|
||||
except Exception:
|
||||
return json_error(_("Invalid Kerberos cache"))
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ from zerver.lib.bot_lib import EmbeddedBotHandler, get_bot_handler, EmbeddedBotQ
|
|||
from zerver.lib.exceptions import RateLimited
|
||||
from zerver.lib.export import export_realm_wrapper
|
||||
from zerver.lib.remote_server import PushNotificationBouncerRetryLaterError
|
||||
from zerver.lib.pysa import mark_sanitized
|
||||
|
||||
import os
|
||||
import ujson
|
||||
|
@ -204,7 +205,9 @@ class QueueProcessingWorker(ABC):
|
|||
self._log_problem()
|
||||
if not os.path.exists(settings.QUEUE_ERROR_DIR):
|
||||
os.mkdir(settings.QUEUE_ERROR_DIR) # nocoverage
|
||||
fname = f'{self.queue_name}.errors'
|
||||
# Use 'mark_sanitized' to prevent Pysa from detecting this false positive
|
||||
# flow. 'queue_name' is always a constant string.
|
||||
fname = mark_sanitized(f'{self.queue_name}.errors')
|
||||
fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
|
||||
line = f'{time.asctime()}\t{ujson.dumps(events)}\n'
|
||||
lock_fn = fn + '.lock'
|
||||
|
|
Loading…
Reference in New Issue