CVE-2023-22735: Provide the Content-Disposition header from S3.

The Content-Type of user-provided uploads was provided by the browser
at initial upload time, and stored in S3; however, 04cf68b45e
switched to determining the Content-Disposition merely from the
filename.  This makes uploads vulnerable to a stored XSS, wherein a
file uploaded with a content-type of `text/html` and an extension of
`.png` would be served to browsers as `Content-Disposition: inline`,
which is unsafe.

The `Content-Security-Policy` headers in the previous commit mitigate
this, but only for browsers which support them.

Revert parts of 04cf68b45e, specifically by allowing S3 to provide
the Content-Disposition header, and using the
`ResponseContentDisposition` argument when necessary to override it to
`attachment`.  Because we expect S3 responses to vary based on this
argument, we include it in the cache key; since the query parameter
has dashes in it, we can't use the helper `$arg_` variables, and
must parse it from the query parameters manually.

Adding the disposition may decrease the cache hit rate somewhat, but
downloads are infrequent enough that it is unlikely to have a
noticeable effect.  We take care to not adjust the cache key for
requests which do not specify the disposition.
This commit is contained in:
Alex Vandiver 2023-01-11 16:36:41 +00:00
parent 36e97f8121
commit 2f6c5a883e
4 changed files with 55 additions and 29 deletions

View File

@ -14,14 +14,14 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
# Ensure that we only get _one_ of these headers: the one that
# Django added, not the one from S3.
proxy_hide_header Content-Disposition;
proxy_hide_header Cache-Control;
proxy_hide_header Expires;
proxy_hide_header Set-Cookie;
# We are _leaving_ S3 to provide Content-Type and Accept-Ranges
# headers, which are the two remaining headers which nginx would
# also pass through from the first response. Django explicitly
# unsets the former, and does not set the latter.
# We are _leaving_ S3 to provide Content-Type,
# Content-Disposition, and Accept-Ranges headers, which are the
# three remaining headers which nginx would also pass through from
# the first response. Django explicitly unsets the first, and
# does not set the latter two.
# nginx does its own DNS resolution, which is necessary here to
# resolve the IP of the S3 server. Point it at the local caching
@ -38,9 +38,11 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
# `s3_disk_cache_size` and read frequency, set via
# `s3_cache_inactive_time`.
proxy_cache_valid 200 1y;
# Don't include query parameters in the cache key, since those
# include a time-based auth token
proxy_cache_key $download_url;
# We only include the requested content-disposition in the cache
# key, so that we cache "Content-Disposition: attachment"
# separately from the inline version.
proxy_cache_key $download_url$s3_disposition_cache_key;
}
# Internal file-serving

View File

@ -4,3 +4,16 @@ proxy_cache_path /srv/zulip-uploaded-files-cache
keys_zone=uploads:<%= @s3_memory_cache_size %>
inactive=<%= @s3_cache_inactive_time %>
max_size=<%= @s3_disk_cache_size %>;
# This is used when proxying requests to S3; we wish to know if the
# proxied request is asking to override the Content-Disposition in its
# response, so we can adjust our cache key. Unfortunately, $arg_foo
# style variables pre-parsed from query parameters don't work with
# query parameters with dashes, so we parse it out by hand. Despite
# needing to be declared at the 'http' level, nginx applies maps like
# this lazily, so this only affects internal S3 proxied requests.
map $args $s3_disposition_cache_key {
# No response-content-disposition parameter in the query string:
# the variable is empty, so appending it to a cache key changes
# nothing.
default "";
# Match the parameter either at the very start of the query string
# or directly after an "&", and capture the whole "name=value" pair
# up to (but not including) the next "&".  The captured pair is
# emitted with a leading "?".
"~(^|&)(?<param>response-content-disposition=[^&]+)" "?$param";
}

View File

@ -91,7 +91,7 @@ def upload_image_to_s3(
)
def get_signed_upload_url(path: str) -> str:
def get_signed_upload_url(path: str, force_download: bool = False) -> str:
client = boto3.client(
"s3",
aws_access_key_id=settings.S3_KEY,
@ -99,13 +99,16 @@ def get_signed_upload_url(path: str) -> str:
region_name=settings.S3_REGION,
endpoint_url=settings.S3_ENDPOINT_URL,
)
params = {
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
"Key": path,
}
if force_download:
params["ResponseContentDisposition"] = "attachment"
return client.generate_presigned_url(
ClientMethod="get_object",
Params={
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
"Key": path,
},
Params=params,
ExpiresIn=SIGNED_UPLOAD_URL_DURATION,
HttpMethod="GET",
)

View File

@ -88,8 +88,8 @@ def internal_nginx_redirect(internal_path: str) -> HttpResponse:
return response
def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponse:
url = get_signed_upload_url(path_id)
def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -> HttpResponse:
url = get_signed_upload_url(path_id, force_download=force_download)
assert url.startswith("https://")
if settings.DEVELOPMENT:
@ -107,18 +107,30 @@ def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> Http
assert parsed_url.query is not None
escaped_path_parts = parsed_url.hostname + quote(parsed_url.path) + "?" + parsed_url.query
response = internal_nginx_redirect("/internal/s3/" + escaped_path_parts)
patch_disposition_header(response, path_id, download)
# It is important that S3 generate both the Content-Type and
# Content-Disposition headers; when the file was uploaded, we
# stored the browser-provided value for the former, and set
# Content-Disposition according to whether that was safe. As such,
# only S3 knows if a given attachment is safe to inline; we only
# override Content-Disposition to "attachment", and do so by
# telling S3 that is what we want in the signed URL.
patch_cache_control(response, private=True, immutable=True)
return response
def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponseBase:
def serve_local(
request: HttpRequest, path_id: str, force_download: bool = False
) -> HttpResponseBase:
assert settings.LOCAL_FILES_DIR is not None
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
assert_is_local_storage_path("files", local_path)
if not os.path.isfile(local_path):
return HttpResponseNotFound("<p>File not found</p>")
mimetype, encoding = guess_type(path_id)
download = force_download or mimetype not in INLINE_MIME_TYPES
if settings.DEVELOPMENT:
# In development, we do not have the nginx server to offload
# the response to; serve it directly ourselves.
@ -138,7 +150,9 @@ def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> H
def serve_file_download_backend(
request: HttpRequest, user_profile: UserProfile, realm_id_str: str, filename: str
) -> HttpResponseBase:
return serve_file(request, user_profile, realm_id_str, filename, url_only=False, download=True)
return serve_file(
request, user_profile, realm_id_str, filename, url_only=False, force_download=True
)
def serve_file_backend(
@ -167,7 +181,7 @@ def serve_file(
realm_id_str: str,
filename: str,
url_only: bool = False,
download: bool = False,
force_download: bool = False,
) -> HttpResponseBase:
path_id = f"{realm_id_str}/{filename}"
realm = get_valid_realm_from_request(request)
@ -181,13 +195,10 @@ def serve_file(
url = generate_unauthed_file_access_url(path_id)
return json_success(request, data=dict(url=url))
mimetype, encoding = guess_type(path_id)
download = download or mimetype not in INLINE_MIME_TYPES
if settings.LOCAL_UPLOADS_DIR is not None:
return serve_local(request, path_id, download=download)
return serve_local(request, path_id, force_download=force_download)
else:
return serve_s3(request, path_id, download=download)
return serve_s3(request, path_id, force_download=force_download)
USER_UPLOADS_ACCESS_TOKEN_SALT = "user_uploads_"
@ -221,13 +232,10 @@ def serve_file_unauthed_from_token(
if path_id.split("/")[-1] != filename:
raise JsonableError(_("Invalid filename"))
mimetype, encoding = guess_type(path_id)
download = mimetype not in INLINE_MIME_TYPES
if settings.LOCAL_UPLOADS_DIR is not None:
return serve_local(request, path_id, download=download)
return serve_local(request, path_id)
else:
return serve_s3(request, path_id, download=download)
return serve_s3(request, path_id)
def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponseBase: