mirror of https://github.com/zulip/zulip.git
CVE-2023-22735: Provide the Content-Disposition header from S3.
The Content-Type of user-provided uploads was provided by the browser at initial upload time, and stored in S3; however,04cf68b45e
switched to determining the Content-Disposition merely from the filename. This makes uploads vulnerable to a stored XSS, wherein a file uploaded with a content-type of `text/html` and an extension of `.png` would be served to browsers as `Content-Disposition: inline`, which is unsafe. The `Content-Security-Policy` headers in the previous commit mitigate this, but only for browsers which support them. Revert parts of04cf68b45e
, specifically by allowing S3 to provide the Content-Disposition header, and using the `ResponseContentDisposition` argument when necessary to override it to `attachment`. Because we expect S3 responses to vary based on this argument, we include it in the cache key; since the query parameter has dashes in it, we can't use use the helper `$arg_` variables, and must parse it from the query parameters manually. Adding the disposition may decrease the cache hit rate somewhat, but downloads are infrequent enough that it is unlikely to have a noticeable effect. We take care to not adjust the cache key for requests which do not specify the disposition.
This commit is contained in:
parent
36e97f8121
commit
2f6c5a883e
|
@ -14,14 +14,14 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
|
|||
|
||||
# Ensure that we only get _one_ of these headers: the one that
|
||||
# Django added, not the one from S3.
|
||||
proxy_hide_header Content-Disposition;
|
||||
proxy_hide_header Cache-Control;
|
||||
proxy_hide_header Expires;
|
||||
proxy_hide_header Set-Cookie;
|
||||
# We are _leaving_ S3 to provide Content-Type and Accept-Ranges
|
||||
# headers, which are the two remaining headers which nginx would
|
||||
# also pass through from the first response. Django explicitly
|
||||
# unsets the former, and does not set the latter.
|
||||
# We are _leaving_ S3 to provide Content-Type,
|
||||
# Content-Disposition, and Accept-Ranges headers, which are the
|
||||
# three remaining headers which nginx would also pass through from
|
||||
# the first response. Django explicitly unsets the first, and
|
||||
# does not set the latter two.
|
||||
|
||||
# nginx does its own DNS resolution, which is necessary here to
|
||||
# resolve the IP of the S3 server. Point it at the local caching
|
||||
|
@ -38,9 +38,11 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
|
|||
# `s3_disk_cache_size` and read frequency, set via
|
||||
# `s3_cache_inactive_time`.
|
||||
proxy_cache_valid 200 1y;
|
||||
# Don't include query parameters in the cache key, since those
|
||||
# include a time-based auth token
|
||||
proxy_cache_key $download_url;
|
||||
|
||||
# We only include the requested content-disposition in the cache
|
||||
# key, so that we cache "Content-Disposition: attachment"
|
||||
# separately from the inline version.
|
||||
proxy_cache_key $download_url$s3_disposition_cache_key;
|
||||
}
|
||||
|
||||
# Internal file-serving
|
||||
|
|
|
@ -4,3 +4,16 @@ proxy_cache_path /srv/zulip-uploaded-files-cache
|
|||
keys_zone=uploads:<%= @s3_memory_cache_size %>
|
||||
inactive=<%= @s3_cache_inactive_time %>
|
||||
max_size=<%= @s3_disk_cache_size %>;
|
||||
|
||||
# This is used when proxying requests to S3; we wish to know if the
|
||||
# proxied request is asking to override the Content-Disposition in its
|
||||
# response, so we can adjust our cache key. Unfortunately, $arg_foo
|
||||
# style variables pre-parsed from query parameters don't work with
|
||||
# query parameters with dashes, so we parse it out by hand. Despite
|
||||
# needing to be declared at the 'http' level,. nginx applies maps like
|
||||
# this lazily, so this only affects internal S3 proxied requests.
|
||||
|
||||
map $args $s3_disposition_cache_key {
|
||||
default "";
|
||||
"~(^|&)(?<param>response-content-disposition=[^&]+)" "?$param";
|
||||
}
|
||||
|
|
|
@ -91,7 +91,7 @@ def upload_image_to_s3(
|
|||
)
|
||||
|
||||
|
||||
def get_signed_upload_url(path: str) -> str:
|
||||
def get_signed_upload_url(path: str, force_download: bool = False) -> str:
|
||||
client = boto3.client(
|
||||
"s3",
|
||||
aws_access_key_id=settings.S3_KEY,
|
||||
|
@ -99,13 +99,16 @@ def get_signed_upload_url(path: str) -> str:
|
|||
region_name=settings.S3_REGION,
|
||||
endpoint_url=settings.S3_ENDPOINT_URL,
|
||||
)
|
||||
params = {
|
||||
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
|
||||
"Key": path,
|
||||
}
|
||||
if force_download:
|
||||
params["ResponseContentDisposition"] = "attachment"
|
||||
|
||||
return client.generate_presigned_url(
|
||||
ClientMethod="get_object",
|
||||
Params={
|
||||
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
|
||||
"Key": path,
|
||||
},
|
||||
Params=params,
|
||||
ExpiresIn=SIGNED_UPLOAD_URL_DURATION,
|
||||
HttpMethod="GET",
|
||||
)
|
||||
|
|
|
@ -88,8 +88,8 @@ def internal_nginx_redirect(internal_path: str) -> HttpResponse:
|
|||
return response
|
||||
|
||||
|
||||
def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponse:
|
||||
url = get_signed_upload_url(path_id)
|
||||
def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -> HttpResponse:
|
||||
url = get_signed_upload_url(path_id, force_download=force_download)
|
||||
assert url.startswith("https://")
|
||||
|
||||
if settings.DEVELOPMENT:
|
||||
|
@ -107,18 +107,30 @@ def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> Http
|
|||
assert parsed_url.query is not None
|
||||
escaped_path_parts = parsed_url.hostname + quote(parsed_url.path) + "?" + parsed_url.query
|
||||
response = internal_nginx_redirect("/internal/s3/" + escaped_path_parts)
|
||||
patch_disposition_header(response, path_id, download)
|
||||
|
||||
# It is important that S3 generate both the Content-Type and
|
||||
# Content-Disposition headers; when the file was uploaded, we
|
||||
# stored the browser-provided value for the former, and set
|
||||
# Content-Disposition according to if that was safe. As such,
|
||||
# only S3 knows if a given attachment is safe to inline; we only
|
||||
# override Content-Disposition to "attachment", and do so by
|
||||
# telling S3 that is what we want in the signed URL.
|
||||
patch_cache_control(response, private=True, immutable=True)
|
||||
return response
|
||||
|
||||
|
||||
def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponseBase:
|
||||
def serve_local(
|
||||
request: HttpRequest, path_id: str, force_download: bool = False
|
||||
) -> HttpResponseBase:
|
||||
assert settings.LOCAL_FILES_DIR is not None
|
||||
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
|
||||
assert_is_local_storage_path("files", local_path)
|
||||
if not os.path.isfile(local_path):
|
||||
return HttpResponseNotFound("<p>File not found</p>")
|
||||
|
||||
mimetype, encoding = guess_type(path_id)
|
||||
download = force_download or mimetype not in INLINE_MIME_TYPES
|
||||
|
||||
if settings.DEVELOPMENT:
|
||||
# In development, we do not have the nginx server to offload
|
||||
# the response to; serve it directly ourselves.
|
||||
|
@ -138,7 +150,9 @@ def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> H
|
|||
def serve_file_download_backend(
|
||||
request: HttpRequest, user_profile: UserProfile, realm_id_str: str, filename: str
|
||||
) -> HttpResponseBase:
|
||||
return serve_file(request, user_profile, realm_id_str, filename, url_only=False, download=True)
|
||||
return serve_file(
|
||||
request, user_profile, realm_id_str, filename, url_only=False, force_download=True
|
||||
)
|
||||
|
||||
|
||||
def serve_file_backend(
|
||||
|
@ -167,7 +181,7 @@ def serve_file(
|
|||
realm_id_str: str,
|
||||
filename: str,
|
||||
url_only: bool = False,
|
||||
download: bool = False,
|
||||
force_download: bool = False,
|
||||
) -> HttpResponseBase:
|
||||
path_id = f"{realm_id_str}/{filename}"
|
||||
realm = get_valid_realm_from_request(request)
|
||||
|
@ -181,13 +195,10 @@ def serve_file(
|
|||
url = generate_unauthed_file_access_url(path_id)
|
||||
return json_success(request, data=dict(url=url))
|
||||
|
||||
mimetype, encoding = guess_type(path_id)
|
||||
download = download or mimetype not in INLINE_MIME_TYPES
|
||||
|
||||
if settings.LOCAL_UPLOADS_DIR is not None:
|
||||
return serve_local(request, path_id, download=download)
|
||||
return serve_local(request, path_id, force_download=force_download)
|
||||
else:
|
||||
return serve_s3(request, path_id, download=download)
|
||||
return serve_s3(request, path_id, force_download=force_download)
|
||||
|
||||
|
||||
USER_UPLOADS_ACCESS_TOKEN_SALT = "user_uploads_"
|
||||
|
@ -221,13 +232,10 @@ def serve_file_unauthed_from_token(
|
|||
if path_id.split("/")[-1] != filename:
|
||||
raise JsonableError(_("Invalid filename"))
|
||||
|
||||
mimetype, encoding = guess_type(path_id)
|
||||
download = mimetype not in INLINE_MIME_TYPES
|
||||
|
||||
if settings.LOCAL_UPLOADS_DIR is not None:
|
||||
return serve_local(request, path_id, download=download)
|
||||
return serve_local(request, path_id)
|
||||
else:
|
||||
return serve_s3(request, path_id, download=download)
|
||||
return serve_s3(request, path_id)
|
||||
|
||||
|
||||
def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponseBase:
|
||||
|
|
Loading…
Reference in New Issue