mirror of https://github.com/zulip/zulip.git
CVE-2023-22735: Provide the Content-Disposition header from S3.
The Content-Type of user-provided uploads was provided by the browser at initial upload time, and stored in S3; however,04cf68b45e
switched to determining the Content-Disposition merely from the filename. This makes uploads vulnerable to a stored XSS, wherein a file uploaded with a content-type of `text/html` and an extension of `.png` would be served to browsers as `Content-Disposition: inline`, which is unsafe. The `Content-Security-Policy` headers in the previous commit mitigate this, but only for browsers which support them. Revert parts of04cf68b45e
, specifically by allowing S3 to provide the Content-Disposition header, and using the `ResponseContentDisposition` argument when necessary to override it to `attachment`. Because we expect S3 responses to vary based on this argument, we include it in the cache key; since the query parameter has dashes in it, we can't use use the helper `$arg_` variables, and must parse it from the query parameters manually. Adding the disposition may decrease the cache hit rate somewhat, but downloads are infrequent enough that it is unlikely to have a noticeable effect. We take care to not adjust the cache key for requests which do not specify the disposition.
This commit is contained in:
parent
36e97f8121
commit
2f6c5a883e
|
@ -14,14 +14,14 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
|
||||||
|
|
||||||
# Ensure that we only get _one_ of these headers: the one that
|
# Ensure that we only get _one_ of these headers: the one that
|
||||||
# Django added, not the one from S3.
|
# Django added, not the one from S3.
|
||||||
proxy_hide_header Content-Disposition;
|
|
||||||
proxy_hide_header Cache-Control;
|
proxy_hide_header Cache-Control;
|
||||||
proxy_hide_header Expires;
|
proxy_hide_header Expires;
|
||||||
proxy_hide_header Set-Cookie;
|
proxy_hide_header Set-Cookie;
|
||||||
# We are _leaving_ S3 to provide Content-Type and Accept-Ranges
|
# We are _leaving_ S3 to provide Content-Type,
|
||||||
# headers, which are the two remaining headers which nginx would
|
# Content-Disposition, and Accept-Ranges headers, which are the
|
||||||
# also pass through from the first response. Django explicitly
|
# three remaining headers which nginx would also pass through from
|
||||||
# unsets the former, and does not set the latter.
|
# the first response. Django explicitly unsets the first, and
|
||||||
|
# does not set the latter two.
|
||||||
|
|
||||||
# nginx does its own DNS resolution, which is necessary here to
|
# nginx does its own DNS resolution, which is necessary here to
|
||||||
# resolve the IP of the S3 server. Point it at the local caching
|
# resolve the IP of the S3 server. Point it at the local caching
|
||||||
|
@ -38,9 +38,11 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
|
||||||
# `s3_disk_cache_size` and read frequency, set via
|
# `s3_disk_cache_size` and read frequency, set via
|
||||||
# `s3_cache_inactive_time`.
|
# `s3_cache_inactive_time`.
|
||||||
proxy_cache_valid 200 1y;
|
proxy_cache_valid 200 1y;
|
||||||
# Don't include query parameters in the cache key, since those
|
|
||||||
# include a time-based auth token
|
# We only include the requested content-disposition in the cache
|
||||||
proxy_cache_key $download_url;
|
# key, so that we cache "Content-Disposition: attachment"
|
||||||
|
# separately from the inline version.
|
||||||
|
proxy_cache_key $download_url$s3_disposition_cache_key;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Internal file-serving
|
# Internal file-serving
|
||||||
|
|
|
@ -4,3 +4,16 @@ proxy_cache_path /srv/zulip-uploaded-files-cache
|
||||||
keys_zone=uploads:<%= @s3_memory_cache_size %>
|
keys_zone=uploads:<%= @s3_memory_cache_size %>
|
||||||
inactive=<%= @s3_cache_inactive_time %>
|
inactive=<%= @s3_cache_inactive_time %>
|
||||||
max_size=<%= @s3_disk_cache_size %>;
|
max_size=<%= @s3_disk_cache_size %>;
|
||||||
|
|
||||||
|
# This is used when proxying requests to S3; we wish to know if the
|
||||||
|
# proxied request is asking to override the Content-Disposition in its
|
||||||
|
# response, so we can adjust our cache key. Unfortunately, $arg_foo
|
||||||
|
# style variables pre-parsed from query parameters don't work with
|
||||||
|
# query parameters with dashes, so we parse it out by hand. Despite
|
||||||
|
# needing to be declared at the 'http' level,. nginx applies maps like
|
||||||
|
# this lazily, so this only affects internal S3 proxied requests.
|
||||||
|
|
||||||
|
map $args $s3_disposition_cache_key {
|
||||||
|
default "";
|
||||||
|
"~(^|&)(?<param>response-content-disposition=[^&]+)" "?$param";
|
||||||
|
}
|
||||||
|
|
|
@ -91,7 +91,7 @@ def upload_image_to_s3(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_signed_upload_url(path: str) -> str:
|
def get_signed_upload_url(path: str, force_download: bool = False) -> str:
|
||||||
client = boto3.client(
|
client = boto3.client(
|
||||||
"s3",
|
"s3",
|
||||||
aws_access_key_id=settings.S3_KEY,
|
aws_access_key_id=settings.S3_KEY,
|
||||||
|
@ -99,13 +99,16 @@ def get_signed_upload_url(path: str) -> str:
|
||||||
region_name=settings.S3_REGION,
|
region_name=settings.S3_REGION,
|
||||||
endpoint_url=settings.S3_ENDPOINT_URL,
|
endpoint_url=settings.S3_ENDPOINT_URL,
|
||||||
)
|
)
|
||||||
|
params = {
|
||||||
|
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
|
||||||
|
"Key": path,
|
||||||
|
}
|
||||||
|
if force_download:
|
||||||
|
params["ResponseContentDisposition"] = "attachment"
|
||||||
|
|
||||||
return client.generate_presigned_url(
|
return client.generate_presigned_url(
|
||||||
ClientMethod="get_object",
|
ClientMethod="get_object",
|
||||||
Params={
|
Params=params,
|
||||||
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
|
|
||||||
"Key": path,
|
|
||||||
},
|
|
||||||
ExpiresIn=SIGNED_UPLOAD_URL_DURATION,
|
ExpiresIn=SIGNED_UPLOAD_URL_DURATION,
|
||||||
HttpMethod="GET",
|
HttpMethod="GET",
|
||||||
)
|
)
|
||||||
|
|
|
@ -88,8 +88,8 @@ def internal_nginx_redirect(internal_path: str) -> HttpResponse:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponse:
|
def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -> HttpResponse:
|
||||||
url = get_signed_upload_url(path_id)
|
url = get_signed_upload_url(path_id, force_download=force_download)
|
||||||
assert url.startswith("https://")
|
assert url.startswith("https://")
|
||||||
|
|
||||||
if settings.DEVELOPMENT:
|
if settings.DEVELOPMENT:
|
||||||
|
@ -107,18 +107,30 @@ def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> Http
|
||||||
assert parsed_url.query is not None
|
assert parsed_url.query is not None
|
||||||
escaped_path_parts = parsed_url.hostname + quote(parsed_url.path) + "?" + parsed_url.query
|
escaped_path_parts = parsed_url.hostname + quote(parsed_url.path) + "?" + parsed_url.query
|
||||||
response = internal_nginx_redirect("/internal/s3/" + escaped_path_parts)
|
response = internal_nginx_redirect("/internal/s3/" + escaped_path_parts)
|
||||||
patch_disposition_header(response, path_id, download)
|
|
||||||
|
# It is important that S3 generate both the Content-Type and
|
||||||
|
# Content-Disposition headers; when the file was uploaded, we
|
||||||
|
# stored the browser-provided value for the former, and set
|
||||||
|
# Content-Disposition according to if that was safe. As such,
|
||||||
|
# only S3 knows if a given attachment is safe to inline; we only
|
||||||
|
# override Content-Disposition to "attachment", and do so by
|
||||||
|
# telling S3 that is what we want in the signed URL.
|
||||||
patch_cache_control(response, private=True, immutable=True)
|
patch_cache_control(response, private=True, immutable=True)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponseBase:
|
def serve_local(
|
||||||
|
request: HttpRequest, path_id: str, force_download: bool = False
|
||||||
|
) -> HttpResponseBase:
|
||||||
assert settings.LOCAL_FILES_DIR is not None
|
assert settings.LOCAL_FILES_DIR is not None
|
||||||
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
|
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
|
||||||
assert_is_local_storage_path("files", local_path)
|
assert_is_local_storage_path("files", local_path)
|
||||||
if not os.path.isfile(local_path):
|
if not os.path.isfile(local_path):
|
||||||
return HttpResponseNotFound("<p>File not found</p>")
|
return HttpResponseNotFound("<p>File not found</p>")
|
||||||
|
|
||||||
|
mimetype, encoding = guess_type(path_id)
|
||||||
|
download = force_download or mimetype not in INLINE_MIME_TYPES
|
||||||
|
|
||||||
if settings.DEVELOPMENT:
|
if settings.DEVELOPMENT:
|
||||||
# In development, we do not have the nginx server to offload
|
# In development, we do not have the nginx server to offload
|
||||||
# the response to; serve it directly ourselves.
|
# the response to; serve it directly ourselves.
|
||||||
|
@ -138,7 +150,9 @@ def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> H
|
||||||
def serve_file_download_backend(
|
def serve_file_download_backend(
|
||||||
request: HttpRequest, user_profile: UserProfile, realm_id_str: str, filename: str
|
request: HttpRequest, user_profile: UserProfile, realm_id_str: str, filename: str
|
||||||
) -> HttpResponseBase:
|
) -> HttpResponseBase:
|
||||||
return serve_file(request, user_profile, realm_id_str, filename, url_only=False, download=True)
|
return serve_file(
|
||||||
|
request, user_profile, realm_id_str, filename, url_only=False, force_download=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def serve_file_backend(
|
def serve_file_backend(
|
||||||
|
@ -167,7 +181,7 @@ def serve_file(
|
||||||
realm_id_str: str,
|
realm_id_str: str,
|
||||||
filename: str,
|
filename: str,
|
||||||
url_only: bool = False,
|
url_only: bool = False,
|
||||||
download: bool = False,
|
force_download: bool = False,
|
||||||
) -> HttpResponseBase:
|
) -> HttpResponseBase:
|
||||||
path_id = f"{realm_id_str}/{filename}"
|
path_id = f"{realm_id_str}/{filename}"
|
||||||
realm = get_valid_realm_from_request(request)
|
realm = get_valid_realm_from_request(request)
|
||||||
|
@ -181,13 +195,10 @@ def serve_file(
|
||||||
url = generate_unauthed_file_access_url(path_id)
|
url = generate_unauthed_file_access_url(path_id)
|
||||||
return json_success(request, data=dict(url=url))
|
return json_success(request, data=dict(url=url))
|
||||||
|
|
||||||
mimetype, encoding = guess_type(path_id)
|
|
||||||
download = download or mimetype not in INLINE_MIME_TYPES
|
|
||||||
|
|
||||||
if settings.LOCAL_UPLOADS_DIR is not None:
|
if settings.LOCAL_UPLOADS_DIR is not None:
|
||||||
return serve_local(request, path_id, download=download)
|
return serve_local(request, path_id, force_download=force_download)
|
||||||
else:
|
else:
|
||||||
return serve_s3(request, path_id, download=download)
|
return serve_s3(request, path_id, force_download=force_download)
|
||||||
|
|
||||||
|
|
||||||
USER_UPLOADS_ACCESS_TOKEN_SALT = "user_uploads_"
|
USER_UPLOADS_ACCESS_TOKEN_SALT = "user_uploads_"
|
||||||
|
@ -221,13 +232,10 @@ def serve_file_unauthed_from_token(
|
||||||
if path_id.split("/")[-1] != filename:
|
if path_id.split("/")[-1] != filename:
|
||||||
raise JsonableError(_("Invalid filename"))
|
raise JsonableError(_("Invalid filename"))
|
||||||
|
|
||||||
mimetype, encoding = guess_type(path_id)
|
|
||||||
download = mimetype not in INLINE_MIME_TYPES
|
|
||||||
|
|
||||||
if settings.LOCAL_UPLOADS_DIR is not None:
|
if settings.LOCAL_UPLOADS_DIR is not None:
|
||||||
return serve_local(request, path_id, download=download)
|
return serve_local(request, path_id)
|
||||||
else:
|
else:
|
||||||
return serve_s3(request, path_id, download=download)
|
return serve_s3(request, path_id)
|
||||||
|
|
||||||
|
|
||||||
def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponseBase:
|
def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponseBase:
|
||||||
|
|
Loading…
Reference in New Issue