upload: Download files with their original names.

Fixes: #29491.
This commit is contained in:
Alex Vandiver 2024-08-29 21:39:07 +00:00 committed by Tim Abbott
parent 933e3cb375
commit b4764f49df
7 changed files with 129 additions and 30 deletions

View File

@ -122,6 +122,10 @@ def sanitize_name(value: str) -> str:
value = unicodedata.normalize("NFKC", value) value = unicodedata.normalize("NFKC", value)
value = re.sub(r"[^\w\s.-]", "", value).strip() value = re.sub(r"[^\w\s.-]", "", value).strip()
value = re.sub(r"[-\s]+", "-", value) value = re.sub(r"[-\s]+", "-", value)
# Django's MultiPartParser never returns files named this, but we
# could get them after removing spaces; change the name to a safer
# value.
if value in {"", ".", ".."}: if value in {"", ".", ".."}:
return "uploaded-file" return "uploaded-file"
return value return value
@ -139,9 +143,11 @@ def upload_message_attachment(
path_id = upload_backend.generate_message_upload_path( path_id = upload_backend.generate_message_upload_path(
str(target_realm.id), sanitize_name(uploaded_file_name) str(target_realm.id), sanitize_name(uploaded_file_name)
) )
with transaction.atomic(): with transaction.atomic():
upload_backend.upload_message_attachment( upload_backend.upload_message_attachment(
path_id, path_id,
uploaded_file_name,
content_type, content_type,
file_data, file_data,
user_profile, user_profile,

View File

@ -38,6 +38,7 @@ class ZulipUploadBackend:
def upload_message_attachment( def upload_message_attachment(
self, self,
path_id: str, path_id: str,
filename: str,
content_type: str, content_type: str,
file_data: bytes, file_data: bytes,
user_profile: UserProfile | None, user_profile: UserProfile | None,

View File

@ -89,6 +89,7 @@ class LocalUploadBackend(ZulipUploadBackend):
def upload_message_attachment( def upload_message_attachment(
self, self,
path_id: str, path_id: str,
filename: str,
content_type: str, content_type: str,
file_data: bytes, file_data: bytes,
user_profile: UserProfile | None, user_profile: UserProfile | None,

View File

@ -10,6 +10,7 @@ import boto3
import botocore import botocore
from botocore.client import Config from botocore.client import Config
from django.conf import settings from django.conf import settings
from django.utils.http import content_disposition_header
from mypy_boto3_s3.service_resource import Bucket from mypy_boto3_s3.service_resource import Bucket
from typing_extensions import override from typing_extensions import override
@ -62,7 +63,7 @@ def get_bucket(bucket_name: str, authed: bool = True) -> Bucket:
def upload_content_to_s3( def upload_content_to_s3(
bucket: Bucket, bucket: Bucket,
file_name: str, path: str,
content_type: str | None, content_type: str | None,
user_profile: UserProfile | None, user_profile: UserProfile | None,
contents: bytes, contents: bytes,
@ -77,8 +78,9 @@ def upload_content_to_s3(
] = "STANDARD", ] = "STANDARD",
cache_control: str | None = None, cache_control: str | None = None,
extra_metadata: dict[str, str] | None = None, extra_metadata: dict[str, str] | None = None,
filename: str | None = None,
) -> None: ) -> None:
key = bucket.Object(file_name) key = bucket.Object(path)
metadata: dict[str, str] = {} metadata: dict[str, str] = {}
if user_profile: if user_profile:
metadata["user_profile_id"] = str(user_profile.id) metadata["user_profile_id"] = str(user_profile.id)
@ -89,7 +91,10 @@ def upload_content_to_s3(
extras = {} extras = {}
if content_type is None: # nocoverage if content_type is None: # nocoverage
content_type = "" content_type = ""
if content_type not in INLINE_MIME_TYPES: is_attachment = content_type not in INLINE_MIME_TYPES
if filename is not None:
extras["ContentDisposition"] = content_disposition_header(is_attachment, filename)
elif is_attachment:
extras["ContentDisposition"] = "attachment" extras["ContentDisposition"] = "attachment"
if cache_control is not None: if cache_control is not None:
extras["CacheControl"] = cache_control extras["CacheControl"] = cache_control
@ -211,6 +216,7 @@ class S3UploadBackend(ZulipUploadBackend):
def upload_message_attachment( def upload_message_attachment(
self, self,
path_id: str, path_id: str,
filename: str,
content_type: str, content_type: str,
file_data: bytes, file_data: bytes,
user_profile: UserProfile | None, user_profile: UserProfile | None,
@ -222,6 +228,7 @@ class S3UploadBackend(ZulipUploadBackend):
user_profile, user_profile,
file_data, file_data,
storage_class=settings.S3_UPLOADS_STORAGE_CLASS, storage_class=settings.S3_UPLOADS_STORAGE_CLASS,
filename=filename,
) )
@override @override

View File

@ -1,6 +1,6 @@
import os import os
import re import re
import time from datetime import timedelta
from io import StringIO from io import StringIO
from unittest import mock from unittest import mock
from unittest.mock import patch from unittest.mock import patch
@ -8,6 +8,7 @@ from urllib.parse import quote
import orjson import orjson
import pyvips import pyvips
import time_machine
from django.conf import settings from django.conf import settings
from django.utils.timezone import now as timezone_now from django.utils.timezone import now as timezone_now
from pyvips import at_least_libvips from pyvips import at_least_libvips
@ -304,21 +305,43 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
self.login("hamlet") self.login("hamlet")
fp = StringIO("zulip!") fp = StringIO("zulip!")
fp.name = "zulip.txt" fp.name = "zulip.txt"
now = timezone_now()
with time_machine.travel(now, tick=False):
result = self.client_post("/json/user_uploads", {"file": fp})
response_dict = self.assert_json_success(result)
url = "/json" + response_dict["url"]
result = self.client_get(url)
data = self.assert_json_success(result)
url_only_url = data["url"]
self.logout()
self.assertEqual(self.client_get(url_only_url).getvalue(), b"zulip!")
with time_machine.travel(now + timedelta(seconds=30), tick=False):
self.assertEqual(self.client_get(url_only_url).getvalue(), b"zulip!")
# After over 60 seconds, the token should become invalid:
with time_machine.travel(now + timedelta(seconds=61), tick=False):
result = self.client_get(url_only_url)
self.assert_json_error(result, "Invalid token")
def test_serve_local_file_unauthed_token_deleted(self) -> None:
self.login("hamlet")
fp = StringIO("zulip!")
fp.name = "zulip.txt"
now = timezone_now()
with time_machine.travel(now, tick=False):
result = self.client_post("/json/user_uploads", {"file": fp}) result = self.client_post("/json/user_uploads", {"file": fp})
response_dict = self.assert_json_success(result) response_dict = self.assert_json_success(result)
url = "/json" + response_dict["url"] url = "/json" + response_dict["url"]
start_time = time.time()
with mock.patch("django.core.signing.time.time", return_value=start_time):
result = self.client_get(url) result = self.client_get(url)
data = self.assert_json_success(result) data = self.assert_json_success(result)
url_only_url = data["url"] url_only_url = data["url"]
self.logout() path_id = response_dict["url"].removeprefix("/user_uploads/")
self.assertEqual(self.client_get(url_only_url).getvalue(), b"zulip!") Attachment.objects.get(path_id=path_id).delete()
# After over 60 seconds, the token should become invalid:
with mock.patch("django.core.signing.time.time", return_value=start_time + 61):
result = self.client_get(url_only_url) result = self.client_get(url_only_url)
self.assert_json_error(result, "Invalid token") self.assert_json_error(result, "Invalid token")
@ -909,6 +932,7 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
name_str_for_test: str, name_str_for_test: str,
content_disposition: str = "", content_disposition: str = "",
download: bool = False, download: bool = False,
returned_attachment: bool = False,
) -> None: ) -> None:
self.login("hamlet") self.login("hamlet")
fp = StringIO("zulip!") fp = StringIO("zulip!")
@ -927,27 +951,74 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
response["X-Accel-Redirect"], response["X-Accel-Redirect"],
"/internal/local/uploads/" + fp_path + "/" + name_str_for_test, "/internal/local/uploads/" + fp_path + "/" + name_str_for_test,
) )
if content_disposition != "": if returned_attachment:
self.assertIn("attachment;", response["Content-disposition"]) self.assertIn("attachment;", response["Content-disposition"])
self.assertIn(content_disposition, response["Content-disposition"])
else: else:
self.assertIn("inline;", response["Content-disposition"]) self.assertIn("inline;", response["Content-disposition"])
if content_disposition != "":
self.assertIn(content_disposition, response["Content-disposition"])
self.assertEqual(set(response["Cache-Control"].split(", ")), {"private", "immutable"}) self.assertEqual(set(response["Cache-Control"].split(", ")), {"private", "immutable"})
check_xsend_links("zulip.txt", "zulip.txt", 'filename="zulip.txt"') check_xsend_links(
"zulip.txt", "zulip.txt", 'filename="zulip.txt"', returned_attachment=True
)
check_xsend_links( check_xsend_links(
"áéБД.txt", "áéБД.txt",
"%C3%A1%C3%A9%D0%91%D0%94.txt", "%C3%A1%C3%A9%D0%91%D0%94.txt",
"filename*=utf-8''%C3%A1%C3%A9%D0%91%D0%94.txt", "filename*=utf-8''%C3%A1%C3%A9%D0%91%D0%94.txt",
returned_attachment=True,
) )
check_xsend_links("zulip.html", "zulip.html", 'filename="zulip.html"')
check_xsend_links("zulip.sh", "zulip.sh", 'filename="zulip.sh"')
check_xsend_links("zulip.jpeg", "zulip.jpeg")
check_xsend_links( check_xsend_links(
"zulip.jpeg", "zulip.jpeg", download=True, content_disposition='filename="zulip.jpeg"' "zulip.html",
"zulip.html",
'filename="zulip.html"',
returned_attachment=True,
) )
check_xsend_links("áéБД.pdf", "%C3%A1%C3%A9%D0%91%D0%94.pdf") check_xsend_links(
check_xsend_links("zulip", "zulip", 'filename="zulip"') "zulip.sh",
"zulip.sh",
'filename="zulip.sh"',
returned_attachment=True,
)
check_xsend_links(
"zulip.jpeg",
"zulip.jpeg",
'filename="zulip.jpeg"',
returned_attachment=False,
)
check_xsend_links(
"zulip.jpeg",
"zulip.jpeg",
download=True,
content_disposition='filename="zulip.jpeg"',
returned_attachment=True,
)
check_xsend_links(
"áéБД.pdf",
"%C3%A1%C3%A9%D0%91%D0%94.pdf",
"filename*=utf-8''%C3%A1%C3%A9%D0%91%D0%94.pdf",
returned_attachment=False,
)
check_xsend_links(
"some file (with spaces).png",
"some-file-with-spaces.png",
'filename="some file (with spaces).png"',
returned_attachment=False,
)
check_xsend_links(
"some file (with spaces).png",
"some-file-with-spaces.png",
'filename="some file (with spaces).png"',
download=True,
returned_attachment=True,
)
check_xsend_links(
".().",
"uploaded-file",
'filename=".()."',
returned_attachment=True,
)
check_xsend_links("zulip", "zulip", 'filename="zulip"', returned_attachment=True)
class AvatarTest(UploadSerializeMixin, ZulipTestCase): class AvatarTest(UploadSerializeMixin, ZulipTestCase):

View File

@ -45,12 +45,11 @@ from zerver.lib.upload import (
from zerver.lib.upload.base import INLINE_MIME_TYPES from zerver.lib.upload.base import INLINE_MIME_TYPES
from zerver.lib.upload.local import assert_is_local_storage_path from zerver.lib.upload.local import assert_is_local_storage_path
from zerver.lib.upload.s3 import get_signed_upload_url from zerver.lib.upload.s3 import get_signed_upload_url
from zerver.models import ImageAttachment, UserProfile from zerver.models import Attachment, ImageAttachment, UserProfile
from zerver.worker.thumbnail import ensure_thumbnails from zerver.worker.thumbnail import ensure_thumbnails
def patch_disposition_header(response: HttpResponse, url: str, is_attachment: bool) -> None: def patch_disposition_header(response: HttpResponse, filename: str, is_attachment: bool) -> None:
filename = os.path.basename(urlsplit(url).path)
content_disposition = content_disposition_header(is_attachment, filename) content_disposition = content_disposition_header(is_attachment, filename)
if content_disposition is not None: if content_disposition is not None:
@ -112,7 +111,10 @@ def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -
def serve_local( def serve_local(
request: HttpRequest, path_id: str, force_download: bool = False request: HttpRequest,
path_id: str,
filename: str,
force_download: bool = False,
) -> HttpResponseBase: ) -> HttpResponseBase:
assert settings.LOCAL_FILES_DIR is not None assert settings.LOCAL_FILES_DIR is not None
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id) local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
@ -120,7 +122,7 @@ def serve_local(
if not os.path.isfile(local_path): if not os.path.isfile(local_path):
return HttpResponseNotFound("<p>File not found</p>") return HttpResponseNotFound("<p>File not found</p>")
mimetype, encoding = guess_type(path_id) mimetype, encoding = guess_type(filename)
download = force_download or mimetype not in INLINE_MIME_TYPES download = force_download or mimetype not in INLINE_MIME_TYPES
if settings.DEVELOPMENT: if settings.DEVELOPMENT:
@ -130,6 +132,7 @@ def serve_local(
response: HttpResponseBase = FileResponse( response: HttpResponseBase = FileResponse(
open(local_path, "rb"), # noqa: SIM115 open(local_path, "rb"), # noqa: SIM115
as_attachment=download, as_attachment=download,
filename=filename,
) )
patch_cache_control(response, private=True, immutable=True) patch_cache_control(response, private=True, immutable=True)
return response return response
@ -143,7 +146,7 @@ def serve_local(
response = internal_nginx_redirect( response = internal_nginx_redirect(
quote(f"/internal/local/uploads/{path_id}"), content_type=mimetype quote(f"/internal/local/uploads/{path_id}"), content_type=mimetype
) )
patch_disposition_header(response, local_path, download) patch_disposition_header(response, filename, download)
patch_cache_control(response, private=True, immutable=True) patch_cache_control(response, private=True, immutable=True)
return response return response
@ -335,9 +338,14 @@ def serve_file(
# Update the path that we are fetching to be the thumbnail # Update the path that we are fetching to be the thumbnail
path_id = get_image_thumbnail_path(image_attachment, requested_format) path_id = get_image_thumbnail_path(image_attachment, requested_format)
served_filename = str(requested_format)
else:
served_filename = attachment.file_name
if settings.LOCAL_UPLOADS_DIR is not None: if settings.LOCAL_UPLOADS_DIR is not None:
return serve_local(request, path_id, force_download=force_download) return serve_local(
request, path_id, filename=served_filename, force_download=force_download
)
else: else:
return serve_s3(request, path_id, force_download=force_download) return serve_s3(request, path_id, force_download=force_download)
@ -374,9 +382,13 @@ def serve_file_unauthed_from_token(
raise JsonableError(_("Invalid token")) raise JsonableError(_("Invalid token"))
if path_id.split("/")[-1] != filename: if path_id.split("/")[-1] != filename:
raise JsonableError(_("Invalid filename")) raise JsonableError(_("Invalid filename"))
try:
attachment = Attachment.objects.get(path_id=path_id)
except Attachment.DoesNotExist:
raise JsonableError(_("Invalid token"))
if settings.LOCAL_UPLOADS_DIR is not None: if settings.LOCAL_UPLOADS_DIR is not None:
return serve_local(request, path_id) return serve_local(request, path_id, filename=attachment.file_name)
else: else:
return serve_s3(request, path_id) return serve_s3(request, path_id)

View File

@ -100,6 +100,7 @@ def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
logger.info("Uploading %d bytes to %s", len(thumbnailed_bytes), thumbnail_path) logger.info("Uploading %d bytes to %s", len(thumbnailed_bytes), thumbnail_path)
upload_backend.upload_message_attachment( upload_backend.upload_message_attachment(
thumbnail_path, thumbnail_path,
str(thumbnail_format),
content_type, content_type,
thumbnailed_bytes, thumbnailed_bytes,
None, None,