diff --git a/zerver/lib/test_helpers.py b/zerver/lib/test_helpers.py index f7b571aa55..d5b5f375a5 100644 --- a/zerver/lib/test_helpers.py +++ b/zerver/lib/test_helpers.py @@ -605,6 +605,7 @@ def use_s3_backend(method: Callable[P, None]) -> Callable[P, None]: with ( mock.patch("zerver.lib.upload.upload_backend", backend), mock.patch("zerver.worker.thumbnail.upload_backend", backend), + mock.patch("zerver.views.tusd.upload_backend", backend), ): return method(*args, **kwargs) diff --git a/zerver/lib/upload/s3.py b/zerver/lib/upload/s3.py index 4f12251d69..29407cff80 100644 --- a/zerver/lib/upload/s3.py +++ b/zerver/lib/upload/s3.py @@ -87,6 +87,10 @@ def upload_content_to_s3( extra_metadata: dict[str, str] | None = None, filename: str | None = None, ) -> None: + # Note that these steps are also replicated in + # handle_upload_pre_finish_hook in zerver.views.tus, to update + # properties for files uploaded via TUS. + key = bucket.Object(path) metadata: dict[str, str] = {} if user_profile: diff --git a/zerver/tests/test_tusd.py b/zerver/tests/test_tusd.py index f9c9ea758b..6b90f7b681 100644 --- a/zerver/tests/test_tusd.py +++ b/zerver/tests/test_tusd.py @@ -1,12 +1,15 @@ import os +import botocore import orjson from django.conf import settings from django.test import override_settings from zerver.lib.cache import cache_delete, get_realm_used_upload_space_cache_key from zerver.lib.test_classes import ZulipTestCase +from zerver.lib.test_helpers import create_s3_buckets, use_s3_backend from zerver.lib.upload import sanitize_name, upload_backend, upload_message_attachment +from zerver.lib.upload.s3 import S3UploadBackend from zerver.lib.utils import assert_is_not_none from zerver.models import Attachment from zerver.views.tusd import TusEvent, TusHook, TusHTTPRequest, TusUpload @@ -392,3 +395,77 @@ class TusdPreFinishTest(ZulipTestCase): assert settings.LOCAL_FILES_DIR is not None self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id))) self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info"))) + + @use_s3_backend + @override_settings(S3_UPLOADS_STORAGE_CLASS="STANDARD_IA") + def test_s3_upload(self) -> None: + hamlet = self.example_user("hamlet") + bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0] + + upload_backend = S3UploadBackend() + filename = "some δΎ‹ example.png" + path_id = upload_backend.generate_message_upload_path( + str(hamlet.realm.id), sanitize_name(filename, strict=True) + ) + self.assertTrue(path_id.endswith("/some-example.png")) + info = TusUpload( + id=path_id, + size=len("zulip!"), + offset=0, + size_is_deferred=False, + meta_data={ + "filename": filename, + "filetype": "image/png", + "name": filename, + "type": "image/png", + }, + is_final=False, + is_partial=False, + partial_uploads=None, + storage=None, + ) + bucket.Object(path_id).put( + Body=b"zulip!", + ContentType="application/octet-stream", + Metadata={k: v.encode("ascii", "replace").decode() for k, v in info.meta_data.items()}, + ) + bucket.Object(f"{path_id}.info").put( + Body=info.model_dump_json().encode(), + ) + + # Post the hook saying the file is in place + self.login("hamlet") + result = self.client_post( + "/api/internal/tusd", + self.request(info).model_dump(), + content_type="application/json", + ) + self.assertEqual(result.status_code, 200) + result_json = result.json() + self.assertEqual(result_json["HttpResponse"]["StatusCode"], 200) + self.assertEqual( + orjson.loads(result_json["HttpResponse"]["Body"]), + {"url": f"/user_uploads/{path_id}", "filename": filename}, + ) + self.assertEqual( + result_json["HttpResponse"]["Header"], {"Content-Type": "application/json"} + ) + + attachment = Attachment.objects.get(path_id=path_id) + self.assertEqual(attachment.size, len("zulip!")) + self.assertEqual(attachment.content_type, "image/png") + + assert settings.LOCAL_FILES_DIR is None + response = bucket.Object(path_id).get() + self.assertEqual(response["ContentType"], "image/png") + self.assertEqual( + response["ContentDisposition"], + "inline; filename*=utf-8''some%20%E4%BE%8B%20example.png", + ) + self.assertEqual(response["StorageClass"], "STANDARD_IA") + self.assertEqual( + response["Metadata"], + {"realm_id": str(hamlet.realm_id), "user_profile_id": str(hamlet.id)}, + ) + with self.assertRaises(botocore.exceptions.ClientError): + bucket.Object(f"{path_id}.info").get() diff --git a/zerver/views/tusd.py b/zerver/views/tusd.py index d1ca31e6bd..c956fcb9a0 100644 --- a/zerver/views/tusd.py +++ b/zerver/views/tusd.py @@ -5,6 +5,7 @@ from django.conf import settings from django.contrib.auth.models import AnonymousUser from django.db import transaction from django.http import HttpRequest, HttpResponse, HttpResponseNotFound +from django.utils.http import content_disposition_header from django.utils.translation import gettext as _ from django.views.decorators.csrf import csrf_exempt from pydantic import BaseModel, ConfigDict, Field @@ -24,6 +25,7 @@ from zerver.lib.upload import ( sanitize_name, upload_backend, ) +from zerver.lib.upload.base import INLINE_MIME_TYPES from zerver.models import UserProfile @@ -119,25 +121,52 @@ def handle_upload_pre_create_hook( def handle_upload_pre_finish_hook( request: HttpRequest, user_profile: UserProfile, data: TusUpload ) -> HttpResponse: - metadata = data.meta_data - filename = metadata.get("filename", "") - # We want to store as the filename a version that clients are - # likely to be able to accept via "Save as..." - if filename in {"", ".", ".."}: - filename = "uploaded-file" - - content_type = metadata.get("filetype") - if not content_type: - content_type = guess_type(filename)[0] - if content_type is None: - content_type = "application/octet-stream" - # With an S3 backend, the filename we passed in pre_create's # data.id has a randomly-generated "mutlipart-id" appended with a # `+`. Our path_ids cannot contain `+`, so we strip any suffix # starting with `+`. path_id = data.id.partition("+")[0] + tus_metadata = data.meta_data + filename = tus_metadata.get("filename", "") + + # We want to store as the filename a version that clients are + # likely to be able to accept via "Save as..." + if filename in {"", ".", ".."}: + filename = "uploaded-file" + + content_type = tus_metadata.get("filetype") + if not content_type: + content_type = guess_type(filename)[0] + if content_type is None: + content_type = "application/octet-stream" + + if settings.LOCAL_UPLOADS_DIR is None: + # We "copy" the file to itself to update the Content-Type, + # Content-Disposition, and storage class of the data. This + # parallels the work from upload_content_to_s3 in + # zerver.lib.uploads.s3 + s3_metadata = { + "user_profile_id": str(user_profile.id), + "realm_id": str(user_profile.realm_id), + } + + is_attachment = content_type not in INLINE_MIME_TYPES + content_disposition = content_disposition_header(is_attachment, filename) or "inline" + + from zerver.lib.upload.s3 import S3UploadBackend + + assert isinstance(upload_backend, S3UploadBackend) + key = upload_backend.uploads_bucket.Object(path_id) + key.copy_from( + ContentType=content_type, + ContentDisposition=content_disposition, + CopySource={"Bucket": settings.S3_AUTH_UPLOADS_BUCKET, "Key": path_id}, + Metadata=s3_metadata, + MetadataDirective="REPLACE", + StorageClass=settings.S3_UPLOADS_STORAGE_CLASS, + ) + # https://tus.github.io/tusd/storage-backends/overview/#storage-format # tusd creates a .info file next to the upload, which we do not # need to keep. Clean it up.