tusd: Set metadata correctly in S3.

The Content-Type, Content-Disposition, StorageClass, and general
metadata are not set according to our patterns by tusd; copy the file
to itself to update those properties.
This commit is contained in:
Alex Vandiver 2024-09-25 19:39:49 +00:00 committed by Tim Abbott
parent 287850d08d
commit 638c579c56
4 changed files with 124 additions and 13 deletions

View File

@ -605,6 +605,7 @@ def use_s3_backend(method: Callable[P, None]) -> Callable[P, None]:
with ( with (
mock.patch("zerver.lib.upload.upload_backend", backend), mock.patch("zerver.lib.upload.upload_backend", backend),
mock.patch("zerver.worker.thumbnail.upload_backend", backend), mock.patch("zerver.worker.thumbnail.upload_backend", backend),
mock.patch("zerver.views.tusd.upload_backend", backend),
): ):
return method(*args, **kwargs) return method(*args, **kwargs)

View File

@ -87,6 +87,10 @@ def upload_content_to_s3(
extra_metadata: dict[str, str] | None = None, extra_metadata: dict[str, str] | None = None,
filename: str | None = None, filename: str | None = None,
) -> None: ) -> None:
# Note that these steps are also replicated in
# handle_upload_pre_finish_hook in zerver.views.tus, to update
# properties for files uploaded via TUS.
key = bucket.Object(path) key = bucket.Object(path)
metadata: dict[str, str] = {} metadata: dict[str, str] = {}
if user_profile: if user_profile:

View File

@ -1,12 +1,15 @@
import os import os
import botocore
import orjson import orjson
from django.conf import settings from django.conf import settings
from django.test import override_settings from django.test import override_settings
from zerver.lib.cache import cache_delete, get_realm_used_upload_space_cache_key from zerver.lib.cache import cache_delete, get_realm_used_upload_space_cache_key
from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import create_s3_buckets, use_s3_backend
from zerver.lib.upload import sanitize_name, upload_backend, upload_message_attachment from zerver.lib.upload import sanitize_name, upload_backend, upload_message_attachment
from zerver.lib.upload.s3 import S3UploadBackend
from zerver.lib.utils import assert_is_not_none from zerver.lib.utils import assert_is_not_none
from zerver.models import Attachment from zerver.models import Attachment
from zerver.views.tusd import TusEvent, TusHook, TusHTTPRequest, TusUpload from zerver.views.tusd import TusEvent, TusHook, TusHTTPRequest, TusUpload
@ -392,3 +395,77 @@ class TusdPreFinishTest(ZulipTestCase):
assert settings.LOCAL_FILES_DIR is not None assert settings.LOCAL_FILES_DIR is not None
self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id))) self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info"))) self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info")))
@use_s3_backend
@override_settings(S3_UPLOADS_STORAGE_CLASS="STANDARD_IA")
def test_s3_upload(self) -> None:
hamlet = self.example_user("hamlet")
bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
upload_backend = S3UploadBackend()
filename = "some 例 example.png"
path_id = upload_backend.generate_message_upload_path(
str(hamlet.realm.id), sanitize_name(filename, strict=True)
)
self.assertTrue(path_id.endswith("/some-example.png"))
info = TusUpload(
id=path_id,
size=len("zulip!"),
offset=0,
size_is_deferred=False,
meta_data={
"filename": filename,
"filetype": "image/png",
"name": filename,
"type": "image/png",
},
is_final=False,
is_partial=False,
partial_uploads=None,
storage=None,
)
bucket.Object(path_id).put(
Body=b"zulip!",
ContentType="application/octet-stream",
Metadata={k: v.encode("ascii", "replace").decode() for k, v in info.meta_data.items()},
)
bucket.Object(f"{path_id}.info").put(
Body=info.model_dump_json().encode(),
)
# Post the hook saying the file is in place
self.login("hamlet")
result = self.client_post(
"/api/internal/tusd",
self.request(info).model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 200)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"url": f"/user_uploads/{path_id}", "filename": filename},
)
self.assertEqual(
result_json["HttpResponse"]["Header"], {"Content-Type": "application/json"}
)
attachment = Attachment.objects.get(path_id=path_id)
self.assertEqual(attachment.size, len("zulip!"))
self.assertEqual(attachment.content_type, "image/png")
assert settings.LOCAL_FILES_DIR is None
response = bucket.Object(path_id).get()
self.assertEqual(response["ContentType"], "image/png")
self.assertEqual(
response["ContentDisposition"],
"inline; filename*=utf-8''some%20%E4%BE%8B%20example.png",
)
self.assertEqual(response["StorageClass"], "STANDARD_IA")
self.assertEqual(
response["Metadata"],
{"realm_id": str(hamlet.realm_id), "user_profile_id": str(hamlet.id)},
)
with self.assertRaises(botocore.exceptions.ClientError):
bucket.Object(f"{path_id}.info").get()

View File

@ -5,6 +5,7 @@ from django.conf import settings
from django.contrib.auth.models import AnonymousUser from django.contrib.auth.models import AnonymousUser
from django.db import transaction from django.db import transaction
from django.http import HttpRequest, HttpResponse, HttpResponseNotFound from django.http import HttpRequest, HttpResponse, HttpResponseNotFound
from django.utils.http import content_disposition_header
from django.utils.translation import gettext as _ from django.utils.translation import gettext as _
from django.views.decorators.csrf import csrf_exempt from django.views.decorators.csrf import csrf_exempt
from pydantic import BaseModel, ConfigDict, Field from pydantic import BaseModel, ConfigDict, Field
@ -24,6 +25,7 @@ from zerver.lib.upload import (
sanitize_name, sanitize_name,
upload_backend, upload_backend,
) )
from zerver.lib.upload.base import INLINE_MIME_TYPES
from zerver.models import UserProfile from zerver.models import UserProfile
@ -119,25 +121,52 @@ def handle_upload_pre_create_hook(
def handle_upload_pre_finish_hook( def handle_upload_pre_finish_hook(
request: HttpRequest, user_profile: UserProfile, data: TusUpload request: HttpRequest, user_profile: UserProfile, data: TusUpload
) -> HttpResponse: ) -> HttpResponse:
metadata = data.meta_data
filename = metadata.get("filename", "")
# We want to store as the filename a version that clients are
# likely to be able to accept via "Save as..."
if filename in {"", ".", ".."}:
filename = "uploaded-file"
content_type = metadata.get("filetype")
if not content_type:
content_type = guess_type(filename)[0]
if content_type is None:
content_type = "application/octet-stream"
# With an S3 backend, the filename we passed in pre_create's # With an S3 backend, the filename we passed in pre_create's
# data.id has a randomly-generated "mutlipart-id" appended with a # data.id has a randomly-generated "mutlipart-id" appended with a
# `+`. Our path_ids cannot contain `+`, so we strip any suffix # `+`. Our path_ids cannot contain `+`, so we strip any suffix
# starting with `+`. # starting with `+`.
path_id = data.id.partition("+")[0] path_id = data.id.partition("+")[0]
tus_metadata = data.meta_data
filename = tus_metadata.get("filename", "")
# We want to store as the filename a version that clients are
# likely to be able to accept via "Save as..."
if filename in {"", ".", ".."}:
filename = "uploaded-file"
content_type = tus_metadata.get("filetype")
if not content_type:
content_type = guess_type(filename)[0]
if content_type is None:
content_type = "application/octet-stream"
if settings.LOCAL_UPLOADS_DIR is None:
# We "copy" the file to itself to update the Content-Type,
# Content-Disposition, and storage class of the data. This
# parallels the work from upload_content_to_s3 in
# zerver.lib.uploads.s3
s3_metadata = {
"user_profile_id": str(user_profile.id),
"realm_id": str(user_profile.realm_id),
}
is_attachment = content_type not in INLINE_MIME_TYPES
content_disposition = content_disposition_header(is_attachment, filename) or "inline"
from zerver.lib.upload.s3 import S3UploadBackend
assert isinstance(upload_backend, S3UploadBackend)
key = upload_backend.uploads_bucket.Object(path_id)
key.copy_from(
ContentType=content_type,
ContentDisposition=content_disposition,
CopySource={"Bucket": settings.S3_AUTH_UPLOADS_BUCKET, "Key": path_id},
Metadata=s3_metadata,
MetadataDirective="REPLACE",
StorageClass=settings.S3_UPLOADS_STORAGE_CLASS,
)
# https://tus.github.io/tusd/storage-backends/overview/#storage-format # https://tus.github.io/tusd/storage-backends/overview/#storage-format
# tusd creates a .info file next to the upload, which we do not # tusd creates a .info file next to the upload, which we do not
# need to keep. Clean it up. # need to keep. Clean it up.