tusd: Set metadata correctly in S3.

The Content-Type, Content-Disposition, StorageClass, and general
metadata are not set according to our patterns by tusd; copy the file
to itself to update those properties.
This commit is contained in:
Alex Vandiver 2024-09-25 19:39:49 +00:00 committed by Tim Abbott
parent 287850d08d
commit 638c579c56
4 changed files with 124 additions and 13 deletions

View File

@ -605,6 +605,7 @@ def use_s3_backend(method: Callable[P, None]) -> Callable[P, None]:
with (
mock.patch("zerver.lib.upload.upload_backend", backend),
mock.patch("zerver.worker.thumbnail.upload_backend", backend),
mock.patch("zerver.views.tusd.upload_backend", backend),
):
return method(*args, **kwargs)

View File

@ -87,6 +87,10 @@ def upload_content_to_s3(
extra_metadata: dict[str, str] | None = None,
filename: str | None = None,
) -> None:
# Note that these steps are also replicated in
# handle_upload_pre_finish_hook in zerver.views.tusd, to update
# properties for files uploaded via TUS.
key = bucket.Object(path)
metadata: dict[str, str] = {}
if user_profile:

View File

@ -1,12 +1,15 @@
import os
import botocore
import orjson
from django.conf import settings
from django.test import override_settings
from zerver.lib.cache import cache_delete, get_realm_used_upload_space_cache_key
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import create_s3_buckets, use_s3_backend
from zerver.lib.upload import sanitize_name, upload_backend, upload_message_attachment
from zerver.lib.upload.s3 import S3UploadBackend
from zerver.lib.utils import assert_is_not_none
from zerver.models import Attachment
from zerver.views.tusd import TusEvent, TusHook, TusHTTPRequest, TusUpload
@ -392,3 +395,77 @@ class TusdPreFinishTest(ZulipTestCase):
assert settings.LOCAL_FILES_DIR is not None
self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info")))
@use_s3_backend
@override_settings(S3_UPLOADS_STORAGE_CLASS="STANDARD_IA")
def test_s3_upload(self) -> None:
# Post a tusd hook for an object that already sits in the S3 uploads
# bucket, then check that the stored object ends up with the
# Content-Type, Content-Disposition, storage class and metadata
# asserted below, and that the sibling ".info" object is gone.
# NOTE(review): self.request() is defined outside this view; given the
# enclosing TusdPreFinishTest class it presumably builds a pre-finish
# hook payload -- confirm.
hamlet = self.example_user("hamlet")
bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
upload_backend = S3UploadBackend()
# The filename contains spaces and a non-ASCII character; the strictly
# sanitized name used in the storage path is "some-example.png".
filename = "some 例 example.png"
path_id = upload_backend.generate_message_upload_path(
str(hamlet.realm.id), sanitize_name(filename, strict=True)
)
self.assertTrue(path_id.endswith("/some-example.png"))
# Upload description sent to the hook; its metadata keeps the
# original, unsanitized filename.
info = TusUpload(
id=path_id,
size=len("zulip!"),
offset=0,
size_is_deferred=False,
meta_data={
"filename": filename,
"filetype": "image/png",
"name": filename,
"type": "image/png",
},
is_final=False,
is_partial=False,
partial_uploads=None,
storage=None,
)
# Seed the bucket with the "before" state: a generic Content-Type and
# the raw upload metadata, with non-ASCII characters replaced so the
# values are ASCII-only.
bucket.Object(path_id).put(
Body=b"zulip!",
ContentType="application/octet-stream",
Metadata={k: v.encode("ascii", "replace").decode() for k, v in info.meta_data.items()},
)
# Also store the ".info" sidecar object so its removal can be checked
# at the end of the test.
bucket.Object(f"{path_id}.info").put(
Body=info.model_dump_json().encode(),
)
# Post the hook saying the file is in place
self.login("hamlet")
result = self.client_post(
"/api/internal/tusd",
self.request(info).model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
# The hook reply nests a second HTTP response (status, body, headers)
# under the "HttpResponse" key of its JSON payload.
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 200)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"url": f"/user_uploads/{path_id}", "filename": filename},
)
self.assertEqual(
result_json["HttpResponse"]["Header"], {"Content-Type": "application/json"}
)
# An Attachment row must exist with the uploaded size and the declared
# content type.
attachment = Attachment.objects.get(path_id=path_id)
self.assertEqual(attachment.size, len("zulip!"))
self.assertEqual(attachment.content_type, "image/png")
# No local files directory is configured here, so inspect the object
# in the bucket directly.
assert settings.LOCAL_FILES_DIR is None
response = bucket.Object(path_id).get()
self.assertEqual(response["ContentType"], "image/png")
# Content-Disposition carries the original filename, percent-encoded
# as UTF-8.
self.assertEqual(
response["ContentDisposition"],
"inline; filename*=utf-8''some%20%E4%BE%8B%20example.png",
)
# Matches the S3_UPLOADS_STORAGE_CLASS override applied to this test.
self.assertEqual(response["StorageClass"], "STANDARD_IA")
# Only the realm and user ids remain as object metadata; the
# filename/filetype keys written above are no longer present.
self.assertEqual(
response["Metadata"],
{"realm_id": str(hamlet.realm_id), "user_profile_id": str(hamlet.id)},
)
# Fetching the ".info" object must now fail.
with self.assertRaises(botocore.exceptions.ClientError):
bucket.Object(f"{path_id}.info").get()

View File

@ -5,6 +5,7 @@ from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.db import transaction
from django.http import HttpRequest, HttpResponse, HttpResponseNotFound
from django.utils.http import content_disposition_header
from django.utils.translation import gettext as _
from django.views.decorators.csrf import csrf_exempt
from pydantic import BaseModel, ConfigDict, Field
@ -24,6 +25,7 @@ from zerver.lib.upload import (
sanitize_name,
upload_backend,
)
from zerver.lib.upload.base import INLINE_MIME_TYPES
from zerver.models import UserProfile
@ -119,25 +121,52 @@ def handle_upload_pre_create_hook(
def handle_upload_pre_finish_hook(
request: HttpRequest, user_profile: UserProfile, data: TusUpload
) -> HttpResponse:
metadata = data.meta_data
filename = metadata.get("filename", "")
# We want to store as the filename a version that clients are
# likely to be able to accept via "Save as..."
if filename in {"", ".", ".."}:
filename = "uploaded-file"
content_type = metadata.get("filetype")
if not content_type:
content_type = guess_type(filename)[0]
if content_type is None:
content_type = "application/octet-stream"
# With an S3 backend, the filename we passed in pre_create's
# data.id has a randomly-generated "multipart-id" appended with a
# `+`. Our path_ids cannot contain `+`, so we strip any suffix
# starting with `+`.
path_id = data.id.partition("+")[0]
tus_metadata = data.meta_data
filename = tus_metadata.get("filename", "")
# We want to store as the filename a version that clients are
# likely to be able to accept via "Save as..."
if filename in {"", ".", ".."}:
filename = "uploaded-file"
content_type = tus_metadata.get("filetype")
if not content_type:
content_type = guess_type(filename)[0]
if content_type is None:
content_type = "application/octet-stream"
if settings.LOCAL_UPLOADS_DIR is None:
# We "copy" the file to itself to update the Content-Type,
# Content-Disposition, and storage class of the data. This
# parallels the work from upload_content_to_s3 in
# zerver.lib.upload.s3
s3_metadata = {
"user_profile_id": str(user_profile.id),
"realm_id": str(user_profile.realm_id),
}
is_attachment = content_type not in INLINE_MIME_TYPES
content_disposition = content_disposition_header(is_attachment, filename) or "inline"
from zerver.lib.upload.s3 import S3UploadBackend
assert isinstance(upload_backend, S3UploadBackend)
key = upload_backend.uploads_bucket.Object(path_id)
key.copy_from(
ContentType=content_type,
ContentDisposition=content_disposition,
CopySource={"Bucket": settings.S3_AUTH_UPLOADS_BUCKET, "Key": path_id},
Metadata=s3_metadata,
MetadataDirective="REPLACE",
StorageClass=settings.S3_UPLOADS_STORAGE_CLASS,
)
# https://tus.github.io/tusd/storage-backends/overview/#storage-format
# tusd creates a .info file next to the upload, which we do not
# need to keep. Clean it up.