exports: Add a separate bucket for realm exports.

This allows finer-grained access control and auditing.  The links
generated also expire after one week, and the suggested configuration
is that the underlying data does as well.

Co-authored-by: Prakhar Pratyush <prakhar@zulip.com>
commit e125ad823d
parent c1e8ecd08f
Author: Alex Vandiver
Date: 2022-04-06 15:46:13 -07:00
Committer: Tim Abbott
8 changed files with 259 additions and 29 deletions

@@ -241,3 +241,64 @@ lifecycle transition cost][s3-pricing].
[s3-storage-class]: https://aws.amazon.com/s3/storage-classes/
[s3-storage-class-constant]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-StorageClass
[s3-pricing]: https://aws.amazon.com/s3/pricing/

## Data export bucket

The [data export process](export-and-import.md#data-export), when
[triggered from the
UI](https://zulip.com/help/export-your-organization), uploads the
completed export so it is available to download from the server; this
is also available [from the command
line](export-and-import.md#export-your-zulip-data) by passing
`--upload`. When the S3 backend is used, these uploads are done to S3.
By default, they are uploaded to the bucket with user avatars
(`S3_AVATAR_BUCKET`), because that bucket is world-readable, allowing
easy generation of links to download the export.

If you would like to store exports in a dedicated bucket, you can set
`S3_EXPORT_BUCKET` in your `/etc/zulip/settings.py`. Like the uploads
bucket, this bucket should be configured to allow access only to the
Zulip account; the server will then generate signed download links
which are valid for one week at a time:
```json
{
    "Version": "2012-10-17",
    "Id": "Policy1468991802322",
    "Statement": [
        {
            "Sid": "Stmt1468991795390",
            "Effect": "Allow",
            "Principal": {
                "AWS": "ARN_PRINCIPAL_HERE"
            },
            "Action": [
                "s3:GetObject",
                "s3:DeleteObject",
                "s3:PutObject"
            ],
            "Resource": "arn:aws:s3:::BUCKET_NAME_HERE/*"
        },
        {
            "Sid": "Stmt1468991795391",
            "Effect": "Allow",
            "Principal": {
                "AWS": "ARN_PRINCIPAL_HERE"
            },
            "Action": "s3:ListBucket",
            "Resource": "arn:aws:s3:::BUCKET_NAME_HERE"
        }
    ]
}
```
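
With the bucket and its policy in place, point Zulip at it in
`/etc/zulip/settings.py`; the bucket name below is illustrative:

```python
S3_EXPORT_BUCKET = "example-zulip-exports"
```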
You should copy existing exports to the new bucket. For instance,
using the [AWS CLI](https://aws.amazon.com/cli/)'s [`aws s3
sync`](https://docs.aws.amazon.com/cli/latest/reference/s3/sync.html),
if the old bucket was named `example-zulip-avatars` and the new export
bucket is named `example-zulip-exports`:
```
aws s3 sync s3://example-zulip-avatars/exports/ s3://example-zulip-exports/
```
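
Since the links the server generates are valid for one week at a
time, you may want the underlying export data to expire on the same
schedule, as suggested above. A minimal sketch of such a lifecycle
rule, applied with boto3 (the bucket name and rule ID are
illustrative, and S3 credentials are assumed to be configured):

```python
import boto3

client = boto3.client("s3")
client.put_bucket_lifecycle_configuration(
    Bucket="example-zulip-exports",  # illustrative bucket name
    LifecycleConfiguration={
        "Rules": [
            {
                "ID": "expire-realm-exports",  # illustrative rule ID
                "Status": "Enabled",
                "Filter": {"Prefix": ""},  # apply to every object in the bucket
                # Expire export tarballs after one week, matching the
                # lifetime of the download links generated for them.
                "Expiration": {"Days": 7},
            }
        ],
    },
)
```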

@@ -2470,9 +2470,6 @@ def export_realm_wrapper(
    if not upload:
        return None

-    # We upload to the `avatars` bucket because that's world-readable
-    # without additional configuration.  We'll likely want to change
-    # that in the future.
    print("Uploading export tarball...")
    public_url = zerver.lib.upload.upload_backend.upload_export_tarball(
        realm, tarball_path, percent_callback=percent_callback

@@ -14,7 +14,7 @@ from botocore.response import StreamingBody
from django.conf import settings
from django.utils.http import content_disposition_header
from mypy_boto3_s3.client import S3Client
-from mypy_boto3_s3.service_resource import Bucket
+from mypy_boto3_s3.service_resource import Bucket, Object
from typing_extensions import override
from zerver.lib.partial import partial
@@ -145,6 +145,10 @@ class S3UploadBackend(ZulipUploadBackend):
    def __init__(self) -> None:
        self.avatar_bucket = get_bucket(settings.S3_AVATAR_BUCKET)
        self.uploads_bucket = get_bucket(settings.S3_AUTH_UPLOADS_BUCKET)
        self.export_bucket: Bucket | None = None
        if settings.S3_EXPORT_BUCKET:
            self.export_bucket = get_bucket(settings.S3_EXPORT_BUCKET)
        self.public_upload_url_base = self.construct_public_upload_url_base()

    def delete_file_from_s3(self, path_id: str, bucket: Bucket) -> bool:
@@ -428,19 +432,46 @@ class S3UploadBackend(ZulipUploadBackend):
    @override
    def get_export_tarball_url(self, realm: Realm, export_path: str) -> str:
        export_path = export_path.removeprefix("/")
        if self.export_bucket:
            # Fix old data if the row was created when an export bucket was not in use.
            export_path = export_path.removeprefix("exports/")
            client = self.export_bucket.meta.client
            return client.generate_presigned_url(
                ClientMethod="get_object",
                Params={
                    "Bucket": self.export_bucket.name,
                    "Key": export_path,
                },
                # Expires in one week, the longest allowed by AWS
                ExpiresIn=60 * 60 * 24 * 7,
            )
        else:
            if not export_path.startswith("exports/"):
                export_path = "exports/" + export_path
            client = self.avatar_bucket.meta.client
            signed_url = client.generate_presigned_url(
                ClientMethod="get_object",
                Params={
                    "Bucket": self.avatar_bucket.name,
-                    # export_path has a leading /
-                    "Key": export_path.removeprefix("/"),
+                    "Key": export_path,
                },
                ExpiresIn=0,
            )
            # Strip off the signing query parameters, since this URL is public
            return urlsplit(signed_url)._replace(query="").geturl()

    def export_object(self, tarball_path: str) -> Object:
        if self.export_bucket:
            return self.export_bucket.Object(
                os.path.join(secrets.token_hex(16), os.path.basename(tarball_path))
            )
        else:
            # We fall back to the avatar bucket, because it's world-readable.
            return self.avatar_bucket.Object(
                os.path.join("exports", secrets.token_hex(16), os.path.basename(tarball_path))
            )
    @override
    def upload_export_tarball(
        self,
@@ -448,10 +479,7 @@ class S3UploadBackend(ZulipUploadBackend):
        tarball_path: str,
        percent_callback: Callable[[Any], None] | None = None,
    ) -> str:
-        # We use the avatar bucket, because it's world-readable.
-        key = self.avatar_bucket.Object(
-            os.path.join("exports", secrets.token_hex(16), os.path.basename(tarball_path))
-        )
+        key = self.export_object(tarball_path)

        if percent_callback is None:
            key.upload_file(Filename=tarball_path)
@@ -464,6 +492,7 @@ class S3UploadBackend(ZulipUploadBackend):
    def delete_export_tarball(self, export_path: str) -> str | None:
        assert export_path.startswith("/")
        path_id = export_path.removeprefix("/")
-        if self.delete_file_from_s3(path_id, self.avatar_bucket):
+        bucket = self.export_bucket or self.avatar_bucket
+        if self.delete_file_from_s3(path_id, bucket):
            return export_path
        return None

@@ -1,5 +1,6 @@
import os
from unittest.mock import patch
from urllib.parse import urlsplit
import botocore.exceptions
from django.conf import settings
@@ -42,7 +43,7 @@ class RealmExportTest(ZulipTestCase):
    def test_endpoint_s3(self) -> None:
        admin = self.example_user("iago")
        self.login_user(admin)
-        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+        bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
        tarball_path = create_dummy_file("test-export.tar.gz")

        # Test the export logic.
@@ -82,9 +83,10 @@
        # Test that the export we have is the export we created.
        export_dict = response_dict["exports"]
        self.assertEqual(export_dict[0]["id"], audit_log_entry.id)
        parsed_url = urlsplit(export_dict[0]["export_url"])
        self.assertEqual(
-            export_dict[0]["export_url"],
-            "https://test-avatar-bucket.s3.amazonaws.com" + export_path,
+            parsed_url._replace(query="").geturl(),
+            "https://test-export-bucket.s3.amazonaws.com" + export_path,
        )
        self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
        self.assert_length(

@@ -2,11 +2,15 @@ import os
import re
from io import BytesIO, StringIO
from unittest.mock import patch
-from urllib.parse import urlsplit
+from urllib.parse import parse_qs, urlsplit

import boto3
import botocore.exceptions
import pyvips
from django.conf import settings
from django.test import override_settings
from moto.core.decorator import mock_aws
from mypy_boto3_s3.type_defs import CopySourceTypeDef

import zerver.lib.upload
from zerver.actions.create_user import do_create_user
@@ -569,7 +573,7 @@ class S3Test(ZulipTestCase):
    @use_s3_backend
    def test_tarball_upload_and_deletion(self) -> None:
-        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+        bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]

        user_profile = self.example_user("iago")
        self.assertTrue(user_profile.is_realm_admin)
@@ -590,11 +594,18 @@
        # Verify the percent_callback API works
        self.assertEqual(total_bytes_transferred, 5)

-        result = re.search(re.compile(r"([0-9a-fA-F]{32})"), url)
+        parsed_url = urlsplit(url)
+        result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
        if result is not None:
            hex_value = result.group(1)
-            expected_url = f"https://{bucket.name}.s3.amazonaws.com/exports/{hex_value}/{os.path.basename(tarball_path)}"
-            self.assertEqual(url, expected_url)
+            expected_url = (
+                f"https://{bucket.name}.s3.amazonaws.com/{hex_value}/{os.path.basename(tarball_path)}"
+            )
+            self.assertEqual(parsed_url._replace(query="").geturl(), expected_url)
+        params = parse_qs(parsed_url.query)
+        self.assertEqual(params["AWSAccessKeyId"], ["test-key"])
+        self.assertIn("Signature", params)
+        self.assertIn("Expires", params)

        # Delete the tarball.
        with self.assertLogs(level="WARNING") as warn_log:
@@ -603,5 +614,132 @@
            warn_log.output,
            ["WARNING:root:not_a_file does not exist. Its entry in the database will be removed."],
        )
        self.assertEqual(delete_export_tarball(parsed_url.path), parsed_url.path)

    @override_settings(S3_EXPORT_BUCKET="")
    @use_s3_backend
    def test_tarball_upload_and_deletion_no_export_bucket(self) -> None:
        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]

        user_profile = self.example_user("iago")
        self.assertTrue(user_profile.is_realm_admin)

        tarball_path = os.path.join(settings.TEST_WORKER_DIR, "tarball.tar.gz")
        with open(tarball_path, "w") as f:
            f.write("dummy")

        url = upload_export_tarball(user_profile.realm, tarball_path)
        result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
        if result is not None:
            hex_value = result.group(1)
            expected_url = f"https://{bucket.name}.s3.amazonaws.com/exports/{hex_value}/{os.path.basename(tarball_path)}"
            self.assertEqual(url, expected_url)

        # Delete the tarball.
        path_id = urlsplit(url).path
        self.assertEqual(delete_export_tarball(path_id), path_id)
    @mock_aws
    def test_tarball_upload_avatar_bucket_download_export_bucket(self) -> None:
        """Verify that tarballs uploaded to the avatar bucket can later be
        accessed via the export bucket, once the server is configured to
        use an export bucket.
        """
        user_profile = self.example_user("iago")
        self.assertTrue(user_profile.is_realm_admin)

        tarball_path = os.path.join(settings.TEST_WORKER_DIR, "tarball.tar.gz")
        with open(tarball_path, "w") as f:
            f.write("dummy")

        # Upload the export tarball to the avatar bucket.
        with override_settings(S3_EXPORT_BUCKET=""):
            backend = S3UploadBackend()
            avatar_bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
            with patch("zerver.lib.upload.upload_backend", backend):
                public_url = upload_export_tarball(user_profile.realm, tarball_path)
                avatar_object_key = urlsplit(public_url).path.removeprefix("/")

        # Verify that old tarballs (uploaded to the avatar bucket) can be
        # accessed from the export bucket, once the server is configured to
        # use a separate export bucket.
        with override_settings(S3_EXPORT_BUCKET=settings.S3_EXPORT_BUCKET):
            backend = S3UploadBackend()
            export_bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]

            # Copy existing exports to the new bucket; this operation is
            # performed as a part of configuring the export bucket.
            session = boto3.session.Session(settings.S3_KEY, settings.S3_SECRET_KEY)
            s3 = session.resource("s3")
            copy_source: CopySourceTypeDef = {
                "Bucket": avatar_bucket.name,
                "Key": avatar_object_key,
            }
            export_object_key = avatar_object_key.removeprefix("exports/")
            s3.meta.client.copy(copy_source, export_bucket.name, export_object_key)

            # Verify the copy operation.
            object = s3.Object(export_bucket.name, export_object_key)
            content = object.get()["Body"].read()
            self.assertEqual(content, b"dummy")

            # Verify that the tarball can be accessed using the old 'avatar_object_key'.
            url = backend.get_export_tarball_url(user_profile.realm, avatar_object_key)
            parsed_url = urlsplit(url)
            result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
            if result is not None:
                hex_value = result.group(1)
                expected_url = f"https://{export_bucket.name}.s3.amazonaws.com/{hex_value}/{os.path.basename(tarball_path)}"
                self.assertEqual(parsed_url._replace(query="").geturl(), expected_url)
            params = parse_qs(parsed_url.query)
            self.assertEqual(params["AWSAccessKeyId"], ["test-key"])
            self.assertIn("Signature", params)
            self.assertIn("Expires", params)
    @mock_aws
    def test_tarball_upload_export_bucket_download_avatar_bucket(self) -> None:
        """Verify that tarballs uploaded to the export bucket can later be
        accessed via the avatar bucket, once the server is configured to
        use ONLY the avatar bucket.
        """
        user_profile = self.example_user("iago")
        self.assertTrue(user_profile.is_realm_admin)

        tarball_path = os.path.join(settings.TEST_WORKER_DIR, "tarball.tar.gz")
        with open(tarball_path, "w") as f:
            f.write("dummy")

        # Upload the export tarball to the export bucket.
        with override_settings(S3_EXPORT_BUCKET=settings.S3_EXPORT_BUCKET):
            backend = S3UploadBackend()
            export_bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
            with patch("zerver.lib.upload.upload_backend", backend):
                public_url = upload_export_tarball(user_profile.realm, tarball_path)
                export_object_key = urlsplit(public_url).path.removeprefix("/")

        # Verify that old tarballs (uploaded to the export bucket) can be
        # accessed from the avatar bucket, once the server is configured to
        # use ONLY the avatar bucket.
        with override_settings(S3_EXPORT_BUCKET=""):
            backend = S3UploadBackend()
            avatar_bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]

            # Copy existing exports to the avatar bucket; this operation is
            # performed as a part of the change to use ONLY the avatar bucket.
            session = boto3.session.Session(settings.S3_KEY, settings.S3_SECRET_KEY)
            s3 = session.resource("s3")
            copy_source: CopySourceTypeDef = {
                "Bucket": export_bucket.name,
                "Key": export_object_key,
            }
            avatar_object_key = "exports/" + export_object_key
            s3.meta.client.copy(copy_source, avatar_bucket.name, avatar_object_key)

            # Verify the copy operation.
            object = s3.Object(avatar_bucket.name, avatar_object_key)
            content = object.get()["Body"].read()
            self.assertEqual(content, b"dummy")

            # Verify that the tarball can still be accessed using the old 'export_object_key'.
            url = backend.get_export_tarball_url(user_profile.realm, export_object_key)
            result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
            if result is not None:
                hex_value = result.group(1)
                expected_url = f"https://{avatar_bucket.name}.s3.amazonaws.com/exports/{hex_value}/{os.path.basename(tarball_path)}"
                self.assertEqual(url, expected_url)

@@ -151,6 +151,7 @@ DEFAULT_AVATAR_URI: str | None = None
DEFAULT_LOGO_URI: str | None = None
S3_AVATAR_BUCKET = ""
S3_AUTH_UPLOADS_BUCKET = ""
S3_EXPORT_BUCKET = ""
S3_REGION: str | None = None
S3_ENDPOINT_URL: str | None = None
S3_ADDRESSING_STYLE: Literal["auto", "virtual", "path"] = "auto"

@@ -793,6 +793,7 @@ ENABLE_FILE_LINKS = False
LOCAL_UPLOADS_DIR = "/home/zulip/uploads"
# S3_AUTH_UPLOADS_BUCKET = ""
# S3_AVATAR_BUCKET = ""
# S3_EXPORT_BUCKET = ""
# S3_REGION = None
# S3_ENDPOINT_URL = None
# S3_AVATAR_PUBLIC_URL_PREFIX = None

@@ -145,6 +145,7 @@ S3_KEY = "test-key"
S3_SECRET_KEY = "test-secret-key"
S3_AUTH_UPLOADS_BUCKET = "test-authed-bucket"
S3_AVATAR_BUCKET = "test-avatar-bucket"
S3_EXPORT_BUCKET = "test-export-bucket"
INLINE_URL_EMBED_PREVIEW = False