exports: Add a separate bucket for realm exports.

This allows finer-grained access control and auditing.  The links
generated also expire after one week, and the suggested configuration
is that the underlying data does as well.

Co-authored-by: Prakhar Pratyush <prakhar@zulip.com>
Alex Vandiver 2022-04-06 15:46:13 -07:00 committed by Tim Abbott
parent c1e8ecd08f
commit e125ad823d
8 changed files with 259 additions and 29 deletions
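
The one-week expiry of the generated links is handled by the code below; expiring
the underlying data is only a suggestion and is not configured by this commit. A
minimal boto3 sketch of such a lifecycle rule, assuming a dedicated export bucket
named `example-zulip-exports` (the bucket name and rule ID are illustrative):

```python
import boto3

s3 = boto3.client("s3")

# Expire export tarballs after 7 days, matching the one-week lifetime of the
# presigned download links the server hands out.  Illustrative only; adjust
# the bucket name and rule ID for your deployment.
s3.put_bucket_lifecycle_configuration(
    Bucket="example-zulip-exports",
    LifecycleConfiguration={
        "Rules": [
            {
                "ID": "expire-realm-exports",
                "Status": "Enabled",
                "Filter": {"Prefix": ""},  # every object in the bucket
                "Expiration": {"Days": 7},
            }
        ]
    },
)
```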

View File

@@ -241,3 +241,64 @@ lifecycle transition cost][s3-pricing].
[s3-storage-class]: https://aws.amazon.com/s3/storage-classes/
[s3-storage-class-constant]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-StorageClass
[s3-pricing]: https://aws.amazon.com/s3/pricing/

## Data export bucket

The [data export process](export-and-import.md#data-export), when
[triggered from the
UI](https://zulip.com/help/export-your-organization), uploads the
completed export so that it is available to download from the server;
this is also available [from the command
line](export-and-import.md#export-your-zulip-data) by passing
`--upload`. When the S3 backend is used, these uploads are done to S3.

By default, exports are uploaded to the bucket containing user avatars
(`S3_AVATAR_BUCKET`), because that bucket is world-readable, allowing
easy generation of links to download the export.

If you would like to store exports in a dedicated bucket, you can set
`S3_EXPORT_BUCKET` in your `/etc/zulip/settings.py`. This bucket should
be configured like the uploads bucket, allowing access only to the
Zulip account, since the download links Zulip generates for it are
only valid for one week at a time:

```json
{
    "Version": "2012-10-17",
    "Id": "Policy1468991802322",
    "Statement": [
        {
            "Sid": "Stmt1468991795390",
            "Effect": "Allow",
            "Principal": {
                "AWS": "ARN_PRINCIPAL_HERE"
            },
            "Action": [
                "s3:GetObject",
                "s3:DeleteObject",
                "s3:PutObject"
            ],
            "Resource": "arn:aws:s3:::BUCKET_NAME_HERE/*"
        },
        {
            "Sid": "Stmt1468991795391",
            "Effect": "Allow",
            "Principal": {
                "AWS": "ARN_PRINCIPAL_HERE"
            },
            "Action": "s3:ListBucket",
            "Resource": "arn:aws:s3:::BUCKET_NAME_HERE"
        }
    ]
}
```
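
With the bucket and policy in place, enabling it on the Zulip side is a single
setting; a minimal sketch of the `/etc/zulip/settings.py` entry (the bucket
name is an example):

```python
# /etc/zulip/settings.py
S3_EXPORT_BUCKET = "example-zulip-exports"
```

As with other settings changes, restart the server afterwards so that the S3
upload backend is reinitialized with the new bucket.
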
You should copy existing exports to the new bucket. For instance,
using the [AWS CLI](https://aws.amazon.com/cli/)'s [`aws s3
sync`](https://docs.aws.amazon.com/cli/latest/reference/s3/sync.html),
if the old bucket was named `example-zulip-avatars` and the new export
bucket is named `example-zulip-exports`:
```
aws s3 sync s3://example-zulip-avatars/exports/ s3://example-zulip-exports/
```
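
The same migration can also be scripted with boto3, which is essentially what
the new tests below do; a rough sketch using the example bucket names above
(note that the `exports/` prefix is dropped when copying into the dedicated
export bucket, just as the `aws s3 sync` invocation does):

```python
import boto3

# Example bucket names, matching the `aws s3 sync` command above.
OLD_BUCKET = "example-zulip-avatars"
NEW_BUCKET = "example-zulip-exports"

s3 = boto3.resource("s3")

# Copy every object under exports/ in the avatar bucket to the top level of
# the export bucket, dropping the "exports/" prefix.
for obj in s3.Bucket(OLD_BUCKET).objects.filter(Prefix="exports/"):
    new_key = obj.key.removeprefix("exports/")
    s3.meta.client.copy({"Bucket": OLD_BUCKET, "Key": obj.key}, NEW_BUCKET, new_key)
```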

View File

@@ -2470,9 +2470,6 @@ def export_realm_wrapper(
     if not upload:
         return None

-    # We upload to the `avatars` bucket because that's world-readable
-    # without additional configuration. We'll likely want to change
-    # that in the future.
     print("Uploading export tarball...")
     public_url = zerver.lib.upload.upload_backend.upload_export_tarball(
         realm, tarball_path, percent_callback=percent_callback

View File

@@ -14,7 +14,7 @@ from botocore.response import StreamingBody
 from django.conf import settings
 from django.utils.http import content_disposition_header
 from mypy_boto3_s3.client import S3Client
-from mypy_boto3_s3.service_resource import Bucket
+from mypy_boto3_s3.service_resource import Bucket, Object
 from typing_extensions import override

 from zerver.lib.partial import partial
@@ -145,6 +145,10 @@ class S3UploadBackend(ZulipUploadBackend):
     def __init__(self) -> None:
         self.avatar_bucket = get_bucket(settings.S3_AVATAR_BUCKET)
         self.uploads_bucket = get_bucket(settings.S3_AUTH_UPLOADS_BUCKET)
+        self.export_bucket: Bucket | None = None
+        if settings.S3_EXPORT_BUCKET:
+            self.export_bucket = get_bucket(settings.S3_EXPORT_BUCKET)
+
         self.public_upload_url_base = self.construct_public_upload_url_base()

     def delete_file_from_s3(self, path_id: str, bucket: Bucket) -> bool:
@@ -428,19 +432,46 @@ class S3UploadBackend(ZulipUploadBackend):
     @override
     def get_export_tarball_url(self, realm: Realm, export_path: str) -> str:
-        client = self.avatar_bucket.meta.client
-        signed_url = client.generate_presigned_url(
-            ClientMethod="get_object",
-            Params={
-                "Bucket": self.avatar_bucket.name,
-                # export_path has a leading /
-                "Key": export_path.removeprefix("/"),
-            },
-            ExpiresIn=0,
-        )
-        # Strip off the signing query parameters, since this URL is public
-        return urlsplit(signed_url)._replace(query="").geturl()
+        export_path = export_path.removeprefix("/")
+        if self.export_bucket:
+            # Fix old data if the row was created when an export bucket was not in use.
+            export_path = export_path.removeprefix("exports/")
+            client = self.export_bucket.meta.client
+            return client.generate_presigned_url(
+                ClientMethod="get_object",
+                Params={
+                    "Bucket": self.export_bucket.name,
+                    "Key": export_path,
+                },
+                # Expires in one week, the longest allowed by AWS
+                ExpiresIn=60 * 60 * 24 * 7,
+            )
+        else:
+            if not export_path.startswith("exports/"):
+                export_path = "exports/" + export_path
+            client = self.avatar_bucket.meta.client
+            signed_url = client.generate_presigned_url(
+                ClientMethod="get_object",
+                Params={
+                    "Bucket": self.avatar_bucket.name,
+                    "Key": export_path,
+                },
+                ExpiresIn=0,
+            )
+            # Strip off the signing query parameters, since this URL is public
+            return urlsplit(signed_url)._replace(query="").geturl()
+
+    def export_object(self, tarball_path: str) -> Object:
+        if self.export_bucket:
+            return self.export_bucket.Object(
+                os.path.join(secrets.token_hex(16), os.path.basename(tarball_path))
+            )
+        else:
+            # We fall back to the avatar bucket, because it's world-readable.
+            return self.avatar_bucket.Object(
+                os.path.join("exports", secrets.token_hex(16), os.path.basename(tarball_path))
+            )

     @override
     def upload_export_tarball(
         self,
@@ -448,10 +479,7 @@ class S3UploadBackend(ZulipUploadBackend):
         tarball_path: str,
         percent_callback: Callable[[Any], None] | None = None,
     ) -> str:
-        # We use the avatar bucket, because it's world-readable.
-        key = self.avatar_bucket.Object(
-            os.path.join("exports", secrets.token_hex(16), os.path.basename(tarball_path))
-        )
+        key = self.export_object(tarball_path)

         if percent_callback is None:
             key.upload_file(Filename=tarball_path)
@@ -464,6 +492,7 @@ class S3UploadBackend(ZulipUploadBackend):
     def delete_export_tarball(self, export_path: str) -> str | None:
         assert export_path.startswith("/")
         path_id = export_path.removeprefix("/")
-        if self.delete_file_from_s3(path_id, self.avatar_bucket):
+        bucket = self.export_bucket or self.avatar_bucket
+        if self.delete_file_from_s3(path_id, bucket):
             return export_path
         return None

View File

@@ -1,5 +1,6 @@
 import os
 from unittest.mock import patch
+from urllib.parse import urlsplit

 import botocore.exceptions
 from django.conf import settings
@@ -42,7 +43,7 @@ class RealmExportTest(ZulipTestCase):
     def test_endpoint_s3(self) -> None:
         admin = self.example_user("iago")
         self.login_user(admin)
-        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+        bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
         tarball_path = create_dummy_file("test-export.tar.gz")

         # Test the export logic.
@@ -82,9 +83,10 @@ class RealmExportTest(ZulipTestCase):
         # Test that the export we have is the export we created.
         export_dict = response_dict["exports"]
         self.assertEqual(export_dict[0]["id"], audit_log_entry.id)
+        parsed_url = urlsplit(export_dict[0]["export_url"])
         self.assertEqual(
-            export_dict[0]["export_url"],
-            "https://test-avatar-bucket.s3.amazonaws.com" + export_path,
+            parsed_url._replace(query="").geturl(),
+            "https://test-export-bucket.s3.amazonaws.com" + export_path,
         )
         self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
         self.assert_length(

View File

@@ -2,11 +2,15 @@ import os
 import re
 from io import BytesIO, StringIO
 from unittest.mock import patch
-from urllib.parse import urlsplit
+from urllib.parse import parse_qs, urlsplit

+import boto3
 import botocore.exceptions
 import pyvips
 from django.conf import settings
+from django.test import override_settings
+from moto.core.decorator import mock_aws
+from mypy_boto3_s3.type_defs import CopySourceTypeDef

 import zerver.lib.upload
 from zerver.actions.create_user import do_create_user
@@ -569,7 +573,7 @@ class S3Test(ZulipTestCase):

     @use_s3_backend
     def test_tarball_upload_and_deletion(self) -> None:
-        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+        bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
         user_profile = self.example_user("iago")
         self.assertTrue(user_profile.is_realm_admin)

@@ -590,11 +594,18 @@ class S3Test(ZulipTestCase):
         # Verify the percent_callback API works
         self.assertEqual(total_bytes_transferred, 5)

-        result = re.search(re.compile(r"([0-9a-fA-F]{32})"), url)
+        parsed_url = urlsplit(url)
+        result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
         if result is not None:
             hex_value = result.group(1)
-        expected_url = f"https://{bucket.name}.s3.amazonaws.com/exports/{hex_value}/{os.path.basename(tarball_path)}"
-        self.assertEqual(url, expected_url)
+        expected_url = (
+            f"https://{bucket.name}.s3.amazonaws.com/{hex_value}/{os.path.basename(tarball_path)}"
+        )
+        self.assertEqual(parsed_url._replace(query="").geturl(), expected_url)
+        params = parse_qs(parsed_url.query)
+        self.assertEqual(params["AWSAccessKeyId"], ["test-key"])
+        self.assertIn("Signature", params)
+        self.assertIn("Expires", params)

         # Delete the tarball.
         with self.assertLogs(level="WARNING") as warn_log:
@@ -603,5 +614,132 @@ class S3Test(ZulipTestCase):
             warn_log.output,
             ["WARNING:root:not_a_file does not exist. Its entry in the database will be removed."],
         )
+        self.assertEqual(delete_export_tarball(parsed_url.path), parsed_url.path)
+
+    @override_settings(S3_EXPORT_BUCKET="")
+    @use_s3_backend
+    def test_tarball_upload_and_deletion_no_export_bucket(self) -> None:
+        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+        user_profile = self.example_user("iago")
+        self.assertTrue(user_profile.is_realm_admin)
+
+        tarball_path = os.path.join(settings.TEST_WORKER_DIR, "tarball.tar.gz")
+        with open(tarball_path, "w") as f:
+            f.write("dummy")
+
+        url = upload_export_tarball(user_profile.realm, tarball_path)
+        result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
+        if result is not None:
+            hex_value = result.group(1)
+        expected_url = f"https://{bucket.name}.s3.amazonaws.com/exports/{hex_value}/{os.path.basename(tarball_path)}"
+        self.assertEqual(url, expected_url)
+
+        # Delete the tarball.
         path_id = urlsplit(url).path
         self.assertEqual(delete_export_tarball(path_id), path_id)
+
+    @mock_aws
+    def test_tarball_upload_avatar_bucket_download_export_bucket(self) -> None:
+        """Test to verify that tarballs uploaded to avatar bucket can be later
+        accessed via export bucket when server is configured to use export bucket.
+        """
+        user_profile = self.example_user("iago")
+        self.assertTrue(user_profile.is_realm_admin)
+
+        tarball_path = os.path.join(settings.TEST_WORKER_DIR, "tarball.tar.gz")
+        with open(tarball_path, "w") as f:
+            f.write("dummy")
+
+        # Upload export tarball to the avatar bucket.
+        with override_settings(S3_EXPORT_BUCKET=""):
+            backend = S3UploadBackend()
+            avatar_bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+            with patch("zerver.lib.upload.upload_backend", backend):
+                public_url = upload_export_tarball(user_profile.realm, tarball_path)
+                avatar_object_key = urlsplit(public_url).path.removeprefix("/")
+
+        # Verify that old tarballs (uploaded to avatar bucket) can be accessed
+        # from export bucket, once server is configured to use a separate export bucket.
+        with override_settings(S3_EXPORT_BUCKET=settings.S3_EXPORT_BUCKET):
+            backend = S3UploadBackend()
+            export_bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
+
+            # Copy existing exports to the new bucket.
+            # This operation is performed as a part of configuring export bucket.
+            session = boto3.session.Session(settings.S3_KEY, settings.S3_SECRET_KEY)
+            s3 = session.resource("s3")
+            copy_source: CopySourceTypeDef = {
+                "Bucket": avatar_bucket.name,
+                "Key": avatar_object_key,
+            }
+            export_object_key = avatar_object_key.removeprefix("exports/")
+            s3.meta.client.copy(copy_source, export_bucket.name, export_object_key)
+
+            # Verify copy operation.
+            object = s3.Object(export_bucket.name, export_object_key)
+            content = object.get()["Body"].read()
+            self.assertEqual(content, b"dummy")
+
+            # Verify that tarball can be accessed using old 'avatar_object_key'.
+            url = backend.get_export_tarball_url(user_profile.realm, avatar_object_key)
+            parsed_url = urlsplit(url)
+            result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
+            if result is not None:
+                hex_value = result.group(1)
+            expected_url = f"https://{export_bucket.name}.s3.amazonaws.com/{hex_value}/{os.path.basename(tarball_path)}"
+            self.assertEqual(parsed_url._replace(query="").geturl(), expected_url)
+            params = parse_qs(parsed_url.query)
+            self.assertEqual(params["AWSAccessKeyId"], ["test-key"])
+            self.assertIn("Signature", params)
+            self.assertIn("Expires", params)
+
+    @mock_aws
+    def test_tarball_upload_export_bucket_download_avatar_bucket(self) -> None:
+        """Test to verify that tarballs uploaded to export bucket can be later
+        accessed via avatar bucket when server is configured to use ONLY avatar bucket.
+        """
+        user_profile = self.example_user("iago")
+        self.assertTrue(user_profile.is_realm_admin)
+
+        tarball_path = os.path.join(settings.TEST_WORKER_DIR, "tarball.tar.gz")
+        with open(tarball_path, "w") as f:
+            f.write("dummy")
+
+        # Upload export tarball to the export bucket.
+        with override_settings(S3_EXPORT_BUCKET=settings.S3_EXPORT_BUCKET):
+            backend = S3UploadBackend()
+            export_bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
+            with patch("zerver.lib.upload.upload_backend", backend):
+                public_url = upload_export_tarball(user_profile.realm, tarball_path)
+                export_object_key = urlsplit(public_url).path.removeprefix("/")
+
+        # Verify that old tarballs (uploaded to export bucket) can be accessed
+        # from avatar bucket, once server is configured to use ONLY avatar bucket.
+        with override_settings(S3_EXPORT_BUCKET=""):
+            backend = S3UploadBackend()
+            avatar_bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+
+            # Copy existing exports to the avatar bucket.
+            # This operation is performed as a part of the changes to use ONLY avatar bucket.
+            session = boto3.session.Session(settings.S3_KEY, settings.S3_SECRET_KEY)
+            s3 = session.resource("s3")
+            copy_source: CopySourceTypeDef = {
+                "Bucket": export_bucket.name,
+                "Key": export_object_key,
+            }
+            avatar_object_key = "exports/" + export_object_key
+            s3.meta.client.copy(copy_source, avatar_bucket.name, avatar_object_key)
+
+            # Verify copy operation.
+            object = s3.Object(avatar_bucket.name, avatar_object_key)
+            content = object.get()["Body"].read()
+            self.assertEqual(content, b"dummy")
+
+            # Verify that tarball can still be accessed using old 'export_object_key'.
+            url = backend.get_export_tarball_url(user_profile.realm, export_object_key)
+            result = re.search(re.compile(r"/([0-9a-fA-F]{32})/"), url)
+            if result is not None:
+                hex_value = result.group(1)
+            expected_url = f"https://{avatar_bucket.name}.s3.amazonaws.com/exports/{hex_value}/{os.path.basename(tarball_path)}"
+            self.assertEqual(url, expected_url)

View File

@@ -151,6 +151,7 @@ DEFAULT_AVATAR_URI: str | None = None
 DEFAULT_LOGO_URI: str | None = None
 S3_AVATAR_BUCKET = ""
 S3_AUTH_UPLOADS_BUCKET = ""
+S3_EXPORT_BUCKET = ""
 S3_REGION: str | None = None
 S3_ENDPOINT_URL: str | None = None
 S3_ADDRESSING_STYLE: Literal["auto", "virtual", "path"] = "auto"

View File

@@ -793,6 +793,7 @@ ENABLE_FILE_LINKS = False
 LOCAL_UPLOADS_DIR = "/home/zulip/uploads"
 # S3_AUTH_UPLOADS_BUCKET = ""
 # S3_AVATAR_BUCKET = ""
+# S3_EXPORT_BUCKET = ""
 # S3_REGION = None
 # S3_ENDPOINT_URL = None
 # S3_AVATAR_PUBLIC_URL_PREFIX = None

View File

@@ -145,6 +145,7 @@ S3_KEY = "test-key"
 S3_SECRET_KEY = "test-secret-key"
 S3_AUTH_UPLOADS_BUCKET = "test-authed-bucket"
 S3_AVATAR_BUCKET = "test-avatar-bucket"
+S3_EXPORT_BUCKET = "test-export-bucket"

 INLINE_URL_EMBED_PREVIEW = False