s3: Only use get_bucket to get to boto3 clients and resources.

boto3 has two different modalities of making API calls -- through
resources, and through clients.  Resources are a higher-level
abstraction, and thus more generally useful, but some APIs are only
accessible through clients.  It is possible to get to a client object
from a resource, but not vice versa.

Use `get_bucket(...).meta.client` when we need direct access to the
client object for more complex API calls; this lets all of the
configuration for how to access S3 to sit within `get_bucket`.  Client
objects are not bound to only one bucket, but we get to them based on
the bucket we will be interacting with, for clarity.

We removed the cached session object, as it serves no real purpose.

(cherry picked from commit 617c35398a)
This commit is contained in:
Alex Vandiver 2024-01-03 20:26:23 +00:00 committed by Tim Abbott
parent c5394d9ca0
commit 5192ae0fd2
1 changed files with 20 additions and 26 deletions

View File

@ -8,9 +8,9 @@ from urllib.parse import urljoin, urlsplit, urlunsplit
import boto3
import botocore
from boto3.session import Session
from botocore.client import Config
from django.conf import settings
from mypy_boto3_s3 import S3Client
from mypy_boto3_s3.service_resource import Bucket, Object
from typing_extensions import override
@ -56,13 +56,17 @@ if settings.S3_SKIP_PROXY is True: # nocoverage
botocore.utils.should_bypass_proxies = lambda url: True
def get_bucket(bucket_name: str, session: Optional[Session] = None) -> Bucket:
if session is None:
session = Session(settings.S3_KEY, settings.S3_SECRET_KEY)
bucket = session.resource(
"s3", region_name=settings.S3_REGION, endpoint_url=settings.S3_ENDPOINT_URL
def get_bucket(bucket_name: str, authed: bool = True) -> Bucket:
return boto3.resource(
"s3",
aws_access_key_id=settings.S3_KEY if authed else None,
aws_secret_access_key=settings.S3_SECRET_KEY if authed else None,
region_name=settings.S3_REGION,
endpoint_url=settings.S3_ENDPOINT_URL,
config=Config(
signature_version=None if authed else botocore.UNSIGNED,
),
).Bucket(bucket_name)
return bucket
def upload_image_to_s3(
@ -102,13 +106,7 @@ def upload_image_to_s3(
def get_signed_upload_url(path: str, force_download: bool = False) -> str:
client = boto3.client(
"s3",
aws_access_key_id=settings.S3_KEY,
aws_secret_access_key=settings.S3_SECRET_KEY,
region_name=settings.S3_REGION,
endpoint_url=settings.S3_ENDPOINT_URL,
)
client: S3Client = get_bucket(settings.S3_AUTH_UPLOADS_BUCKET).meta.client # type: ignore[assignment] # https://github.com/youtype/mypy_boto3_builder/issues/239
params = {
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
"Key": path,
@ -126,9 +124,8 @@ def get_signed_upload_url(path: str, force_download: bool = False) -> str:
class S3UploadBackend(ZulipUploadBackend):
def __init__(self) -> None:
self.session = Session(settings.S3_KEY, settings.S3_SECRET_KEY)
self.avatar_bucket = get_bucket(settings.S3_AVATAR_BUCKET, self.session)
self.uploads_bucket = get_bucket(settings.S3_AUTH_UPLOADS_BUCKET, self.session)
self.avatar_bucket = get_bucket(settings.S3_AVATAR_BUCKET)
self.uploads_bucket = get_bucket(settings.S3_AUTH_UPLOADS_BUCKET)
self.public_upload_url_base = self.construct_public_upload_url_base()
def delete_file_from_s3(self, path_id: str, bucket: Bucket) -> bool:
@ -162,12 +159,11 @@ class S3UploadBackend(ZulipUploadBackend):
# we instead back-compute the URL pattern here.
DUMMY_KEY = "dummy_key_ignored"
client = self.session.client(
"s3",
region_name=settings.S3_REGION,
endpoint_url=settings.S3_ENDPOINT_URL,
config=Config(signature_version=botocore.UNSIGNED),
)
# We do not access self.avatar_bucket.meta.client directly,
# since that client is auth'd, and we want only the direct
# unauthed endpoint here.
client: S3Client = get_bucket(self.avatar_bucket.name, authed=False).meta.client # type: ignore[assignment] # https://github.com/youtype/mypy_boto3_builder/issues/239
dummy_signed_url = client.generate_presigned_url(
ClientMethod="get_object",
Params={
@ -250,9 +246,7 @@ class S3UploadBackend(ZulipUploadBackend):
@override
def all_message_attachments(self) -> Iterator[Tuple[str, datetime]]:
client = self.session.client(
"s3", region_name=settings.S3_REGION, endpoint_url=settings.S3_ENDPOINT_URL
)
client = self.uploads_bucket.meta.client
paginator = client.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(Bucket=self.uploads_bucket.name)