zulip/zerver/lib/upload.py

294 lines
9.7 KiB
Python

from __future__ import absolute_import
from typing import Optional, Tuple
from django.utils.translation import ugettext as _
from django.conf import settings
from django.template.defaultfilters import slugify
from django.utils.encoding import force_text
from django.core.files import File
from django.http import HttpRequest
from jinja2 import Markup as mark_safe
import unicodedata
from zerver.lib.avatar import user_avatar_hash
from zerver.lib.request import JsonableError
from boto.s3.bucket import Bucket
from boto.s3.key import Key
from boto.s3.connection import S3Connection
from mimetypes import guess_type, guess_extension
from zerver.models import get_user_profile_by_id
from zerver.models import Attachment
from zerver.models import Realm, UserProfile, UserMessage
import base64
import os
import re
import six
from PIL import Image, ImageOps
from six import text_type
from six.moves import cStringIO as StringIO
import random
import logging
# Performance Note:
#
# For writing files to S3, the file could either be stored in RAM
# (if it is less than 2.5MiB or so) or an actual temporary file on disk.
#
# Because we set FILE_UPLOAD_MAX_MEMORY_SIZE to 0, only the latter case
# should occur in practice.
#
# This is great, because passing the pseudofile object that Django gives
# you to boto would be a pain.
# To come up with a s3 key we randomly generate a "directory". The
# "file name" is the original filename provided by the user run
# through a sanitization function.
def sanitize_name(value):
# type: (six.text_type) -> str
"""
Sanitizes a value to be safe to store in a Linux filesystem, in
S3, and in a URL. So unicode is allowed, but not special
characters other than ".", "-", and "_".
This implementation is based on django.utils.text.slugify; it is
modified by:
* hardcoding allow_unicode=True.
* adding '.' and '_' to the list of allowed characters.
* preserving the case of the value.
"""
value = force_text(value)
value = unicodedata.normalize('NFKC', value)
value = re.sub('[^\w\s._-]', '', value, flags=re.U).strip()
return mark_safe(re.sub('[-\s]+', '-', value, flags=re.U))
def random_name(bytes=60):
# type: (int) -> str
return base64.urlsafe_b64encode(os.urandom(bytes))
class BadImageError(JsonableError):
pass
def resize_avatar(image_data):
# type: (str) -> str
AVATAR_SIZE = 100
try:
im = Image.open(StringIO(image_data))
im = ImageOps.fit(im, (AVATAR_SIZE, AVATAR_SIZE), Image.ANTIALIAS)
except IOError:
raise BadImageError("Could not decode avatar image; did you upload an image file?")
out = StringIO()
im.save(out, format='png')
return out.getvalue()
### S3
def get_bucket(conn, bucket_name):
# type: (S3Connection, str) -> Bucket
# Calling get_bucket() with validate=True can apparently lead
# to expensive S3 bills:
# http://www.appneta.com/blog/s3-list-get-bucket-default/
# The benefits of validation aren't completely clear to us, and
# we want to save on our bills, so we set the validate flag to False.
# (We think setting validate to True would cause us to fail faster
# in situations where buckets don't exist, but that shouldn't be
# an issue for us.)
bucket = conn.get_bucket(bucket_name, validate=False)
return bucket
def upload_image_to_s3(
bucket_name,
file_name,
content_type,
user_profile,
contents,
):
# type: (str, str, str, UserProfile, str) -> None
conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
bucket = get_bucket(conn, bucket_name)
key = Key(bucket)
key.key = file_name
key.set_metadata("user_profile_id", str(user_profile.id))
key.set_metadata("realm_id", str(user_profile.realm.id))
if content_type:
headers = {'Content-Type': content_type}
else:
headers = None
key.set_contents_from_string(contents, headers=headers)
def get_file_info(request, user_file):
# type: (HttpRequest, File) -> Tuple[str, str]
uploaded_file_name = user_file.name
content_type = request.GET.get('mimetype')
if content_type is None:
content_type = guess_type(uploaded_file_name)[0]
else:
uploaded_file_name = uploaded_file_name + guess_extension(content_type)
return uploaded_file_name, content_type
def upload_message_image_s3(uploaded_file_name, content_type, file_data, user_profile, target_realm=None):
# type: (str, str, str, UserProfile, Optional[Realm]) -> str
bucket_name = settings.S3_AUTH_UPLOADS_BUCKET
s3_file_name = "/".join([
str(target_realm.id if target_realm is not None else user_profile.realm.id),
random_name(18),
sanitize_name(uploaded_file_name)
])
url = "/user_uploads/%s" % (s3_file_name)
upload_image_to_s3(
bucket_name,
s3_file_name,
content_type,
user_profile,
file_data
)
create_attachment(uploaded_file_name, s3_file_name, user_profile)
return url
def get_signed_upload_url(path):
# type: (str) -> str
conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
return conn.generate_url(15, 'GET', bucket=settings.S3_AUTH_UPLOADS_BUCKET, key=path)
def get_realm_for_filename(path):
# type: (str) -> int
conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
key = get_bucket(conn, settings.S3_AUTH_UPLOADS_BUCKET).get_key(path)
if key is None:
# This happens if the key does not exist.
return None
return get_user_profile_by_id(key.metadata["user_profile_id"]).realm.id
def delete_message_image_s3(path_id):
# type: (str) -> bool
conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
bucket = get_bucket(conn, settings.S3_AUTH_UPLOADS_BUCKET)
# check if file exists
key = bucket.get_key(path_id)
if key is not None:
bucket.delete_key(key)
return True
file_name = path_id.split("/")[-1]
logging.warning("%s does not exist. Its entry in the database will be removed." % (file_name,))
return False
def upload_avatar_image_s3(user_file, user_profile, email):
# type: (File, UserProfile, str) -> None
content_type = guess_type(user_file.name)[0]
bucket_name = settings.S3_AVATAR_BUCKET
s3_file_name = user_avatar_hash(email)
image_data = user_file.read()
upload_image_to_s3(
bucket_name,
s3_file_name + ".original",
content_type,
user_profile,
image_data,
)
resized_data = resize_avatar(image_data)
upload_image_to_s3(
bucket_name,
s3_file_name,
'image/png',
user_profile,
resized_data,
)
# See avatar_url in avatar.py for URL. (That code also handles the case
# that users use gravatar.)
### Local
def mkdirs(path):
# type: (str) -> None
dirname = os.path.dirname(path)
if not os.path.isdir(dirname):
os.makedirs(dirname)
def write_local_file(type, path, file_data):
# type: (str, str, str) -> None
file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, type, path)
mkdirs(file_path)
with open(file_path, 'wb') as f:
f.write(file_data)
def upload_message_image_local(uploaded_file_name, content_type, file_data, user_profile, target_realm=None):
# type: (str, str, str, UserProfile, Optional[Realm]) -> str
# Split into 256 subdirectories to prevent directories from getting too big
path = "/".join([
str(user_profile.realm.id),
format(random.randint(0, 255), 'x'),
random_name(18),
sanitize_name(uploaded_file_name)
])
write_local_file('files', path, file_data)
create_attachment(uploaded_file_name, path, user_profile)
return '/user_uploads/' + path
def delete_message_image_local(path_id):
# type: (str) -> bool
file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, 'files', path_id)
if os.path.isfile(file_path):
# This removes the file but the empty folders still remain.
os.remove(file_path)
return True
file_name = path_id.split("/")[-1]
logging.warning("%s does not exist. Its entry in the database will be removed." % (file_name,))
return False
def upload_avatar_image_local(user_file, user_profile, email):
# type: (File, UserProfile, str) -> None
email_hash = user_avatar_hash(email)
image_data = user_file.read()
write_local_file('avatars', email_hash+'.original', image_data)
resized_data = resize_avatar(image_data)
write_local_file('avatars', email_hash+'.png', resized_data)
### Common
if settings.LOCAL_UPLOADS_DIR is not None:
upload_message_image = upload_message_image_local
upload_avatar_image = upload_avatar_image_local
delete_message_image = delete_message_image_local
else:
upload_message_image = upload_message_image_s3
upload_avatar_image = upload_avatar_image_s3
delete_message_image = delete_message_image_s3
def claim_attachment(path_id, message):
# type: (text_type, UserMessage) -> bool
try:
attachment = Attachment.objects.get(path_id=path_id)
attachment.messages.add(message)
attachment.save()
return True
except Attachment.DoesNotExist:
raise JsonableError(_("The upload was not successful. Please reupload the file again in a new message."))
return False
def create_attachment(file_name, path_id, user_profile):
# type: (str, str, UserProfile) -> bool
Attachment.objects.create(file_name=file_name, path_id=path_id, owner=user_profile)
return True
def upload_message_image_through_web_client(request, user_file, user_profile):
# type: (HttpRequest, File, UserProfile) -> str
uploaded_file_name, content_type = get_file_info(request, user_file)
return upload_message_image(uploaded_file_name, content_type, user_file.read(), user_profile)