mirror of https://github.com/zulip/zulip.git
Rewrite sanitize_name to better preserve filenames.
The previous version of sanitize_name dropped all unicode characters and mangled filenames with multiple `.`s in the extension, leading to confusing URLs for files uploaded to Zulip. Fixes #321. [tweaked significantly by tabbott]
This commit is contained in:
parent
4a50336476
commit
e0ef1a991e
|
@ -2,6 +2,9 @@ from __future__ import absolute_import
|
|||
|
||||
from django.conf import settings
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.utils.encoding import force_text
|
||||
from django.utils.safestring import mark_safe
|
||||
import unicodedata
|
||||
|
||||
from zerver.lib.avatar import user_avatar_hash
|
||||
|
||||
|
@ -13,6 +16,7 @@ from zerver.models import get_user_profile_by_id
|
|||
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
from PIL import Image, ImageOps
|
||||
from six.moves import cStringIO as StringIO
|
||||
import random
|
||||
|
@ -28,15 +32,26 @@ import random
|
|||
# This is great, because passing the pseudofile object that Django gives
|
||||
# you to boto would be a pain.
|
||||
|
||||
# To come up with a s3 key we randomly generate a "directory". The "file
|
||||
# name" is the original filename provided by the user run through Django's
|
||||
# slugify.
|
||||
# To come up with a s3 key we randomly generate a "directory". The
|
||||
# "file name" is the original filename provided by the user run
|
||||
# through a sanitization function.
|
||||
|
||||
def sanitize_name(name):
|
||||
split_name = name.split('.')
|
||||
base = ".".join(split_name[:-1])
|
||||
extension = split_name[-1]
|
||||
return slugify(base) + "." + slugify(extension)
|
||||
def sanitize_name(value):
|
||||
"""
|
||||
Sanitizes a value to be safe to store in a Linux filesystem, in
|
||||
S3, and in a URL. So unicode is allowed, but not special
|
||||
characters other than ".", "-", and "_".
|
||||
|
||||
This implementation is based on django.utils.text.slugify; it is
|
||||
modified by:
|
||||
* hardcoding allow_unicode=True.
|
||||
* adding '.' and '_' to the list of allowed characters.
|
||||
* preserving the case of the value.
|
||||
"""
|
||||
value = force_text(value)
|
||||
value = unicodedata.normalize('NFKC', value)
|
||||
value = re.sub('[^\w\s._-]', '', value, flags=re.U).strip()
|
||||
return mark_safe(re.sub('[-\s]+', '-', value, flags=re.U))
|
||||
|
||||
def random_name(bytes=60):
|
||||
return base64.urlsafe_b64encode(os.urandom(bytes))
|
||||
|
|
|
@ -17,6 +17,7 @@ from zerver.lib.actions import compute_mit_user_fullname
|
|||
from zerver.lib.test_helpers import AuthedTestCase
|
||||
from zerver.models import get_user_profile_by_email
|
||||
from zerver.lib.test_runner import slow
|
||||
from zerver.lib.upload import sanitize_name
|
||||
|
||||
import time
|
||||
import ujson
|
||||
|
@ -227,3 +228,15 @@ class GCMTokenTests(AuthedTestCase):
|
|||
result = self.client.post('/json/users/me/android_gcm_reg_id', {'token':token})
|
||||
self.assert_json_success(result)
|
||||
|
||||
class SanitizeNameTests(TestCase):
|
||||
def test_file_name(self):
|
||||
self.assertEquals(sanitize_name(u'test.txt'), u'test.txt')
|
||||
self.assertEquals(sanitize_name(u'.hidden'), u'.hidden')
|
||||
self.assertEquals(sanitize_name(u'.hidden.txt'), u'.hidden.txt')
|
||||
self.assertEquals(sanitize_name(u'tarball.tar.gz'), u'tarball.tar.gz')
|
||||
self.assertEquals(sanitize_name(u'.hidden_tarball.tar.gz'), u'.hidden_tarball.tar.gz')
|
||||
self.assertEquals(sanitize_name(u'Testing{}*&*#().ta&&%$##&&r.gz'), u'Testing.tar.gz')
|
||||
self.assertEquals(sanitize_name(u'*testingfile?*.txt'), u'testingfile.txt')
|
||||
self.assertEquals(sanitize_name(u'snowman☃.txt'), u'snowman.txt')
|
||||
self.assertEquals(sanitize_name(u'테스트.txt'), u'테스트.txt')
|
||||
self.assertEquals(sanitize_name(u'~/."\`\?*"u0`000ssh/test.t**{}ar.gz'), u'.u0000sshtest.tar.gz')
|
||||
|
|
Loading…
Reference in New Issue