2018-10-13 16:25:44 +02:00
|
|
|
import logging
|
|
|
|
import shutil
|
|
|
|
import os
|
|
|
|
|
|
|
|
from zerver.data_import.import_util import (
|
|
|
|
build_attachment,
|
|
|
|
create_converted_data_files,
|
|
|
|
)
|
|
|
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
class AttachmentHandler:
    """Collect HipChat message attachments and write them out for import.

    Call handle_message_data() once per message attachment while
    converting messages; it records what it needs in self.info_dict.
    Afterwards, call write_info() once to copy the files and emit the
    attachment/upload records.
    """

    def __init__(self) -> None:
        # Maps each attachment's target path (relative to the uploads
        # folder) to the info dict built in handle_message_data().
        self.info_dict: Dict[str, Dict[str, Any]] = {}

    def handle_message_data(self,
                            realm_id: int,
                            message_id: int,
                            sender_id: int,
                            attachment: Dict[str, Any],
                            files_dir: str) -> Optional[str]:
        """Record one message's attachment and return its markdown link.

        Args:
            realm_id: used as the first component of the target path.
            message_id: added to the set of messages using this file.
            sender_id: stored as the sender of the file the first time
                it is seen.
            attachment: attachment data for the message; must contain
                'name', and is skipped unless it also contains 'path'.
            files_dir: directory that attachment['path'] is relative to.

        Returns:
            The string `[name](/user_uploads/<target_path>)`, or None when
            the attachment is empty, has no 'path', or its file does not
            exist under files_dir.

        Raises:
            KeyError: if a non-empty attachment has no 'name'.
        """
        if not attachment:
            return None

        name = attachment['name']

        if 'path' not in attachment:
            logging.info('Skipping HipChat attachment with missing path data: %s', name)
            return None

        path = attachment['path']
        local_fn = os.path.join(files_dir, path)

        if not os.path.exists(local_fn):
            # HipChat has an option to not include these in its
            # exports, since file uploads can be very large.
            logging.info('Skipping attachment with no file data: %s', local_fn)
            return None

        target_path = os.path.join(
            str(realm_id),
            'HipChatImportAttachment',
            path,
        )

        if target_path in self.info_dict:
            # Same file referenced by another message: just remember the
            # extra message id and reuse the link built the first time.
            logging.info('file used multiple times: %s', path)
            info = self.info_dict[target_path]
            info['message_ids'].add(message_id)
            return info['content']

        # HipChat provides size info, but it's not
        # completely trustworthy, so we just
        # ask the OS for file details.
        size = os.path.getsize(local_fn)
        mtime = os.path.getmtime(local_fn)

        content = f'[{name}](/user_uploads/{target_path})'

        info = dict(
            message_ids={message_id},
            sender_id=sender_id,
            local_fn=local_fn,
            target_path=target_path,
            name=name,
            size=size,
            mtime=mtime,
            content=content,
        )
        self.info_dict[target_path] = info

        return content

    def write_info(self, output_dir: str, realm_id: int) -> None:
        """Copy all recorded attachment files and write their records.

        For every entry in self.info_dict this calls build_attachment(),
        appends an upload record, and copies the source file to
        `<output_dir>/uploads/<target_path>`. It then passes the upload
        records ('/uploads/records.json') and the attachment data
        ('/attachment.json') to create_converted_data_files().

        Args:
            output_dir: root directory of the converted data.
            realm_id: realm id stored in every attachment/upload record.
        """
        attachments: List[Dict[str, Any]] = []
        uploads_records: List[Dict[str, Any]] = []

        def add_attachment(info: Dict[str, Any]) -> None:
            # build_attachment is handed the `attachments` list; presumably
            # it appends the new row to it -- confirm in import_util.
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            # Resolve the destination under <output_dir>/uploads and make
            # sure its parent directory exists before copying into it.
            full_target_path = os.path.abspath(
                os.path.join(output_dir, 'uploads', info['target_path'])
            )
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        # Create the realm's uploads folder even when nothing was copied.
        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments,
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')