mirror of https://github.com/zulip/zulip.git
137 lines
4.5 KiB
Python
137 lines
4.5 KiB
Python
import logging
|
|
import os
|
|
import shutil
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from zerver.data_import.import_util import build_attachment, create_converted_data_files
|
|
|
|
|
|
class AttachmentHandler:
|
|
def __init__(self) -> None:
|
|
self.info_dict: Dict[str, Dict[str, Any]] = {}
|
|
|
|
def handle_message_data(self,
|
|
realm_id: int,
|
|
message_id: int,
|
|
sender_id: int,
|
|
attachment: Dict[str, Any],
|
|
files_dir: str) -> Optional[str]:
|
|
if not attachment:
|
|
return None
|
|
|
|
name = attachment['name']
|
|
|
|
if 'path' not in attachment:
|
|
logging.info('Skipping HipChat attachment with missing path data: ' + name)
|
|
return None
|
|
|
|
size = attachment['size']
|
|
path = attachment['path']
|
|
|
|
local_fn = os.path.join(files_dir, path)
|
|
|
|
if not os.path.exists(local_fn):
|
|
# HipChat has an option to not include these in its
|
|
# exports, since file uploads can be very large.
|
|
logging.info('Skipping attachment with no file data: ' + local_fn)
|
|
return None
|
|
|
|
target_path = os.path.join(
|
|
str(realm_id),
|
|
'HipChatImportAttachment',
|
|
path,
|
|
)
|
|
|
|
if target_path in self.info_dict:
|
|
logging.info("file used multiple times: " + path)
|
|
info = self.info_dict[target_path]
|
|
info['message_ids'].add(message_id)
|
|
return info['content']
|
|
|
|
# HipChat provides size info, but it's not
|
|
# completely trustworthy, so we we just
|
|
# ask the OS for file details.
|
|
size = os.path.getsize(local_fn)
|
|
mtime = os.path.getmtime(local_fn)
|
|
|
|
content = f'[{name}](/user_uploads/{target_path})'
|
|
|
|
info = dict(
|
|
message_ids={message_id},
|
|
sender_id=sender_id,
|
|
local_fn=local_fn,
|
|
target_path=target_path,
|
|
name=name,
|
|
size=size,
|
|
mtime=mtime,
|
|
content=content,
|
|
)
|
|
self.info_dict[target_path] = info
|
|
|
|
return content
|
|
|
|
def write_info(self, output_dir: str, realm_id: int) -> None:
|
|
attachments: List[Dict[str, Any]] = []
|
|
uploads_records: List[Dict[str, Any]] = []
|
|
|
|
def add_attachment(info: Dict[str, Any]) -> None:
|
|
build_attachment(
|
|
realm_id=realm_id,
|
|
message_ids=info['message_ids'],
|
|
user_id=info['sender_id'],
|
|
fileinfo=dict(
|
|
created=info['mtime'], # minor lie
|
|
size=info['size'],
|
|
name=info['name'],
|
|
),
|
|
s3_path=info['target_path'],
|
|
zerver_attachment=attachments,
|
|
)
|
|
|
|
def add_upload(info: Dict[str, Any]) -> None:
|
|
target_path = info['target_path']
|
|
upload_rec = dict(
|
|
size=info['size'],
|
|
user_profile_id=info['sender_id'],
|
|
realm_id=realm_id,
|
|
s3_path=target_path,
|
|
path=target_path,
|
|
content_type=None,
|
|
)
|
|
uploads_records.append(upload_rec)
|
|
|
|
def make_full_target_path(info: Dict[str, Any]) -> str:
|
|
target_path = info['target_path']
|
|
full_target_path = os.path.join(
|
|
output_dir,
|
|
'uploads',
|
|
target_path,
|
|
)
|
|
full_target_path = os.path.abspath(full_target_path)
|
|
os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
|
|
return full_target_path
|
|
|
|
def copy_file(info: Dict[str, Any]) -> None:
|
|
source_path = info['local_fn']
|
|
target_path = make_full_target_path(info)
|
|
shutil.copyfile(source_path, target_path)
|
|
|
|
logging.info('Start processing attachment files')
|
|
|
|
for info in self.info_dict.values():
|
|
add_attachment(info)
|
|
add_upload(info)
|
|
copy_file(info)
|
|
|
|
uploads_folder = os.path.join(output_dir, 'uploads')
|
|
os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
|
|
|
|
attachment = dict(
|
|
zerver_attachment=attachments,
|
|
)
|
|
|
|
create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
|
|
create_converted_data_files(attachment, output_dir, '/attachment.json')
|
|
|
|
logging.info('Done processing attachment files')
|