zulip/zerver/data_import/hipchat_attachment.py

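"""Attachment handling for the HipChat import tool.

AttachmentHandler accumulates attachment metadata while messages are
being converted, then writes the upload records, attachment records,
and copied files that the Zulip import process reads.
"""
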
import logging
import os
import shutil
from typing import Any, Dict, List, Optional

from zerver.data_import.import_util import build_attachment, create_converted_data_files


class AttachmentHandler:
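    """Collect HipChat attachment metadata during message conversion and
    emit the corresponding Zulip import data in write_info().

    Sketch of the intended calling pattern (the real caller is the
    HipChat converter; the surrounding names here are illustrative):

        handler = AttachmentHandler()
        # While converting each message that carries an attachment:
        content = handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=sender_id,
            attachment=attachment,
            files_dir=files_dir,
        )
        if content is not None:
            message_content = content
        # After all messages have been converted:
        handler.write_info(output_dir=output_dir, realm_id=realm_id)
    """
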
    def __init__(self) -> None:
        self.info_dict: Dict[str, Dict[str, Any]] = {}

    def handle_message_data(self,
                            realm_id: int,
                            message_id: int,
                            sender_id: int,
                            attachment: Dict[str, Any],
                            files_dir: str) -> Optional[str]:
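        """Record one attachment and return the Markdown content to embed
        in the converted message, or None if the attachment cannot be
        imported.
        """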
        if not attachment:
            return None

        name = attachment['name']

        if 'path' not in attachment:
            logging.info('Skipping HipChat attachment with missing path data: ' + name)
            return None

        size = attachment['size']
        path = attachment['path']

        local_fn = os.path.join(files_dir, path)

        if not os.path.exists(local_fn):
            # HipChat has an option to not include these in its
            # exports, since file uploads can be very large.
            logging.info('Skipping attachment with no file data: ' + local_fn)
            return None

        target_path = os.path.join(
            str(realm_id),
            'HipChatImportAttachment',
            path,
        )
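
        # If several messages reference the same upload, reuse the existing
        # record and just track the extra message ID.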
        if target_path in self.info_dict:
            logging.info("file used multiple times: " + path)
            info = self.info_dict[target_path]
            info['message_ids'].add(message_id)
            return info['content']

        # HipChat provides size info, but it's not
        # completely trustworthy, so we just
        # ask the OS for file details.
        size = os.path.getsize(local_fn)
        mtime = os.path.getmtime(local_fn)
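
        # The Markdown we hand back points at the attachment's eventual
        # location under /user_uploads in the converted realm.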
        content = f'[{name}](/user_uploads/{target_path})'

        info = dict(
            message_ids={message_id},
            sender_id=sender_id,
            local_fn=local_fn,
            target_path=target_path,
            name=name,
            size=size,
            mtime=mtime,
            content=content,
        )
        self.info_dict[target_path] = info

        return content

    def write_info(self, output_dir: str, realm_id: int) -> None:
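        """Write upload records, attachment records, and the attachment
        files themselves into output_dir.
        """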
        attachments: List[Dict[str, Any]] = []
        uploads_records: List[Dict[str, Any]] = []
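
        # Each helper below handles one attachment's info dict: build the
        # attachment record, build the upload record, and copy the file.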
        def add_attachment(info: Dict[str, Any]) -> None:
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)
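
        # Make sure the realm's uploads directory exists, even when there
        # were no attachments to copy.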
        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments,
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')