2018-10-13 16:25:44 +02:00
|
|
|
import logging
|
|
|
|
import os
|
2020-06-11 00:54:34 +02:00
|
|
|
import shutil
|
|
|
|
from typing import Any, Dict, List, Optional
|
2018-10-13 16:25:44 +02:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.data_import.import_util import build_attachment, create_converted_data_files
|
2018-10-13 16:25:44 +02:00
|
|
|
|
|
|
|
|
|
|
|
class AttachmentHandler:
    """Tracks attachments while converting a HipChat export to Zulip format.

    `handle_message_data` is called once per message as messages are
    converted; it deduplicates files by target path and returns the
    Markdown link to embed in the message.  `write_info` is called at the
    end to emit the Zulip attachment/upload records and copy the files
    into the export directory.
    """

    def __init__(self) -> None:
        # Maps target_path -> info dict with keys: message_ids (set),
        # sender_id, local_fn, target_path, name, size, mtime, content.
        self.info_dict: Dict[str, Dict[str, Any]] = {}

    def handle_message_data(self,
                            realm_id: int,
                            message_id: int,
                            sender_id: int,
                            attachment: Dict[str, Any],
                            files_dir: str) -> Optional[str]:
        """Record one message's attachment and return its Markdown link.

        Returns None when there is no attachment, when the export lacks
        path data for it, or when the file itself is missing on disk
        (HipChat exports can omit file contents).
        """
        if not attachment:
            return None

        name = attachment['name']

        if 'path' not in attachment:
            logging.info('Skipping HipChat attachment with missing path data: %s', name)
            return None

        path = attachment['path']

        local_fn = os.path.join(files_dir, path)

        if not os.path.exists(local_fn):
            # HipChat has an option to not include these in its
            # exports, since file uploads can be very large.
            logging.info('Skipping attachment with no file data: %s', local_fn)
            return None

        target_path = os.path.join(
            str(realm_id),
            'HipChatImportAttachment',
            path,
        )

        if target_path in self.info_dict:
            # Same file referenced by multiple messages: just record the
            # extra message id and reuse the already-built content.
            logging.info("file used multiple times: %s", path)
            info = self.info_dict[target_path]
            info['message_ids'].add(message_id)
            return info['content']

        # HipChat provides size info, but it's not
        # completely trustworthy, so we just
        # ask the OS for file details.
        size = os.path.getsize(local_fn)
        mtime = os.path.getmtime(local_fn)

        content = f'[{name}](/user_uploads/{target_path})'

        info = dict(
            message_ids={message_id},
            sender_id=sender_id,
            local_fn=local_fn,
            target_path=target_path,
            name=name,
            size=size,
            mtime=mtime,
            content=content,
        )
        self.info_dict[target_path] = info

        return content

    def write_info(self, output_dir: str, realm_id: int) -> None:
        """Emit attachment records and copy attachment files.

        Writes `uploads/records.json` and `attachment.json` under
        `output_dir` and copies every recorded file into
        `output_dir/uploads/...`.
        """
        attachments: List[Dict[str, Any]] = []
        uploads_records: List[Dict[str, Any]] = []

        def add_attachment(info: Dict[str, Any]) -> None:
            # Append a zerver_attachment row for this file.
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            # Append an uploads/records.json row for this file.
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            # Compute the copy destination and ensure its parent
            # directories exist.
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments,
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')