import: Add the UTF-8 flag on file entries in zipfiles from Slack.

Fixes: #22533.
This commit is contained in:
Alex Vandiver 2023-01-27 20:02:08 +00:00 committed by Tim Abbott
parent 282bf680f2
commit 92c8c17190
5 changed files with 230 additions and 0 deletions

View File

@ -1330,6 +1330,14 @@ def do_convert_data(original_path: str, output_dir: str, token: str, threads: in
os.makedirs(slack_data_dir)
with zipfile.ZipFile(original_path) as zipObj:
# Slack's export doesn't set the UTF-8 flag on each
# filename entry, despite encoding them as such, so
# zipfile decodes the names as cp437 (its default) and
# produces mojibake. Explicitly re-interpret each name
# as UTF-8 that was mis-decoded as cp437.
for fileinfo in zipObj.infolist():
fileinfo.flag_bits |= 0x800
fileinfo.filename = fileinfo.filename.encode("cp437").decode("utf-8")
zipObj.NameToInfo[fileinfo.filename] = fileinfo
zipObj.extractall(slack_data_dir)
elif os.path.isdir(original_path):
slack_data_dir = original_path

View File

@ -0,0 +1,22 @@
{
"ok": true,
"team": {
"id": "T04LT7JD2AZ",
"name": "例",
"url": "https://w1674835429-reo773452.slack.com/",
"domain": "w1674835429-reo773452",
"email_domain": "",
"icon": {
"image_default": true,
"image_34": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-34.png",
"image_44": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-44.png",
"image_68": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-68.png",
"image_88": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-88.png",
"image_102": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-102.png",
"image_230": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-230.png",
"image_132": "https://a.slack-edge.com/80588/img/avatars-teams/ava_0007-132.png"
},
"avatar_base_url": "https://ca.slack-edge.com/",
"is_verified": false
}
}

View File

@ -0,0 +1,152 @@
{
"cache_ts": 1674860232,
"members": [
{
"color": "757575",
"deleted": false,
"id": "USLACKBOT",
"is_admin": false,
"is_app_user": false,
"is_bot": false,
"is_email_confirmed": false,
"is_owner": false,
"is_primary_owner": false,
"is_restricted": false,
"is_ultra_restricted": false,
"name": "slackbot",
"profile": {
"always_active": true,
"avatar_hash": "sv41d8cd98f0",
"display_name": "Slackbot",
"display_name_normalized": "Slackbot",
"fields": {},
"first_name": "slackbot",
"image_192": "https://a.slack-edge.com/80588/marketing/img/avatars/slackbot/avatar-slackbot.png",
"image_24": "https://a.slack-edge.com/80588/img/slackbot_24.png",
"image_32": "https://a.slack-edge.com/80588/img/slackbot_32.png",
"image_48": "https://a.slack-edge.com/80588/img/slackbot_48.png",
"image_512": "https://a.slack-edge.com/80588/img/slackbot_512.png",
"image_72": "https://a.slack-edge.com/80588/img/slackbot_72.png",
"last_name": "",
"phone": "",
"real_name": "Slackbot",
"real_name_normalized": "Slackbot",
"skype": "",
"status_emoji": "",
"status_emoji_display_info": [],
"status_expiration": 0,
"status_text": "",
"status_text_canonical": "",
"team": "T04LT7JD2AZ",
"title": ""
},
"real_name": "Slackbot",
"team_id": "T04LT7JD2AZ",
"tz": "America/Los_Angeles",
"tz_label": "Pacific Standard Time",
"tz_offset": -28800,
"updated": 0,
"who_can_share_contact_card": "EVERYONE"
},
{
"color": "9f69e7",
"deleted": false,
"id": "U04LW5V0LH0",
"is_admin": true,
"is_app_user": false,
"is_bot": false,
"is_email_confirmed": true,
"is_owner": true,
"is_primary_owner": true,
"is_restricted": false,
"is_ultra_restricted": false,
"name": "alexmv",
"profile": {
"avatar_hash": "g6040af6e26d",
"display_name": "",
"display_name_normalized": "",
"email": "alexmv@zulip.com",
"fields": null,
"first_name": "alexmv",
"image_192": "https://secure.gravatar.com/avatar/6040af6e26d481c0e5fd4ac2fe4ea460.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0004-192.png",
"image_24": "https://secure.gravatar.com/avatar/6040af6e26d481c0e5fd4ac2fe4ea460.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0004-24.png",
"image_32": "https://secure.gravatar.com/avatar/6040af6e26d481c0e5fd4ac2fe4ea460.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0004-32.png",
"image_48": "https://secure.gravatar.com/avatar/6040af6e26d481c0e5fd4ac2fe4ea460.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0004-48.png",
"image_512": "https://secure.gravatar.com/avatar/6040af6e26d481c0e5fd4ac2fe4ea460.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0004-512.png",
"image_72": "https://secure.gravatar.com/avatar/6040af6e26d481c0e5fd4ac2fe4ea460.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0004-72.png",
"last_name": "",
"phone": "",
"real_name": "alexmv",
"real_name_normalized": "alexmv",
"skype": "",
"status_emoji": "",
"status_emoji_display_info": [],
"status_expiration": 0,
"status_text": "",
"status_text_canonical": "",
"team": "T04LT7JD2AZ",
"title": ""
},
"real_name": "alexmv",
"team_id": "T04LT7JD2AZ",
"tz": "America/New_York",
"tz_label": "Eastern Standard Time",
"tz_offset": -18000,
"updated": 1674835388,
"who_can_share_contact_card": "EVERYONE"
},
{
"color": "4bbe2e",
"deleted": false,
"id": "U04LYMYPGSG",
"is_admin": false,
"is_app_user": false,
"is_bot": true,
"is_email_confirmed": false,
"is_owner": false,
"is_primary_owner": false,
"is_restricted": false,
"is_ultra_restricted": false,
"name": "zulip_export",
"profile": {
"always_active": false,
"api_app_id": "A04LFM9BJ6T",
"avatar_hash": "g2668f8ddfc6",
"bot_id": "B04LW4RLEGK",
"display_name": "",
"display_name_normalized": "",
"fields": null,
"first_name": "Zulip",
"image_192": "https://secure.gravatar.com/avatar/2668f8ddfc6b6c6289f2fd5ba5490cae.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0024-192.png",
"image_24": "https://secure.gravatar.com/avatar/2668f8ddfc6b6c6289f2fd5ba5490cae.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0024-24.png",
"image_32": "https://secure.gravatar.com/avatar/2668f8ddfc6b6c6289f2fd5ba5490cae.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0024-32.png",
"image_48": "https://secure.gravatar.com/avatar/2668f8ddfc6b6c6289f2fd5ba5490cae.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0024-48.png",
"image_512": "https://secure.gravatar.com/avatar/2668f8ddfc6b6c6289f2fd5ba5490cae.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0024-512.png",
"image_72": "https://secure.gravatar.com/avatar/2668f8ddfc6b6c6289f2fd5ba5490cae.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0024-72.png",
"last_name": "export",
"phone": "",
"real_name": "Zulip export",
"real_name_normalized": "Zulip export",
"skype": "",
"status_emoji": "",
"status_emoji_display_info": [],
"status_expiration": 0,
"status_text": "",
"status_text_canonical": "",
"team": "T04LT7JD2AZ",
"title": ""
},
"real_name": "Zulip export",
"team_id": "T04LT7JD2AZ",
"tz": "America/Los_Angeles",
"tz_label": "Pacific Standard Time",
"tz_offset": -28800,
"updated": 1674836111,
"who_can_share_contact_card": "EVERYONE"
}
],
"ok": true,
"response_metadata": {
"next_cursor": ""
}
}

View File

@ -1263,3 +1263,51 @@ class SlackImporter(ZulipTestCase):
self.assertEqual(slack_emoji_name_to_codepoint["tophat"], "1f3a9")
self.assertEqual(slack_emoji_name_to_codepoint["dog2"], "1f415")
self.assertEqual(slack_emoji_name_to_codepoint["dog"], "1f436")
@mock.patch("zerver.data_import.slack.requests.get")
@mock.patch("zerver.data_import.slack.process_uploads", return_value=[])
@mock.patch("zerver.data_import.slack.build_attachment", return_value=[])
@mock.patch("zerver.data_import.slack.build_avatar_url")
@mock.patch("zerver.data_import.slack.build_avatar")
@mock.patch("zerver.data_import.slack.get_slack_api_data")
def test_slack_import_unicode_filenames(
    self,
    mock_get_slack_api_data: mock.Mock,
    mock_build_avatar_url: mock.Mock,
    mock_build_avatar: mock.Mock,
    mock_process_uploads: mock.Mock,
    mock_attachment: mock.Mock,
    mock_requests_get: mock.Mock,
) -> None:
    """Run do_convert_data on the test_unicode_slack_importer.zip fixture.

    The Slack API lookups, avatar/upload/attachment helpers and
    requests.get are all patched out, so only the conversion of the
    zipped export itself is exercised.  The test passes when the
    conversion finishes without raising and logs at INFO level or above.

    NOTE(review): presumably the zip fixture contains non-ASCII entry
    names; that is implied by its name but not visible from here.
    """
    fixtures_dir = os.path.join(
        settings.DEPLOY_ROOT, "zerver", "tests", "fixtures", "slack_fixtures"
    )
    zip_path = os.path.join(fixtures_dir, "test_unicode_slack_importer.zip")
    unzipped_path = os.path.join(fixtures_dir, "test_unicode_slack_importer")
    output_dir = os.path.join(settings.DEPLOY_ROOT, "var", "test-unicode-slack-importer-data")
    token = "xoxp-valid-token"

    # Clear anything left behind by an earlier failed run: first the
    # output directory (per the original author's note,
    # 'do_convert_data' expects 'output_dir' to be empty), then the
    # unzipped copy of the export that a failure inside
    # 'do_convert_data' can leave next to the zip file.
    for stale_path in (output_dir, unzipped_path):
        self.rm_tree(stale_path)

    # Canned replies for successive get_slack_api_data calls, in order:
    # the member list, an empty dict, and the team info.
    mock_get_slack_api_data.side_effect = [
        orjson.loads(self.fixture_data("unicode_user_data.json", type="slack_fixtures"))[
            "members"
        ],
        {},
        orjson.loads(self.fixture_data("unicode_team_info.json", type="slack_fixtures"))[
            "team"
        ],
    ]
    # Any HTTP download performed by the importer yields a small test image.
    mock_requests_get.return_value.raw = BytesIO(read_test_image_file("img.png"))

    with self.assertLogs(level="INFO"), self.settings(EXTERNAL_HOST="zulip.example.com"):
        # EXTERNAL_HOST has to be a valid domain here, because the Slack
        # importer uses it to build email addresses for users that have
        # no email specified.
        do_convert_data(zip_path, output_dir, token)