data_import: Add email validation to third-party data converters.

This commit makes the third-party data converters check for invalid user
emails. If it finds any, it’ll raise an Exception and show an error
message with all the bad emails listed out.

Fixes: #31783
This commit is contained in:
PieterCK 2024-09-30 15:26:03 +07:00 committed by Tim Abbott
parent 7ffe558e81
commit 6289a551aa
8 changed files with 68 additions and 10 deletions

View File

@ -10,6 +10,8 @@ from typing import Any, Protocol, TypeAlias, TypeVar
import orjson
import requests
from django.core.exceptions import ValidationError
from django.core.validators import validate_email
from django.forms.models import model_to_dict
from django.utils.timezone import now as timezone_now
@ -825,3 +827,19 @@ def long_term_idle_helper(
user_profile_row["last_active_message_id"] = 1
return long_term_idle
def validate_user_emails_for_import(user_emails: list[str]) -> None:
invalid_emails = []
for email in user_emails:
try:
validate_email(email)
except ValidationError:
invalid_emails.append(email)
if invalid_emails:
details = ", ".join(invalid_emails)
error_log = (
f"Invalid email format, please fix the following email(s) and try again: {details}"
)
raise ValidationError(error_log)

View File

@ -132,15 +132,11 @@ def convert_user_data(
realm_id: int,
team_name: str,
) -> None:
user_data_list = []
for username in user_data_map:
user = user_data_map[username]
if check_user_in_team(user, team_name) or user["is_mirror_dummy"]:
user_data_list.append(user)
for raw_item in user_data_list:
user = process_user(raw_item, realm_id, team_name, user_id_mapper)
user_handler.add_user(user)
for user_data in user_data_map.values():
if check_user_in_team(user_data, team_name) or user_data["is_mirror_dummy"]:
user = process_user(user_data, realm_id, team_name, user_id_mapper)
user_handler.add_user(user)
user_handler.validate_user_emails()
def convert_channel_data(

View File

@ -128,6 +128,7 @@ def process_users(
)
user_handler.add_user(user)
user_handler.validate_user_emails()
# Set the first realm_owner as the owner of
# all the bots.
if realm_owners:

View File

@ -39,6 +39,7 @@ from zerver.data_import.import_util import (
process_avatars,
process_emojis,
process_uploads,
validate_user_emails_for_import,
)
from zerver.data_import.sequencer import NEXT_ID
from zerver.data_import.slack_message_conversion import (
@ -358,6 +359,7 @@ def users_to_zerver_userprofile(
logging.info("%s: %s -> %s", slack_user_id, user["name"], userprofile_dict["email"])
validate_user_emails_for_import(list(found_emails))
process_customprofilefields(zerver_customprofilefield, zerver_customprofilefield_values)
logging.info("######### IMPORTING USERS FINISHED #########\n")
return (

View File

@ -1,5 +1,7 @@
from typing import Any
from zerver.data_import.import_util import validate_user_emails_for_import
class UserHandler:
"""
@ -25,3 +27,7 @@ class UserHandler:
def get_all_users(self) -> list[dict[str, Any]]:
users = list(self.id_to_user_map.values())
return users
def validate_user_emails(self) -> None:
all_users = self.get_all_users()
validate_user_emails_for_import([user["delivery_email"] for user in all_users])

View File

@ -202,6 +202,16 @@ class MatterMostImporter(ZulipTestCase):
convert_user_data(user_handler, user_id_mapper, username_to_user, realm_id, team_name)
self.assert_length(user_handler.get_all_users(), 3)
# Importer should raise error when user emails are malformed
team_name = "gryffindor"
bad_email1 = username_to_user["harry"]["email"] = "harry.ceramicist@zuL1[p.c0m"
bad_email2 = username_to_user["ron"]["email"] = "ron.ferret@zulup...com"
with self.assertRaises(Exception) as e:
convert_user_data(user_handler, user_id_mapper, username_to_user, realm_id, team_name)
error_message = str(e.exception)
expected_error_message = f"['Invalid email format, please fix the following email(s) and try again: {bad_email2}, {bad_email1}']"
self.assertEqual(error_message, expected_error_message)
def test_convert_channel_data(self) -> None:
fixture_file_name = self.fixture_file_name("export.json", "mattermost_fixtures")
mattermost_data = mattermost_data_file_to_dict(fixture_file_name)

View File

@ -180,6 +180,22 @@ class RocketChatImporter(ZulipTestCase):
self.assertEqual(user["is_mirror_dummy"], True)
self.assertEqual(user["is_bot"], False)
# Importer should raise error when user emails are malformed
bad_email1 = rocketchat_data["user"][0]["emails"][0]["address"] = "test1@hotmai,l.om"
bad_email2 = rocketchat_data["user"][1]["emails"][0]["address"] = "test2@gmail.c"
user_id_to_user_map = map_user_id_to_user(rocketchat_data["user"])
with self.assertRaises(Exception) as e:
process_users(
user_id_to_user_map=user_id_to_user_map,
realm_id=realm_id,
domain_name=domain_name,
user_handler=user_handler,
user_id_mapper=user_id_mapper,
)
error_message = str(e.exception)
expected_error_message = f"['Invalid email format, please fix the following email(s) and try again: {bad_email1}, {bad_email2}']"
self.assertEqual(error_message, expected_error_message)
def test_categorize_channels_and_map_with_id(self) -> None:
fixture_dir_name = self.fixture_file_name("", "rocketchat_fixtures")
rocketchat_data = rocketchat_data_to_dict(fixture_dir_name)

View File

@ -374,7 +374,7 @@ class SlackImporter(ZulipTestCase):
"Xf06054BBB": {"value": "random2"},
"Xf023DSCdd": {"value": "employer"},
}
user_data = [
user_data: list[dict[str, Any]] = [
{
"id": "U08RGD1RD",
"team_id": "T5YFFM2QY",
@ -636,6 +636,15 @@ class SlackImporter(ZulipTestCase):
self.assertEqual(zerver_userprofile[7]["is_active"], True)
self.assertEqual(zerver_userprofile[7]["is_mirror_dummy"], False)
# Importer should raise error when user emails are malformed
bad_email1 = user_data[0]["profile"]["email"] = "jon@gmail,com"
bad_email2 = user_data[1]["profile"]["email"] = "jane@gmail.m"
with self.assertRaises(Exception) as e, self.assertLogs(level="INFO"):
users_to_zerver_userprofile(slack_data_dir, user_data, 1, timestamp, "test_domain")
error_message = str(e.exception)
expected_error_message = f"['Invalid email format, please fix the following email(s) and try again: {bad_email1}, {bad_email2}']"
self.assertEqual(error_message, expected_error_message)
def test_build_defaultstream(self) -> None:
realm_id = 1
stream_id = 1