From b17a006bf00cb84bcb1749274e9234bc16f75aab Mon Sep 17 00:00:00 2001 From: Prakhar Pratyush Date: Thu, 2 May 2024 17:28:38 +0530 Subject: [PATCH] data_import: Remove gitter data import tool. Gitter broke their older API as part of being integrated into Matrix. Their announcement blog says: "Anything left using the Gitter APIs will need to be updated to use the Matrix API" This commit drops the legacy Gitter import tool and we plan to build a new one for Matrix in future. --- docs/production/install.md | 7 +- help/import-from-gitter.md | 196 --------- .../migrating-from-other-chat-tools.md | 1 - help/include/sidebar_index.md | 1 - .../comparison_table_integrated.html | 3 +- templates/corporate/for/business.html | 2 +- templates/corporate/for/communities.md | 8 +- templates/corporate/for/open-source.html | 3 +- templates/corporate/for/research.html | 2 +- templates/zerver/create_realm.html | 2 +- tools/test-backend | 1 - zerver/data_import/gitter.py | 397 ------------------ zerver/data_import/import_util.py | 4 +- zerver/lib/import_realm.py | 6 +- .../commands/convert_gitter_data.py | 51 --- zerver/tests/fixtures/gitter_data.json | 44 -- zerver/tests/test_gitter_importer.py | 176 -------- 17 files changed, 14 insertions(+), 890 deletions(-) delete mode 100644 help/import-from-gitter.md delete mode 100644 zerver/data_import/gitter.py delete mode 100644 zerver/management/commands/convert_gitter_data.py delete mode 100644 zerver/tests/fixtures/gitter_data.json delete mode 100644 zerver/tests/test_gitter_importer.py diff --git a/docs/production/install.md b/docs/production/install.md index b6c28a679e..cdb6cdb0a4 100644 --- a/docs/production/install.md +++ b/docs/production/install.md @@ -10,7 +10,7 @@ You can choose from several convenient options for hosting Zulip: - Use [Zulip Cloud](https://zulip.com/plans/) hosting. Read our [guide to choosing between Zulip Cloud and self-hosting](https://zulip.com/help/getting-your-organization-started-with-zulip#choosing-between-zulip-cloud-and-self-hosting). -To **import data** from [Slack][slack-import], [Mattermost][mattermost-import], [Rocket.Chat][rocketchat-import], [Gitter][gitter-import], [Zulip Cloud][zulip-cloud-import], or [another Zulip +To **import data** from [Slack][slack-import], [Mattermost][mattermost-import], [Rocket.Chat][rocketchat-import], [Zulip Cloud][zulip-cloud-import], or [another Zulip server][zulip-server-import], follow the linked instructions. You can **try out Zulip** before setting up your own server by [checking @@ -100,9 +100,7 @@ documentation. If you are importing data, stop here and return to the import instructions for [Slack][slack-import], [Mattermost][mattermost-import], -[Rocket.Chat][rocketchat-import], [Gitter][gitter-import], [Zulip -Cloud][zulip-cloud-import], [a server backup][zulip-backups], or [another Zulip -server][zulip-server-import]. +[Rocket.Chat][rocketchat-import], [Zulip Cloud][zulip-cloud-import], [a server backup][zulip-backups], or [another Zulip server][zulip-server-import]. ::: @@ -114,7 +112,6 @@ server][zulip-server-import]. [slack-import]: https://zulip.com/help/import-from-slack [mattermost-import]: https://zulip.com/help/import-from-mattermost [rocketchat-import]: https://zulip.com/help/import-from-rocketchat -[gitter-import]: https://zulip.com/help/import-from-gitter [zulip-cloud-import]: export-and-import.md#import-into-a-new-zulip-server [zulip-server-import]: export-and-import.md#import-into-a-new-zulip-server diff --git a/help/import-from-gitter.md b/help/import-from-gitter.md deleted file mode 100644 index e22ef2f871..0000000000 --- a/help/import-from-gitter.md +++ /dev/null @@ -1,196 +0,0 @@ -# Import from Gitter (beta) - -You can import your current workspace into a Zulip organization. It's a great way -to preserve your workspace history when you migrate to Zulip, and to -make the transition easy for the members of your organization. - -The import will include your organization's: - -* **Name** -* **Message history**, including attachments and emoji reactions -* **Users**, including names and avatars -* **Channels**, including all user subscriptions - -## Import process overview - -To import your Gitter organization into Zulip, you will need to take the -following steps, which are described in more detail below: - -{start_tabs} - -1. [Export your Gitter data.](#export-your-gitter-data) - -1. [Import your Gitter data into Zulip.](#import-your-data-into-zulip) - -1. [Get your organization started with Zulip!](#get-your-organization-started-with-zulip) - -{end_tabs} - -## Import your organization from Gitter into Zulip - -### Export your Gitter data - -Gitter's [data export tool](https://github.com/minrk/archive-gitter) allows you -to export all public channel messages. - -{start_tabs} - -1. Export your Gitter data. You will receive `.json` files of the public - rooms that you are a part of. - -1. Select the `gitter_data.json` file of the room which you want to - import into Zulip. - -!!! warn "" - - **Note:** You will need a Gitter API token to export data. You can get this - token by following the instructions in the **Getting Started** section of - the [Gitter documentation](https://developer.gitter.im/docs/). - -{end_tabs} - -### Import your data into Zulip - -{!import-your-data-into-zulip.md!} - -{start_tabs} - -{tab|zulip-cloud} - -{!import-into-a-zulip-cloud-organization.md!} - -1. The **username** that will have the [owner role](/help/roles-and-permissions) - in your Zulip organization. - -{!import-zulip-cloud-organization-warning.md!} - -{tab|self-hosting} - -{!import-into-a-self-hosted-zulip-server.md!} - -1. To import into an organization hosted on the root domain - (`EXTERNAL_HOST`) of the Zulip installation, run the following - commands. - - {!import-self-hosted-server-tips.md!} - - ``` - cd /home/zulip/deployments/current - ./scripts/stop-server - ./manage.py convert_gitter_data /tmp/gitter_data.json --output /tmp/converted_gitter_data - ./manage.py import '' /tmp/converted_gitter_data - ./scripts/start-server - ``` - - Alternatively, to import into a custom subdomain, run: - - ``` - cd /home/zulip/deployments/current - ./scripts/stop-server - ./manage.py convert_gitter_data /tmp/gitter_data.json --output /tmp/converted_gitter_data - ./manage.py import /tmp/converted_gitter_data - ./scripts/start-server - ``` - -1. Follow [step 4](https://zulip.readthedocs.io/en/stable/production/install.html#step-4-configure-and-use) - of the guide for [installing a new Zulip - server](https://zulip.readthedocs.io/en/stable/production/install.html). - -{end_tabs} - -#### Import details - -Whether you are using Zulip Cloud or self-hosting Zulip, here are a few notes to -keep in mind about the import process: - -- [Gitter's export tool](https://github.com/minrk/archive-gitter) does not export - workspace settings, so you will need to [configure the settings for your Zulip - organization](/help/customize-organization-settings). This includes settings - like [email visibility](/help/configure-email-visibility), - [message editing permissions](/help/restrict-message-editing-and-deletion), - and [how users can join your organization](/help/restrict-account-creation). - -- Gitter's export tool does not export user settings, so users in your organization - may want to [customize their account settings](/help/getting-started-with-zulip). - -- The [Gitter API][gitter-api-user-data] doesn't contain data on which users are - administrators of a Gitter channel. As a result, all Gitter users are imported - into Zulip as [members](/help/roles-and-permissions). - -- Gitter's export tool doesn't export email addresses, only GitHub usernames. - Zulip's import tool will set GitHub as the only authentication method enabled - by default to avoid user confusion. - -- You can merge multiple Gitter channels into a single Zulip - organization using [this - tool](https://github.com/minrk/archive-gitter/pull/5). - -- Zulip's import tool doesn't translate Gitter's Markdown format into Zulip's - Markdown format (there are a few corner cases where the syntax is different). - Additionally, Gitter's issue mention syntax isn't translated. - -- Message edit history is not imported. - -[grant-admin-access]: https://zulip.readthedocs.io/en/stable/production/management-commands.html#other-useful-manage-py-commands -[gitter-api-user-data]: https://developer.gitter.im/docs/user-resource - -## Get your organization started with Zulip - -Once the import process is completed, you will need to: - -{start_tabs} - -1. [Configure the settings for your organization](/help/customize-organization-settings), - which are not exported. This includes settings like [email - visibility](/help/configure-email-visibility), [message editing - permissions](/help/restrict-message-editing-and-deletion), - and [how users can join your organization](/help/restrict-account-creation). - -2. [Configure user roles](/help/change-a-users-role). Only organization owners - and administrators can do this. - * If you [import into Zulip Cloud](#import-your-data-into-zulip), you will - specify the user whose account will have the owner role when you request the - import. - * If you self-host, you can follow the Zulip documentation on [making a user an - organization owner from the terminal][grant-admin-access] to mark the appropriate - users as organization owners. - -3. All users from your previous workspace will have accounts in your new Zulip - organization. However, you will need to let users know about their new - accounts, and [how they will log in for the first time - ](#how-users-will-log-in-for-the-first-time). - -4. Share the URL for your new Zulip organization, and (recommended) the [Getting - started with Zulip guide](/help/getting-started-with-zulip). - -5. Migrate any [integrations](/integrations/). - -{end_tabs} - -## How users will log in for the first time - -When you create your organization, users will immediately be able to log in -without a password using GitHub as the [authentication method -](/help/configure-authentication-methods). Once they log -in, users whose accounts have been imported will need to [change their Zulip -email address](/help/change-your-email-address) in order to receive [email -notifications](/help/email-notifications). - -!!! warn "" - - A user's email notifications will not work until they update the email - associated with their Zulip account. - -When user accounts are imported, users initially do not have passwords -configured. Users can [reset their own passwords](/help/change-your-password) by -following the instructions on your Zulip organization's login page. - -!!! tip "" - - For security reasons, passwords are never exported. - -## Related articles - -* [Choosing between Zulip Cloud and self-hosting](/help/zulip-cloud-or-self-hosting) -* [Setting up your organization](/help/getting-your-organization-started-with-zulip) -* [Getting started with Zulip](/help/getting-started-with-zulip) diff --git a/help/include/migrating-from-other-chat-tools.md b/help/include/migrating-from-other-chat-tools.md index 47e6cbb5f5..9c89d8ce94 100644 --- a/help/include/migrating-from-other-chat-tools.md +++ b/help/include/migrating-from-other-chat-tools.md @@ -6,7 +6,6 @@ more. Follow the detailed import guides: incoming webhook](https://zulip.com/integrations/doc/slack_incoming) also makes it easy to migrate any integrations. * [Import from Mattermost](/help/import-from-mattermost) -* [Import from Gitter](/help/import-from-gitter) * [Import from Rocket.Chat](/help/import-from-rocketchat) !!! warn "" diff --git a/help/include/sidebar_index.md b/help/include/sidebar_index.md index b891cfadc7..3e67140d61 100644 --- a/help/include/sidebar_index.md +++ b/help/include/sidebar_index.md @@ -170,7 +170,6 @@ * [Communities directory](/help/communities-directory) * [Import from Mattermost](/help/import-from-mattermost) * [Import from Slack](/help/import-from-slack) -* [Import from Gitter](/help/import-from-gitter) * [Import from Rocket.Chat](/help/import-from-rocketchat) * [Configure authentication methods](/help/configure-authentication-methods) * [SAML authentication](/help/saml-authentication) diff --git a/templates/corporate/comparison_table_integrated.html b/templates/corporate/comparison_table_integrated.html index 01d1e70a7b..fc987172fa 100644 --- a/templates/corporate/comparison_table_integrated.html +++ b/templates/corporate/comparison_table_integrated.html @@ -1525,8 +1525,7 @@ Import data from Slack, - Mattermost, - Gitter or + Mattermost, or Rocket.Chat diff --git a/templates/corporate/for/business.html b/templates/corporate/for/business.html index fe826f45db..794bed29db 100644 --- a/templates/corporate/for/business.html +++ b/templates/corporate/for/business.html @@ -544,7 +544,7 @@

Make the move today

-

Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, Gitter, or Rocket.Chat.

+

Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, or Rocket.Chat.

To get more insight into how Zulip can help your organization, read how the iDrift AS company, the GUT contact support agency, the End Point Dev software consultancy, and the startups Semsee and Atolio are using Zulip.

diff --git a/templates/corporate/for/communities.md b/templates/corporate/for/communities.md index 9fb1d89894..db6df09648 100644 --- a/templates/corporate/for/communities.md +++ b/templates/corporate/for/communities.md @@ -132,10 +132,10 @@ most of their time integrating improvements into Zulip. Many communities that migrated from [Slack](/help/import-from-slack), [Mattermost](/help/import-from-mattermost), -[Gitter](/help/import-from-gitter), or -[Rocket.Chat](/help/import-from-rocketchat) to Zulip tell us -that Zulip helped them manage and grow an inclusive, healthy -community. We hope Zulip can help your community succeed too! +[Rocket.Chat](/help/import-from-rocketchat), or +Gitter to Zulip tell us that Zulip helped them manage +and grow an inclusive, healthy community. We hope Zulip +can help your community succeed too! > “I highly recommend Zulip to other communities. We’re coming from diff --git a/templates/corporate/for/open-source.html b/templates/corporate/for/open-source.html index 006382504d..74ff53fcad 100644 --- a/templates/corporate/for/open-source.html +++ b/templates/corporate/for/open-source.html @@ -669,8 +669,7 @@ from another platform is easy! Import your existing organization from Slack, - Mattermost, - Gitter, or + Mattermost, or Rocket.Chat. diff --git a/templates/corporate/for/research.html b/templates/corporate/for/research.html index 9219f057ed..30bc7d7c92 100644 --- a/templates/corporate/for/research.html +++ b/templates/corporate/for/research.html @@ -399,7 +399,7 @@ conferences. -
  • Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, Gitter, or Rocket.Chat.
  • +
  • Getting started or moving from another platform is easy! Import your existing organization from Slack, Mattermost, or Rocket.Chat.
  • If you have any questions, please contact us at sales@zulip.com. You can also drop by our friendly development community at chat.zulip.org to ask for help or suggest improvements!
  • diff --git a/templates/zerver/create_realm.html b/templates/zerver/create_realm.html index cc9fbe546c..27523d2fe3 100644 --- a/templates/zerver/create_realm.html +++ b/templates/zerver/create_realm.html @@ -43,7 +43,7 @@ {% trans %} Or import from Slack, Mattermost, - Gitter, or Rocket.Chat. + or Rocket.Chat. {% endtrans %} diff --git a/tools/test-backend b/tools/test-backend index 429b556b30..796f5cc60c 100755 --- a/tools/test-backend +++ b/tools/test-backend @@ -119,7 +119,6 @@ not_yet_fully_covered = [ # Data import files; relatively low priority "zerver/data_import/sequencer.py", "zerver/data_import/slack.py", - "zerver/data_import/gitter.py", "zerver/data_import/import_util.py", # Webhook integrations with incomplete coverage "zerver/webhooks/greenhouse/view.py", diff --git a/zerver/data_import/gitter.py b/zerver/data_import/gitter.py deleted file mode 100644 index f827a894e9..0000000000 --- a/zerver/data_import/gitter.py +++ /dev/null @@ -1,397 +0,0 @@ -import logging -import os -from typing import Any, Dict, List, Set, Tuple - -import dateutil.parser -import orjson -from django.conf import settings -from django.forms.models import model_to_dict -from django.utils.timezone import now as timezone_now -from typing_extensions import TypeAlias - -from zerver.data_import.import_util import ( - ZerverFieldsT, - build_avatar, - build_defaultstream, - build_message, - build_realm, - build_recipient, - build_stream, - build_subscription, - build_usermessages, - build_zerver_realm, - create_converted_data_files, - long_term_idle_helper, - make_subscriber_map, - process_avatars, -) -from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE -from zerver.models import Recipient, UserProfile -from zproject.backends import GitHubAuthBackend - -# stubs -GitterDataT: TypeAlias = List[Dict[str, Any]] - -realm_id = 0 - - -def gitter_workspace_to_realm( - domain_name: str, gitter_data: GitterDataT, realm_subdomain: str -) -> Tuple[ZerverFieldsT, List[ZerverFieldsT], Dict[str, int], Dict[str, int]]: - """ - Returns: - 1. realm, converted realm data - 2. avatars, which is list to map avatars to Zulip avatar records.json - 3. user_map, which is a dictionary to map from Gitter user id to Zulip user id - 4. stream_map, which is a dictionary to map from Gitter rooms to Zulip stream id - """ - NOW = float(timezone_now().timestamp()) - zerver_realm: List[ZerverFieldsT] = build_zerver_realm(realm_id, realm_subdomain, NOW, "Gitter") - - realm = build_realm(zerver_realm, realm_id, domain_name) - - # Users will have GitHub's generated noreply email addresses so their only way to log in - # at first is via GitHub. So we set GitHub to be the only authentication method enabled - # default to avoid user confusion. - realm["zerver_realmauthenticationmethod"] = [ - { - "name": GitHubAuthBackend.auth_backend_name, - "realm": realm_id, - # The id doesn't matter since it gets set by the import later properly, but we need to set - # it to something in the dict. - "id": 1, - } - ] - - zerver_userprofile, avatars, user_map = build_userprofile(int(NOW), domain_name, gitter_data) - zerver_stream, zerver_defaultstream, stream_map = build_stream_map(int(NOW), gitter_data) - zerver_recipient, zerver_subscription = build_recipient_and_subscription( - zerver_userprofile, zerver_stream - ) - - realm["zerver_userprofile"] = zerver_userprofile - realm["zerver_stream"] = zerver_stream - realm["zerver_defaultstream"] = zerver_defaultstream - realm["zerver_recipient"] = zerver_recipient - realm["zerver_subscription"] = zerver_subscription - - return realm, avatars, user_map, stream_map - - -def build_userprofile( - timestamp: Any, domain_name: str, gitter_data: GitterDataT -) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], Dict[str, int]]: - """ - Returns: - 1. zerver_userprofile, which is a list of user profile - 2. avatar_list, which is list to map avatars to Zulip avatars records.json - 3. added_users, which is a dictionary to map from Gitter user id to Zulip id - """ - logging.info("######### IMPORTING USERS STARTED #########\n") - zerver_userprofile = [] - avatar_list: List[ZerverFieldsT] = [] - user_map: Dict[str, int] = {} - user_id = 0 - - for data in gitter_data: - if get_user_from_message(data) not in user_map: - user_data = data["fromUser"] - user_map[user_data["id"]] = user_id - - email = get_user_email(user_data, domain_name) - if user_data.get("avatarUrl"): - build_avatar( - user_id, realm_id, email, user_data["avatarUrl"], timestamp, avatar_list - ) - - # Build userprofile object - userprofile = UserProfile( - full_name=user_data["displayName"], - id=user_id, - email=email, - delivery_email=email, - avatar_source="U", - date_joined=timestamp, - last_login=timestamp, - ) - userprofile_dict = model_to_dict(userprofile) - # Set realm id separately as the corresponding realm is not yet a Realm model - # instance - userprofile_dict["realm"] = realm_id - - # We use this later, even though Zulip doesn't - # support short_name - userprofile_dict["short_name"] = user_data["username"] - - zerver_userprofile.append(userprofile_dict) - user_id += 1 - logging.info("######### IMPORTING USERS FINISHED #########\n") - return zerver_userprofile, avatar_list, user_map - - -def get_user_email(user_data: ZerverFieldsT, domain_name: str) -> str: - # TODO Get user email from github - email = "{}@users.noreply.github.com".format(user_data["username"]) - return email - - -def build_stream_map( - timestamp: Any, gitter_data: GitterDataT -) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], Dict[str, int]]: - """ - Returns: - 1. stream, which is the list of streams - 2. defaultstreams, which is the list of default streams - 3. stream_map, which is a dictionary to map from Gitter rooms to Zulip stream id - """ - logging.info("######### IMPORTING STREAM STARTED #########\n") - stream_id = 0 - stream: List[ZerverFieldsT] = [] - - # Default stream when no "room" field is present - stream.append( - build_stream(timestamp, realm_id, "from gitter", "Imported from Gitter", stream_id) - ) - defaultstream = build_defaultstream(realm_id=realm_id, stream_id=stream_id, defaultstream_id=0) - stream_id += 1 - - # Gathering streams from gitter_data - stream_map: Dict[str, int] = {} - for data in gitter_data: - if "room" in data and data["room"] not in stream_map: - stream.append( - build_stream( - timestamp, realm_id, data["room"], f'Gitter room {data["room"]}', stream_id - ) - ) - stream_map[data["room"]] = stream_id - stream_id += 1 - logging.info("######### IMPORTING STREAMS FINISHED #########\n") - - return stream, [defaultstream], stream_map - - -def build_recipient_and_subscription( - zerver_userprofile: List[ZerverFieldsT], zerver_stream: List[ZerverFieldsT] -) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT]]: - """ - Assumes that there is at least one stream with 'stream_id' = 0, - and that this stream is the only defaultstream, with 'defaultstream_id' = 0 - Returns: - 1. zerver_recipient, which is a list of mapped recipient - 2. zerver_subscription, which is a list of mapped subscription - """ - zerver_recipient = [] - zerver_subscription = [] - recipient_id = subscription_id = 0 - - # For streams - - # Initial recipients correspond to initial streams - # We enumerate all streams, and build a recipient for each - # Hence 'recipient_id'=n corresponds to 'stream_id'=n - for stream in zerver_stream: - zerver_recipient.append(build_recipient(recipient_id, recipient_id, Recipient.STREAM)) - recipient_id += 1 - - # For users - for user in zerver_userprofile: - zerver_recipient.append(build_recipient(user["id"], recipient_id, Recipient.PERSONAL)) - zerver_subscription.append(build_subscription(recipient_id, user["id"], subscription_id)) - recipient_id += 1 - subscription_id += 1 - - # As suggested in #14830, we subscribe every user to every stream. - # We rely on the above invariant: 'recipient_id'=n corresponds to 'stream_id'=n - # - # TODO: For multi-stream imports, subscribe users to streams - # based either on Gitter API data or who sent messages where. - for user in zerver_userprofile: - for stream in zerver_stream: - zerver_subscription.append( - build_subscription(stream["id"], user["id"], subscription_id) - ) - subscription_id += 1 - - return zerver_recipient, zerver_subscription - - -def get_timestamp_from_message(message: ZerverFieldsT) -> float: - # Gitter's timestamps are in UTC - return float(dateutil.parser.parse(message["sent"]).timestamp()) - - -def get_user_from_message(message: ZerverFieldsT) -> str: - return message["fromUser"]["id"] - - -def convert_gitter_workspace_messages( - gitter_data: GitterDataT, - output_dir: str, - subscriber_map: Dict[int, Set[int]], - user_map: Dict[str, int], - stream_map: Dict[str, int], - user_short_name_to_full_name: Dict[str, str], - zerver_userprofile: List[ZerverFieldsT], - realm_id: int, - chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE, -) -> None: - """ - Messages are stored in batches - """ - logging.info("######### IMPORTING MESSAGES STARTED #########\n") - - long_term_idle = long_term_idle_helper( - iter(gitter_data), - get_user_from_message, - get_timestamp_from_message, - lambda id: user_map[id], - iter(user_map.keys()), - zerver_userprofile, - ) - - message_id = 0 - - low_index = 0 - upper_index = low_index + chunk_size - dump_file_id = 1 - - while True: - message_json = {} - zerver_message = [] - zerver_usermessage: List[ZerverFieldsT] = [] - message_data = gitter_data[low_index:upper_index] - if len(message_data) == 0: - break - for message in message_data: - message_time = get_timestamp_from_message(message) - mentioned_user_ids = get_usermentions(message, user_map, user_short_name_to_full_name) - rendered_content = None - topic_name = "imported from Gitter" + ( - f' room {message["room"]}' if "room" in message else "" - ) - user_id = user_map[get_user_from_message(message)] - recipient_id = stream_map[message["room"]] if "room" in message else 0 - zulip_message = build_message( - topic_name=topic_name, - date_sent=message_time, - message_id=message_id, - content=message["text"], - rendered_content=rendered_content, - user_id=user_id, - recipient_id=recipient_id, - realm_id=realm_id, - ) - zerver_message.append(zulip_message) - - build_usermessages( - zerver_usermessage=zerver_usermessage, - subscriber_map=subscriber_map, - recipient_id=recipient_id, - mentioned_user_ids=mentioned_user_ids, - message_id=message_id, - is_private=False, - long_term_idle=long_term_idle, - ) - - message_id += 1 - - message_json["zerver_message"] = zerver_message - message_json["zerver_usermessage"] = zerver_usermessage - message_filename = os.path.join(output_dir, f"messages-{dump_file_id:06}.json") - logging.info("Writing messages to %s\n", message_filename) - write_data_to_file(os.path.join(message_filename), message_json) - - low_index = upper_index - upper_index = chunk_size + low_index - dump_file_id += 1 - - logging.info("######### IMPORTING MESSAGES FINISHED #########\n") - - -def get_usermentions( - message: Dict[str, Any], user_map: Dict[str, int], user_short_name_to_full_name: Dict[str, str] -) -> List[int]: - mentioned_user_ids = [] - if "mentions" in message: - for mention in message["mentions"]: - if mention.get("userId") in user_map: - gitter_mention = "@{}".format(mention["screenName"]) - if mention["screenName"] not in user_short_name_to_full_name: - logging.info( - "Mentioned user %s never sent any messages, so has no full name data", - mention["screenName"], - ) - full_name = mention["screenName"] - else: - full_name = user_short_name_to_full_name[mention["screenName"]] - zulip_mention = f"@**{full_name}**" - message["text"] = message["text"].replace(gitter_mention, zulip_mention) - - mentioned_user_ids.append(user_map[mention["userId"]]) - return mentioned_user_ids - - -def do_convert_data(gitter_data_file: str, output_dir: str, threads: int = 6) -> None: - # Subdomain is set by the user while running the import commands - realm_subdomain = "" - domain_name = settings.EXTERNAL_HOST - - os.makedirs(output_dir, exist_ok=True) - # output directory should be empty initially - if os.listdir(output_dir): - raise Exception("Output directory should be empty!") - - # Read data from the Gitter file - with open(gitter_data_file, "rb") as fp: - gitter_data = orjson.loads(fp.read()) - - realm, avatar_list, user_map, stream_map = gitter_workspace_to_realm( - domain_name, gitter_data, realm_subdomain - ) - - subscriber_map = make_subscriber_map( - zerver_subscription=realm["zerver_subscription"], - ) - - # For user mentions - user_short_name_to_full_name = {} - for userprofile in realm["zerver_userprofile"]: - user_short_name_to_full_name[userprofile["short_name"]] = userprofile["full_name"] - - convert_gitter_workspace_messages( - gitter_data, - output_dir, - subscriber_map, - user_map, - stream_map, - user_short_name_to_full_name, - realm["zerver_userprofile"], - realm_id=realm_id, - ) - - avatar_folder = os.path.join(output_dir, "avatars") - avatar_realm_folder = os.path.join(avatar_folder, str(realm_id)) - os.makedirs(avatar_realm_folder, exist_ok=True) - avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads) - - attachment: Dict[str, List[Any]] = {"zerver_attachment": []} - - # IO realm.json - create_converted_data_files(realm, output_dir, "/realm.json") - # IO emoji records - create_converted_data_files([], output_dir, "/emoji/records.json") - # IO avatar records - create_converted_data_files(avatar_records, output_dir, "/avatars/records.json") - # IO uploads records - create_converted_data_files([], output_dir, "/uploads/records.json") - # IO attachments records - create_converted_data_files(attachment, output_dir, "/attachment.json") - - logging.info("######### DATA CONVERSION FINISHED #########\n") - logging.info("Zulip data dump created at %s", output_dir) - - -def write_data_to_file(output_file: str, data: Any) -> None: - with open(output_file, "wb") as f: - f.write(orjson.dumps(data, option=orjson.OPT_INDENT_2)) diff --git a/zerver/data_import/import_util.py b/zerver/data_import/import_util.py index 6e0cf5003f..7c18fafdbb 100644 --- a/zerver/data_import/import_util.py +++ b/zerver/data_import/import_util.py @@ -311,7 +311,7 @@ def build_recipients( ) -> List[ZerverFieldsT]: """ This function was only used HipChat import, this function may be - required for future conversions. The Slack and Gitter conversions do it more + required for future conversions. The Slack conversions do it more tightly integrated with creating other objects. """ @@ -583,7 +583,7 @@ def process_avatars( 2. avatar_dir: Folder where the downloaded avatars are saved 3. realm_id: Realm ID. - We use this for Slack and Gitter conversions, where avatars need to be + We use this for Slack conversions, where avatars need to be downloaded. For simpler conversions see write_avatar_png. """ diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 5d9a1151ad..58b99e253f 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -208,7 +208,7 @@ def create_subscription_events(data: TableData, realm_id: int) -> None: type event for all the existing Stream subscriptions. This is needed for all the export tools which do not include the - table `zerver_realmauditlog` (Slack, Gitter, etc.) because the appropriate + table `zerver_realmauditlog` (e.g. Slack) because the appropriate data about when a user was subscribed is not exported by the third-party service. """ @@ -855,10 +855,6 @@ def import_uploads( # so, it is an error, default_user_profile_id will be # None, and we assert. For emoji / realm icons, we # fall back to default_user_profile_id. - # default_user_profile_id can be None in Gitter - # imports, which do not create any owners; but Gitter - # does not have emoji which we would need to allocate - # a user to. assert default_user_profile_id is not None metadata["user_profile_id"] = str(default_user_profile_id) else: diff --git a/zerver/management/commands/convert_gitter_data.py b/zerver/management/commands/convert_gitter_data.py deleted file mode 100644 index 158d3eba35..0000000000 --- a/zerver/management/commands/convert_gitter_data.py +++ /dev/null @@ -1,51 +0,0 @@ -import argparse -import os -import tempfile -from typing import Any - -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError, CommandParser -from typing_extensions import override - -from zerver.data_import.gitter import do_convert_data - - -class Command(BaseCommand): - help = """Convert the Gitter data into Zulip data format.""" - - @override - def add_arguments(self, parser: CommandParser) -> None: - parser.add_argument( - "gitter_data", nargs="+", metavar="", help="Gitter data in json format" - ) - - parser.add_argument( - "--output", dest="output_dir", help="Directory to write exported data to." - ) - - parser.add_argument( - "--threads", - default=settings.DEFAULT_DATA_EXPORT_IMPORT_PARALLELISM, - help="Threads to download avatars and attachments faster", - ) - - parser.formatter_class = argparse.RawTextHelpFormatter - - @override - def handle(self, *args: Any, **options: Any) -> None: - output_dir = options["output_dir"] - if output_dir is None: - output_dir = tempfile.mkdtemp(prefix="converted-gitter-data-") - else: - output_dir = os.path.realpath(output_dir) - - num_threads = int(options["threads"]) - if num_threads < 1: - raise CommandError("You must have at least one thread.") - - for path in options["gitter_data"]: - if not os.path.exists(path): - raise CommandError(f"Gitter data file not found: '{path}'") - # TODO add json check - print("Converting data ...") - do_convert_data(path, output_dir, num_threads) diff --git a/zerver/tests/fixtures/gitter_data.json b/zerver/tests/fixtures/gitter_data.json deleted file mode 100644 index d7c5a10d33..0000000000 --- a/zerver/tests/fixtures/gitter_data.json +++ /dev/null @@ -1,44 +0,0 @@ -[ -{ - "fromUser": { - "avatarUrl": "https://avatars-02.gitter.im/gh/uv/4/username", - "displayName": "User Full Name", - "gv": "4", - "id": "54d7876c15522ed4b3dbbefb", - "url": "/user1", - "username": "username1", - "v": 17 - }, - "html": "test message", - "id": "57509d0ef44fde236e52f395", - "issues": [], - "meta": [], - "readBy": 8, - "sent": "2015-06-02T20:54:38.747Z", - "text": "test message", - "unread": false, - "urls": [], - "v": 1 -}, -{ - "fromUser": { - "avatarUrl": "https://avatars-02.gitter.im/gh/uv/4/username", - "displayName": "User Full Name 2", - "gv": "3", - "id": "54d7876c15522ed4b3dbtest", - "url": "/user2", - "username": "username2", - "v": 17 - }, - "html": "test message 2", - "id": "57509d0ef44fde236e52f395", - "issues": [], - "meta": [], - "readBy": 8, - "sent": "2016-07-02T19:54:38.747Z", - "text": "test message 2", - "unread": false, - "urls": [], - "v": 1 -} -] diff --git a/zerver/tests/test_gitter_importer.py b/zerver/tests/test_gitter_importer.py deleted file mode 100644 index 75e950acbd..0000000000 --- a/zerver/tests/test_gitter_importer.py +++ /dev/null @@ -1,176 +0,0 @@ -import os -from datetime import timedelta -from typing import Any -from unittest import mock - -import dateutil.parser -import orjson -import time_machine - -from zerver.data_import.gitter import do_convert_data, get_usermentions -from zerver.lib.import_realm import do_import_realm -from zerver.lib.test_classes import ZulipTestCase -from zerver.models import Message, UserProfile -from zerver.models.realms import get_realm -from zproject.backends import ( - AUTH_BACKEND_NAME_MAP, - GitHubAuthBackend, - auth_enabled_helper, - github_auth_enabled, -) - - -class GitterImporter(ZulipTestCase): - @mock.patch("zerver.data_import.gitter.process_avatars", return_value=[]) - def test_gitter_import_data_conversion(self, mock_process_avatars: mock.Mock) -> None: - output_dir = self.make_import_output_dir("gitter") - gitter_file = os.path.join(os.path.dirname(__file__), "fixtures/gitter_data.json") - - # We need some time-mocking to set up user soft-deactivation logic. - # One of the messages in the import data - # is significantly older than the other one. We mock the current time in the relevant module - # to match the sent time of the more recent message - to make it look like one of the messages - # is very recent, while the other one is old. This should cause that the sender of the recent - # message to NOT be soft-deactivated, while the sender of the other one is. - with open(gitter_file) as f: - gitter_data = orjson.loads(f.read()) - sent_datetime = dateutil.parser.parse(gitter_data[1]["sent"]) - with self.assertLogs(level="INFO"), time_machine.travel( - (sent_datetime + timedelta(days=1)), tick=False - ): - do_convert_data(gitter_file, output_dir) - - def read_file(output_file: str) -> Any: - full_path = os.path.join(output_dir, output_file) - with open(full_path, "rb") as f: - return orjson.loads(f.read()) - - self.assertEqual(os.path.exists(os.path.join(output_dir, "avatars")), True) - self.assertEqual(os.path.exists(os.path.join(output_dir, "emoji")), True) - self.assertEqual(os.path.exists(os.path.join(output_dir, "attachment.json")), True) - - realm = read_file("realm.json") - - # test realm - self.assertEqual( - "Organization imported from Gitter!", realm["zerver_realm"][0]["description"] - ) - - # test users - exported_user_ids = self.get_set(realm["zerver_userprofile"], "id") - exported_user_full_name = self.get_set(realm["zerver_userprofile"], "full_name") - self.assertIn("User Full Name", exported_user_full_name) - exported_user_email = self.get_set(realm["zerver_userprofile"], "email") - self.assertIn("username2@users.noreply.github.com", exported_user_email) - - # test stream - self.assert_length(realm["zerver_stream"], 1) - self.assertEqual(realm["zerver_stream"][0]["name"], "from gitter") - self.assertEqual(realm["zerver_stream"][0]["deactivated"], False) - self.assertEqual(realm["zerver_stream"][0]["realm"], realm["zerver_realm"][0]["id"]) - - self.assertEqual( - realm["zerver_defaultstream"][0]["stream"], realm["zerver_stream"][0]["id"] - ) - - # test recipient - exported_recipient_id = self.get_set(realm["zerver_recipient"], "id") - exported_recipient_type = self.get_set(realm["zerver_recipient"], "type") - self.assertEqual({1, 2}, exported_recipient_type) - - # test subscription - exported_subscription_userprofile = self.get_set( - realm["zerver_subscription"], "user_profile" - ) - self.assertEqual({0, 1}, exported_subscription_userprofile) - exported_subscription_recipient = self.get_set(realm["zerver_subscription"], "recipient") - self.assert_length(exported_subscription_recipient, 3) - self.assertIn(realm["zerver_subscription"][1]["recipient"], exported_recipient_id) - - messages = read_file("messages-000001.json") - - # test messages - exported_messages_id = self.get_set(messages["zerver_message"], "id") - self.assertIn(messages["zerver_message"][0]["sender"], exported_user_ids) - self.assertIn(messages["zerver_message"][1]["recipient"], exported_recipient_id) - self.assertIn(messages["zerver_message"][0]["content"], "test message") - - # test usermessages and soft-deactivation of users - [user_should_be_long_term_idle] = ( - user - for user in realm["zerver_userprofile"] - if user["delivery_email"] == "username1@users.noreply.github.com" - ) - [user_should_not_be_long_term_idle] = ( - user - for user in realm["zerver_userprofile"] - if user["delivery_email"] == "username2@users.noreply.github.com" - ) - self.assertEqual(user_should_be_long_term_idle["long_term_idle"], True) - - # Only the user who's not soft-deactivated gets UserMessages. - exported_usermessage_userprofile = self.get_set( - messages["zerver_usermessage"], "user_profile" - ) - self.assertEqual( - {user_should_not_be_long_term_idle["id"]}, exported_usermessage_userprofile - ) - exported_usermessage_message = self.get_set(messages["zerver_usermessage"], "message") - self.assertEqual(exported_usermessage_message, exported_messages_id) - - @mock.patch("zerver.data_import.gitter.process_avatars", return_value=[]) - def test_gitter_import_to_existing_database(self, mock_process_avatars: mock.Mock) -> None: - output_dir = self.make_import_output_dir("gitter") - gitter_file = os.path.join(os.path.dirname(__file__), "fixtures/gitter_data.json") - with self.assertLogs(level="INFO"): - do_convert_data(gitter_file, output_dir) - - with self.assertLogs(level="INFO"): - do_import_realm(output_dir, "test-gitter-import") - - realm = get_realm("test-gitter-import") - - # test rendered_messages - realm_users = UserProfile.objects.filter(realm=realm) - messages = Message.objects.filter(realm_id=realm.id, sender__in=realm_users) - for message in messages: - self.assertIsNotNone(message.rendered_content, None) - - self.assertTrue(github_auth_enabled(realm)) - for auth_backend_name in AUTH_BACKEND_NAME_MAP: - if auth_backend_name == GitHubAuthBackend.auth_backend_name: - continue - - self.assertFalse(auth_enabled_helper([auth_backend_name], realm)) - - def test_get_usermentions(self) -> None: - user_map = {"57124a4": 3, "57124b4": 5, "57124c4": 8} - user_short_name_to_full_name = { - "user": "user name", - "user2": "user2", - "user3": "user name 3", - "user4": "user 4", - } - messages = [ - {"text": "hi @user", "mentions": [{"screenName": "user", "userId": "57124a4"}]}, - { - "text": "hi @user2 @user3", - "mentions": [ - {"screenName": "user2", "userId": "57124b4"}, - {"screenName": "user3", "userId": "57124c4"}, - ], - }, - {"text": "hi @user4", "mentions": [{"screenName": "user4"}]}, - {"text": "hi @user5", "mentions": [{"screenName": "user", "userId": "5712ds4"}]}, - ] - - self.assertEqual(get_usermentions(messages[0], user_map, user_short_name_to_full_name), [3]) - self.assertEqual(messages[0]["text"], "hi @**user name**") - self.assertEqual( - get_usermentions(messages[1], user_map, user_short_name_to_full_name), [5, 8] - ) - self.assertEqual(messages[1]["text"], "hi @**user2** @**user name 3**") - self.assertEqual(get_usermentions(messages[2], user_map, user_short_name_to_full_name), []) - self.assertEqual(messages[2]["text"], "hi @user4") - self.assertEqual(get_usermentions(messages[3], user_map, user_short_name_to_full_name), []) - self.assertEqual(messages[3]["text"], "hi @user5")