From 89b3129d48bf11a7fadb13f8d67a34beb89cbd55 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg <anders@zulip.com>
Date: Tue, 2 Apr 2024 13:06:59 -0700
Subject: [PATCH] documentation_crawler: Consider status.zulip.com external.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
---
 .../documentation_crawler/spiders/common/spiders.py          | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/documentation_crawler/documentation_crawler/spiders/common/spiders.py b/tools/documentation_crawler/documentation_crawler/spiders/common/spiders.py
index 746acf3f71..9d56ee9d57 100644
--- a/tools/documentation_crawler/documentation_crawler/spiders/common/spiders.py
+++ b/tools/documentation_crawler/documentation_crawler/spiders/common/spiders.py
@@ -80,11 +80,14 @@ class BaseDocumentationSpider(scrapy.Spider):
 
     def _is_external_link(self, url: str) -> bool:
         split_url = urlsplit(url)
-        if split_url.hostname == "chat.zulip.org":
+        if split_url.hostname in ("chat.zulip.org", "status.zulip.com"):
             # Since most chat.zulip.org URLs will be links to specific
             # logged-in content that the spider cannot verify, or the
             # homepage, there's no need to check those (which can
             # cause errors when chat.zulip.org is being updated).
+            #
+            # status.zulip.com is externally hosted and, in a peculiar twist of
+            # cosmic irony, often itself offline.
             return True
         if split_url.hostname == "zulip.readthedocs.io" or f".{split_url.hostname}".endswith(
             (".zulip.com", ".zulip.org")