test-documentation: Add --skip-external-links option.

This option causes test-documentation to verify only the internal links
that we control and that are important to keep correct.  This prevents
test-documentation from flaking in CI due to issues with the dozens of
third-party sites that we link to from various parts of our
documentation.

Tweaked by tabbott for comment clarity, and to also include
github.com/zulip links.

Fixes #10942.
This commit is contained in:
Sumanth V Rao 2018-12-05 10:12:46 +05:30 committed by Tim Abbott
parent 8ed4933005
commit e1f33e0f6b
2 changed files with 37 additions and 0 deletions

View File

@ -34,6 +34,7 @@ class BaseDocumentationSpider(scrapy.Spider):
def __init__(self, *args: Any, **kwargs: Any) -> None:
    """Initialise the spider's error flag and its external-link option.

    All positional and keyword arguments are forwarded unchanged to the
    parent class.  The optional ``skip_external`` keyword is additionally
    remembered on the instance; it stays ``None`` when the caller did not
    supply it.
    """
    super().__init__(*args, **kwargs)
    # No error has been recorded yet.
    self.has_error = False
    # ``dict.get`` already yields None for a missing key, which means
    # "flag not set"; any other value means the flag was passed.
    self.skip_external = kwargs.get('skip_external')
def _set_error_state(self) -> None:
self.has_error = True
@ -47,6 +48,18 @@ class BaseDocumentationSpider(scrapy.Spider):
def check_existing(self, response: Any) -> None:
    """Log ``response``; no further validation is performed here."""
    self.log(response)
def _is_external_link(self, url: str) -> bool:
if "zulip.readthedocs" in url or "zulipchat.com" in url or "zulip.org" in url:
# We want CI to check any links to Zulip sites.
return False
if (len(url) > 4 and url[:4] == "file") or ("localhost" in url):
# We also want CI to check any links to built documentation.
return False
if 'github.com/zulip' in url:
# Finally, links to our own GitHub organization should always work.
return False
return True
def check_permalink(self, response: Any) -> None:
self.log(response)
xpath_template = "//*[@id='{permalink}' or @name='{permalink}']"
@ -74,6 +87,9 @@ class BaseDocumentationSpider(scrapy.Spider):
elif '#' in link.url:
dont_filter = True
callback = self.check_permalink
if (self.skip_external is not None): # checks if flag is set to skip external link check.
if (self._is_external_link(link.url)):
continue
yield Request(link.url, method=method, callback=callback, dont_filter=dont_filter,
errback=self.error_callback)

View File

@ -12,6 +12,8 @@ case $1 in
-h|--help)
echo "--help, -h show this help message and exit"
echo "--loglevel=LEVEL, -L LEVEL log level (default: ERROR)"
echo "--skip-check-links skip checking of links"
echo "--skip-external-links skip checking of external links"
exit 0
;;
-L|--loglevel)
@ -20,6 +22,9 @@ case $1 in
--skip-check-links)
skip_check_links=1
;;
--skip-external-links)
skip_external_links=1
;;
esac
cd "$(dirname "$0")"/../docs
@ -36,6 +41,22 @@ if [ -n "$skip_check_links" ]; then
exit 0
fi
if [ -n "$skip_external_links" ]; then
    # Internal-only mode: crawl the documentation but let the spider skip
    # every link it classifies as external, then report and exit here so
    # the full link check further down never runs.
    color_message 94 "Testing only internal links in documentation..."
    cd ../tools/documentation_crawler
    # Turn off errexit (if it was enabled) so a failing crawl still reaches
    # the reporting code below instead of aborting the script silently.
    set +e
    # Call `scrapy crawl` directly because the skip_external spider
    # argument has to be passed with -a.
    scrapy crawl documentation_crawler -a skip_external=set "${loglevel[@]}"
    result=$?
    # Any non-zero status is a failure, not just 1: usage errors, a missing
    # scrapy executable (127) or death by signal must not count as a pass.
    if [ "$result" -ne 0 ]; then
        color_message 91 "Failed!"
        exit 1
    else
        color_message 92 "Passed!"
        exit 0
    fi
fi
color_message 94 "Testing links in documentation..."
cd ../tools/documentation_crawler