2017-11-08 17:55:36 +01:00
|
|
|
# Zulip's main markdown implementation. See docs/subsystems/markdown.md for
|
2016-04-01 06:58:14 +02:00
|
|
|
# detailed documentation on our markdown syntax.
|
2020-06-18 23:14:53 +02:00
|
|
|
import datetime
|
2020-06-11 00:54:34 +02:00
|
|
|
import functools
|
|
|
|
import html
|
2012-10-22 05:06:28 +02:00
|
|
|
import logging
|
2020-06-11 00:54:34 +02:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import time
|
2017-11-05 05:30:31 +01:00
|
|
|
import urllib
|
2019-12-12 01:28:29 +01:00
|
|
|
import urllib.parse
|
2020-06-11 00:54:34 +02:00
|
|
|
from collections import defaultdict, deque
|
2020-06-11 21:44:23 +02:00
|
|
|
from dataclasses import dataclass
|
2020-06-11 00:54:34 +02:00
|
|
|
from io import StringIO
|
|
|
|
from typing import (
|
|
|
|
Any,
|
|
|
|
Callable,
|
|
|
|
Dict,
|
|
|
|
Generic,
|
|
|
|
Iterable,
|
|
|
|
List,
|
|
|
|
Optional,
|
|
|
|
Set,
|
|
|
|
Tuple,
|
|
|
|
TypeVar,
|
|
|
|
Union,
|
|
|
|
)
|
|
|
|
from typing.re import Match, Pattern
|
|
|
|
from xml.etree import ElementTree as etree
|
2020-06-03 06:37:07 +02:00
|
|
|
from xml.etree.ElementTree import Element, SubElement
|
2013-05-01 22:49:16 +02:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
import ahocorasick
|
|
|
|
import dateutil.parser
|
|
|
|
import dateutil.tz
|
|
|
|
import markdown
|
2014-05-21 08:11:29 +02:00
|
|
|
import requests
|
2013-03-08 06:27:16 +01:00
|
|
|
from django.conf import settings
|
2017-09-14 19:47:22 +02:00
|
|
|
from django.db.models import Q
|
2020-06-11 00:54:34 +02:00
|
|
|
from hyperlink import parse
|
|
|
|
from markdown.extensions import codehilite, nl2br, sane_lists, tables
|
|
|
|
from typing_extensions import TypedDict
|
2013-01-31 19:57:25 +01:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib import mention as mention
|
|
|
|
from zerver.lib.cache import NotFoundInCache, cache_with_key
|
2016-04-28 05:40:58 +02:00
|
|
|
from zerver.lib.camo import get_camo_url
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.emoji import (
|
|
|
|
codepoint_to_name,
|
|
|
|
emoticon_regex,
|
|
|
|
name_to_codepoint,
|
|
|
|
translate_emoticons,
|
|
|
|
)
|
2020-06-25 16:58:20 +02:00
|
|
|
from zerver.lib.exceptions import MarkdownRenderingException
|
2020-06-25 15:00:33 +02:00
|
|
|
from zerver.lib.markdown import fenced_code
|
|
|
|
from zerver.lib.markdown.fenced_code import FENCE_RE
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.mention import extract_user_group, possible_mentions, possible_user_group_mentions
|
|
|
|
from zerver.lib.tex import render_tex
|
2019-01-04 16:22:04 +01:00
|
|
|
from zerver.lib.thumbnail import user_uploads_or_external
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.timeout import TimeoutExpired, timeout
|
|
|
|
from zerver.lib.timezone import get_common_timezones
|
|
|
|
from zerver.lib.url_encoding import encode_stream, hash_util_encode
|
2016-10-27 12:06:44 +02:00
|
|
|
from zerver.lib.url_preview import preview as link_preview
|
2017-09-14 20:44:56 +02:00
|
|
|
from zerver.models import (
|
2018-02-09 19:49:13 +01:00
|
|
|
MAX_MESSAGE_LENGTH,
|
2017-09-14 20:44:56 +02:00
|
|
|
Message,
|
|
|
|
Realm,
|
2017-09-25 09:47:15 +02:00
|
|
|
UserGroup,
|
2017-10-27 14:47:54 +02:00
|
|
|
UserGroupMembership,
|
2020-06-11 00:54:34 +02:00
|
|
|
UserProfile,
|
|
|
|
all_realm_filters,
|
|
|
|
get_active_streams,
|
|
|
|
realm_filters_for_realm,
|
2017-09-14 20:44:56 +02:00
|
|
|
)
|
2013-04-29 22:22:07 +02:00
|
|
|
|
2019-01-22 19:31:25 +01:00
|
|
|
# Return type of the zero-argument callable wrapped by `one_time`.
ReturnT = TypeVar('ReturnT')
|
|
|
|
|
|
|
|
def one_time(method: Callable[[], ReturnT]) -> Callable[[], ReturnT]:
    '''
    Use this decorator with extreme caution.
    The function you wrap should have no dependency
    on any arguments (no args, no kwargs) nor should
    it depend on any global state.

    The wrapped function is called at most once successfully: its
    result (including None) is stored on the first call and returned
    unchanged by every later call.  If the wrapped function raises,
    nothing is stored and the next call tries again.
    '''
    # A private sentinel marks "not computed yet".  Using None for this
    # would make a function that legitimately returns None run again on
    # every call.
    unset = object()
    val: Any = unset

    @functools.wraps(method)
    def cache_wrapper() -> ReturnT:
        nonlocal val
        if val is unset:
            val = method()
        return val

    return cache_wrapper
|
|
|
|
|
2020-05-02 06:24:43 +02:00
|
|
|
class FullNameInfo(TypedDict):
    """Dictionary shape with an integer `id` plus `email` and `full_name` strings."""
    id: int
    email: str
    full_name: str
|
2017-09-14 19:47:22 +02:00
|
|
|
|
2018-11-07 16:07:34 +01:00
|
|
|
# Loosely-typed dictionary alias: string keys, arbitrary values.
DbData = Dict[str, Any]

# Format version of the markdown rendering; stored along with rendered
# messages so that we can efficiently determine what needs to be re-rendered
version = 1

# Generic type variable used in the signatures of the tree-walking helpers.
_T = TypeVar('_T')
# An Element, a string, or None.
ElementStringNone = Union[Element, Optional[str]]

# Raw (uncompiled) pattern fragments; each one exposes a single named group.
AVATAR_REGEX = r'!avatar\((?P<email>[^)]*)\)'
GRAVATAR_REGEX = r'!gravatar\((?P<email>[^)]*)\)'
EMOJI_REGEX = r'(?P<syntax>:[\w\-\+]+:)'
|
2017-09-14 22:11:34 +02:00
|
|
|
|
2018-11-03 17:12:15 +01:00
|
|
|
def verbose_compile(pattern: str) -> Any:
    """Compile `pattern` in verbose mode, wrapped between two lazy capture groups.

    The resulting regex is anchored at both ends: the first group captures
    the text before `pattern` and the last group captures the text after it.
    Flags used: DOTALL, UNICODE and VERBOSE.
    """
    flags = re.DOTALL | re.UNICODE | re.VERBOSE
    wrapped = f"^(.*?){pattern}(.*?)$"
    return re.compile(wrapped, flags)
|
|
|
|
|
2019-01-22 19:31:25 +01:00
|
|
|
def normal_compile(pattern: str) -> Any:
    """Compile `pattern` (non-verbose) between a lazy leading and a greedy trailing group.

    The resulting regex is anchored at both ends: the first group captures
    the text before `pattern` and the last group captures everything after
    it.  Flags used: DOTALL and UNICODE.
    """
    wrapped = fr"^(.*?){pattern}(.*)$"
    flags = re.DOTALL | re.UNICODE
    return re.compile(wrapped, flags)
|
|
|
|
|
2017-09-15 00:25:38 +02:00
|
|
|
STREAM_LINK_REGEX = r"""
|
|
|
|
(?<![^\s'"\(,:<]) # Start after whitespace or specified chars
|
|
|
|
\#\*\* # and after hash sign followed by double asterisks
|
|
|
|
(?P<stream_name>[^\*]+) # stream name can contain anything
|
|
|
|
\*\* # ends by double asterisks
|
|
|
|
"""
|
|
|
|
|
2019-01-22 20:16:39 +01:00
|
|
|
@one_time
def get_compiled_stream_link_regex() -> Pattern:
    """Return STREAM_LINK_REGEX compiled with verbose_compile.

    The `one_time` decorator stores the result, so later calls return the
    object produced by the first call.
    """
    compiled = verbose_compile(STREAM_LINK_REGEX)
    return compiled
|
|
|
|
|
2019-06-21 17:31:16 +02:00
|
|
|
STREAM_TOPIC_LINK_REGEX = r"""
|
|
|
|
(?<![^\s'"\(,:<]) # Start after whitespace or specified chars
|
|
|
|
\#\*\* # and after hash sign followed by double asterisks
|
|
|
|
(?P<stream_name>[^\*>]+) # stream name can contain anything except >
|
|
|
|
> # > acts as separator
|
|
|
|
(?P<topic_name>[^\*]+) # topic name can contain anything
|
|
|
|
\*\* # ends by double asterisks
|
|
|
|
"""
|
|
|
|
|
|
|
|
@one_time
def get_compiled_stream_topic_link_regex() -> Pattern:
    """Return STREAM_TOPIC_LINK_REGEX compiled with verbose_compile.

    The `one_time` decorator stores the result, so later calls return the
    object produced by the first call.
    """
    compiled = verbose_compile(STREAM_TOPIC_LINK_REGEX)
    return compiled
|
|
|
|
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
# Cache for the compiled pattern built by get_web_link_regex(); None until
# that function first runs, and again after clear_state_for_testing().
LINK_REGEX: Pattern = None
|
2018-11-03 17:12:15 +01:00
|
|
|
|
|
|
|
def get_web_link_regex() -> Pattern:
    """Return the compiled bare-link regex, building it on first use.

    The compiled pattern is stored in the module-level LINK_REGEX and
    returned directly by later calls.  Whether the file:// alternative is
    included depends on settings.ENABLE_FILE_LINKS at the time the pattern
    is built.  The URL itself is captured in the named group `url`.
    """
    # We create this one time, but not at startup.  So the
    # first message rendered in any process will have some
    # extra costs.  It's roughly 75ms to run this code, so
    # caching the value in LINK_REGEX is super important here.
    global LINK_REGEX
    if LINK_REGEX is not None:
        return LINK_REGEX

    # All known TLDs as one regex alternation.
    tlds = '|'.join(list_of_tlds())

    # A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
    #
    # We detect a url either by the `https?://` or by building around the TLD.

    # In lieu of having a recursive regex (which python doesn't support) to match
    # arbitrary numbers of nested matching parenthesis, we manually build a regexp that
    # can match up to six
    # The inner_paren_contents chunk matches the innermore non-parenthesis-holding text,
    # and the paren_group matches text with, optionally, a matching set of parens
    inner_paren_contents = r"[^\s()\"]*"
    paren_group = r"""
        [^\s()\"]*? # Containing characters that won't end the URL
        (?: \( %s \) # and more characters in matched parens
        [^\s()\"]*? # followed by more characters
        )* # zero-or-more sets of paired parens
        """
    # Each pass substitutes another copy of paren_group into the single %s
    # placeholder, adding one nesting level; the final substitution closes
    # the innermost level with the paren-free chunk.
    nested_paren_chunk = paren_group
    for i in range(6):
        nested_paren_chunk = nested_paren_chunk % (paren_group,)
    nested_paren_chunk = nested_paren_chunk % (inner_paren_contents,)

    file_links = r"| (?:file://(/[^/ ]*)+/?)" if settings.ENABLE_FILE_LINKS else r""
    # This is an f-string, so literal regex braces are doubled ({{25,34}}).
    REGEX = fr"""
        (?<![^\s'"\(,:<]) # Start after whitespace or specified chars
        # (Double-negative lookbehind to allow start-of-string)
        (?P<url> # Main group
        (?:(?: # Domain part
        https?://[\w.:@-]+? # If it has a protocol, anything goes.
        |(?: # Or, if not, be more strict to avoid false-positives
        (?:[\w-]+\.)+ # One or more domain components, separated by dots
        (?:{tlds}) # TLDs (filled in via format from tlds-alpha-by-domain.txt)
        )
        )
        (?:/ # A path, beginning with /
        {nested_paren_chunk} # zero-to-6 sets of paired parens
        )?) # Path is optional
        | (?:[\w.-]+\@[\w.-]+\.[\w]+) # Email is separate, since it can't have a path
        {file_links} # File path start with file:///, enable by setting ENABLE_FILE_LINKS=True
        | (?:bitcoin:[13][a-km-zA-HJ-NP-Z1-9]{{25,34}}) # Bitcoin address pattern, see https://mokagio.github.io/tech-journal/2014/11/21/regex-bitcoin.html
        )
        (?= # URL must be followed by (not included in group)
        [!:;\?\),\.\'\"\>]* # Optional punctuation characters
        (?:\Z|\s) # followed by whitespace or end of string
        )
        """
    LINK_REGEX = verbose_compile(REGEX)
    return LINK_REGEX
|
|
|
|
|
|
|
|
def clear_state_for_testing() -> None:
    """Reset the cached link regex so the next get_web_link_regex() call rebuilds it."""
    # The link regex never changes in production, but our tests
    # try out both sides of ENABLE_FILE_LINKS, so we need
    # a way to clear it.
    global LINK_REGEX
    LINK_REGEX = None
|
|
|
|
|
2020-06-26 20:54:05 +02:00
|
|
|
# Logger used by this module; getLogger() with no name returns the root logger.
markdown_logger = logging.getLogger()
|
2018-07-03 07:25:29 +02:00
|
|
|
|
2018-11-07 16:07:34 +01:00
|
|
|
def rewrite_local_links_to_relative(db_data: Optional[DbData], link: str) -> str:
    """If the link points to a local destination (e.g. #narrow/...),
    generate a relative link that will open it in the current window.

    The link is returned unchanged when `db_data` is empty or None, when it
    does not start with the realm URI followed by "/", or when the remainder
    would not resolve back to the same URL.
    """
    if not db_data:
        return link

    realm_uri_prefix = db_data['realm_uri'] + "/"
    if not link.startswith(realm_uri_prefix):
        return link

    relative_part = link[len(realm_uri_prefix):]
    # Only rewrite when the relative form, resolved against the realm
    # prefix, gives back exactly the original link.
    if urllib.parse.urljoin(realm_uri_prefix, relative_part) != link:
        return link

    return relative_part
|
|
|
|
|
2019-03-01 01:53:18 +01:00
|
|
|
def url_embed_preview_enabled(message: Optional[Message]=None,
                              realm: Optional[Realm]=None,
                              no_previews: bool=False) -> bool:
    """Report whether URL embed previews are enabled for this context.

    Returns False when settings.INLINE_URL_EMBED_PREVIEW is off or
    `no_previews` is set.  Otherwise uses the realm's
    `inline_url_embed_preview` flag, taking the realm from `message` when
    `realm` is not given; with no realm at all, returns True.
    """
    if not settings.INLINE_URL_EMBED_PREVIEW or no_previews:
        return False

    if realm is None and message is not None:
        realm = message.get_realm()

    if realm is not None:
        return realm.inline_url_embed_preview

    # realm can be None for odd use cases
    # like generating documentation or running
    # test code
    return True
|
|
|
|
|
2019-03-01 01:53:18 +01:00
|
|
|
def image_preview_enabled(message: Optional[Message]=None,
                          realm: Optional[Realm]=None,
                          no_previews: bool=False) -> bool:
    """Report whether inline image previews are enabled for this context.

    Returns False when settings.INLINE_IMAGE_PREVIEW is off or
    `no_previews` is set.  Otherwise uses the realm's
    `inline_image_preview` flag, taking the realm from `message` when
    `realm` is not given; with no realm at all, returns True.
    """
    if not settings.INLINE_IMAGE_PREVIEW or no_previews:
        return False

    if realm is None and message is not None:
        realm = message.get_realm()

    if realm is not None:
        return realm.inline_image_preview

    # realm can be None for odd use cases
    # like generating documentation or running
    # test code
    return True
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def list_of_tlds() -> List[str]:
    """Return the known top-level domains, lowercased and sorted longest first.

    The names are read from tlds-alpha-by-domain.txt in the directory that
    contains this module.  Comment lines (starting with "#"), blank lines
    and the blacklisted names are skipped.
    """
    # HACK we manually blacklist a few domains
    blacklist = {'py', 'md'}

    # tlds-alpha-by-domain.txt comes from https://data.iana.org/TLD/tlds-alpha-by-domain.txt
    tlds_file = os.path.join(os.path.dirname(__file__), 'tlds-alpha-by-domain.txt')

    tlds = []
    # The context manager closes the file handle as soon as we are done.
    with open(tlds_file) as f:
        for line in f:
            tld = line.strip().lower()
            # Filter on the normalized name rather than the raw line, so
            # the blacklist still applies to a last line that has no
            # trailing newline.  Blank lines are dropped because an empty
            # entry would become an empty alternative in the '|'-joined
            # regex built by get_web_link_regex.
            if not tld or tld.startswith('#') or tld in blacklist:
                continue
            tlds.append(tld)

    # Longest first: in a '|'-joined alternation the regex engine tries
    # alternatives left to right, so longer names must precede their prefixes.
    tlds.sort(key=len, reverse=True)
    return tlds
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def walk_tree(root: Element,
              processor: Callable[[Element], Optional[_T]],
              stop_after_first: bool=False) -> List[_T]:
    """Apply `processor` to every descendant of `root`, breadth-first.

    `root` itself is never passed to `processor`.  Every non-None value
    that `processor` returns is collected, in visiting order.  When
    `stop_after_first` is true, the walk ends as soon as one result has
    been collected.
    """
    results = []
    queue = deque([root])

    while queue:
        current = queue.popleft()
        for child in current:
            # Only elements that have children need to be expanded later.
            # Ask for len() explicitly: truth-testing an Element means the
            # same thing but is deprecated.
            if len(child) > 0:
                queue.append(child)

            result = processor(child)
            if result is not None:
                results.append(result)
                if stop_after_first:
                    return results

    return results
|
|
|
|
|
2020-06-11 21:44:23 +02:00
|
|
|
@dataclass
class ElementFamily:
    """Context that walk_tree_with_family records for each element it reports."""
    # Parent of `parent`; None when `parent` is the root of the walk.
    grandparent: Optional[Element]
    # Element whose children were being iterated.
    parent: Element
    # Element that was passed to the processor.
    child: Element
    # True when `parent` or one of its ancestors in the walk is a <blockquote>.
    in_blockquote: bool
|
2017-12-25 21:35:23 +01:00
|
|
|
|
2020-05-09 02:59:28 +02:00
|
|
|
# Type of the value carried by a ResultWithFamily.
T = TypeVar("T")

class ResultWithFamily(Generic[T]):
    """Pairs a result value with the ElementFamily it was found in."""
    family: ElementFamily
    result: T

    def __init__(self, family: ElementFamily, result: T):
        self.family = family
        self.result = result
|
2017-12-25 21:35:23 +01:00
|
|
|
|
2020-05-09 03:08:01 +02:00
|
|
|
class ElementPair:
    """Links an Element to the ElementPair of its parent, forming an ancestor chain."""
    # Pair for the enclosing element; None for the root of the walk.
    parent: Optional["ElementPair"]
    value: Element

    def __init__(self, parent: Optional["ElementPair"], value: Element):
        self.parent = parent
        self.value = value
|
2018-03-10 07:51:01 +01:00
|
|
|
|
2017-12-25 21:35:23 +01:00
|
|
|
def walk_tree_with_family(root: Element,
                          processor: Callable[[Element], Optional[_T]],
                          ) -> List[ResultWithFamily[_T]]:
    """Breadth-first walk that reports each result together with its surroundings.

    `processor` is applied to every descendant of `root` (never to `root`
    itself).  For each non-None result, a ResultWithFamily is collected
    whose ElementFamily holds the element, its parent, its grandparent
    (None when the parent is `root`) and whether the parent or any of its
    ancestors is a blockquote.
    """
    results = []

    queue = deque([ElementPair(parent=None, value=root)])
    while queue:
        current_pair = queue.popleft()
        for child in current_pair.value:
            # Only elements that have children need to be expanded later.
            # Ask for len() explicitly: truth-testing an Element means the
            # same thing but is deprecated.
            if len(child) > 0:
                queue.append(ElementPair(parent=current_pair, value=child))

            result = processor(child)
            if result is None:
                continue

            if current_pair.parent is not None:
                grandparent = current_pair.parent.value
            else:
                grandparent = None

            family = ElementFamily(
                grandparent=grandparent,
                parent=current_pair.value,
                child=child,
                in_blockquote=has_blockquote_ancestor(current_pair),
            )
            results.append(ResultWithFamily(
                family=family,
                result=result,
            ))

    return results
|
|
|
|
|
2019-07-11 16:26:31 +02:00
|
|
|
def has_blockquote_ancestor(element_pair: Optional[ElementPair]) -> bool:
    """Return True if `element_pair` or any pair up its parent chain wraps a blockquote.

    The element held by `element_pair` itself is checked first; None
    yields False.
    """
    current = element_pair
    while current is not None:
        if current.value.tag == 'blockquote':
            return True
        current = current.parent
    return False
|
|
|
|
|
2013-05-11 15:50:02 +02:00
|
|
|
@cache_with_key(lambda tweet_id: tweet_id, cache_name="database", with_statsd_key="tweet_data")
def fetch_tweet_data(tweet_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the data for one tweet as a dictionary, or None if unavailable.

    Under settings.TEST_SUITE the data comes from testing_mocks.twitter.
    Otherwise the Twitter API is queried with the credentials from
    settings; None is returned when any credential is unset, when the
    twitter library lacks the expected API, or on Twitter errors other
    than rate limiting / capacity problems.

    Raises:
        TimeoutExpired: the API call did not finish within 3 seconds.
        twitter.TwitterError: error code 88 or 130; re-raised so that the
            failure is not cached.

    NOTE(review): per the comments below, returned values (including None)
    are cached by the cache_with_key decorator under the tweet id, while
    raised exceptions are not -- confirm against cache_with_key.
    """
    if settings.TEST_SUITE:
        from . import testing_mocks
        res = testing_mocks.twitter(tweet_id)
    else:
        creds = {
            'consumer_key': settings.TWITTER_CONSUMER_KEY,
            'consumer_secret': settings.TWITTER_CONSUMER_SECRET,
            'access_token_key': settings.TWITTER_ACCESS_TOKEN_KEY,
            'access_token_secret': settings.TWITTER_ACCESS_TOKEN_SECRET,
        }
        # All four credentials must be set before we talk to Twitter.
        if not all(creds.values()):
            return None

        # We lazily import twitter here because its import process is
        # surprisingly slow, and doing so has a significant impact on
        # the startup performance of `manage.py` commands.
        import twitter

        try:
            api = twitter.Api(tweet_mode='extended', **creds)
            # Sometimes Twitter hangs on responses.  Timing out here
            # will cause the Tweet to go through as-is with no inline
            # preview, rather than having the message be rejected
            # entirely. This timeout needs to be less than our overall
            # formatting timeout.
            tweet = timeout(3, api.GetStatus, tweet_id)
            res = tweet.AsDict()
        except AttributeError:
            markdown_logger.error('Unable to load twitter api, you may have the wrong '
                                  'library installed, see https://github.com/zulip/zulip/issues/86')
            return None
        except TimeoutExpired:
            # We'd like to try again later and not cache the bad result,
            # so we need to re-raise the exception (just as though
            # we were being rate-limited)
            raise
        except twitter.TwitterError as e:
            # The first exception argument is inspected as a one-element
            # sequence whose item may carry a numeric 'code'.
            t = e.args[0]
            if len(t) == 1 and ('code' in t[0]) and (t[0]['code'] == 34):
                # Code 34 means that the message doesn't exist; return
                # None so that we will cache the error
                return None
            elif len(t) == 1 and ('code' in t[0]) and (t[0]['code'] == 88 or
                                                       t[0]['code'] == 130):
                # Code 88 means that we were rate-limited and 130
                # means Twitter is having capacity issues; either way
                # just raise the error so we don't cache None and will
                # try again later.
                raise
            else:
                # It's not clear what to do in cases of other errors,
                # but for now it seems reasonable to log at error
                # level (so that we get notified), but then cache the
                # failure to proceed with our usual work
                markdown_logger.exception("Unknown error fetching tweet data")
                return None
    return res
|
|
|
|
|
2017-11-03 03:12:25 +01:00
|
|
|
# Matchers for the text that follows a "<" in an HTML document; they are
# applied by fetch_open_graph_image to the pieces produced by
# content.split('<'), so each pattern starts at the tag name.
HEAD_START_RE = re.compile('^head[ >]')
HEAD_END_RE = re.compile('^/head[ >]')
META_START_RE = re.compile('^meta[ >]')
META_END_RE = re.compile('^/meta[ >]')
|
2014-05-21 08:11:29 +02:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def fetch_open_graph_image(url: str) -> Optional[Dict[str, Any]]:
    """Download `url` and extract its Open Graph image, title and description.

    Only the <meta> tags inside <head> are considered.  They are copied into
    a small document that is parsed with ElementTree.

    Returns a dict with keys 'image', 'title' and 'desc' (the latter two may
    be None).  Returns None when the request fails, when the collected head
    cannot be parsed, or when there is no og:image meta tag.
    """
    in_head = False
    # HTML will auto close meta tags, when we start the next tag add
    # a closing tag if it has not been closed yet.
    last_closed = True
    head = []
    # TODO: What if response content is huge? Should we get headers first?
    try:
        content = requests.get(url, timeout=1).text
    except Exception:
        return None
    # Extract the head and meta tags
    # All meta tags are self closing, have no children or are closed
    # automatically.
    # Each `part` is the text following one "<", so it starts with a tag name.
    for part in content.split('<'):
        if not in_head and HEAD_START_RE.match(part):
            # Started the head node output it to have a document root
            in_head = True
            head.append('<head>')
        elif in_head and HEAD_END_RE.match(part):
            # Found the end of the head close any remaining tag then stop
            # processing
            in_head = False
            if not last_closed:
                last_closed = True
                head.append('</meta>')
            head.append('</head>')
            break

        elif in_head and META_START_RE.match(part):
            # Found a meta node copy it
            if not last_closed:
                head.append('</meta>')
                last_closed = True
            head.append('<')
            head.append(part)
            # A tag without "/>" stays open until we close it ourselves.
            if '/>' not in part:
                last_closed = False

        elif in_head and META_END_RE.match(part):
            # End of a meta node just copy it to close the tag
            head.append('<')
            head.append(part)
            last_closed = True

    try:
        doc = etree.fromstring(''.join(head))
    except etree.ParseError:
        return None
    og_image = doc.find('meta[@property="og:image"]')
    og_title = doc.find('meta[@property="og:title"]')
    og_desc = doc.find('meta[@property="og:description"]')
    title = None
    desc = None
    # The image is mandatory; title and description are optional extras.
    if og_image is not None:
        image = og_image.get('content')
    else:
        return None
    if og_title is not None:
        title = og_title.get('content')
    if og_desc is not None:
        desc = og_desc.get('content')
    return {'image': image, 'title': title, 'desc': desc}
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def get_tweet_id(url: str) -> Optional[str]:
    """Extract the numeric tweet id from a twitter.com status URL.

    Returns None when the host is not twitter.com (or a subdomain of it)
    or when the path/fragment does not look like a status link.
    """
    parts = urllib.parse.urlparse(url)
    host = parts.netloc
    if host != 'twitter.com' and not host.endswith('.twitter.com'):
        return None

    # In old-style twitter.com/#!/wdaher/status/1231241234-style URLs,
    # we need to look at the fragment instead
    if parts.path == '/' and len(parts.fragment) > 5:
        candidate = parts.fragment
    else:
        candidate = parts.path

    found = re.match(r'^!?/.*?/status(es)?/(?P<tweetid>\d{10,30})(/photo/[0-9])?/?$', candidate)
    if found is None:
        return None
    return found.group("tweetid")
|
|
|
|
|
2013-08-28 22:45:26 +02:00
|
|
|
class InlineHttpsProcessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor that rewrites plain-http <img> sources via get_camo_url."""

    def run(self, root: Element) -> None:
        # Collect every <img> element found in the rendered tree.
        images = walk_tree(root, lambda node: node if node.tag == "img" else None)
        for image in images:
            src = image.get("src")
            # Only "http" sources are rewritten; everything else (for
            # example images on our own site, such as emoji) is left alone.
            if urllib.parse.urlsplit(src).scheme == "http":
                image.set("src", get_camo_url(src))
|
2013-04-30 21:37:22 +02:00
|
|
|
|
2020-02-14 00:09:22 +01:00
|
|
|
class BacktickInlineProcessor(markdown.inlinepatterns.BacktickInlineProcessor):
    """ Return a `<code>` element containing the matching text. """

    def handleMatch(self, m: Match[str], data: str) -> Tuple[Union[None, Element], int, int]:
        # Upstream does all the real work.  The only difference is that
        # upstream stores m.group(3).strip(), which makes it impossible to
        # keep leading/trailing whitespace inside an inline code block, so
        # the element text is overwritten with the unstripped group.
        node, begin, finish = super().handleMatch(m, data)
        code_text = m.group(3)
        if code_text:
            node.text = markdown.util.AtomicString(markdown.util.code_escape(code_text))
        return node, begin, finish
|
2017-11-22 02:27:19 +01:00
|
|
|
|
2013-03-08 06:27:16 +01:00
|
|
|
class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
2014-01-10 19:04:57 +01:00
|
|
|
TWITTER_MAX_IMAGE_HEIGHT = 400
|
2014-01-28 22:17:12 +01:00
|
|
|
TWITTER_MAX_TO_PREVIEW = 3
|
2018-02-23 12:16:09 +01:00
|
|
|
INLINE_PREVIEW_LIMIT_PER_MESSAGE = 5
|
2014-01-28 22:17:12 +01:00
|
|
|
|
2019-01-20 09:10:58 +01:00
|
|
|
def __init__(self, md: markdown.Markdown) -> None:
    """Hand the owning Markdown instance to the Treeprocessor base class."""
    super().__init__(md)
|
2014-01-10 19:04:57 +01:00
|
|
|
|
2019-12-06 09:18:02 +01:00
|
|
|
def add_a(
    self,
    root: Element,
    url: str,
    link: str,
    title: Optional[str]=None,
    desc: Optional[str]=None,
    class_attr: str="message_inline_image",
    data_id: Optional[str]=None,
    insertion_index: Optional[int]=None,
    already_thumbnailed: bool=False,
) -> None:
    """Add a <div><a><img/></a></div> preview for `url` under `root`.

    * `link` becomes the anchor's href; `title` and `data_id`, when given,
      become its "title" and "data-id" attributes.
    * `class_attr` is the wrapper div's class.
    * `insertion_index` inserts the wrapper at that child position of
      `root`; otherwise it is appended.
    * `already_thumbnailed` suppresses the /thumbnail rewrite of `url`.
    * `desc` is accepted for interface compatibility; this method does not
      currently render it anywhere.
    """
    # Update message.has_image attribute.
    message = self.md.zulip_message
    if 'message_inline_image' in class_attr and message:
        self.md.zulip_message.has_image = True

    if insertion_index is None:
        wrapper = SubElement(root, "div")
    else:
        wrapper = Element("div")
        root.insert(insertion_index, wrapper)
    wrapper.set("class", class_attr)

    anchor = SubElement(wrapper, "a")
    anchor.set("href", link)
    if title is not None:
        anchor.set("title", title)
    if data_id is not None:
        anchor.set("data-id", data_id)

    image = SubElement(anchor, "img")
    if settings.THUMBNAIL_IMAGES and (not already_thumbnailed) and user_uploads_or_external(url):
        # See docs/thumbnailing.md for some high-level documentation.
        #
        # Leading '/' characters are stripped from relative URLs so that
        # /thumbnail always receives them in one consistent form.
        quoted = urllib.parse.quote(url.lstrip('/'), safe='')
        image.set("src", f"/thumbnail?url={quoted}&size=thumbnail")
        image.set('data-src-fullsize', f"/thumbnail?url={quoted}&size=full")
    else:
        image.set("src", url)

    if class_attr != "message_inline_ref":
        return

    # Reference-style previews also get a title/description summary block.
    summary = SubElement(wrapper, "div")
    title_node = SubElement(summary, "div")
    title_node.set("class", "message_inline_image_title")
    title_node.text = title
    desc_node = SubElement(summary, "desc")
    desc_node.set("class", "message_inline_image_desc")
|
|
|
|
|
|
|
|
def add_oembed_data(self, root: Element, link: str, extracted_data: Dict[str, Any]) -> bool:
    """Render an oEmbed 'photo' or 'video' resource through add_a.

    Returns True when a preview was added and False otherwise (unknown
    resource type, or a photo without an image).  A 'video' resource must
    carry both 'html' and 'image' keys; a missing key raises KeyError.
    """
    resource_type = extracted_data.get('type', '')
    title = extracted_data.get('title')

    if resource_type == 'photo':
        photo_url = extracted_data.get('image')
        if not photo_url:
            return False
        self.add_a(root, photo_url, link, title=title)
        return True

    if resource_type == 'video':
        embed_html = extracted_data['html']
        poster_url = extracted_data['image']
        description = extracted_data.get('description')
        # The embed markup is passed in add_a's data_id position.
        self.add_a(root, poster_url, link, title, description,
                   "embed-video message_inline_image",
                   embed_html, already_thumbnailed=True)
        return True

    return False
|
|
|
|
|
|
|
|
def add_embed(self, root: Element, link: str, extracted_data: Dict[str, Any]) -> None:
    """Append a "message_embed" preview card for `link` to `root`.

    `extracted_data` is the metadata scraped for the link.  When it is
    flagged as oEmbed and add_oembed_data handles it, nothing else is
    added.  Otherwise a card is built from its 'image' (required; without
    it no card is added), 'title' and 'description' entries.
    """
    oembed = extracted_data.get('oembed', False)
    if oembed and self.add_oembed_data(root, link, extracted_data):
        return

    img_link = extracted_data.get('image')
    if not img_link:
        # Don't add an embed if an image is not found
        return

    container = SubElement(root, "div")
    container.set("class", "message_embed")

    parsed_img_link = urllib.parse.urlparse(img_link)
    # Append domain where relative img_link url is given
    if not parsed_img_link.netloc:
        parsed_url = urllib.parse.urlparse(link)
        domain = '{url.scheme}://{url.netloc}/'.format(url=parsed_url)
        img_link = urllib.parse.urljoin(domain, img_link)

    # img_link comes from the remote page and ends up inside an unquoted
    # CSS url(...) token.  Whitespace, quotes, parentheses and backslashes
    # would terminate or corrupt that token and allow arbitrary CSS to be
    # injected, so percent-encode them; the URL still addresses the same
    # resource.
    css_safe_img_link = re.sub(
        r'''[\s"'()\\]''',
        lambda m: urllib.parse.quote(m.group(0)),
        img_link,
    )

    img = SubElement(container, "a")
    img.set("style", "background-image: url(" + css_safe_img_link + ")")
    img.set("href", link)
    img.set("class", "message_embed_image")

    data_container = SubElement(container, "div")
    data_container.set("class", "data-container")

    title = extracted_data.get('title')
    if title:
        title_elm = SubElement(data_container, "div")
        title_elm.set("class", "message_embed_title")
        a = SubElement(title_elm, "a")
        a.set("href", link)
        a.set("title", title)
        a.text = title

    description = extracted_data.get('description')
    if description:
        description_elm = SubElement(data_container, "div")
        description_elm.set("class", "message_embed_description")
        description_elm.text = description
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def get_actual_image_url(self, url: str) -> str:
    """Convert a per-site image *preview* URL into the image URL itself.

    Only GitHub "blob" pages are handled at present; any other URL is
    returned unchanged.
    See https://github.com/zulip/zulip/issues/4658 for more information.
    """
    parts = urllib.parse.urlparse(url)
    host = parts.netloc
    if host != 'github.com' and not host.endswith('.github.com'):
        return url

    # https://github.com/zulip/zulip/blob/master/static/images/logo/zulip-icon-128x128.png ->
    # https://raw.githubusercontent.com/zulip/zulip/master/static/images/logo/zulip-icon-128x128.png
    segments = parts.path.split('/')
    if len(segments) <= 3 or segments[3] != "blob":
        return url

    # Drop the "blob" segment and re-root the path on the raw-content host.
    raw_path = '/'.join(segments[:3] + segments[4:])
    return urllib.parse.urljoin('https://raw.githubusercontent.com', raw_path)
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def is_image(self, url: str) -> bool:
    """Return True when `url` should be previewed as an inline image.

    The decision is based purely on the file extension of the URL path
    (case-insensitive).  It is always False when image previews are
    disabled on the Markdown instance, or for pasteboard.co links.
    """
    if not self.md.image_preview_enabled:
        return False
    parsed_url = urllib.parse.urlparse(url)
    # pasteboard.co URLs end in an image extension but serve an HTML page,
    # so they must not be treated as images.
    if parsed_url.netloc == 'pasteboard.co':
        return False

    # List from https://support.google.com/chromeos/bin/answer.py?hl=en&answer=183093
    # Every entry includes the leading dot so that only real extensions
    # match (a path that merely ends in the letters "jpeg" does not).
    image_extensions = (".bmp", ".gif", ".jpe", ".jpeg", ".jpg", ".png", ".webp")
    return parsed_url.path.lower().endswith(image_extensions)
|
|
|
|
|
2019-02-14 17:15:30 +01:00
|
|
|
def corrected_image_source(self, url: str) -> Optional[str]:
    """Map a linx.li or wikipedia.org page URL to the actual image URL.

    Returns the rewritten URL, or None when the host is neither a
    *.wikipedia.org subdomain nor linx.li.

    It's structurally very similar to dropbox_image, and possibly should
    be rewritten to use open graph, but has some value.
    """
    parsed_url = urllib.parse.urlparse(url)
    if parsed_url.netloc.lower().endswith('.wikipedia.org'):
        # Redirecting from "/wiki/File:" to "/wiki/Special:FilePath/File:"
        # A possible alternative, that avoids the redirect after hitting "Special:"
        # is using the first characters of md5($filename) to generate the url
        domain = parsed_url.scheme + "://" + parsed_url.netloc
        # The slices assume the path starts with "/wiki/": path[:6] is that
        # prefix and path[5:] is "/<page name>".
        return domain + parsed_url.path[:6] + 'Special:FilePath' + parsed_url.path[5:]
    if parsed_url.netloc == 'linx.li':
        return 'https://linx.li/s' + parsed_url.path
    return None
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def dropbox_image(self, url: str) -> Optional[Dict[str, Any]]:
    """Build preview info for a Dropbox shared link.

    Returns None for non-Dropbox hosts, for Dropbox paths that are not
    shared links, and for albums/non-image files whose open graph data
    could not be fetched.  Otherwise returns a dict that always carries
    an "is_image" entry.
    """
    # TODO: The returned Dict could possibly be a TypedDict in future.
    parsed_url = urllib.parse.urlparse(url)
    host = parsed_url.netloc
    if host != 'dropbox.com' and not host.endswith('.dropbox.com'):
        return None

    path = parsed_url.path
    is_album = path.startswith('/sc/') or path.startswith('/photos/')
    is_shared_link = path.startswith('/s/') or path.startswith('/sh/')
    # Only allow preview Dropbox shared links
    if not is_shared_link and not is_album:
        return None

    # Try to retrieve open graph protocol info for a preview.
    # This might be redundant right now for shared links for images.
    # However, we might want to make use of title and description
    # in the future. If the actual image is too big, we might also
    # want to use the open graph image.
    image_info = fetch_open_graph_image(url)

    is_image = is_album or self.is_image(url)

    # If it is from an album or not an actual image file,
    # just use open graph image.
    if is_album or not is_image:
        # Without open graph data there is nothing to preview.
        if image_info is None:
            return None
        image_info["is_image"] = is_image
        return image_info

    # Otherwise, try to retrieve the actual image.
    # This is because open graph image from Dropbox may have padding
    # and gifs do not work.
    # TODO: What if image is huge? Should we get headers first?
    if image_info is None:
        image_info = {}
    image_info['is_image'] = True
    # Replace the whole query string with "dl=1".
    direct_download = parsed_url._replace(query="dl=1")
    image_info["image"] = urllib.parse.urlunparse(direct_download)
    return image_info
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def youtube_id(self, url: str) -> Optional[str]:
    """Return the YouTube video id found in `url`, or None.

    Always None when image previews are disabled on the Markdown instance.
    """
    if not self.md.image_preview_enabled:
        return None

    # Youtube video id extraction regular expression from https://pastebin.com/KyKAFv1s
    # Slightly modified to support URLs of the forms
    #   - youtu.be/<id>
    #   - youtube.com/playlist?v=<id>&list=<list-id>
    #   - youtube.com/watch_videos?video_ids=<id1>,<id2>,<id3>
    # If it matches, match.group(2) is the video id.
    schema_re = r'(?:https?://)'
    host_re = r'(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)'
    param_re = (
        r'(?:(?:(?:v|embed)/)|'
        r'(?:(?:(?:watch|playlist)(?:_popup|_videos)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v(?:ideo_ids)?=))'
    )
    id_re = r'([0-9A-Za-z_-]+)'
    youtube_re = rf'^({schema_re}?{host_re}{param_re}?)?{id_re}(?(1).+)?$'

    match = re.match(youtube_re, url)
    if match is None:
        return None
    video_id = match.group(2)
    # URLs of the form youtube.com/playlist?list=<list-id> are incorrectly matched
    if video_id == 'playlist':
        return None
    return video_id
|
|
|
|
|
2019-03-21 21:08:26 +01:00
|
|
|
def youtube_title(self, extracted_data: Dict[str, Any]) -> Optional[str]:
    """Return "YouTube - <title>", or None when no title was extracted."""
    title = extracted_data.get("title")
    return None if title is None else f"YouTube - {title}"
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def youtube_image(self, url: str) -> Optional[str]:
    """Return the default thumbnail URL for a YouTube link, or None."""
    yt_id = self.youtube_id(url)
    if yt_id is None:
        return None
    return f"https://i.ytimg.com/vi/{yt_id}/default.jpg"
|
2013-05-21 16:59:09 +02:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def vimeo_id(self, url: str) -> Optional[str]:
    """Return the numeric Vimeo video id found in `url`, or None.

    Always None when image previews are disabled on the Markdown instance.
    Accepts plain video URLs as well as channel and group video URLs.
    """
    if not self.md.image_preview_enabled:
        return None

    # If it matches, match.group('id') is the video id.
    # The dot in "vimeo.com" is escaped so that it only matches a literal
    # dot; unescaped it also accepted hosts such as "vimeoxcom".
    vimeo_re = (
        r'^((http|https)?:\/\/(www\.)?vimeo\.com\/'
        r'(?:channels\/(?:\w+\/)?|groups\/'
        r'([^\/]*)\/videos\/|)(?P<id>\d+)(?:|\/\?))$'
    )
    match = re.match(vimeo_re, url)
    if match is None:
        return None
    return match.group('id')
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def vimeo_title(self, extracted_data: Dict[str, Any]) -> Optional[str]:
    """Return "Vimeo - <title>", or None when no title was extracted."""
    title = extracted_data.get("title")
    return None if title is None else f"Vimeo - {title}"
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def twitter_text(self, text: str,
                 urls: List[Dict[str, str]],
                 user_mentions: List[Dict[str, Any]],
                 media: List[Dict[str, Any]]) -> Element:
    """
    Render tweet text as a <p> element, turning links, mentions and media
    into A tags and unicode emojis into images.

    The urls, user_mentions and media arguments are the corresponding
    entities from the twitter API; emojis are located by searching the
    text with `unicode_emoji_regex`.

    Step one records every occurrence of a URL, mention, media link or
    emoji as a dictionary holding its type, start and end offsets, and
    either the URL to link to plus the link text, or (for emojis) the
    codepoint and title.

    Step two walks those occurrences in order of start offset.  The plain
    text between the previous occurrence and the current one is stored on
    the text attribute of the P tag (before the first child) or on the
    tail of the most recently created child.

    Finally, whatever text remains is attached the same way.
    """
    spans: List[Dict[str, Any]] = []

    # Occurrences of shortened URLs
    for url_data in urls:
        short_url = url_data["url"]
        full_url = url_data["expanded_url"]
        spans.extend(
            {
                'type': 'url',
                'start': hit.start(),
                'end': hit.end(),
                'url': short_url,
                'text': full_url,
            }
            for hit in re.finditer(re.escape(short_url), text, re.IGNORECASE)
        )

    # Occurrences of @-mentions
    for user_mention in user_mentions:
        screen_name = user_mention['screen_name']
        mention_string = '@' + screen_name
        spans.extend(
            {
                'type': 'mention',
                'start': hit.start(),
                'end': hit.end(),
                'url': 'https://twitter.com/' + urllib.parse.quote(screen_name),
                'text': mention_string,
            }
            for hit in re.finditer(re.escape(mention_string), text, re.IGNORECASE)
        )

    # Occurrences of media links
    for media_item in media:
        short_url = media_item['url']
        expanded_url = media_item['expanded_url']
        spans.extend(
            {
                'type': 'media',
                'start': hit.start(),
                'end': hit.end(),
                'url': short_url,
                'text': expanded_url,
            }
            for hit in re.finditer(re.escape(short_url), text, re.IGNORECASE)
        )

    # Occurrences of emojis we have a name for
    for hit in re.finditer(unicode_emoji_regex, text, re.IGNORECASE):
        codepoint = unicode_emoji_to_codepoint(hit.group('syntax'))
        if codepoint not in codepoint_to_name:
            continue
        spans.append({
            'type': 'emoji',
            'start': hit.start(),
            'end': hit.end(),
            'codepoint': codepoint,
            'title': ':' + codepoint_to_name[codepoint] + ':',
        })

    # Stable sort: ties keep the url/mention/media/emoji order used above.
    ordered = sorted(spans, key=lambda span: span['start'])

    paragraph = Element('p')
    last_child: Optional[Element] = None

    def emit(chunk: str) -> None:
        """
        Store plain text before the first child or after the latest child
        """
        if last_child is None:
            paragraph.text = chunk
        else:
            last_child.tail = chunk

    db_data = self.md.zulip_db_data
    cursor = 0
    for span in ordered:
        # This occurrence begins inside text that is already linked; skip it
        if span['start'] < cursor:
            continue
        # Plain text between the previous occurrence and this one
        emit(text[cursor:span['start']])
        cursor = span['end']
        if span['type'] == 'emoji':
            node = make_emoji(span['codepoint'], span['title'])
        else:
            node = url_to_a(db_data, span['url'], span['text'])
            assert isinstance(node, Element)
        last_child = node
        paragraph.append(node)

    # Add any unused text
    emit(text[cursor:])
    return paragraph
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def twitter_link(self, url: str) -> Optional[Element]:
    """Build the inline preview element for a tweet URL.

    Returns a ``<div class="twitter-tweet">`` containing the author's
    avatar, the tweet text (rendered by ``self.twitter_text``), an
    attribution line and one preview per photo attachment.  Returns
    None when ``url`` is not a tweet URL, when no tweet data is
    available, or when anything goes wrong while building the preview.
    """
    tweet_id = get_tweet_id(url)
    if tweet_id is None:
        return None

    try:
        res = fetch_tweet_data(tweet_id)
        if res is None:
            return None
        user: Dict[str, Any] = res['user']

        tweet = Element("div")
        tweet.set("class", "twitter-tweet")
        avatar_link = SubElement(tweet, 'a')
        avatar_link.set("href", url)
        profile_img = SubElement(avatar_link, 'img')
        profile_img.set('class', 'twitter-avatar')
        # For some reason, for, e.g. tweet 285072525413724161,
        # python-twitter does not give us a
        # profile_image_url_https, but instead puts that URL in
        # profile_image_url.  So use _https if available, but fall
        # back gracefully.  The fallback key is only looked up when
        # it is actually needed, so its absence cannot break a tweet
        # that does carry the https URL.
        image_url = user.get('profile_image_url_https')
        if image_url is None:
            image_url = user['profile_image_url']
        profile_img.set('src', image_url)

        text = html.unescape(res['full_text'])
        urls = res.get('urls', [])
        user_mentions = res.get('user_mentions', [])
        media: List[Dict[str, Any]] = res.get('media', [])
        p = self.twitter_text(text, urls, user_mentions, media)
        tweet.append(p)

        span = SubElement(tweet, 'span')
        span.text = "- {} (@{})".format(user['name'], user['screen_name'])

        # Add image previews
        for media_item in media:
            # Only photos have a preview image
            if media_item['type'] != 'photo':
                continue

            # Find the image size that is smaller than
            # TWITTER_MAX_IMAGE_HEIGHT px tall or the smallest
            sizes_by_height = sorted(media_item['sizes'].items(),
                                     key=lambda x: x[1]['h'],
                                     reverse=True)
            if not sizes_by_height:
                # No size variants to choose from; skip this photo
                # rather than abandoning the whole tweet preview.
                continue
            size_name = next(
                (name for name, size in sizes_by_height
                 if size['h'] < self.TWITTER_MAX_IMAGE_HEIGHT),
                sizes_by_height[-1][0],
            )

            media_url = '{}:{}'.format(media_item['media_url_https'], size_name)
            img_div = SubElement(tweet, 'div')
            img_div.set('class', 'twitter-image')
            img_a = SubElement(img_div, 'a')
            img_a.set('href', media_item['url'])
            img = SubElement(img_a, 'img')
            img.set('src', media_url)

        return tweet
    except Exception:
        # We put this in its own try-except because it requires external
        # connectivity. If Twitter flakes out, we don't want to not-render
        # the entire message; we just want to not show the Twitter preview.
        markdown_logger.warning("Error building Twitter link", exc_info=True)
        return None
|
|
|
|
|
2020-05-09 03:44:56 +02:00
|
|
|
def get_url_data(self, e: Element) -> Optional[Tuple[str, Optional[str]]]:
    """Return ``(href, text)`` for an ``<a>`` element and None for anything else.

    NOTE(review): the href comes from ``e.get("href")``, so it would be
    None for an anchor without that attribute -- confirm such anchors
    never reach this point.
    """
    if e.tag != "a":
        return None
    return (e.get("href"), e.text)
|
|
|
|
|
2020-05-09 03:44:56 +02:00
|
|
|
def handle_image_inlining(
    self,
    root: Element,
    found_url: ResultWithFamily[Tuple[str, Optional[str]]],
) -> None:
    """Add an inline image preview for ``found_url`` near the link itself.

    Where the preview goes depends on the tag of the link's parent:

    * ``li``: the preview is added inside the list item.
    * ``p``: the preview is added to the grandparent, right after the
      paragraph (and after previews already inserted for it).
    * anything else: the preview is added to ``root``.

    When the link is a bare URL (no custom link text) and nothing else
    accompanies it, the now-redundant link (``li`` case) or the whole
    paragraph (``p`` case) is removed.
    """
    grandparent = found_url.family.grandparent
    parent = found_url.family.parent
    ahref_element = found_url.family.child
    (url, text) = found_url.result
    actual_url = self.get_actual_image_url(url)

    # url != text usually implies a named link, which we opt not to remove
    url_eq_text = text is None or url == text
    title = None if url_eq_text else text

    if parent.tag == 'li':
        # Reuse the URL computed above instead of resolving it again.
        self.add_a(parent, actual_url, url, title=title)
        if not parent.text and not ahref_element.tail and url_eq_text:
            parent.remove(ahref_element)

    elif parent.tag == 'p':
        parent_index = next(
            (index for index, uncle in enumerate(grandparent) if uncle is parent),
            None,
        )

        if parent_index is not None:
            ins_index = self.find_proper_insertion_index(grandparent, parent, parent_index)
            self.add_a(grandparent, actual_url, url, title=title, insertion_index=ins_index)
        else:
            # We're not inserting after parent, since parent not found.
            # Append to end of list of grandparent's children as normal
            self.add_a(grandparent, actual_url, url, title=title)

        # If link is alone in a paragraph, delete paragraph containing it.
        # Only attempt this when the paragraph really is a child of
        # grandparent; Element.remove() raises ValueError otherwise.
        if (parent_index is not None and
                len(parent) == 1 and
                (not parent.text or parent.text == "\n") and
                not ahref_element.tail and
                url_eq_text):
            grandparent.remove(parent)

    else:
        # If none of the above criteria match, fall back to old behavior
        self.add_a(root, actual_url, url, title=title)
|
2017-12-25 21:35:23 +01:00
|
|
|
|
|
|
|
def find_proper_insertion_index(self, grandparent: Element, parent: Element,
                                parent_index_in_grandparent: int) -> int:
    """Return the index in ``grandparent`` where the next preview for ``parent`` goes.

    If there are several inline images from the same paragraph, they
    must appear in the same order as their links, so the new preview is
    inserted after the last preview div that already belongs to
    ``parent``.  Starting right after ``parent``, siblings are skipped
    while they are ``div`` elements whose class is exactly
    ``message_inline_image`` or ``message_inline_ref`` and whose first
    link is one of the links inside ``parent``.
    """
    hrefs_in_parent = [anchor.attrib['href'] for anchor in parent.iter(tag="a")]
    preview_classes = ['message_inline_image', 'message_inline_ref']

    candidate = parent_index_in_grandparent + 1
    while candidate < len(grandparent):
        sibling = grandparent[candidate]

        is_preview_div = (
            sibling.tag == 'div' and
            'class' in sibling.keys() and
            sibling.attrib['class'] in preview_classes
        )
        if not is_preview_div:
            break

        first_link = list(sibling.iter(tag="a"))[0].attrib['href']
        if first_link not in hrefs_in_parent:
            # This preview belongs to some other paragraph.
            break

        candidate += 1

    return candidate
|
2017-11-27 10:03:18 +01:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def is_absolute_url(self, url: str) -> bool:
    """Return True when ``url`` has a network location (host) component."""
    netloc = urllib.parse.urlparse(url).netloc
    return bool(netloc)
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def run(self, root: Element) -> None:
    """Find the links in ``root``, update message flags and add inline previews.

    The steps, in order:

    1. Collect every ``<a>`` in the tree together with its family
       (see ``get_url_data``).
    2. If a message object is attached to the markdown engine, reset
       its ``has_link`` / ``has_image`` / ``potential_attachment_path_ids``
       fields and record the path ids of ``/user_uploads/`` links.
    3. Unless there are no links, or there are more previewable links
       than ``INLINE_PREVIEW_LIMIT_PER_MESSAGE``, add a preview for each
       distinct link outside a blockquote.  The first handler that
       applies wins: relative image, Dropbox, image, tweet, YouTube,
       then the generic URL embed.
    """
    # Get all URLs from the blob
    found_urls = walk_tree_with_family(root, self.get_url_data)
    unique_urls = {found_url.result[0] for found_url in found_urls}
    # Collect unique URLs which are not quoted as we don't do
    # inline previews for links inside blockquotes.
    unique_previewable_urls = {found_url.result[0] for found_url in found_urls
                               if not found_url.family.in_blockquote}

    # Set has_link and similar flags whenever a message is processed by markdown
    if self.md.zulip_message:
        self.md.zulip_message.has_link = len(found_urls) > 0
        self.md.zulip_message.has_image = False  # This is updated in self.add_a
        self.md.zulip_message.potential_attachment_path_ids = []

        for url in unique_urls:
            # Due to rewrite_local_links_to_relative, we need to
            # handle both relative URLs beginning with
            # `/user_uploads` and beginning with `user_uploads`.
            # This urllib construction converts the latter into
            # the former.
            parsed_url = urllib.parse.urlsplit(urllib.parse.urljoin("/", url))
            host = parsed_url.netloc

            # Links that name a host other than this realm's are skipped.
            if host != '' and host != self.md.zulip_realm.host:
                continue

            if not parsed_url.path.startswith("/user_uploads/"):
                continue

            # Everything after the "/user_uploads/" prefix is the path id.
            path_id = parsed_url.path[len("/user_uploads/"):]
            self.md.zulip_message.potential_attachment_path_ids.append(path_id)

    if len(found_urls) == 0:
        return

    # Too many previewable links: add no previews at all.
    if len(unique_previewable_urls) > self.INLINE_PREVIEW_LIMIT_PER_MESSAGE:
        return

    processed_urls: Set[str] = set()
    rendered_tweet_count = 0

    for found_url in found_urls:
        (url, text) = found_url.result

        # Preview each URL once, and only if it appears outside a blockquote.
        if url in unique_previewable_urls and url not in processed_urls:
            processed_urls.add(url)
        else:
            continue

        if not self.is_absolute_url(url):
            if self.is_image(url):
                self.handle_image_inlining(root, found_url)
            # We don't have a strong use case for doing url preview for relative links.
            continue

        dropbox_image = self.dropbox_image(url)
        if dropbox_image is not None:
            class_attr = "message_inline_ref"
            is_image = dropbox_image["is_image"]
            if is_image:
                class_attr = "message_inline_image"
                # Not making use of title and description of images
            self.add_a(root, dropbox_image['image'], url,
                       title=dropbox_image.get('title'),
                       desc=dropbox_image.get('desc', ""),
                       class_attr=class_attr,
                       already_thumbnailed=True)
            continue

        if self.is_image(url):
            image_source = self.corrected_image_source(url)
            if image_source is not None:
                # Keep the same position in the tree, but use the
                # corrected source as both URL and link text.
                found_url = ResultWithFamily(
                    family=found_url.family,
                    result=(image_source, image_source),
                )
            self.handle_image_inlining(root, found_url)
            continue

        if get_tweet_id(url) is not None:
            if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
                # Only render at most one tweet per message
                continue
            twitter_data = self.twitter_link(url)
            if twitter_data is None:
                # This link is not actually a tweet known to twitter
                continue
            rendered_tweet_count += 1
            div = SubElement(root, "div")
            div.set("class", "inline-preview-twitter")
            div.insert(0, twitter_data)
            continue
        youtube = self.youtube_image(url)
        if youtube is not None:
            yt_id = self.youtube_id(url)
            self.add_a(root, youtube, url, None, None,
                       "youtube-video message_inline_image",
                       yt_id, already_thumbnailed=True)
            # NOTE: We don't `continue` here, to allow replacing the URL with
            # the title, if INLINE_URL_EMBED_PREVIEW feature is enabled.
            # The entire preview would ideally be shown only if the feature
            # is enabled, but URL previews are a beta feature and YouTube
            # previews are pretty stable.

        # Generic URL embeds are skipped for messages sent by bots ...
        db_data = self.md.zulip_db_data
        if db_data and db_data['sent_by_bot']:
            continue

        # ... and when the embed preview feature is disabled.
        if not self.md.url_embed_preview_enabled:
            continue

        try:
            extracted_data = link_preview.link_embed_data_from_cache(url)
        except NotFoundInCache:
            # Nothing cached yet: record the URL on the message and move on.
            self.md.zulip_message.links_for_preview.add(url)
            continue

        if extracted_data:
            if youtube is not None:
                # The YouTube preview was already added above; only
                # replace the link text with the video title.
                title = self.youtube_title(extracted_data)
                if title is not None:
                    found_url.family.child.text = title
                continue
            self.add_embed(root, url, extracted_data)
            if self.vimeo_id(url):
                title = self.vimeo_title(extracted_data)
                if title:
                    found_url.family.child.text = title
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2013-11-12 23:37:33 +01:00
|
|
|
class Avatar(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders an avatar reference as an ``<img>``."""

    def handleMatch(self, match: Match[str]) -> Optional[Element]:
        """Build the avatar image for the email captured in ``match``.

        The image URL uses the user's id when the email is found in
        ``db_data['email_info']`` and the normalized email otherwise.
        """
        img = Element('img')
        email = match.group('email').strip().lower()

        db_data = self.md.zulip_db_data
        user_dict = None if db_data is None else db_data['email_info'].get(email)
        profile_id = None if user_dict is None else user_dict['id']

        attributes = (
            ('class', 'message_body_gravatar'),
            ('src', f'/avatar/{profile_id or email}?s=30'),
            ('title', email),
            ('alt', email),
        )
        for attr_name, attr_value in attributes:
            img.set(attr_name, attr_value)
        return img
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def possible_avatar_emails(content: str) -> Set[str]:
    """Return the non-empty matches of the avatar and gravatar regexes in ``content``."""
    return {
        email
        for pattern in (AVATAR_REGEX, GRAVATAR_REGEX)
        for email in re.findall(pattern, content)
        if email
    }
|
|
|
|
|
2018-07-18 14:36:04 +02:00
|
|
|
class Timestamp(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders a time expression as an HTML5 ``<time>``."""

    def handleMatch(self, match: Match[str]) -> Optional[Element]:
        """Convert the ``time`` group of ``match`` into an element.

        The input is first parsed as a date/time string; if that fails
        it is interpreted as a UNIX timestamp (seconds since the epoch).
        Returns a ``<time>`` element whose ``datetime`` attribute is the
        moment in UTC, or a ``<span class="timestamp-error">`` when the
        input cannot be interpreted either way.
        """
        time_input_string = match.group('time')
        timestamp = None
        try:
            timestamp = dateutil.parser.parse(time_input_string, tzinfos=get_common_timezones())
        except (ValueError, OverflowError):
            try:
                # A UNIX timestamp is an absolute moment, so build it
                # directly in UTC.  Without ``tz`` the result would be
                # naive *server-local* time, which the code below would
                # then mislabel as UTC on any host not running in UTC.
                timestamp = datetime.datetime.fromtimestamp(
                    float(time_input_string), tz=datetime.timezone.utc)
            except (ValueError, OverflowError, OSError):
                # Not a number, or outside the range the platform can
                # represent; reported as an invalid time below.
                pass

        if timestamp is None:
            error_element = Element('span')
            error_element.set('class', 'timestamp-error')
            error_element.text = markdown.util.AtomicString(
                f"Invalid time format: {time_input_string}")
            return error_element

        # Use HTML5 <time> element for valid timestamps.
        time_element = Element('time')
        if timestamp.tzinfo:
            timestamp = timestamp.astimezone(datetime.timezone.utc)
        else:
            # Parsed strings without a zone are taken to be UTC.
            timestamp = timestamp.replace(tzinfo=datetime.timezone.utc)
        time_element.set('datetime', timestamp.isoformat().replace('+00:00', 'Z'))
        # Set text to initial input, so simple clients translating
        # HTML to text will at least display something.
        time_element.text = markdown.util.AtomicString(time_input_string)
        return time_element
|
2018-07-18 14:36:04 +02:00
|
|
|
|
2017-06-19 23:30:14 +02:00
|
|
|
# All of our emojis (non-ZWJ sequences) belong to one of these unicode blocks:
|
|
|
|
# \U0001f100-\U0001f1ff - Enclosed Alphanumeric Supplement
|
|
|
|
# \U0001f200-\U0001f2ff - Enclosed Ideographic Supplement
|
|
|
|
# \U0001f300-\U0001f5ff - Miscellaneous Symbols and Pictographs
|
|
|
|
# \U0001f600-\U0001f64f - Emoticons (Emoji)
|
|
|
|
# \U0001f680-\U0001f6ff - Transport and Map Symbols
|
|
|
|
# \U0001f900-\U0001f9ff - Supplemental Symbols and Pictographs
|
|
|
|
# \u2000-\u206f - General Punctuation
|
|
|
|
# \u2300-\u23ff - Miscellaneous Technical
|
|
|
|
# \u2400-\u243f - Control Pictures
|
|
|
|
# \u2440-\u245f - Optical Character Recognition
|
|
|
|
# \u2460-\u24ff - Enclosed Alphanumerics
|
|
|
|
# \u2500-\u257f - Box Drawing
|
|
|
|
# \u2580-\u259f - Block Elements
|
|
|
|
# \u25a0-\u25ff - Geometric Shapes
|
|
|
|
# \u2600-\u26ff - Miscellaneous Symbols
|
|
|
|
# \u2700-\u27bf - Dingbats
|
|
|
|
# \u2900-\u297f - Supplemental Arrows-B
|
|
|
|
# \u2b00-\u2bff - Miscellaneous Symbols and Arrows
|
|
|
|
# \u3000-\u303f - CJK Symbols and Punctuation
|
|
|
|
# \u3200-\u32ff - Enclosed CJK Letters and Months
|
2017-11-03 03:12:25 +01:00
|
|
|
# Character class matching a single code point from the emoji ranges;
# the matched character is captured in the `syntax` group.
unicode_emoji_regex = (
    '(?P<syntax>['
    '\U0001F100-\U0001F64F'
    '\U0001F680-\U0001F6FF'
    '\U0001F900-\U0001F9FF'
    '\u2000-\u206F'
    '\u2300-\u27BF'
    '\u2900-\u297F'
    '\u2B00-\u2BFF'
    '\u3000-\u303F'
    '\u3200-\u32FF'
    '])'
)
|
2017-06-19 23:30:14 +02:00
|
|
|
# The equivalent JS regex is \ud83c[\udd00-\udfff]|\ud83d[\udc00-\ude4f]|\ud83d[\ude80-\udeff]|
|
|
|
|
# \ud83e[\udd00-\uddff]|[\u2000-\u206f]|[\u2300-\u27bf]|[\u2b00-\u2bff]|[\u3000-\u303f]|
|
|
|
|
# [\u3200-\u32ff]. See below comments for explanation. The JS regex is used by marked.js for
|
|
|
|
# frontend unicode emoji processing.
|
|
|
|
# The JS regex \ud83c[\udd00-\udfff]|\ud83d[\udc00-\ude4f] represents \U0001f100-\U0001f64f
|
|
|
|
# The JS regex \ud83d[\ude80-\udeff] represents \U0001f680-\U0001f6ff
|
|
|
|
# The JS regex \ud83e[\udd00-\uddff] represents \U0001f900-\U0001f9ff
|
|
|
|
# The JS regex [\u2000-\u206f] represents \u2000-\u206f
|
|
|
|
# The JS regex [\u2300-\u27bf] represents \u2300-\u27bf
|
|
|
|
# Similarly other JS regexes can be mapped to the respective unicode blocks.
|
|
|
|
# For more information, please refer to the following article:
|
|
|
|
# http://crocodillon.com/blog/parsing-emoji-unicode-in-javascript
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def make_emoji(codepoint: str, display_string: str) -> Element:
    """Build the ``<span>`` for a unicode emoji.

    ``display_string`` is the ``:name:`` form; it becomes the span's
    text, and the name with underscores turned into spaces becomes the
    title and aria-label.
    """
    # Replace underscore in emoji's title with space
    title = display_string[1:-1].replace("_", " ")

    span = Element('span')
    attributes = (
        ('class', f'emoji emoji-{codepoint}'),
        ('title', title),
        ('role', 'img'),
        ('aria-label', title),
    )
    for attr_name, attr_value in attributes:
        span.set(attr_name, attr_value)
    span.text = markdown.util.AtomicString(display_string)
    return span
|
2013-03-01 22:07:27 +01:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def make_realm_emoji(src: str, display_string: str) -> Element:
    """Build the ``<img>`` for a realm (custom) emoji.

    ``display_string`` is the ``:name:`` form; it is used verbatim as the
    alt text, and with the colons stripped and underscores turned into
    spaces as the title.
    """
    title = display_string[1:-1].replace("_", " ")
    return Element('img', {
        'src': src,
        'class': 'emoji',
        'alt': display_string,
        'title': title,
    })
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def unicode_emoji_to_codepoint(unicode_emoji: str) -> str:
    """Return the lowercase hex code point of a single-character emoji.

    Unicode codepoints are a minimum of length 4, so the result is
    padded with leading zeroes when it is shorter than four digits
    (e.g. ``'#'`` -> ``'0023'``).

    Raises TypeError (from ``ord``) if ``unicode_emoji`` is not exactly
    one character long.
    """
    return f'{ord(unicode_emoji):04x}'
|
|
|
|
|
2018-01-15 19:36:32 +01:00
|
|
|
class EmoticonTranslation(markdown.inlinepatterns.Pattern):
    """ Translates emoticons like `:)` into emoji like `:smile:`. """

    def handleMatch(self, match: Match[str]) -> Optional[Element]:
        """Return the emoji element for the matched emoticon.

        Returns None when no db_data is available or its
        ``translate_emoticons`` entry is falsy.
        """
        db_data = self.md.zulip_db_data
        if db_data is not None and db_data['translate_emoticons']:
            translated = translate_emoticons(match.group('emoticon'))
            # Strip the first and last character (the colons) to get the name.
            emoji_name = translated[1:-1]
            return make_emoji(name_to_codepoint[emoji_name], translated)
        return None
|
|
|
|
|
2016-06-24 20:03:56 +02:00
|
|
|
class UnicodeEmoji(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders a literal unicode emoji character."""

    def handleMatch(self, match: Match[str]) -> Optional[Element]:
        """Return the emoji element for the matched character.

        Returns None when its code point is not in ``codepoint_to_name``.
        """
        codepoint = unicode_emoji_to_codepoint(match.group('syntax'))
        if codepoint not in codepoint_to_name:
            return None
        display_string = ':' + codepoint_to_name[codepoint] + ':'
        return make_emoji(codepoint, display_string)
|
|
|
|
|
2013-03-01 22:07:27 +01:00
|
|
|
class Emoji(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders named emoji syntax."""

    def handleMatch(self, match: Match[str]) -> Optional[Element]:
        """Resolve the matched emoji name and return its element.

        Lookup order: the realm's active custom emoji (only while a
        message is being rendered), then the built-in 'zulip' emoji,
        then the name_to_codepoint table.  Unknown names fall through
        and the original syntax string is returned unchanged.
        """
        orig_syntax = match.group("syntax")
        name = orig_syntax[1:-1]

        db_data = self.md.zulip_db_data
        realm_emoji: Dict[str, Dict[str, str]] = (
            {} if db_data is None else db_data['active_realm_emoji']
        )

        if self.md.zulip_message and name in realm_emoji:
            return make_realm_emoji(realm_emoji[name]['source_url'], orig_syntax)
        if name == 'zulip':
            return make_realm_emoji('/static/generated/emoji/images/emoji/unicode/zulip.png', orig_syntax)
        if name in name_to_codepoint:
            return make_emoji(name_to_codepoint[name], orig_syntax)
        return orig_syntax
|
2013-03-01 22:07:27 +01:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def content_has_emoji_syntax(content: str) -> bool:
    """Report whether EMOJI_REGEX matches anywhere in ``content``."""
    # Match objects are always truthy, so bool() is True exactly when
    # a match was found.
    return bool(re.search(EMOJI_REGEX, content))
|
|
|
|
|
2017-03-20 16:56:39 +01:00
|
|
|
class Tex(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders a TeX body via render_tex."""

    def handleMatch(self, match: Match[str]) -> Element:
        """Return the rendered TeX as an element tree.

        If render_tex returns None, a ``<span class="tex-error">``
        containing the original ``$$...$$`` source is returned instead.
        """
        rendered = render_tex(match.group('body'), is_inline=True)
        if rendered is None:
            # Something went wrong while rendering
            error_span = Element('span')
            error_span.set('class', 'tex-error')
            error_span.text = '$$' + match.group('body') + '$$'
            return error_span

        # We need to give Python-Markdown an ElementTree object, but if we
        # give it one with correctly stored XML namespaces, it will mangle
        # everything when serializing it.  So xmlns is temporarily renamed
        # before parsing and then stored back as a normal attribute.
        assert ' zulip-xmlns="' not in rendered
        disguised = rendered.replace(' xmlns="', ' zulip-xmlns="')
        for _event, node in etree.iterparse(StringIO(disguised)):
            if 'zulip-xmlns' in node.attrib:
                node.attrib['xmlns'] = node.attrib.pop('zulip-xmlns')
            # iterparse reports elements as they close, so the last one
            # seen is the outermost element.
            root = node
        return root
|
|
|
|
|
2013-02-01 23:15:05 +01:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def sanitize_url(url: str) -> Optional[str]:
    """
    Sanitize a url against xss attacks.
    See the docstring on markdown.inlinepatterns.LinkPattern.sanitize_url.

    Returns the normalized URL, '' when the URL is so malformed that it
    cannot be parsed, or None when the URL is rejected.
    """
    try:
        scheme, netloc, path, params, query, fragment = urllib.parse.urlparse(
            url.replace(' ', '%20'))
    except ValueError:
        # Bad url - so bad it couldn't be parsed.
        return ''

    lacks_scheme_and_host = scheme == '' and netloc == ''
    if lacks_scheme_and_host and '@' in path:
        # No scheme or netloc but an '@' in the path: treat it as an
        # email address and give it the mailto: scheme.
        scheme = 'mailto'
    elif lacks_scheme_and_host and path.startswith('/'):
        # Allow domain-relative links
        return urllib.parse.urlunparse(('', '', path, params, query, fragment))
    elif lacks_scheme_and_host and (path, params, query) == ('', '', '') and len(fragment) > 0:
        # Allow fragment links
        return urllib.parse.urlunparse(('', '', '', '', '', fragment))

    # Zulip modification: If scheme is not specified, assume http://
    # We re-enter sanitize_url because netloc etc. need to be re-parsed.
    if not scheme:
        return sanitize_url('http://' + url)

    schemes_without_netloc = ['mailto', 'news', 'file', 'bitcoin']
    if netloc == '' and scheme not in schemes_without_netloc:
        # A missing netloc fails regardless of anything else, so bail
        # out before doing any further checks.
        return None

    # Upstream code will accept a URL like javascript://foo because it
    # appears to have a netloc.  Additionally there are plenty of other
    # schemes that do weird things like launch external programs.  To be
    # on the safe side, we whitelist the scheme.
    allowed_schemes = ('http', 'https', 'ftp', 'mailto', 'file', 'bitcoin')
    if scheme not in allowed_schemes:
        return None

    # Upstream code also scans path, parameters, and query for colon
    # characters, because some aliases for javascript: appear to
    # urllib.parse to have no scheme, and relative links (i.e.
    # "foo/bar.html") have no scheme either.  An empty scheme was
    # already converted to http:// above, so that colon check (which
    # would also forbid a lot of legitimate URLs) is skipped here.

    # Url passes all tests. Return url as-is.
    return urllib.parse.urlunparse((scheme, netloc, path, params, query, fragment))
|
2013-02-01 23:15:05 +01:00
|
|
|
|
2018-11-07 16:07:34 +01:00
|
|
|
def url_to_a(db_data: Optional[DbData], url: str, text: Optional[str]=None) -> Union[Element, str]:
    """Turn ``url`` into an ``<a>`` element, or return it as plain text.

    The URL is passed through sanitize_url; if that returns None the
    original ``url`` string is returned so it renders as plain text.
    Otherwise the href is run through rewrite_local_links_to_relative
    and the link text defaults to the URL itself (as an AtomicString).
    """
    href = sanitize_url(url)
    if href is None:
        # Rejected by sanitize_url; render it as plain text.
        return url

    link_text = markdown.util.AtomicString(url) if text is None else text
    local_href = rewrite_local_links_to_relative(db_data, href)

    anchor = Element('a')
    anchor.set('href', local_href)
    anchor.text = link_text
    return anchor
|
|
|
|
|
2019-01-22 19:08:33 +01:00
|
|
|
class CompiledPattern(markdown.inlinepatterns.Pattern):
    """Inline pattern base class that accepts an already-compiled regex."""

    def __init__(self, compiled_re: Pattern, md: markdown.Markdown) -> None:
        # Deliberately does not call the superclass's small __init__:
        # that one compiles a pattern string, whereas here the caller
        # hands us a regex that is already compiled.
        self.md = md
        self.compiled_re = compiled_re
|
2013-06-21 23:42:33 +02:00
|
|
|
|
2019-01-22 19:08:33 +01:00
|
|
|
class AutoLink(CompiledPattern):
    """Inline pattern that links the regex's 'url' group via url_to_a."""

    def handleMatch(self, match: Match[str]) -> ElementStringNone:
        """Return url_to_a's result for the matched URL."""
        return url_to_a(self.md.zulip_db_data, match.group('url'))
|
2012-10-22 02:32:18 +02:00
|
|
|
|
2019-08-11 07:41:34 +02:00
|
|
|
class OListProcessor(sane_lists.SaneOListProcessor):
    """ Ordered lists, initialized with a tab length of 2 """

    def __init__(self, parser: Any) -> None:
        # The Markdown instance's tab_length is 2 only while the
        # superclass initializes, then it is set to 4.
        md = parser.md
        md.tab_length = 2
        super().__init__(parser)
        md.tab_length = 4
|
2012-11-02 18:25:37 +01:00
|
|
|
|
2019-08-11 07:41:34 +02:00
|
|
|
class UListProcessor(sane_lists.SaneUListProcessor):
    """ Unordered lists, but with 2-space indent """

    def __init__(self, parser: Any) -> None:
        # The Markdown instance's tab_length is 2 only while the
        # superclass initializes, then it is set to 4.
        md = parser.md
        md.tab_length = 2
        super().__init__(parser)
        md.tab_length = 4
|
2017-03-26 21:14:05 +02:00
|
|
|
|
|
|
|
class ListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):
    """ Process unordered list blocks.

    Based on markdown.blockprocessors.ListIndentProcessor, but with 2-space indent
    """

    def __init__(self, parser: Any) -> None:
        # HACK: the tab length is 2 only for the duration of the
        # superclass initialization, so that bulleted lists (and only
        # bulleted lists) work off 2-space indentation; afterwards it
        # is set to 4.
        md = parser.md
        md.tab_length = 2
        super().__init__(parser)
        md.tab_length = 4
|
2017-03-26 21:14:05 +02:00
|
|
|
|
2019-07-31 08:04:32 +02:00
|
|
|
class HashHeaderProcessor(markdown.blockprocessors.HashHeaderProcessor):
    """ Process Hash Headers.

    Based on markdown.blockprocessors.HashHeaderProcessor, but requires space for heading.
    """

    # Original regex for hashheader is
    # RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
    # The only difference below is the `\s` required between the run of
    # `#` characters and the header text.
    RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})\s(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
|
|
|
|
|
2019-01-08 11:30:13 +01:00
|
|
|
class BlockQuoteProcessor(markdown.blockprocessors.BlockQuoteProcessor):
    """ Process BlockQuotes.

    Based on markdown.blockprocessors.BlockQuoteProcessor, but with an
    extra negative lookahead in the block regex and with every mention
    inside the quote rewritten to its silent ``@_`` form.
    """

    # Original regex for blockquote is RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
    # NOTE(review): the added lookahead appears to reject quotes made up
    # solely of empty `>` lines -- confirm against the test cases.
    RE = re.compile(r'(^|\n)(?!(?:[ ]{0,3}>\s*(?:$|\n))*(?:$|\n))'
                    r'[ ]{0,3}>[ ]?(.*)')
    mention_re = re.compile(mention.find_mentions)

    def clean(self, line: str) -> str:
        """Silence mentions in ``line``, then strip the quote marker."""
        def silence(found: Match[str]) -> str:
            return "@_{}".format(found.group('match'))

        # Silence all the mentions inside blockquotes
        silenced_line = self.mention_re.sub(silence, line)

        # And then run the upstream processor's code for removing the '>'
        return super().clean(silenced_line)
|
|
|
|
|
2020-06-11 21:44:23 +02:00
|
|
|
@dataclass
class Fence:
    """One entry in a stack of currently open fenced blocks."""

    # The fence marker text that opened the block.
    fence_str: str
    # False for fences whose language is 'quote' or 'quoted', True for
    # every other fence (as computed by MarkdownListPreprocessor.run).
    is_code: bool
|
|
|
|
|
2020-06-25 21:38:36 +02:00
|
|
|
class MarkdownListPreprocessor(markdown.preprocessors.Preprocessor):
    """ Allows list blocks that come directly after another block
        to be rendered as a list.

        Detects paragraphs that have a matching list item that comes
        directly after a line of text, and inserts a newline between
        to satisfy Markdown"""

    LI_RE = re.compile(r'^[ ]*([*+-]|\d\.)[ ]+(.*)', re.MULTILINE)

    def run(self, lines: List[str]) -> List[str]:
        """ Insert a newline between a paragraph and ulist if missing """
        result: List[str] = []
        open_fences: List[Fence] = []
        in_code_fence: bool = False

        for line, next_line in zip(lines, lines[1:]):
            # Ignore anything that is inside a fenced code block but not quoted.
            # We ignore all lines where some parent is a non quote code block.
            fence_match = FENCE_RE.match(line)
            if fence_match:
                lang = fence_match.group('lang')
                fence_str = fence_match.group('fence')
                # A fence line with no language that repeats the marker
                # of the innermost open fence closes that fence; any
                # other fence line opens a new one.
                if not lang and open_fences and open_fences[-1].fence_str == fence_str:
                    open_fences.pop()
                else:
                    open_fences.append(Fence(fence_str, lang not in ('quote', 'quoted')))
                in_code_fence = any(fence.is_code for fence in open_fences)

            result.append(line)
            if in_code_fence or not line:
                continue

            # If we're not in a fenced block and we detect an upcoming list
            # hanging off any block (including a list of another type), add
            # a newline.
            this_item = self.LI_RE.match(line)
            next_item = self.LI_RE.match(next_line)
            if next_item and (
                    not this_item or
                    # A one-character marker is a bullet, otherwise it
                    # is a "digit." marker; differing kinds means the
                    # list type changes.
                    (len(this_item.group(1)) == 1) != (len(next_item.group(1)) == 1)):
                result.append('')

        # The final line is never followed by an inserted blank line.
        result.extend(lines[-1:])
        return result
|
|
|
|
|
2019-08-30 18:36:14 +02:00
|
|
|
# Name for the outer capture group we use to separate whitespace and
# other delimiters from the actual content.  This value won't be an
# option in user-entered capture groups.
OUTER_CAPTURE_GROUP = "linkifier_actual_match"

def prepare_realm_pattern(source: str) -> str:
    """Wrap a realm filter regex with boundary checks and an outer group.

    The returned pattern only matches when the preceding character (if
    any) is whitespace or one of the opening delimiters ``' " ( , : <``,
    refuses to match when a word character follows directly, and
    captures what ``source`` matched as OUTER_CAPTURE_GROUP.
    """
    preceded_by_delimiter = r"""(?<![^\s'"\(,:<])"""
    not_followed_by_word_char = r"(?!\w)"
    return f"{preceded_by_delimiter}(?P<{OUTER_CAPTURE_GROUP}>{source}){not_followed_by_word_char}"
|
2013-07-15 17:56:45 +02:00
|
|
|
|
2013-06-05 17:45:57 +02:00
|
|
|
# Given a regular expression pattern, linkifies groups that match it
# using the provided format string to construct the URL.
class RealmFilterPattern(markdown.inlinepatterns.Pattern):
    """ Applied a given realm filter to the input """

    def __init__(self, source_pattern: str,
                 format_string: str,
                 markdown_instance: Optional[markdown.Markdown]=None) -> None:
        """Store the format string and the augmented source pattern."""
        self.format_string = format_string
        self.pattern = prepare_realm_pattern(source_pattern)
        markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)

    def handleMatch(self, m: Match[str]) -> Union[Element, str]:
        """Link the matched text to the URL built from the named groups."""
        # The format string is filled in %-style from the match's
        # named groups; the link text is exactly what the user's
        # pattern matched.
        target_url = self.format_string % m.groupdict()
        matched_text = m.group(OUTER_CAPTURE_GROUP)
        return url_to_a(self.md.zulip_db_data, target_url, matched_text)
|
2013-06-05 17:45:57 +02:00
|
|
|
|
2013-06-28 16:02:58 +02:00
|
|
|
class UserMentionPattern(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders user and wildcard mentions."""

    def handleMatch(self, m: Match[str]) -> Optional[Element]:
        """Render a mention as a ``<span class="user-mention">``.

        Returns None (leaving the text untouched) when no message or
        database data is attached, when the mention is not wrapped in
        ``**``, or when it names neither a wildcard nor a known user.

        Non-silent mentions are recorded on the message being rendered
        (``mentions_wildcard`` / ``mentions_user_ids``).  Silent mentions
        (the 'silent' group is ``_``) are rendered without the leading
        ``@`` and are not recorded.
        """
        match = m.group('match')
        silent = m.group('silent') == '_'

        db_data = self.md.zulip_db_data
        if not self.md.zulip_message or db_data is None:
            return None

        if not (match.startswith("**") and match.endswith("**")):
            return None
        name = match[2:-2]

        wildcard = mention.user_mention_matches_wildcard(name)

        # A name ending in "|<digits>" identifies the user by id rather
        # than by full name.
        id_syntax_match = re.match(r'.+\|(?P<user_id>\d+)$', name)
        if id_syntax_match:
            mentioned_id = id_syntax_match.group("user_id")
            user = db_data['mention_data'].get_user_by_id(mentioned_id)
        else:
            user = db_data['mention_data'].get_user_by_name(name)

        if wildcard:
            # A silent wildcard mention must not flag the message, for
            # the same reason silent user mentions are not recorded
            # below.
            if not silent:
                self.md.zulip_message.mentions_wildcard = True
            user_id = "*"
        elif user:
            if not silent:
                self.md.zulip_message.mentions_user_ids.add(user['id'])
            name = user['full_name']
            user_id = str(user['id'])
        else:
            # Don't highlight @mentions that don't refer to a valid user
            return None

        el = Element("span")
        el.set('data-user-id', user_id)
        text = f"{name}"
        if silent:
            el.set('class', 'user-mention silent')
        else:
            el.set('class', 'user-mention')
            text = f"@{text}"
        el.text = markdown.util.AtomicString(text)
        return el
|
2016-10-26 20:56:17 +02:00
|
|
|
|
2017-09-25 09:47:15 +02:00
|
|
|
class UserGroupMentionPattern(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders user group mentions."""

    def handleMatch(self, m: Match[str]) -> Optional[Element]:
        """Render a group mention as ``<span class="user-group-mention">``.

        Returns None when no message or database data is attached, or
        when the name does not resolve to a user group.  A resolved
        group's id is added to the message's mentions_user_group_ids.
        """
        match = m.group(2)

        db_data = self.md.zulip_db_data
        if not (self.md.zulip_message and db_data is not None):
            return None

        user_group = db_data['mention_data'].get_user_group(extract_user_group(match))
        if not user_group:
            # Don't highlight @-mentions that don't refer to a valid user
            # group.
            return None

        self.md.zulip_message.mentions_user_group_ids.add(user_group.id)
        group_name = user_group.name
        group_id = str(user_group.id)

        span = Element("span")
        span.set('class', 'user-group-mention')
        span.set('data-user-group-id', group_id)
        span.text = markdown.util.AtomicString(f"@{group_name}")
        return span
|
|
|
|
|
2019-01-22 19:08:33 +01:00
|
|
|
class StreamPattern(CompiledPattern):
    """Inline pattern that renders a stream link."""

    def find_stream_by_name(self, name: str) -> Optional[Dict[str, Any]]:
        """Look up ``name`` in db_data['stream_names'].

        Returns None when no database data is attached or the name is
        not present.  (``name`` is the stream name string taken from
        the match, not a Match object.)
        """
        db_data = self.md.zulip_db_data
        if db_data is None:
            return None
        stream = db_data['stream_names'].get(name)
        return stream

    def handleMatch(self, m: Match[str]) -> Optional[Element]:
        """Return an ``<a class="stream">`` element for a known stream.

        Returns None when no message is being rendered or the stream
        name cannot be resolved.
        """
        name = m.group('stream_name')

        if self.md.zulip_message:
            stream = self.find_stream_by_name(name)
            if stream is None:
                return None
            el = Element('a')
            el.set('class', 'stream')
            el.set('data-stream-id', str(stream['id']))
            # TODO: We should quite possibly not be specifying the
            # href here and instead having the browser auto-add the
            # href when it processes a message with one of these, to
            # provide more clarity to API clients.
            # Also do the same for StreamTopicPattern.
            stream_url = encode_stream(stream['id'], name)
            el.set('href', f'/#narrow/stream/{stream_url}')
            text = f'#{name}'
            el.text = markdown.util.AtomicString(text)
            return el
        return None
|
2016-10-26 20:56:17 +02:00
|
|
|
|
2019-06-21 17:31:16 +02:00
|
|
|
class StreamTopicPattern(CompiledPattern):
    """Inline pattern that renders a stream-and-topic link."""

    def find_stream_by_name(self, name: str) -> Optional[Dict[str, Any]]:
        """Look up ``name`` in db_data['stream_names'].

        Returns None when no database data is attached or the name is
        not present.  (``name`` is the stream name string taken from
        the match, not a Match object.)
        """
        db_data = self.md.zulip_db_data
        if db_data is None:
            return None
        stream = db_data['stream_names'].get(name)
        return stream

    def handleMatch(self, m: Match[str]) -> Optional[Element]:
        """Return an ``<a class="stream-topic">`` element for the match.

        Returns None when no message is being rendered, the stream name
        cannot be resolved, or the topic group did not participate in
        the match.
        """
        stream_name = m.group('stream_name')
        topic_name = m.group('topic_name')

        if self.md.zulip_message:
            stream = self.find_stream_by_name(stream_name)
            if stream is None or topic_name is None:
                return None
            el = Element('a')
            el.set('class', 'stream-topic')
            el.set('data-stream-id', str(stream['id']))
            stream_url = encode_stream(stream['id'], stream_name)
            topic_url = hash_util_encode(topic_name)
            link = f'/#narrow/stream/{stream_url}/topic/{topic_url}'
            el.set('href', link)
            text = f'#{stream_name} > {topic_name}'
            el.text = markdown.util.AtomicString(text)
            return el
        return None
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def possible_linked_stream_names(content: str) -> Set[str]:
    """Collect every stream name that ``content`` may link to.

    Combines the matches of STREAM_LINK_REGEX with the 'stream_name'
    group of each STREAM_TOPIC_LINK_REGEX match.
    """
    names = set(re.findall(STREAM_LINK_REGEX, content, re.VERBOSE))
    names.update(
        topic_link.group('stream_name')
        for topic_link in re.finditer(STREAM_TOPIC_LINK_REGEX, content, re.VERBOSE)
    )
    return names
|
|
|
|
|
2020-04-19 21:13:03 +02:00
|
|
|
class AlertWordNotificationProcessor(markdown.preprocessors.Preprocessor):
    """Preprocessor that records which users' alert words a message contains.

    The message text itself is never modified; matching user ids are
    added to ``self.md.zulip_message.user_ids_with_alert_words``.
    """

    # Single characters that may sit directly before / after an alert
    # word for the occurrence to count.  Every entry must be exactly
    # one character, since each is compared against one character of
    # the content.
    allowed_before_punctuation = {' ', '\n', '(', '"', '.', ',', '\'', ';', '[', '*', '`', '>'}
    allowed_after_punctuation = {' ', '\n', ')', '"', '?', ':', '.', ',', '\'', ';', ']', '!',
                                 '*', '`'}

    def check_valid_start_position(self, content: str, index: int) -> bool:
        """Report whether an alert word may start right after ``index``.

        ``index`` is the position of the character immediately before
        the word; a negative value means the word starts the content.
        """
        if index < 0 or content[index] in self.allowed_before_punctuation:
            return True
        return False

    def check_valid_end_position(self, content: str, index: int) -> bool:
        """Report whether an alert word may end right before ``index``.

        ``index`` is the position of the character immediately after
        the word; a value past the end means the word ends the content.
        """
        if index >= len(content) or content[index] in self.allowed_after_punctuation:
            return True
        return False

    def run(self, lines: Iterable[str]) -> Iterable[str]:
        """Scan the lowercased content for alert words; return ``lines`` as given."""
        db_data = self.md.zulip_db_data
        if self.md.zulip_message and db_data is not None:
            # We check for alert words here, the set of which are
            # dependent on which users may see this message.
            #
            # Our caller passes in the automaton of possible words.  We
            # don't do any special rendering; we just add the ids of the
            # users whose alert words we find to the set
            # self.md.zulip_message.user_ids_with_alert_words.

            realm_alert_words_automaton = db_data['realm_alert_words_automaton']

            if realm_alert_words_automaton is not None:
                content = '\n'.join(lines).lower()
                # The automaton reports the index of the LAST character
                # of each hit, so the neighbours to inspect are at
                # end_index - len(word) and end_index + 1.
                for end_index, (original_value, user_ids) in realm_alert_words_automaton.iter(content):
                    if self.check_valid_start_position(content, end_index - len(original_value)) and \
                       self.check_valid_end_position(content, end_index + 1):
                        self.md.zulip_message.user_ids_with_alert_words.update(user_ids)
        return lines
|
|
|
|
|
2019-08-11 13:04:53 +02:00
|
|
|
class LinkInlineProcessor(markdown.inlinepatterns.LinkInlineProcessor):
    """Upstream Markdown link processor with Zulip-specific adjustments."""

    def zulip_specific_link_changes(self, el: Element) -> Union[None, Element]:
        """Sanitize and rewrite the link element's href.

        Returns None when sanitize_url returns None for the href, so
        the link is not processed; otherwise returns ``el`` with its
        href and text updated.
        """
        raw_href = el.get('href')

        # Sanitize url or don't parse link. See linkify_tests in markdown_test_cases for banned syntax.
        safe_href = sanitize_url(self.unescape(raw_href.strip()))
        if safe_href is None:
            return None  # no-op; the link is not processed.

        # Rewrite local links to be relative
        final_href = rewrite_local_links_to_relative(self.md.zulip_db_data, safe_href)

        # Make changes to <a> tag attributes
        el.set("href", final_href)

        # Show link href if title is empty
        link_text = el.text if el.text.strip() else final_href

        # Prevent realm_filters from running on the content of a Markdown link, breaking up the link.
        # This is a monkey-patch, but it might be worth sending a version of this change upstream.
        el.text = markdown.util.AtomicString(link_text)

        return el

    def handleMatch(self, m: Match[str], data: str) -> Tuple[Union[None, Element], int, int]:
        """Run the upstream handler, then apply the Zulip link changes."""
        el, match_start, index = super().handleMatch(m, data)
        if el is None:
            return el, match_start, index
        return self.zulip_specific_link_changes(el), match_start, index
|
2013-07-31 22:53:15 +02:00
|
|
|
|
2018-12-20 08:28:40 +01:00
|
|
|
def get_sub_registry(r: markdown.util.Registry, keys: List[str]) -> markdown.util.Registry:
    """Return a new Registry containing only the entries of ``r`` named in ``keys``.

    The entries keep the relative order they had in ``r``.  Priorities in
    the new registry are re-based (they are not the original priority
    values), so a caller that registers further items afterwards must pick
    priorities relative to these small positive integers.

    Raises KeyError if a name in ``keys`` is not registered in ``r``.
    """
    # Registry is a new class added by py-markdown to replace Ordered List.
    # Since Registry doesn't support .keys(), it is easier to make a new
    # object instead of removing keys from the existing object.
    new_r = markdown.util.Registry()
    total = len(r)
    for k in keys:
        # Registry orders items by *descending* priority, so index 0 is the
        # item that runs first.  Using the raw index as the new priority
        # would reverse the order; `total - index` keeps it intact while
        # staying strictly positive.
        new_r.register(r[k], k, total - r.get_index_for_name(k))
    return new_r
|
|
|
|
|
2017-01-17 06:48:46 +01:00
|
|
|
# Special "realm_filters_key" values, used as keys into md_engines and the
# corresponding realm filter caches.
DEFAULT_MARKDOWN_KEY = -1
ZEPHYR_MIRROR_MARKDOWN_KEY = -2
|
2016-12-31 03:08:43 +01:00
|
|
|
|
2020-06-25 22:01:54 +02:00
|
|
|
class Markdown(markdown.Markdown):
    """Zulip's customized Python-Markdown engine.

    build_parser() is overridden so that the engine is assembled from
    explicit registries of preprocessors, block processors, inline
    patterns, tree processors and postprocessors rather than upstream's
    default set.

    Required keyword arguments (read in __init__):
        realm_filters: iterable of (pattern, format_string, id) tuples.
        realm: the realm_filters_key this engine is built for.
        code_block_processor_disabled: when true, the 'code' block
            processor is not registered.
    """

    def __init__(self, *args: Any, **kwargs: Union[bool, int, List[Any]]) -> None:
        # define default configs
        #
        # This must be populated before calling super().__init__(), since the
        # build_* methods below read it through getConfig() (the upstream
        # constructor is what invokes build_parser()).
        self.config = {
            "realm_filters": [kwargs['realm_filters'],
                              "Realm-specific filters for realm_filters_key {}".format(kwargs['realm'])],
            "realm": [kwargs['realm'], "Realm id"],
            "code_block_processor_disabled": [kwargs['code_block_processor_disabled'],
                                              "Disabled for email gateway"],
        }

        super().__init__(*args, **kwargs)
        self.set_output_format('html')

    def build_parser(self) -> markdown.Markdown:
        """Assemble all processor registries for this engine and return self."""
        # Build the parser using selected default features from py-markdown.
        # The complete list of all available processors can be found in the
        # super().build_parser() function.
        #
        # Note: for any py-markdown updates, manually check if we want any
        # of the new features added upstream or not; they wouldn't get
        # included by default.
        self.preprocessors = self.build_preprocessors()
        self.parser = self.build_block_parser()
        self.inlinePatterns = self.build_inlinepatterns()
        self.treeprocessors = self.build_treeprocessors()
        self.postprocessors = self.build_postprocessors()
        self.handle_zephyr_mirror()
        return self

    def build_preprocessors(self) -> markdown.util.Registry:
        """Return the registry of preprocessors used by this engine."""
        # We disable the following preprocessors from upstream:
        #
        # html_block - insecure
        # reference - references don't make sense in a chat context.
        preprocessors = markdown.util.Registry()
        preprocessors.register(MarkdownListPreprocessor(self), 'hanging_lists', 35)
        preprocessors.register(markdown.preprocessors.NormalizeWhitespace(self), 'normalize_whitespace', 30)
        preprocessors.register(fenced_code.FencedBlockPreprocessor(self), 'fenced_code_block', 25)
        preprocessors.register(AlertWordNotificationProcessor(self), 'custom_text_notifications', 20)
        return preprocessors

    def build_block_parser(self) -> markdown.blockprocessors.BlockParser:
        """Return a BlockParser populated with the block processors used by this engine."""
        # We disable the following blockparsers from upstream:
        #
        # indent - replaced by ours
        # setextheader - disabled; we only support hashheaders for headings
        # olist - replaced by ours
        # ulist - replaced by ours
        # quote - replaced by ours
        parser = markdown.blockprocessors.BlockParser(self)
        parser.blockprocessors.register(markdown.blockprocessors.EmptyBlockProcessor(parser), 'empty', 95)
        parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 90)
        if not self.getConfig('code_block_processor_disabled'):
            parser.blockprocessors.register(markdown.blockprocessors.CodeBlockProcessor(parser), 'code', 85)
        parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 80)
        # We get priority 75 from 'table' extension
        parser.blockprocessors.register(markdown.blockprocessors.HRProcessor(parser), 'hr', 70)
        parser.blockprocessors.register(OListProcessor(parser), 'olist', 65)
        parser.blockprocessors.register(UListProcessor(parser), 'ulist', 60)
        parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 55)
        parser.blockprocessors.register(markdown.blockprocessors.ParagraphProcessor(parser), 'paragraph', 50)
        return parser

    def build_inlinepatterns(self) -> markdown.util.Registry:
        """Return the registry of inline patterns, including this engine's realm filters."""
        # We disable the following upstream inline patterns:
        #
        # backtick -        replaced by ours
        # escape -          probably will re-add at some point.
        # link -            replaced by ours
        # image_link -      replaced by ours
        # autolink -        replaced by ours
        # automail -        replaced by ours
        # linebreak -       we use nl2br and consider that good enough
        # html -            insecure
        # reference -       references not useful
        # image_reference - references not useful
        # short_reference - references not useful
        # ---------------------------------------------------
        # strong_em -       for these three patterns,
        # strong2 -         we have our own versions where
        # emphasis2 -       we disable _ for bold and emphasis

        # Declare regexes for clean single line calls to .register().
        NOT_STRONG_RE = markdown.inlinepatterns.NOT_STRONG_RE
        # Custom strikethrough syntax: ~~foo~~
        DEL_RE = r'(?<!~)(\~\~)([^~\n]+?)(\~\~)(?!~)'
        # Custom bold syntax: **foo** but not __foo__
        # str inside ** must start and end with a word character;
        # this is needed for things like "const char *x = (char *)y"
        EMPHASIS_RE = r'(\*)(?!\s+)([^\*^\n]+)(?<!\s)\*'
        ENTITY_RE = markdown.inlinepatterns.ENTITY_RE
        STRONG_EM_RE = r'(\*\*\*)(?!\s+)([^\*^\n]+)(?<!\s)\*\*\*'

        # Add Inline Patterns.  We use a custom numbering of the
        # rules, that preserves the order from upstream but leaves
        # space for us to add our own.
        reg = markdown.util.Registry()
        reg.register(BacktickInlineProcessor(markdown.inlinepatterns.BACKTICK_RE), 'backtick', 105)
        reg.register(markdown.inlinepatterns.DoubleTagPattern(STRONG_EM_RE, 'strong,em'), 'strong_em', 100)
        reg.register(UserMentionPattern(mention.find_mentions, self), 'usermention', 95)
        reg.register(Tex(r'\B(?<!\$)\$\$(?P<body>[^\n_$](\\\$|[^$\n])*)\$\$(?!\$)\B'), 'tex', 90)
        reg.register(StreamTopicPattern(get_compiled_stream_topic_link_regex(), self), 'topic', 87)
        reg.register(StreamPattern(get_compiled_stream_link_regex(), self), 'stream', 85)
        reg.register(Avatar(AVATAR_REGEX, self), 'avatar', 80)
        reg.register(Timestamp(r'!time\((?P<time>[^)]*)\)'), 'timestamp', 75)
        # Note that !gravatar syntax should be deprecated long term.
        reg.register(Avatar(GRAVATAR_REGEX, self), 'gravatar', 70)
        reg.register(UserGroupMentionPattern(mention.user_group_mentions, self), 'usergroupmention', 65)
        reg.register(LinkInlineProcessor(markdown.inlinepatterns.LINK_RE, self), 'link', 60)
        reg.register(AutoLink(get_web_link_regex(), self), 'autolink', 55)
        # Reserve priority 45-54 for Realm Filters
        reg = self.register_realm_filters(reg)
        reg.register(markdown.inlinepatterns.HtmlInlineProcessor(ENTITY_RE, self), 'entity', 40)
        reg.register(markdown.inlinepatterns.SimpleTagPattern(r'(\*\*)([^\n]+?)\2', 'strong'), 'strong', 35)
        reg.register(markdown.inlinepatterns.SimpleTagPattern(EMPHASIS_RE, 'em'), 'emphasis', 30)
        reg.register(markdown.inlinepatterns.SimpleTagPattern(DEL_RE, 'del'), 'del', 25)
        reg.register(markdown.inlinepatterns.SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 20)
        reg.register(Emoji(EMOJI_REGEX, self), 'emoji', 15)
        reg.register(EmoticonTranslation(emoticon_regex, self), 'translate_emoticons', 10)
        # We get priority 5 from 'nl2br' extension
        reg.register(UnicodeEmoji(unicode_emoji_regex), 'unicodeemoji', 0)
        return reg

    def register_realm_filters(self, inlinePatterns: markdown.util.Registry) -> markdown.util.Registry:
        """Register one RealmFilterPattern per configured realm filter and return the registry.

        Every filter is registered at priority 45 under the name
        'realm_filters/<pattern>'.
        """
        # The third tuple element (the filter's id) is not needed here; it is
        # unpacked under a throwaway name so the builtin `id` is not shadowed.
        for (pattern, format_string, _filter_id) in self.getConfig("realm_filters"):
            inlinePatterns.register(RealmFilterPattern(pattern, format_string, self),
                                    f'realm_filters/{pattern}', 45)
        return inlinePatterns

    def build_treeprocessors(self) -> markdown.util.Registry:
        """Return the registry of tree processors used by this engine."""
        # Here we build all the processors from upstream, plus a few of our own.
        treeprocessors = markdown.util.Registry()
        # We get priority 30 from 'hilite' extension
        treeprocessors.register(markdown.treeprocessors.InlineProcessor(self), 'inline', 25)
        treeprocessors.register(markdown.treeprocessors.PrettifyTreeprocessor(self), 'prettify', 20)
        treeprocessors.register(InlineInterestingLinkProcessor(self), 'inline_interesting_links', 15)
        if settings.CAMO_URI:
            treeprocessors.register(InlineHttpsProcessor(self), 'rewrite_to_https', 10)
        return treeprocessors

    def build_postprocessors(self) -> markdown.util.Registry:
        """Return the registry of postprocessors used by this engine."""
        # These are the default python-markdown processors, unmodified.
        postprocessors = markdown.util.Registry()
        postprocessors.register(markdown.postprocessors.RawHtmlPostprocessor(self), 'raw_html', 20)
        postprocessors.register(markdown.postprocessors.AndSubstitutePostprocessor(), 'amp_substitute', 15)
        postprocessors.register(markdown.postprocessors.UnescapePostprocessor(), 'unescape', 10)
        return postprocessors

    def getConfig(self, key: str, default: str='') -> Any:
        """Return the setting stored for ``key``, or ``default`` (an empty string unless given)."""
        if key in self.config:
            # Each config entry is a [value, description] pair.
            return self.config[key][0]
        else:
            return default

    def handle_zephyr_mirror(self) -> None:
        """Strip the engine down to a minimal processor set for the Zephyr-mirror key.

        Does nothing unless the "realm" config equals ZEPHYR_MIRROR_MARKDOWN_KEY.
        """
        if self.getConfig("realm") == ZEPHYR_MIRROR_MARKDOWN_KEY:
            # Disable almost all inline patterns for zephyr mirror
            # users' traffic that is mirrored.  Note that
            # inline_interesting_links is a treeprocessor and thus is
            # not removed
            self.inlinePatterns = get_sub_registry(self.inlinePatterns, ['autolink'])
            # 'rewrite_to_https' is only registered by build_treeprocessors()
            # when settings.CAMO_URI is set, so keep only the processors that
            # actually exist; asking for a missing name would raise KeyError.
            kept_treeprocessors = [
                name
                for name in ['inline_interesting_links', 'rewrite_to_https']
                if name in self.treeprocessors
            ]
            self.treeprocessors = get_sub_registry(self.treeprocessors, kept_treeprocessors)
            # insert new 'inline' processor because we have changed self.inlinePatterns
            # but InlineProcessor copies md as self.md in __init__.
            self.treeprocessors.register(markdown.treeprocessors.InlineProcessor(self), 'inline', 25)
            self.preprocessors = get_sub_registry(self.preprocessors, ['custom_text_notifications'])
            self.parser.blockprocessors = get_sub_registry(self.parser.blockprocessors, ['paragraph'])
|
2013-06-05 17:45:57 +02:00
|
|
|
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
# Built markdown engines, keyed by (realm_filters_key, email_gateway).
md_engines: Dict[Tuple[int, bool], markdown.Markdown] = {}
# Realm filter tuples, keyed by realm_filters_key.
realm_filter_data: Dict[int, List[Tuple[str, str, int]]] = {}
|
2013-06-05 17:45:57 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def make_md_engine(realm_filters_key: int, email_gateway: bool) -> None:
    """Build the engine for (realm_filters_key, email_gateway) and store it in md_engines.

    The realm's filters are read from realm_filter_data, which must already
    contain an entry for realm_filters_key.
    """
    engine_key = (realm_filters_key, email_gateway)
    # Drop any previously stored engine before rebuilding, so it is gone even
    # if the lookup or build below raises.
    md_engines.pop(engine_key, None)

    md_engines[engine_key] = build_engine(
        realm_filters=realm_filter_data[realm_filters_key],
        realm_filters_key=realm_filters_key,
        email_gateway=email_gateway,
    )
|
|
|
|
|
|
|
|
def build_engine(realm_filters: List[Tuple[str, str, int]],
                 realm_filters_key: int,
                 email_gateway: bool) -> markdown.Markdown:
    """Construct a Markdown engine for the given realm filters.

    ``email_gateway`` is passed through as ``code_block_processor_disabled``.
    """
    extensions = [
        nl2br.makeExtension(),
        tables.makeExtension(),
        codehilite.makeExtension(
            linenums=False,
            guess_lang=False,
        ),
    ]
    return Markdown(
        realm_filters=realm_filters,
        realm=realm_filters_key,
        code_block_processor_disabled=email_gateway,
        extensions=extensions,
    )
|
2013-06-05 17:45:57 +02:00
|
|
|
|
2019-05-25 16:10:30 +02:00
|
|
|
# Separator characters used to break a topic name into pieces, so that our
# common single-link matching regex can be applied to each piece in turn.
basic_link_splitter = re.compile(r'[ !;\?\),\'\"]')
|
|
|
|
|
2019-06-21 08:54:25 +02:00
|
|
|
# Security note: We don't do any HTML escaping in this
|
|
|
|
# function on the URLs; they are expected to be HTML-escaped when
|
|
|
|
# rendered by clients (just as links rendered into message bodies
|
|
|
|
# are validated and escaped inside `url_to_a`).
|
2018-11-08 17:21:14 +01:00
|
|
|
def topic_links(realm_filters_key: int, topic_name: str) -> List[str]:
    """Return the URLs found in a topic name.

    Realm-filter matches come first (in filter order), followed by any raw
    web links; a raw link with no scheme is given 'https'.
    """
    links: List[str] = []

    for realm_filter in realm_filters_for_realm(realm_filters_key):
        pattern = prepare_realm_pattern(realm_filter[0])
        url_format = realm_filter[1]
        links.extend(
            url_format % found.groupdict()
            for found in re.finditer(pattern, topic_name)
        )

    # Also make raw urls navigable.
    for fragment in basic_link_splitter.split(topic_name):
        web_link = re.match(get_web_link_regex(), fragment)
        if not web_link:
            continue
        url = web_link.group('url')
        parsed_url = parse(url)
        if not parsed_url.scheme:
            url = parsed_url.replace(scheme='https').to_text()
        links.append(url)

    return links
|
2013-12-11 20:06:37 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def maybe_update_markdown_engines(realm_filters_key: Optional[int], email_gateway: bool) -> None:
    """Make sure the markdown engine(s) for realm_filters_key exist and are current.

    With realm_filters_key=None, engines are (re)built for every realm's
    filters, for DEFAULT_MARKDOWN_KEY, and for ZEPHYR_MIRROR_MARKDOWN_KEY.
    """
    if realm_filters_key is None:
        # If realm_filters_key is None, load all filters
        all_filters = all_realm_filters()
        all_filters[DEFAULT_MARKDOWN_KEY] = []
        for key, filters in all_filters.items():
            realm_filter_data[key] = filters
            make_md_engine(key, email_gateway)
        # Hack to ensure that getConfig("realm") is right for mirrored Zephyrs
        realm_filter_data[ZEPHYR_MIRROR_MARKDOWN_KEY] = []
        make_md_engine(ZEPHYR_MIRROR_MARKDOWN_KEY, False)
        return

    current_filters = realm_filters_for_realm(realm_filters_key)
    cache_is_current = (
        realm_filters_key in realm_filter_data
        and realm_filter_data[realm_filters_key] == current_filters
    )
    if not cache_is_current:
        # Realm filters data has changed, update `realm_filter_data` and any
        # of the existing markdown engines using this set of realm filters.
        realm_filter_data[realm_filters_key] = current_filters
        for gateway_flag in [True, False]:
            # Update only existing engines (if any); don't create new ones.
            if (realm_filters_key, gateway_flag) in md_engines:
                make_md_engine(realm_filters_key, gateway_flag)

    if (realm_filters_key, email_gateway) not in md_engines:
        # The markdown engine corresponding to this key doesn't exist, so create one.
        make_md_engine(realm_filters_key, email_gateway)
|
2013-12-11 20:06:37 +01:00
|
|
|
|
2012-10-25 21:38:47 +02:00
|
|
|
# We want to log Markdown parser failures, but shouldn't log the actual input
|
|
|
|
# message for privacy reasons. The compromise is to replace all alphanumeric
|
|
|
|
# characters with 'x'.
|
|
|
|
#
|
|
|
|
# We also use repr() to improve reproducibility, and to escape terminal control
|
|
|
|
# codes, which can do surprisingly nasty things.
|
2017-11-03 03:12:25 +01:00
|
|
|
_privacy_re = re.compile(r'\w', flags=re.UNICODE)


def privacy_clean_markdown(content: str) -> str:
    """Return the repr() of ``content`` with every word character replaced by 'x'."""
    masked = _privacy_re.sub('x', content)
    return repr(masked)
|
2012-10-25 21:38:47 +02:00
|
|
|
|
2020-06-28 00:48:57 +02:00
|
|
|
def log_markdown_error(msg: str) -> None:
    """Log a markdown error through the dedicated markdown_logger.

    This unusual logging approach keeps AdminNotifyHandler from sending the
    sanitized original markdown formatting into another Zulip message,
    which could cause an infinite exception loop.
    """
    markdown_logger.error(msg)
|
2016-09-15 21:51:27 +02:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def get_email_info(realm_id: int, emails: Set[str]) -> Dict[str, FullNameInfo]:
    """Look up users in a realm by email, case-insensitively.

    Returns a dict mapping each matched user's stripped, lowercased email to
    its row (containing 'id' and 'email'); empty when ``emails`` is empty.
    """
    if not emails:
        return {}

    # One case-insensitive clause per requested address, OR-ed together.
    email_clauses = {
        Q(email__iexact=address.strip().lower())
        for address in emails
    }
    any_requested_email = functools.reduce(lambda left, right: left | right, email_clauses)

    matching_rows = UserProfile.objects.filter(
        realm_id=realm_id,
    ).filter(
        any_requested_email,
    ).values(
        'id',
        'email',
    )

    return {
        row['email'].strip().lower(): row
        for row in matching_rows
    }
|
|
|
|
|
2018-11-02 09:15:46 +01:00
|
|
|
def get_possible_mentions_info(realm_id: int, mention_texts: Set[str]) -> List[FullNameInfo]:
    """Fetch the active users in a realm whose full name matches any mention text.

    Returns a list of rows (containing 'id', 'full_name' and 'email');
    empty when ``mention_texts`` is empty.
    """
    if not mention_texts:
        return []

    # Remove the trailing part of the `name|id` mention syntax,
    # thus storing only full names in full_names.
    name_with_id_re = r'(?P<full_name>.+)\|\d+$'
    full_names = set()
    for text in mention_texts:
        parsed = re.match(name_with_id_re, text)
        full_names.add(parsed.group("full_name") if parsed else text)

    # One case-insensitive clause per candidate name, OR-ed together.
    name_clauses = {
        Q(full_name__iexact=candidate)
        for candidate in full_names
    }
    any_candidate_name = functools.reduce(lambda left, right: left | right, name_clauses)

    matching_rows = UserProfile.objects.filter(
        realm_id=realm_id,
        is_active=True,
    ).filter(
        any_candidate_name,
    ).values(
        'id',
        'full_name',
        'email',
    )
    return list(matching_rows)
|
2017-09-14 19:47:22 +02:00
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class MentionData:
    """Pre-fetched lookup tables for the mentions a message might contain.

    Construction scans the raw message content for candidate user
    mentions and user group mentions, fetches the matching rows, and
    indexes them so the renderer can resolve mentions from memory.
    """

    def __init__(self, realm_id: int, content: str) -> None:
        mention_texts, has_wildcards = possible_mentions(content)
        self.has_wildcards = has_wildcards

        # Index the same rows twice: by lowercased full name and by user id.
        self.full_name_info = {}
        self.user_id_info = {}
        for user_row in get_possible_mentions_info(realm_id, mention_texts):
            self.full_name_info[user_row['full_name'].lower()] = user_row
            self.user_id_info[user_row['id']] = user_row

        self.init_user_group_data(realm_id=realm_id, content=content)

    def message_has_wildcards(self) -> bool:
        """Return the wildcard flag that possible_mentions reported for the content."""
        return self.has_wildcards

    def init_user_group_data(self,
                             realm_id: int,
                             content: str) -> None:
        """Populate the user group lookup tables for this message.

        Sets ``user_group_name_info`` (lowercased group name -> group)
        and ``user_group_members`` (group id -> list of member user ids).
        """
        candidate_names = possible_user_group_mentions(content)
        self.user_group_name_info = get_user_group_name_info(realm_id, candidate_names)
        self.user_group_members: Dict[int, List[int]] = defaultdict(list)

        group_ids = [
            user_group.id
            for user_group in self.user_group_name_info.values()
        ]
        if not group_ids:
            # Early-return to avoid the cost of hitting the ORM,
            # which shows up in profiles.
            return

        membership_rows = UserGroupMembership.objects.filter(
            user_group_id__in=group_ids,
        ).values('user_group_id', 'user_profile_id')
        for membership_row in membership_rows:
            members = self.user_group_members[membership_row['user_group_id']]
            members.append(membership_row['user_profile_id'])

    def get_user_by_name(self, name: str) -> Optional[FullNameInfo]:
        """Look up a candidate user by full name, ignoring case.

        Warning: this is not dependable if two users with the same full
        name are mentioned; only one row per lowercased name is kept.
        Use get_user_by_id where possible.
        """
        return self.full_name_info.get(name.lower())

    def get_user_by_id(self, id: str) -> Optional[FullNameInfo]:
        """Look up a candidate user by id; ``id`` is converted with int()."""
        return self.user_id_info.get(int(id))

    def get_user_ids(self) -> Set[int]:
        """
        Returns the user IDs that might have been mentioned by this
        content.  Because this data structure has not parsed the
        message and knows nothing about escaping or code blocks, the
        result will overestimate the set of user ids.
        """
        return set(self.user_id_info)

    def get_user_group(self, name: str) -> Optional[UserGroup]:
        """Look up a candidate user group by name, ignoring case."""
        return self.user_group_name_info.get(name.lower())

    def get_group_members(self, user_group_id: int) -> List[int]:
        """Return the member user ids recorded for a group, or [] if none were recorded."""
        # .get() rather than indexing, so the defaultdict does not grow
        # an entry for every group id that is merely asked about.
        return self.user_group_members.get(user_group_id, [])
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def get_user_group_name_info(realm_id: int, user_group_names: Set[str]) -> Dict[str, UserGroup]:
    """Fetch the realm's user groups whose names are in ``user_group_names``.

    Returns a dict mapping each matching group's lowercased name to its
    UserGroup row; an empty set of names yields an empty dict without
    querying the database.
    """
    if not user_group_names:
        return {}

    matching_groups = UserGroup.objects.filter(
        realm_id=realm_id,
        name__in=user_group_names,
    )
    return {
        user_group.name.lower(): user_group
        for user_group in matching_groups
    }
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def get_stream_name_info(realm: Realm, stream_names: Set[str]) -> Dict[str, FullNameInfo]:
    """Fetch id/name rows for the realm's active streams named in ``stream_names``.

    Returns a dict mapping each matching stream's name to its row (a
    dict with 'id' and 'name'); an empty set of names yields an empty
    dict without querying the database.
    """
    if not stream_names:
        return {}

    # One exact-match filter per requested name, OR-ed together below.
    name_filters = {
        Q(name=stream_name)
        for stream_name in stream_names
    }
    any_requested_name = functools.reduce(lambda a, b: a | b, name_filters)

    stream_rows = get_active_streams(
        realm=realm,
    ).filter(
        any_requested_name,
    ).values(
        'id',
        'name',
    )

    return {
        stream_row['name']: stream_row
        for stream_row in stream_rows
    }
|
|
|
|
|
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def do_convert(content: str,
               realm_alert_words_automaton: Optional[ahocorasick.Automaton] = None,
               message: Optional[Message]=None,
               message_realm: Optional[Realm]=None,
               sent_by_bot: bool=False,
               translate_emoticons: bool=False,
               mention_data: Optional[MentionData]=None,
               email_gateway: bool=False,
               no_previews: bool=False) -> str:
    """Convert Markdown to HTML, with Zulip-specific settings and hacks.

    Args:
        content: The raw Markdown to render.
        realm_alert_words_automaton: Passed through to the engine's
            per-render data when a realm is known.
        message: The message being rendered, if any; also used to look
            up the realm when message_realm is not given.
        message_realm: The realm whose settings should drive rendering.
        sent_by_bot: Passed through to the engine's per-render data.
        translate_emoticons: Passed through to the engine's per-render data.
        mention_data: Pre-fetched mention data; built from content when
            omitted and a realm is known.
        email_gateway: Part of the key used to select the cached engine.
        no_previews: Forwarded to the image/URL-embed preview checks.

    Returns:
        The rendered HTML.

    Raises:
        MarkdownRenderingException: If anything raises while rendering,
            or if the rendered output exceeds MAX_MESSAGE_LENGTH * 10
            characters.
    """
    # This logic is a bit convoluted, but the overall goal is to support a range of use cases:
    # * Nothing is passed in other than content -> just run default options (e.g. for docs)
    # * message is passed, but no realm is -> look up realm from message
    # * message_realm is passed -> use that realm for markdown purposes
    if message is not None:
        if message_realm is None:
            message_realm = message.get_realm()
    if message_realm is None:
        realm_filters_key = DEFAULT_MARKDOWN_KEY
    else:
        realm_filters_key = message_realm.id

    if message and hasattr(message, 'id') and message.id:
        logging_message_id = 'id# ' + str(message.id)
    else:
        logging_message_id = 'unknown'

    if message is not None and message_realm is not None:
        if message_realm.is_zephyr_mirror_realm:
            if message.sending_client.name == "zephyr_mirror":
                # Use slightly customized Markdown processor for content
                # delivered via zephyr_mirror
                realm_filters_key = ZEPHYR_MIRROR_MARKDOWN_KEY

    maybe_update_markdown_engines(realm_filters_key, email_gateway)
    md_engine_key = (realm_filters_key, email_gateway)

    if md_engine_key in md_engines:
        _md_engine = md_engines[md_engine_key]
    else:
        # md_engines is indexed by (realm_filters_key, email_gateway)
        # tuples throughout this function, so the fallback check must
        # use the tuple key too.  Testing the bare DEFAULT_MARKDOWN_KEY
        # could not match such a key and forced a rebuild every time
        # this branch was taken.
        default_engine_key = (DEFAULT_MARKDOWN_KEY, email_gateway)
        if default_engine_key not in md_engines:
            # NOTE(review): this rebuild is requested with
            # email_gateway=False even when email_gateway is True;
            # confirm against maybe_update_markdown_engines that the
            # engine looked up below is always created.
            maybe_update_markdown_engines(realm_filters_key=None, email_gateway=False)

        _md_engine = md_engines[default_engine_key]
    # Reset the parser; otherwise it will get slower over time.
    _md_engine.reset()

    # Filters such as UserMentionPattern need a message.
    _md_engine.zulip_message = message
    _md_engine.zulip_realm = message_realm
    _md_engine.zulip_db_data = None  # for now
    _md_engine.image_preview_enabled = image_preview_enabled(
        message, message_realm, no_previews)
    _md_engine.url_embed_preview_enabled = url_embed_preview_enabled(
        message, message_realm, no_previews)

    # Pre-fetch data from the DB that is used in the markdown thread
    if message_realm is not None:
        # Here we fetch the data structures needed to render
        # mentions/avatars/stream mentions from the database, but only
        # if there is syntax in the message that might use them, since
        # the fetches are somewhat expensive and these types of syntax
        # are uncommon enough that it's a useful optimization.

        if mention_data is None:
            mention_data = MentionData(message_realm.id, content)

        emails = possible_avatar_emails(content)
        email_info = get_email_info(message_realm.id, emails)

        stream_names = possible_linked_stream_names(content)
        stream_name_info = get_stream_name_info(message_realm, stream_names)

        if content_has_emoji_syntax(content):
            active_realm_emoji = message_realm.get_active_emoji()
        else:
            active_realm_emoji = {}

        _md_engine.zulip_db_data = {
            'realm_alert_words_automaton': realm_alert_words_automaton,
            'email_info': email_info,
            'mention_data': mention_data,
            'active_realm_emoji': active_realm_emoji,
            'realm_uri': message_realm.uri,
            'sent_by_bot': sent_by_bot,
            'stream_names': stream_name_info,
            'translate_emoticons': translate_emoticons,
        }

    try:
        # Spend at most 5 seconds rendering; this protects the backend
        # from being overloaded by bugs (e.g. markdown logic that is
        # extremely inefficient in corner cases) as well as user
        # errors (e.g. a realm filter that makes some syntax
        # infinite-loop).
        rendered_content = timeout(5, _md_engine.convert, content)

        # Throw an exception if the content is huge; this protects the
        # rest of the codebase from any bugs where we end up rendering
        # something huge.
        if len(rendered_content) > MAX_MESSAGE_LENGTH * 10:
            raise MarkdownRenderingException(
                f'Rendered content exceeds {MAX_MESSAGE_LENGTH * 10} characters (message {logging_message_id})'
            )
        return rendered_content
    except Exception:
        cleaned = privacy_clean_markdown(content)
        # NOTE: Don't change this message without also changing the
        # logic in logging_handlers.py or we can create recursive
        # exceptions.
        markdown_logger.exception(
            'Exception in Markdown parser; input (sanitized) was: %s\n (message %s)',
            cleaned,
            logging_message_id,
        )

        raise MarkdownRenderingException()
    finally:
        # These next three lines are slightly paranoid, since
        # we always set these right before actually using the
        # engine, but better safe than sorry.
        _md_engine.zulip_message = None
        _md_engine.zulip_realm = None
        _md_engine.zulip_db_data = None
|
2013-05-21 23:59:27 +02:00
|
|
|
|
2020-06-26 23:06:05 +02:00
|
|
|
# Module-level markdown rendering statistics, maintained by
# markdown_stats_start() and markdown_stats_finish().

# time.time() value recorded by the most recent markdown_stats_start().
markdown_time_start: float = 0.0
# Running sum of the seconds measured between start/finish pairs.
markdown_total_time: float = 0.0
# Number of times markdown_stats_finish() has been called.
markdown_total_requests: int = 0
|
2013-05-21 23:59:27 +02:00
|
|
|
|
2020-06-26 23:06:05 +02:00
|
|
|
def get_markdown_time() -> float:
    """Return the accumulated markdown rendering time, in seconds."""
    return markdown_total_time
|
2013-05-21 23:59:27 +02:00
|
|
|
|
2020-06-26 23:06:05 +02:00
|
|
|
def get_markdown_requests() -> int:
    """Return how many renders have been counted by markdown_stats_finish()."""
    return markdown_total_requests
|
2013-05-21 23:59:27 +02:00
|
|
|
|
2020-06-26 23:06:05 +02:00
|
|
|
def markdown_stats_start() -> None:
    """Record the current time as the start of a markdown render."""
    global markdown_time_start
    markdown_time_start = time.time()
|
2013-05-21 23:59:27 +02:00
|
|
|
|
2020-06-26 23:06:05 +02:00
|
|
|
def markdown_stats_finish() -> None:
    """Count one finished render and add its elapsed time to the running total.

    The elapsed time is measured from the timestamp stored by the most
    recent markdown_stats_start().
    """
    # markdown_time_start is only read here, so it needs no global declaration.
    global markdown_total_requests, markdown_total_time
    markdown_total_requests += 1
    elapsed = time.time() - markdown_time_start
    markdown_total_time += elapsed
|
2013-05-21 23:59:27 +02:00
|
|
|
|
2018-05-11 01:42:51 +02:00
|
|
|
def convert(content: str,
            realm_alert_words_automaton: Optional[ahocorasick.Automaton] = None,
            message: Optional[Message]=None,
            message_realm: Optional[Realm]=None,
            sent_by_bot: bool=False,
            translate_emoticons: bool=False,
            mention_data: Optional[MentionData]=None,
            email_gateway: bool=False,
            no_previews: bool=False) -> str:
    """Render Markdown to HTML via do_convert, recording timing statistics.

    All arguments are forwarded unchanged to do_convert, and its return
    value is returned as-is.  Any exception raised by do_convert
    propagates to the caller.
    """
    markdown_stats_start()
    try:
        return do_convert(
            content,
            realm_alert_words_automaton=realm_alert_words_automaton,
            message=message,
            message_realm=message_realm,
            sent_by_bot=sent_by_bot,
            translate_emoticons=translate_emoticons,
            mention_data=mention_data,
            email_gateway=email_gateway,
            no_previews=no_previews,
        )
    finally:
        # Close out the stats even when rendering fails, so that failed
        # renders are counted and their time is not lost from the totals.
        markdown_stats_finish()
|