// zulip/web/src/markdown.js
// (700 lines, 26 KiB, JavaScript)

import {isValid} from "date-fns";
import katex from "katex";
import _ from "lodash";
import * as fenced_code from "../shared/src/fenced_code";
import marked from "../third/marked/lib/marked";
// This contains zulip's frontend Markdown implementation; see
// docs/subsystems/markdown.md for docs on our Markdown syntax. The other
// main piece in rendering Markdown client-side is
// web/third/marked/lib/marked.js, which we have significantly
// modified from the original implementation.
// Docs: https://zulip.readthedocs.io/en/latest/subsystems/markdown.html
// If we see preview-related syntax in our content, we will need the
// backend to render it.
// Patterns that identify content which gets a preview. They are consumed
// by contains_preview_link(), which tests each one against the content.
const preview_regexes = [
// Inline image previews: a run of non-whitespace characters ending in an
// image suffix, optionally followed by ")".
// To keep the regexes simple, the "followed by whitespace" case and the
// "end of line / end of message" case are split into two patterns.
/\S*(?:\.bmp|\.gif|\.jpg|\.jpeg|\.png|\.webp)\)?\s+/m,
/\S*(?:\.bmp|\.gif|\.jpg|\.jpeg|\.png|\.webp)\)?$/m,
// Twitter and YouTube links are given previews
/\S*(?:twitter|youtube)\.com\/\S*/,
];
function contains_preview_link(content) {
    // Returns true as soon as any of the preview patterns matches the
    // content, and false when none of them do.
    for (const preview_re of preview_regexes) {
        if (preview_re.test(content)) {
            return true;
        }
    }
    return false;
}
export function translate_emoticons_to_names({src, get_emoticon_translations}) {
    // Rewrites emoticons found in `src` into their colon syntax, using the
    // {regex, replacement_text} pairs from get_emoticon_translations().
    // An emoticon is only translated when it is delimited on both sides
    // (by a boundary character or by the start/end of the string).

    // Characters allowed to sit right next to an emoticon.
    const boundary_chars = ",.;?!()[] \"'\n\t"; // From composebox_typeahead
    const boundary_chars_without_space = boundary_chars.replace(" ", "");

    // Builds the `replace()` callback for one emoticon; the replacement
    // text is captured per call, since `replace()` itself only hands the
    // callback the match details.
    const make_replacer = (replacement_text) => (match, _g1, offset, str) => {
        const char_before = str[offset - 1];
        const char_after = str[offset + match.length];

        if (
            boundary_chars_without_space.includes(char_before) &&
            boundary_chars_without_space.includes(char_after)
        ) {
            // Punctuation hugging both sides, e.g. Hello!:)?
            return match;
        }

        const starts_cleanly = offset === 0 || boundary_chars.includes(char_before);
        const ends_cleanly =
            offset + match.length === str.length || boundary_chars.includes(char_after);
        return starts_cleanly && ends_cleanly ? replacement_text : match;
    };

    let translated = src;
    for (const {regex, replacement_text} of get_emoticon_translations()) {
        translated = translated.replace(regex, make_replacer(replacement_text));
    }
    return translated;
}
function contains_problematic_linkifier({content, get_linkifier_map}) {
    // Returns true when some linkifier matches `content` at a position
    // where it is directly preceded by a character other than whitespace
    // or one of " ' ( , : <. Such messages are not rendered locally.
    // The check is done by prepending a one-character negated class to
    // the linkifier instead of using a lookbehind.
    //
    // get_linkifier_map() must return a Map whose keys are the linkifier
    // RegExp objects.
    for (const re of get_linkifier_map().keys()) {
        // Wrap the linkifier in a non-capturing group so that a top-level
        // alternation ("a|b") keeps both the required prefix and the
        // (?!\w) suffix attached to every alternative.
        const pattern = /[^\s"'(,:<]/.source + "(?:" + re.source + ")" + /(?!\w)/.source;
        // Keep the linkifier's own matching flags (e.g. "i", "u"), but drop
        // the stateful ones: this is a one-shot test on a fresh regex.
        const flags = re.flags.replaceAll(/[gy]/g, "");
        if (new RegExp(pattern, flags).test(content)) {
            return true;
        }
    }
    return false;
}
function content_contains_backend_only_syntax({content, get_linkifier_map}) {
    // Heuristic: does this message use syntax that only the backend
    // Markdown processor handles correctly? When the answer is no, the
    // message can be rendered client-side right away for local echo.
    const has_preview_link = contains_preview_link(content);
    if (has_preview_link) {
        // No need to look at linkifiers once a preview link was found.
        return has_preview_link;
    }
    return contains_problematic_linkifier({content, get_linkifier_map});
}
function parse_with_options({raw_content, helper_config, options}) {
// Given the raw markdown content of a message (raw_content)
// we return an object {content, flags}: the rendered HTML (content)
// and the simulated message flags.
// Our caller passes a helper_config object that has several
// helper functions for getting info about users, streams, etc.
// And it also passes in options for the marked processor; the
// mention handlers defined below are added on top of those options.
//
// flags always contains "read"; "mentioned" is added for a non-silent
// personal mention of the current user or of a group they belong to,
// and "wildcard_mentioned" for a non-silent wildcard mention.

// Mention state; the handlers below update these as marked calls them.
let mentioned = false;
let mentioned_group = false;
let mentioned_wildcard = false;
const marked_options = {
...options,
// Renders a user (or wildcard) mention. Returns the HTML for the
// mention, or undefined when the mention cannot be resolved to a user.
userMentionHandler(mention, silently) {
if (mention === "all" || mention === "everyone" || mention === "stream") {
let classes;
let display_text;
if (silently) {
classes = "user-mention silent";
display_text = mention;
} else {
// Stream Wildcard mention
mentioned_wildcard = true;
display_text = "@" + mention;
classes = "user-mention";
}
return `<span class="${classes}" data-user-id="*">${_.escape(display_text)}</span>`;
}
if (mention === "topic") {
let classes;
let display_text;
if (silently) {
classes = "topic-mention silent";
display_text = mention;
} else {
// Topic Wildcard mention
mentioned_wildcard = true;
display_text = "@" + mention;
classes = "topic-mention";
}
return `<span class="${classes}">${_.escape(display_text)}</span>`;
}
let full_name;
let user_id;
const id_regex = /^(.+)?\|(\d+)$/; // For @**user|id** and @**|id** syntax
const match = id_regex.exec(mention);
if (match) {
/*
If we have two users named Alice, we want
users to provide mentions like this:
alice|42
alice|99
The autocomplete feature will help users
send correct mentions for duplicate names,
but we also have to consider the possibility
that the user will hand-type something
incorrectly, in which case we'll fall
through to the other code (which may be a
misfeature).
*/
full_name = match[1];
user_id = Number.parseInt(match[2], 10);
if (full_name === undefined) {
// For @**|id** syntax
if (!helper_config.is_valid_user_id(user_id)) {
// silently ignore invalid user id.
user_id = undefined;
} else {
full_name = helper_config.get_actual_name_from_user_id(user_id);
}
} else {
// For @**user|id** syntax
if (!helper_config.is_valid_full_name_and_user_id(full_name, user_id)) {
// The name and id do not go together; discard both so that
// the whole mention text is looked up as a plain name below.
user_id = undefined;
full_name = undefined;
}
}
}
if (user_id === undefined) {
// Handle normal syntax
full_name = mention;
user_id = helper_config.get_user_id_from_name(full_name);
}
if (user_id === undefined) {
// This is nothing to be concerned about--the users
// are allowed to hand-type mentions and they may
// have had a typo in the name.
return undefined;
}
// HAPPY PATH! Note that we not only need to return the
// appropriate HTML snippet here; we also want to update
// flags on the message itself that get used by the message
// view code and possibly our filtering code.
// If I mention "@aLiCe sMITH", I still want "Alice Smith" to
// show in the pill.
let display_text = helper_config.get_actual_name_from_user_id(user_id);
let classes;
if (silently) {
classes = "user-mention silent";
} else {
if (helper_config.my_user_id() === user_id) {
// Personal mention of current user.
mentioned = true;
}
classes = "user-mention";
display_text = "@" + display_text;
}
return `<span class="${classes}" data-user-id="${_.escape(user_id)}">${_.escape(
display_text,
)}</span>`;
},
// Renders a user group mention. Returns the HTML for the mention, or
// undefined when no group with that name is found.
groupMentionHandler(name, silently) {
const group = helper_config.get_user_group_from_name(name);
if (group !== undefined) {
let display_text;
let classes;
if (silently) {
display_text = group.name;
classes = "user-group-mention silent";
} else {
display_text = "@" + group.name;
classes = "user-group-mention";
if (
helper_config.is_member_of_user_group(helper_config.my_user_id(), group.id)
) {
// Mentioned the current user's group.
mentioned_group = true;
}
}
return `<span class="${classes}" data-user-group-id="${_.escape(
group.id,
)}">${_.escape(display_text)}</span>`;
}
return undefined;
},
// Rewrites already-rendered mention spans inside `quote` into their
// silent form: the " silent" class is added and the leading "@" of
// the display text is dropped. Also resets the mention flags.
silencedMentionHandler(quote) {
// Silence quoted personal and stream wildcard mentions.
quote = quote.replaceAll(
/(<span class="user-mention)(" data-user-id="(\d+|\*)">)@/g,
"$1 silent$2",
);
// Silence quoted topic wildcard mentions.
quote = quote.replaceAll(/(<span class="topic-mention)(">)@/g, "$1 silent$2");
// Silence quoted user group mentions.
quote = quote.replaceAll(
/(<span class="user-group-mention)(" data-user-group-id="\d+">)@/g,
"$1 silent$2",
);
// In most cases, if you are being mentioned in the message you're quoting, you wouldn't
// mention yourself outside of the blockquote (and, above it). If you do that, the
// following mentioned status is false; the backend rendering is authoritative and the
// only side effect is the lack of a red flash on immediately sending the message.
//
// A better parser would be able to just ignore mentions
// inside; we just set all flags to false and let the
// server rendering correct the message flags, to avoid a
// flash of mention styling.
mentioned = false;
mentioned_group = false;
mentioned_wildcard = false;
return quote;
},
};
// Our Python-Markdown processor appends two \n\n to input
const content = marked(raw_content + "\n\n", marked_options).trim();
// Simulate message flags for our locally rendered
// message. Messages the user themselves sent via the browser are
// always marked as read.
const flags = ["read"];
if (mentioned || mentioned_group) {
flags.push("mentioned");
}
if (mentioned_wildcard) {
flags.push("wildcard_mentioned");
}
return {content, flags};
}
function is_x_between(x, start, length) {
    // True when x lies in the half-open interval [start, start + length).
    const end = start + length;
    return x >= start && x < end;
}
function is_overlapping(match_a, match_b) {
    // Two matches ({index, text}) overlap when either one starts inside
    // the span covered by the other.
    const a_starts_inside_b = is_x_between(match_a.index, match_b.index, match_b.text.length);
    if (a_starts_inside_b) {
        return true;
    }
    return is_x_between(match_b.index, match_a.index, match_a.text.length);
}
export function get_topic_links({topic, get_linkifier_map}) {
    // Computes the links to show for a topic: every linkifier match plus
    // every raw http(s) URL, with overlapping matches removed.
    //
    // Parameters:
    //   topic: the topic string to scan.
    //   get_linkifier_map: returns a Map from linkifier RegExp to
    //     {url_template, group_number_to_name}. url_template.expand(context)
    //     must return the URL for a match; group_number_to_name maps a
    //     1-based capture group number to the template variable name.
    //     Linkifiers earlier in the Map take priority over later ones.
    //
    // Returns an array of {url, text}, ordered by position in the topic.
    //
    // We export this for testing purposes, and mobile may want to
    // use this as well in the future.
    const links = [];
    // The lower the precedence is, the more prioritized the pattern is.
    let precedence = 0;
    for (const [pattern, {url_template, group_number_to_name}] of get_linkifier_map().entries()) {
        // Scan with a private, global copy of the pattern. The linkifier
        // regexes are shared objects, so scanning them directly would
        // start from whatever lastIndex a previous user left behind, and
        // a pattern without the "g" flag would match the same text forever.
        const flags = pattern.flags.includes("g") ? pattern.flags : pattern.flags + "g";
        const scanner = new RegExp(pattern.source, flags);
        for (const match of topic.matchAll(scanner)) {
            if (match[0] === "") {
                // An empty match has no text to turn into a link.
                continue;
            }
            const template_context = {};
            for (const [i, matched_group] of match.slice(1).entries()) {
                // Capture groups are numbered from 1.
                template_context[group_number_to_name[i + 1]] = matched_group;
            }
            const link_url = url_template.expand(template_context);
            // We store the starting index as well, to sort the order of occurrence of the links
            // in the topic, similar to the logic implemented in zerver/lib/markdown/__init__.py
            links.push({url: link_url, text: match[0], index: match.index, precedence});
        }
        precedence += 1;
    }
    // Sort the matches so we favor the match with a higher priority and tie-break with the
    // starting index. At this point every entry is a linkifier match with a numeric precedence;
    // raw URLs are appended only afterwards, so that linkifiers are prioritized over them.
    links.sort((a, b) => a.precedence - b.precedence || a.index - b.index);
    // Also make raw URLs navigable
    const url_re = /\b(https?:\/\/[^\s<]+[^\s"'),.:;<\]])/g; // Slightly modified from third/marked.js
    for (const match of topic.matchAll(url_re)) {
        links.push({url: match[0], text: match[0], index: match.index, precedence: null});
    }
    // The following removes overlapping intervals depending on the precedence of linkifier patterns.
    // This uses the same algorithm implemented in zerver/lib/markdown/__init__.py.
    // To avoid mutating links while processing links, the final output gets pushed to another list.
    const applied_matches = [];
    for (const new_match of links) {
        // When the current match does not overlap with any already accepted match,
        // it belongs in the final output because
        // 1. Given that the links are sorted by precedence, the current match has the highest
        //    priority among the matches still to be checked.
        // 2. None of the matches with higher priority overlaps with the current match.
        // This might be optimized to search for overlapping matches in O(logn) time,
        // but it is kept as-is since performance is not critical for this codepath and for simplicity.
        if (applied_matches.every((applied_match) => !is_overlapping(applied_match, new_match))) {
            applied_matches.push(new_match);
        }
    }
    // We need to sort applied_matches again because the links were previously ordered by precedence,
    // so that the links are displayed in the order they occur in the topic.
    return applied_matches
        .sort((a, b) => a.index - b.index)
        .map((match) => ({url: match.url, text: match.text}));
}
export function is_status_message(raw_content) {
    // A status message is one whose raw content begins with "/me "
    // (including the trailing space).
    const status_prefix = "/me ";
    return raw_content.startsWith(status_prefix);
}
function make_emoji_span(codepoint, title, alt_text) {
    // Builds the <span> for an emoji: `title` is used for both the
    // aria-label and the title attribute, `codepoint` selects the
    // emoji-<codepoint> class, and `alt_text` is the span's text.
    // All three values are HTML-escaped.
    const safe_title = _.escape(title);
    const css_classes = `emoji emoji-${_.escape(codepoint)}`;
    const safe_alt_text = _.escape(alt_text);
    return `<span aria-label="${safe_title}" class="${css_classes}" role="img" title="${safe_title}">${safe_alt_text}</span>`;
}
function handleUnicodeEmoji({unicode_emoji, get_emoji_name}) {
    // Renders a Unicode emoji as an emoji span when get_emoji_name() knows
    // its codepoint; otherwise the input is returned unchanged.

    // We want to avoid turning things like arrows (↔) and keycaps (numbers
    // in boxes) into qualified emoji (images).
    // More specifically, we skip anything with text in the second column of
    // this table https://unicode.org/Public/emoji/1.0/emoji-data.txt
    const is_text_presentation = /^\P{Emoji_Presentation}\u20E3?$/u.test(unicode_emoji);
    if (is_text_presentation) {
        return unicode_emoji;
    }

    // Drop the (first) U+FE0F variation selector so that the qualified and
    // unqualified versions map to the same codepoint string.
    const hex_codepoints = [];
    for (const char of unicode_emoji.replace(/\uFE0F/, "")) {
        hex_codepoints.push(char.codePointAt(0).toString(16).padStart(4, "0"));
    }
    const codepoint = hex_codepoints.join("-");

    const emoji_name = get_emoji_name(codepoint);
    if (!emoji_name) {
        return unicode_emoji;
    }
    const alt_text = `:${emoji_name}:`;
    const title = emoji_name.replaceAll("_", " ");
    return make_emoji_span(codepoint, title, alt_text);
}
function handleEmoji({emoji_name, get_realm_emoji_url, get_emoji_codepoint}) {
    // Renders the :emoji_name: syntax.
    //
    // Zulip supports both standard/Unicode emoji, served by a
    // spritesheet, and custom realm-specific emoji (served by URL).
    // Realm emoji are checked first and rendered as an <img>; next come
    // Unicode emoji, rendered as an emoji span; anything else is passed
    // through as the plain text syntax.
    const alt_text = `:${emoji_name}:`;
    const title = emoji_name.replaceAll("_", " ");

    const realm_emoji_url = get_realm_emoji_url(emoji_name);
    if (realm_emoji_url) {
        const alt_attr = _.escape(alt_text);
        const src_attr = _.escape(realm_emoji_url);
        const title_attr = _.escape(title);
        return `<img alt="${alt_attr}" class="emoji" src="${src_attr}" title="${title_attr}">`;
    }

    const codepoint = get_emoji_codepoint(emoji_name);
    return codepoint ? make_emoji_span(codepoint, title, alt_text) : alt_text;
}
function handleLinkifier({pattern, matches, get_linkifier_map}) {
    // Expands the URL template registered for `pattern`, feeding it the
    // captured groups in `matches`. The n-th entry of `matches` (counting
    // from 1) is stored under the name group_number_to_name[n].
    const linkifier = get_linkifier_map().get(pattern);
    const {url_template, group_number_to_name} = linkifier;

    const template_context = {};
    for (const [position, matched_text] of [...matches].entries()) {
        const variable_name = group_number_to_name[position + 1];
        template_context[variable_name] = matched_text;
    }
    return url_template.expand(template_context);
}
function handleTimestamp(time) {
    // Renders a timestamp as an HTML5 <time> element, or as a plain
    // <span> when the value cannot be interpreted as a date.

    // Numeric input is a Unix timestamp in seconds, while JavaScript dates
    // are in milliseconds; anything else is handed to the Date parser.
    const is_unix_timestamp = !Number.isNaN(Number(time));
    const timeobject = is_unix_timestamp ? new Date(time * 1000) : new Date(time);
    const escaped_time = _.escape(time);

    if (isValid(timeobject)) {
        // Render the ISO time without milliseconds.
        const iso_without_ms = timeobject.toISOString().split(".")[0] + "Z";
        return `<time datetime="${_.escape(iso_without_ms)}">${escaped_time}</time>`;
    }

    // Unsupported time format: rerender accordingly.
    // We do not show an error on these formats in local echo because
    // there is a chance that the server would interpret it successfully
    // and if it does, the jumping from the error message to a rendered
    // timestamp doesn't look good.
    return `<span>${escaped_time}</span>`;
}
function handleStream({stream_name, get_stream_by_name, stream_hash}) {
    // Renders a stream link for `stream_name`, or returns undefined when
    // get_stream_by_name() does not know the stream.
    const stream = get_stream_by_name(stream_name);
    if (stream === undefined) {
        return undefined;
    }

    const {stream_id, name} = stream;
    const link_target = `/${_.escape(stream_hash(stream_id))}`;
    const id_attr = _.escape(stream_id);
    return `<a class="stream" data-stream-id="${id_attr}" href="${link_target}">#${_.escape(name)}</a>`;
}
function handleStreamTopic({stream_name, topic, get_stream_by_name, stream_topic_hash}) {
    // Renders a "#stream > topic" link, or returns undefined when the
    // stream is unknown or the topic is empty.
    const stream = get_stream_by_name(stream_name);
    const is_linkable = stream !== undefined && Boolean(topic);
    if (!is_linkable) {
        return undefined;
    }

    const {stream_id, name} = stream;
    const link_target = `/${_.escape(stream_topic_hash(stream_id, topic))}`;
    const link_text = _.escape(`#${name} > ${topic}`);
    const id_attr = _.escape(stream_id);
    return `<a class="stream-topic" data-stream-id="${id_attr}" href="${link_target}">${link_text}</a>`;
}
function handleTex(tex, fullmatch) {
    // Renders `tex` with KaTeX and returns the resulting HTML.
    //
    // When KaTeX reports a parse error (a TeX syntax error), the original
    // source text `fullmatch` is returned instead, escaped and wrapped in
    // a "tex-error" span.
    //
    // Throws an Error for any other failure. Its message is the message
    // of the underlying failure, which is also attached as `cause` so the
    // original stack trace and error type are not lost.
    try {
        return katex.renderToString(tex);
    } catch (error) {
        // Anything can be thrown; only trust `.message` when it is a string.
        const message = typeof error?.message === "string" ? error.message : String(error);
        if (message.startsWith("KaTeX parse error")) {
            // TeX syntax error
            return `<span class="tex-error">${_.escape(fullmatch)}</span>`;
        }
        throw new Error(message, {cause: error});
    }
}
// Renders raw_content (the raw Markdown of a message) with the marked
// parser configured for Zulip, and returns the {content, flags} object
// produced by parse_with_options(). helper_config supplies the callbacks
// used below to look up linkifiers, streams, emoji and user settings.
//
// Note that the rule overrides below are assignments on the shared
// marked.Lexer / marked.InlineLexer rule objects, and they are applied
// again on every call.
export function parse({raw_content, helper_config}) {
// Returns the linkifier regexes, i.e. the keys of the linkifier map.
function get_linkifier_regexes() {
return [...helper_config.get_linkifier_map().keys()];
}
// Replaces rules[name] with a stub whose exec() always returns false.
function disable_markdown_regex(rules, name) {
rules[name] = {
exec() {
return false;
},
};
}
// Configure the marked Markdown parser for our usage
const renderer = new marked.Renderer();
// No <code> around our code blocks instead a codehilite <div> and disable
// class-specific highlighting.
renderer.code = (code) => fenced_code.wrap_code(code) + "\n\n";
// Prohibit empty links: when the link text is empty or whitespace-only,
// the href is used as the link text instead.
const old_link = renderer.link;
renderer.link = (href, title, text) =>
old_link.call(renderer, href, title, text.trim() ? text : href);
// Put a newline after a <br> in the generated HTML to match Markdown
renderer.br = function () {
return "<br>\n";
};
// Preprocessor: runs the fenced code processor over the source.
function preprocess_code_blocks(src) {
return fenced_code.process_fenced_code(src);
}
// Preprocessor: translates emoticons to their colon syntax, but only
// when helper_config says emoticon translation is enabled.
function preprocess_translate_emoticons(src) {
if (!helper_config.should_translate_emoticons()) {
return src;
}
// In this scenario, the message has to be from the user, so the only
// requirement should be that they have the setting on.
return translate_emoticons_to_names({
src,
get_emoticon_translations: helper_config.get_emoticon_translations,
});
}
// Disable headings
// We only keep the # Heading format.
disable_markdown_regex(marked.Lexer.rules.tables, "lheading");
// Disable __strong__ (keeping **strong**)
marked.InlineLexer.rules.zulip.strong = /^\*\*([\S\s]+?)\*\*(?!\*)/;
// Make sure <del> syntax matches the backend processor
marked.InlineLexer.rules.zulip.del = /^(?!<~)~~([^~]+)~~(?!~)/;
// Disable _emphasis_ (keeping *emphasis*)
// Text inside ** must start and end with a word character
// to prevent mis-parsing things like "char **x = (char **)y"
marked.InlineLexer.rules.zulip.em = /^\*(?!\s+)((?:\*\*|[\S\s])+?)(\S)\*(?!\*)/;
// Disable autolink as (a) it is not used in our backend and (b) it interferes with @mentions
disable_markdown_regex(marked.InlineLexer.rules.zulip, "autolink");
// Tell our fenced code preprocessor how to insert arbitrary
// HTML into the output. This generated HTML is safe to not escape
fenced_code.set_stash_func((html) => marked.stashHtml(html, true));
// The handlers below are thin adapters: each one forwards its arguments
// to the matching module-level handle* function together with the
// lookup callbacks it needs from helper_config.
function streamHandler(stream_name) {
return handleStream({
stream_name,
get_stream_by_name: helper_config.get_stream_by_name,
stream_hash: helper_config.stream_hash,
});
}
function streamTopicHandler(stream_name, topic) {
return handleStreamTopic({
stream_name,
topic,
get_stream_by_name: helper_config.get_stream_by_name,
stream_topic_hash: helper_config.stream_topic_hash,
});
}
function emojiHandler(emoji_name) {
return handleEmoji({
emoji_name,
get_realm_emoji_url: helper_config.get_realm_emoji_url,
get_emoji_codepoint: helper_config.get_emoji_codepoint,
});
}
function unicodeEmojiHandler(unicode_emoji) {
return handleUnicodeEmoji({
unicode_emoji,
get_emoji_name: helper_config.get_emoji_name,
});
}
function linkifierHandler(pattern, matches) {
return handleLinkifier({
pattern,
matches,
get_linkifier_map: helper_config.get_linkifier_map,
});
}
// Options handed to marked (via parse_with_options, which adds the
// mention handlers): our handlers and renderer, marked's own switches,
// and the preprocessors listed in `preprocessors`.
const options = {
get_linkifier_regexes,
linkifierHandler,
emojiHandler,
unicodeEmojiHandler,
streamHandler,
streamTopicHandler,
texHandler: handleTex,
timestampHandler: handleTimestamp,
gfm: true,
tables: true,
breaks: true,
pedantic: false,
sanitize: true,
smartLists: true,
smartypants: false,
zulip: true,
renderer,
preprocessors: [preprocess_code_blocks, preprocess_translate_emoticons],
};
return parse_with_options({raw_content, helper_config, options});
}
// NOTE: Everything below this line is likely to be web-specific
// and won't be used by future platforms such as mobile.
// We may eventually move this code to a new file, but we want
// to wait till the dust settles a bit on some other changes first.
// The helper_config registered through initialize(). The web-app entry
// points below hand it (or its get_linkifier_map) to the generic
// functions above.
let web_app_helpers;

export function initialize(helper_config) {
    // Registers the web app's helper_config for the web-app entry points
    // in this file.
    // This is generally only intended to be called by the web app; other
    // platforms can pass their own helper_config directly to parse().
    web_app_helpers = helper_config;
}
export function apply_markdown(message) {
    // Renders message.raw_content and stores the results on the message
    // itself: `content`, `flags` and `is_me_message` are overwritten.
    // This is generally only intended to be called by the web app. Most
    // other platforms should call parse().
    const {raw_content} = message;
    const rendered = parse({raw_content, helper_config: web_app_helpers});
    message.content = rendered.content;
    message.flags = rendered.flags;
    message.is_me_message = is_status_message(raw_content);
}
export function add_topic_links(message) {
    // Sets message.topic_links: the links found in the topic for stream
    // messages, and an empty list for every other message type.
    const is_stream_message = message.type === "stream";
    message.topic_links = is_stream_message
        ? get_topic_links({
              topic: message.topic,
              get_linkifier_map: web_app_helpers.get_linkifier_map,
          })
        : [];
}
export function contains_backend_only_syntax(content) {
    // Web-app entry point: runs the backend-only-syntax check on `content`
    // with the linkifier map of the registered web app helpers.
    const {get_linkifier_map} = web_app_helpers;
    return content_contains_backend_only_syntax({content, get_linkifier_map});
}
export function parse_non_message(raw_content) {
    // Converts Markdown that is not technically a message (we occasionally
    // get such text from the server) to HTML. The text is parsed exactly
    // as if it were a Zulip message, so mentions, stream links and
    // linkifiers are handled; only the rendered HTML is returned.
    const {content} = parse({raw_content, helper_config: web_app_helpers});
    return content;
}