markdown: Include text & url in `topic_links` parameter of our API.

The linkifier code now includes both the shortened text and the expanded URL, sorted by the order of the occurrence in a topic. This list is passed back in the `topic_links` parameter of the /messages and the /events APIs. topic_links earlier vs now: earlier: ['https://www.google.com', 'https://github.com/zulip/zulip/32'] now: [{'url': 'https://www.google.com', 'text': 'https://www.google/com}, {'url': 'https://github.com/zulip/zulip/32', 'text': '#32'}] Similarly, the topic_links local echo logic in the frontend now returns back an object. Fixes: #17109.
2021-01-26 12:02:29 +05:30 · 2021-01-26 12:02:29 +05:30 · e12f682e2e
parent de1660e407
commit e12f682e2e
10 changed files with 203 additions and 60 deletions
--- a/frontend_tests/node_tests/markdown.js
+++ b/frontend_tests/node_tests/markdown.js
@ -554,41 +554,74 @@ test("topic_links", () => {
    message = {type: "stream", topic: "One #123 link here"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 1);
-    assert.equal(message.topic_links[0], "https://trac.example.com/ticket/123");
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://trac.example.com/ticket/123",
+        text: "#123",
+    });

    message = {type: "stream", topic: "Two #123 #456 link here"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 2);
-    assert.equal(message.topic_links[0], "https://trac.example.com/ticket/123");
-    assert.equal(message.topic_links[1], "https://trac.example.com/ticket/456");
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://trac.example.com/ticket/123",
+        text: "#123",
+    });
+    assert.deepEqual(message.topic_links[1], {
+        url: "https://trac.example.com/ticket/456",
+        text: "#456",
+    });

    message = {type: "stream", topic: "New ZBUG_123 link here"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 1);
-    assert.equal(message.topic_links[0], "https://trac2.zulip.net/ticket/123");
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://trac2.zulip.net/ticket/123",
+        text: "ZBUG_123",
+    });

    message = {type: "stream", topic: "New ZBUG_123 with #456 link here"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 2);
-    assert(message.topic_links.includes("https://trac2.zulip.net/ticket/123"));
-    assert(message.topic_links.includes("https://trac.example.com/ticket/456"));
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://trac2.zulip.net/ticket/123",
+        text: "ZBUG_123",
+    });
+    assert.deepEqual(message.topic_links[1], {
+        url: "https://trac.example.com/ticket/456",
+        text: "#456",
+    });

    message = {type: "stream", topic: "One ZGROUP_123:45 link here"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 1);
-    assert.equal(message.topic_links[0], "https://zone_45.zulip.net/ticket/123");
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://zone_45.zulip.net/ticket/123",
+        text: "ZGROUP_123:45",
+    });

    message = {type: "stream", topic: "Hello https://google.com"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 1);
-    assert.equal(message.topic_links[0], "https://google.com");
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://google.com",
+        text: "https://google.com",
+    });

    message = {type: "stream", topic: "#456 https://google.com https://github.com"};
    markdown.add_topic_links(message);
    assert.equal(message.topic_links.length, 3);
-    assert(message.topic_links.includes("https://google.com"));
-    assert(message.topic_links.includes("https://github.com"));
-    assert(message.topic_links.includes("https://trac.example.com/ticket/456"));
+    assert.deepEqual(message.topic_links[0], {
+        url: "https://trac.example.com/ticket/456",
+        text: "#456",
+    });
+    assert.deepEqual(message.topic_links[1], {
+        url: "https://google.com",
+        text: "https://google.com",
+    });
+    assert.deepEqual(message.topic_links[2], {
+        url: "https://github.com",
+        text: "https://github.com",
+    });

    message = {type: "not-stream"};
    markdown.add_topic_links(message);
--- a/static/js/markdown.js
+++ b/static/js/markdown.js
@ -222,7 +222,7 @@ export function add_topic_links(message) {
        return;
    }
    const topic = message.topic;
-    let links = [];
+    const links = [];

    for (const linkifier of linkifier_list) {
        const pattern = linkifier[0];
@ -239,17 +239,24 @@ export function add_topic_links(message) {
                link_url = link_url.replace(back_ref, matched_group);
                i += 1;
            }
-            links.push(link_url);
+            // We store the starting index as well, to sort the order of occurence of the links
+            // in the topic, similar to the logic implemeted in zerver/lib/markdown/__init__.py
+            links.push({url: link_url, text: match[0], index: topic.indexOf(match[0])});
        }
    }

    // Also make raw URLs navigable
    const url_re = /\b(https?:\/\/[^\s<]+[^\s"'),.:;<\]])/g; // Slightly modified from third/marked.js
-    const match = topic.match(url_re);
-    if (match) {
-        links = links.concat(match);
+    const matches = topic.match(url_re);
+    if (matches) {
+        for (const match of matches) {
+            links.push({url: match, text: match, index: topic.indexOf(match)});
+        }
+    }
+    links.sort((a, b) => a.index - b.index);
+    for (const match of links) {
+        delete match.index;
    }
-
    message.topic_links = links;
 }

--- a/static/templates/recipient_row.hbs
+++ b/static/templates/recipient_row.hbs
@ -32,7 +32,7 @@
            </span><span class="recipient_bar_controls no-select">
                {{! exterior links (e.g. to a trac ticket) }}
                {{#each topic_links}}
-                <a href="{{this}}" target="_blank" rel="noopener noreferrer" class="no-underline">
+                <a href="{{this.url}}" target="_blank" rel="noopener noreferrer" class="no-underline">
                    <i class="fa fa-external-link-square recipient_bar_icon" aria-label="{{t 'External link' }}"></i>
                </a>
                {{/each}}
--- a/templates/zerver/api/changelog.md
+++ b/templates/zerver/api/changelog.md
@ -10,6 +10,12 @@ below features are supported.

 ## Changes in Zulip 4.0

+**Feature level 46**
+
+* [`GET /messages`](/api/get-messages) and [`GET
+  /events`](/api/get-events): The `topic_links` field now contains a
+  list of dictionaries, rather than a list of strings.
+
 **Feature level 45**

 * [`GET /events`](/api/get-events): Removed useless `op` field from
--- a/version.py
+++ b/version.py
@ -30,7 +30,7 @@ DESKTOP_WARNING_VERSION = "5.2.0"
 #
 # Changes should be accompanied by documentation explaining what the
 # new level means in templates/zerver/api/changelog.md.
-API_FEATURE_LEVEL = 45
+API_FEATURE_LEVEL = 46

 # Bump the minor PROVISION_VERSION to indicate that folks should provision
 # only when going from an old version of the code to a newer version. Bump
--- a/zerver/lib/event_schema.py
+++ b/zerver/lib/event_schema.py
@ -337,6 +337,13 @@ muted_topics_event = event_dict_type(
 )
 check_muted_topics = make_checker(muted_topics_event)

+_check_topic_links = DictType(
+    required_keys=[
+        ("text", str),
+        ("url", str),
+    ]
+)
+
 message_fields = [
    ("avatar_url", OptionalType(str)),
    ("client", str),
@ -353,7 +360,7 @@ message_fields = [
    ("sender_id", int),
    ("stream_id", int),
    (TOPIC_NAME, str),
-    (TOPIC_LINKS, ListType(str)),
+    (TOPIC_LINKS, ListType(_check_topic_links)),
    ("submessages", ListType(dict)),
    ("timestamp", int),
    ("type", str),
@ -1380,7 +1387,7 @@ update_message_topic_fields = [
    ),
    ("stream_id", int),
    ("stream_name", str),
-    (TOPIC_LINKS, ListType(str)),
+    (TOPIC_LINKS, ListType(_check_topic_links)),
    (TOPIC_NAME, str),
 ]

--- a/zerver/lib/markdown/init.py
+++ b/zerver/lib/markdown/init.py
@ -2382,30 +2382,49 @@ basic_link_splitter = re.compile(r"[ !;\?\),\'\"]")
 # function on the URLs; they are expected to be HTML-escaped when
 # rendered by clients (just as links rendered into message bodies
 # are validated and escaped inside `url_to_a`).
-def topic_links(realm_filters_key: int, topic_name: str) -> List[str]:
-    matches: List[str] = []
-
+def topic_links(realm_filters_key: int, topic_name: str) -> List[Dict[str, str]]:
+    matches: List[Dict[str, Union[str, int]]] = []
    realm_filters = realm_filters_for_realm(realm_filters_key)

    for realm_filter in realm_filters:
-        pattern = prepare_realm_pattern(realm_filter[0])
+        raw_pattern = realm_filter[0]
+        url_format_string = realm_filter[1]
+        pattern = prepare_realm_pattern(raw_pattern)
        for m in re.finditer(pattern, topic_name):
-            matches += [realm_filter[1] % m.groupdict()]
+            match_details = m.groupdict()
+            match_text = match_details["linkifier_actual_match"]
+            # We format the realm_filter's url string using the matched text.
+            # Also, we include the matched text in the response, so that our clients
+            # don't have to implement any logic of their own to get back the text.
+            matches += [
+                dict(
+                    url=url_format_string % match_details,
+                    text=match_text,
+                    index=topic_name.find(match_text),
+                )
+            ]

    # Also make raw URLs navigable.
    for sub_string in basic_link_splitter.split(topic_name):
        link_match = re.match(get_web_link_regex(), sub_string)
        if link_match:
-            url = link_match.group("url")
-            result = urlsplit(url)
+            actual_match_url = link_match.group("url")
+            result = urlsplit(actual_match_url)
            if not result.scheme:
                if not result.netloc:
                    i = (result.path + "/").index("/")
                    result = result._replace(netloc=result.path[:i], path=result.path[i:])
                url = result._replace(scheme="https").geturl()
-            matches.append(url)
+            else:
+                url = actual_match_url
+            matches.append(
+                dict(url=url, text=actual_match_url, index=topic_name.find(actual_match_url))
+            )

-    return matches
+    # In order to preserve the order in which the links occur, we sort the matched text
+    # based on its starting index in the topic. We pop the index field before returning.
+    matches = sorted(matches, key=lambda k: k["index"])
+    return [{k: str(v) for k, v in match.items() if k != "index"} for match in matches]


 def maybe_update_markdown_engines(realm_filters_key: Optional[int], email_gateway: bool) -> None:
--- a/zerver/openapi/zulip.yaml
+++ b/zerver/openapi/zulip.yaml
@ -1823,18 +1823,30 @@ paths:
                                topic_links:
                                  type: array
                                  items:
+                                    type: object
+                                    additionalProperties: false
+                                    properties:
+                                      text:
                                        type: string
+                                        description: |
+                                          The original link text present in the topic.
+                                      url:
+                                        type: string
+                                        description: |
+                                          The expanded target url which the link points to.
                                  description: |
                                    Data on any links to be included in the `topic`
-                                    line (these are generated by [custom linkification
-                                    filters][linkification-filters] that match content in the
-                                    message's topic.)
+                                    line (these are generated by
+                                    [custom linkification filter](/help/add-a-custom-linkification-filter)
+                                    that match content in the message's topic.)

-                                    **Changes**: New in Zulip 3.0 (feature level 1).
-                                    Previously, this field was called `subject_links`;
-                                    clients are recommended to rename `subject_links`
-                                    to `topic_links` if present for compatibility with
-                                    older Zulip servers.
+                                    **Changes**: This field contained a list of urls before
+                                      Zulip 4.0 (feature level 46).
+
+                                    New in Zulip 3.0 (feature level 1).  Previously, this field
+                                    was called `subject_links`; clients are recommended to
+                                    rename `subject_links` to `topic_links` if present for
+                                    compatibility with older Zulip servers.
                                message_ids:
                                  type: array
                                  items:
@ -10362,18 +10374,30 @@ components:
        topic_links:
          type: array
          items:
+            type: object
+            additionalProperties: false
+            properties:
+              text:
                type: string
+                description: |
+                  The original link text present in the topic.
+              url:
+                type: string
+                description: |
+                  The expanded target url which the link points to.
          description: |
            Data on any links to be included in the `topic`
            line (these are generated by [custom linkification
            filters][linkification-filters] that match content in the
            message's topic.)

-            **Changes**: New in Zulip 3.0 (feature level 1).
-            Previously, this field was called `subject_links`;
-            clients are recommended to rename `subject_links`
-            to `topic_links` if present for compatibility with
-            older Zulip servers.
+            **Changes**: This field contained a list of urls before
+              Zulip 4.0 (feature level 46).
+
+            New in Zulip 3.0 (feature level 1): Previously, this field was called
+            `subject_links`; clients are recommended to rename `subject_links` to `topic_links`
+            if present for compatibility with older Zulip servers.
+
        submessages:
          type: array
          items:
--- a/zerver/tests/test_markdown.py
+++ b/zerver/tests/test_markdown.py
@ -1207,15 +1207,24 @@ class MarkdownTest(ZulipTestCase):

        msg.set_topic_name("https://google.com/hello-world")
        converted_topic = topic_links(realm.id, msg.topic_name())
-        self.assertEqual(converted_topic, ["https://google.com/hello-world"])
+        self.assertEqual(
+            converted_topic,
+            [{"url": "https://google.com/hello-world", "text": "https://google.com/hello-world"}],
+        )

        msg.set_topic_name("http://google.com/hello-world")
        converted_topic = topic_links(realm.id, msg.topic_name())
-        self.assertEqual(converted_topic, ["http://google.com/hello-world"])
+        self.assertEqual(
+            converted_topic,
+            [{"url": "http://google.com/hello-world", "text": "http://google.com/hello-world"}],
+        )

        msg.set_topic_name("Without scheme google.com/hello-world")
        converted_topic = topic_links(realm.id, msg.topic_name())
-        self.assertEqual(converted_topic, ["https://google.com/hello-world"])
+        self.assertEqual(
+            converted_topic,
+            [{"url": "https://google.com/hello-world", "text": "google.com/hello-world"}],
+        )

        msg.set_topic_name("Without scheme random.words/hello-world")
        converted_topic = topic_links(realm.id, msg.topic_name())
@ -1226,7 +1235,23 @@ class MarkdownTest(ZulipTestCase):
        )
        converted_topic = topic_links(realm.id, msg.topic_name())
        self.assertEqual(
-            converted_topic, ["http://ftp.debian.org", "https://google.com/", "https://google.in/"]
+            converted_topic,
+            [
+                {"url": "http://ftp.debian.org", "text": "http://ftp.debian.org"},
+                {"url": "https://google.com/", "text": "https://google.com/"},
+                {"url": "https://google.in/", "text": "https://google.in/"},
+            ],
+        )
+
+        # test order for links without scheme
+        msg.set_topic_name("google.in google.com")
+        converted_topic = topic_links(realm.id, msg.topic_name())
+        self.assertEqual(
+            converted_topic,
+            [
+                {"url": "https://google.in", "text": "google.in"},
+                {"url": "https://google.com", "text": "google.com"},
+            ],
        )

    def test_realm_patterns(self) -> None:
@ -1254,12 +1279,18 @@ class MarkdownTest(ZulipTestCase):
            converted,
            '<p>We should fix <a href="https://trac.example.com/ticket/224">#224</a> and <a href="https://trac.example.com/ticket/115">#115</a>, but not issue#124 or #1124z or <a href="https://trac.example.com/ticket/16">trac #15</a> today.</p>',
        )
-        self.assertEqual(converted_topic, ["https://trac.example.com/ticket/444"])
+        self.assertEqual(
+            converted_topic, [{"url": "https://trac.example.com/ticket/444", "text": "#444"}]
+        )

        msg.set_topic_name("#444 https://google.com")
        converted_topic = topic_links(realm.id, msg.topic_name())
        self.assertEqual(
-            converted_topic, ["https://trac.example.com/ticket/444", "https://google.com"]
+            converted_topic,
+            [
+                {"url": "https://trac.example.com/ticket/444", "text": "#444"},
+                {"url": "https://google.com", "text": "https://google.com"},
+            ],
        )

        RealmFilter(
@ -1283,7 +1314,10 @@ class MarkdownTest(ZulipTestCase):
            if should_have_converted:
                self.assertTrue("https://trac.example.com" in converted)
                self.assertEqual(len(converted_topic), 1)
-                self.assertTrue("https://trac.example.com" in converted_topic[0])
+                self.assertEqual(
+                    converted_topic[0],
+                    {"url": "https://trac.example.com/ticket/123", "text": "#123"},
+                )
            else:
                self.assertTrue("https://trac.example.com" not in converted)
                self.assertEqual(len(converted_topic), 0)
@ -1315,7 +1349,20 @@ class MarkdownTest(ZulipTestCase):
        converted_topic = topic_links(realm.id, "hello#123 #234")
        self.assertEqual(
            converted_topic,
-            ["https://trac.example.com/ticket/234", "https://trac.example.com/hello/123"],
+            [
+                {"url": "https://trac.example.com/hello/123", "text": "hello#123"},
+                {"url": "https://trac.example.com/ticket/234", "text": "#234"},
+            ],
+        )
+
+        # test correct order when realm pattern and normal links are both present.
+        converted_topic = topic_links(realm.id, "#234 https://google.com")
+        self.assertEqual(
+            converted_topic,
+            [
+                {"url": "https://trac.example.com/ticket/234", "text": "#234"},
+                {"url": "https://google.com", "text": "https://google.com"},
+            ],
        )

    def test_multiple_matching_realm_patterns(self) -> None:
@ -1367,8 +1414,8 @@ class MarkdownTest(ZulipTestCase):
        self.assertEqual(
            converted_topic,
            [
-                "https://trac.example.com/ticket/ABC-123",
-                "https://other-trac.example.com/ticket/ABC-123",
+                {"url": "https://trac.example.com/ticket/ABC-123", "text": "ABC-123"},
+                {"url": "https://other-trac.example.com/ticket/ABC-123", "text": "ABC-123"},
            ],
        )

--- a/zerver/tests/test_message_dict.py
+++ b/zerver/tests/test_message_dict.py
@ -246,7 +246,7 @@ class MessageDictTest(ZulipTestCase):
        # the notification bot.
        zulip_realm = get_realm("zulip")
        url_format_string = r"https://trac.example.com/ticket/%(id)s"
-        url = "https://trac.example.com/ticket/123"
+        links = {"url": "https://trac.example.com/ticket/123", "text": "#123"}
        topic_name = "test #123"

        realm_filter = RealmFilter(
@ -263,7 +263,7 @@ class MessageDictTest(ZulipTestCase):
            )
            return Message.objects.get(id=msg_id)

-        def assert_topic_links(links: List[str], msg: Message) -> None:
+        def assert_topic_links(links: List[Dict[str, str]], msg: Message) -> None:
            dct = MessageDict.to_dict_uncached_helper([msg])[0]
            self.assertEqual(dct[TOPIC_LINKS], links)

@ -272,9 +272,9 @@ class MessageDictTest(ZulipTestCase):
        assert_topic_links([], get_message(self.lear_user("cordelia")))
        assert_topic_links([], get_message(self.notification_bot()))
        realm_filter.save()
-        assert_topic_links([url], get_message(self.example_user("othello")))
-        assert_topic_links([url], get_message(self.lear_user("cordelia")))
-        assert_topic_links([url], get_message(self.notification_bot()))
+        assert_topic_links([links], get_message(self.example_user("othello")))
+        assert_topic_links([links], get_message(self.lear_user("cordelia")))
+        assert_topic_links([links], get_message(self.notification_bot()))

    def test_reaction(self) -> None:
        sender = self.example_user("othello")