diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index f23532bd4c..74962206f8 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -1,6 +1,5 @@ # Zulip's main Markdown implementation. See docs/subsystems/markdown.md for # detailed documentation on our Markdown syntax. -import cgi import html import logging import mimetypes @@ -10,6 +9,7 @@ from collections import deque from collections.abc import Callable from dataclasses import dataclass from datetime import datetime, timezone +from email.message import EmailMessage from functools import lru_cache from re import Match, Pattern from typing import Any, Generic, Optional, TypeAlias, TypedDict, TypeVar, cast @@ -441,7 +441,9 @@ def fetch_open_graph_image(url: str) -> dict[str, Any] | None: if res.status_code != requests.codes.ok: return None - mimetype, options = cgi.parse_header(res.headers["Content-Type"]) + m = EmailMessage() + m["Content-Type"] = res.headers.get("Content-Type") + mimetype = m.get_content_type() if mimetype not in ("text/html", "application/xhtml+xml"): return None html = mimetype == "text/html" diff --git a/zerver/lib/url_preview/parsers/base.py b/zerver/lib/url_preview/parsers/base.py index 921349f3d7..09541de9a2 100644 --- a/zerver/lib/url_preview/parsers/base.py +++ b/zerver/lib/url_preview/parsers/base.py @@ -1,4 +1,4 @@ -import cgi +from email.message import EmailMessage from zerver.lib.url_preview.types import UrlEmbedData @@ -10,9 +10,9 @@ class BaseParser: # importing it adds 10s of milliseconds to manage.py startup. from bs4 import BeautifulSoup - charset = None - if content_type is not None: - charset = cgi.parse_header(content_type)[1].get("charset") + m = EmailMessage() + m["Content-Type"] = content_type + charset = m.get_content_charset() self._soup = BeautifulSoup(html_source, "lxml", from_encoding=charset) def extract_data(self) -> UrlEmbedData: