python: Avoid deprecated cgi module, removed in Python 3.13.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2024-10-21 16:40:24 -07:00 · 2024-10-21 16:40:24 -07:00 · 08db41660a
parent 006ac58a4b
commit 08db41660a
2 changed files with 8 additions and 6 deletions
--- a/zerver/lib/markdown/__init__.py
+++ b/zerver/lib/markdown/__init__.py
@@ -1,6 +1,5 @@
 # Zulip's main Markdown implementation.  See docs/subsystems/markdown.md for
 # detailed documentation on our Markdown syntax.
-import cgi
 import html
 import logging
 import mimetypes
@@ -10,6 +9,7 @@ from collections import deque
 from collections.abc import Callable
 from dataclasses import dataclass
 from datetime import datetime, timezone
+from email.message import EmailMessage
 from functools import lru_cache
 from re import Match, Pattern
 from typing import Any, Generic, Optional, TypeAlias, TypedDict, TypeVar, cast
@@ -441,7 +441,9 @@ def fetch_open_graph_image(url: str) -> dict[str, Any] | None:
            if res.status_code != requests.codes.ok:
                return None

-            mimetype, options = cgi.parse_header(res.headers["Content-Type"])
+            m = EmailMessage()
+            m["Content-Type"] = res.headers.get("Content-Type")
+            mimetype = m.get_content_type()
            if mimetype not in ("text/html", "application/xhtml+xml"):
                return None
            html = mimetype == "text/html"
--- a/zerver/lib/url_preview/parsers/base.py
+++ b/zerver/lib/url_preview/parsers/base.py
@@ -1,4 +1,4 @@
-import cgi
+from email.message import EmailMessage

 from zerver.lib.url_preview.types import UrlEmbedData

@@ -10,9 +10,9 @@ class BaseParser:
        # importing it adds 10s of milliseconds to manage.py startup.
        from bs4 import BeautifulSoup

-        charset = None
-        if content_type is not None:
-            charset = cgi.parse_header(content_type)[1].get("charset")
+        m = EmailMessage()
+        m["Content-Type"] = content_type
+        charset = m.get_content_charset()
        self._soup = BeautifulSoup(html_source, "lxml", from_encoding=charset)

    def extract_data(self) -> UrlEmbedData: