2020-04-11 13:24:06 +02:00
|
|
|
import json
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2021-05-07 03:54:25 +02:00
|
|
|
import requests
|
2020-06-11 00:54:34 +02:00
|
|
|
from pyoembed import PyOembedException, oEmbed
|
|
|
|
|
2022-04-14 21:52:41 +02:00
|
|
|
from zerver.lib.url_preview.types import UrlEmbedData, UrlOEmbedData
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2022-04-14 21:52:41 +02:00
|
|
|
|
2024-07-12 02:30:23 +02:00
|
|
|
def get_oembed_data(url: str, maxwidth: int = 640, maxheight: int = 480) -> UrlEmbedData | None:
    """Look up oEmbed metadata for ``url`` and wrap it in embed data.

    ``maxwidth`` and ``maxheight`` are forwarded unchanged to ``oEmbed``.

    Returns:
        * ``None`` if the lookup raises ``PyOembedException``, a JSON
          decode error, or a ``requests`` connection error.
        * ``UrlOEmbedData`` of type ``"photo"`` when the response is a
          photo that carries a non-empty ``url`` (or ``image``) value.
        * ``UrlOEmbedData`` of type ``"video"`` when the response is a
          video that carries both non-empty ``html`` and
          ``thumbnail_url`` values.
        * A plain ``UrlEmbedData`` holding only the title and
          description in every other case.
    """
    try:
        response = oEmbed(url, maxwidth=maxwidth, maxheight=maxheight)
    except (PyOembedException, json.decoder.JSONDecodeError, requests.exceptions.ConnectionError):
        return None

    resource_type = response.get("type", "")
    title = response.get("title")
    description = response.get("description")

    if resource_type == "photo":
        # Prefer the "url" field; fall back to "image" when "url" is absent.
        photo_url = response.get("url", response.get("image"))
        if photo_url:
            return UrlOEmbedData(
                image=photo_url,
                type="photo",
                title=title,
                description=description,
            )
    elif resource_type == "video":
        embed_html = response.get("html", "")
        thumbnail_url = response.get("thumbnail_url")
        if embed_html and thumbnail_url:
            return UrlOEmbedData(
                image=thumbnail_url,
                type="video",
                html=strip_cdata(embed_html),
                title=title,
                description=description,
            )

    # Anything else only contributes its title/description, which serve
    # as the basis for our other parsers.
    return UrlEmbedData(
        title=title,
        description=description,
    )
|
2019-05-02 18:58:39 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-12-12 09:39:41 +01:00
|
|
|
def strip_cdata(html: str) -> str:
    """Return ``html`` without an enclosing CDATA wrapper, if it has one.

    Works around a bug in SoundCloud's XML generation, which emits
        <html><![CDATA[<iframe ...></iframe>]]></html>
    The wrapper is removed only when both the opening and the closing
    delimiter are present; any other input is returned unchanged.
    """
    cdata_open = "<![CDATA["
    cdata_close = "]]>"
    if not (html.startswith(cdata_open) and html.endswith(cdata_close)):
        return html
    return html[len(cdata_open) : -len(cdata_close)]
|