2022-02-18 22:48:53 +01:00
|
|
|
from urllib.parse import urlparse
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2023-10-12 19:43:45 +02:00
|
|
|
from typing_extensions import override
|
|
|
|
|
2022-04-14 21:52:41 +02:00
|
|
|
from zerver.lib.url_preview.types import UrlEmbedData
|
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
from .base import BaseParser
|
|
|
|
|
|
|
|
|
|
|
|
class OpenGraphParser(BaseParser):
    """Build URL embed data from a page's Open Graph ``<meta>`` tags."""

    @override
    def extract_data(self) -> UrlEmbedData:
        """Collect the ``og:title``, ``og:description`` and ``og:image`` values.

        Every ``<meta>`` element found in ``self._soup`` is inspected;
        elements lacking either a ``property`` or a ``content`` attribute
        are ignored.  When the same property appears more than once, the
        last occurrence wins.

        Returns:
            A ``UrlEmbedData`` whose ``title``, ``description`` and
            ``image`` attributes are assigned for each matching tag found.
            Attributes with no matching tag keep whatever ``UrlEmbedData()``
            initializes them to.
        """
        data = UrlEmbedData()

        # NOTE(review): assumes self._soup is a Beautiful Soup 4 object (the
        # tags are already used through has_attr); find_all is the supported
        # spelling there, findAll being only a deprecated alias.
        for tag in self._soup.find_all("meta"):
            if not (tag.has_attr("property") and tag.has_attr("content")):
                continue

            prop = tag["property"]
            content = tag["content"]

            if prop == "og:title":
                data.title = content
            elif prop == "og:description":
                data.description = content
            elif prop == "og:image":
                try:
                    # We use urlparse and not URLValidator because we
                    # need to support relative URLs.
                    urlparse(content)
                except ValueError:
                    # Unparseable image URL: skip this tag and keep any
                    # image value recorded from an earlier tag.
                    continue
                data.image = content

        return data
|