2016-10-27 12:06:44 +02:00
|
|
|
import re
|
2018-05-10 19:13:36 +02:00
|
|
|
from typing import Dict
|
2016-10-27 12:06:44 +02:00
|
|
|
from .base import BaseParser
|
|
|
|
|
|
|
|
|
|
|
|
class OpenGraphParser(BaseParser):
    """Collect Open Graph (``og:``) metadata from a page's ``<meta>`` tags."""

    def extract_data(self) -> Dict[str, str]:
        """Return the Open Graph properties found in ``self._soup``.

        Every ``<meta>`` tag that has both a ``property`` attribute starting
        with ``og:`` and a ``content`` attribute contributes one entry. The
        key is the property name with the leading ``og:`` removed and the
        value is the tag's ``content``. When the same property appears more
        than once, the last occurrence wins.

        Returns:
            A mapping of property names (without the ``og:`` prefix) to
            their content values; empty when no matching tags exist.
        """
        # NOTE(review): self._soup is presumably a BeautifulSoup document set
        # up by BaseParser -- confirm against the base class.
        prefix = 'og:'
        content = {}
        for tag in self._soup.findAll('meta'):
            if not (tag.has_attr('property') and tag.has_attr('content')):
                continue
            name = tag['property']
            # Match on the prefix only: a substring test would also accept
            # unrelated properties such as "catalog:id", and removing every
            # "og:" occurrence would mangle the remainder of the name.
            if name.startswith(prefix):
                content[name[len(prefix):]] = tag['content']
        return content
|