2016-10-27 12:06:44 +02:00
|
|
|
from __future__ import absolute_import
|
|
|
|
import re
|
2016-12-21 13:17:53 +01:00
|
|
|
from typing import Dict, Text
|
2016-10-27 12:06:44 +02:00
|
|
|
from .base import BaseParser
|
|
|
|
|
|
|
|
|
|
|
|
class OpenGraphParser(BaseParser):
    """Parser that collects Open Graph (``og:``) ``<meta>`` properties."""

    def extract_data(self):
        # type: () -> Dict[str, Text]
        """Return the Open Graph properties found in the parsed document.

        Every ``<meta>`` tag whose ``property`` attribute starts with
        ``og:`` contributes one entry: the key is the property name with
        the leading ``og:`` removed (``og:image:width`` becomes
        ``image:width``) and the value is the tag's ``content`` attribute.

        Tags without a ``content`` attribute are skipped rather than
        raising ``KeyError``.  When the same property appears more than
        once, the last occurrence wins.

        NOTE(review): ``self._soup`` is presumably a BeautifulSoup document
        set up by ``BaseParser`` -- confirm against the base class.
        """
        prefix = 'og:'
        content = {}  # type: Dict[str, Text]
        for tag in self._soup.findAll('meta'):
            if not tag.has_attr('property'):
                continue
            prop = tag['property']
            # Match on the prefix only: a substring test would also pick up
            # unrelated properties such as "blog:title".
            if not prop.startswith(prefix):
                continue
            value = tag.get('content')
            if value is None:
                # Malformed tag (no content attribute); nothing to record.
                continue
            # Strip just the leading "og:" so nested names keep their shape.
            content[prop[len(prefix):]] = value
        return content
|