2018-05-10 19:13:36 +02:00
|
|
|
from typing import Any
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class BaseParser:
    """Base class that parses an HTML document on construction.

    The constructor parses ``html_source`` with BeautifulSoup and keeps the
    result in ``self._soup``. ``extract_data`` is not implemented here and
    always raises ``NotImplementedError``; presumably subclasses override it
    (no subclass is visible in this section of the file).
    """

    def __init__(self, html_source: str) -> None:
        """Parse ``html_source`` and store the parsed tree on ``self._soup``.

        Args:
            html_source: The HTML markup to parse.
        """
        # We import BeautifulSoup here, because it's not used by most
        # processes in production, and bs4 is big enough that
        # importing it adds 10s of milliseconds to manage.py startup.
        from bs4 import BeautifulSoup

        # "lxml" selects the parser backend that BeautifulSoup should use.
        self._soup = BeautifulSoup(html_source, "lxml")

    def extract_data(self) -> Any:
        """Return the data extracted from the parsed document.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|