add html metadata parsing
This commit is contained in:
parent
202f1344a8
commit
99810e174a
@ -12,6 +12,7 @@ import tornado.web
|
|||||||
import tornado.routing
|
import tornado.routing
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import aiohttp_socks
|
import aiohttp_socks
|
||||||
|
import html.parser
|
||||||
import stream_providers
|
import stream_providers
|
||||||
|
|
||||||
logging.basicConfig(format='[%(filename)s:%(lineno)d] %(message)s', stream=sys.stdout, level=logging.INFO)
|
logging.basicConfig(format='[%(filename)s:%(lineno)d] %(message)s', stream=sys.stdout, level=logging.INFO)
|
||||||
@ -111,6 +112,30 @@ for key in providers:
|
|||||||
for proxy in current:
|
for proxy in current:
|
||||||
proxies[key].append(ProxyElem(proxy))
|
proxies[key].append(ProxyElem(proxy))
|
||||||
|
|
||||||
|
class MetaParser(html.parser.HTMLParser):
    """Collect whitelisted Open Graph ``<meta>`` tags from an HTML document.

    Feed markup with ``feed()``; afterwards ``meta_data`` maps each accepted
    ``property`` value (e.g. ``"og:title"``) to the tag's ``content`` string.
    If the same property appears in several tags, the last tag fed wins.
    """

    def __init__(self):
        super().__init__()
        # property name -> content string, filled in by handle_starttag().
        self.meta_data = {}
        # Only these ``property`` values are recorded; everything else is
        # ignored.  Kept as a per-instance list so it can still be adjusted
        # on an individual parser.
        self.accepted_attrs = [
            "og:title",
            "og:description",
            "og:image",
            "og:video:height",
            "og:video:width",
            "og:image:height",
            "og:image:width",
        ]

    def handle_starttag(self, tag, attrs):
        """Record ``content`` of a ``<meta>`` tag with an accepted ``property``.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` is ``None`` for an attribute written without
        a value.  Attribute order inside the tag does not matter.
        """
        if tag != "meta":
            return

        # First accepted ``property`` attribute on the tag, if any.
        name = next(
            (
                value
                for key, value in attrs
                if key == "property" and value in self.accepted_attrs
            ),
            None,
        )
        if name is None:
            return

        # Only string values are usable: a bare ``content`` attribute yields
        # None, which must not end up in meta_data.
        content = next(
            (
                value
                for key, value in attrs
                if key == "content" and isinstance(value, str)
            ),
            None,
        )
        if content is None:
            return

        self.meta_data[name] = content
|
||||||
|
|
||||||
class UpstreamHandler():
|
class UpstreamHandler():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.provider = None
|
self.provider = None
|
||||||
@ -200,6 +225,12 @@ class UpstreamHandler():
|
|||||||
value = data_new.get(key)
|
value = data_new.get(key)
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
data_filtered[key] = value
|
data_filtered[key] = value
|
||||||
|
if len(data_filtered) == 0:
|
||||||
|
resp = await session.get(self.upstream)
|
||||||
|
text = await resp.text()
|
||||||
|
parser = MetaParser()
|
||||||
|
parser.feed(text)
|
||||||
|
data_filtered = parser.meta_data
|
||||||
data = list(data_filtered.items())
|
data = list(data_filtered.items())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.info(e)
|
logger.info(e)
|
||||||
|
Loading…
Reference in New Issue
Block a user