reduce number of requests

This commit is contained in:
Roy Olav Purser 2021-05-28 15:57:19 +02:00
parent 231f7878b2
commit c021da3441
Signed by: roypur
GPG Key ID: E14D26A036F21656
2 changed files with 153 additions and 151 deletions

View File

@ -12,7 +12,6 @@ import tornado.web
import tornado.routing import tornado.routing
import aiohttp import aiohttp
import aiohttp_socks import aiohttp_socks
import html.parser
import stream_providers import stream_providers
logging.basicConfig(format='[%(filename)s:%(lineno)d] %(message)s', stream=sys.stdout, level=logging.INFO) logging.basicConfig(format='[%(filename)s:%(lineno)d] %(message)s', stream=sys.stdout, level=logging.INFO)
@ -112,26 +111,6 @@ for key in providers:
for proxy in current: for proxy in current:
proxies[key].append(ProxyElem(proxy)) proxies[key].append(ProxyElem(proxy))
class MetaParser(html.parser.HTMLParser):
    """HTML parser that collects selected Open Graph ``<meta>`` tags.

    After ``feed()`` has been called, ``meta_data`` maps each accepted
    ``property`` value (see ``accepted_attrs``) to the value of the
    ``content`` attribute found on the same tag.
    """

    def __init__(self):
        self.meta_data = {}
        self.accepted_attrs = ["og:title", "og:description", "og:image"]
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Record ``content`` for a ``<meta>`` tag with an accepted property."""
        if tag != "meta":
            return
        pairs = [item for item in attrs if len(item) == 2]
        # Locate the first accepted ``property`` attribute on this tag.
        for position, (key, name) in enumerate(pairs):
            if key == "property" and name in self.accepted_attrs:
                break
        else:
            return
        # ``content`` may be written before or after ``property``: look at the
        # attributes following the property first, then wrap around to the
        # ones preceding it.
        for key, value in pairs[position + 1:] + pairs[:position]:
            if key == "content":
                self.meta_data[name] = value
                return
class UpstreamHandler(): class UpstreamHandler():
def __init__(self): def __init__(self):
self.provider = None self.provider = None
@ -192,76 +171,6 @@ class UpstreamHandler():
for session in sessions: for session in sessions:
await session.close() await session.close()
async def meta(self):
    """Collect ``og:*`` metadata for the upstream URL.

    Two sources are queried through ``self.proxy.session()``: the noembed.com
    JSON endpoint and the upstream page itself (parsed with ``MetaParser``).
    Values parsed from the upstream page take precedence over the noembed
    values. For the ``youtube`` provider the thumbnail is upgraded to the
    ``maxresdefault`` or ``sddefault`` variant when a HEAD request shows the
    variant exists.

    Each source is handled independently, so a failure of one request no
    longer discards the metadata obtained from the other.

    Returns:
        dict mapping ``og:title`` / ``og:description`` / ``og:image`` to
        strings; empty when nothing could be retrieved.
    """
    data = {}
    try:
        embed_url = f'https://noembed.com/embed?url={self.upstream_safe}'
        async with self.proxy.session() as session:

            async def fetch_text(url):
                # Return the response body, or None when the request fails.
                try:
                    resp = await session.get(url)
                    return await resp.text()
                except Exception as e:
                    logger.info(e)
                    return None

            async def is_reachable(url):
                # True when a HEAD request answers with a non-error status.
                try:
                    resp = await session.head(url)
                except Exception as e:
                    logger.info(e)
                    return False
                return hasattr(resp, "status") and resp.status < 400

            text_embed = await fetch_text(embed_url)
            text_upstream = await fetch_text(self.upstream)

            if isinstance(text_embed, str):
                try:
                    data_raw = json.loads(text_embed)
                except ValueError as e:
                    logger.info(e)
                    data_raw = None
                if isinstance(data_raw, dict):
                    embed_fields = {
                        "og:title": data_raw.get("title"),
                        "og:description": data_raw.get("author_name"),
                        "og:image": data_raw.get("thumbnail_url"),
                    }
                    for key, value in embed_fields.items():
                        if isinstance(value, str):
                            data[key] = value

            if isinstance(text_upstream, str):
                parser = MetaParser()
                parser.feed(text_upstream)
                # Only string values may override what noembed supplied; a
                # valueless ``content`` attribute is parsed as None.
                for key, value in parser.meta_data.items():
                    if isinstance(value, str):
                        data[key] = value

            image = data.get("og:image")
            if isinstance(image, str) and self.provider == "youtube":
                pattern = r'\/[a-zA-Z0-9]+\.([a-zA-Z0-9]+)$'
                full_image = re.sub(pattern, r'/maxresdefault.\1', image)
                standard_image = re.sub(pattern, r'/sddefault.\1', image)
                # Probe the smaller variant only when the larger one is missing.
                if await is_reachable(full_image):
                    data["og:image"] = full_image
                elif await is_reachable(standard_image):
                    data["og:image"] = standard_image
    except Exception as e:
        logger.info(e)
    return data
if icecast_server is not None and stream_server is not None: if icecast_server is not None and stream_server is not None:
try: try:
with open("/app/sources.json", "r") as f: with open("/app/sources.json", "r") as f:
@ -376,32 +285,29 @@ class MainHandler(tornado.web.RequestHandler):
async def handle_render(self, handler):
    """Render the HTML player page for ``handler``'s upstream.

    Stream metadata is looked up with ``stream_providers.get_any`` and passed
    to the template as ``meta`` (list of ``(property, content)`` tuples) and
    ``title``. Responds with 404 when the script or template is missing.
    """
    if script_file is not None and template_html is not None:
        provider_data = await stream_providers.get_any(handler.upstream, handler.proxy, logger)
        # NOTE(review): ``data`` is not assigned in this method; presumably it
        # is defined outside it - verify.
        data["script"] = script_file
        data["videojs_version"] = videojs_version
        data["chromecast_version"] = chromecast_version
        data["font_awesome_version"] = font_awesome_version
        data["custom_style"] = custom_style
        # ``title`` is a method on the provider result and must be called;
        # passing it uncalled hands the template a bound method.
        rendered_html = template_html.generate(data=data, meta=provider_data.meta(), title=provider_data.title())
        self.write(rendered_html)
    else:
        self.set_status(404)
        self.write("HTML template missing.")
async def handle_stream(self, handler, redir): async def handle_stream(self, handler, redir):
upstream = None
if handler.provider == "nextcloud": if handler.provider == "nextcloud":
upstream = handler.upstream + "/download" upstream = handler.upstream + "/download"
else: else:
if not redir: provider_data = await stream_providers.get_any(handler.upstream, handler.proxy, logger)
meta = await handler.meta() upstream = provider_data.upstream()
image = meta.get("og:image") if isinstance(provider_data.thumbnail(), str):
if isinstance(image, str): image = await handler.proxy.proxy_url(provider_data.thumbnail(), None)
image = await handler.proxy.proxy_url(image, None)
if isinstance(image, str): if isinstance(image, str):
self.set_header("Custom-Poster", image) self.set_header("Custom-Poster", image)
upstream = await stream_providers.get_any(handler.upstream, handler.proxy, logger)
if upstream is None: if upstream is None:
logger.info(f'invalid upstream ({handler.provider})') logger.info(f'invalid upstream ({handler.provider})')
self.set_status(404) self.set_status(404)

View File

@ -1,7 +1,11 @@
#!/usr/bin/env python3
import youtube_dl import youtube_dl
import streamlink import streamlink
import requests
import asyncio import asyncio
import html.parser
import re
ytimg_pattern = re.compile(r'(https:\/\/[a-z0-9.]+ytimg\.com\/.+\/)[a-z0-9]+(\.[a-z0-9]+)')
class DummyLogger(): class DummyLogger():
def debug(self, msg): def debug(self, msg):
@ -11,16 +15,69 @@ class DummyLogger():
def error(self, msg): def error(self, msg):
pass pass
class MetaParser(html.parser.HTMLParser):
    """HTML parser that collects selected Open Graph ``<meta>`` tags.

    After ``feed()`` has been called, ``meta_data`` maps each accepted
    ``property`` value (see ``accepted_attrs``) to the value of the
    ``content`` attribute found on the same tag.
    """

    def __init__(self):
        self.meta_data = {}
        self.accepted_attrs = ["og:title", "og:description", "og:image"]
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Record ``content`` for a ``<meta>`` tag with an accepted property."""
        if tag != "meta":
            return
        pairs = [item for item in attrs if len(item) == 2]
        # Locate the first accepted ``property`` attribute on this tag.
        for position, (key, name) in enumerate(pairs):
            if key == "property" and name in self.accepted_attrs:
                break
        else:
            return
        # ``content`` may be written before or after ``property``: look at the
        # attributes following the property first, then wrap around to the
        # ones preceding it.
        for key, value in pairs[position + 1:] + pairs[:position]:
            if key == "content":
                self.meta_data[name] = value
                return
class StreamData():
    """Result of a stream lookup: playable URL plus optional metadata.

    The four fields live in the ``values`` dict under the keys ``upstream``,
    ``thumbnail``, ``title`` and ``description``. ``override`` is stored as
    given so a consumer can pass it back into ``update`` when merging.
    """

    def __init__(self, upstream, thumbnail, title, description, override):
        self.values = {
            "upstream": upstream,
            "thumbnail": thumbnail,
            "title": title,
            "description": description,
        }
        self.override = override

    def update(self, key, value, override):
        """Store ``value`` under ``key`` unless a string is already kept.

        An existing string is replaced only when ``override`` is truthy and
        ``value`` is itself a string.
        """
        already_text = isinstance(self.values.get(key), str)
        if already_text and not (override and isinstance(value, str)):
            return
        self.values[key] = value

    def upstream(self):
        """Return the stored upstream value."""
        return self.values.get("upstream")

    def thumbnail(self):
        """Return the stored thumbnail value."""
        return self.values.get("thumbnail")

    def title(self):
        """Return the stored title value."""
        return self.values.get("title")

    def description(self):
        """Return the stored description value."""
        return self.values.get("description")

    def meta(self):
        """Return ``(og-property, value)`` tuples for every string field."""
        tag_fields = (
            ("og:image", "thumbnail"),
            ("og:title", "title"),
            ("og:description", "description"),
        )
        return [
            (tag, self.values.get(field))
            for tag, field in tag_fields
            if isinstance(self.values.get(field), str)
        ]
class StreamProvider():
    """Base class holding the upstream URL, proxy URL and logger of a runner."""

    def __init__(self, upstream, proxy, logger):
        self.upstream = upstream
        self.logger = logger
        # The proxy is stringified; anything of five characters or fewer
        # (e.g. "None") is treated as "no proxy".
        proxy_text = str(proxy)
        self.proxy = ("socks5://" + proxy_text) if len(proxy_text) > 5 else None
class StreamlinkRunner(StreamProvider): class StreamlinkRunner(StreamProvider):
def stream(self): def stream(self):
try:
session = streamlink.Streamlink() session = streamlink.Streamlink()
if self.proxy is not None: if self.proxy is not None:
session.set_option("https-proxy", self.proxy) session.set_option("https-proxy", self.proxy)
@ -30,21 +87,30 @@ class StreamlinkRunner(StreamProvider):
for key in reversed(streams): for key in reversed(streams):
stream = streams.get(key) stream = streams.get(key)
if hasattr(stream, "url"): if hasattr(stream, "url"):
return stream.url return StreamData(stream.url, None, None, None, False)
return None except Exception as e:
self.logger.info(e)
return StreamData(None, None, None, None, False)
async def run(self): async def run(self):
return await asyncio.to_thread(self.stream) return await asyncio.to_thread(self.stream)
class YoutubeRunner(StreamProvider): class YoutubeRunner(StreamProvider):
def stream(self): def stream(self):
best_url = None
thumbnail = None
title = None
description = None
try:
opts = {} opts = {}
opts["logger"] = DummyLogger() opts["logger"] = DummyLogger()
if isinstance(self.proxy, str): if isinstance(self.proxy, str):
opts["proxy"] = self.proxy opts["proxy"] = self.proxy
best_url = None
with youtube_dl.YoutubeDL(opts) as ydl: with youtube_dl.YoutubeDL(opts) as ydl:
info = ydl.extract_info(self.upstream, download=False) info = ydl.extract_info(self.upstream, download=False)
vformats = info.get("formats") vformats = info.get("formats")
thumbnail = info.get("thumbnail")
description = info.get("channel")
title = info.get("title")
best_format = {} best_format = {}
best_format["width"] = 10 best_format["width"] = 10
best_format["height"] = 10 best_format["height"] = 10
@ -68,14 +134,30 @@ class YoutubeRunner(StreamProvider):
vcodec != "none"): vcodec != "none"):
best_format = vformat best_format = vformat
best_url = new_url best_url = new_url
return best_url except Exception as e:
self.logger.info(e)
return StreamData(best_url, thumbnail, title, description, True)
async def run(self):
return await asyncio.to_thread(self.stream)
class MetaRunner(StreamProvider):
    """Runner that fetches the upstream page and extracts its og:* metadata."""

    def stream(self):
        """Download ``self.upstream`` and parse its Open Graph tags.

        The request goes through ``self.proxy`` when one is configured and is
        bounded by a timeout so the worker thread cannot block indefinitely.
        Any failure is logged and yields a result without metadata.

        Returns:
            StreamData with no upstream URL and ``override`` set to False.
        """
        data = {}
        try:
            proxies = None
            if isinstance(self.proxy, str):
                # Use the same proxy for plain and TLS requests.
                proxies = {"http": self.proxy, "https": self.proxy}
            resp = requests.get(self.upstream, proxies=proxies, timeout=5.0)
            parser = MetaParser()
            parser.feed(resp.text)
            data = parser.meta_data
        except Exception as e:
            self.logger.info(e)
        return StreamData(None, data.get("og:image"), data.get("og:title"), data.get("og:description"), False)

    async def run(self):
        """Run the blocking ``stream`` call in a worker thread."""
        return await asyncio.to_thread(self.stream)
async def get_ytdl(upstream, proxy, logger): async def get_ytdl(upstream, proxy, logger):
result = None result = None
try: try:
runner = YoutubeRunner(upstream, proxy) runner = YoutubeRunner(upstream, proxy, logger)
result_temp = await runner.run() result_temp = await runner.run()
except Exception as e: except Exception as e:
logger.info(e) logger.info(e)
@ -86,7 +168,18 @@ async def get_ytdl(upstream, proxy, logger):
async def get_streamlink(upstream, proxy, logger): async def get_streamlink(upstream, proxy, logger):
result = None result = None
try: try:
runner = StreamlinkRunner(upstream, proxy) runner = StreamlinkRunner(upstream, proxy, logger)
result_temp = await runner.run()
except Exception as e:
logger.info(e)
else:
result = result_temp
return result
async def get_meta(upstream, proxy, logger):
result = None
try:
runner = MetaRunner(upstream, proxy, logger)
result_temp = await runner.run() result_temp = await runner.run()
except Exception as e: except Exception as e:
logger.info(e) logger.info(e)
@ -98,16 +191,19 @@ async def get_any(upstream, proxy, logger):
tasks = [] tasks = []
tasks.append(asyncio.create_task(get_streamlink(upstream, proxy, logger))) tasks.append(asyncio.create_task(get_streamlink(upstream, proxy, logger)))
tasks.append(asyncio.create_task(get_ytdl(upstream, proxy, logger))) tasks.append(asyncio.create_task(get_ytdl(upstream, proxy, logger)))
result = None tasks.append(asyncio.create_task(get_meta(upstream, proxy, logger)))
result = StreamData(None, None, None, None, False)
for task in asyncio.as_completed(tasks, timeout=5.0): for task in asyncio.as_completed(tasks, timeout=5.0):
temp_result = None temp_result = None
try: try:
temp_result = await task temp_result = await task
except Exception as e: except Exception as e:
logger.info(e) logger.info(e)
if isinstance(temp_result, str): if isinstance(temp_result, StreamData):
result = temp_result result.update("upstream", temp_result.upstream(), temp_result.override)
break result.update("thumbnail", temp_result.thumbnail(), temp_result.override)
result.update("title", temp_result.title(), temp_result.override)
result.update("description", temp_result.description(), temp_result.override)
for task in tasks: for task in tasks:
if not task.done(): if not task.done():
task.cancel() task.cancel()