use esprima for js parsing

This commit is contained in:
Roy Olav Purser 2021-09-05 14:55:57 +02:00
parent 02a9b5e470
commit 116d990cb7
Signed by: roypur
GPG Key ID: E14D26A036F21656
4 changed files with 60 additions and 26 deletions

View File

@ -5,6 +5,7 @@ COPY ["backend/start.sh", "/app/start.sh"]
COPY ["backend/sources.py", "/app/setup/sources.py"] COPY ["backend/sources.py", "/app/setup/sources.py"]
COPY ["backend/style.js", "/app/setup/style.js"] COPY ["backend/style.js", "/app/setup/style.js"]
COPY ["backend/stream.py", "/app/stream.py"] COPY ["backend/stream.py", "/app/stream.py"]
COPY ["backend/seafile.js", "/app/seafile.js"]
COPY ["backend/stream_providers.py", "/app/stream_providers.py"] COPY ["backend/stream_providers.py", "/app/stream_providers.py"]
COPY ["frontend/index.html", "/app/index.html"] COPY ["frontend/index.html", "/app/index.html"]
COPY ["frontend/favicon.svg", "/app/favicon.svg"] COPY ["frontend/favicon.svg", "/app/favicon.svg"]

51
backend/seafile.js Normal file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env node
const esprima = require('esprima');
const cheerio = require('cheerio');
const axios = require('axios');
/**
 * Picks the URL to fetch from the command line.
 *
 * @param {string[]} argv - Raw process arguments.
 * @returns {string|null} The last argument starting with "http://" or
 *     "https://", or null when there is none.
 */
function findUrl(argv) {
    let found = null;
    for (const arg of argv) {
        if (arg.startsWith("http://") || arg.startsWith("https://")) {
            found = arg;
        }
    }
    return found;
}

/**
 * Parses one inline script and gathers the properties of every object
 * literal nested one level inside a top-level `target = { ... }` assignment,
 * e.g. the `a` and `b` properties of `x = { group: { a: 1, b: 2 } }`.
 *
 * Script content that is not parseable JavaScript, and statements that do
 * not have the expected shape, are skipped instead of aborting the run.
 *
 * @param {string} source - Text content of a <script> element.
 * @returns {object[]} Esprima property nodes (possibly empty).
 */
function collectNestedProperties(source) {
    let script;
    try {
        script = esprima.parseScript(source);
    } catch (err) {
        // Diagnostics go to stderr: stdout is reserved for the JSON result.
        console.error(err);
        return [];
    }
    const collected = [];
    for (const statement of script.body) {
        if (statement.type !== "ExpressionStatement") {
            continue;
        }
        const expression = statement.expression;
        if (expression.type !== "AssignmentExpression" || expression.right.type !== "ObjectExpression") {
            continue;
        }
        for (const outer of expression.right.properties) {
            // Only object-valued properties contribute; anything else has no
            // nested property list to collect.
            if (outer.type === "Property" && outer.value.type === "ObjectExpression") {
                collected.push(...outer.value.properties);
            }
        }
    }
    return collected;
}

/**
 * Builds a flat name -> value object from property nodes, keeping only
 * properties with an identifier key and a literal value.
 *
 * @param {object[]} props - Esprima property nodes.
 * @returns {object} Plain object ready for JSON serialization.
 */
function literalOptions(props) {
    const data = {};
    for (const prop of props) {
        // Spread elements have no key/value pair to inspect.
        if (prop.type !== "Property") {
            continue;
        }
        // The loose comparison is kept on purpose so the output stays what it
        // was before: besides "" it also filters out 0 and false.
        if ((prop.key.type === "Identifier") && (prop.value.type === "Literal") && (prop.value.value != "")) {
            data[prop.key.name] = prop.value.value;
        }
    }
    return data;
}

// Fetch the page given on the command line, scan every inline <script> and
// print the extracted options as a single JSON object on stdout. Without a
// URL argument nothing is printed.
const url = findUrl(process.argv);
if (url !== null) {
    axios.get(url).then((resp) => {
        const dom = cheerio.load(resp.data, {xmlMode: false});
        const props = [];
        for (const tag of dom('script').get()) {
            for (const child of tag.children) {
                // Only nodes carrying string data can be parsed as script text.
                if (typeof child.data === "string") {
                    props.push(...collectNestedProperties(child.data));
                }
            }
        }
        console.log(JSON.stringify(literalOptions(props)));
    }).catch((err) => {
        // Report request failures and unexpected errors without polluting
        // stdout, and signal the failure through the exit status.
        console.error(err);
        process.exitCode = 1;
    });
}

View File

@ -1,3 +1,4 @@
#!/usr/bin/env sh #!/usr/bin/env sh
export NODE_PATH=/app/node/lib/node_modules
source /app/venv/bin/activate source /app/venv/bin/activate
exec /app/stream.py exec /app/stream.py

View File

@ -5,6 +5,7 @@ import asyncio
import html.parser import html.parser
import urllib.parse import urllib.parse
import expiringdict import expiringdict
import subprocess
import json import json
import re import re
@ -226,35 +227,15 @@ class YoutubeRunner(StreamProvider):
class SeafileRunner(StreamProvider): class SeafileRunner(StreamProvider):
def stream(self): def stream(self):
stream_data = self.init_stream() stream_data = self.init_stream()
data = {} json_data = None
proc = subprocess.run(["/app/seafile.js", self.upstream], capture_output=True, encoding="utf-8")
try: try:
resp = requests.get(self.upstream) json_data = json.loads(proc.stdout)
expr = re.compile(u'pageOptions.{1,4}(\{[^\u1354]+\})[^{}]+\}', re.DOTALL)
comment_expr = re.compile("[^:]\/\/.+")
quote_add_expr = re.compile("^[^:a-zA-Z]+([a-zA-Z]+):", re.MULTILINE)
func_expr = re.compile('\([^"]+"', re.DOTALL)
optional_expr = re.compile(".+\|\|.+")
text = resp.text.replace("</script>", u'\u1354').replace("'", '"')
text = re.sub(quote_add_expr, r'"\1":', text)
text = re.sub(optional_expr, "", text)
text = re.sub(comment_expr, "", text)
for func in re.findall(func_expr, text):
text = text.replace(func, '0,"')
json_data_src = []
for res in re.findall(expr, text):
try:
json_data_src.append(json.loads(res))
except Exception as e:
self.logger.info("%s <%s>", e, self.upstream)
json_data = dict()
for elem in json_data_src:
for k,v in elem.items():
if hasattr(v, "__len__") and len(v) > 0:
json_data[k] = v
stream_data["title"] = json_data.get("filePath")
stream_data["upstream"] = json_data.get("rawPath")
except Exception as e: except Exception as e:
self.logger.info("%s <%s>", e, self.upstream) self.logger.info("%s <%s>", e, self.upstream)
else:
stream_data["title"] = json_data.get("filePath")
stream_data["upstream"] = json_data.get("rawPath")
return StreamData(**stream_data) return StreamData(**stream_data)
class MetaProvider(StreamProvider): class MetaProvider(StreamProvider):