use esprima for js parsing

2021-09-05 14:55:57 +02:00
parent 02a9b5e470
commit 116d990cb7
4 changed files with 60 additions and 26 deletions
--- a/1
+++ b/1
@@ -5,6 +5,7 @@ COPY ["backend/start.sh", "/app/start.sh"]
 COPY ["backend/sources.py", "/app/setup/sources.py"]
 COPY ["backend/style.js", "/app/setup/style.js"]
 COPY ["backend/stream.py", "/app/stream.py"]
+COPY ["backend/seafile.js", "/app/seafile.js"]
 COPY ["backend/stream_providers.py", "/app/stream_providers.py"]
 COPY ["frontend/index.html", "/app/index.html"]
 COPY ["frontend/favicon.svg", "/app/favicon.svg"]
--- a/backend/seafile.js
+++ b/backend/seafile.js
@@ -0,0 +1,51 @@
+#!/usr/bin/env node
+// Fetch the page at the http(s) URL given on the command line, parse every
+// inline <script> with esprima, and print the literal settings found in
+// statements shaped like `target = { group: { key: literal, ... }, ... }`
+// as one flat JSON object on stdout.
+//
+// stdout carries nothing but that JSON document. All diagnostics go to stderr
+// and failures set a non-zero exit code, so a caller that parses stdout never
+// finds log output mixed into the result.
+const esprima = require('esprima');
+const cheerio = require('cheerio');
+const axios = require('axios');
+
+/**
+ * Pick the URL to fetch from the argument vector.
+ * @param {string[]} argv - the process arguments
+ * @returns {string|null} the last argument starting with http:// or https://,
+ *     or null when there is none
+ */
+function findUrl(argv) {
+    let found = null;
+    for (const arg of argv) {
+        if (arg.startsWith("http://") || arg.startsWith("https://")) {
+            found = arg;
+        }
+    }
+    return found;
+}
+
+/**
+ * Collect the properties of every object literal nested one level inside an
+ * expression statement whose right-hand side is itself an object literal.
+ * @param {string} source - text of one inline script
+ * @returns {object[]} esprima property nodes; empty when the text cannot be
+ *     parsed or contains no matching statement
+ */
+function collectNestedProperties(source) {
+    let script;
+    try {
+        script = esprima.parseScript(source);
+    } catch (err) {
+        // Inline tags may hold JSON, modules or templates; skip those instead
+        // of aborting the whole page.
+        console.error(`skipping unparsable script: ${err.message}`);
+        return [];
+    }
+    const found = [];
+    for (const statement of script.body) {
+        // Declarations, calls etc. have no `expression.right`; ignore them.
+        const assigned = statement.expression && statement.expression.right;
+        if (!assigned || assigned.type !== "ObjectExpression") {
+            continue;
+        }
+        for (const outer of assigned.properties) {
+            if (outer.value && outer.value.type === "ObjectExpression") {
+                found.push(...outer.value.properties);
+            }
+        }
+    }
+    return found;
+}
+
+/**
+ * Extract the flat settings object from a page.
+ * @param {string} html - the page markup
+ * @returns {object} identifier key -> literal value; later occurrences of a
+ *     key overwrite earlier ones
+ */
+function extractSettings(html) {
+    const dom = cheerio.load(html, {xmlMode: false});
+    const props = [];
+    for (const tag of dom('script').get()) {
+        for (const child of tag.children) {
+            // Only text nodes carry script source in `data`.
+            if (typeof child.data === "string") {
+                props.push(...collectNestedProperties(child.data));
+            }
+        }
+    }
+    const data = {};
+    for (const prop of props) {
+        // Spread elements and computed keys have no usable static name.
+        if (prop.type !== "Property" || prop.computed) {
+            continue;
+        }
+        // The loose `!= ""` is kept on purpose: besides the empty string it
+        // also drops 0 and false, exactly as before.
+        if ((prop.key.type === "Identifier") && (prop.value.type === "Literal") && (prop.value.value != "")) {
+            data[prop.key.name] = prop.value.value;
+        }
+    }
+    return data;
+}
+
+const url = findUrl(process.argv);
+
+if (url === null) {
+    console.error("usage: seafile.js <http(s)-url>");
+    process.exitCode = 1;
+} else {
+    axios.get(url).then((resp) => {
+        console.log(JSON.stringify(extractSettings(resp.data)));
+    }).catch((err) => {
+        // Request and extraction failures end up here instead of becoming an
+        // unhandled rejection; stdout stays empty in that case.
+        console.error(`${err.message} <${url}>`);
+        process.exitCode = 1;
+    });
+}
--- a/backend/start.sh
+++ b/backend/start.sh
@@ -1,3 +1,4 @@
 #!/usr/bin/env sh
+# Start script: prepares the environment, then hands over to /app/stream.py.
+# NODE_PATH lets Node's require() fall back to this directory when resolving
+# modules (seafile.js requires esprima, cheerio and axios).
+export NODE_PATH=/app/node/lib/node_modules
-source /app/venv/bin/activate
+# `.` is the POSIX spelling; `source` is an extension that a plain sh
+# (e.g. dash) does not provide.
+. /app/venv/bin/activate
+# exec replaces this shell with the server process.
 exec /app/stream.py
--- a/backend/stream_providers.py
+++ b/backend/stream_providers.py
@@ -5,6 +5,7 @@ import asyncio
 import html.parser
 import urllib.parse
 import expiringdict
+import subprocess
 import json
 import re

@@ -226,35 +227,15 @@ class YoutubeRunner(StreamProvider):
 class SeafileRunner(StreamProvider):
+    """Stream provider that fills in title and upstream using the /app/seafile.js helper."""
     def stream(self):
+        """Run the seafile.js helper on ``self.upstream`` and build the StreamData.
+
+        The helper's stdout is parsed as JSON; its ``filePath`` becomes the
+        title and its ``rawPath`` the upstream. Any failure (helper cannot be
+        started, timeout, output that is not a JSON object) is logged at info
+        level and the StreamData is built from whatever ``init_stream()``
+        returned plus any field already set.
+        """
         stream_data = self.init_stream()
-        data = {}
         try:
-            resp = requests.get(self.upstream)
-            expr = re.compile(u'pageOptions.{1,4}(\{[^\u1354]+\})[^{}]+\}', re.DOTALL)
-            comment_expr = re.compile("[^:]\/\/.+")
-            quote_add_expr = re.compile("^[^:a-zA-Z]+([a-zA-Z]+):", re.MULTILINE)
-            func_expr = re.compile('\([^"]+"', re.DOTALL)
-            optional_expr = re.compile(".+\|\|.+")
-            text = resp.text.replace("</script>", u'\u1354').replace("'", '"')
-            text = re.sub(quote_add_expr, r'"\1":', text)
-            text = re.sub(optional_expr, "", text)
-            text = re.sub(comment_expr, "", text)
-            for func in re.findall(func_expr, text):
-                text = text.replace(func, '0,"')
-            json_data_src = []
-            for res in re.findall(expr, text):
-                try:
-                    json_data_src.append(json.loads(res))
-                except Exception as e:
-                    self.logger.info("%s <%s>", e, self.upstream)
-            json_data = dict()
-            for elem in json_data_src:
-                for k,v in elem.items():
-                    if hasattr(v, "__len__") and len(v) > 0:
-                        json_data[k] = v
+            # Launch the helper inside the try so a missing or non-executable
+            # script, or a hung request, is logged like any other failure
+            # instead of raising out of stream(). The argument list avoids a shell.
+            proc = subprocess.run(["/app/seafile.js", self.upstream], capture_output=True, encoding="utf-8", timeout=30)
+            json_data = json.loads(proc.stdout)
             stream_data["title"] = json_data.get("filePath")
             stream_data["upstream"] = json_data.get("rawPath")
         except Exception as e:
             self.logger.info("%s <%s>", e, self.upstream)
         return StreamData(**stream_data)

 class MetaProvider(StreamProvider):