switch to ELA list

Nemo 2023-01-05 18:09:18 +05:30
parent 28bd5d210e
commit 73100956f8
1 changed file with 79 additions and 53 deletions

main.py

@@ -1,62 +1,88 @@
 import urllib.request
 import os
 import json
-import xml.dom.minidom
 import re
-from bs4 import BeautifulSoup
 import datetime
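+# Convert the Debian Extended LTS (ELA) advisory list into one OSV-style
+# JSON file per advisory under advisories/.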
-def fetch_advisory(url, ela_id):
-    debian_regex = (
-        r"(?P<v>(?:(?:[0-9]{1,9}):)?(?:[0-9][0-9a-z\.+~-]*)(?:(?:-[0-9a-z\.+~]+))?)"
-    )
+TITLE_REGEX = r"\[(?P<date>\d+ \w+ \d{4})\] (?P<id>ELA-\d+-\d+) (?P<package>[\w\-\.]+) - (?P<type>[\w ]+)"
+CVE_REGEX = r"CVE-\d{4}-\d{4,7}"
+DETAILS_REGEX = r"\[(?P<codename>\w+)\] - (?P<package>[\w\-\.]+) (?P<version>(?:(?:[0-9]{1,9}):)?(?:[0-9][0-9a-z\.+~-]*)(?:(?:-[0-9a-z\.+~]+))?)"
+DEBIAN_CODENAME = {
+    "bullseye": "11",
+    "buster": "10",
+    "stretch": "9",
+    "jessie": "8",
+    "wheezy": "7",
+    "squeeze": "6",
+}
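+# Each list entry looks roughly like this (shape inferred from the three
+# regexes above; the package name and identifiers are illustrative):
+#
+#   [05 Jan 2023] ELA-760-1 somepackage - security update
+#       {CVE-2023-0001 CVE-2023-0002}
+#   [jessie] - somepackage 1.0.2-1+deb8u1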
+def fetch_ela_list():
+    url = "https://salsa.debian.org/freexian-team/extended-lts/security-tracker/-/raw/master/data/ELA/list"
     response = urllib.request.urlopen(url)
-    html = response.read()
-    soup = BeautifulSoup(html, "html.parser").find("main")
-    d = list(soup.find_all("td"))
-    cves = []
-    date = None
-    if len(d) < 3:
-        print(f"Skipping {url}, not enough data")
-        return None
-    cves = [x.strip() for x in d[2].text.strip().split("\n")]
-    if len(d) >= 1:
-        packages = [d[0].text]
-    if len(d) >= 2:
-        versions = re.findall(debian_regex, d[1].text)
-    if soup.find("span"):
-        date = soup.find("span").text
-    vuln_type = None
-    if soup.find("p"):
-        vuln_type = soup.find("p").text.strip()
+    return response.read().decode('utf-8')
-    return {
-        "id": ela_id,
-        "refs": [f"https://deb.freexian.com/extended-lts/tracker/{ela_id}", url],
-        "title": soup.find("h1").text,
-        "type": vuln_type,
-        "date": date,
-        "packages": packages,
-        "versions": versions,
-        "cves": cves,
-    }
 def parse_date(s):
     # '15 Jun 2018'
     return datetime.datetime.strptime(s, "%d %b %Y")
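+# Walk the list top to bottom as a small state machine: a title line opens a
+# new advisory, and the {CVE-...} and [codename] lines that follow attach to
+# it until the next title line.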
+def get_osv():
+    content = fetch_ela_list()
+    cves = None
+    details = []
+    data = None
+
+    def to_osv():
+        # Assemble one OSV record from the state gathered for the current advisory.
+        return {
+            "id": data["id"],
+            "modified": parse_date(data["date"]).isoformat("T") + "Z",
+            "related": cves,
+            "affected": [
+                {
+                    "package": {
+                        "ecosystem": f"Debian:{DEBIAN_CODENAME[r['codename']]}",
+                        "name": r["package"],
+                        # purl qualifiers are joined with "&", not a second "?",
+                        # and the purl should name the same package as "name".
+                        "purl": f"pkg:deb/debian/{r['package']}?distro={r['codename']}&repository_url=http%3A%2F%2Fdeb.freexian.com%2Fextended-lts",
+                    },
+                    # OSV expects "ranges" to be a list, and ECOSYSTEM events
+                    # to start from an "introduced" entry.
+                    "ranges": [
+                        {
+                            "type": "ECOSYSTEM",
+                            "events": [
+                                {"introduced": "0"},
+                                {"fixed": r["version"]},
+                            ],
+                        }
+                    ],
+                }
+                for r in details
+            ],
+            "database_specific": {"type": data["type"]},
+            "references": [
+                f"https://deb.freexian.com/extended-lts/tracker/{data['id']}"
+            ]
+            + [
+                f"https://deb.freexian.com/extended-lts/tracker/{cve}"
+                for cve in cves
+            ],
+        }
+
+    for line in content.split("\n"):
+        line = line.strip()
+        m = re.match(TITLE_REGEX, line)
+        if m:
+            # A new title line closes the previous advisory; emit it first.
+            if cves and data and len(details) > 0:
+                yield to_osv()
+            details = []
+            cves = None
+            data = m.groupdict()
+        m = re.findall(CVE_REGEX, line)
+        if len(m) > 0:
+            cves = m
+        m = re.search(DETAILS_REGEX, line)
+        if m:
+            details.append(m.groupdict())
+    # The last advisory has no following title line, so flush it here.
+    if cves and data and len(details) > 0:
+        yield to_osv()
+def __main__():
+    # Make sure the output directory exists before writing into it.
+    os.makedirs("advisories", exist_ok=True)
+    for d in get_osv():
+        fn = f"advisories/{d['id']}.json"
+        with open(fn, "w") as f:
+            print(f"writing to {fn}")
+            f.write(json.dumps(d, indent=4, sort_keys=True))
 if __name__ == "__main__":
-    sitemap_url = "https://www.freexian.com/en/sitemap.xml"
-    contents = urllib.request.urlopen(sitemap_url)
-    d = xml.dom.minidom.parse(contents)
-    for x in d.getElementsByTagName("loc"):
-        url = x.childNodes[0].nodeValue
-        if url.startswith("https://www.freexian.com/lts/extended/updates/ela-"):
-            slug = url.split("/")[-2]
-            ela_id = re.match(r"^(ela-\d+\-\d+)", slug)[0].upper()
-            fn = f"advisories/{ela_id}.json"
-            if not os.path.exists(fn):
-                data = fetch_advisory(url, ela_id)
-                if not data:
-                    print(f"Failed to fetch {ela_id}")
-                    continue
-                with open(fn, "w") as f:
-                    print(f"writing to {fn}")
-                    f.write(json.dumps(data, indent=4, sort_keys=True))
+    __main__()