From 73100956f823f54f02bcb413cffd713e958b0922 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Thu, 5 Jan 2023 18:09:18 +0530
Subject: [PATCH] switch to ELA list

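Instead of scraping the rendered advisory pages discovered via the
freexian.com sitemap, fetch the plain-text ELA list from the
extended-lts security-tracker repository and emit one OSV-style JSON
file per advisory under advisories/.

For illustration only (the package, CVE, and version values below are
made up, and the exact whitespace in the real list may differ), the
regexes assume list entries of roughly this shape:

    [15 Jun 2018] ELA-1-1 somepackage - security update
        {CVE-2018-0001 CVE-2018-0002}
        [jessie] - somepackage 1.2.3-4+deb8u1
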
---
 main.py | 132 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 79 insertions(+), 53 deletions(-)

diff --git a/main.py b/main.py
index b6a6a2f..abfd435 100644
--- a/main.py
+++ b/main.py
@@ -1,62 +1,88 @@
 import urllib.request
-import os
 import json
-import xml.dom.minidom
 import re
-from bs4 import BeautifulSoup
+import datetime
 
-def fetch_advisory(url, ela_id):
-    debian_regex = (
-        r"(?P<version>(?:(?:[0-9]{1,9}):)?(?:[0-9][0-9a-z\.+~-]*)(?:(?:-[0-0a-z\.+~]+))?)"
-    )
+TITLE_REGEX = r"\[(?P<date>\d+ \w+ \d{4})\] (?P<id>ELA-\d+-\d+) (?P<package>[\w\-\.]+) - (?P<type>[\w ]+)"
+CVE_REGEX = r"CVE-\d{4}-\d{4,7}"
+DETAILS_REGEX = r"\[(?P<codename>\w+)\] - (?P<package>[\w\-\.]+) (?P<version>(?:(?:[0-9]{1,9}):)?(?:[0-9][0-9a-z\.+~-]*)(?:(?:-[0-0a-z\.+~]+))?)"
+
+DEBIAN_CODENAME = {
+    "bullseye": "11",
+    "buster": "10",
+    "stretch": "9",
+    "jessie": "8",
+    "wheezy": "7",
+    "squeeze": "6",
+}
+
+def fetch_ela_list():
+    url = "https://salsa.debian.org/freexian-team/extended-lts/security-tracker/-/raw/master/data/ELA/list"
     response = urllib.request.urlopen(url)
-    html = response.read()
-    soup = BeautifulSoup(html, "html.parser").find("main")
-    d = list(soup.find_all("td"))
-    cves = []
-    date = None
-    if len(d) < 3:
-        print(f"Skipping {url}, not enough data")
-        return None
-    cves = [x.strip() for x in d[2].text.strip().split("\n")]
-    if len(d) >= 1:
-        packages = [d[0].text]
-    if len(d) >= 2:
-        versions = re.findall(debian_regex, d[1].text)
-    if soup.find("span"):
-        date = soup.find("span").text
-    vuln_type = None
-    if soup.find("p"):
-        vuln_type = soup.find("p").text.strip()
+    return response.read().decode('utf-8')
 
-    return {
-        "id": ela_id,
-        "refs": [f"https://deb.freexian.com/extended-lts/tracker/{ela_id}", url],
-        "title": soup.find("h1").text,
-        "type": vuln_type,
-        "date": date,
-        "packages": packages,
-        "versions": versions,
-        "cves": cves,
-    }
+def parse_date(s):
+    # '15 Jun 2018'
+    return datetime.datetime.strptime(s, "%d %b %Y")
+
+def get_osv():
+    content = fetch_ela_list()
+    cves = None
+    details = []
+    data = None
+    for line in content.split("\n"):
+        line = line.strip()
+        m = re.match(TITLE_REGEX, line)
+        if m:
+            if cves and data and len(details)>0:
+                yield {
+                    "id": data["id"],
+                    "modified": parse_date(data["date"]).isoformat("T") + "Z",
+                    "related": cves,
+                    "affected": [
+                        {
+                            "package": {
+                                "ecosystem": f"Debian:{DEBIAN_CODENAME[r['codename']]}",
+                                "name": r["package"],
+                                "purl": f"pkg:deb/debian/{data['package']}?distro={r['codename']}?repository_url=http%3A%2F%2Fdeb.freexian.com%2Fextended-lts",
+                            },
+                            "ranges": {
+                                "type": "ECOSYSTEM",
+                                "events": [{
+                                    "fixed": r['version'],
+                                }]
+                            }
+                        }
+                        for r in details
+                    ],
+                    "database_specific": {
+                        "type": data['type']
+                    },
+                    "references": [
+                        f"https://deb.freexian.com/extended-lts/tracker/{data['id']}"
+                    ] + [
+                        f"https://deb.freexian.com/extended-lts/tracker/{cve}"
+                        for cve in cves
+                    ],
+                }
+            details = []
+            cves = None
+            data = m.groupdict()
+        m = re.findall(CVE_REGEX, line)
+        if len(m) > 0:
+            cves = re.findall(CVE_REGEX, line)
+        m = re.search(DETAILS_REGEX, line)
+        if m:
+            details.append(m.groupdict())
+
+
+def __main__():
+    for d in get_osv():
+        fn = f"advisories/{d['id']}.json"
+        with open(fn, "w") as f:
+            print(f"writing to {fn}")
+            f.write(json.dumps(d, indent=4, sort_keys=True))
 
 
 if __name__ == "__main__":
-    sitemap_url = "https://www.freexian.com/en/sitemap.xml"
-    contents = urllib.request.urlopen(sitemap_url)
-    d = xml.dom.minidom.parse(contents)
-    for x in d.getElementsByTagName("loc"):
-        url = x.childNodes[0].nodeValue
-        if url.startswith("https://www.freexian.com/lts/extended/updates/ela-"):
-            slug = url.split("/")[-2]
-            ela_id = re.match(r"^(ela-\d+\-\d+)", slug)[0].upper()
-            fn = f"advisories/{ela_id}.json"
-
-            if not os.path.exists(fn):
-                data = fetch_advisory(url, ela_id)
-                if not data:
-                    print(f"Failed to fetch {ela_id}")
-                    continue
-                with open(fn, "w") as f:
-                    print(f"writing to {fn}")
-                    f.write(json.dumps(data, indent=4, sort_keys=True))
+    __main__()
\ No newline at end of file