switch to ELA list

Nemo 2023-01-05 18:09:18 +05:30
parent 28bd5d210e
commit 73100956f8
1 changed file with 79 additions and 53 deletions

main.py

@@ -1,62 +1,88 @@
 import urllib.request
 import os
 import json
-import xml.dom.minidom
 import re
-from bs4 import BeautifulSoup
 import datetime
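+# Convert the Debian Extended LTS (ELA) advisory list into one OSV-style
+# JSON file per advisory under advisories/.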
-def fetch_advisory(url, ela_id):
-    debian_regex = (
-        r"(?P<v>(?:(?:[0-9]{1,9}):)?(?:[0-9][0-9a-z\.+~-]*)(?:(?:-[0-9a-z\.+~]+))?)"
-    )
+TITLE_REGEX = r"\[(?P<date>\d+ \w+ \d{4})\] (?P<id>ELA-\d+-\d+) (?P<package>[\w\-\.]+) - (?P<type>[\w ]+)"
+CVE_REGEX = r"CVE-\d{4}-\d{4,7}"
+DETAILS_REGEX = r"\[(?P<codename>\w+)\] - (?P<package>[\w\-\.]+) (?P<version>(?:(?:[0-9]{1,9}):)?(?:[0-9][0-9a-z\.+~-]*)(?:(?:-[0-9a-z\.+~]+))?)"
+DEBIAN_CODENAME = {
+    "bullseye": "11",
+    "buster": "10",
+    "stretch": "9",
+    "jessie": "8",
+    "wheezy": "7",
+    "squeeze": "6",
+}
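+# Each list entry looks roughly like this (shape inferred from the three
+# regexes above; the package name and identifiers are illustrative):
+#
+#   [05 Jan 2023] ELA-760-1 somepackage - security update
+#       {CVE-2023-0001 CVE-2023-0002}
+#   [jessie] - somepackage 1.0.2-1+deb8u1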
+def fetch_ela_list():
+    url = "https://salsa.debian.org/freexian-team/extended-lts/security-tracker/-/raw/master/data/ELA/list"
     response = urllib.request.urlopen(url)
-    html = response.read()
-    soup = BeautifulSoup(html, "html.parser").find("main")
-    d = list(soup.find_all("td"))
-    cves = []
-    date = None
-    if len(d) < 3:
-        print(f"Skipping {url}, not enough data")
-        return None
-    cves = [x.strip() for x in d[2].text.strip().split("\n")]
-    if len(d) >= 1:
-        packages = [d[0].text]
-    if len(d) >= 2:
-        versions = re.findall(debian_regex, d[1].text)
-    if soup.find("span"):
-        date = soup.find("span").text
-    vuln_type = None
-    if soup.find("p"):
-        vuln_type = soup.find("p").text.strip()
+    return response.read().decode('utf-8')
-    return {
-        "id": ela_id,
-        "refs": [f"https://deb.freexian.com/extended-lts/tracker/{ela_id}", url],
-        "title": soup.find("h1").text,
-        "type": vuln_type,
-        "date": date,
-        "packages": packages,
-        "versions": versions,
-        "cves": cves,
-    }
 def parse_date(s):
     # '15 Jun 2018'
     return datetime.datetime.strptime(s, "%d %b %Y")
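+# Walk the list top to bottom as a small state machine: a title line opens a
+# new advisory, and the {CVE-...} and [codename] lines that follow attach to
+# it until the next title line.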
+def get_osv():
+    content = fetch_ela_list()
+    cves = None
+    details = []
+    data = None
+
+    def to_osv():
+        # Assemble one OSV record from the state gathered for the current advisory.
+        return {
+            "id": data["id"],
+            "modified": parse_date(data["date"]).isoformat("T") + "Z",
+            "related": cves,
+            "affected": [
+                {
+                    "package": {
+                        "ecosystem": f"Debian:{DEBIAN_CODENAME[r['codename']]}",
+                        "name": r["package"],
+                        # purl qualifiers are joined with "&", not a second "?",
+                        # and the purl should name the same package as "name".
+                        "purl": f"pkg:deb/debian/{r['package']}?distro={r['codename']}&repository_url=http%3A%2F%2Fdeb.freexian.com%2Fextended-lts",
+                    },
+                    # OSV expects "ranges" to be a list, and ECOSYSTEM events
+                    # to start from an "introduced" entry.
+                    "ranges": [
+                        {
+                            "type": "ECOSYSTEM",
+                            "events": [
+                                {"introduced": "0"},
+                                {"fixed": r["version"]},
+                            ],
+                        }
+                    ],
+                }
+                for r in details
+            ],
+            "database_specific": {"type": data["type"]},
+            "references": [
+                f"https://deb.freexian.com/extended-lts/tracker/{data['id']}"
+            ]
+            + [
+                f"https://deb.freexian.com/extended-lts/tracker/{cve}"
+                for cve in cves
+            ],
+        }
+
+    for line in content.split("\n"):
+        line = line.strip()
+        m = re.match(TITLE_REGEX, line)
+        if m:
+            # A new title line closes the previous advisory; emit it first.
+            if cves and data and len(details) > 0:
+                yield to_osv()
+            details = []
+            cves = None
+            data = m.groupdict()
+        m = re.findall(CVE_REGEX, line)
+        if len(m) > 0:
+            cves = m
+        m = re.search(DETAILS_REGEX, line)
+        if m:
+            details.append(m.groupdict())
+    # The last advisory has no following title line, so flush it here.
+    if cves and data and len(details) > 0:
+        yield to_osv()
+def __main__():
+    # Make sure the output directory exists before writing into it.
+    os.makedirs("advisories", exist_ok=True)
+    for d in get_osv():
+        fn = f"advisories/{d['id']}.json"
+        with open(fn, "w") as f:
+            print(f"writing to {fn}")
+            f.write(json.dumps(d, indent=4, sort_keys=True))
 if __name__ == "__main__":
-    sitemap_url = "https://www.freexian.com/en/sitemap.xml"
-    contents = urllib.request.urlopen(sitemap_url)
-    d = xml.dom.minidom.parse(contents)
-    for x in d.getElementsByTagName("loc"):
-        url = x.childNodes[0].nodeValue
-        if url.startswith("https://www.freexian.com/lts/extended/updates/ela-"):
-            slug = url.split("/")[-2]
-            ela_id = re.match(r"^(ela-\d+\-\d+)", slug)[0].upper()
-            fn = f"advisories/{ela_id}.json"
-            if not os.path.exists(fn):
-                data = fetch_advisory(url, ela_id)
-                if not data:
-                    print(f"Failed to fetch {ela_id}")
-                    continue
-                with open(fn, "w") as f:
-                    print(f"writing to {fn}")
-                    f.write(json.dumps(data, indent=4, sort_keys=True))
+    __main__()