# photon-os-advisories/generate.py

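"""Generate OSV-style advisory records for Photon OS.

Overview of the workflow implemented below: parse the Security-Updates-<version>.md
tables from a local git checkout of the vmware/photon wiki (ADVISORIES_DIR),
match each advisory against the per-release CVE metadata feed on
packages.vmware.com, and write one JSON record per advisory id into the
advisories/ directory.
"""
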
import copy
import json
import os
import re
import subprocess
import urllib.request
from datetime import datetime
from glob import glob

import markdown
from bs4 import BeautifulSoup

# Patterns and paths used to locate advisories in the Photon OS wiki checkout.
CVE_REGEX = r"CVE-\d{4}-\d{4,7}"
FILE_FORMAT = "/Security-Updates-{version}.md"
ADVISORY_URL = "https://github.com/vmware/photon/wiki/Security-Update-{slug}"
PHOTON_VERSIONS = range(1, 5)
ADVISORIES_DIR = "photon-wiki"


def last_modified_date(file):
    """Return the UTC time of the last git commit that touched *file* in the wiki checkout."""
    p = int(
        subprocess.check_output(
            ["git", "log", "--date=iso-strict", "-1", "--format=%ct", "--", file],
            cwd=ADVISORIES_DIR,
        )
        .decode("utf-8")
        .strip()
    )
    return datetime.utcfromtimestamp(p)


def created_date(file):
    """Return the advisory's publication date, taken from the line beginning with "Issue"."""
    with open(ADVISORIES_DIR + "/" + file) as f:
        for line in f:
            if line.startswith("Issue"):
                return datetime.strptime(line.split(": ")[1].strip(), "%Y-%m-%d")


def advisory_slug(os_version, advisory):
    # The numeric advisory id is the third "-"-separated field of the advisory name.
    _id = int(float(advisory.split("-")[2]))
    return f"{os_version}.0-{_id}"


def get_osv(cve_data_all_versions):
    """Yield one OSV-style record per advisory row in the wiki's Security-Updates tables."""
    for os_version in PHOTON_VERSIONS:
        filename = FILE_FORMAT.format(version=os_version)
        file = ADVISORIES_DIR + filename
        print(f"Parsing {filename}")

        # Returns the version that fixed any of the given CVEs + OS + package
        # combination; there should only be one.
        def cve_fixed_version(package, cves, os_version, advisory):
            # Fixed versions with a matching CVE/pkg/OS combination.
            fixed_versions = {
                x["res_ver"]
                for cve in cves
                for x in cve_data_all_versions[cve]
                if (x["os"] == os_version and x["pkg"] == package)
            }
            # There should only be a single such reference.
            if len(fixed_versions) != 1:
                f = ", ".join(list(fixed_versions))
                print(f"[{advisory}] Invalid Versions: {package} ({f})")
                return None
            return fixed_versions.pop()

        with open(file, "r") as f:
            table_html = markdown.markdown(
                f.read(), extensions=["markdown.extensions.tables"]
            )
        soup = BeautifulSoup(table_html, "html.parser")

        # Each table row is: advisory id, severity, published date, packages, CVEs.
        for tr in soup.find("tbody").find_all("tr"):
            (advisory, severity, published_date, packages, cves) = [
                x.text for x in tr.find_all("td")
            ]
            packages = json.loads(packages.replace("'", '"'))
            cves = re.findall(CVE_REGEX, cves)
            slug = advisory_slug(os_version, advisory)
            advisory_file = f"Security-Update-{slug}.md"
            modified = last_modified_date(advisory_file)
            published = created_date(advisory_file)

            def affected(pkg, cves, os_version):
                r = {
                    "package": {
                        "ecosystem": f"photon:{os_version}.0",
                        "name": pkg,
                        "purl": f"pkg:rpm/vmware/{pkg}?distro=photon-{os_version}",
                    }
                }
                fixed_version = cve_fixed_version(pkg, cves, os_version, advisory)
                if fixed_version:
                    r["ranges"] = {
                        "events": [
                            {"introduced": "0"},
                            {"fixed": fixed_version},
                        ],
                        "type": "ECOSYSTEM",
                    }
                return r

            yield {
                "id": advisory,
                "modified": modified.isoformat("T") + "Z",
                "published": published.isoformat("T") + "Z",
                "related": cves,
                "affected": [
                    affected(pkg, cves, os_version) for pkg in packages
                ],
                "references": [
                    {"type": "ADVISORY", "url": ADVISORY_URL.format(slug=slug)}
                ],
            }
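
# Illustrative shape of one record yielded by get_osv(); all concrete values
# below are invented examples, not real advisory data:
#
#   {
#       "id": "PHSA-2023-0123",
#       "modified": "2023-03-16T08:12:24Z",
#       "published": "2023-01-05T07:17:56Z",
#       "related": ["CVE-2023-1234"],
#       "affected": [{
#           "package": {
#               "ecosystem": "photon:4.0",
#               "name": "openssl",
#               "purl": "pkg:rpm/vmware/openssl?distro=photon-4",
#           },
#           "ranges": {
#               "events": [{"introduced": "0"}, {"fixed": "3.0.8-1.ph4"}],
#               "type": "ECOSYSTEM",
#           },
#       }],
#       "references": [{
#           "type": "ADVISORY",
#           "url": "https://github.com/vmware/photon/wiki/Security-Update-4.0-123",
#       }],
#   }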


def merge_advisories(advisory_file, data):
    """Merge a freshly generated record into an existing advisory JSON file.

    Returns the merged record, or None if none of the important keys changed.
    """
    # Read the current advisory data as JSON.
    with open(advisory_file, "r") as f:
        original = json.load(f)
    current = copy.deepcopy(original)

    # Merge the data.
    assert current["id"] == data["id"]
    current["affected"].extend(data["affected"])
    current["references"].extend(data["references"])
    current["related"].extend(data["related"])

    # Make sure no CVE references are duplicated.
    current["related"] = sorted(set(current["related"]))

    # Pick the earlier published date and the later modified date.
    current["published"] = (
        min(
            datetime.strptime(current["published"], "%Y-%m-%dT%H:%M:%SZ"),
            datetime.strptime(data["published"], "%Y-%m-%dT%H:%M:%SZ"),
        ).isoformat("T")
        + "Z"
    )
    current["modified"] = (
        max(
            datetime.strptime(current["modified"], "%Y-%m-%dT%H:%M:%SZ"),
            datetime.strptime(data["modified"], "%Y-%m-%dT%H:%M:%SZ"),
        ).isoformat("T")
        + "Z"
    )

    # Only return a result if one of the important keys has changed.
    no_important_changes = True
    for key in ["id", "affected", "references", "related", "published"]:
        if current[key] != original[key]:
            no_important_changes = False
    if no_important_changes:
        return None
    return current


def fetch_cve_metadata(photon_versions):
    """Download Photon OS CVE metadata and index it by CVE id.

    Returns a dict mapping each CVE id to a list of rows, one per affected
    OS/package combination.
    """
    cve_metadata = {}
    for branch in photon_versions:
        url = f"https://packages.vmware.com/photon/photon_cve_metadata/cve_data_photon{branch}.0.json"
        with urllib.request.urlopen(url) as r:
            data = json.loads(r.read().decode())
        for row in data:
            row["os"] = branch
            cve = row.pop("cve_id")
            # The only affected-version phrasing handled so far is
            # "all versions before <res_ver> are vulnerable".
            if (
                row["aff_ver"]
                == f"all versions before {row['res_ver']} are vulnerable"
            ):
                del row["aff_ver"]
            else:
                print(row)
                raise Exception("Unimplemented affected version range")
            if cve in cve_metadata:
                cve_metadata[cve].append(row)
            else:
                cve_metadata[cve] = [row]
        print(f"[+] CVE metadata for Photon OS {branch}.0: Added {len(data)} CVEs")
    return cve_metadata
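
# Illustrative example of one row from the metadata feed (values invented; the
# real feed may carry additional fields such as a severity score):
#
#   {"cve_id": "CVE-2023-1234", "pkg": "openssl", "res_ver": "3.0.8-1.ph4",
#    "aff_ver": "all versions before 3.0.8-1.ph4 are vulnerable"}
#
# fetch_cve_metadata() drops "aff_ver", adds "os", and groups the rows by CVE id.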


def main():
    cve_metadata = fetch_cve_metadata(PHOTON_VERSIONS)

    # Regenerate every advisory file from scratch.
    for advisory in glob("advisories/*.json"):
        os.remove(advisory)

    for d in get_osv(cve_metadata):
        fn = f"advisories/{d['id']}.json"
        # The same advisory id can appear for several Photon releases; merge
        # those entries instead of overwriting the file written earlier in this run.
        if os.path.exists(fn):
            d = merge_advisories(fn, d)
        if d:
            with open(fn, "w") as f:
                f.write(json.dumps(d, indent=4, sort_keys=True))


if __name__ == "__main__":
    main()