# photon-os-advisories/update.py

import sys
import markdown
import json
import os
import re
from bs4 import BeautifulSoup

# Work-in-progress script (currently unused) that writes Photon OS advisory
# links back into the GSD database.
#
# Usage: <script> <advisories_dir> <gsd_dir>
#
# Both arguments are read at import time, so importing this module without
# two command-line arguments raises IndexError.

# Directory holding the per-release "Security-Updates-<version>.md" files.
advisories_dir = sys.argv[1]
# Root of the GSD tree; records are looked up below it as
# <year>/<grouping>/GSD-<year>-<id>.json.
gsd_dir = sys.argv[2]

# Matches CVE identifiers: "CVE-", a 4-digit year, then a 4-7 digit sequence.
CVE_REGEX = r"CVE-\d{4}-\d{4,7}"
# Appended directly to advisories_dir (plain string concatenation), so the
# leading "/" acts as the path separator.
FILE_FORMAT = "/Security-Updates-{version}.md"
# Template for the advisory link written into GSD records; {slug} is the
# value produced by advisory_slug().
ADVISORY_URL = "https://github.com/vmware/photon/wiki/Security-Update-{slug}"
# Release numbers whose advisory files are processed: 1, 2, 3 and 4.
PHOTON_VERSIONS = range(1, 5)

def advisory_slug(os_version, advisory):
    """Build the wiki slug for an advisory of a given release.

    The third dash-separated field of ``advisory`` is parsed as an integer
    (which drops any zero padding) and combined with the release number as
    ``"<os_version>.0-<number>"``.

    Raises:
        IndexError: if ``advisory`` has fewer than three dash-separated fields.
        ValueError: if the third field is not an integer.
    """
    fields = advisory.split("-")
    sequence = int(fields[2])
    return "{}.0-{}".format(os_version, sequence)

def generate_cve_mapping():
    """Collect, for every CVE mentioned in the advisory files, its advisory slugs.

    For each release in PHOTON_VERSIONS the matching markdown file under
    ``advisories_dir`` is rendered to HTML and the rows of its first table
    body are read. Each row must have exactly five cells (advisory, severity,
    date, packages, CVEs); every CVE identifier found in the last cell is
    associated with the slug of that row's advisory.

    Returns:
        dict mapping a CVE identifier to the list of advisory slugs that
        mention it, in the order they were encountered.
    """
    slugs_by_cve = {}
    for release in PHOTON_VERSIONS:
        # FILE_FORMAT carries its own leading "/", hence plain concatenation.
        source_path = advisories_dir + FILE_FORMAT.format(version=release)
        with open(source_path, "r") as handle:
            rendered = markdown.markdown(
                handle.read(), extensions=["markdown.extensions.tables"]
            )

        document = BeautifulSoup(rendered, "html.parser")
        for row in document.find("tbody").find_all("tr"):
            cells = [cell.text for cell in row.find_all("td")]
            # Unpacking enforces the five-column layout of the table.
            (advisory, _severity, _date, _packages, cve_text) = cells
            for cve_id in re.findall(CVE_REGEX, cve_text):
                # Computed per CVE so rows without CVEs never parse the advisory.
                slug = advisory_slug(release, advisory)
                slugs_by_cve.setdefault(cve_id, []).append(slug)

    return slugs_by_cve


def _add_advisory_urls(data, cve, gsd, urls):
    """Merge advisory URLs into a parsed GSD record, in place.

    Three record layouts are handled:

    * a lowercase ``'gsd'`` section, whose references are objects of the
      form ``{"type": ..., "url": ...}``;
    * an uppercase ``'GSD'`` section, whose references are plain URL strings;
    * neither section present, in which case a new ``'GSD'`` section is
      created with the description copied from the ``cve.org`` namespace
      (falling back to ``nvd.nist.gov``).

    Only URLs that are not already referenced are added, so running the
    script repeatedly does not duplicate links.

    Args:
        data: the decoded JSON record (a dict); modified in place.
        cve: CVE identifier, stored as the alias of a newly created section.
        gsd: GSD identifier, stored as the id of a newly created section.
        urls: advisory URLs to attach.

    Returns:
        True if ``data`` was changed, False otherwise.

    Raises:
        KeyError, IndexError: if a new section must be created and neither
            namespace provides a description.
    """
    # Drop repeated URLs while keeping first-seen order.
    urls = list(dict.fromkeys(urls))

    if 'gsd' in data:
        references = data['gsd'].setdefault('references', [])
        known = {ref.get('url') for ref in references if isinstance(ref, dict)}
        additions = [
            {"type": "ADVISORY", "url": url} for url in urls if url not in known
        ]
    elif 'GSD' in data:
        references = data['GSD'].setdefault('references', [])
        # List membership: avoids assuming existing entries are hashable.
        additions = [url for url in urls if url not in references]
    else:
        namespaces = data['namespaces']
        try:
            description = namespaces['cve.org']['description']['description_data'][0]['value']
        except (KeyError, IndexError):
            # An absent namespace and an empty description list both fall back.
            description = namespaces['nvd.nist.gov']['cve']['description']['description_data'][0]['value']
        data['GSD'] = {
            "alias": cve,
            "description": description,
            "id": gsd,
            "references": urls
        }
        return True

    references.extend(additions)
    return bool(additions)


def __main__():
    """Attach Photon advisory links to the matching GSD records on disk.

    For every CVE returned by generate_cve_mapping(), the record at
    ``<gsd_dir>/<year>/<grouping>/GSD-<year>-<id>.json`` is loaded, the
    advisory URLs are merged in, and the file is rewritten (indented JSON)
    only when the record actually changed. CVEs without a record file are
    reported on stdout and skipped.
    """
    mapping = generate_cve_mapping()
    for cve, slugs in mapping.items():
        (_, year, _id) = cve.split("-")
        # Records are grouped by thousands: id 12345 lives under "12xxx".
        grouping_id = _id[:-3] + "xxx"
        gsd = f"GSD-{year}-{_id}"
        path = f"{gsd_dir}/{year}/{grouping_id}/{gsd}.json"
        if not os.path.exists(path):
            print(f"Could not find {cve}")
            continue

        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)

        urls = [ADVISORY_URL.format(slug=slug) for slug in slugs]
        if _add_advisory_urls(data, cve, gsd, urls):
            with open(path, 'w', encoding="utf-8") as f:
                f.write(json.dumps(data, indent=4))


# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()