🏡 index : github.com/captn3m0/microsoft-kb-metadata.git

author Nemo <commits@captnemo.in> 2025-04-30 12:15:02.0 +05:30:00
committer Nemo <commits@captnemo.in> 2025-04-30 12:15:42.0 +05:30:00
commit
0dd24c092a56cc5db3e1494ca1b288d630f422eb [patch]
tree
896bb1e92b96627967132d32b9356538ff9f4791
parent
cf029a9d2dd5235d71279c5d7c033746aecacd53
download
0dd24c092a56cc5db3e1494ca1b288d630f422eb.tar.gz

Partial Discovery support



Diff

 README.md                    |  9 +--------
 discovery.txt                |  6 +++++-
 pyproject.toml               |  6 +++---
 update.py                    | 12 ++++++++++++
 .github/workflows/update.yml |  4 +++-
 5 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index bb49021..046a585 100644
--- a/README.md
+++ a/README.md
@@ -12,14 +12,7 @@

## wip

Discovery notes. Need to check the sitemaps more thoroughly before I automate this.

See https://learn.microsoft.com/_sitemaps/sitemapindex.xml

```

curl --silent https://learn.microsoft.com/_sitemaps/officeupdates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
curl --silent https://learn.microsoft.com/_sitemaps/security-updates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
```

The code currently auto-discovers new security updates and office release URLs.

## license

diff --git a/discovery.txt b/discovery.txt
index 7b2611d..de8b0e9 100644
--- a/discovery.txt
+++ a/discovery.txt
@@ -25,7 +25,10 @@
https://learn.microsoft.com/en-us/officeupdates/release-history-microsoft-autoupdate
https://learn.microsoft.com/en-us/officeupdates/release-notes-microsoft365-apps
https://learn.microsoft.com/en-us/officeupdates/release-notes-office-for-mac
https://learn.microsoft.com/en-us/officeupdates/release-notes-office-for-mac-archived
https://learn.microsoft.com/en-us/officeupdates/release-notes-outlook-mobile
https://learn.microsoft.com/en-us/officeupdates/release-notes-outlook-new
https://learn.microsoft.com/en-us/officeupdates/release-notes-outlook-new-gc
https://learn.microsoft.com/en-us/officeupdates/semi-annual-channel-2016
https://learn.microsoft.com/en-us/officeupdates/semi-annual-channel-2017
https://learn.microsoft.com/en-us/officeupdates/semi-annual-channel-2018
@@ -47,6 +50,7 @@
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2013
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2019
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2021
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2024
https://learn.microsoft.com/en-us/officeupdates/update-history-office-for-mac
https://learn.microsoft.com/en-us/officeupdates/update-history-office-win7
https://learn.microsoft.com/en-us/security-updates/
@@ -2064,5 +2068,5 @@
https://learn.microsoft.com/en-us/windows/release-health/status-windows-10-1809-and-windows-server-2019
https://learn.microsoft.com/en-us/windows/release-health/status-windows-10-21h2
https://learn.microsoft.com/en-us/windows/release-health/status-windows-11-22h2
https://learn.microsoft.com/en-us/windows/release-health/windows11-release-information
https://learn.microsoft.com/en-us/windows/release-health/windows-server-release-info
https://learn.microsoft.com/en-us/windows/release-health/windows11-release-information
diff --git a/pyproject.toml b/pyproject.toml
index 64309b0..f160c74 100644
--- a/pyproject.toml
+++ a/pyproject.toml
@@ -1,11 +1,11 @@
[project]
name = "microsoft-kb-metadata"
version = "0.1.0"
version = "0.1.1"
description = "Generate Microsoft Knowledge Base metadata mappings"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.13"
dependencies = [
    "beautifulsoup4>=4.12.3",
    "beautifulsoup4>=4.13.4",
    "html5lib>=1.1",
    "requests>=2.32.3",
]
diff --git a/update.py b/update.py
index b3b6e5d..31ef50c 100644
--- a/update.py
+++ a/update.py
@@ -11,8 +11,7 @@
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # Unconditionally returns None, ignoring every argument.
        # NOTE(review): presumably this overrides urllib's HTTPRedirectHandler hook so
        # that redirects are not followed automatically — confirm against the class header.
        return None



# Matches URLs under one of the listed bulletin/advisory sections when the next
# path segment starts with a year from 1998 through 2024. skippable() searches
# for this pattern so that such URLs are not fetched.
SKIPPABLE_REGEX = r"(vulnerabilityresearchadvisories|securitybulletinsummaries|securitybulletins|securityadvisories)\/(199[89]|20[01][0-9]|202[0-4])"
# Matches a lowercase English month name followed by "-<digits>" (group "date"),
# with an optional "<4 digits>-" prefix (group "year") and an optional
# "-<4 digits>-" suffix (group "year2").
DATE_REGEX = r'(?:(?P<year>\d{4})-)?(?P<month>january|february|march|april|may|june|july|august|september|october|november|december)-(?P<date>\d+)(?:-(?P<year2>\d{4})-)?'

def parse_redirect(kb_id, slug):
@@ -79,7 +78,14 @@
            l = a['href']
            if l.startswith('https://support.microsoft.com/kb/') or l.startswith('https://support.microsoft.com/help/'):
                yield l.split('/')[4]



def skippable(url):
    """Report whether ``url`` contains a match for ``SKIPPABLE_REGEX``.

    The pattern is searched anywhere in the string; the result is ``True``
    when a match is found and ``False`` otherwise.
    """
    return re.search(SKIPPABLE_REGEX, url) is not None

if __name__ == "__main__":
    kbs = []
@@ -94,6 +100,8 @@
    with open('discovery.txt', 'r') as f:
        for url in f.readlines():
            url = url.strip()
            if skippable(url):
                continue
            for kb_id in fetch_kb_mentions(s, url):
                kbs.append(kb_id)
    update_mapping(s, kbs)
diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml
index d0afe28..57a246d 100644
--- a/.github/workflows/update.yml
+++ a/.github/workflows/update.yml
@@ -16,12 +16,14 @@
      with:
        ref: ${{ github.head_ref }}
    - name: Install uv
      uses: astral-sh/setup-uv@v3
      uses: astral-sh/setup-uv@v6

    - name: Set up Python
      run: uv python install
    - name: Update data
      run: |

        curl --silent https://learn.microsoft.com/_sitemaps/officeupdates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
        curl --silent https://learn.microsoft.com/_sitemaps/security-updates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
        uv sync --all-extras --dev
        uv run update.py
    - uses: stefanzweifel/git-auto-commit-action@v4