Partial Discovery support
Diff
README.md | 9 +--------
discovery.txt | 6 +++++-
pyproject.toml | 6 +++---
update.py | 12 ++++++++++++
.github/workflows/update.yml | 4 +++-
5 files changed, 22 insertions(+), 15 deletions(-)
@@ -12,14 +12,7 @@
## wip
Discovery notes. Need to check the sitemaps more thoroughly before I automate this.
See https://learn.microsoft.com/_sitemaps/sitemapindex.xml
```
curl --silent https://learn.microsoft.com/_sitemaps/officeupdates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
curl --silent https://learn.microsoft.com/_sitemaps/security-updates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
```
The code currently auto-discovers new security updates and office release URLs.
## license
@@ -25,7 +25,10 @@
https://learn.microsoft.com/en-us/officeupdates/release-history-microsoft-autoupdate
https://learn.microsoft.com/en-us/officeupdates/release-notes-microsoft365-apps
https://learn.microsoft.com/en-us/officeupdates/release-notes-office-for-mac
https://learn.microsoft.com/en-us/officeupdates/release-notes-office-for-mac-archived
https://learn.microsoft.com/en-us/officeupdates/release-notes-outlook-mobile
https://learn.microsoft.com/en-us/officeupdates/release-notes-outlook-new
https://learn.microsoft.com/en-us/officeupdates/release-notes-outlook-new-gc
https://learn.microsoft.com/en-us/officeupdates/semi-annual-channel-2016
https://learn.microsoft.com/en-us/officeupdates/semi-annual-channel-2017
https://learn.microsoft.com/en-us/officeupdates/semi-annual-channel-2018
@@ -47,6 +50,7 @@
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2013
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2019
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2021
https://learn.microsoft.com/en-us/officeupdates/update-history-office-2024
https://learn.microsoft.com/en-us/officeupdates/update-history-office-for-mac
https://learn.microsoft.com/en-us/officeupdates/update-history-office-win7
https://learn.microsoft.com/en-us/security-updates/
@@ -2064,5 +2068,5 @@
https://learn.microsoft.com/en-us/windows/release-health/status-windows-10-1809-and-windows-server-2019
https://learn.microsoft.com/en-us/windows/release-health/status-windows-10-21h2
https://learn.microsoft.com/en-us/windows/release-health/status-windows-11-22h2
https://learn.microsoft.com/en-us/windows/release-health/windows11-release-information
https://learn.microsoft.com/en-us/windows/release-health/windows-server-release-info
https://learn.microsoft.com/en-us/windows/release-health/windows11-release-information
@@ -1,11 +1,11 @@
[project]
name = "microsoft-kb-metadata"
version = "0.1.0"
version = "0.1.1"
description = "Generate Microsoft Knowledge Base metadata mappings"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.13"
dependencies = [
"beautifulsoup4>=4.12.3",
"beautifulsoup4>=4.13.4",
"html5lib>=1.1",
"requests>=2.32.3",
]
@@ -11,8 +11,7 @@
# Unconditionally returns None for every redirect it is asked about.
# NOTE(review): the signature matches urllib.request.HTTPRedirectHandler.redirect_request,
# where returning None means the redirect is not followed — the enclosing class is
# not visible here, so confirm that this is the intent.
def redirect_request(self, req, fp, code, msg, headers, newurl):
return None
# Matches a URL containing one of four section names (vulnerabilityresearchadvisories,
# securitybulletinsummaries, securitybulletins, securityadvisories) immediately
# followed by "/" and a year from 1998 through 2024. URLs from discovery.txt that
# match are skipped by the main loop via skippable().
SKIPPABLE_REGEX = r"(vulnerabilityresearchadvisories|securitybulletinsummaries|securitybulletins|securityadvisories)\/(199[89]|20[01][0-9]|202[0-4])"
# Matches a hyphen-separated date: an optional leading "YYYY-" (group "year"), a month
# name spelled in lowercase English (group "month"), "-", one or more digits (group
# "date"), and an optional trailing "-YYYY-" (group "year2"; note the closing hyphen
# is required for that group to match).
DATE_REGEX = r'(?:(?P<year>\d{4})-)?(?P<month>january|february|march|april|may|june|july|august|september|october|november|december)-(?P<date>\d+)(?:-(?P<year2>\d{4})-)?'
def parse_redirect(kb_id, slug):
@@ -79,7 +78,14 @@
l = a['href']
if l.startswith('https://support.microsoft.com/kb/') or l.startswith('https://support.microsoft.com/help/'):
yield l.split('/')[4]
def skippable(url):
    """Tell whether *url* should be left out of the KB-mention crawl.

    A URL is skippable when ``SKIPPABLE_REGEX`` matches anywhere inside it.

    Returns:
        ``True`` if the pattern is found in *url*, ``False`` otherwise.
    """
    return re.search(SKIPPABLE_REGEX, url) is not None
if __name__ == "__main__":
kbs = []
@@ -94,6 +100,8 @@
with open('discovery.txt', 'r') as f:
for url in f.readlines():
url = url.strip()
if skippable(url):
continue
for kb_id in fetch_kb_mentions(s, url):
kbs.append(kb_id)
update_mapping(s, kbs)
@@ -16,12 +16,14 @@
with:
ref: ${{ github.head_ref }}
- name: Install uv
uses: astral-sh/setup-uv@v3
uses: astral-sh/setup-uv@v6
- name: Set up Python
run: uv python install
- name: Update data
run: |
curl --silent https://learn.microsoft.com/_sitemaps/officeupdates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
curl --silent https://learn.microsoft.com/_sitemaps/security-updates_en-us_1.xml | yq -p xml -o c '.urlset.url[]|.loc' >> discovery.txt
uv sync --all-extras --dev
uv run update.py
- uses: stefanzweifel/git-auto-commit-action@v4