Skip to content

Commit a16de94

Browse files
committed
Store version in scraper
1 parent 2f316c9 commit a16de94

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

midl-to-scapy/idl/idl_scraper.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import requests
1818
import sys
1919

20+
import xml.etree.ElementTree as ET
2021
from bs4 import BeautifulSoup
2122
from tqdm import tqdm
2223

@@ -26,6 +27,7 @@
2627
"folder": "win",
2728
"root": "https://docs.microsoft.com/en-us/openspecs/windows_protocols",
2829
"list": "https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/e36c976a-6263-42a8-b119-7a3cc41ddd2a",
30+
"rss": "https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net/",
2931
"extras": [
3032
# Protocols not listed in the "list"
3133
"ms-dltm",
@@ -39,6 +41,7 @@
3941
"folder": "win",
4042
"root": "https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols",
4143
"list": "https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxprotlp/229f77ea-6518-4fe7-84fe-bd535fc6c32e",
44+
"rss": "https://officeprotocoldoc.z19.web.core.windows.net/files/",
4245
"extras": [
4346
# Protocols not listed in the "list"
4447
"ms-oxcrpc",
@@ -49,12 +52,27 @@
4952
"folder": "win",
5053
"root": "https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols",
5154
"list": "https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/MS-SPPROTLP/51f9ccbf-ea59-4bb5-9fe6-27bc5af855ff",
55+
"rss": "https://officeprotocoldoc.z19.web.core.windows.net/files/",
5256
},
5357
]
5458

5559
DEFAULT = "DEFAULT"
5660

5761

62+
def get_version(VERSION_URL, protocol, timeout=30):
    """
    Get the version + date of the current online protocol.

    Downloads "<VERSION_URL><PROTOCOL>/[<PROTOCOL>].rss" and reads the first
    <item> of the feed's first <channel>.

    :param VERSION_URL: base URL of the RSS feeds, used as-is as a prefix.
    :param protocol: protocol name; it is upper-cased to build the feed URL.
    :param timeout: seconds to wait for the server before giving up.
    :returns: tuple (version, pubdate). The version is extracted from the
        "(Version X.Y)" marker in the item's <title>, the date is the raw
        text of the item's <pubDate>.
    :raises requests.RequestException: on network failure, timeout or an
        HTTP error status.
    :raises xml.etree.ElementTree.ParseError: if the body is not valid XML.
    :raises ValueError: if the feed lacks the expected elements or the
        title carries no version marker.
    """
    name = protocol.upper()
    url = "%s%s/[%s].rss" % (VERSION_URL, name, name)

    # Without an explicit timeout, requests waits forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail here on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    root = ET.fromstring(response.content)
    channel = root.find("channel")
    item = channel.find("item") if channel is not None else None
    if item is None:
        raise ValueError("No <channel>/<item> found in RSS feed: %s" % url)

    # findtext() yields "" for a missing or empty <title>, so the regex
    # below never receives None.
    title = item.findtext("title", default="") or ""
    match = re.search(r"\(Version ([0-9.]+)\)", title)
    if match is None:
        raise ValueError(
            "No version found in RSS item title %r (%s)" % (title, url)
        )

    pubdate_node = item.find("pubDate")
    if pubdate_node is None:
        raise ValueError("No <pubDate> found in RSS item: %s" % url)

    return match.group(1), pubdate_node.text
74+
75+
5876
def get_protocol_list(TECHNICAL_DOCS_URL):
5977
"""
6078
Fetch the list of protocol names from Microsoft's technical documents page.
@@ -171,6 +189,9 @@ def download_protocol_idls(protocol_name, entry, output):
171189
"""
172190
num_files_saved = 0
173191

192+
# 0. Get IDL version
193+
version, pubdate = get_version(entry["rss"], protocol_name)
194+
174195
# 1. Get potential IDL URLs
175196
idl_urls = get_idl_urls(protocol_name, entry["root"])
176197
if not idl_urls:
@@ -198,6 +219,16 @@ def download_protocol_idls(protocol_name, entry, output):
198219
# Write it to disk
199220
with open(output / file_name, "w") as f:
200221
try:
222+
# Write header
223+
f.write(
224+
"// [%s] v%s (%s)\n"
225+
% (
226+
protocol_name,
227+
version,
228+
pubdate,
229+
)
230+
)
231+
# Write content
201232
f.write(idl_file)
202233
num_files_saved += 1
203234
except (TypeError, AttributeError) as e:

0 commit comments

Comments
 (0)