Skip to content

Commit 0677c20

Browse files
authored
Merge pull request #82 from m-rutherford/main
Added support for Azure Dev Ops and Azure Dev Ops Server
2 parents 64a82f6 + af46697 commit 0677c20

File tree

5 files changed

+361
-2
lines changed

5 files changed

+361
-2
lines changed

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,25 @@ The basic structure is:
118118
```jsonc
119119
"TFS": [
120120
{
121-
"url": "https://tfs.internal", // Base URL for a Team Foundation Server (TFS) or Visual Studio Team Services (VSTS) or Azure DevOps instance
121+
"url": "https://tfs.internal", // Base URL for a Team Foundation Server (TFS) or Visual Studio Team Services (VSTS)
122122
"token": null, // Private token for accessing this TFS instance
123123

124124
"exclude": [ ... ] // List of projects / repositories to exclude from inventory
125125
}
126126
]
127127
```
128128

129+
```jsonc
130+
"AzureDevOps": [
131+
{
132+
"url": "https://dev.azure.com", // Base URL for an Azure DevOps Server (on-premises) or Azure DevOps cloud instance
133+
"token": null, // Personal Access Token for accessing this ADO instance
134+
"apiVersion": "", // Azure DevOps REST API version to request; omit the key to use the default "6.1-preview"
135+
"exclude": [ ... ] // List of projects to exclude from inventory
136+
}
137+
]
138+
```
139+
129140
## License
130141

131142
Scraper is released under an MIT license. For more details see the

scraper/azuredevops/__init__.py

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
import base64
2+
import logging
3+
import os
4+
import re
5+
from typing import List
6+
7+
import requests
8+
9+
from scraper.azuredevops.models import AzureDevOpsCollection, AzureDevOpsProject
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
class AzureDevOpsClient:
    """
    Small REST client that enumerates projects in Azure DevOps.

    An instance is treated as the cloud service when its base URL contains
    "dev.azure.com"; anything else is handled as an on-premises server whose
    project collections are discovered through the API.
    """

    def __init__(self, baseurl, api_version, token=None):
        """
        Parameters:
            baseurl (str): Base URL of the Azure DevOps instance
            api_version (str): Value sent as the `api-version` query parameter
            token (str): Personal Access Token; when None the ADO_API_TOKEN
                environment variable is used instead

        Raises:
            RuntimeError: If no token is supplied and ADO_API_TOKEN is unset
        """
        self.baseurl = baseurl
        self.api_version = api_version
        self.is_cloud_ado = "dev.azure.com" in baseurl
        self.session = self._create_client_session(token)

    def get_projects_metadata(self) -> "List[AzureDevOpsProject]":
        """
        Get metadata for all projects
        """
        collections = self._get_all_collections()
        return self._get_all_projects(collections)

    def _create_client_session(self, token):
        """
        Creates the Azure DevOps Client Context with the provided token.
        If no token is provided, it will look for the ADO_API_TOKEN environment variable.

        Parameters:
            token (str): Personal Access Token, or None to read the environment

        Returns:
            requests.Session: Session carrying the Basic auth and Accept headers

        Raises:
            RuntimeError: If no token could be found
        """
        if token is None:
            token = os.environ.get("ADO_API_TOKEN", None)

        if token is None:
            raise RuntimeError("Azure Dev Ops Token was not provided.")

        session = requests.Session()
        # The token is sent as the password half of Basic auth with an empty user name.
        auth_string = f":{token}"
        encoded_auth = base64.b64encode(auth_string.encode("ascii")).decode("ascii")
        session.headers.update(
            {"Authorization": f"Basic {encoded_auth}", "Accept": "application/json"}
        )
        return session

    def _collection_from_baseurl(self):
        """
        Derive an organization from the last path segment of the base URL.

        Returns:
            AzureDevOpsCollection: Collection named after that segment, or None
                when the segment is empty or is the bare "dev.azure.com" host
        """
        org_name = self.baseurl.rstrip("/").split("/")[-1]
        if not org_name or org_name == "dev.azure.com":
            return None

        logger.debug("Using organization from base address: %s", org_name)
        return AzureDevOpsCollection(
            id=org_name,
            name=org_name,
            url=f"https://dev.azure.com/{org_name}",
        )

    def _get_cloud_collections(self) -> "List[AzureDevOpsCollection]":
        """
        List the organizations the token's user belongs to on the cloud service.

        Returns:
            List[AzureDevOpsCollection]: One entry per organization. May be empty
                when the account listing is empty and the base URL names no
                organization.

        Raises:
            RuntimeError: If the account listing request fails, or if the profile
                request fails and the base URL names no organization
        """
        vssps_api = "https://app.vssps.visualstudio.com/_apis"

        profile_response = self.session.get(
            f"{vssps_api}/profile/profiles/me?api-version={self.api_version}"
        )
        if profile_response.status_code != 200:
            logger.warning(
                "Failed to retrieve user profile: %s Response: %s",
                profile_response.status_code,
                profile_response.text,
            )
            logger.warning("Falling back to base address for organization extraction.")
            fallback = self._collection_from_baseurl()
            if fallback is None:
                raise RuntimeError(
                    "Could not determine organization. Please specify organization in the baseurl."
                )
            return [fallback]

        profile = profile_response.json()

        # Get user's organizations/accounts
        accounts_response = self.session.get(
            f"{vssps_api}/accounts?memberId={profile['id']}&api-version={self.api_version}"
        )
        if accounts_response.status_code != 200:
            raise RuntimeError(
                f"Failed to retrieve organizations. Status Code: {accounts_response.status_code} Response: {accounts_response.text}"
            )

        accounts = accounts_response.json().get("value")
        if accounts:
            collections = []
            for org in accounts:
                collections.append(
                    AzureDevOpsCollection(
                        id=org["accountId"],
                        name=org["accountName"],
                        url=f"https://dev.azure.com/{org['accountName']}",
                    )
                )
                logger.debug("Found cloud organization: %s", org["accountName"])
            return collections

        logger.warning("No organizations found with your access token.")
        # An empty listing is not an error: use the base URL's organization if
        # it names one, otherwise return no collections.
        fallback = self._collection_from_baseurl()
        return [] if fallback is None else [fallback]

    def _get_onprem_collections(self) -> "List[AzureDevOpsCollection]":
        """
        List the project collections of an on-premises server.

        Returns:
            List[AzureDevOpsCollection]: One entry per collection in the response

        Raises:
            RuntimeError: If the collections request does not return HTTP 200
        """
        collections_url = (
            f"{self.baseurl}/_apis/projectcollections?api-version={self.api_version}"
        )
        collections_response = self.session.get(collections_url)

        if collections_response.status_code != 200:
            raise RuntimeError(
                f"Failed to retrieve collections. Status Code: {collections_response.status_code} Response: {collections_response.text}"
            )

        return [
            AzureDevOpsCollection(
                id=collection["id"],
                name=collection["name"],
                url=collection["url"],
            )
            for collection in collections_response.json().get("value", [])
        ]

    def _get_all_collections(self) -> "List[AzureDevOpsCollection]":
        """
        Get all collections from the Azure DevOps API.

        Returns:
            List[AzureDevOpsCollection]: Cloud organizations or on-premises
                collections, depending on the kind of instance

        Raises:
            RuntimeError: If the collections/organizations cannot be determined
        """
        if self.is_cloud_ado:
            collections = self._get_cloud_collections()
        else:
            collections = self._get_onprem_collections()

        logger.debug("Found %d collections/organizations", len(collections))
        return collections

    def _get_web_url_from_api_url(self, api_url, project_name):
        """
        Convert an API URL to a web-accessible URL

        Parameters:
            api_url (str): API URL for the project
            project_name (str): Name of the project

        Returns:
            str: Web URL for the project
        """
        if self.is_cloud_ado:
            # For cloud ADO, convert URL like:
            # https://dev.azure.com/org-name/_apis/projects/project-id
            # to: https://dev.azure.com/org-name/project-name
            match = re.search(r"https://dev\.azure\.com/([^/]+)", api_url)
            if match:
                return f"https://dev.azure.com/{match.group(1)}/{project_name}"
            # A cloud URL in another form falls through to the generic handling
            # below, so this method never returns None.

        # For on-premises ADO, convert URL like:
        # https://server/collection/_apis/projects/project-id
        # to: https://server/collection/project-name
        base_url = api_url.split("/_apis/projects")[0]
        return f"{base_url}/{project_name}"

    def _get_repo_web_url(self, api_url, project_name):
        """
        Generate web-accessible URL for repositories page

        Parameters:
            api_url (str): API URL for the project
            project_name (str): Name of the project

        Returns:
            str: Web URL for the project's repositories page
        """
        project_web_url = self._get_web_url_from_api_url(api_url, project_name)
        return f"{project_web_url}/_git"

    def _get_all_projects(
        self, collections: "List[AzureDevOpsCollection]" = None
    ) -> "List[AzureDevOpsProject]":
        """
        Get all projects from the provided collections or from all collections if none are provided

        Parameters:
            collections (List[AzureDevOpsCollection]): List of collections to get projects from

        Returns:
            List[AzureDevOpsProject]: Projects of every collection, in request order

        Raises:
            RuntimeError: If any project listing request does not return HTTP 200
        """
        if collections is None:
            collections = self._get_all_collections()

        projects = []
        for collection in collections:
            collection_url = (
                f"https://dev.azure.com/{collection.name}"
                if self.is_cloud_ado
                else f"{self.baseurl}/{collection.name}"
            )
            logger.debug("Getting projects from collection: %s", collection_url)

            top = 100
            project_skip = 0

            while True:
                url = f"{collection_url}/_apis/projects?$top={top}&$skip={project_skip}&api-version={self.api_version}&includeCapabilities=true"

                response = self.session.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Failed to get projects: {response.status_code}"
                    )

                page = response.json().get("value", [])
                for project in page:
                    project_api_url = project.get("url")
                    project_name = project.get("name")

                    projects.append(
                        AzureDevOpsProject(
                            project_id=project.get("id"),
                            project_name=project_name,
                            project_description=project.get("description") or "",
                            project_url=self._get_web_url_from_api_url(
                                project_api_url, project_name
                            ),
                            repo_url=self._get_repo_web_url(
                                project_api_url, project_name
                            ),
                            project_create_time="",  # Not provided in API response
                            project_last_update_time=project.get("lastUpdateTime"),
                            collection_or_org_name=collection.name,
                        )
                    )

                # Only a page of exactly `top` entries can be followed by another page.
                if len(page) != top:
                    break
                project_skip += top

        return projects

scraper/azuredevops/models.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
class AzureDevOpsCollection:
    """
    Plain record for one project collection (on-premises) or organization (cloud).
    """

    def __init__(self, id="", name="", url=""):
        """
        Parameters:
            id (str): Identifier of the collection/organization
            name (str): Name of the collection/organization
            url (str): URL of the collection/organization
        """
        # Stored exactly as given; no validation or normalization is applied.
        self.id, self.name, self.url = id, name, url
6+
7+
8+
class AzureDevOpsProject:
    """
    Plain record holding the metadata collected for one Azure DevOps project.
    """

    def __init__(
        self,
        project_id="",
        project_name="",
        project_description="",
        project_url="",
        repo_url="",
        project_create_time="",
        project_last_update_time="",
        collection_or_org_name="",
    ):
        """
        Parameters:
            project_id (str): Identifier of the project
            project_name (str): Name of the project
            project_description (str): Description of the project
            project_url (str): URL of the project
            repo_url (str): URL of the project's repositories
            project_create_time (str): Creation time of the project
            project_last_update_time (str): Last update time of the project
            collection_or_org_name (str): Name of the owning collection or organization
        """
        # Identity
        self.project_id, self.project_name = project_id, project_name
        self.project_description = project_description
        # Locations
        self.project_url, self.repo_url = project_url, repo_url
        # Timestamps
        self.project_create_time, self.project_last_update_time = (
            project_create_time,
            project_last_update_time,
        )
        # Owner
        self.collection_or_org_name = collection_or_org_name

scraper/code_gov/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55

66
from scraper import bitbucket, doecode, github, gitlab, tfs
7+
from scraper.azuredevops import AzureDevOpsClient
78
from scraper.code_gov.models import Metadata, Project
89
from scraper.github import gov_orgs
910

@@ -128,6 +129,26 @@ def process_config(config):
128129
)
129130
code_gov_metadata["releases"].append(code_gov_project)
130131

132+
# parse config for AzureDevOps repositories
133+
ado_instances = config.get("AzureDevOps", [])
134+
for instance in ado_instances:
135+
url = instance.get("url")
136+
token = instance.get("token", None)
137+
api_version = instance.get("apiVersion", "6.1-preview")
138+
excluded = instance.get("exclude", [])
139+
140+
ado_client = AzureDevOpsClient(url, api_version, token)
141+
projects = ado_client.get_projects_metadata()
142+
for project in projects:
143+
if project.project_name in excluded:
144+
logger.info("Excluding: %s", project.project_name)
145+
continue
146+
147+
code_gov_project = Project.from_ado(
148+
project, labor_hours=compute_labor_hours
149+
)
150+
code_gov_metadata["releases"].append(code_gov_project)
151+
131152
# Handle parsing of DOE CODE records
132153

133154
doecode_config = config.get("DOE CODE", {})

0 commit comments

Comments
 (0)