|
| 1 | +import base64 |
| 2 | +import logging |
| 3 | +import os |
| 4 | +import re |
| 5 | +from typing import List |
| 6 | + |
| 7 | +import requests |
| 8 | + |
| 9 | +from scraper.azuredevops.models import AzureDevOpsCollection, AzureDevOpsProject |
| 10 | + |
logger = logging.getLogger(__name__)


class AzureDevOpsClient:
    """
    Client for reading collection/organization and project metadata from the
    Azure DevOps REST API, for both the cloud service and on-premises servers.

    Parameters:
        baseurl (str): Base address of the Azure DevOps instance. A base URL
            containing "dev.azure.com" selects the cloud code paths.
        api_version (str): Value sent as the ``api-version`` query parameter.
        token (str): Access token. When omitted, the ADO_API_TOKEN
            environment variable is used instead.
    """

    def __init__(self, baseurl, api_version, token=None):
        self.baseurl = baseurl
        self.api_version = api_version
        self.is_cloud_ado = "dev.azure.com" in baseurl
        self.session = self._create_client_session(token)

    def get_projects_metadata(self) -> List[AzureDevOpsProject]:
        """
        Get metadata for all projects in every reachable collection/organization.

        Returns:
            List[AzureDevOpsProject]: One entry per project found.
        """
        collections = self._get_all_collections()
        return self._get_all_projects(collections)

    def _create_client_session(self, token):
        """
        Creates the Azure DevOps client session with the provided token.
        If no token is provided, it will look for the ADO_API_TOKEN environment variable.

        Parameters:
            token (str): Access token, or None to read it from the environment

        Returns:
            requests.Session: Session with Basic auth and JSON Accept headers set

        Raises:
            RuntimeError: If no token was passed and ADO_API_TOKEN is not set
        """
        if token is None:
            token = os.environ.get("ADO_API_TOKEN", None)

        if token is None:
            raise RuntimeError("Azure Dev Ops Token was not provided.")

        session = requests.Session()
        # Basic auth with an empty user name: the token goes in the password slot.
        auth_string = f":{token}"
        encoded_auth = base64.b64encode(auth_string.encode("ascii")).decode("ascii")
        session.headers.update(
            {"Authorization": f"Basic {encoded_auth}", "Accept": "application/json"}
        )
        return session

    def _get_all_collections(self) -> List[AzureDevOpsCollection]:
        """
        Get all collections (on-premises) or organizations (cloud) from the
        Azure DevOps API.

        Returns:
            List[AzureDevOpsCollection]: Collections/organizations found

        Raises:
            RuntimeError: If the listing request fails, or if the cloud profile
                lookup fails and no organization can be read from the base URL
        """
        if self.is_cloud_ado:
            collections = self._get_cloud_organizations()
        else:
            collections = self._get_onprem_collections()

        logger.debug("Found %d collections/organizations", len(collections))
        return collections

    def _get_cloud_organizations(self) -> List[AzureDevOpsCollection]:
        """
        List the organizations the token's user belongs to (cloud only).

        Falls back to the organization named in the base URL when the profile
        cannot be read or the account list comes back empty.
        """
        profile_url = f"https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version={self.api_version}"
        profile_response = self.session.get(profile_url)

        if profile_response.status_code != 200:
            logger.warning(
                "Failed to retrieve user profile: %s Response: %s",
                profile_response.status_code,
                profile_response.text,
            )
            logger.warning("Falling back to base address for organization extraction.")
            fallback = self._collection_from_baseurl()
            if fallback is None:
                raise RuntimeError(
                    "Could not determine organization. Please specify organization in the baseurl."
                )
            return [fallback]

        profile = profile_response.json()

        # Get user's organizations/accounts
        accounts_url = f"https://app.vssps.visualstudio.com/_apis/accounts?memberId={profile['id']}&api-version={self.api_version}"
        accounts_response = self.session.get(accounts_url)

        if accounts_response.status_code != 200:
            raise RuntimeError(
                f"Failed to retrieve organizations. Status Code: {accounts_response.status_code} Response: {accounts_response.text}"
            )

        accounts = accounts_response.json().get("value")
        if accounts:
            organizations = []
            for org in accounts:
                organizations.append(
                    AzureDevOpsCollection(
                        id=org["accountId"],
                        name=org["accountName"],
                        url=f"https://dev.azure.com/{org['accountName']}",
                    )
                )
                logger.debug("Found cloud organization: %s", org["accountName"])
            return organizations

        logger.warning("No organizations found with your access token.")
        # Best effort: an empty account list is not an error, so an unusable
        # base URL simply yields no organizations here.
        fallback = self._collection_from_baseurl()
        return [] if fallback is None else [fallback]

    def _collection_from_baseurl(self):
        """
        Build a cloud organization entry from the last path segment of the base URL.

        Returns:
            AzureDevOpsCollection: The organization, or None when the base URL
                has no usable organization segment
        """
        org_name = self.baseurl.rstrip("/").split("/")[-1]
        if not org_name or org_name == "dev.azure.com":
            return None

        logger.debug("Using organization from base address: %s", org_name)
        return AzureDevOpsCollection(
            id=org_name,
            name=org_name,
            url=f"https://dev.azure.com/{org_name}",
        )

    def _get_onprem_collections(self) -> List[AzureDevOpsCollection]:
        """
        List the project collections of an on-premises server.

        Raises:
            RuntimeError: If the server does not answer with HTTP 200
        """
        # Strip a trailing slash so the request path never contains "//".
        collections_url = f"{self.baseurl.rstrip('/')}/_apis/projectcollections?api-version={self.api_version}"
        collections_response = self.session.get(collections_url)

        if collections_response.status_code != 200:
            raise RuntimeError(
                f"Failed to retrieve collections. Status Code: {collections_response.status_code} Response: {collections_response.text}"
            )

        return [
            AzureDevOpsCollection(
                id=collection["id"],
                name=collection["name"],
                url=collection["url"],
            )
            for collection in collections_response.json().get("value", [])
        ]

    def _get_web_url_from_api_url(self, api_url, project_name):
        """
        Convert an API URL to a web-accessible URL

        Parameters:
            api_url (str): API URL for the project
            project_name (str): Name of the project

        Returns:
            str: Web URL for the project
        """
        if self.is_cloud_ado:
            # For cloud ADO, convert URL like:
            # https://dev.azure.com/org-name/_apis/projects/project-id
            # to: https://dev.azure.com/org-name/project-name
            match = re.search(r"https://dev\.azure\.com/([^/]+)", api_url)
            if match:
                return f"https://dev.azure.com/{match.group(1)}/{project_name}"
            # A cloud URL on another host falls through to the generic
            # conversion below instead of returning None.

        # For on-premises ADO (and unrecognized cloud hosts), convert URL like:
        # https://server/collection/_apis/projects/project-id
        # to: https://server/collection/project-name
        base_url = api_url.split("/_apis/projects")[0]
        return f"{base_url}/{project_name}"

    def _get_repo_web_url(self, api_url, project_name):
        """
        Generate web-accessible URL for repositories page

        Parameters:
            api_url (str): API URL for the project
            project_name (str): Name of the project

        Returns:
            str: Web URL for the project's repositories page
        """
        project_web_url = self._get_web_url_from_api_url(api_url, project_name)
        return f"{project_web_url}/_git"

    def _get_all_projects(
        self, collections: List[AzureDevOpsCollection] = None
    ) -> List[AzureDevOpsProject]:
        """
        Get all projects from the provided collections or from all collections if none are provided

        Parameters:
            collections (List[AzureDevOpsCollection]): List of collections to get projects from

        Returns:
            List[AzureDevOpsProject]: Projects of every collection, in collection order

        Raises:
            RuntimeError: If any project listing request fails
        """
        if collections is None:
            collections = self._get_all_collections()

        projects = []
        for collection in collections:
            projects.extend(self._get_projects_for_collection(collection))
        return projects

    def _get_projects_for_collection(self, collection):
        """
        Page through the project list of a single collection/organization.

        Parameters:
            collection (AzureDevOpsCollection): Collection to read; only its name is used

        Returns:
            List[AzureDevOpsProject]: Projects found in the collection

        Raises:
            RuntimeError: If a page request does not answer with HTTP 200
        """
        collection_url = (
            f"https://dev.azure.com/{collection.name}"
            if self.is_cloud_ado
            else f"{self.baseurl.rstrip('/')}/{collection.name}"
        )
        logger.debug("Getting projects from collection: %s", collection_url)

        page_size = 100
        skip = 0
        projects = []

        while True:
            url = f"{collection_url}/_apis/projects?$top={page_size}&$skip={skip}&api-version={self.api_version}&includeCapabilities=true"

            response = self.session.get(url)
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to get projects: {response.status_code} Response: {response.text}"
                )

            page = response.json().get("value", [])
            for project in page:
                project_api_url = project.get("url")
                project_name = project.get("name")

                projects.append(
                    AzureDevOpsProject(
                        project_id=project.get("id"),
                        project_name=project_name,
                        project_description=project.get("description") or "",
                        project_url=self._get_web_url_from_api_url(
                            project_api_url, project_name
                        ),
                        repo_url=self._get_repo_web_url(project_api_url, project_name),
                        project_create_time="",  # Not provided in API response
                        project_last_update_time=project.get("lastUpdateTime"),
                        collection_or_org_name=collection.name,
                    )
                )

            # A short page means the server has nothing further to return.
            if len(page) != page_size:
                break
            skip += page_size

        logger.debug(
            "Found %d projects in collection: %s", len(projects), collection.name
        )
        return projects
0 commit comments