diff --git a/.github/workflows/update-sdk-extras.yml b/.github/workflows/update-sdk-extras.yml new file mode 100644 index 0000000000..8a52e8c664 --- /dev/null +++ b/.github/workflows/update-sdk-extras.yml @@ -0,0 +1,119 @@ +name: Update SDK Extras Documentation + +on: + repository_dispatch: + types: [pyproject-updated] + workflow_dispatch: + +# The create-pull-request step pushes a branch and opens a PR with GITHUB_TOKEN. +permissions: + contents: write + pull-requests: write + +jobs: + update-sdk-extras: + runs-on: ubuntu-latest + + steps: + - name: Checkout docs repository + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Checkout wandb repository + uses: actions/checkout@v5 + with: + repository: wandb/wandb + path: wandb-repo + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install tomli + + - name: Generate timestamp + id: timestamp + run: | + TIMESTAMP=$(date -u +"%Y-%m-%d %H:%M:%S UTC") + echo "timestamp=${TIMESTAMP}" >> $GITHUB_OUTPUT + echo "Generated at: ${TIMESTAMP}" + + - name: Copy pyproject.toml to scripts directory + run: | + echo "Copying wandb/pyproject.toml to scripts/" + cp wandb-repo/pyproject.toml scripts/pyproject.toml + echo "✅ File copied successfully" + + - name: Generate SDK Extras Table + run: | + echo "Generating SDK extras table from pyproject.toml..."
+ cd scripts + python generate_sdk_extras_table.py + echo "✅ SDK extras table generated" + + - name: Check for changes + id: check-changes + run: | + # Only the generated page counts: wandb-repo/ and scripts/pyproject.toml are untracked working files. + if [ -n "$(git status --porcelain -- models/ref/python.mdx)" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + echo "Changes detected in models/ref/python.mdx" + git diff -- models/ref/python.mdx + else + echo "has_changes=false" >> $GITHUB_OUTPUT + echo "No changes detected" + fi + + - name: Create Pull Request + if: steps.check-changes.outputs.has_changes == 'true' + id: create-pr + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ secrets.GITHUB_TOKEN }} + # Commit only the generated page, not the wandb checkout or the copied pyproject.toml. + add-paths: models/ref/python.mdx + commit-message: "chore: Update SDK extras documentation" + title: "Update SDK extras documentation from pyproject.toml" + draft: false + body: | + This PR updates the SDK extras documentation based on the latest `wandb/pyproject.toml` configuration. + + **Generated on**: ${{ steps.timestamp.outputs.timestamp }} + **Source**: `wandb/wandb` repository `pyproject.toml` + **Triggered by**: Repository dispatch event from wandb/wandb + + ### Changes + - Synced latest optional dependencies from wandb package + - Updated SDK extras table in documentation + + ### Review Checklist + - [ ] Verify all extras are correctly listed + - [ ] Check that package links are accurate + - [ ] Confirm excluded extras (models, kubeflow, launch, importers, perf) are intentionally omitted + + --- + *This PR was automatically generated by the SDK extras update workflow.* + branch: update-sdk-extras-${{ github.run_number }} + delete-branch: true + labels: | + documentation + automated + sdk-extras + + - name: Display Result + run: | + if [ "${{ steps.check-changes.outputs.has_changes }}" == "true" ]; then + if [ -n "${{ steps.create-pr.outputs.pull-request-number }}" ]; then + PR_URL="https://github.com/${{ github.repository }}/pull/${{ steps.create-pr.outputs.pull-request-number }}" + echo "📝 PR created successfully!"
+ echo "::notice title=Pull Request Created::A PR has been created for SDK extras updates: $PR_URL" + fi + else + echo "✅ SDK extras documentation is up to date" + echo "::notice title=No Updates Needed::SDK extras documentation is already up to date" + fi diff --git a/models/ref/python.mdx b/models/ref/python.mdx index 25e5c959f1..c609caa113 100644 --- a/models/ref/python.mdx +++ b/models/ref/python.mdx @@ -2,25 +2,28 @@ title: Python SDK 0.23.0 module: --- + The W&B Python SDK, accessible at `wandb`, enables you to train and fine-tune models, and manage models from experimentation to production. > After performing your training and fine-tuning operations with this SDK, you can use [the Public API](/models/ref/python/public-api) to query and analyze the data that was logged, and [the Reports and Workspaces API](/models/ref/wandb_workspaces) to generate a web-publishable [report](/models/reports/) summarizing your work. -## Installation and setup -### Sign up and create an API key + +## Sign up and create an API key To authenticate your machine with W&B, you must first generate an API key at https://wandb.ai/authorize. -### Install and import packages +## Install and import packages -Install the W&B library. +Install the W&B Python SDK using `pip`: -``` +```shell pip install wandb ``` -### Import W&B Python SDK: +## Import W&B Python SDK + +The following code snippet demonstrates how to import the W&B Python SDK and initialize a run. Replace the value of `entity` with your team entity name. ```python import wandb @@ -33,4 +36,45 @@ project = "my-awesome-project" with wandb.init(entity=entity, project=project) as run: run.log({"accuracy": 0.9, "loss": 0.1}) -```` +``` + +## Install optional dependencies + +If needed, you can install optional dependencies that extend the functionality of the W&B Python SDK.
Replace `extra` with the desired [optional dependency](#supported-optional-dependencies): + +```shell +pip install "wandb[extra]" +``` + +For example, to install W&B with Google Cloud Storage support, run: + +```shell +pip install "wandb[gcp]" +``` + +Install more than one optional dependency by separating them with commas: + +```shell +pip install "wandb[gcp,aws,media]" +``` + +Use extras when you need deeper integration with specific platforms or features. For example: + +* Install `gcp` if you store datasets, models, or artifacts in Google Cloud Storage. +* Install `aws` if you use Amazon S3 or other AWS-backed storage in your workflows. +* Install `media` if you frequently log large or complex media (`images`, `audio`, `video`) and want better format support. + +### Supported optional dependencies + +The following table lists the supported extras for the W&B Python SDK and the packages that each extra installs: + +{/* python-extras-start */} +| Extra | Packages included | +|---------|---------| +| `gcp` | [google-cloud-storage](https://pypi.org/project/google-cloud-storage/) | +| `aws` | [boto3](https://pypi.org/project/boto3/), [botocore](https://pypi.org/project/botocore/) | +| `azure` | [azure-identity](https://pypi.org/project/azure-identity/), [azure-storage-blob](https://pypi.org/project/azure-storage-blob/) | +| `media` | [numpy](https://pypi.org/project/numpy/), [moviepy](https://pypi.org/project/moviepy/), [imageio](https://pypi.org/project/imageio/), [pillow](https://pypi.org/project/pillow/), [bokeh](https://pypi.org/project/bokeh/), [soundfile](https://pypi.org/project/soundfile/), [plotly](https://pypi.org/project/plotly/), [rdkit](https://pypi.org/project/rdkit/) | +| `sweeps` | [sweeps](https://pypi.org/project/sweeps/) | +| `workspaces` | [wandb-workspaces](https://pypi.org/project/wandb-workspaces/) | +{/* python-extras-end */} \ No newline at end of file diff --git a/scripts/generate_sdk_extras_table.py b/scripts/generate_sdk_extras_table.py new file mode
# scripts/generate_sdk_extras_table.py (new file, mode 100755)
"""Generate a markdown table of SDK extras from pyproject.toml.

This script reads the `pyproject.toml` file to extract optional dependencies (extras)
and generates a markdown table listing each extra along with the packages it includes.
The table is then written between two marker comments in the Python SDK reference page.

Note that this script excludes certain extras defined in the EXCLUDE list.
"""
import os
import re
import sys

try:
    import tomllib  # Standard library from Python 3.11 onwards.
except ModuleNotFoundError:  # Older interpreters: fall back to the API-compatible backport.
    import tomli as tomllib

# W&B Python SDK Extras to exclude from the table.
EXCLUDE = ["models", "kubeflow", "launch", "importers", "perf"]

# Matches the leading distribution name of a requirement string. Whatever
# follows the name (extras in brackets, version specifiers, environment
# markers, direct URLs) is deliberately left out of the match.
_NAME_RE = re.compile(r"\s*([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)")


def header() -> str:
    """Return the markdown table title row (the column names)."""
    return "| Extra | Packages included |\n"


def header_row(head_topics: int = 2) -> str:
    """Return the markdown separator row that follows the title row.

    Args:
        head_topics (int): The number of columns in the table.

    Returns:
        str: A markdown separator row with one cell per column.
    """
    return "|---------" * head_topics + "|\n"


def make_table_row(extra: str, packages: str) -> str:
    """Generate a markdown table row for the given extra and packages.

    Args:
        extra (str): The name of the extra.
        packages (str): The markdown links for the packages.

    Returns:
        str: A markdown table row.
    """
    return f"| `{extra}` | {packages} |\n"


def clean_deps(deps: list[str]) -> list[str]:
    """Reduce requirement strings to bare package names.

    Version specifiers (``>=1.0``), extras (``[crt]``), environment markers
    (``; python_version < "3.9"``) and surrounding whitespace are all removed,
    so each result can be used directly in a PyPI project URL.

    Args:
        deps (list[str]): A list of dependency strings.

    Returns:
        list[str]: The package name of each dependency, in the same order.
    """
    names = []
    for dep in deps:
        match = _NAME_RE.match(dep)
        # Strings that do not start with a recognizable name are passed
        # through (trimmed) rather than dropped, so nothing vanishes silently.
        names.append(match.group(1) if match else dep.strip())
    return names


def make_dep_link(deps: list[str]) -> str:
    """Create markdown links to PyPI for a list of dependencies.

    Args:
        deps (list[str]): A list of dependency names.

    Returns:
        str: Comma-separated markdown links, one per dependency.
    """
    print(f"Creating dependency links for: {deps}")
    dep_names = [dep.strip().split(" ")[0] for dep in deps]
    # Blank entries would produce a broken "[](.../project//)" link; skip them.
    return ", ".join(
        f"[{name}](https://pypi.org/project/{name}/)" for name in dep_names if name
    )


def generate_table(pyproject_path: str) -> str:
    """Generate the markdown table for the SDK extras.

    Args:
        pyproject_path (str): The path to the pyproject.toml file.

    Returns:
        str: A string representing the markdown table. Extras listed in
        EXCLUDE are omitted; the remaining extras keep their file order.
    """
    # The TOML parser requires a binary file handle.
    with open(pyproject_path, "rb") as f:
        pyproject = tomllib.load(f)

    extras = pyproject.get("project", {}).get("optional-dependencies", {})

    rows = [header(), header_row()]
    rows.extend(
        make_table_row(extra, make_dep_link(clean_deps(deps)))
        for extra, deps in extras.items()
        if extra not in EXCLUDE
    )
    return "".join(rows)


def replace_content_between_markers(
    file_path: str, start_marker: str, end_marker: str, replacement: str
) -> bool:
    """Replace content between start and end markers in a file.

    The markers themselves are preserved in the file. Only the content
    between them is replaced with the new content. The file is left
    untouched when the markers cannot be located.

    Args:
        file_path (str): The path to the file to modify.
        start_marker (str): The starting marker string.
        end_marker (str): The ending marker string.
        replacement (str): The text to insert between the markers.

    Returns:
        bool: True if both markers were found and content was replaced, False otherwise.
    """
    # Explicit UTF-8 so the result does not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    start_pos = content.find(start_marker)
    if start_pos == -1:
        print(f"Error: Start marker '{start_marker}' not found in '{file_path}'.")
        return False

    if end_marker not in content:
        print(f"Error: End marker '{end_marker}' not found in '{file_path}'.")
        return False

    # Look for the end marker only after the start marker, so an earlier stray
    # occurrence of the end marker cannot be mistaken for the closing one.
    content_start = start_pos + len(start_marker)
    end_pos = content.find(end_marker, content_start)
    if end_pos == -1:
        print(f"Error: Start marker must come before end marker in '{file_path}'.")
        return False

    # Keep the end marker on its own line even if the caller's text does not
    # end with a newline.
    if replacement and not replacement.endswith("\n"):
        replacement += "\n"

    updated_content = content[:content_start] + "\n" + replacement + content[end_pos:]

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(updated_content)

    return True


def main() -> int:
    """Regenerate the extras table in the Python SDK reference page.

    Paths are resolved relative to this script rather than the current
    working directory, so the script can be launched from anywhere.

    Returns:
        int: 0 when the table was written, 1 when pyproject.toml or the
        markers could not be found.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Candidate locations for pyproject.toml, in priority order:
    # 1. next to this script, where the update-sdk-extras workflow copies it;
    # 2. a `wandb` checkout that sits beside the docs repository.
    candidates = [
        os.path.join(script_dir, "pyproject.toml"),
        os.path.normpath(
            os.path.join(script_dir, "..", "..", "wandb", "pyproject.toml")
        ),
    ]
    pyproject_path = next((path for path in candidates if os.path.isfile(path)), None)
    if pyproject_path is None:
        print("Error: pyproject.toml not found. Looked in: " + ", ".join(candidates))
        return 1

    # MDX file path that contains the markers.
    mdx_file_path = os.path.normpath(
        os.path.join(script_dir, "..", "models", "ref", "python.mdx")
    )

    # Start and end markers that enclose the table in the MDX file.
    start_marker = "{/* python-extras-start */}"
    end_marker = "{/* python-extras-end */}"

    # Generate the markdown table.
    table = generate_table(pyproject_path)

    # Replace the content between markers in the MDX file with the generated table.
    if replace_content_between_markers(mdx_file_path, start_marker, end_marker, table):
        print(f"Successfully replaced content between markers in '{mdx_file_path}'.")
        return 0

    print(f"Failed to replace content between markers in '{mdx_file_path}'.")
    return 1


if __name__ == "__main__":
    # To do: Add argument parsing for custom paths
    # Propagate failure as a non-zero exit code so CI steps fail visibly.
    sys.exit(main())