Feat: Add metadata validation workflow and scripts #5
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow that validates component metadata on pull requests. | |
| name: validate-metadata-schema | |
| # Trigger: pull requests that touch files matching either path filter below. | |
| # NOTE(review): the shell steps in this workflow scan `components/`, while this filter and TARGET_DIR use `component/pipelines` - confirm which directory is intended. | |
| on: | |
| pull_request: | |
| paths: ['component/pipelines/**', 'scripts/**'] | |
| # Workflow-level environment variables, visible to every job and step. | |
| env: | |
| # Python version intended for the validation job (quoted so YAML keeps it a string). | |
| PYTHON_VERSION: "3.11" | |
| # NOTE(review): TARGET_DIR is not referenced by any step shown in this file - confirm whether it is still needed. | |
| TARGET_DIR: "component/pipelines" | |
| # Comma-separated component type names; the "Retrieve new components" step reads this. | |
| COMPONENT_TYPES: "data_processing,deployment,evaluation,training" | |
| jobs: | |
| # Single job: check out the repository, prepare Python, then run the metadata validation steps. | |
| validate-component-metadata-schema: | |
| runs-on: ubuntu-latest | |
| steps: | |
| # fetch-depth: 0 fetches full history so that later steps can run `git diff` across commits. | |
| - uses: actions/checkout@v5 | |
| with: | |
| fetch-depth: 0 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| # Take the version from the workflow-level PYTHON_VERSION instead of repeating it here; | |
| # an unquoted literal is parsed by YAML as a float (e.g. 3.10 would become 3.1). | |
| python-version: ${{ env.PYTHON_VERSION }} | |
| - name: Install Test dependencies | |
| run: | | |
| # Invoke pip through the interpreter so packages are installed for the Python selected above. | |
| python -m pip install -r scripts/validate_metadata/requirements.txt | |
| - name: Retrieve new components | |
| id: get-new-components | |
| # Sets the step output `new_components_list`: a comma-separated list of `<type>/<name>` | |
| # component directories (relative to components/) that the next step validates. | |
| # - If the pull request changes anything under scripts/validate_metadata/, every existing component is listed. | |
| # - Otherwise only components that contain newly added files are listed. | |
| run: | | |
| BASE_COMMIT="${{ github.event.pull_request.base.sha }}" | |
| HEAD_COMMIT="${{ github.event.pull_request.head.sha }}" | |
| # --- Variables --- | |
| SCRIPT_DIR="scripts/validate_metadata/" | |
| # Diff the whole pull request (merge-base of base and head up to head) when both commits | |
| # are present locally; otherwise fall back to the last commit only. | |
| if git cat-file -e "${BASE_COMMIT}^{commit}" 2>/dev/null && git cat-file -e "${HEAD_COMMIT}^{commit}" 2>/dev/null; then | |
| COMMIT_RANGE="$BASE_COMMIT...$HEAD_COMMIT" | |
| else | |
| COMMIT_RANGE="HEAD~1..HEAD" | |
| fi | |
| echo "Using BASE_COMMIT: $BASE_COMMIT" | |
| echo "Using HEAD_COMMIT: $HEAD_COMMIT" | |
| echo "Run COMMIT_RANGE=$COMMIT_RANGE" | |
| # If changes are detected in scripts/validate_metadata, validate all components. | |
| # No --diff-filter here: modified, renamed or deleted validator files count as changes too. | |
| SCRIPT_DIFF=$(git diff --name-only "$COMMIT_RANGE" -- "$SCRIPT_DIR") | |
| if [[ -n "$SCRIPT_DIFF" ]]; then | |
| echo "Changes detected in critical script directory: $SCRIPT_DIR" | |
| # Every directory exactly two levels below components/ is treated as one component (<type>/<name>). | |
| ALL_COMPONENT_FILES=$(find components -mindepth 2 -maxdepth 2 -type d | \ | |
| sed 's/^components\///' | \ | |
| sort -u | \ | |
| tr '\n' ',' | \ | |
| sed 's/,$//') | |
| echo "Changes detected in scripts/validate_metadata. All existing components will be validated: $ALL_COMPONENT_FILES" | |
| echo "new_components_list=$ALL_COMPONENT_FILES" >> "$GITHUB_OUTPUT" | |
| else | |
| # 1. Find all newly added files under the components directory. | |
| ALL_NEW_PATHS=$(git diff --name-only --diff-filter=A "$COMMIT_RANGE" -- "components") | |
| # 2. Reduce every path to its <type>/<name> component directory, keeping only known component types. | |
| # COMPONENT_TYPES is comma-separated; turn it into a regex alternation (a|b|c). | |
| TYPES_PATTERN="${COMPONENT_TYPES//,/|}" | |
| # `|| true` keeps the step alive when grep finds nothing, even if pipefail is enabled. | |
| NEW_COMPONENTS=$(printf '%s\n' "$ALL_NEW_PATHS" | \ | |
| { grep -oE "^components/($TYPES_PATTERN)/[^/]+/" || true; } | \ | |
| sed -e 's/^components\///' -e 's/\/$//' | \ | |
| sort -u | \ | |
| tr '\n' ',' | \ | |
| sed 's/,$//') | |
| # --- Output --- | |
| echo "The following new components were found and will be validated: $NEW_COMPONENTS" | |
| echo "new_components_list=$NEW_COMPONENTS" >> "$GITHUB_OUTPUT" | |
| fi | |
| - name: Validate new components | |
| # Runs the metadata validator once per component listed by the previous step; skipped when the list is empty. | |
| if: ${{ steps.get-new-components.outputs.new_components_list != '' }} | |
| env: | |
| # Pass the list through the environment instead of expanding it inside the script text: | |
| # component names are derived from pull-request file paths and must never be parsed as shell code. | |
| NEW_COMPONENTS_LIST: ${{ steps.get-new-components.outputs.new_components_list }} | |
| run: | | |
| SCRIPT_PATH="$GITHUB_WORKSPACE/scripts/validate_metadata/validate_metadata.py" | |
| # Split on commas for this single read only, so IFS is unchanged for the rest of the script. | |
| IFS=',' read -r -a NEW_COMPONENTS_ARRAY <<< "$NEW_COMPONENTS_LIST" | |
| FAILED_COMPONENTS=() | |
| for component in "${NEW_COMPONENTS_ARRAY[@]}"; do | |
| # Skip empty entries (for example from a stray comma). | |
| [[ -n "$component" ]] || continue | |
| COMPONENT_PATH="$GITHUB_WORKSPACE/components/$component" | |
| echo "Processing component: $component" | |
| # Keep going after a failure so that one run reports every invalid component. | |
| python "$SCRIPT_PATH" --component "$COMPONENT_PATH" || FAILED_COMPONENTS+=("$component") | |
| done | |
| # Fail the step if any component did not validate. | |
| if (( ${#FAILED_COMPONENTS[@]} > 0 )); then | |
| echo "Metadata validation failed for: ${FAILED_COMPONENTS[*]}" >&2 | |
| exit 1 | |
| fi | |