# Fix deployment on arm64 architecture (#109) #229
# Note: the original file was flagged as containing hidden or bidirectional
# Unicode text that may be interpreted or compiled differently than it appears;
# review it in an editor that reveals hidden Unicode characters.
---
# Workflow overview
#   build: detects which service folders under src/ changed, builds their
#          images with build/pai_build.py, pushes them (default registry and
#          GHCR) and restarts the services through paictl.py.
#   test:  queries the rest-server API after the build job has finished.
name: Build & Deploy Changed Services

permissions:
  packages: write
  contents: read

on:
  push:
    branches: [main, dev, "release/*"]
  pull_request:
    branches: [main, dev, "release/*"]

env:
  TAG: ${{ github.run_number }}

jobs:
  build:
    name: Build and Deploy
    runs-on: [self-hosted, paicicd]
    timeout-minutes: 120
    environment: auto-test
    container:
      image: ubuntu:latest
      volumes:
        # Expose the host Docker daemon to the job container.
        - /var/run/docker.sock:/var/run/docker.sock
    defaults:
      run:
        # Run steps inside a container default to `sh`; request bash
        # explicitly so every script runs with `-eo pipefail`.
        shell: bash
    env:
      DOCKER_BUILDKIT: "1"
      # Only these alert-manager images are built and pushed by this workflow.
      ALERT_MANAGER_IMAGES: abnormal-detector,alert-handler,alert-parser,cert-expiration-checker,cluster-utilization,job-data-recorder,job-status-change-notification,node-failure-detection,node-issue-classifier,nvidia-gpu-low-perf-fixer,redis-monitoring
    steps:
      - name: Install git
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: false
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.ref_name }}

      - name: Get Changed Folders (Services)
        id: changes
        env:
          # Event data is passed through the environment instead of being
          # expanded into the script text, so it can never be parsed as shell.
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PUSH_BEFORE_SHA: ${{ github.event.before }}
          PUSH_HEAD_SHA: ${{ github.sha }}
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          if [ "$EVENT_NAME" = "pull_request" ]; then
            echo "Pull request detected"
            # The checkout already fetched full history (fetch-depth: 0), so
            # the base branch is refreshed without a depth limit; a shallow
            # fetch could cut off the commits merge-base needs.
            git fetch origin "$PR_BASE_REF"
            base_sha=$(git merge-base "origin/$PR_BASE_REF" "$PR_HEAD_SHA")
            head_sha="$PR_HEAD_SHA"
          else
            base_sha="$PUSH_BEFORE_SHA"
            head_sha="$PUSH_HEAD_SHA"
            # github.event.before is all zeros when a branch is first pushed
            # and may be unreachable after a force push; in both cases diff
            # against the parent of the pushed commit instead.
            if ! git cat-file -e "${base_sha}^{commit}" 2>/dev/null; then
              base_sha=$(git rev-parse "${head_sha}^")
            fi
          fi
          echo "Comparing $base_sha...$head_sha"
          changed_files=$(git diff --name-only "$base_sha" "$head_sha")
          echo "Changed files: $changed_files"
          # A service is the first directory below src/. Files located
          # directly in src/ (NF == 2) belong to no service and are skipped.
          folders=$(printf '%s\n' "$changed_files" \
            | awk -F'/' '$1 == "src" && NF > 2 {print $2}' \
            | sort -u | paste -sd ' ' -)
          echo "Changed folders: $folders"
          # Export as a step output for the following steps.
          echo "folders=$folders" >> "$GITHUB_OUTPUT"

      - name: Check if folders are empty
        id: check
        env:
          FOLDERS: ${{ steps.changes.outputs.folders }}
        run: |
          # has_changed gates every remaining step of this job.
          if [ -z "$FOLDERS" ]; then
            echo "has_changed=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Install Package
        if: steps.check.outputs.has_changed == 'true'
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get install -y python3 python-is-python3 pip git unzip ca-certificates curl apt-transport-https lsb-release gnupg parallel
          curl -sL https://aka.ms/InstallAzureCLIDeb | bash
          curl -fsSL https://get.docker.com | sh

      - name: Install python libs
        if: steps.check.outputs.has_changed == 'true'
        run: python -m pip install --break-system-packages pyyaml jinja2 paramiko etcd3 protobuf==3.20.3 kubernetes gitpython

      - name: Decode and unzip config file
        if: steps.check.outputs.has_changed == 'true'
        env:
          CONFIG_FILE_B64: ${{ secrets.CONFIG_FILE_B64 }}
        run: |
          printf '%s' "$CONFIG_FILE_B64" | base64 -d > config.zip
          mkdir -p "$GITHUB_WORKSPACE/config"
          unzip -o config.zip -d "$GITHUB_WORKSPACE/config"
          ls -l "$GITHUB_WORKSPACE/config"

      - name: Arrange Config Files
        if: steps.check.outputs.has_changed == 'true'
        run: |
          # Replace any auth-configuration left in /tmp by an earlier run.
          rm -rf /tmp/auth-configuration
          mv "$GITHUB_WORKSPACE/config/auth-configuration" /tmp/
          ls -l /tmp/auth-configuration

      - name: Login to GHCR
        if: steps.check.outputs.has_changed == 'true'
        env:
          GHCR_USER: ${{ github.actor }}
          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Feed the token on stdin so it never appears in the process list.
          printf '%s' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USER" --password-stdin

      - name: Build Images of Changed Services
        if: steps.check.outputs.has_changed == 'true'
        env:
          CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
        run: |
          echo "Building: $CHANGED_SERVICES"
          # Split the list word by word: alert-manager is built with an
          # explicit image list, every other service is built as a whole.
          has_alert_manager=false
          other_services=""
          for svc in $CHANGED_SERVICES; do
            if [ "$svc" = "alert-manager" ]; then
              has_alert_manager=true
            else
              other_services="$other_services $svc"
            fi
          done
          other_services="${other_services# }"

          if [ "$has_alert_manager" = "true" ]; then
            echo "alert-manager is in the changed services"
            echo "Building specific alert-manager images"
            "$GITHUB_WORKSPACE/build/pai_build.py" build \
              -c "$GITHUB_WORKSPACE/config/cluster-configuration" \
              -s alert-manager \
              -i "$ALERT_MANAGER_IMAGES"
          fi

          echo "Changed services after removing alert-manager: $other_services"
          # Skip the call when nothing is left so that -s is never passed
          # without a value. $other_services is deliberately unquoted: each
          # service must become its own argument.
          if [ -n "$other_services" ]; then
            "$GITHUB_WORKSPACE/build/pai_build.py" build \
              -c "$GITHUB_WORKSPACE/config/cluster-configuration" \
              -s $other_services
          fi

      - name: Push Images of Changed Services to ACR
        if: steps.check.outputs.has_changed == 'true'
        env:
          CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
        run: |
          # No registry override is passed here; the target is presumably the
          # registry defined in the cluster configuration (ACR per step name).
          echo "Pushing: $CHANGED_SERVICES"
          has_alert_manager=false
          other_services=""
          for svc in $CHANGED_SERVICES; do
            if [ "$svc" = "alert-manager" ]; then
              has_alert_manager=true
            else
              other_services="$other_services $svc"
            fi
          done
          other_services="${other_services# }"

          # Push exactly the alert-manager images the build step produced.
          if [ "$has_alert_manager" = "true" ]; then
            "$GITHUB_WORKSPACE/build/pai_build.py" push \
              -c "$GITHUB_WORKSPACE/config/cluster-configuration" \
              -s alert-manager \
              -i "$ALERT_MANAGER_IMAGES"
          fi

          if [ -n "$other_services" ]; then
            "$GITHUB_WORKSPACE/build/pai_build.py" push \
              -c "$GITHUB_WORKSPACE/config/cluster-configuration" \
              -s $other_services
          fi

      - name: Push Images of Changed Service to GHCR
        if: steps.check.outputs.has_changed == 'true'
        env:
          CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
          GHCR_USER: ${{ github.actor }}
          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          echo "Pushing: $CHANGED_SERVICES"
          echo "Changed services before removing alert-manager: $CHANGED_SERVICES"
          has_alert_manager=false
          other_services=""
          for svc in $CHANGED_SERVICES; do
            if [ "$svc" = "alert-manager" ]; then
              has_alert_manager=true
            else
              other_services="$other_services $svc"
            fi
          done
          other_services="${other_services# }"

          if [ "$has_alert_manager" = "true" ]; then
            echo "alert-manager is in the changed services"
            echo "Pushing specific alert-manager images to GHCR"
            "$GITHUB_WORKSPACE/build/pai_build.py" push \
              -c "$GITHUB_WORKSPACE/config/cluster-configuration" \
              -s alert-manager \
              -i "$ALERT_MANAGER_IMAGES" \
              --docker-registry ghcr.io \
              --docker-namespace "$GITHUB_REPOSITORY_OWNER" \
              --docker-username "$GHCR_USER" \
              --docker-password "$GHCR_TOKEN"
          fi

          echo "Changed services after removing alert-manager: $other_services"
          if [ -n "$other_services" ]; then
            "$GITHUB_WORKSPACE/build/pai_build.py" push \
              -c "$GITHUB_WORKSPACE/config/cluster-configuration" \
              -s $other_services \
              --docker-registry ghcr.io \
              --docker-namespace "$GITHUB_REPOSITORY_OWNER" \
              --docker-username "$GHCR_USER" \
              --docker-password "$GHCR_TOKEN"
          fi

      - name: Azure CLI get credentials and deploy
        if: steps.check.outputs.has_changed == 'true'
        env:
          AZURE_MANAGED_IDENTITY_CLIENT_ID: ${{ secrets.AZURE_MANAGED_IDENTITY_CLIENT_ID }}
          AZURE_RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }}
          KUBERNETES_CLUSTER: ${{ secrets.KUBERNETES_CLUSTER }}
          PAI_CLUSTER_NAME: ${{ secrets.PAI_CLUSTER_NAME }}
          CHANGED_SERVICES: ${{ steps.changes.outputs.folders }}
        run: |
          az version
          az login --identity --client-id "$AZURE_MANAGED_IDENTITY_CLIENT_ID"
          az aks install-cli
          az aks get-credentials \
            --resource-group "$AZURE_RESOURCE_GROUP" \
            --name "$KUBERNETES_CLUSTER" \
            --overwrite-existing
          kubelogin convert-kubeconfig -l azurecli
          kubectl config use-context "$KUBERNETES_CLUSTER"

          # Replace "webportal" with "webportal-dind" if "webportal" changed;
          # an existing "webportal-dind" entry is dropped first so it is
          # listed only once.
          services_to_deploy="$CHANGED_SERVICES"
          if echo " $services_to_deploy " | grep -q " webportal "; then
            kept=""
            for s in $services_to_deploy; do
              [ "$s" = "webportal" ] && continue
              [ "$s" = "webportal-dind" ] && continue
              kept="$kept $s"
            done
            # xargs collapses the surrounding and repeated whitespace.
            services_to_deploy=$(echo "$kept webportal-dind" | xargs)
          fi
          echo "Final services to deploy: $services_to_deploy"

          # paictl.py is fed the cluster name on stdin for each command.
          echo "$PAI_CLUSTER_NAME" > cluster_id
          echo "Stopping changed pai services $services_to_deploy on $PAI_CLUSTER_NAME ..."
          "$GITHUB_WORKSPACE/paictl.py" service stop -n $services_to_deploy < cluster_id
          echo "Pushing config to cluster \"$PAI_CLUSTER_NAME\" ..."
          "$GITHUB_WORKSPACE/paictl.py" config push -m service -p "$GITHUB_WORKSPACE/config/cluster-configuration" < cluster_id
          echo "Starting to update $services_to_deploy on $PAI_CLUSTER_NAME ..."
          "$GITHUB_WORKSPACE/paictl.py" service start -n $services_to_deploy < cluster_id
          kubectl get pod
          kubectl get service

  test:
    name: Test rest-server
    needs: build
    runs-on: [self-hosted, paicicd]
    environment: auto-test
    steps:
      - name: Test rest-server
        env:
          PAI_WEB_URL: ${{ secrets.PAI_WEB_URL }}
          PAI_WEB_TOKEN: ${{ secrets.PAI_WEB_TOKEN }}
        run: |
          echo "Testing rest-server $PAI_WEB_URL/rest-server/api/v2/info"
          # -f turns HTTP error statuses into a failing exit code; the retries
          # give services that were just restarted time to come back.
          curl -fsS --retry 5 --retry-delay 10 "$PAI_WEB_URL/rest-server/api/v2/info"
          echo "Checking virtual cluster status..."
          # Test the command directly: with `-e` active a separate `$?` check
          # after a failed assignment would never be reached.
          if ! vc_info=$(curl -fsS --retry 5 --retry-delay 10 \
            -H "Authorization: Bearer $PAI_WEB_TOKEN" \
            "$PAI_WEB_URL/rest-server/api/v2/virtual-clusters"); then
            echo "Failed to access virtual cluster API"
            exit 1
          fi
          echo "Virtual cluster info: $vc_info"