diff --git a/.github/workflows/test-wandb-base.yaml b/.github/workflows/test-wandb-base.yaml
new file mode 100644
index 000000000..bfcce265a
--- /dev/null
+++ b/.github/workflows/test-wandb-base.yaml
@@ -0,0 +1,124 @@
+# Snapshot-tests and install-tests the wandb-base chart on pull requests.
+name: Test wandb-base Chart
+
+on:
+  pull_request:
+    paths:
+      # The chart under test; without this entry, PRs that only touch
+      # charts/wandb-base would never trigger this workflow.
+      - charts/wandb-base/**
+      - charts/operator-wandb/**
+      - test-configs/wandb-base/**
+
+jobs:
+  snapshots:
+    name: Snapshot testing
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Helm
+        uses: azure/setup-helm@v4.2.0
+        with:
+          version: v3.17.0
+      - name: Helm snapshot build and test
+        run: |
+          helm repo add bitnami https://charts.bitnami.com/bitnami
+          helm repo add stakater https://stakater.github.io/stakater-charts
+          helm repo add prometheus https://prometheus-community.github.io/helm-charts
+
+          helm plugin install https://github.com/origranot/helm-cascade
+          helm plugin install https://github.com/jlandowner/helm-chartsnap
+          ./snapshots.sh build wandb-base
+          ./snapshots.sh run wandb-base
+
+  test:
+    name: Test Chart
+    strategy:
+      fail-fast: false
+      matrix:
+        k8s-version: ["v1.32.2", "v1.31.6", "v1.30.10"]
+        configuration:
+          - env-values-example
+          - env-precedence-container
+          - env-precedence-sizing
+          - env-precedence-chart-env
+          - env-precedence-chart-legacy
+          - env-precedence-global-env
+          - env-precedence-global-legacy
+    runs-on: ubuntu-latest
+    environment: Helm Charts
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4.2.0
+        with:
+          version: v3.17.0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Set up chart-testing
+        uses: helm/chart-testing-action@v2.7.0
+        with:
+          version: v3.12.0
+
+      - name: Run chart-testing (list-changed)
+        id: list-changed
+        run: |
+          changed=$(ct list-changed --config ct.yaml)
+          if [[ -n "$changed" ]]; then
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Determine Kind Cluster Name
+        id: cluster
+        run: |
+          # Derive the cluster name "ct-<hash>" from the CRC32 (8 hex digits) of the matrix combination.
+          MATRIX_STRING="${{ matrix.k8s-version }}-${{ matrix.configuration }}"
+
+          HASH=$(python3 -c "
+          import zlib
+          data = '$MATRIX_STRING'.encode('utf-8')
+          crc = zlib.crc32(data) & 0xffffffff
+          print(f'{crc:08x}')
+          ")
+
+          export NAME="ct-${HASH}"
+          echo "name=${NAME}" >> "$GITHUB_OUTPUT"
+          echo "Matrix: $MATRIX_STRING -> Hash: $HASH -> Name: $NAME"
+
+      - name: Create kind cluster
+        uses: helm/kind-action@v1.12.0
+        with:
+          version: v0.27.0
+          cluster_name: ${{ steps.cluster.outputs.name }}
+          node_image: kindest/node:${{ matrix.k8s-version }}
+        # Runs only when list-changed reported a change, or when env.ACT is set.
+        if: env.ACT || steps.list-changed.outputs.changed == 'true'
+
+      - name: Apply user defined secrets
+        run: |
+          if [[ -d test-configs/additional-resources/${{matrix.configuration}} ]]; then
+            kubectl --context kind-${{ steps.cluster.outputs.name }} apply -f test-configs/additional-resources/${{matrix.configuration}}
+            echo "Applied additional resources for ${{matrix.configuration}}"
+            sleep 5
+          else
+            echo "No additional resources to be applied for ${{matrix.configuration}}"
+          fi
+        if: env.ACT || steps.list-changed.outputs.changed == 'true'
+
+      - name: Run chart-testing (install)
+        if: env.ACT || steps.list-changed.outputs.changed == 'true'
+        run: |
+          ct install --namespace default \
+            --charts ./charts/wandb-base \
+            --config ct.yaml \
+            --helm-extra-args '--kube-context kind-${{ steps.cluster.outputs.name }} --timeout 600s' \
+            --helm-extra-set-args '--values test-configs/wandb-base/${{ matrix.configuration }}.yaml'
diff --git a/charts/wandb-base/README.md b/charts/wandb-base/README.md
index fa9deb7b3..33d50a083 100644
--- a/charts/wandb-base/README.md
+++ b/charts/wandb-base/README.md
@@ -30,7 +30,7 @@ Environment variables can be defined at multiple levels, with the following prec
 5. Global environment variables (`global.env`)
 6. 
Legacy global environment variables (`global.extraEnv`)
 
-Example:
+#### Examples of `env` at different levels
 
 ```yaml
 # Global environment variables (lowest precedence)
@@ -52,6 +52,71 @@ containers:
 
 In this example, the `LOG_LEVEL` for the `app` container would be set to `trace`.
 
+#### Examples of different `env` values:
+At any of the different `env` levels shown above, the following patterns are allowed.
+
+[k8s ref envvar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.34/#envvar-v1-core)
+[k8s ref valueFrom](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.34/#envvarsource-v1-core)
+
+Basic Key/Value
+```yaml
+env:
+  EXAMPLE: "100"
+```
+
+```yaml
+env:
+  EXAMPLE_ALT:
+    value: "200"
+```
+
+Reference a configmap
+```yaml
+env:
+  EXAMPLE_FROM_CONFIGMAP:
+    valueFrom:
+      configMapKeyRef:
+        name: "my-configmap"
+        key: "configmap-key"
+```
+
+Reference a secret
+```yaml
+env:
+  EXAMPLE_FROM_SECRET:
+    valueFrom:
+      secretKeyRef:
+        name: "my-secret"
+        key: "secret-key"
+```
+
+Reference k8s data (fieldRef)
+Selects a field of the pod: supports `metadata.name`, `metadata.namespace`, `metadata.labels['']`, `metadata.annotations['']`, `spec.nodeName`, `spec.serviceAccountName`, `status.hostIP`, `status.podIP`, `status.podIPs`
+```yaml
+env:
+  EXAMPLE_FROM_FIELD_REF:
+    valueFrom:
+      fieldRef:
+        fieldPath: metadata.namespace
+  # practical example
+  DD_AGENT_HOST:
+    valueFrom:
+      fieldRef:
+        fieldPath: status.hostIP
+```
+
+Reference k8s data (resourceFieldRef)
+Selects a resource of the container: only resources limits and requests (`limits.cpu`, `limits.memory`, `limits.ephemeral-storage`, `requests.cpu`, `requests.memory` and `requests.ephemeral-storage`) are currently supported.
+```yaml
+env:
+  EXAMPLE_FROM_RESOURCE_FIELD_REF:
+    valueFrom:
+      resourceFieldRef:
+        resource: limits.memory
+```
+
+#### Examples of `envFrom` to set groups of env vars at once
+
 Additionally, environment variables can be sourced from ConfigMaps and Secrets using the `envFrom` field:
 
 ```yaml
diff --git a/charts/wandb-base/templates/tests/test-jobs.yaml b/charts/wandb-base/templates/tests/test-jobs.yaml
new file mode 100644
index 000000000..000f77186
--- /dev/null
+++ b/charts/wandb-base/templates/tests/test-jobs.yaml
@@ -0,0 +1,91 @@
+{{- /*
+  Test resources for charts rendered with kind "Job". For every enabled entry
+  in .Values.jobs this emits a ServiceAccount, a Role, a RoleBinding and a Pod
+  annotated as a Helm test hook that waits for the job's logs and completion.
+*/ -}}
+{{- if eq .Values.kind "Job" }}
+  {{- range $jobName, $job := .Values.jobs }}
+  {{- /* Jobs default to enabled; values set on the job itself override that default. */}}
+  {{- $job := mergeOverwrite (dict "enabled" true) $job}}
+  {{- $fullname := include "wandb-base.fullname" $ }}
+  {{- if $job.enabled }}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: "{{ $fullname }}-test-job-{{ $jobName }}-sa"
+  labels:
+    {{- include "wandb-base.labels" $ | nindent 4 }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: "{{ $fullname }}-test-job-{{ $jobName }}-role"
+  labels:
+    {{- include "wandb-base.labels" $ | nindent 4 }}
+rules:
+- apiGroups: ["batch"]
+  resources: ["jobs"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources: ["pods", "pods/log"]
+  verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: "{{ $fullname }}-test-job-{{ $jobName }}-binding"
+  labels:
+    {{- include "wandb-base.labels" $ | nindent 4 }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: "{{ $fullname }}-test-job-{{ $jobName }}-role"
+subjects:
+- kind: ServiceAccount
+  name: "{{ $fullname }}-test-job-{{ $jobName }}-sa"
+  namespace: {{ $.Release.Namespace | quote }}
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ $fullname }}-test-job-{{ $jobName }}"
+  labels:
+    {{- include "wandb-base.labels" $ | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    "helm.sh/hook-weight": "1"
+spec:
+  serviceAccountName: "{{ $fullname }}-test-job-{{ $jobName }}-sa"
+  containers:
+    - name: job-test
+      image: bitnami/kubectl:latest
+      command: ['/bin/sh']
+      args:
+        - -c
+        - |
+          set -e
+          echo "Testing job: {{ printf "%s-%s" $fullname $jobName }}"
+          {{- /* Poll for the job's logs (up to 60 attempts, 5s apart), then wait up to 600s for completion. */}}
+
+          i=0
+          until kubectl logs job/{{ printf "%s-%s" $fullname $jobName }}; do
+            i=$((i+1))
+            if [ "$i" -ge 60 ]; then
+              echo "Timed out waiting for job logs."
+              exit 1
+            fi
+            sleep 5
+          done
+          if ! kubectl wait --for=condition=complete job/{{ printf "%s-%s" $fullname $jobName }} --timeout=600s; then
+            echo "Job {{ printf "%s-%s" $fullname $jobName }} did not complete successfully."
+            kubectl describe job/{{ printf "%s-%s" $fullname $jobName }} || true
+            kubectl logs job/{{ printf "%s-%s" $fullname $jobName }} --tail=200 || true
+            exit 1
+          fi
+          echo "Job {{ printf "%s-%s" $fullname $jobName }} completed successfully."
+
+  restartPolicy: Never
+  {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/snapshots.sh b/snapshots.sh
index ce2c63226..2ccab99ac 100755
--- a/snapshots.sh
+++ b/snapshots.sh
@@ -20,7 +20,7 @@ function usage() {
   cat <