---
# End-to-end test for prefix-cache-aware routing on GKE.
#
# For each accelerator in the matrix the job deploys the vLLM model server
# manifest, the Gateway API Inference Extension CRDs, the InferencePool Helm
# chart, a Gateway and an HTTPRoute into ${NAMESPACE}; waits for pods and the
# gateway; runs .github/scripts/e2e/e2e-validate.sh; uploads pod logs; and
# finally removes the Helm release, HTTPRoute and Gateway.
name: GKE Prefix Cache Aware Test

on:
  # Runs with a PR comment /run-gke-prefix-cache-aware
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      pr_or_branch:
        description: 'Pull-request number or branch name to test'
        required: true
        default: 'actions'
        type: string

permissions:
  contents: read

jobs:
  deploy_and_validate:
    # Comment-triggered runs are accepted only on pull requests, only when the
    # comment contains the trigger phrase, and only from owners, members or
    # collaborators.
    # NOTE(review): 'schedule' and 'pull_request' are allowed here although
    # `on:` above declares neither trigger, so those branches are currently
    # unreachable — confirm whether the triggers should be added or dropped.
    if: >
      github.event_name == 'schedule' ||
      github.event_name == 'pull_request' ||
      github.event_name == 'workflow_dispatch' ||
      (
        github.event_name == 'issue_comment' &&
        github.event.issue.pull_request &&
        (
          contains(github.event.comment.body, '/run-gke-prefix-cache-aware')
        ) &&
        (
          github.event.comment.author_association == 'OWNER' ||
          github.event.comment.author_association == 'MEMBER' ||
          github.event.comment.author_association == 'COLLABORATOR'
        )
      )
    name: Test on ${{ matrix.accelerator.name }}
    runs-on: ubuntu-latest

    # Naming the shell explicitly makes every `run:` step use bash with
    # `-eo pipefail`, so a failing `kubectl`/`helm` piped into `tee` fails the
    # step instead of being masked by tee's exit status.
    defaults:
      run:
        shell: bash

    strategy:
      fail-fast: false
      # One accelerator at a time: both matrix legs deploy into the same
      # namespace.
      max-parallel: 1
      matrix:
        accelerator:
          # helmfile_env is not referenced by any step in this workflow.
          # pod_readiness_sleep_seconds is the extra settle time applied after
          # `kubectl wait` reports the pods Ready.
          - name: GPU
            helmfile_env: gke
            pod_readiness_sleep_seconds: 180
          - name: TPU
            helmfile_env: gke_tpu
            pod_readiness_sleep_seconds: 1080

    env:
      GCP_PROJECT_ID: llm-d-scale
      GKE_CLUSTER_NAME: llm-d-e2e-us-east5
      GKE_CLUSTER_ZONE: us-east5
      NAMESPACE: igw-prefix-cache-aware
      GATEWAY: gke-l7-regional-external-managed
      GATEWAY_TYPE: gke
      # Version passed to `helm ... --version` for the inferencepool chart.
      # NOTE(review): pinned to match the v1.1.0 CRD and gateway manifests
      # applied below — confirm this is the intended chart version.
      IGW_CHART_VERSION: v1.1.0
      PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }}
      HF_TOKEN: ${{ secrets.HF_TOKEN }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          persist-credentials: false

      # Decides whether the ref under test is a PR number or a branch name and
      # exports the final PR_OR_BRANCH to later steps through GITHUB_ENV.
      - name: Determine if pr_or_branch is a PR number
        id: check_pr
        env:
          # Dispatch input first; for comment-triggered runs fall back to the
          # number of the pull request the comment was posted on, so the
          # commented PR (not the default branch name) is what gets tested.
          PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number }}
          # Event data is passed through the environment rather than being
          # interpolated into the script text.
          EVENT_NAME: ${{ github.event_name }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        shell: bash
        run: |
          echo "PR_OR_BRANCH=${PR_OR_BRANCH:-actions}" >> "$GITHUB_ENV"
          if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
            echo "is_pr=true" >> "$GITHUB_OUTPUT"
          elif [[ "$EVENT_NAME" = "pull_request" ]]; then
            echo "PR_OR_BRANCH=${PR_NUMBER}" >> "$GITHUB_ENV"
            echo "is_pr=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_pr=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Fetch and checkout PR
        if: steps.check_pr.outputs.is_pr == 'true'
        run: |
          git fetch origin pull/"$PR_OR_BRANCH"/head:pr-"$PR_OR_BRANCH"
          git checkout pr-"$PR_OR_BRANCH"

      - name: Checkout branch
        if: steps.check_pr.outputs.is_pr == 'false'
        run: |
          # The initial checkout only fetches the triggering ref, so fetch the
          # requested branch explicitly before switching to it.
          git fetch --depth=1 origin "+refs/heads/${PR_OR_BRANCH}:refs/remotes/origin/${PR_OR_BRANCH}"
          git checkout "$PR_OR_BRANCH"

      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5
        with:
          credentials_json: ${{ secrets.GKE_SA_KEY }}

      - name: Set up gcloud CLI and kubectl
        uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397
        with:
          project_id: ${{ env.GCP_PROJECT_ID }}
          install_components: 'kubectl,gke-gcloud-auth-plugin'

      - name: Get GKE credentials
        run: |
          gcloud container clusters get-credentials "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}"

      - name: Create namespace
        run: |
          kubectl create namespace "${NAMESPACE}" || echo "Namespace already exists"

      - name: Create llm-d-hf-token secret
        run: |
          # The token is read from the job-level HF_TOKEN environment variable
          # so its value is never spliced into the script source.
          # --dry-run + apply keeps the step re-runnable when the secret exists.
          kubectl create secret generic llm-d-hf-token \
            --from-literal="HF_TOKEN=${HF_TOKEN}" \
            --namespace "${NAMESPACE}" \
            --dry-run=client -o yaml | kubectl apply -f -

      - name: Deploy Model Server and CRDs
        run: |
          cd config/manifests/vllm
          # Add `--enable-prefix-caching` as a new list item directly after the
          # line matching `- --model`, reusing that line's indentation so the
          # manifest stays valid YAML.
          # NOTE(review): assumes the model flag and its value share that one
          # line in gpu-deployment.yaml — confirm against the manifest.
          sed -i -E 's/^([[:space:]]*)- --model.*$/&\n\1- --enable-prefix-caching/' gpu-deployment.yaml
          echo "Deploying Model Server..."
          kubectl apply -f gpu-deployment.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefix-cache-deployment.log
          echo "Installing CRDs"
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml
          echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log

      - name: Deploy InferencePool and Endpoint Picker Extension
        run: |
          # `upgrade --install` keeps the step re-runnable if a release from an
          # earlier run is still present; `tee -a` appends to the shared log
          # instead of truncating what earlier steps wrote.
          helm upgrade --install vllm-llama3-8b-instruct \
            --namespace "${NAMESPACE}" \
            --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
            --set provider.name="${GATEWAY_TYPE}" \
            --version "${IGW_CHART_VERSION}" \
            oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee -a ~/igw-prefix-cache-deployment.log
          echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log

      - name: Deploy Gateway
        run: |
          echo "Deploying Gateway..."
          kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/gateway.yaml | tee -a ~/igw-prefix-cache-deployment.log
          echo "Deploying HTTPRoute..."
          # NOTE(review): httproute.gke.yaml is resolved relative to the
          # repository root — confirm the file lives there.
          kubectl apply -f httproute.gke.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefix-cache-deployment.log
          echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log

      - name: Wait for all pods to be ready
        run: |
          kubectl wait pod \
            --for=condition=Ready \
            --all \
            -n "${NAMESPACE}" \
            --timeout=15m
          sleep ${{ matrix.accelerator.pod_readiness_sleep_seconds }} # TODO: remove this once examples have readiness probes
          echo "✅ All pods are ready."
          kubectl get pods -n "${NAMESPACE}"

      - name: Wait for gateway to be ready
        run: |
          GATEWAY_NAME=inference-gateway
          kubectl wait gateway/${GATEWAY_NAME} \
            --for=condition=Programmed=True \
            -n "${NAMESPACE}" \
            --timeout=300s
          echo "✅ Gateway is ready."
          kubectl get gateway -n "${NAMESPACE}"

      # Diagnostic listing only; the optional resource kinds use `|| true` so a
      # missing kind does not fail the job.
      - name: Show deployment status
        run: |
          echo "=== Deployments ==="
          kubectl get deployments -n "${NAMESPACE}"
          echo ""
          echo "=== Pods ==="
          kubectl get pods -n "${NAMESPACE}"
          echo ""
          echo "=== Services ==="
          kubectl get svc -n "${NAMESPACE}"
          echo ""
          echo "=== Helm releases ==="
          helm list -n "${NAMESPACE}" || true
          echo ""
          echo "=== Inference Pools ==="
          kubectl get inferencepools -n "${NAMESPACE}" || true
          echo ""
          echo "=== HTTPRoutes ==="
          kubectl get httproutes -n "${NAMESPACE}" || true
          echo ""
          echo "=== Gateway ==="
          kubectl get Gateway -n "${NAMESPACE}" || true
          echo ""

      - name: Verify installation and run inference tests
        run: |
          cd .github/scripts/e2e
          ./e2e-validate.sh -n "${NAMESPACE}" -v

      # Runs even after a failure so logs are available for debugging. Writes
      # one log file and one describe file per pod, then moves the deployment
      # logs alongside them.
      - name: Collect and upload Kubernetes pod logs
        if: always()
        run: |
          mkdir -p pod-logs-inference-prefix-cache
          cd pod-logs-inference-prefix-cache
          echo "Fetching ${NAMESPACE} pods log..."
          kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \
            | xargs -I{} sh -c 'kubectl logs --all-containers=true -n "${NAMESPACE}" {} > "{}.log" 2>&1'
          echo "Fetching ${NAMESPACE} pods descriptions..."
          kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \
            | xargs -I{} sh -c 'kubectl describe pod -n "${NAMESPACE}" {} > "{}-describe.log" 2>&1'
          mv ~/igw-prefix-cache-deployment.log . || true
          mv ~/install-deps.log . || true

      - name: Upload pod logs as artifact
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: igw-pod-logs-inference-prefix-cache-${{ matrix.accelerator.name }}
          path: pod-logs-inference-prefix-cache

      - name: Send Google Chat notification on failure
        if: failure()
        uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb
        with:
          webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }}
          jobStatus: ${{ job.status }}
          title: '${{ github.workflow }} - ${{ matrix.accelerator.name }}'

      - name: Cleanup deployment
        if: always()
        run: |
          GATEWAY_NAME=inference-gateway
          # Attempt every deletion even if an earlier one fails (for example
          # when the job aborted before the release was installed), then
          # report failure if any of them did not succeed.
          status=0
          helm uninstall vllm-llama3-8b-instruct -n "${NAMESPACE}" || status=1
          kubectl delete -f httproute.gke.yaml -n "${NAMESPACE}" || status=1
          kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" || status=1
          exit "${status}"