Skip to content

Commit 72c5bea

Browse files
committed
Add prefix cache test to github actions
1 parent 1e57cc0 commit 72c5bea

File tree

2 files changed

+374
-0
lines changed

2 files changed

+374
-0
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# -----------------------------------------------------------------------------
5+
# e2e-validate.sh — CI e2e Gateway smoke-test (chat + completion, 10 iterations)
6+
# -----------------------------------------------------------------------------
7+
8+
#######################################
# Print usage information and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
show_help() {
  # The documented defaults mirror the script: the namespace default is
  # "igw-e2e", and a model must be supplied explicitly because no discovery
  # is performed.
  cat <<EOF
Usage: $(basename "$0") [OPTIONS]

Options:
  -n, --namespace NAMESPACE   Kubernetes namespace (default: igw-e2e)
  -m, --model MODEL_ID        Model to query. Required unless the MODEL_ID
                              environment variable is set.
  -v, --verbose               Echo kubectl/curl commands before running
  -h, --help                  Show this help and exit
EOF
  exit 0
}
20+
21+
# ── Defaults ────────────────────────────────────────────────────────────────
NAMESPACE="igw-e2e"
CLI_MODEL_ID=""
VERBOSE=false

# ── Flag parsing ────────────────────────────────────────────────────────────
#######################################
# Parse command-line options into the global settings.
# Globals:   NAMESPACE, CLI_MODEL_ID, VERBOSE (written)
# Arguments: the script's command-line arguments
# Outputs:   diagnostics and usage on stderr for invalid input
# Returns:   0 on success; exits 1 on an unknown option or a missing value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -n|--namespace)
        # Check explicitly so a trailing flag gives a clear message instead of
        # an "unbound variable" abort from `set -u`.
        [[ $# -ge 2 ]] || { echo "Error: $1 requires an argument." >&2; exit 1; }
        NAMESPACE="$2"
        shift 2
        ;;
      -m|--model)
        [[ $# -ge 2 ]] || { echo "Error: $1 requires an argument." >&2; exit 1; }
        CLI_MODEL_ID="$2"
        shift 2
        ;;
      -v|--verbose)
        VERBOSE=true
        shift
        ;;
      -h|--help)
        show_help
        ;;
      *)
        echo "Unknown option: $1" >&2
        # show_help exits 0 by itself; run it in a subshell so that an invalid
        # invocation still fails with a non-zero status.
        (show_help) >&2 || true
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

if [[ "${VERBOSE}" == "true" ]]; then
  set -x
fi
40+
41+
# ── Create a unique pod suffix ────────────────────────────────────────────
42+
# Emit a pseudo-random integer between 1 and 10000 (inclusive) on stdout.
# The value is derived from $RANDOM, so repeated calls may return the same number.
gen_id() {
  local suffix=$(( (RANDOM % 10000) + 1 ))
  printf '%s\n' "$suffix"
}
43+
44+
# ── Discover Gateway address ────────────────────────────────────────────────
# A non-empty GATEWAY_HOST from the environment takes precedence. Otherwise
# the first address of the first Gateway listed in the namespace is used;
# kubectl errors are silenced and simply leave HOST empty.
HOST="${GATEWAY_HOST:-}"
if [[ -z "$HOST" ]]; then
  HOST="$(kubectl get gateway -n "$NAMESPACE" \
    -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)"
fi

[[ -n "$HOST" ]] || {
  echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." >&2
  exit 1
}

PORT=80
SVC_HOST="${HOST}:${PORT}"
53+
54+
# ── Determine MODEL_ID ──────────────────────────────────────────────────────
# Precedence: the -m/--model flag first, then a MODEL_ID already present in the
# environment. With neither available the script stops.
MODEL_ID="${CLI_MODEL_ID:-${MODEL_ID-}}"
if [[ -z "$MODEL_ID" ]]; then
  echo "Error: Failed to find model id. Please specify one using the -m flag or the MODEL_ID environment variable." >&2
  exit 1
fi

# Summarize the resolved settings before the requests start.
echo "Namespace: $NAMESPACE"
echo "Inference Gateway: ${SVC_HOST}"
echo "Model ID: $MODEL_ID"
echo
68+
69+
# ── Main test loop (10 iterations) ──────────────────────────────────────────

#######################################
# POST a JSON body to the gateway from a throwaway curl pod and check the reply.
# Globals:   NAMESPACE, SVC_HOST, VERBOSE (read)
# Arguments: $1 - request path, e.g. /v1/completions
#            $2 - JSON request body
# Outputs:   the pod's output on stdout (preceded by the equivalent command
#            when VERBOSE is true); an error message on stderr on failure
# Returns:   0 if kubectl succeeded and the output contains '{', otherwise 1
#######################################
post_via_curl_pod() {
  local path=$1
  local payload=$2
  local url="http://${SVC_HOST}${path}"
  local id output
  local ret=0

  id=$(gen_id)

  # Compare as a string rather than executing the variable's contents.
  if [[ "$VERBOSE" == "true" ]]; then
    cat <<CMD
  - Running command:
    kubectl run --rm -i curl-${id} \\
      --namespace "${NAMESPACE}" \\
      --image=curlimages/curl --restart=Never -- \\
      curl -sS -X POST "${url}" \\
        -H 'accept: application/json' \\
        -H 'Content-Type: application/json' \\
        -d '${payload}'

CMD
  fi

  # Capture the status with `|| ret=$?` so a failing pod is reported below
  # instead of aborting the script through `set -e`.
  output=$(kubectl run --rm -i "curl-${id}" \
    --namespace "$NAMESPACE" \
    --image=curlimages/curl --restart=Never -- \
    sh -c "sleep 1; curl -sS -X POST '${url}' \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json' \
      -d '${payload}'") || ret=$?
  echo "$output"

  # A reply without any '{' is treated as "no JSON came back".
  if [[ $ret -ne 0 || "$output" != *'{'* ]]; then
    echo "Error: POST ${path} failed (exit $ret or no JSON)" >&2
    return 1
  fi
  return 0
}

for i in {1..10}; do
  echo "=== Iteration $i of 10 ==="
  failed=false

  # 1) POST /v1/chat/completions
  echo "1) POST /v1/chat/completions at ${SVC_HOST}"
  chat_payload='{
    "model":"'"$MODEL_ID"'",
    "messages":[{"role":"user","content":"Hello! Who are you?"}]
  }'
  post_via_curl_pod /v1/chat/completions "$chat_payload" || failed=true
  echo

  # 2) POST /v1/completions
  echo "2) POST /v1/completions at ${SVC_HOST}"
  payload='{"model":"'"$MODEL_ID"'","prompt":"You are a helpful AI assistant."}'
  post_via_curl_pod /v1/completions "$payload" || failed=true
  echo

  # Both requests are always attempted; the iteration fails if either did.
  if [[ "$failed" == "true" ]]; then
    echo "Iteration $i encountered errors; exiting." >&2
    exit 1
  fi
done

echo "✅ All 10 iterations succeeded."
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# Deploys a vLLM model server with prefix caching enabled, an InferencePool
# (Helm chart) and a Gateway into a GKE cluster, runs the e2e validation
# script against it, collects logs, and cleans up.
name: GKE Prefix Cache Aware Test

on:
  # Runs with a PR comment /run-gke-prefix-cache-aware
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      pr_or_branch:
        description: 'Pull-request number or branch name to test'
        required: true
        default: 'actions'
        type: string

permissions:
  contents: read

jobs:
  deploy_and_validate:
    # Comment-triggered runs require the trigger phrase on a pull request and
    # an author who is an owner, member or collaborator.
    if: >
      github.event_name == 'schedule' ||
      github.event_name == 'pull_request' ||
      github.event_name == 'workflow_dispatch' ||
      (
        github.event_name == 'issue_comment' &&
        github.event.issue.pull_request &&
        (
          contains(github.event.comment.body, '/run-gke-prefix-cache-aware')
        ) &&
        (
          github.event.comment.author_association == 'OWNER' ||
          github.event.comment.author_association == 'MEMBER' ||
          github.event.comment.author_association == 'COLLABORATOR'
        )
      )
    name: Test on ${{ matrix.accelerator.name }}
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
        accelerator:
          - name: GPU
            helmfile_env: gke
            pod_readiness_sleep_seconds: 180
          - name: TPU
            helmfile_env: gke_tpu
            pod_readiness_sleep_seconds: 1080

    env:
      GCP_PROJECT_ID: llm-d-scale
      GKE_CLUSTER_NAME: llm-d-e2e-us-east5
      GKE_CLUSTER_ZONE: us-east5
      NAMESPACE: igw-prefix-cache-aware
      GATEWAY: gke-l7-regional-external-managed
      GATEWAY_TYPE: gke
      # Consumed by `helm install --version`; without a value the flag would
      # swallow the chart reference that follows it.
      # NOTE(review): pinned to the same v1.1.0 release as the CRD and Gateway
      # manifests used below — confirm this is the intended chart version.
      IGW_CHART_VERSION: v1.1.0
      PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }}
      HF_TOKEN: ${{ secrets.HF_TOKEN }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          persist-credentials: false

      - name: Determine if pr_or_branch is a PR number
        id: check_pr
        env:
          # This step-level value shadows the job-level PR_OR_BRANCH, so it must
          # also carry the PR number for comment-triggered runs; otherwise those
          # runs fall back to the 'actions' branch.
          PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number }}
        shell: bash
        run: |
          echo "PR_OR_BRANCH=${PR_OR_BRANCH:-actions}" >> "$GITHUB_ENV"
          if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
            echo "is_pr=true" >> "$GITHUB_OUTPUT"
          elif [[ "${{ github.event_name }}" = "pull_request" ]]; then
            echo "PR_OR_BRANCH=${{ github.event.pull_request.number }}" >> "$GITHUB_ENV"
            echo "is_pr=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_pr=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Fetch and checkout PR
        if: steps.check_pr.outputs.is_pr == 'true'
        run: |
          git fetch origin pull/"$PR_OR_BRANCH"/head:pr-"$PR_OR_BRANCH"
          git checkout pr-"$PR_OR_BRANCH"

      - name: Checkout branch
        if: steps.check_pr.outputs.is_pr == 'false'
        run: git checkout "$PR_OR_BRANCH"

      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5
        with:
          credentials_json: ${{ secrets.GKE_SA_KEY }}

      - name: Set up gcloud CLI and kubectl
        uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397
        with:
          project_id: ${{ env.GCP_PROJECT_ID }}
          install_components: 'kubectl,gke-gcloud-auth-plugin'

      - name: Get GKE credentials
        run: |
          gcloud container clusters get-credentials "${{ env.GKE_CLUSTER_NAME }}" --zone "${{ env.GKE_CLUSTER_ZONE }}"

      - name: Create namespace
        run: |
          kubectl create namespace "${NAMESPACE}" || echo "Namespace already exists"

      - name: Create llm-d-hf-token secret
        run: |
          # Read the token from the job-level HF_TOKEN environment variable so
          # the secret value is not pasted into the script text.
          kubectl create secret generic llm-d-hf-token \
            --from-literal="HF_TOKEN=${HF_TOKEN}" \
            --namespace "${NAMESPACE}" \
            --dry-run=client -o yaml | kubectl apply -f -

      - name: Deploy Model Server and CRDs
        run: |
          # Without pipefail a kubectl failure would be hidden by `tee`.
          set -o pipefail
          cd config/manifests/vllm
          # NOTE(review): the whitespace after `a\` becomes the indentation of
          # the inserted argument — confirm it lines up with the existing
          # `- --model` entry in gpu-deployment.yaml.
          sed -i '/- --model/a\ - --enable-prefix-caching' gpu-deployment.yaml
          echo "Deploying Model Server..."
          kubectl apply -f gpu-deployment.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefix-cache-deployment.log
          echo "Installing CRDs"
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml
          echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log

      - name: Deploy InferencePool and Endpoint Picker Extension
        run: |
          set -o pipefail
          # `tee -a` appends, so output from earlier steps stays in the log.
          helm install vllm-llama3-8b-instruct \
            --namespace "${NAMESPACE}" \
            --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
            --set provider.name="${GATEWAY_TYPE}" \
            --version "${IGW_CHART_VERSION}" \
            oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee -a ~/igw-prefix-cache-deployment.log
          echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log

      - name: Deploy Gateway
        run: |
          set -o pipefail
          echo "Deploying Gateway..."
          kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/gateway.yaml | tee -a ~/igw-prefix-cache-deployment.log
          echo "Deploying HTTPRoute..."
          # NOTE(review): httproute.gke.yaml is resolved relative to the step's
          # working directory — confirm the file exists there.
          kubectl apply -f httproute.gke.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefix-cache-deployment.log
          echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log

      - name: Wait for all pods to be ready
        run: |
          kubectl wait pod \
            --for=condition=Ready \
            --all \
            -n "${NAMESPACE}" \
            --timeout=15m
          sleep ${{ matrix.accelerator.pod_readiness_sleep_seconds }} # TODO: remove this once examples have readiness probes
          echo "✅ All pods are ready."
          kubectl get pods -n "${NAMESPACE}"

      - name: Wait for gateway to be ready
        run: |
          GATEWAY_NAME=inference-gateway
          kubectl wait gateway/${GATEWAY_NAME} \
            --for=condition=Programmed=True \
            -n "${NAMESPACE}" \
            --timeout=300s
          echo "✅ Gateway is ready."
          kubectl get gateway -n "${NAMESPACE}"

      - name: Show deployment status
        run: |
          echo "=== Deployments ==="
          kubectl get deployments -n "${NAMESPACE}"
          echo ""
          echo "=== Pods ==="
          kubectl get pods -n "${NAMESPACE}"
          echo ""
          echo "=== Services ==="
          kubectl get svc -n "${NAMESPACE}"
          echo ""
          echo "=== Helm releases ==="
          helm list -n "${NAMESPACE}" || true
          echo ""
          echo "=== Inference Pools ==="
          kubectl get inferencepools -n "${NAMESPACE}" || true
          echo ""
          echo "=== HTTPRoutes ==="
          kubectl get httproutes -n "${NAMESPACE}" || true
          echo ""
          echo "=== Gateway ==="
          kubectl get Gateway -n "${NAMESPACE}" || true
          echo ""

      - name: Verify installation and run inference tests
        run: |
          # NOTE(review): e2e-validate.sh exits with an error unless a model is
          # given via -m or the MODEL_ID environment variable; neither is set in
          # this workflow — confirm how the model id is meant to be supplied.
          cd .github/scripts/e2e
          ./e2e-validate.sh -n "${NAMESPACE}" -v

      - name: Collect and upload Kubernetes pod logs
        if: always()
        run: |
          mkdir -p pod-logs-inference-prefix-cache
          cd pod-logs-inference-prefix-cache
          echo "Fetching ${NAMESPACE} pods log..."
          kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \
            | xargs -I{} sh -c 'kubectl logs --all-containers=true -n "${NAMESPACE}" {} > "{}.log" 2>&1'
          echo "Fetching ${NAMESPACE} pods descriptions..."
          kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \
            | xargs -I{} sh -c 'kubectl describe pod -n "${NAMESPACE}" {} > "{}-describe.log" 2>&1'
          mv ~/igw-prefix-cache-deployment.log . || true
          mv ~/install-deps.log . || true

      - name: Upload pod logs as artifact
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: igw-pod-logs-inference-prefix-cache-${{ matrix.accelerator.name }}
          path: pod-logs-inference-prefix-cache

      - name: Send Google Chat notification on failure
        if: failure()
        uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb
        with:
          webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }}
          jobStatus: ${{ job.status }}
          title: '${{ github.workflow }} - ${{ matrix.accelerator.name }}'

      - name: Cleanup deployment
        if: always()
        run: |
          # Best effort: each removal is attempted even if an earlier one fails,
          # for example because the job stopped before that resource was created.
          GATEWAY_NAME=inference-gateway
          helm uninstall vllm-llama3-8b-instruct -n "${NAMESPACE}" || true
          kubectl delete -f httproute.gke.yaml -n "${NAMESPACE}" || true
          kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" || true

0 commit comments

Comments
 (0)