Skip to content

Commit c468ce0

Browse files
committed
add helm charts for app-design-center
1 parent a3f1a42 commit c468ce0

35 files changed

+1211
-1
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515
/quickstarts/whereami/ @theemadnes @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
1616
/security/language-vulns/ @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
1717
/streaming/ @aburhan @pwschuurman @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
18-
/windows/ @ibabou @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
18+
/windows/ @ibabou @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: app-design-center-ci
16+
"on":
17+
push:
18+
branches:
19+
- main
20+
paths:
21+
- .github/workflows/app-design-center-ci.yml
22+
- app-design-center/**
23+
pull_request:
24+
paths:
25+
- .github/workflows/app-design-center-ci.yml
26+
- app-design-center/**
27+
jobs:
28+
lint:
29+
runs-on: ubuntu-latest
30+
steps:
31+
- uses: actions/checkout@v4
32+
- name: Set up Helm
33+
uses: azure/setup-helm@v4
34+
- name: Lint, template, and dry-run app-design-center/ai-inference
35+
run: |
36+
helm lint app-design-center/ai-inference
37+
helm template app-design-center/ai-inference
38+
helm template ai-inference-test app-design-center/ai-inference
39+
- name: Lint, template, and dry-run app-design-center/enterprise-production
40+
run: |
41+
helm lint app-design-center/enterprise-production
42+
helm template app-design-center/enterprise-production
43+
helm template enterprise-production-test app-design-center/enterprise-production
44+
- name: Lint, template, and dry-run app-design-center/simple-app-multi-region
45+
run: |
46+
helm lint app-design-center/simple-app-multi-region
47+
helm template app-design-center/simple-app-multi-region
48+
helm template simple-app-multi-region-test app-design-center/simple-app-multi-region
49+
- name: Lint, template, and dry-run app-design-center/simple-app-single-region
50+
run: |
51+
helm lint app-design-center/simple-app-single-region
52+
helm template app-design-center/simple-app-single-region
53+
helm template simple-app-single-region-test app-design-center/simple-app-single-region
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: v2
16+
name: gemma
17+
# The model is chosen by model.id in values.yaml (default: google/gemma-7b-it).
description: A Helm chart for deploying a Gemma model for inference with vLLM
18+
type: application
19+
version: 0.1.0
20+
appVersion: "1.0"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# AI Inference Helm Chart
2+
3+
This chart deploys a Gemma model for inference with vLLM. The default model is `google/gemma-7b-it`; set `model.id` in `values.yaml` to use a different one. Visit https://cloud.google.com/kubernetes-engine/docs/ to follow the tutorial.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
{{/*
2+
Copyright 2025 Google LLC
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/}}
16+
{{/*
17+
Expand the name of the chart.
18+
*/}}
19+
{{- define "gemma.name" -}}
20+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
21+
{{- end }}
22+
23+
{{/*
24+
Create a default fully qualified app name.
25+
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
26+
If release name contains chart name it will be used as a full name.
27+
*/}}
28+
{{- define "gemma.fullname" -}}
29+
{{- if .Values.fullnameOverride }}
30+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
31+
{{- else }}
32+
{{- $name := default .Chart.Name .Values.nameOverride }}
33+
{{- if contains $name .Release.Name }}
34+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
35+
{{- else }}
36+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
37+
{{- end }}
38+
{{- end }}
39+
{{- end }}
40+
41+
{{/*
42+
Create chart labels for a chart.
43+
*/}}
44+
{{- define "gemma.chart" -}}
45+
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
46+
{{- end }}
47+
48+
{{/*
49+
Common labels
50+
*/}}
51+
{{- define "gemma.labels" -}}
52+
helm.sh/chart: {{ include "gemma.chart" . }}
53+
{{ include "gemma.selectorLabels" . }}
54+
{{- if .Chart.AppVersion }}
55+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
56+
{{- end }}
57+
app.kubernetes.io/managed-by: {{ .Release.Service }}
58+
{{- end }}
59+
60+
{{/*
61+
Selector labels
62+
*/}}
63+
{{- define "gemma.selectorLabels" -}}
64+
app.kubernetes.io/name: {{ include "gemma.name" . }}
65+
app.kubernetes.io/instance: {{ .Release.Name }}
66+
{{- end }}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: apps/v1
16+
kind: Deployment
17+
metadata:
18+
name: {{ include "gemma.fullname" . }}
19+
labels:
20+
{{- include "gemma.labels" . | nindent 4 }}
21+
spec:
22+
replicas: {{ .Values.replicaCount }}
23+
selector:
24+
matchLabels:
25+
{{- include "gemma.selectorLabels" . | nindent 6 }}
26+
template:
27+
metadata:
28+
labels:
29+
{{- include "gemma.selectorLabels" . | nindent 8 }}
30+
spec:
31+
containers:
32+
- name: {{ .Chart.Name }}
33+
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
34+
imagePullPolicy: {{ .Values.image.pullPolicy }}
35+
args:
36+
- --model=$(MODEL_ID)
37+
- --disable-log-requests
38+
- --tensor-parallel-size={{ .Values.tensorParallelSize }}
39+
# vLLM's documented flag is --max-num-seqs (plural); use the full name rather
# than relying on argparse prefix matching.
- --max-num-seqs=512
40+
- --gpu-memory-utilization=0.95
41+
- --num-scheduler-steps=8
42+
- --max-model-len={{ .Values.maxModelLen }}
43+
command:
44+
- python3
45+
- -m
46+
- vllm.entrypoints.openai.api_server
47+
env:
48+
- name: MODEL_ID
49+
# Quoted so that an empty, numeric-looking, or boolean-looking override still
# renders as a string, which is what an env var value must be.
value: {{ .Values.model.id | quote }}
50+
- name: HUGGING_FACE_HUB_TOKEN
51+
valueFrom:
52+
secretKeyRef:
53+
key: hf_api_token
54+
# Quoted so the Secret name always renders as a YAML string.
name: {{ .Values.model.hfSecret | quote }}
55+
ports:
56+
- containerPort: 8000
57+
name: metrics
58+
readinessProbe:
59+
failureThreshold: 6000
60+
httpGet:
61+
path: /health
62+
port: 8000
63+
periodSeconds: 10
64+
resources:
65+
{{- toYaml .Values.resources | nindent 12 }}
66+
volumeMounts:
67+
- mountPath: /dev/shm
68+
name: dshm
69+
nodeSelector:
70+
{{- toYaml .Values.nodeSelector | nindent 8 }}
71+
volumes:
72+
- emptyDir:
73+
medium: Memory
74+
name: dshm
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- if .Values.hpa.enabled }}
16+
apiVersion: autoscaling/v2
17+
kind: HorizontalPodAutoscaler
18+
metadata:
19+
name: {{ include "gemma.fullname" . }}
20+
labels:
21+
{{- include "gemma.labels" . | nindent 4 }}
22+
spec:
23+
scaleTargetRef:
24+
apiVersion: apps/v1
25+
kind: Deployment
26+
name: {{ include "gemma.fullname" . }}
27+
minReplicas: {{ .Values.hpa.minReplicas }}
28+
maxReplicas: {{ .Values.hpa.maxReplicas }}
29+
metrics:
30+
- pods:
31+
metric:
32+
name: prometheus.googleapis.com|vllm:gpu_cache_usage_perc|gauge
33+
target:
34+
averageValue: 504m
35+
type: AverageValue
36+
type: Pods
37+
{{- end }}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- /* Only trim on the left: a right-hand trim marker would pull "apiVersion" up onto the license comment line above, leaving the manifest without an apiVersion. */}}
{{- if .Values.pdb.enabled }}
16+
apiVersion: policy/v1
17+
kind: PodDisruptionBudget
18+
metadata:
19+
name: {{ include "gemma.fullname" . }}
20+
labels:
21+
{{- include "gemma.labels" . | nindent 4 }}
22+
spec:
23+
minAvailable: {{ .Values.pdb.minAvailable }}
24+
selector:
25+
matchLabels:
26+
{{- include "gemma.selectorLabels" . | nindent 6 }}
27+
{{- end }}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: v1
16+
kind: Service
17+
metadata:
18+
name: {{ include "gemma.fullname" . }}
19+
labels:
20+
{{- include "gemma.labels" . | nindent 4 }}
21+
spec:
22+
type: {{ .Values.service.type }}
23+
ports:
24+
- port: 8000
25+
targetPort: 8000
26+
protocol: TCP
27+
name: http
28+
selector:
29+
{{- include "gemma.selectorLabels" . | nindent 4 }}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Default values for gemma chart.
16+
# This is a YAML-formatted file.
17+
# Declare variables to be passed into your templates.
18+
19+
replicaCount: 1
20+
21+
image:
22+
repository: vllm/vllm-openai
23+
tag: v0.7.2
24+
pullPolicy: IfNotPresent
25+
26+
model:
27+
id: google/gemma-7b-it
28+
hfSecret: hf-secret
29+
30+
resources:
31+
limits:
32+
nvidia.com/gpu: "1"
33+
requests:
34+
nvidia.com/gpu: "1"
35+
36+
nodeSelector:
37+
cloud.google.com/gke-accelerator: nvidia-l4
38+
39+
hpa:
40+
enabled: true
41+
minReplicas: 1
42+
maxReplicas: 10
43+
# NOTE(review): templates/hpa.yaml as shown scales on a fixed vLLM GPU cache
# usage metric and does not appear to read this value — confirm whether it is
# still needed.
targetCPUUtilizationPercentage: 80
44+
45+
# -- Number of GPUs to distribute the model across
46+
tensorParallelSize: 1
47+
48+
# -- Maximum sequence length for the model
49+
maxModelLen: 512
50+
51+
service:
52+
type: ClusterIP
53+
# NOTE(review): the Service template as shown hard-codes port 8000 and does not
# appear to read this value — confirm which port is intended.
port: 80
54+
55+
pdb:
56+
enabled: true
57+
minAvailable: 1

0 commit comments

Comments
 (0)