GoogleCloudPlatform · jinbaebang · Oct 23, 2025 · Dec 4, 2025 · Dec 4, 2025 · NimJay
@@ -15,4 +15,4 @@
 /quickstarts/whereami/ @theemadnes @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
 /security/language-vulns/ @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
 /streaming/ @aburhan @pwschuurman @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
-/windows/ @ibabou @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
+/windows/ @ibabou @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
@@ -0,0 +1,53 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# CI for the Helm charts under app-design-center/: lints each chart and renders
+# its templates, once with Helm's default release name and once with an explicit one.
+name: app-design-center-ci
+# Quoted so generic YAML 1.1 parsers do not read the key as the boolean `true`.
+"on":
+  push:
+    branches:
+    - main
+    paths:
+    - .github/workflows/app-design-center-ci.yml
+    - app-design-center/**
+  pull_request:
+    paths:
+    - .github/workflows/app-design-center-ci.yml
+    - app-design-center/**
+# The job only checks out and reads the repository, so limit the GITHUB_TOKEN
+# to read-only access instead of inheriting the repository default.
+permissions:
+  contents: read
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Helm
+      uses: azure/setup-helm@v4
+    # Each step below runs `helm lint` and `helm template` only; nothing is
+    # installed or dry-run against a cluster.
+    - name: Lint and render app-design-center/ai-inference
+      run: |
+        helm lint app-design-center/ai-inference
+        helm template app-design-center/ai-inference
+        helm template ai-inference-test app-design-center/ai-inference
+    - name: Lint and render app-design-center/enterprise-production
+      run: |
+        helm lint app-design-center/enterprise-production
+        helm template app-design-center/enterprise-production
+        helm template enterprise-production-test app-design-center/enterprise-production
+    - name: Lint and render app-design-center/simple-app-multi-region
+      run: |
+        helm lint app-design-center/simple-app-multi-region
+        helm template app-design-center/simple-app-multi-region
+        helm template simple-app-multi-region-test app-design-center/simple-app-multi-region
+    - name: Lint and render app-design-center/simple-app-single-region
+      run: |
+        helm lint app-design-center/simple-app-single-region
+        helm template app-design-center/simple-app-single-region
+        helm template simple-app-single-region-test app-design-center/simple-app-single-region
@@ -0,0 +1,20 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Chart metadata for the "gemma" chart (Helm chart API v2).
+apiVersion: v2
+name: gemma
+# Kept model-agnostic: the model that is actually served is chosen through the
+# chart's `model.id` value, so the description does not name a specific size.
+description: A Helm chart for deploying a Gemma model for inference with vLLM
+type: application
+# Version of the chart itself.
+version: 0.1.0
+# Quoted so YAML keeps the value a string instead of parsing it as a float.
+appVersion: "1.0"
@@ -0,0 +1,3 @@
+# AI Inference Helm Chart
+
+These samples show how to deploy a Gemma 2 27B model for inference. Visit https://cloud.google.com/kubernetes-engine/docs/ to follow the tutorial.
-These samples show how to deploy a Gemma 2 27B model for inference. Visit https://cloud.google.com/kubernetes-engine/docs/ to follow the tutorial.
+These samples show how to deploy a Gemma 2 27B model for inference. Visit https://cloud.google.com/application-design-center/docs/build-on-google-templates to follow the tutorial.
-These samples show how to deploy a Gemma 2 27B model for inference. Visit https://cloud.google.com/kubernetes-engine/docs/ to follow the tutorial.
+These samples show how to deploy a Gemma 2 27B model for inference. Visit https://cloud.google.com/application-design-center/docs/build-on-google-templates to follow the tutorial.
@@ -0,0 +1,66 @@
+{{/*
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+{{/*
+Short name of the chart: `nameOverride` when it is set, otherwise the chart's
+own name. The result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "gemma.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, resolved in this order:
+  1. `fullnameOverride`, when set;
+  2. the release name alone, when it already contains the chart name (or `nameOverride`);
+  3. "<release name>-<chart name or nameOverride>" otherwise.
+Whatever is chosen is cut to 63 characters, because some Kubernetes name fields
+are limited to this (by the DNS naming spec), and a trailing "-" is removed.
+*/}}
+{{- define "gemma.fullname" -}}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $full := printf "%s-%s" .Release.Name $base }}
+{{- if .Values.fullnameOverride }}
+{{- $full = .Values.fullnameOverride }}
+{{- else if contains $base .Release.Name }}
+{{- $full = .Release.Name }}
+{{- end }}
+{{- $full | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+"<chart name>-<chart version>" string used as the value of the helm.sh/chart label.
+Any "+" is replaced with "_", the result is cut to 63 characters, and a
+trailing "-" is removed.
+*/}}
+{{- define "gemma.chart" -}}
+{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels applied to every resource of the chart: the chart label, the
+selector labels, the app version (only when the chart declares one) and the
+managing service.
+*/}}
+{{- define "gemma.labels" -}}
+helm.sh/chart: {{ include "gemma.chart" . }}
+{{ include "gemma.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the name/instance pair used both as pod labels and as the
+label selectors of the resources that target those pods.
+*/}}
+{{- define "gemma.selectorLabels" -}}
+{{- $appName := include "gemma.name" . -}}
+app.kubernetes.io/name: {{ $appName }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
@@ -0,0 +1,74 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Deployment that runs the vLLM OpenAI API server (vllm.entrypoints.openai.api_server)
+# for the model selected by `model.id`.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "gemma.fullname" . }}
+  labels:
+    {{- include "gemma.labels" . | nindent 4 }}
+spec:
+  # The replica count is only pinned when the HPA is disabled; otherwise every
+  # `helm upgrade` would reset the count chosen by the autoscaler.
+  {{- if not .Values.hpa.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "gemma.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "gemma.selectorLabels" . | nindent 8 }}
+    spec:
+      containers:
+      - name: {{ .Chart.Name }}
+        image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+        imagePullPolicy: {{ .Values.image.pullPolicy }}
+        args:
+        # $(MODEL_ID) is expanded by Kubernetes from the env entry below.
+        - --model=$(MODEL_ID)
+        - --disable-log-requests
+        - --tensor-parallel-size={{ .Values.tensorParallelSize }}
+        # Canonical vLLM flag name is --max-num-seqs (plural).
+        - --max-num-seqs=512
+        - --gpu-memory-utilization=0.95
+        - --num-scheduler-steps=8
+        - --max-model-len={{ .Values.maxModelLen }}
+        command:
+        - python3
+        - -m
+        - vllm.entrypoints.openai.api_server
+        env:
+        - name: MODEL_ID
+          # Quoted so the rendered value is always a YAML string.
+          value: {{ .Values.model.id | quote }}
+        # Hugging Face token read from the `hf_api_token` key of the configured Secret.
+        - name: HUGGING_FACE_HUB_TOKEN
+          valueFrom:
+            secretKeyRef:
+              key: hf_api_token
+              name: {{ .Values.model.hfSecret | quote }}
+        ports:
+        # Same port the readiness probe checks and the Service targets (8000).
+        - containerPort: 8000
+          name: metrics
+        readinessProbe:
+          # 6000 failures x 10s period: the probe may fail for roughly 16.7 hours
+          # before the threshold is reached (presumably to allow for slow model
+          # loading - confirm).
+          failureThreshold: 6000
+          httpGet:
+            path: /health
+            port: 8000
+          periodSeconds: 10
+        resources:
+          {{- toYaml .Values.resources | nindent 12 }}
+        volumeMounts:
+        # Memory-backed emptyDir (see `volumes` below) mounted as /dev/shm.
+        - mountPath: /dev/shm
+          name: dshm
+      nodeSelector:
+        {{- toYaml .Values.nodeSelector | nindent 8 }}
+      volumes:
+      - emptyDir:
+          medium: Memory
+        name: dshm
@@ -0,0 +1,37 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{- if .Values.hpa.enabled }}
+# HorizontalPodAutoscaler for the chart's Deployment; rendered only when
+# `hpa.enabled` is true. Scales on a per-pod vLLM GPU cache usage gauge.
+# NOTE(review): `hpa.targetCPUUtilizationPercentage` from the values file is not
+# referenced in this template.
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "gemma.fullname" . }}
+  labels:
+    {{- include "gemma.labels" . | nindent 4 }}
+spec:
+  minReplicas: {{ .Values.hpa.minReplicas }}
+  maxReplicas: {{ .Values.hpa.maxReplicas }}
+  # Targets the Deployment rendered with the same full name.
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "gemma.fullname" . }}
+  metrics:
+  - type: Pods
+    pods:
+      metric:
+        name: prometheus.googleapis.com|vllm:gpu_cache_usage_perc|gauge
+      target:
+        type: AverageValue
+        # Kubernetes quantity notation: 504m == 0.504 averaged across pods.
+        averageValue: 504m
+{{- end }}
@@ -0,0 +1,27 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{ if .Values.pdb.enabled -}}
+# PodDisruptionBudget over the pods matched by the chart's selector labels;
+# rendered only when `pdb.enabled` is true.
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "gemma.fullname" . }}
+  labels:
+    {{- include "gemma.labels" . | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "gemma.selectorLabels" . | nindent 6 }}
+  # Minimum number of matching pods that must remain available.
+  minAvailable: {{ .Values.pdb.minAvailable }}
+{{- end }}
@@ -0,0 +1,29 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Service in front of the pods matched by the chart's selector labels.
+# NOTE(review): the port is fixed at 8000 here; `service.port` from the values
+# file is not read by this template.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gemma.fullname" . }}
+  labels:
+    {{- include "gemma.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "gemma.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: http
+      protocol: TCP
+      port: 8000
+      targetPort: 8000
@@ -0,0 +1,57 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default values for gemma chart.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# -- Desired number of Deployment replicas. When the HPA below is enabled it
+# adjusts the live replica count.
+replicaCount: 1
+
+# -- Container image, rendered by the Deployment as "<repository>:<tag>".
+image:
+  repository: vllm/vllm-openai
+  tag: v0.7.2
+  pullPolicy: IfNotPresent
+
+model:
+  # -- Model identifier; exported as MODEL_ID and passed to vLLM via --model.
+  # NOTE(review): the README describes a Gemma 2 27B deployment, while this
+  # default is gemma-7b-it - confirm which one is intended.
+  id: google/gemma-7b-it
+  # -- Name of the Secret whose `hf_api_token` key supplies HUGGING_FACE_HUB_TOKEN.
+  hfSecret: hf-secret
+
+# -- Container resources, rendered verbatim into the Deployment.
+# GPU counts are quoted so they stay strings.
+resources:
+  limits:
+    nvidia.com/gpu: "1"
+  requests:
+    nvidia.com/gpu: "1"
+
+# -- Pod nodeSelector, rendered verbatim into the Deployment.
+nodeSelector:
+  cloud.google.com/gke-accelerator: nvidia-l4
+
+# -- HorizontalPodAutoscaler settings; the HPA is rendered only when enabled.
+hpa:
+  enabled: true
+  minReplicas: 1
+  maxReplicas: 10
+  # NOTE(review): not referenced by the HPA template, which scales on a vLLM
+  # GPU cache usage metric instead.
+  targetCPUUtilizationPercentage: 80
+
+# -- Number of GPUs to distribute the model across (passed as --tensor-parallel-size)
+tensorParallelSize: 1
+
+# -- Maximum sequence length for the model (passed as --max-model-len)
+maxModelLen: 512
+
+service:
+  # -- Kubernetes Service type.
+  type: ClusterIP
+  # NOTE(review): not read by the Service template, which hard-codes port 8000.
+  port: 80
+
+# -- PodDisruptionBudget settings; the PDB is rendered only when enabled.
+# NOTE(review): with a single replica, minAvailable: 1 leaves no pod that can be
+# voluntarily evicted - confirm this default is intended.
+pdb:
+  enabled: true
+  minAvailable: 1