Skip to content

Commit 43c87fa

Browse files
committed
merge conflicts
2 parents 9e85377 + fd5ce6b commit 43c87fa

File tree

90 files changed

+12946
-442
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+12946
-442
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
bin/*
88
Dockerfile.cross
99
artifacts
10+
latencypredictor/__pycache__
1011

1112
# Test binary, built with `go test -c`
1213
*.test

benchmarking/benchmark-values.yaml

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
job:
2+
image:
3+
repository: quay.io/inference-perf/inference-perf
4+
tag: "latest" # If set to "", the chart falls back to .Chart.AppVersion
5+
serviceAccountName: ""
6+
nodeSelector: {}
7+
# Example resources:
8+
# resources:
9+
# requests:
10+
# cpu: "1"
11+
# memory: "4Gi"
12+
# limits:
13+
# cpu: "2"
14+
# memory: "8Gi"
15+
resources: {}
16+
17+
logLevel: INFO
18+
19+
# A GCS URI (e.g. gs://my-bucket/dataset.json) that points to the dataset file.
20+
# The file will be copied from this path to the local file system
21+
# at /dataset/gcs-dataset.json for use during the run.
22+
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/gcs-dataset.json.
23+
gcsPath: ""
24+
25+
# An S3 path (bucket/key, WITHOUT the s3:// scheme, which the Job template prepends) that points to the dataset file.
26+
# The file will be copied from this path to the local file system
27+
# at /dataset/s3-dataset.json for use during the run.
28+
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json.
29+
s3Path: ""
30+
31+
# hfToken optionally creates a secret with the specified token.
32+
# Can be set using helm install --set hfToken=<token>
33+
hfToken: ""
34+
35+
config:
36+
load:
37+
type: constant
38+
interval: 15
39+
stages:
40+
- rate: 10
41+
duration: 20
42+
- rate: 20
43+
duration: 20
44+
- rate: 30
45+
duration: 20
46+
api:
47+
type: completion
48+
streaming: true
49+
server:
50+
type: vllm
51+
model_name: meta-llama/Llama-3.1-8B-Instruct
52+
base_url: http://0.0.0.0:8000
53+
ignore_eos: true
54+
tokenizer:
55+
pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
56+
data:
57+
type: shareGPT
58+
metrics:
59+
type: prometheus
60+
prometheus:
61+
google_managed: true
62+
report:
63+
request_lifecycle:
64+
summary: true
65+
per_stage: true
66+
per_request: true
67+
prometheus:
68+
summary: true
69+
per_stage: true

benchmarking/benchmark.ipynb

Lines changed: 596 additions & 0 deletions
Large diffs are not rendered by default.

benchmarking/download-results.bash

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
#!/bin/bash
#
# Downloads benchmark result files from a GCS or S3 bucket into
#   <script dir>/<output_dir>/<run_id>/<benchmark_id>/results/json/
#
# Usage:
#   download-results.bash <gcs|s3> <BUCKET> [FOLDER_PATH]
#
# Positional arguments:
#   $1  Storage provider: "gcs" (uses gsutil) or "s3" (uses the aws CLI).
#   $2  Bucket name, without the gs:// or s3:// scheme.
#   $3  Folder inside the bucket to download. Defaults to "benchmark_results/".
#
# Environment variables (all optional):
#   benchmark_id  Identifies what the benchmark is for; becomes part of the
#                 output path. Defaults to "inference-extension".
#   run_id        Groups different runs of the same benchmark for comparison.
#                 Defaults to "default-run".
#   output_dir    Output directory, relative to this script's directory.
#                 Defaults to "output".
#
# Exits non-zero on bad arguments, an unknown provider, or a failed download.

# Abort on the first failing command so a failed download is never followed
# by a misleading "Download complete." message and a zero exit status.
set -euo pipefail

# Check if provider and bucket are provided as arguments.
# ${N:-} keeps the check working under `set -u` when an argument is missing.
if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
  echo "Usage: $0 <gcs|s3> <BUCKET> [FOLDER_PATH:DEFAULT=benchmark_results]"
  exit 1
fi

PROVIDER="$1"
BUCKET="$2"
FOLDER_PATH="${3:-benchmark_results/}" # Default to benchmark_results/ if not provided

# Env vars to be passed when calling this script.
# The id of the benchmark. This is needed to identify what the benchmark is for.
# It decides the filepath to save the results, which later is used by the jupyter notebook to assign
# the benchmark_id as data labels for plotting.
benchmark_id=${benchmark_id:-"inference-extension"}
# run_id can be used to group different runs of the same benchmarks for comparison.
run_id=${run_id:-"default-run"}
output_dir=${output_dir:-'output'}

# Resolve the directory containing this script so the output location does not
# depend on the caller's working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
benchmark_output_dir="${SCRIPT_DIR}/${output_dir}/${run_id}/${benchmark_id}"

echo "Creating output directory: ${benchmark_output_dir}/results/json/"
mkdir -p "${benchmark_output_dir}/results/json/"

case "$PROVIDER" in
  gcs)
    echo "Downloading gs://${BUCKET}/${FOLDER_PATH} to ${benchmark_output_dir}/results/json/"
    gsutil cp -r "gs://${BUCKET}/${FOLDER_PATH}" "${benchmark_output_dir}/results/json/"
    ;;
  s3)
    echo "Downloading s3://${BUCKET}/${FOLDER_PATH} to ${benchmark_output_dir}/results/json/"
    # `aws s3 cp` has no -r short flag; recursive copies require --recursive.
    aws s3 cp --recursive "s3://${BUCKET}/${FOLDER_PATH}" "${benchmark_output_dir}/results/json/"
    ;;
  *)
    echo "Invalid provider: $PROVIDER. Please use 'gcs' or 's3'."
    exit 1
    ;;
esac

echo "Download complete."
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: v2
2+
name: inference-perf
3+
description: A Helm chart for running inference-perf benchmarking tool
4+
type: application
5+
version: 0.2.0
6+
appVersion: "0.2.0"
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
## 🚀 Deploying `inference-perf` via Helm Chart
2+
3+
This guide explains how to deploy `inference-perf` to a Kubernetes cluster with Helm.
4+
5+
Note: This is a temporary chart, added until a remote chart is available.
6+
7+
---
8+
9+
### 1. Prerequisites
10+
11+
Make sure you have the following tools installed and configured:
12+
13+
* **Kubernetes Cluster:** Access to a functional cluster (e.g., GKE).
14+
* **Helm:** The Helm CLI installed locally.
15+
16+
---
17+
18+
### 2. Configuration (`values.yaml`)
19+
20+
Before deployment, navigate to the **`deploy/inference-perf`** directory and edit the **`values.yaml`** file to customize your deployment and the benchmark parameters.
21+
22+
#### Optional Parameters
23+
24+
| Key | Description | Default |
25+
| :--- | :--- | :--- |
26+
| `hfToken` | Hugging Face API token. If provided, the token is exposed to the benchmark container as the `HF_TOKEN` environment variable via a Kubernetes `Secret` named `<release-fullname>-hf-secret`. | `""` |
27+
| `serviceAccountName` | Standard Kubernetes `serviceAccountName`. If not provided, default service account is used. | `""` |
28+
| `nodeSelector` | Standard Kubernetes `nodeSelector` map to constrain pod placement to nodes with matching labels. | `{}` |
29+
| `resources` | Standard Kubernetes resource requests and limits for the main `inference-perf` container. | `{}` |
30+
---
31+
32+
> **Example Resource Block:**
33+
> ```yaml
34+
> # resources:
35+
> # requests:
36+
> # cpu: "1"
37+
> # memory: "4Gi"
38+
> # limits:
39+
> # cpu: "2"
40+
> # memory: "8Gi"
41+
> ```
42+
43+
#### GKE Specific Parameters
44+
45+
This section details the necessary configuration and permissions for using a Google Cloud Storage (GCS) path to manage your dataset, typical for deployments on GKE.
46+
47+
##### Required IAM Permissions
48+
49+
The identity executing the workload (e.g., the associated Kubernetes Service Account, often configured via **Workload Identity**) must possess the following IAM roles on the target GCS bucket for data transfer:
50+
51+
* **`roles/storage.objectViewer`** (Required to read/download the input dataset from GCS).
52+
* **`roles/storage.objectCreator`** (Required to write/push benchmark results back to GCS).
53+
54+
55+
| Key | Description | Default |
56+
| :--- | :--- | :--- |
57+
| `gcsPath` | A GCS URI pointing to the dataset file (e.g., `gs://my-bucket/dataset.json`). The file will be automatically copied to the running pod during initialization. | `""` |
58+
59+
---
60+
61+
### 3. Run Deployment
62+
63+
Use the **`helm install`** command from the **`deploy/inference-perf`** directory to deploy the chart.
64+
65+
* **Standard Install:** Deploy using the default `values.yaml`.
66+
```bash
67+
helm install test .
68+
```
69+
70+
* **Set `hfToken` Override:** Pass the Hugging Face token directly.
71+
```bash
72+
helm install test . --set hfToken="<TOKEN>"
73+
```
74+
75+
* **Custom Config Override:** Make changes to the values file for custom settings.
76+
```bash
77+
helm install test . -f values.yaml
78+
```
79+
80+
### 4. Cleanup
81+
82+
To remove the benchmark deployment, run:
83+
```bash
84+
helm uninstall test
85+
```
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
{{/*
Expand the name of the chart.
Renders .Values.nameOverride when it is set, otherwise .Chart.Name.
The result is truncated to 63 characters and a trailing "-" is removed.
*/}}
{{- define "inference-perf.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride, when that is set).
  3. "<release name>-<chart name or nameOverride>" otherwise.
In every case a trailing "-" left over after truncation is removed.
*/}}
{{- define "inference-perf.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
Renders "<chart name>-<chart version>" with every "+" replaced by "_",
truncated to 63 characters with a trailing "-" removed.
*/}}
{{- define "inference-perf.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits the helm.sh/chart label, the selector labels, the
app.kubernetes.io/version label (only when .Chart.AppVersion is set) and the
app.kubernetes.io/managed-by label. Callers are expected to indent the output
themselves (the templates in this chart pipe it through nindent).
*/}}
{{- define "inference-perf.labels" -}}
helm.sh/chart: {{ include "inference-perf.chart" . }}
{{ include "inference-perf.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Emits app.kubernetes.io/name (from "inference-perf.name") and
app.kubernetes.io/instance (the release name).
*/}}
{{- define "inference-perf.selectorLabels" -}}
app.kubernetes.io/name: {{ include "inference-perf.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Common Secret Name for HuggingFace credentials
Renders "<fullname>-hf-secret".
*/}}
{{- define "inference-perf.hfSecret" -}}
{{ include "inference-perf.fullname" . }}-hf-secret
{{- end -}}

{{/*
Common Secret Key for HuggingFace credentials
Renders "<fullname>-hf-key".
*/}}
{{- define "inference-perf.hfKey" -}}
{{ include "inference-perf.fullname" . }}-hf-key
{{- end -}}

{{/*
Mount path for config map
Renders the fixed path "/cfg".
*/}}
{{- define "inference-perf.configMount" -}}
/cfg
{{- end -}}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
# inference-perf/templates/configmap.yaml
# ConfigMap carrying the benchmark configuration: everything under the
# `config` values key is serialized to YAML and stored as a single
# `config.yml` entry.
{{- $configMapName := printf "%s-config" (include "inference-perf.fullname" .) }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ $configMapName }}
  labels:
{{ include "inference-perf.labels" . | indent 4 }}
data:
  config.yml: |
{{ toYaml .Values.config | indent 4 }}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
# inference-perf/templates/job.yaml
#
# Batch Job that runs the inference-perf benchmark once (restartPolicy: Never).
#
# Values consumed by this template:
#   job.image.repository / job.image.tag  Benchmark image; an empty tag falls
#                                         back to the chart's appVersion.
#   job.serviceAccountName                Optional; omitted when empty.
#   job.nodeSelector                      Optional node selector map.
#   job.resources                         Resources for the benchmark container.
#   logLevel                              Passed to inference-perf as --log-level.
#   gcsPath                               Optional full gs:// URI; copied to
#                                         /dataset/gcs-dataset.json by an init
#                                         container.
#   s3Path                                Optional bucket/key path WITHOUT the
#                                         s3:// scheme (it is prepended here);
#                                         copied to /dataset/s3-dataset.json by
#                                         an init container.
#   hfToken                               When set, HF_TOKEN is injected from
#                                         the chart's Hugging Face secret.
#
# The dataset volume, its init containers and its mount in the benchmark
# container are rendered only when gcsPath or s3Path is set.
{{- $needsDataset := or .Values.gcsPath .Values.s3Path }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "inference-perf.fullname" . }}-job
  labels:
    {{- include "inference-perf.labels" . | nindent 4 }}
    app: inference-perf
spec:
  template:
    metadata:
      labels:
        {{- include "inference-perf.selectorLabels" . | nindent 8 }}
        app: inference-perf
    spec:
      restartPolicy: Never
      {{- with .Values.job.serviceAccountName }}
      serviceAccountName: {{ . | quote }}
      {{- end }}
      {{- with .Values.job.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if $needsDataset }}
      # A single initContainers key holds both fetchers so that setting gcsPath
      # and s3Path together does not produce a duplicate YAML key.
      initContainers:
        {{- if .Values.gcsPath }}
        - name: fetch-gcs-dataset
          image: google/cloud-sdk:latest
          # Exec form (no shell) so the path is passed as one literal argument.
          command: ["gsutil", "cp", {{ .Values.gcsPath | quote }}, "/dataset/gcs-dataset.json"]
          volumeMounts:
            - name: dataset-volume
              mountPath: /dataset
        {{- end }}
        {{- if .Values.s3Path }}
        - name: fetch-s3-dataset
          # The Google Cloud SDK image does not ship the AWS CLI, so the S3
          # download uses the AWS CLI image instead.
          image: amazon/aws-cli:latest
          command: ["aws", "s3", "cp", {{ printf "s3://%s" .Values.s3Path | quote }}, "/dataset/s3-dataset.json"]
          volumeMounts:
            - name: dataset-volume
              mountPath: /dataset
        {{- end }}
      {{- end }}
      containers:
        - name: inference-perf-container
          image: "{{ .Values.job.image.repository }}:{{ .Values.job.image.tag | default .Chart.AppVersion }}"
          command: ["inference-perf"]
          args:
            - "--config_file"
            - "{{ include "inference-perf.configMount" . }}/config.yml"
            - "--log-level"
            - {{ .Values.logLevel | quote }}
          {{- if .Values.hfToken }}
          env:
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ include "inference-perf.hfSecret" . }}
                  key: {{ include "inference-perf.hfKey" . }}
          {{- end }}
          volumeMounts:
            - name: config-volume
              mountPath: {{ include "inference-perf.configMount" . }}
              readOnly: true
            {{- if $needsDataset }}
            # Makes the file fetched by the init container visible at the
            # /dataset/... path that config.data.path is expected to point to.
            - name: dataset-volume
              mountPath: /dataset
              readOnly: true
            {{- end }}
          resources:
            {{- toYaml .Values.job.resources | nindent 12 }}
      volumes:
        - name: config-volume
          configMap:
            name: {{ include "inference-perf.fullname" . }}-config
        {{- if $needsDataset }}
        # Scratch volume shared between the dataset init container(s) and the
        # benchmark container.
        - name: dataset-volume
          emptyDir: {}
        {{- end }}

0 commit comments

Comments
 (0)