Skip to content

Commit 9027242

Browse files
committed
Move ci-kubernetes-e2e-gce-scale-resource-size to experiments
I remembered that we already have experimental periodic jobs and a dashboard for them; that might be a better place for this job.
1 parent 166b657 commit 9027242

File tree

2 files changed

+109
-110
lines changed

2 files changed

+109
-110
lines changed

config/jobs/kubernetes/sig-scalability/sig-scalability-experimental-periodic-jobs.yaml

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,112 @@ periodics:
184184
limits:
185185
cpu: 2
186186
memory: "2Gi"
# Experimental tests for larger resource size as proposed in https://github.com/kubernetes/kubernetes/issues/134375
#
# Periodic 5000-node GCE scale run of ClusterLoaderV2 (load, huge-service and
# access-tokens test configs) with the CL2_*_POD_PAYLOAD_SIZE overrides set to
# 1024. Reported on the sig-scalability-experiments TestGrid dashboard.
- cron: '1 17 2-31/2 * *' # 17:01 UTC (09:01 PST) on even days of the month
  name: ci-kubernetes-e2e-gce-scale-resource-size
  tags:
  - "perfDashPrefix: gce-5000Nodes-ResourceSize"
  - "perfDashBuildsCount: 270"
  - "perfDashJobType: performance"
  cluster: k8s-infra-prow-build
  labels:
    preset-service-account: "true"
    preset-k8s-ssh: "true"
    preset-e2e-scalability-common: "true"
    preset-e2e-scalability-periodics: "true"
    preset-e2e-scalability-periodics-master: "true"
  decorate: true
  decoration_config:
    # Kept above the 420m --timeout passed to kubernetes_e2e.py below.
    timeout: 450m
  extra_refs:
  - org: kubernetes
    repo: kubernetes
    base_ref: master
    path_alias: k8s.io/kubernetes
  - org: kubernetes
    repo: perf-tests
    base_ref: master
    path_alias: k8s.io/perf-tests
  annotations:
    testgrid-dashboards: sig-scalability-experiments
    testgrid-tab-name: gce-master-scale-resource-size
    description: "Experimental tests for larger resource size as proposed in https://github.com/kubernetes/kubernetes/issues/134375"
  spec:
    volumes:
    - name: cache-secret
      secret:
        secretName: scale-pull-cache-token
    containers:
    - image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20251016-39cf27682d-master
      volumeMounts:
      - name: cache-secret
        readOnly: true
        mountPath: /etc/registry-auth
      env:
      - name: KUBERNETES_REGISTRY_PULL_THROUGH_HOST
        value: https://us-central1-docker.pkg.dev/v2/k8s-infra-e2e-scale-5k-project/k8s-5k-scale-cache/
      # Token file is provided by the cache-secret volume mounted above.
      - name: KUBERNETES_REGISTRY_PULL_THROUGH_BASIC_AUTH_TOKEN_PATH
        value: /etc/registry-auth/token
      command:
      - runner.sh
      - /workspace/scenarios/kubernetes_e2e.py
      args:
      - --cluster=gce-scale-cluster
      - --env=HEAPSTER_MACHINE_TYPE=e2-standard-32
      # TODO(mborsz): Adjust or remove this change once we understand the
      # CoreDNS memory usage regression.
      - --env=KUBE_DNS_MEMORY_LIMIT=300Mi
      - --extract=ci/fast/latest-fast
      - --gcp-nodes=5000
      - --gcp-project-type=scalability-scale-project
      - --gcp-zone=us-east1-b
      - --provider=gce
      - --metadata-sources=cl2-metadata.json
      - --env=CL2_LOAD_TEST_THROUGHPUT=50
      - --env=CL2_DELETE_TEST_THROUGHPUT=50
      - --env=CL2_RATE_LIMIT_POD_CREATION=false
      - --env=KUBE_CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --endpointslice-updates-batch-period=500ms --endpoint-updates-batch-period=500ms
      # Overrides CONTROLLER_MANAGER_TEST_ARGS from preset-e2e-scalability-periodics.
      - --env=CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100
      # Overrides SCHEDULER_TEST_ARGS from preset-e2e-scalability-periodics.
      # TODO(#1311): Clean this up after the experiment - it should greatly
      # decrease pod-startup latency across the whole test.
      # Given that individual controllers have separate QPS limits, we allow
      # the scheduler to keep up with the load from deployment, daemonset and
      # job controllers performing pod creations at once.
      - --env=SCHEDULER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=500 --kube-api-burst=500
      # With APF, only the sum of --max-requests-inflight and
      # --max-mutating-requests-inflight matters, so
      # --max-mutating-requests-inflight is set to 0.
      - --env=APISERVER_TEST_ARGS=--max-requests-inflight=640 --max-mutating-requests-inflight=0
      - --env=CL2_ENABLE_API_AVAILABILITY_MEASUREMENT=true
      - --env=CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD=99.5
      # Per-workload pod payload sizes under test in this experiment.
      # NOTE(review): units are defined by the perf-tests load config - confirm.
      - --env=CL2_DAEMONSET_POD_PAYLOAD_SIZE=1024
      - --env=CL2_DEPLOYMENT_POD_PAYLOAD_SIZE=1024
      - --env=CL2_STATEFULSET_POD_PAYLOAD_SIZE=1024
      - --env=CL2_JOB_POD_PAYLOAD_SIZE=1024
      - --test=false
      - --test-cmd=$GOPATH/src/k8s.io/perf-tests/run-e2e.sh
      - --test-cmd-args=cluster-loader2
      - --test-cmd-args=--experimental-gcp-snapshot-prometheus-disk=true
      - --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
      - --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
      - --test-cmd-args=--nodes=5000
      - --test-cmd-args=--prometheus-scrape-node-exporter
      - --test-cmd-args=--provider=gce
      - --test-cmd-args=--report-dir=$(ARTIFACTS)
      - --test-cmd-args=--testconfig=testing/load/config.yaml
      - --test-cmd-args=--testconfig=testing/huge-service/config.yaml
      - --test-cmd-args=--testconfig=testing/access-tokens/config.yaml
      - --test-cmd-args=--testoverrides=./testing/experiments/enable_restart_count_check.yaml
      - --test-cmd-args=--testoverrides=./testing/experiments/ignore_known_gce_container_restarts.yaml
      - --test-cmd-args=--testoverrides=./testing/overrides/5000_nodes.yaml
      - --test-cmd-name=ClusterLoaderV2
      # Must stay below decoration_config.timeout (450m) above.
      - --timeout=420m
      - --use-logexporter
      - --logexporter-gcs-path=gs://k8s-infra-scalability-tests-logs/$(JOB_NAME)/$(BUILD_ID)
      resources:
        requests:
          cpu: 6
          memory: "16Gi"
        limits:
          cpu: 6
          memory: "16Gi"

config/jobs/kubernetes/sig-scalability/sig-scalability-periodic-jobs.yaml

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,113 +1166,3 @@ periodics:
11661166
limits:
11671167
cpu: 3
11681168
memory: "8Gi"
1169-
1170-
# Exploratory tests for the resource size limit, as proposed in https://github.com/kubernetes/kubernetes/issues/134375
1171-
- cron: '1 17 2-31/2 * *' # 17:01 UTC (09:01 PST) on even days of the month
1172-
name: ci-kubernetes-e2e-gce-scale-resource-size
1173-
tags:
1174-
- "perfDashPrefix: gce-5000Nodes-ResourceSize"
1175-
- "perfDashBuildsCount: 270"
1176-
- "perfDashJobType: performance"
1177-
cluster: k8s-infra-prow-build
1178-
labels:
1179-
preset-service-account: "true"
1180-
preset-k8s-ssh: "true"
1181-
preset-e2e-scalability-common: "true"
1182-
preset-e2e-scalability-periodics: "true"
1183-
preset-e2e-scalability-periodics-master: "true"
1184-
decorate: true
1185-
decoration_config:
1186-
timeout: 450m
1187-
extra_refs:
1188-
- org: kubernetes
1189-
repo: kubernetes
1190-
base_ref: master
1191-
path_alias: k8s.io/kubernetes
1192-
- org: kubernetes
1193-
repo: perf-tests
1194-
base_ref: master
1195-
path_alias: k8s.io/perf-tests
1196-
annotations:
1197-
testgrid-dashboards: sig-scalability-gce, google-gce
1198-
testgrid-tab-name: gce-master-scale-resource-size
1199-
description: "Exploratory tests for resource size limit as proposed in https://github.com/kubernetes/kubernetes/issues/134375"
1200-
spec:
1201-
volumes:
1202-
- name: cache-secret
1203-
secret:
1204-
secretName: scale-pull-cache-token
1205-
containers:
1206-
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20251016-39cf27682d-master
1207-
volumeMounts:
1208-
- name: cache-secret
1209-
readOnly: true
1210-
mountPath: /etc/registry-auth
1211-
env:
1212-
- name: KUBERNETES_REGISTRY_PULL_THROUGH_HOST
1213-
value: https://us-central1-docker.pkg.dev/v2/k8s-infra-e2e-scale-5k-project/k8s-5k-scale-cache/
1214-
- name: KUBERNETES_REGISTRY_PULL_THROUGH_BASIC_AUTH_TOKEN_PATH
1215-
value: /etc/registry-auth/token
1216-
command:
1217-
- runner.sh
1218-
- /workspace/scenarios/kubernetes_e2e.py
1219-
args:
1220-
- --cluster=gce-scale-cluster
1221-
- --env=HEAPSTER_MACHINE_TYPE=e2-standard-32
1222-
# TODO(mborsz): Adjust or remove this change once we understand the CoreDNS
1223-
# memory usage regression.
1224-
- --env=KUBE_DNS_MEMORY_LIMIT=300Mi
1225-
- --extract=ci/fast/latest-fast
1226-
- --gcp-nodes=5000
1227-
- --gcp-project-type=scalability-scale-project
1228-
- --gcp-zone=us-east1-b
1229-
- --provider=gce
1230-
- --metadata-sources=cl2-metadata.json
1231-
- --env=CL2_LOAD_TEST_THROUGHPUT=50
1232-
- --env=CL2_DELETE_TEST_THROUGHPUT=50
1233-
- --env=CL2_RATE_LIMIT_POD_CREATION=false
1234-
- --env=KUBE_CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --endpointslice-updates-batch-period=500ms --endpoint-updates-batch-period=500ms
1235-
# Overrides CONTROLLER_MANAGER_TEST_ARGS from preset-e2e-scalability-periodics.
1236-
- --env=CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100
1237-
# Overrides SCHEDULER_TEST_ARGS from preset-e2e-scalability-periodics.
1238-
# TODO(#1311): Clean this up after the experiment - it should
1239-
# greatly decrease pod-startup latency across the whole test.
1240-
# Given that individual controllers have separate QPS limits, we allow
1241-
# the scheduler to keep up with the load from deployment, daemonset and job
1242-
# controllers performing pod creations at once.
1243-
- --env=SCHEDULER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=500 --kube-api-burst=500
1244-
# With APF, only the sum of --max-requests-inflight and --max-mutating-requests-inflight matters, so --max-mutating-requests-inflight is set to 0.
1245-
- --env=APISERVER_TEST_ARGS=--max-requests-inflight=640 --max-mutating-requests-inflight=0
1246-
- --env=CL2_ENABLE_API_AVAILABILITY_MEASUREMENT=true
1247-
- --env=CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD=99.5
1248-
- --env=CL2_DAEMONSET_POD_PAYLOAD_SIZE=1024
1249-
- --env=CL2_DEPLOYMENT_POD_PAYLOAD_SIZE=1024
1250-
- --env=CL2_STATEFULSET_POD_PAYLOAD_SIZE=1024
1251-
- --env=CL2_JOB_POD_PAYLOAD_SIZE=1024
1252-
- --test=false
1253-
- --test-cmd=$GOPATH/src/k8s.io/perf-tests/run-e2e.sh
1254-
- --test-cmd-args=cluster-loader2
1255-
- --test-cmd-args=--experimental-gcp-snapshot-prometheus-disk=true
1256-
- --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
1257-
- --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
1258-
- --test-cmd-args=--nodes=5000
1259-
- --test-cmd-args=--prometheus-scrape-node-exporter
1260-
- --test-cmd-args=--provider=gce
1261-
- --test-cmd-args=--report-dir=$(ARTIFACTS)
1262-
- --test-cmd-args=--testconfig=testing/load/config.yaml
1263-
- --test-cmd-args=--testconfig=testing/huge-service/config.yaml
1264-
- --test-cmd-args=--testconfig=testing/access-tokens/config.yaml
1265-
- --test-cmd-args=--testoverrides=./testing/experiments/enable_restart_count_check.yaml
1266-
- --test-cmd-args=--testoverrides=./testing/experiments/ignore_known_gce_container_restarts.yaml
1267-
- --test-cmd-args=--testoverrides=./testing/overrides/5000_nodes.yaml
1268-
- --test-cmd-name=ClusterLoaderV2
1269-
- --timeout=420m
1270-
- --use-logexporter
1271-
- --logexporter-gcs-path=gs://k8s-infra-scalability-tests-logs/$(JOB_NAME)/$(BUILD_ID)
1272-
resources:
1273-
requests:
1274-
cpu: 6
1275-
memory: "16Gi"
1276-
limits:
1277-
cpu: 6
1278-
memory: "16Gi"

0 commit comments

Comments
 (0)