Skip to content

Commit b657faa

Browse files
authored
upload nightly prom metrics to s3 (#2744)
* upload nightly prom metrics to s3
* change push branch
* remove push branch
* remove job-level perms block, add id-token write to parent-level perms block
* use env vars instead of var interpolation, add is_experiment input
1 parent cd39930 commit b657faa

File tree

1 file changed

+49
-4
lines changed

1 file changed

+49
-4
lines changed

.github/workflows/nightly-throughput-stress.yml

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@ on:
44
schedule:
55
# Run at 3 AM PST (11:00 UTC) - offset from existing nightly
66
- cron: '00 11 * * *'
7-
push:
8-
branches:
9-
- add-nightly-throughput-stress-workflow
107
workflow_dispatch:
118
inputs:
129
duration:
@@ -24,8 +21,15 @@ on:
2421
required: false
2522
default: 360
2623
type: number
24+
is_experiment:
25+
description: 'Mark this run as an experiment (excluded from nightly dashboards)'
26+
required: false
27+
default: false
28+
type: boolean
29+
2730
permissions:
  # Allows the job to request an OIDC token; the AWS credentials step below
  # assumes a role via `role-to-assume`.
  id-token: write
  # Read-only access to the repository contents.
  contents: read
2933

3034
env:
3135
# Workflow configuration
@@ -35,11 +39,20 @@ env:
3539
# Logging and artifacts
3640
WORKER_LOG_DIR: /tmp/throughput-stress-logs
3741

42+
# AWS S3 metrics upload ARN
43+
AWS_S3_METRICS_UPLOAD_ROLE_ARN: ${{ vars.AWS_S3_METRICS_UPLOAD_ROLE_ARN }}
44+
3845
# Omes configuration
3946
OMES_REPO: temporalio/omes
4047
OMES_REF: main
4148
RUN_ID: ${{ github.run_id }}-throughput-stress
4249

50+
# Prometheus version
51+
PROM_VERSION: "3.8.0"
52+
53+
# Language
54+
SDK_LANG: "java"
55+
4356
jobs:
4457
throughput-stress:
4558
runs-on: ubuntu-latest-4-cores
@@ -88,6 +101,13 @@ jobs:
88101
- name: Install Temporal CLI
89102
uses: temporalio/setup-temporal@v0
90103

104+
- name: Install Prometheus
  # Downloads the linux-amd64 release tarball for PROM_VERSION, moves the
  # `prometheus` binary into /usr/local/bin, and prints its version.
  run: |
    PROM_PKG="prometheus-${PROM_VERSION}.linux-amd64"
    wget -q "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PROM_PKG}.tar.gz"
    tar xzf "${PROM_PKG}.tar.gz"
    sudo mv "${PROM_PKG}/prometheus" /usr/local/bin/
    prometheus --version
110+
91111
- name: Setup log directory
92112
run: mkdir -p $WORKER_LOG_DIR
93113

@@ -114,19 +134,44 @@ jobs:
114134
# to give CI a bit more time for visibility consistency
115135
go run ./cmd run-scenario-with-worker \
116136
--scenario throughput_stress \
117-
--language java \
137+
--language $SDK_LANG \
118138
--version $(pwd)/.. \
119139
--run-id $RUN_ID \
120140
--duration $TEST_DURATION \
121141
--timeout $TEST_TIMEOUT \
122142
--max-concurrent 10 \
143+
--prom-listen-address 127.0.0.1:9091 \
144+
--worker-prom-listen-address 127.0.0.1:9092 \
145+
--prom-instance-addr 127.0.0.1:9090 \
146+
--prom-instance-config \
147+
--prom-export-worker-metrics $RUN_ID.parquet \
123148
--option internal-iterations=10 \
124149
--option continue-as-new-after-iterations=3 \
125150
--option sleep-time=1s \
126151
--option visibility-count-timeout=5m \
127152
--option min-throughput-per-hour=1000 \
128153
2>&1 | tee $WORKER_LOG_DIR/scenario.log
129154
155+
- name: Configure AWS credentials
  # Runs regardless of earlier step outcomes so the S3 upload step that
  # follows can still authenticate.
  if: ${{ always() }}
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: us-west-2
    role-to-assume: ${{ env.AWS_S3_METRICS_UPLOAD_ROLE_ARN }}
161+
162+
- name: Upload metrics to S3
  # Copies the exported worker-metrics parquet file to S3, partitioned by
  # is_experiment / language / date. Runs even when earlier steps failed.
  if: always()
  env:
    # The script reads these two variables. Define them on the step so the
    # branch/input check sees real values: an unset GH_REF never equals
    # "refs/heads/main", which would label every run as an experiment.
    GH_REF: ${{ github.ref }}
    # Empty on scheduled runs (no dispatch inputs), so it only flips the flag
    # when a manual run explicitly sets is_experiment to true.
    IS_EXPERIMENT_INPUT: ${{ inputs.is_experiment }}
  run: |
    DATE=$(date +%Y-%m-%d)
    IS_EXPERIMENT="false"
    # Treat the run as an experiment when it is not on the main branch or
    # when the is_experiment input was set.
    if [[ "$GH_REF" != "refs/heads/main" || "$IS_EXPERIMENT_INPUT" == "true" ]]; then
      IS_EXPERIMENT="true"
    fi
    echo "Uploading metrics: is_experiment=$IS_EXPERIMENT, language=$SDK_LANG, date=$DATE"
    aws s3 cp "omes/$RUN_ID.parquet" \
      "s3://cloud-data-ingest-prod/github/sdk_load_test/is_experiment=$IS_EXPERIMENT/language=$SDK_LANG/date=$DATE/$RUN_ID.parquet"
174+
130175
- name: Upload logs on failure
131176
if: failure() || cancelled()
132177
uses: actions/upload-artifact@v5

0 commit comments

Comments
 (0)