Skip to content

Commit a38e76b

Browse files
authored
Merge pull request #98 from snowplow/feature/web/bigquery/integration_tests
Feature/web/bigquery/integration tests
2 parents a0e2f32 + 0be98b5 commit a38e76b

21 files changed

+283
-10
lines changed

.scripts/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Note that this script does not enforce dependencies, rather runs the playbooks i
5353
-d (dryRun) use sql-runner dry run
5454
-o (output path) path to store output of sql-runner to sql file (to be used in conjunction with p)
5555
-t (target template) path to target template to use (minimizes risk of credential leak)
56+
-v (variable template) path to variable template. Any variables in this template will override any corresponding variables within each playbook for the run.
5657
```
5758

5859
**Examples:**
@@ -178,6 +179,41 @@ bash .scripts/pr_check.sh -b ~/pathTo/sql-runner -d bigquery -m web;
178179
# Runs the pr check testing script against bigquery
179180
```
180181

182+
## integration_test.sh
183+
184+
Runs 5 end-to-end runs of the standard model in 1 day increments, using the integration test dataset. The actual derived tables are then checked against the expected derived tables. The standard tests are also performed on the derived tables.
185+
186+
We recommend using a virtual environment for python, eg. `pyenv` or `virtualenv` - for example using the latter:
187+
188+
```bash
189+
virtualenv ~/myenv
190+
source ~/myenv/bin/activate
191+
```
192+
193+
Before running, make sure to install python requirements (python3 required):
194+
195+
```bash
196+
cd data-models/.test
197+
pip3 install -r requirements.txt
198+
```
199+
200+
**Arguments:**
201+
202+
```
203+
-b (binary) path to sql-runner binary [required]
204+
-d (database) target database for expectations [required]
205+
-a (auth) optional credentials for database target
206+
-m (model) target model to run i.e. web or mobile [required]
207+
```
208+
209+
**Examples:**
210+
211+
```bash
212+
bash .scripts/integration_test.sh -b ~/pathTo/sql-runner -d bigquery -m web
213+
214+
# Runs the integration testing script against bigquery
215+
```
216+
181217
### `run_playbooks.sh` (deprecated)
182218

183219
Deprecated - `run_config.sh` provides a simpler instrumentation for this functionality.

.scripts/e2e.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# -b (binary) path to sql-runner binary
55
# -d (database) target database for expectations
66
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
78

89
while getopts 'b:d:a:m:' v
910
do

.scripts/integration_test.sh

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash
2+
3+
# Expected input:
4+
# -b (binary) path to sql-runner binary
5+
# -d (database) target database for expectations
6+
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
8+
9+
while getopts 'b:d:a:m:' opt
10+
do
11+
case $opt in
12+
b) SQL_RUNNER_PATH=$OPTARG ;;
13+
d) DATABASE=$OPTARG ;;
14+
a) CREDENTIALS=$OPTARG ;;
15+
m) MODEL=$OPTARG ;;
16+
esac
17+
done
18+
19+
repo_root_path=$( cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd -P )
20+
script_path="${repo_root_path}/.scripts"
21+
config_dir="${repo_root_path}/$MODEL/v1/$DATABASE/sql-runner/configs"
22+
23+
# Set credentials via env vars
24+
export BIGQUERY_CREDS=${BIGQUERY_CREDS:-$CREDENTIALS}
25+
export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-$CREDENTIALS}
26+
export SNOWFLAKE_PASSWORD=${SNOWFLAKE_PASSWORD:-$CREDENTIALS}
27+
28+
echo "integration_check: Starting 5 runs"
29+
30+
for i in {1..5}; do
31+
32+
echo "integration_check: Starting run $i";
33+
34+
bash .scripts/run_config.sh -b sql-runner -c $config_dir/pre_test.json -t $script_path/templates/$DATABASE.yml.tmpl -v .test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl || exit;
35+
36+
echo "integration_check: Checking actual vs. expected for the events_staged table";
37+
38+
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c events_staged_integration_test_${i} || exit 1;
39+
40+
bash .scripts/run_config.sh -b sql-runner -c $config_dir/post_test.json -t $script_path/templates/$DATABASE.yml.tmpl -v .test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl || exit;
41+
42+
echo "integration_check: run $i done";
43+
44+
done || exit 1
45+
46+
echo "integration_check: Checking actual vs. expected for derived tables";
47+
48+
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c perm_integration_test_tables || exit 1;
49+
50+
echo "integration_check: Checking standard tests against derived tables";
51+
52+
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c perm_tables || exit 1;
53+
54+
echo "integration_check: Done"

.scripts/pr_check.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# -b (binary) path to sql-runner binary
55
# -d (database) target database for expectations
66
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
78

89
while getopts 'b:d:a:m:' v
910
do

.scripts/run_config.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,19 @@
88
# -d (dryRun) use sql-runner dry run
99
# -o (output path) path to store output of sql-runner to sql file (to be used in conjunction with p)
1010
# -t (target template) path to target template to use (minimizes risk of credential leak)
11+
# -v (variables template) path to variables template to use
1112

12-
while getopts 'pdb:c:a:o:t:' v
13+
while getopts 'pdb:c:a:o:t:v:' opt
1314
do
14-
case $v in
15+
case $opt in
1516
b) SQL_RUNNER_PATH=$OPTARG ;;
1617
c) CONFIG_PATH=$OPTARG ;;
1718
a) CREDENTIALS=$OPTARG ;;
1819
p) FILL_TEMPLATES='-fillTemplates' ;;
1920
d) DRY_RUN='-dryRun' ;;
2021
o) OUTPUT_PATH=$OPTARG ;;
21-
t) TARGET_TEMPLATE=$OPTARG
22+
t) TARGET_TEMPLATE=$OPTARG ;;
23+
v) VARIABLES_TEMPLATE=$OPTARG
2224
esac
2325
done
2426

@@ -72,6 +74,13 @@ do
7274

7375
fi
7476

77+
if [ ! -z "$VARIABLES_TEMPLATE" ]; then
78+
79+
# Sub in any variables if specified
80+
awk -F':' 'NR==FNR{a[$2]=$0;next} /:variables:/{flag=1} /:steps:/{flag=0} a[$2]&&flag{$0=a[$2]}1' $root_path/$VARIABLES_TEMPLATE $root_path/tmp/current_playbook.yml > $root_path/tmp/current_playbook.tmp && mv $root_path/tmp/current_playbook.tmp $root_path/tmp/current_playbook.yml
81+
82+
fi
83+
7584
# If printing sql to file, mkdirs and set path vars
7685
if [ ! -z "$OUTPUT_PATH" ]; then
7786
mkdir -p $OUTPUT_PATH

.scripts/run_test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# -d (database) target database for expectations
55
# -c (config) expectation config name
66
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
78

89
while getopts 'd:c:a:m:' v
910
do

.test/great_expectations/expectations/web/v1/base.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
],
138138
"meta": {
139139
"versions": {
140-
"test_suite_version": "1.1.0",
140+
"test_suite_version": "1.1.1",
141141
"bigquery_model_version": "1.0.3",
142142
"snowflake_model_version": "1.0.0"
143143
},

.test/great_expectations/expectations/web/v1/base_redshift.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@
285285
],
286286
"meta": {
287287
"versions": {
288-
"test_suite_version": "1.1.0",
288+
"test_suite_version": "1.1.1",
289289
"redshift_model_version": "1.2.0"
290290
},
291291
"great_expectations.__version__": "0.12.0"
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"data_asset_type": "Dataset",
3+
"expectation_suite_name": "integration_tests",
4+
"expectations": [
5+
{
6+
"expectation_type": "expect_column_values_to_be_null",
7+
"kwargs": {
8+
"column": "long_session"
9+
}
10+
},
11+
{
12+
"expectation_type": "expect_column_values_to_be_null",
13+
"kwargs": {
14+
"column": "null_page_view_id"
15+
}
16+
},
17+
{
18+
"expectation_type": "expect_column_values_to_be_null",
19+
"kwargs": {
20+
"column": "null_domain_userid"
21+
}
22+
},
23+
{
24+
"expectation_type": "expect_column_values_to_be_null",
25+
"kwargs": {
26+
"column": "null_domain_sessionid"
27+
}
28+
},
29+
{
30+
"expectation_type": "expect_column_values_to_be_null",
31+
"kwargs": {
32+
"column": "dupe_event_id_same_collector_tstamp"
33+
}
34+
},
35+
{
36+
"expectation_type": "expect_column_values_to_be_null",
37+
"kwargs": {
38+
"column": "dupe_event_id_diff_collector_tstamp"
39+
}
40+
},
41+
{
42+
"expectation_type": "expect_column_values_to_be_null",
43+
"kwargs": {
44+
"column": "dupe_page_view_id_diff_derived_tstamp"
45+
}
46+
},
47+
{
48+
"expectation_type": "expect_column_values_to_be_null",
49+
"kwargs": {
50+
"column": "late_arriving_dvc_created_sent"
51+
}
52+
},
53+
{
54+
"expectation_type": "expect_column_values_to_be_null",
55+
"kwargs": {
56+
"column": "clean_session"
57+
}
58+
}
59+
],
60+
"meta": {
61+
"versions": {
62+
"test_suite_version": "1.1.1",
63+
"bigquery_model_version": "1.0.3"
64+
},
65+
"__comment__": "expect_column_values_to_be_null on column stray_page_ping has been removed as it is a known issue (https://github.com/snowplow/data-models/issues/92)",
66+
"great_expectations.__version__": "0.12.0"
67+
}
68+
}
69+

.test/great_expectations/expectations/web/v1/metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
],
103103
"meta": {
104104
"versions": {
105-
"test_suite_version": "1.1.0",
105+
"test_suite_version": "1.1.1",
106106
"redshift_model_version": "1.2.0",
107107
"bigquery_model_version": "1.0.3",
108108
"snowflake_model_version": "1.0.0"

0 commit comments

Comments
 (0)