Skip to content

Commit a38e76b

Browse files
authored
Merge pull request #98 from snowplow/feature/web/bigquery/integration_tests
Feature/web/bigquery/integration tests
2 parents a0e2f32 + 0be98b5 commit a38e76b

21 files changed

+283
-10
lines changed

.scripts/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Note that this script does not enforce dependencies, rather runs the playbooks i
5353
-d (dryRun) use sql-runner dry run
5454
-o (output path) path to store output of sql-runner to sql file (to be used in conjunction with p)
5555
-t (target template) path to target template to use (minimizes risk of credential leak)
56+
-v (variable template) path to variable template. Any variables in this template will override any corresponding variables within each playbook for the run.
5657
```
5758

5859
**Examples:**
@@ -178,6 +179,41 @@ bash .scripts/pr_check.sh -b ~/pathTo/sql-runner -d bigquery -m web;
178179
# Runs the pr check testing script against bigquery
179180
```
180181

182+
## integration_test.sh
183+
184+
Runs 5 end-to-end runs of the standard model in 1 day increments, using the integration test dataset. The actual derived tables are then checked against the expected derived tables. The standard tests are also performed on the derived tables.
185+
186+
We recommend using a virtual environment for python, eg. `pyenv` or `virtualenv` - for example using the latter:
187+
188+
```bash
189+
virtualenv ~/myenv
190+
source ~/myenv/bin/activate
191+
```
192+
193+
Before running, make sure to install python requirements (python3 required):
194+
195+
```bash
196+
cd data-models/.test
197+
pip3 install -r requirements.txt
198+
```
199+
200+
**Arguments:**
201+
202+
```
203+
-b (binary) path to sql-runner binary [required]
204+
-d (database) target database for expectations [required]
205+
-a (auth) optional credentials for database target
206+
-m (model) target model to run i.e. web or mobile [required]
207+
```
208+
209+
**Examples:**
210+
211+
```bash
212+
bash .scripts/integration_test.sh -b ~/pathTo/sql-runner -d bigquery -m web
213+
214+
# Runs the integration testing script against bigquery
215+
```
216+
181217
### `run_playbooks.sh` (deprecated)
182218

183219
Deprecated - `run_config.sh` provides a simpler instrumentation for this functionality.

.scripts/e2e.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# -b (binary) path to sql-runner binary
55
# -d (database) target database for expectations
66
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
78

89
while getopts 'b:d:a:m:' v
910
do

.scripts/integration_test.sh

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash
2+
3+
# Expected input:
4+
# -b (binary) path to sql-runner binary
5+
# -d (database) target database for expectations
6+
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
8+
9+
while getopts 'b:d:a:m:' opt
10+
do
11+
case $opt in
12+
b) SQL_RUNNER_PATH=$OPTARG ;;
13+
d) DATABASE=$OPTARG ;;
14+
a) CREDENTIALS=$OPTARG ;;
15+
m) MODEL=$OPTARG ;;
16+
esac
17+
done
18+
19+
repo_root_path=$( cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd -P )
20+
script_path="${repo_root_path}/.scripts"
21+
config_dir="${repo_root_path}/$MODEL/v1/$DATABASE/sql-runner/configs"
22+
23+
# Set credentials via env vars
24+
export BIGQUERY_CREDS=${BIGQUERY_CREDS:-$CREDENTIALS}
25+
export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-$CREDENTIALS}
26+
export SNOWFLAKE_PASSWORD=${SNOWFLAKE_PASSWORD:-$CREDENTIALS}
27+
28+
echo "integration_check: Starting 5 runs"
29+
30+
for i in {1..5}; do
31+
32+
echo "integration_check: Starting run $i";
33+
34+
bash .scripts/run_config.sh -b sql-runner -c $config_dir/pre_test.json -t $script_path/templates/$DATABASE.yml.tmpl -v .test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl || exit;
35+
36+
echo "integration_check: Checking actual vs. expected for the events_staged table";
37+
38+
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c events_staged_integration_test_${i} || exit 1;
39+
40+
bash .scripts/run_config.sh -b sql-runner -c $config_dir/post_test.json -t $script_path/templates/$DATABASE.yml.tmpl -v .test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl || exit;
41+
42+
echo "integration_check: run $i done";
43+
44+
done || exit 1
45+
46+
echo "integration_check: Checking actual vs. expected for derived tables";
47+
48+
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c perm_integration_test_tables || exit 1;
49+
50+
echo "integration_check: Checking standard tests against derived tables";
51+
52+
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c perm_tables || exit 1;
53+
54+
echo "integration_check: Done"

.scripts/pr_check.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# -b (binary) path to sql-runner binary
55
# -d (database) target database for expectations
66
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
78

89
while getopts 'b:d:a:m:' v
910
do

.scripts/run_config.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,19 @@
88
# -d (dryRun) use sql-runner dry run
99
# -o (output path) path to store output of sql-runner to sql file (to be used in conjunction with p)
1010
# -t (target template) path to target template to use (minimizes risk of credential leak)
11+
# -v (variables template) path to variables template to use
1112

12-
while getopts 'pdb:c:a:o:t:' v
13+
while getopts 'pdb:c:a:o:t:v:' opt
1314
do
14-
case $v in
15+
case $opt in
1516
b) SQL_RUNNER_PATH=$OPTARG ;;
1617
c) CONFIG_PATH=$OPTARG ;;
1718
a) CREDENTIALS=$OPTARG ;;
1819
p) FILL_TEMPLATES='-fillTemplates' ;;
1920
d) DRY_RUN='-dryRun' ;;
2021
o) OUTPUT_PATH=$OPTARG ;;
21-
t) TARGET_TEMPLATE=$OPTARG
22+
t) TARGET_TEMPLATE=$OPTARG ;;
23+
v) VARIABLES_TEMPLATE=$OPTARG
2224
esac
2325
done
2426

@@ -72,6 +74,13 @@ do
7274

7375
fi
7476

77+
if [ ! -z "$VARIABLES_TEMPLATE" ]; then
78+
79+
# Sub in any variables if specified
80+
awk -F':' 'NR==FNR{a[$2]=$0;next} /:variables:/{flag=1} /:steps:/{flag=0} a[$2]&&flag{$0=a[$2]}1' $root_path/$VARIABLES_TEMPLATE $root_path/tmp/current_playbook.yml > $root_path/tmp/current_playbook.tmp && mv $root_path/tmp/current_playbook.tmp $root_path/tmp/current_playbook.yml
81+
82+
fi
83+
7584
# If printing sql to file, mkdirs and set path vars
7685
if [ ! -z "$OUTPUT_PATH" ]; then
7786
mkdir -p $OUTPUT_PATH

.scripts/run_test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# -d (database) target database for expectations
55
# -c (config) expectation config name
66
# -a (auth) optional credentials for database target
7+
# -m (model) target model to run i.e. web or mobile
78

89
while getopts 'd:c:a:m:' v
910
do

.test/great_expectations/expectations/web/v1/base.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
],
138138
"meta": {
139139
"versions": {
140-
"test_suite_version": "1.1.0",
140+
"test_suite_version": "1.1.1",
141141
"bigquery_model_version": "1.0.3",
142142
"snowflake_model_version": "1.0.0"
143143
},

.test/great_expectations/expectations/web/v1/base_redshift.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@
285285
],
286286
"meta": {
287287
"versions": {
288-
"test_suite_version": "1.1.0",
288+
"test_suite_version": "1.1.1",
289289
"redshift_model_version": "1.2.0"
290290
},
291291
"great_expectations.__version__": "0.12.0"
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"data_asset_type": "Dataset",
3+
"expectation_suite_name": "integration_tests",
4+
"expectations": [
5+
{
6+
"expectation_type": "expect_column_values_to_be_null",
7+
"kwargs": {
8+
"column": "long_session"
9+
}
10+
},
11+
{
12+
"expectation_type": "expect_column_values_to_be_null",
13+
"kwargs": {
14+
"column": "null_page_view_id"
15+
}
16+
},
17+
{
18+
"expectation_type": "expect_column_values_to_be_null",
19+
"kwargs": {
20+
"column": "null_domain_userid"
21+
}
22+
},
23+
{
24+
"expectation_type": "expect_column_values_to_be_null",
25+
"kwargs": {
26+
"column": "null_domain_sessionid"
27+
}
28+
},
29+
{
30+
"expectation_type": "expect_column_values_to_be_null",
31+
"kwargs": {
32+
"column": "dupe_event_id_same_collector_tstamp"
33+
}
34+
},
35+
{
36+
"expectation_type": "expect_column_values_to_be_null",
37+
"kwargs": {
38+
"column": "dupe_event_id_diff_collector_tstamp"
39+
}
40+
},
41+
{
42+
"expectation_type": "expect_column_values_to_be_null",
43+
"kwargs": {
44+
"column": "dupe_page_view_id_diff_derived_tstamp"
45+
}
46+
},
47+
{
48+
"expectation_type": "expect_column_values_to_be_null",
49+
"kwargs": {
50+
"column": "late_arriving_dvc_created_sent"
51+
}
52+
},
53+
{
54+
"expectation_type": "expect_column_values_to_be_null",
55+
"kwargs": {
56+
"column": "clean_session"
57+
}
58+
}
59+
],
60+
"meta": {
61+
"versions": {
62+
"test_suite_version": "1.1.1",
63+
"bigquery_model_version": "1.0.3"
64+
},
65+
"__comment__": "expect_column_values_to_be_null on column stray_page_ping has been removed as it is a known issue (https://github.com/snowplow/data-models/issues/92)",
66+
"great_expectations.__version__": "0.12.0"
67+
}
68+
}
69+

.test/great_expectations/expectations/web/v1/metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
],
103103
"meta": {
104104
"versions": {
105-
"test_suite_version": "1.1.0",
105+
"test_suite_version": "1.1.1",
106106
"redshift_model_version": "1.2.0",
107107
"bigquery_model_version": "1.0.3",
108108
"snowflake_model_version": "1.0.0"

0 commit comments

Comments
 (0)