From 99fd24deb7c974c08ab879259b16b1dcb9e74979 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Thu, 12 Jun 2025 15:16:38 +0300 Subject: [PATCH 01/11] Adding keyhole backfill changes --- .../base_create_snowplow_events_this_run.sql | 54 ++++++++++++------- macros/incremental_hooks/get_run_limits.sql | 27 ++++++---- .../return_base_new_event_limits.sql | 23 ++++---- .../snowplow_incremental_post_hook.sql | 43 ++++++++------- 4 files changed, 88 insertions(+), 59 deletions(-) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index eb0319a4..d864f912 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -1,3 +1,4 @@ + {# Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, @@ -9,6 +10,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(adapter.dispatch('base_create_snowplow_events_this_run', 'snowplow_utils')(sessions_this_run_table, session_identifiers, session_sql, session_timestamp, derived_tstamp_partitioned, days_late_allowed, max_session_days, app_ids, snowplow_events_database, snowplow_events_schema, snowplow_events_table, entities_or_sdes, custom_sql, allow_null_dvce_tstamps)) }} {% endmacro %} + {% macro default__base_create_snowplow_events_this_run(sessions_this_run_table, session_identifiers, session_sql, session_timestamp, derived_tstamp_partitioned, days_late_allowed, max_session_days, app_ids, snowplow_events_database, snowplow_events_schema, snowplow_events_table, entities_or_sdes, custom_sql, allow_null_dvce_tstamps) %} {%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(ref(sessions_this_run_table), 'start_tstamp', @@ -51,24 +53,30 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 inner join {{ sessions_this_run }} as b on a.session_identifier = b.session_identifier - where a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} - - {% if allow_null_dvce_tstamps %} - and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} - {% else %} - and a.dvce_sent_tstamp <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'a.dvce_created_tstamp') }} - {% endif %} - - and a.{{ session_timestamp }} >= {{ lower_limit }} - and a.{{ session_timestamp }} <= {{ upper_limit }} - and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events + where + {% if not var('snowplow__enable_keyhole_backfill',false) %} + + a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} + + {% if allow_null_dvce_tstamps %} + and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} + {% else %} + and a.dvce_sent_tstamp <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'a.dvce_created_tstamp') }} + {% endif %} + + and a.{{ session_timestamp }} >= {{ lower_limit }} + and a.{{ session_timestamp }} <= {{ upper_limit }} + and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events - {% if derived_tstamp_partitioned and target.type == 'bigquery' | as_bool() %} - and a.derived_tstamp >= {{ snowplow_utils.timestamp_add('hour', -1, lower_limit) }} - and a.derived_tstamp <= {{ upper_limit }} + {% if derived_tstamp_partitioned and target.type == 'bigquery' | as_bool() %} + and a.derived_tstamp >= {{ snowplow_utils.timestamp_add('hour', -1, lower_limit) }} + and a.derived_tstamp <= {{ upper_limit }} + {% endif %} + + and {% endif %} - and {{ snowplow_utils.app_id_filter(app_ids) }} + {{ snowplow_utils.app_id_filter(app_ids) }} qualify row_number() over (partition by a.event_id order by a.{{ session_timestamp }}, a.dvce_created_tstamp) = 1 {% endset %} @@ -197,7 +205,9 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 inner join {{ sessions_this_run }} as b on a.session_identifier = b.session_identifier - where a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} + where + {% if not var('snowplow__enable_keyhole_backfill',false) %} + a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} {% if allow_null_dvce_tstamps %} and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} {% else %} @@ -206,7 +216,9 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 and a.{{ session_timestamp }} >= {{ lower_limit }} and a.{{ session_timestamp }} <= {{ upper_limit }} and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events - and {{ snowplow_utils.app_id_filter(app_ids) }} + and + {% endif %} + {{ snowplow_utils.app_id_filter(app_ids) }} ) @@ -301,7 +313,9 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 inner join {{ sessions_this_run }} as b on a.session_identifier = b.session_identifier - where a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} + where + {% if not var('snowplow__enable_keyhole_backfill',false) %} + a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} {% if allow_null_dvce_tstamps %} and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} {% else %} @@ -316,7 +330,9 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 and a.derived_tstamp <= {{ upper_limit }} {% endif %} - and {{ snowplow_utils.app_id_filter(app_ids) }} + and + {% endif %} + {{ snowplow_utils.app_id_filter(app_ids) }} ) SELECT * FROM main_logic diff --git a/macros/incremental_hooks/get_run_limits.sql b/macros/incremental_hooks/get_run_limits.sql index 726c1a68..c94030e4 100644 --- a/macros/incremental_hooks/get_run_limits.sql +++ b/macros/incremental_hooks/get_run_limits.sql @@ -1,11 +1,8 @@ -{# -Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} -{# Returns the sql to calculate the lower/upper limits of the run #} {% macro get_run_limits(min_last_success, max_last_success, models_matched_from_manifest, has_matched_all_models, start_date) -%} + {{ return(adapter.dispatch('get_run_limits', 'snowplow_utils')(min_last_success, max_last_success, models_matched_from_manifest, has_matched_all_models, start_date)) }} +{%- endmacro %} + +{% macro default__get_run_limits(min_last_success, max_last_success, models_matched_from_manifest, has_matched_all_models, start_date) -%} {% set start_tstamp = snowplow_utils.cast_to_tstamp(start_date) %} {% set min_last_success = snowplow_utils.cast_to_tstamp(min_last_success) %} @@ -15,7 +12,19 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return('') }} {% endif %} - {% if models_matched_from_manifest == 0 %} + {% if var('snowplow__enable_keyhole_backfill',false) %} + {% set start_tstamp = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_start_date')) %} + {% set max_last_success = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_end_date')) %} + + {% do snowplow_utils.log_message("Snowplow: Keyhole backfill enabled") %} + + {% set run_limits_query %} + select {{ start_tstamp }} as lower_limit, + least({{ max_last_success }}, + {{ snowplow_utils.timestamp_add('day', var("snowplow__backfill_limit_days", 30), start_tstamp) }}) as upper_limit + {% endset %} + + {% elif models_matched_from_manifest == 0 %} {# If no snowplow models are in the manifest, start from start_tstamp #} {% do snowplow_utils.log_message("Snowplow: No data in manifest. Processing data from start_date") %} @@ -60,4 +69,4 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(run_limits_query) }} -{% endmacro %} +{% endmacro %} \ No newline at end of file diff --git a/macros/incremental_hooks/return_base_new_event_limits.sql b/macros/incremental_hooks/return_base_new_event_limits.sql index 2a404581..03935737 100644 --- a/macros/incremental_hooks/return_base_new_event_limits.sql +++ b/macros/incremental_hooks/return_base_new_event_limits.sql @@ -1,10 +1,8 @@ -{# -Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} {% macro return_base_new_event_limits(base_events_this_run) -%} + {{ return(adapter.dispatch('return_base_new_event_limits', 'snowplow_utils')(base_events_this_run)) }} +{%- endmacro %} + +{% macro default__return_base_new_event_limits(base_events_this_run) -%} {# In case of not execute just return empty strings to avoid hitting database #} {% if not execute %} @@ -16,7 +14,15 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 schema=base_events_this_run.schema, identifier=base_events_this_run.name) %} - {% if target_relation is not none %} + {% if var('snowplow__enable_keyhole_backfill',false) %} + + {% set lower_limit = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_start_date')) %} + {% set upper_limit = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_end_date')) %} + {% set session_start_limit = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_start_date')) %} + + {{ return([lower_limit, upper_limit, session_start_limit]) }} + + {% elif target_relation is not none %} {% set limit_query %} select @@ -25,7 +31,6 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ snowplow_utils.timestamp_add('day', -var("snowplow__max_session_days", 3), 'lower_limit') }} as session_start_limit - from {{ base_events_this_run }} {% endset %} @@ -51,4 +56,4 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% endif %} -{%- endmacro %} +{%- endmacro %} \ No newline at end of file diff --git a/macros/incremental_hooks/snowplow_incremental_post_hook.sql b/macros/incremental_hooks/snowplow_incremental_post_hook.sql index 325ee197..5ff9140c 100644 --- a/macros/incremental_hooks/snowplow_incremental_post_hook.sql +++ b/macros/incremental_hooks/snowplow_incremental_post_hook.sql @@ -1,28 +1,27 @@ -{# -Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} -{# post-hook for incremental runs #} -{% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} +{% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) -%} + {{ return(adapter.dispatch('snowplow_incremental_post_hook', 'snowplow_utils')(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp'))) }} +{%- endmacro %} - {% set enabled_snowplow_models = snowplow_utils.get_enabled_snowplow_models(package_name) -%} +{% macro default__snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} - {% set successful_snowplow_models = snowplow_utils.get_successful_models(models=enabled_snowplow_models) -%} + {% if var('snowplow__enable_keyhole_backfill',false) %} + {% set enabled_snowplow_models = snowplow_utils.get_enabled_snowplow_models(package_name) -%} - {%- if incremental_manifest_table_name -%} - {%- set incremental_manifest_table = ref(incremental_manifest_table_name) -%} - {%- else -%} - {% set incremental_manifest_table = snowplow_utils.get_incremental_manifest_table_relation(package_name) -%} - {%- endif -%} + {% set successful_snowplow_models = snowplow_utils.get_successful_models(models=enabled_snowplow_models) -%} - {%- if base_events_this_run_table_name -%} - {%- set base_events_this_run_table = ref(base_events_this_run_table_name) -%} - {%- else -%} - {% set base_events_this_run_table = ref(package_name~'_base_events_this_run') -%} - {%- endif -%} + {%- if incremental_manifest_table_name -%} + {%- set incremental_manifest_table = ref(incremental_manifest_table_name) -%} + {%- else -%} + {% set incremental_manifest_table = snowplow_utils.get_incremental_manifest_table_relation(package_name) -%} + {%- endif -%} - {{ snowplow_utils.update_incremental_manifest_table(incremental_manifest_table, base_events_this_run_table, successful_snowplow_models, session_timestamp) }} + {%- if base_events_this_run_table_name -%} + {%- set base_events_this_run_table = ref(base_events_this_run_table_name) -%} + {%- else -%} + {% set base_events_this_run_table = ref(package_name~'_base_events_this_run') -%} + {%- endif -%} -{% endmacro %} + {{ snowplow_utils.update_incremental_manifest_table(incremental_manifest_table, base_events_this_run_table, successful_snowplow_models, session_timestamp) }} + + {% endif %} +{% endmacro %} \ No newline at end of file From dee789f96b051e6ac755eff126035d8ad28e2a68 Mon Sep 17 00:00:00 2001 From: jborlase-snowplow Date: Thu, 12 Jun 2025 16:10:40 +0300 Subject: [PATCH 02/11] Update snowplow_incremental_post_hook.sql Fixing bug --- .../snowplow_incremental_post_hook.sql | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/macros/incremental_hooks/snowplow_incremental_post_hook.sql b/macros/incremental_hooks/snowplow_incremental_post_hook.sql index 5ff9140c..b918028d 100644 --- a/macros/incremental_hooks/snowplow_incremental_post_hook.sql +++ b/macros/incremental_hooks/snowplow_incremental_post_hook.sql @@ -1,10 +1,6 @@ -{% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) -%} - {{ return(adapter.dispatch('snowplow_incremental_post_hook', 'snowplow_utils')(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp'))) }} -{%- endmacro %} +{% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} -{% macro default__snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} - - {% if var('snowplow__enable_keyhole_backfill',false) %} + {% if not var('snowplow__enable_keyhole_backfill',false) %} {% set enabled_snowplow_models = snowplow_utils.get_enabled_snowplow_models(package_name) -%} {% set successful_snowplow_models = snowplow_utils.get_successful_models(models=enabled_snowplow_models) -%} @@ -24,4 +20,4 @@ {{ snowplow_utils.update_incremental_manifest_table(incremental_manifest_table, base_events_this_run_table, successful_snowplow_models, session_timestamp) }} {% endif %} -{% endmacro %} \ No newline at end of file +{% endmacro %} From 9d87297340d0ee2e776b11b1367b74ea0f9a8f8c Mon Sep 17 00:00:00 2001 From: James Borlase Date: Fri, 13 Jun 2025 14:13:39 +0300 Subject: [PATCH 03/11] Updating logic for databricks/spark --- macros/base/base_create_snowplow_events_this_run.sql | 10 ++++++++-- .../snowplow_incremental_post_hook.sql | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index d864f912..bd1757a5 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -296,8 +296,14 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {%- endif %} e.* from {{ snowplow_events }} e - WHERE e.{{ session_timestamp }} >= {{ lower_limit }} - and e.{{ session_timestamp }} <= {{ upper_limit }} + WHERE + {% if var('snowplow__enable_keyhole_backfill',false) %} + e.{{ session_timestamp }} >= {{ snowplow_utils.timestamp_add('day', -max_session_days, lower_limit) }} + and e.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, upper_limit) }} + {% else %} + e.{{ session_timestamp }} >= {{ lower_limit }} + and e.{{ session_timestamp }} <= {{ upper_limit }} + {% endif %} ), main_logic as ( diff --git a/macros/incremental_hooks/snowplow_incremental_post_hook.sql b/macros/incremental_hooks/snowplow_incremental_post_hook.sql index b918028d..0da68c51 100644 --- a/macros/incremental_hooks/snowplow_incremental_post_hook.sql +++ b/macros/incremental_hooks/snowplow_incremental_post_hook.sql @@ -1,6 +1,8 @@ {% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} - {% if not var('snowplow__enable_keyhole_backfill',false) %} +{% macro default__snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} + + {% if var('snowplow__enable_keyhole_backfill',false) %} {% set enabled_snowplow_models = snowplow_utils.get_enabled_snowplow_models(package_name) -%} {% set successful_snowplow_models = snowplow_utils.get_successful_models(models=enabled_snowplow_models) -%} From 9261c933e46929167ce2debce7f955c4541bacad Mon Sep 17 00:00:00 2001 From: James Borlase Date: Fri, 13 Jun 2025 14:16:45 +0300 Subject: [PATCH 04/11] resolving conflict --- macros/incremental_hooks/snowplow_incremental_post_hook.sql | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/macros/incremental_hooks/snowplow_incremental_post_hook.sql b/macros/incremental_hooks/snowplow_incremental_post_hook.sql index 0da68c51..56a00c74 100644 --- a/macros/incremental_hooks/snowplow_incremental_post_hook.sql +++ b/macros/incremental_hooks/snowplow_incremental_post_hook.sql @@ -1,8 +1,6 @@ -{% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} +{% macro nowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} -{% macro default__snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} - - {% if var('snowplow__enable_keyhole_backfill',false) %} + {% if not var('snowplow__enable_keyhole_backfill',false) %} {% set enabled_snowplow_models = snowplow_utils.get_enabled_snowplow_models(package_name) -%} {% set successful_snowplow_models = snowplow_utils.get_successful_models(models=enabled_snowplow_models) -%} From c70fb2626f82110f82ed483bac6a89476c6ee274 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Fri, 13 Jun 2025 14:16:56 +0300 Subject: [PATCH 05/11] typo --- macros/incremental_hooks/snowplow_incremental_post_hook.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/incremental_hooks/snowplow_incremental_post_hook.sql b/macros/incremental_hooks/snowplow_incremental_post_hook.sql index 56a00c74..b918028d 100644 --- a/macros/incremental_hooks/snowplow_incremental_post_hook.sql +++ b/macros/incremental_hooks/snowplow_incremental_post_hook.sql @@ -1,4 +1,4 @@ -{% macro nowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} +{% macro snowplow_incremental_post_hook(package_name='snowplow', incremental_manifest_table_name=none, base_events_this_run_table_name=none, session_timestamp=var('snowplow__session_timestamp', 'load_tstamp')) %} {% if not var('snowplow__enable_keyhole_backfill',false) %} {% set enabled_snowplow_models = snowplow_utils.get_enabled_snowplow_models(package_name) -%} From 3443a1a3ab441dd0238fa5034a408d2e1b166c9e Mon Sep 17 00:00:00 2001 From: James Borlase Date: Mon, 16 Jun 2025 14:42:34 +0100 Subject: [PATCH 06/11] Updating keyhole backfill so session_start_limit subtracts max_session_days --- macros/incremental_hooks/return_base_new_event_limits.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/incremental_hooks/return_base_new_event_limits.sql b/macros/incremental_hooks/return_base_new_event_limits.sql index 03935737..76675353 100644 --- a/macros/incremental_hooks/return_base_new_event_limits.sql +++ b/macros/incremental_hooks/return_base_new_event_limits.sql @@ -18,7 +18,7 @@ {% set lower_limit = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_start_date')) %} {% set upper_limit = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_end_date')) %} - {% set session_start_limit = snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_start_date')) %} + {% set session_start_limit = snowplow_utils.timestamp_add('day',-var("snowplow__max_session_days", 3),snowplow_utils.cast_to_tstamp(var('snowplow__keyhole_backfill_start_date'))) %} {{ return([lower_limit, upper_limit, session_start_limit]) }} From c6ad868917584d200ddd90d81b17f42bb8f80901 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Tue, 17 Jun 2025 09:11:04 +0100 Subject: [PATCH 07/11] Removing additional table scan --- macros/base/base_create_snowplow_events_this_run.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index bd1757a5..99c57e66 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -298,7 +298,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 from {{ snowplow_events }} e WHERE {% if var('snowplow__enable_keyhole_backfill',false) %} - e.{{ session_timestamp }} >= {{ snowplow_utils.timestamp_add('day', -max_session_days, lower_limit) }} + e.{{ session_timestamp }} >= {{ lower_limit }} and e.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, upper_limit) }} {% else %} e.{{ session_timestamp }} >= {{ lower_limit }} From 2c10e1da537a3f82b64c1cff69f4512a0b4fe7b2 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Wed, 18 Jun 2025 13:49:35 +0100 Subject: [PATCH 08/11] Adding back late arriving event check --- .../base_create_snowplow_events_this_run.sql | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index 99c57e66..355fd0e9 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -54,16 +54,16 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 on a.session_identifier = b.session_identifier where + {% if allow_null_dvce_tstamps %} + and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} + {% else %} + and a.dvce_sent_tstamp <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'a.dvce_created_tstamp') }} + {% endif %} + {% if not var('snowplow__enable_keyhole_backfill',false) %} a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} - {% if allow_null_dvce_tstamps %} - and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} - {% else %} - and a.dvce_sent_tstamp <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'a.dvce_created_tstamp') }} - {% endif %} - and a.{{ session_timestamp }} >= {{ lower_limit }} and a.{{ session_timestamp }} <= {{ upper_limit }} and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events @@ -320,23 +320,24 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 on a.session_identifier = b.session_identifier where - {% if not var('snowplow__enable_keyhole_backfill',false) %} - a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} {% if allow_null_dvce_tstamps %} and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} {% else %} and a.dvce_sent_tstamp <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'a.dvce_created_tstamp') }} {% endif %} - and a.{{ session_timestamp }} >= {{ lower_limit }} - and a.{{ session_timestamp }} <= {{ upper_limit }} - and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events - {% if derived_tstamp_partitioned and target.type == 'bigquery' | as_bool() %} - and a.derived_tstamp >= {{ snowplow_utils.timestamp_add('hour', -1, lower_limit) }} - and a.derived_tstamp <= {{ upper_limit }} - {% endif %} + {% if not var('snowplow__enable_keyhole_backfill',false) %} + a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} + and a.{{ session_timestamp }} >= {{ lower_limit }} + and a.{{ session_timestamp }} <= {{ upper_limit }} + and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events - and + {% if derived_tstamp_partitioned and target.type == 'bigquery' | as_bool() %} + and a.derived_tstamp >= {{ snowplow_utils.timestamp_add('hour', -1, lower_limit) }} + and a.derived_tstamp <= {{ upper_limit }} + {% endif %} + + and {% endif %} {{ snowplow_utils.app_id_filter(app_ids) }} ) From 08600fb84cba16eae28fbd9e509e92a948f92a44 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Wed, 18 Jun 2025 13:52:43 +0100 Subject: [PATCH 09/11] Optimising default adapter of events this run --- macros/base/base_create_snowplow_events_this_run.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index 355fd0e9..9d09e8d3 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -39,6 +39,14 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 e.* from {{ snowplow_events }} e + where + {% if var('snowplow__enable_keyhole_backfill',false) %} + e.{{ session_timestamp }} >= {{ lower_limit }} + and e.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, upper_limit) }} + {% else %} + e.{{ session_timestamp }} >= {{ lower_limit }} + and e.{{ session_timestamp }} <= {{ upper_limit }} + {% endif %} ) From 6486a9f9fc3979e0eca109221a98a16965eb4805 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Tue, 1 Jul 2025 10:40:11 +0200 Subject: [PATCH 10/11] fixing bug in where clause --- macros/base/base_create_snowplow_events_this_run.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index 9d09e8d3..eddf1486 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -327,7 +327,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 inner join {{ sessions_this_run }} as b on a.session_identifier = b.session_identifier - where + where 1=1 {% if allow_null_dvce_tstamps %} and coalesce(a.dvce_sent_tstamp, a.collector_tstamp) <= {{ snowplow_utils.timestamp_add('day', days_late_allowed, 'coalesce(a.dvce_created_tstamp, a.collector_tstamp)') }} {% else %} @@ -335,7 +335,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% endif %} {% if not var('snowplow__enable_keyhole_backfill',false) %} - a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} + and a.{{ session_timestamp }} <= {{ snowplow_utils.timestamp_add('day', max_session_days, 'b.start_tstamp') }} and a.{{ session_timestamp }} >= {{ lower_limit }} and a.{{ session_timestamp }} <= {{ upper_limit }} and a.{{ session_timestamp }} >= b.start_tstamp -- deal with late loading events From 30cfa37233e19f6ef3d1b510450c0761da8c5170 Mon Sep 17 00:00:00 2001 From: James Borlase Date: Wed, 2 Jul 2025 22:04:12 +0200 Subject: [PATCH 11/11] missing 'and' before app_id filter --- macros/base/base_create_snowplow_events_this_run.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index eddf1486..abbaf84e 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -344,10 +344,8 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 and a.derived_tstamp >= {{ snowplow_utils.timestamp_add('hour', -1, lower_limit) }} and a.derived_tstamp <= {{ upper_limit }} {% endif %} - - and {% endif %} - {{ snowplow_utils.app_id_filter(app_ids) }} + and {{ snowplow_utils.app_id_filter(app_ids) }} ) SELECT * FROM main_logic