aboutsummaryrefslogtreecommitdiff
path: root/db/clickhouse/migrations/08_update_hostname_view.sql
diff options
context:
space:
mode:
Diffstat (limited to 'db/clickhouse/migrations/08_update_hostname_view.sql')
-rw-r--r--db/clickhouse/migrations/08_update_hostname_view.sql253
1 files changed, 253 insertions, 0 deletions
diff --git a/db/clickhouse/migrations/08_update_hostname_view.sql b/db/clickhouse/migrations/08_update_hostname_view.sql
new file mode 100644
index 0000000..061fa62
--- /dev/null
+++ b/db/clickhouse/migrations/08_update_hostname_view.sql
@@ -0,0 +1,253 @@
+-- create new hourly table
+CREATE TABLE umami.website_event_stats_hourly_new
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ hostname SimpleAggregateFunction(groupArrayArray, Array(String)),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ region LowCardinality(String),
+ city String,
+ entry_url AggregateFunction(argMin, String, DateTime('UTC')),
+ exit_url AggregateFunction(argMax, String, DateTime('UTC')),
+ url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
+ url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_source SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_content SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_term SimpleAggregateFunction(groupArrayArray, Array(String)),
+ referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
+ page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
+ gclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ fbclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ msclkid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ ttclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)),
+ twclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ event_type UInt32,
+ event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
+ views SimpleAggregateFunction(sum, UInt64),
+ min_time SimpleAggregateFunction(min, DateTime('UTC')),
+ max_time SimpleAggregateFunction(max, DateTime('UTC')),
+ tag SimpleAggregateFunction(groupArrayArray, Array(String)),
+ distinct_id String,
+ created_at Datetime('UTC')
+)
+ENGINE = AggregatingMergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (
+ website_id,
+ event_type,
+ toStartOfHour(created_at),
+ cityHash64(visit_id),
+ visit_id
+ )
+ SAMPLE BY cityHash64(visit_id);
+
+-- create view
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv_new
+TO umami.website_event_stats_hourly_new
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostnames as hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ distinct_id,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ arrayFilter(x -> x != '', groupArray(hostname)) hostnames,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ distinct_id,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ distinct_id,
+ timestamp);
+
+-- rename tables
+RENAME TABLE umami.website_event_stats_hourly TO umami.website_event_stats_hourly_old;
+RENAME TABLE umami.website_event_stats_hourly_new TO umami.website_event_stats_hourly;
+
+-- drop views
+DROP TABLE umami.website_event_stats_hourly_mv;
+DROP TABLE umami.website_event_stats_hourly_mv_new;
+
+-- recreate view
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostnames as hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ distinct_id,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ arrayFilter(x -> x != '', groupArray(hostname)) hostnames,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ distinct_id,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ distinct_id,
+ timestamp);