aboutsummaryrefslogtreecommitdiff
path: root/db/clickhouse
diff options
context:
space:
mode:
Diffstat (limited to 'db/clickhouse')
-rw-r--r--db/clickhouse/migrations/01_edit_keys.sql18
-rw-r--r--db/clickhouse/migrations/02_add_visit_id.sql90
-rw-r--r--db/clickhouse/migrations/03_session_data.sql57
-rw-r--r--db/clickhouse/migrations/04_add_tag.sql77
-rw-r--r--db/clickhouse/migrations/05_add_utm_clid.sql332
-rw-r--r--db/clickhouse/migrations/06_update_subdivision.sql122
-rw-r--r--db/clickhouse/migrations/07_add_distinct_id.sql103
-rw-r--r--db/clickhouse/migrations/08_update_hostname_view.sql253
-rw-r--r--db/clickhouse/schema.sql283
9 files changed, 1335 insertions, 0 deletions
diff --git a/db/clickhouse/migrations/01_edit_keys.sql b/db/clickhouse/migrations/01_edit_keys.sql
new file mode 100644
index 0000000..3fc7dd7
--- /dev/null
+++ b/db/clickhouse/migrations/01_edit_keys.sql
@@ -0,0 +1,18 @@
+-- edit event_data values
+ALTER TABLE "event_data" RENAME COLUMN "event_date_value" TO "date_value";
+ALTER TABLE "event_data" RENAME COLUMN "event_numeric_value" TO "number_value";
+ALTER TABLE "event_data" RENAME COLUMN "event_string_value" TO "string_value";
+ALTER TABLE "event_data" RENAME COLUMN "event_data_type" TO "data_type";
+
+-- add job_id
+ALTER TABLE "website_event" ADD COLUMN "job_id" UUID AFTER "created_at";
+ALTER TABLE "event_data" ADD COLUMN "job_id" UUID AFTER "created_at";
+
+-- update event_data string
+alter table umami.event_data
+update string_value = number_value
+where data_type = 2
+
+alter table umami.event_data
+update string_value = replaceOne(concat(CAST(toDateTime(date_value, 'UTC'), 'String'),'Z'), ' ', 'T')
+where data_type = 4 \ No newline at end of file
diff --git a/db/clickhouse/migrations/02_add_visit_id.sql b/db/clickhouse/migrations/02_add_visit_id.sql
new file mode 100644
index 0000000..202c0fd
--- /dev/null
+++ b/db/clickhouse/migrations/02_add_visit_id.sql
@@ -0,0 +1,90 @@
+CREATE TABLE umami.website_event_join
+(
+ session_id UUID,
+ visit_id UUID,
+ created_at DateTime('UTC')
+)
+ engine = MergeTree
+ ORDER BY (session_id, created_at)
+ SETTINGS index_granularity = 8192;
+
+INSERT INTO umami.website_event_join
+SELECT DISTINCT
+ s.session_id,
+ generateUUIDv4() visit_id,
+ s.created_at
+FROM (SELECT DISTINCT session_id,
+ date_trunc('hour', created_at) created_at
+ FROM website_event) s;
+
+-- create new table
+CREATE TABLE umami.website_event_new
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ event_id UUID,
+ hostname LowCardinality(String),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ subdivision1 LowCardinality(String),
+ subdivision2 LowCardinality(String),
+ city String,
+ url_path String,
+ url_query String,
+ referrer_path String,
+ referrer_query String,
+ referrer_domain String,
+ page_title String,
+ event_type UInt32,
+ event_name String,
+ created_at DateTime('UTC'),
+ job_id UUID
+)
+ engine = MergeTree
+ ORDER BY (website_id, session_id, created_at)
+ SETTINGS index_granularity = 8192;
+
+INSERT INTO umami.website_event_new
+SELECT we.website_id,
+ we.session_id,
+ j.visit_id,
+ we.event_id,
+ we.hostname,
+ we.browser,
+ we.os,
+ we.device,
+ we.screen,
+ we.language,
+ we.country,
+ we.subdivision1,
+ we.subdivision2,
+ we.city,
+ we.url_path,
+ we.url_query,
+ we.referrer_path,
+ we.referrer_query,
+ we.referrer_domain,
+ we.page_title,
+ we.event_type,
+ we.event_name,
+ we.created_at,
+ we.job_id
+FROM umami.website_event we
+JOIN umami.website_event_join j
+ ON we.session_id = j.session_id
+ and date_trunc('hour', we.created_at) = j.created_at
+
+RENAME TABLE umami.website_event TO umami.website_event_old;
+RENAME TABLE umami.website_event_new TO umami.website_event;
+
+/*
+
+ DROP TABLE umami.website_event_old
+ DROP TABLE umami.website_event_join
+
+ */ \ No newline at end of file
diff --git a/db/clickhouse/migrations/03_session_data.sql b/db/clickhouse/migrations/03_session_data.sql
new file mode 100644
index 0000000..1ed2c06
--- /dev/null
+++ b/db/clickhouse/migrations/03_session_data.sql
@@ -0,0 +1,57 @@
+CREATE TABLE umami.event_data_new
+(
+ website_id UUID,
+ session_id UUID,
+ event_id UUID,
+ url_path String,
+ event_name String,
+ data_key String,
+ string_value Nullable(String),
+ number_value Nullable(Decimal64(4)),
+ date_value Nullable(DateTime('UTC')),
+ data_type UInt32,
+ created_at DateTime('UTC'),
+ job_id Nullable(UUID)
+)
+ engine = MergeTree
+ ORDER BY (website_id, event_id, data_key, created_at)
+ SETTINGS index_granularity = 8192;
+
+INSERT INTO umami.event_data_new
+SELECT website_id,
+ session_id,
+ event_id,
+ url_path,
+ event_name,
+ event_key,
+ string_value,
+ number_value,
+ date_value,
+ data_type,
+ created_at,
+ NULL
+FROM umami.event_data;
+
+CREATE TABLE umami.session_data
+(
+ website_id UUID,
+ session_id UUID,
+ data_key String,
+ string_value Nullable(String),
+ number_value Nullable(Decimal64(4)),
+ date_value Nullable(DateTime('UTC')),
+ data_type UInt32,
+ created_at DateTime('UTC'),
+ job_id Nullable(UUID)
+)
+ engine = MergeTree
+ ORDER BY (website_id, session_id, data_key, created_at)
+ SETTINGS index_granularity = 8192;
+
+RENAME TABLE umami.event_data TO umami.event_data_old;
+RENAME TABLE umami.event_data_new TO umami.event_data;
+
+/*
+DROP TABLE umami.event_data_old
+ */
+
diff --git a/db/clickhouse/migrations/04_add_tag.sql b/db/clickhouse/migrations/04_add_tag.sql
new file mode 100644
index 0000000..7ffc499
--- /dev/null
+++ b/db/clickhouse/migrations/04_add_tag.sql
@@ -0,0 +1,77 @@
+-- add tag column
+ALTER TABLE umami.website_event ADD COLUMN "tag" String AFTER "event_name";
+ALTER TABLE umami.website_event_stats_hourly ADD COLUMN "tag" SimpleAggregateFunction(groupArrayArray, Array(String)) AFTER "max_time";
+
+-- update materialized view
+DROP TABLE umami.website_event_stats_hourly_mv;
+
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ referrer_domain,
+ page_title,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ event_type,
+ timestamp); \ No newline at end of file
diff --git a/db/clickhouse/migrations/05_add_utm_clid.sql b/db/clickhouse/migrations/05_add_utm_clid.sql
new file mode 100644
index 0000000..30d9671
--- /dev/null
+++ b/db/clickhouse/migrations/05_add_utm_clid.sql
@@ -0,0 +1,332 @@
+-- Create Event
+CREATE TABLE umami.website_event_new
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ event_id UUID,
+ --sessions
+ hostname LowCardinality(String),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ subdivision1 LowCardinality(String),
+ subdivision2 LowCardinality(String),
+ city String,
+ --pageviews
+ url_path String,
+ url_query String,
+ utm_source String,
+ utm_medium String,
+ utm_campaign String,
+ utm_content String,
+ utm_term String,
+ referrer_path String,
+ referrer_query String,
+ referrer_domain String,
+ page_title String,
+ --clickIDs
+ gclid String,
+ fbclid String,
+ msclkid String,
+ ttclid String,
+ li_fat_id String,
+ twclid String,
+ --events
+ event_type UInt32,
+ event_name String,
+ tag String,
+ created_at DateTime('UTC'),
+ job_id Nullable(UUID)
+)
+ENGINE = MergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at)
+ PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id)
+ SETTINGS index_granularity = 8192;
+
+-- stats hourly
+CREATE TABLE umami.website_event_stats_hourly_new
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ hostname LowCardinality(String),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ subdivision1 LowCardinality(String),
+ city String,
+ entry_url AggregateFunction(argMin, String, DateTime('UTC')),
+ exit_url AggregateFunction(argMax, String, DateTime('UTC')),
+ url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
+ url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_source SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_content SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_term SimpleAggregateFunction(groupArrayArray, Array(String)),
+ referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
+ page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
+ gclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ fbclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ msclkid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ ttclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)),
+ twclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ event_type UInt32,
+ event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
+ views SimpleAggregateFunction(sum, UInt64),
+ min_time SimpleAggregateFunction(min, DateTime('UTC')),
+ max_time SimpleAggregateFunction(max, DateTime('UTC')),
+ tag SimpleAggregateFunction(groupArrayArray, Array(String)),
+ created_at Datetime('UTC')
+)
+ENGINE = AggregatingMergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (
+ website_id,
+ event_type,
+ toStartOfHour(created_at),
+ cityHash64(visit_id),
+ visit_id
+ )
+ SAMPLE BY cityHash64(visit_id);
+
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv_new
+TO umami.website_event_stats_hourly_new
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event_new
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ event_type,
+ timestamp);
+
+-- projections
+ALTER TABLE umami.website_event_new
+ADD PROJECTION website_event_url_path_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at
+);
+
+ALTER TABLE umami.website_event_new MATERIALIZE PROJECTION website_event_url_path_projection;
+
+ALTER TABLE umami.website_event_new
+ADD PROJECTION website_event_referrer_domain_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at
+);
+
+ALTER TABLE umami.website_event_new MATERIALIZE PROJECTION website_event_referrer_domain_projection;
+
+-- migration
+INSERT INTO umami.website_event_new
+SELECT website_id, session_id, visit_id, event_id, hostname, browser, os, device, screen, language, country, subdivision1, subdivision2, city, url_path, url_query,
+ extract(url_query, 'utm_source=([^&]*)') AS utm_source,
+ extract(url_query, 'utm_medium=([^&]*)') AS utm_medium,
+ extract(url_query, 'utm_campaign=([^&]*)') AS utm_campaign,
+ extract(url_query, 'utm_content=([^&]*)') AS utm_content,
+ extract(url_query, 'utm_term=([^&]*)') AS utm_term,referrer_path, referrer_query, referrer_domain,
+ page_title,
+ extract(url_query, 'gclid=([^&]*)') gclid,
+ extract(url_query, 'fbclid=([^&]*)') fbclid,
+ extract(url_query, 'msclkid=([^&]*)') msclkid,
+ extract(url_query, 'ttclid=([^&]*)') ttclid,
+ extract(url_query, 'li_fat_id=([^&]*)') li_fat_id,
+ extract(url_query, 'twclid=([^&]*)') twclid,
+ event_type, event_name, tag, created_at, job_id
+FROM umami.website_event
+
+-- rename tables
+RENAME TABLE umami.website_event TO umami.website_event_old;
+RENAME TABLE umami.website_event_new TO umami.website_event;
+
+RENAME TABLE umami.website_event_stats_hourly TO umami.website_event_stats_hourly_old;
+RENAME TABLE umami.website_event_stats_hourly_new TO umami.website_event_stats_hourly;
+
+RENAME TABLE umami.website_event_stats_hourly_mv TO umami.website_event_stats_hourly_mv_old;
+RENAME TABLE umami.website_event_stats_hourly_mv_new TO umami.website_event_stats_hourly_mv;
+
+-- recreate view
+DROP TABLE umami.website_event_stats_hourly_mv;
+
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ subdivision1,
+ city,
+ event_type,
+ timestamp); \ No newline at end of file
diff --git a/db/clickhouse/migrations/06_update_subdivision.sql b/db/clickhouse/migrations/06_update_subdivision.sql
new file mode 100644
index 0000000..70f4ba4
--- /dev/null
+++ b/db/clickhouse/migrations/06_update_subdivision.sql
@@ -0,0 +1,122 @@
+-- drop projections
+ALTER TABLE umami.website_event DROP PROJECTION website_event_url_path_projection;
+ALTER TABLE umami.website_event DROP PROJECTION website_event_referrer_domain_projection;
+
+--drop view
+DROP TABLE umami.website_event_stats_hourly_mv;
+
+-- rename columns
+ALTER TABLE umami.website_event RENAME COLUMN "subdivision1" TO "region";
+ALTER TABLE umami.website_event_stats_hourly RENAME COLUMN "subdivision1" TO "region";
+
+-- drop columns
+ALTER TABLE umami.website_event DROP COLUMN "subdivision2";
+
+-- recreate projections
+ALTER TABLE umami.website_event
+ADD PROJECTION website_event_url_path_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at
+);
+
+ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection;
+
+ALTER TABLE umami.website_event
+ADD PROJECTION website_event_referrer_domain_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at
+);
+
+ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection;
+
+-- recreate view
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ timestamp); \ No newline at end of file
diff --git a/db/clickhouse/migrations/07_add_distinct_id.sql b/db/clickhouse/migrations/07_add_distinct_id.sql
new file mode 100644
index 0000000..e235694
--- /dev/null
+++ b/db/clickhouse/migrations/07_add_distinct_id.sql
@@ -0,0 +1,103 @@
+-- add tag column
+ALTER TABLE umami.website_event ADD COLUMN "distinct_id" String AFTER "tag";
+ALTER TABLE umami.website_event_stats_hourly ADD COLUMN "distinct_id" String AFTER "tag";
+ALTER TABLE umami.session_data ADD COLUMN "distinct_id" String AFTER "data_type";
+
+-- update materialized view
+DROP TABLE umami.website_event_stats_hourly_mv;
+
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ distinct_id,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ distinct_id,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ distinct_id,
+ timestamp); \ No newline at end of file
diff --git a/db/clickhouse/migrations/08_update_hostname_view.sql b/db/clickhouse/migrations/08_update_hostname_view.sql
new file mode 100644
index 0000000..061fa62
--- /dev/null
+++ b/db/clickhouse/migrations/08_update_hostname_view.sql
@@ -0,0 +1,253 @@
+-- create new hourly table
+CREATE TABLE umami.website_event_stats_hourly_new
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ hostname SimpleAggregateFunction(groupArrayArray, Array(String)),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ region LowCardinality(String),
+ city String,
+ entry_url AggregateFunction(argMin, String, DateTime('UTC')),
+ exit_url AggregateFunction(argMax, String, DateTime('UTC')),
+ url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
+ url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_source SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_content SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_term SimpleAggregateFunction(groupArrayArray, Array(String)),
+ referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
+ page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
+ gclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ fbclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ msclkid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ ttclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)),
+ twclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ event_type UInt32,
+ event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
+ views SimpleAggregateFunction(sum, UInt64),
+ min_time SimpleAggregateFunction(min, DateTime('UTC')),
+ max_time SimpleAggregateFunction(max, DateTime('UTC')),
+ tag SimpleAggregateFunction(groupArrayArray, Array(String)),
+ distinct_id String,
+ created_at Datetime('UTC')
+)
+ENGINE = AggregatingMergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (
+ website_id,
+ event_type,
+ toStartOfHour(created_at),
+ cityHash64(visit_id),
+ visit_id
+ )
+ SAMPLE BY cityHash64(visit_id);
+
+-- create view
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv_new
+TO umami.website_event_stats_hourly_new
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostnames as hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ distinct_id,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ arrayFilter(x -> x != '', groupArray(hostname)) hostnames,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ distinct_id,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ distinct_id,
+ timestamp);
+
+-- rename tables
+RENAME TABLE umami.website_event_stats_hourly TO umami.website_event_stats_hourly_old;
+RENAME TABLE umami.website_event_stats_hourly_new TO umami.website_event_stats_hourly;
+
+-- drop views
+DROP TABLE umami.website_event_stats_hourly_mv;
+DROP TABLE umami.website_event_stats_hourly_mv_new;
+
+-- recreate view
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostnames as hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ distinct_id,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ arrayFilter(x -> x != '', groupArray(hostname)) hostnames,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type = 1) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ distinct_id,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ distinct_id,
+ timestamp);
diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql
new file mode 100644
index 0000000..e916032
--- /dev/null
+++ b/db/clickhouse/schema.sql
@@ -0,0 +1,283 @@
+-- Create Event
+CREATE TABLE umami.website_event
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ event_id UUID,
+ --sessions
+ hostname LowCardinality(String),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ region LowCardinality(String),
+ city String,
+ --pageviews
+ url_path String,
+ url_query String,
+ utm_source String,
+ utm_medium String,
+ utm_campaign String,
+ utm_content String,
+ utm_term String,
+ referrer_path String,
+ referrer_query String,
+ referrer_domain String,
+ page_title String,
+ --clickIDs
+ gclid String,
+ fbclid String,
+ msclkid String,
+ ttclid String,
+ li_fat_id String,
+ twclid String,
+ --events
+ event_type UInt32,
+ event_name String,
+ tag String,
+ distinct_id String,
+ created_at DateTime('UTC'),
+ job_id Nullable(UUID)
+)
+ENGINE = MergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at)
+ PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id)
+ SETTINGS index_granularity = 8192;
+
+CREATE TABLE umami.event_data
+(
+ website_id UUID,
+ session_id UUID,
+ event_id UUID,
+ url_path String,
+ event_name String,
+ data_key String,
+ string_value Nullable(String),
+ number_value Nullable(Decimal(22, 4)),
+ date_value Nullable(DateTime('UTC')),
+ data_type UInt32,
+ created_at DateTime('UTC'),
+ job_id Nullable(UUID)
+)
+ENGINE = MergeTree
+ ORDER BY (website_id, event_id, data_key, created_at)
+ SETTINGS index_granularity = 8192;
+
+CREATE TABLE umami.session_data
+(
+ website_id UUID,
+ session_id UUID,
+ data_key String,
+ string_value Nullable(String),
+ number_value Nullable(Decimal(22, 4)),
+ date_value Nullable(DateTime('UTC')),
+ data_type UInt32,
+ distinct_id String,
+ created_at DateTime('UTC'),
+ job_id Nullable(UUID)
+)
+ENGINE = ReplacingMergeTree
+ ORDER BY (website_id, session_id, data_key)
+ SETTINGS index_granularity = 8192;
+
+-- stats hourly
+CREATE TABLE umami.website_event_stats_hourly
+(
+ website_id UUID,
+ session_id UUID,
+ visit_id UUID,
+ hostname SimpleAggregateFunction(groupArrayArray, Array(String)),
+ browser LowCardinality(String),
+ os LowCardinality(String),
+ device LowCardinality(String),
+ screen LowCardinality(String),
+ language LowCardinality(String),
+ country LowCardinality(String),
+ region LowCardinality(String),
+ city String,
+ entry_url AggregateFunction(argMin, String, DateTime('UTC')),
+ exit_url AggregateFunction(argMax, String, DateTime('UTC')),
+ url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
+ url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_source SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_content SimpleAggregateFunction(groupArrayArray, Array(String)),
+ utm_term SimpleAggregateFunction(groupArrayArray, Array(String)),
+ referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
+ page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
+ gclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ fbclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ msclkid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ ttclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)),
+ twclid SimpleAggregateFunction(groupArrayArray, Array(String)),
+ event_type UInt32,
+ event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
+ views SimpleAggregateFunction(sum, UInt64),
+ min_time SimpleAggregateFunction(min, DateTime('UTC')),
+ max_time SimpleAggregateFunction(max, DateTime('UTC')),
+ tag SimpleAggregateFunction(groupArrayArray, Array(String)),
+ distinct_id String,
+ created_at Datetime('UTC')
+)
+ENGINE = AggregatingMergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (
+ website_id,
+ event_type,
+ toStartOfHour(created_at),
+ cityHash64(visit_id),
+ visit_id
+ )
+ SAMPLE BY cityHash64(visit_id);
+
+CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
+TO umami.website_event_stats_hourly
+AS
+SELECT
+ website_id,
+ session_id,
+ visit_id,
+ hostnames as hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ entry_url,
+ exit_url,
+ url_paths as url_path,
+ url_query,
+ utm_source,
+ utm_medium,
+ utm_campaign,
+ utm_content,
+ utm_term,
+ referrer_domain,
+ page_title,
+ gclid,
+ fbclid,
+ msclkid,
+ ttclid,
+ li_fat_id,
+ twclid,
+ event_type,
+ event_name,
+ views,
+ min_time,
+ max_time,
+ tag,
+ distinct_id,
+ timestamp as created_at
+FROM (SELECT
+ website_id,
+ session_id,
+ visit_id,
+ arrayFilter(x -> x != '', groupArray(hostname)) hostnames,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ argMinState(url_path, created_at) entry_url,
+ argMaxState(url_path, created_at) exit_url,
+ arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
+ arrayFilter(x -> x != '', groupArray(url_query)) url_query,
+ arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
+ arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
+ arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
+ arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
+ arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
+ arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain,
+ arrayFilter(x -> x != '', groupArray(page_title)) page_title,
+ arrayFilter(x -> x != '', groupArray(gclid)) gclid,
+ arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
+ arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
+ arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
+ arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
+ arrayFilter(x -> x != '', groupArray(twclid)) twclid,
+ event_type,
+ if(event_type = 2, groupArray(event_name), []) event_name,
+ sumIf(1, event_type != 2) views,
+ min(created_at) min_time,
+ max(created_at) max_time,
+ arrayFilter(x -> x != '', groupArray(tag)) tag,
+ distinct_id,
+ toStartOfHour(created_at) timestamp
+FROM umami.website_event
+GROUP BY website_id,
+ session_id,
+ visit_id,
+ hostname,
+ browser,
+ os,
+ device,
+ screen,
+ language,
+ country,
+ region,
+ city,
+ event_type,
+ distinct_id,
+ timestamp);
+
+-- projections
+ALTER TABLE umami.website_event
+ADD PROJECTION website_event_url_path_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at
+);
+
+ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection;
+
+ALTER TABLE umami.website_event
+ADD PROJECTION website_event_referrer_domain_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at
+);
+
+ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection;
+
+-- revenue
+CREATE TABLE umami.website_revenue
+(
+ website_id UUID,
+ session_id UUID,
+ event_id UUID,
+ event_name String,
+ currency String,
+ revenue DECIMAL(18,4),
+ created_at DateTime('UTC')
+)
+ENGINE = MergeTree
+ PARTITION BY toYYYYMM(created_at)
+ ORDER BY (website_id, session_id, created_at)
+ SETTINGS index_granularity = 8192;
+
+
+CREATE MATERIALIZED VIEW umami.website_revenue_mv
+TO umami.website_revenue
+AS
+SELECT DISTINCT
+ ed.website_id,
+ ed.session_id,
+ ed.event_id,
+ ed.event_name,
+ c.currency,
+ coalesce(toDecimal64(ed.number_value, 2), toDecimal64(ed.string_value, 2)) revenue,
+ ed.created_at
+FROM umami.event_data ed
+JOIN (SELECT event_id, string_value as currency
+ FROM umami.event_data
+ WHERE positionCaseInsensitive(data_key, 'currency') > 0) c
+ ON c.event_id = ed.event_id
+WHERE positionCaseInsensitive(data_key, 'revenue') > 0;