From 396acf3bbbe00a192cb0ea0a9ccf91b1d8d2850b Mon Sep 17 00:00:00 2001 From: Fuwn <50817549+Fuwn@users.noreply.github.com> Date: Sat, 24 Jan 2026 13:09:50 +0000 Subject: Initial commit Created from https://vercel.com/new --- db/clickhouse/migrations/01_edit_keys.sql | 18 ++ db/clickhouse/migrations/02_add_visit_id.sql | 90 ++++++ db/clickhouse/migrations/03_session_data.sql | 57 ++++ db/clickhouse/migrations/04_add_tag.sql | 77 +++++ db/clickhouse/migrations/05_add_utm_clid.sql | 332 +++++++++++++++++++++ db/clickhouse/migrations/06_update_subdivision.sql | 122 ++++++++ db/clickhouse/migrations/07_add_distinct_id.sql | 103 +++++++ .../migrations/08_update_hostname_view.sql | 253 ++++++++++++++++ db/clickhouse/schema.sql | 283 ++++++++++++++++++ .../data-migrations/convert-utm-clid-columns.sql | 49 +++ .../data-migrations/populate-revenue-table.sql | 41 +++ 11 files changed, 1425 insertions(+) create mode 100644 db/clickhouse/migrations/01_edit_keys.sql create mode 100644 db/clickhouse/migrations/02_add_visit_id.sql create mode 100644 db/clickhouse/migrations/03_session_data.sql create mode 100644 db/clickhouse/migrations/04_add_tag.sql create mode 100644 db/clickhouse/migrations/05_add_utm_clid.sql create mode 100644 db/clickhouse/migrations/06_update_subdivision.sql create mode 100644 db/clickhouse/migrations/07_add_distinct_id.sql create mode 100644 db/clickhouse/migrations/08_update_hostname_view.sql create mode 100644 db/clickhouse/schema.sql create mode 100644 db/postgresql/data-migrations/convert-utm-clid-columns.sql create mode 100644 db/postgresql/data-migrations/populate-revenue-table.sql (limited to 'db') diff --git a/db/clickhouse/migrations/01_edit_keys.sql b/db/clickhouse/migrations/01_edit_keys.sql new file mode 100644 index 0000000..3fc7dd7 --- /dev/null +++ b/db/clickhouse/migrations/01_edit_keys.sql @@ -0,0 +1,18 @@ +-- edit event_data values +ALTER TABLE "event_data" RENAME COLUMN "event_date_value" TO "date_value"; +ALTER TABLE "event_data" RENAME COLUMN "event_numeric_value" TO "number_value"; +ALTER TABLE "event_data" RENAME COLUMN "event_string_value" TO "string_value"; +ALTER TABLE "event_data" RENAME COLUMN "event_data_type" TO "data_type"; + +-- add job_id +ALTER TABLE "website_event" ADD COLUMN "job_id" UUID AFTER "created_at"; +ALTER TABLE "event_data" ADD COLUMN "job_id" UUID AFTER "created_at"; + +-- update event_data string +alter table umami.event_data +update string_value = number_value +where data_type = 2 + +alter table umami.event_data +update string_value = replaceOne(concat(CAST(toDateTime(date_value, 'UTC'), 'String'),'Z'), ' ', 'T') +where data_type = 4 \ No newline at end of file diff --git a/db/clickhouse/migrations/02_add_visit_id.sql b/db/clickhouse/migrations/02_add_visit_id.sql new file mode 100644 index 0000000..202c0fd --- /dev/null +++ b/db/clickhouse/migrations/02_add_visit_id.sql @@ -0,0 +1,90 @@ +CREATE TABLE umami.website_event_join +( + session_id UUID, + visit_id UUID, + created_at DateTime('UTC') +) + engine = MergeTree + ORDER BY (session_id, created_at) + SETTINGS index_granularity = 8192; + +INSERT INTO umami.website_event_join +SELECT DISTINCT + s.session_id, + generateUUIDv4() visit_id, + s.created_at +FROM (SELECT DISTINCT session_id, + date_trunc('hour', created_at) created_at + FROM website_event) s; + +-- create new table +CREATE TABLE umami.website_event_new +( + website_id UUID, + session_id UUID, + visit_id UUID, + event_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + subdivision2 LowCardinality(String), + city String, + url_path String, + url_query String, + referrer_path String, + referrer_query String, + referrer_domain String, + page_title String, + event_type UInt32, + event_name String, + created_at DateTime('UTC'), + job_id UUID +) + engine = MergeTree + ORDER BY (website_id, session_id, created_at) + SETTINGS index_granularity = 8192; + +INSERT INTO umami.website_event_new +SELECT we.website_id, + we.session_id, + j.visit_id, + we.event_id, + we.hostname, + we.browser, + we.os, + we.device, + we.screen, + we.language, + we.country, + we.subdivision1, + we.subdivision2, + we.city, + we.url_path, + we.url_query, + we.referrer_path, + we.referrer_query, + we.referrer_domain, + we.page_title, + we.event_type, + we.event_name, + we.created_at, + we.job_id +FROM umami.website_event we +JOIN umami.website_event_join j + ON we.session_id = j.session_id + and date_trunc('hour', we.created_at) = j.created_at + +RENAME TABLE umami.website_event TO umami.website_event_old; +RENAME TABLE umami.website_event_new TO umami.website_event; + +/* + + DROP TABLE umami.website_event_old + DROP TABLE umami.website_event_join + + */ \ No newline at end of file diff --git a/db/clickhouse/migrations/03_session_data.sql b/db/clickhouse/migrations/03_session_data.sql new file mode 100644 index 0000000..1ed2c06 --- /dev/null +++ b/db/clickhouse/migrations/03_session_data.sql @@ -0,0 +1,57 @@ +CREATE TABLE umami.event_data_new +( + website_id UUID, + session_id UUID, + event_id UUID, + url_path String, + event_name String, + data_key String, + string_value Nullable(String), + number_value Nullable(Decimal64(4)), + date_value Nullable(DateTime('UTC')), + data_type UInt32, + created_at DateTime('UTC'), + job_id Nullable(UUID) +) + engine = MergeTree + ORDER BY (website_id, event_id, data_key, created_at) + SETTINGS index_granularity = 8192; + +INSERT INTO umami.event_data_new +SELECT website_id, + session_id, + event_id, + url_path, + event_name, + event_key, + string_value, + number_value, + date_value, + data_type, + created_at, + NULL +FROM umami.event_data; + +CREATE TABLE umami.session_data +( + website_id UUID, + session_id UUID, + data_key String, + string_value Nullable(String), + number_value Nullable(Decimal64(4)), + date_value Nullable(DateTime('UTC')), + data_type UInt32, + created_at DateTime('UTC'), + job_id Nullable(UUID) +) + engine = MergeTree + ORDER BY (website_id, session_id, data_key, created_at) + SETTINGS index_granularity = 8192; + +RENAME TABLE umami.event_data TO umami.event_data_old; +RENAME TABLE umami.event_data_new TO umami.event_data; + +/* +DROP TABLE umami.event_data_old + */ + diff --git a/db/clickhouse/migrations/04_add_tag.sql b/db/clickhouse/migrations/04_add_tag.sql new file mode 100644 index 0000000..7ffc499 --- /dev/null +++ b/db/clickhouse/migrations/04_add_tag.sql @@ -0,0 +1,77 @@ +-- add tag column +ALTER TABLE umami.website_event ADD COLUMN "tag" String AFTER "event_name"; +ALTER TABLE umami.website_event_stats_hourly ADD COLUMN "tag" SimpleAggregateFunction(groupArrayArray, Array(String)) AFTER "max_time"; + +-- update materialized view +DROP TABLE umami.website_event_stats_hourly_mv; + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + tag, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + event_type, + timestamp); \ No newline at end of file diff --git a/db/clickhouse/migrations/05_add_utm_clid.sql b/db/clickhouse/migrations/05_add_utm_clid.sql new file mode 100644 index 0000000..30d9671 --- /dev/null +++ b/db/clickhouse/migrations/05_add_utm_clid.sql @@ -0,0 +1,332 @@ +-- Create Event +CREATE TABLE umami.website_event_new +( + website_id UUID, + session_id UUID, + visit_id UUID, + event_id UUID, + --sessions + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + subdivision2 LowCardinality(String), + city String, + --pageviews + url_path String, + url_query String, + utm_source String, + utm_medium String, + utm_campaign String, + utm_content String, + utm_term String, + referrer_path String, + referrer_query String, + referrer_domain String, + page_title String, + --clickIDs + gclid String, + fbclid String, + msclkid String, + ttclid String, + li_fat_id String, + twclid String, + --events + event_type UInt32, + event_name String, + tag String, + created_at DateTime('UTC'), + job_id Nullable(UUID) +) +ENGINE = MergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at) + PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id) + SETTINGS index_granularity = 8192; + +-- stats hourly +CREATE TABLE umami.website_event_stats_hourly_new +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_source SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_content SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_term SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + gclid SimpleAggregateFunction(groupArrayArray, Array(String)), + fbclid SimpleAggregateFunction(groupArrayArray, Array(String)), + msclkid SimpleAggregateFunction(groupArrayArray, Array(String)), + ttclid SimpleAggregateFunction(groupArrayArray, Array(String)), + li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)), + twclid SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + tag SimpleAggregateFunction(groupArrayArray, Array(String)), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY ( + website_id, + event_type, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id + ) + SAMPLE BY cityHash64(visit_id); + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv_new +TO umami.website_event_stats_hourly_new +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + toStartOfHour(created_at) timestamp +FROM umami.website_event_new +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + event_type, + timestamp); + +-- projections +ALTER TABLE umami.website_event_new +ADD PROJECTION website_event_url_path_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at +); + +ALTER TABLE umami.website_event_new MATERIALIZE PROJECTION website_event_url_path_projection; + +ALTER TABLE umami.website_event_new +ADD PROJECTION website_event_referrer_domain_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at +); + +ALTER TABLE umami.website_event_new MATERIALIZE PROJECTION website_event_referrer_domain_projection; + +-- migration +INSERT INTO umami.website_event_new +SELECT website_id, session_id, visit_id, event_id, hostname, browser, os, device, screen, language, country, subdivision1, subdivision2, city, url_path, url_query, + extract(url_query, 'utm_source=([^&]*)') AS utm_source, + extract(url_query, 'utm_medium=([^&]*)') AS utm_medium, + extract(url_query, 'utm_campaign=([^&]*)') AS utm_campaign, + extract(url_query, 'utm_content=([^&]*)') AS utm_content, + extract(url_query, 'utm_term=([^&]*)') AS utm_term,referrer_path, referrer_query, referrer_domain, + page_title, + extract(url_query, 'gclid=([^&]*)') gclid, + extract(url_query, 'fbclid=([^&]*)') fbclid, + extract(url_query, 'msclkid=([^&]*)') msclkid, + extract(url_query, 'ttclid=([^&]*)') ttclid, + extract(url_query, 'li_fat_id=([^&]*)') li_fat_id, + extract(url_query, 'twclid=([^&]*)') twclid, + event_type, event_name, tag, created_at, job_id +FROM umami.website_event + +-- rename tables +RENAME TABLE umami.website_event TO umami.website_event_old; +RENAME TABLE umami.website_event_new TO umami.website_event; + +RENAME TABLE umami.website_event_stats_hourly TO umami.website_event_stats_hourly_old; +RENAME TABLE umami.website_event_stats_hourly_new TO umami.website_event_stats_hourly; + +RENAME TABLE umami.website_event_stats_hourly_mv TO umami.website_event_stats_hourly_mv_old; +RENAME TABLE umami.website_event_stats_hourly_mv_new TO umami.website_event_stats_hourly_mv; + +-- recreate view +DROP TABLE umami.website_event_stats_hourly_mv; + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + city, + event_type, + timestamp); \ No newline at end of file diff --git a/db/clickhouse/migrations/06_update_subdivision.sql b/db/clickhouse/migrations/06_update_subdivision.sql new file mode 100644 index 0000000..70f4ba4 --- /dev/null +++ b/db/clickhouse/migrations/06_update_subdivision.sql @@ -0,0 +1,122 @@ +-- drop projections +ALTER TABLE umami.website_event DROP PROJECTION website_event_url_path_projection; +ALTER TABLE umami.website_event DROP PROJECTION website_event_referrer_domain_projection; + +--drop view +DROP TABLE umami.website_event_stats_hourly_mv; + +-- rename columns +ALTER TABLE umami.website_event RENAME COLUMN "subdivision1" TO "region"; +ALTER TABLE umami.website_event_stats_hourly RENAME COLUMN "subdivision1" TO "region"; + +-- drop columns +ALTER TABLE umami.website_event DROP COLUMN "subdivision2"; + +-- recreate projections +ALTER TABLE umami.website_event +ADD PROJECTION website_event_url_path_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at +); + +ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection; + +ALTER TABLE umami.website_event +ADD PROJECTION website_event_referrer_domain_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at +); + +ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection; + +-- recreate view +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + event_type, + timestamp); \ No newline at end of file diff --git a/db/clickhouse/migrations/07_add_distinct_id.sql b/db/clickhouse/migrations/07_add_distinct_id.sql new file mode 100644 index 0000000..e235694 --- /dev/null +++ b/db/clickhouse/migrations/07_add_distinct_id.sql @@ -0,0 +1,103 @@ +-- add tag column +ALTER TABLE umami.website_event ADD COLUMN "distinct_id" String AFTER "tag"; +ALTER TABLE umami.website_event_stats_hourly ADD COLUMN "distinct_id" String AFTER "tag"; +ALTER TABLE umami.session_data ADD COLUMN "distinct_id" String AFTER "data_type"; + +-- update materialized view +DROP TABLE umami.website_event_stats_hourly_mv; + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + distinct_id, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + distinct_id, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + event_type, + distinct_id, + timestamp); \ No newline at end of file diff --git a/db/clickhouse/migrations/08_update_hostname_view.sql b/db/clickhouse/migrations/08_update_hostname_view.sql new file mode 100644 index 0000000..061fa62 --- /dev/null +++ b/db/clickhouse/migrations/08_update_hostname_view.sql @@ -0,0 +1,253 @@ +-- create new hourly table +CREATE TABLE umami.website_event_stats_hourly_new +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname SimpleAggregateFunction(groupArrayArray, Array(String)), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + region LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_source SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_content SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_term SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + gclid SimpleAggregateFunction(groupArrayArray, Array(String)), + fbclid SimpleAggregateFunction(groupArrayArray, Array(String)), + msclkid SimpleAggregateFunction(groupArrayArray, Array(String)), + ttclid SimpleAggregateFunction(groupArrayArray, Array(String)), + li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)), + twclid SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + tag SimpleAggregateFunction(groupArrayArray, Array(String)), + distinct_id String, + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY ( + website_id, + event_type, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id + ) + SAMPLE BY cityHash64(visit_id); + +-- create view +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv_new +TO umami.website_event_stats_hourly_new +AS +SELECT + website_id, + session_id, + visit_id, + hostnames as hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + distinct_id, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + arrayFilter(x -> x != '', groupArray(hostname)) hostnames, + browser, + os, + device, + screen, + language, + country, + region, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + distinct_id, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + event_type, + distinct_id, + timestamp); + +-- rename tables +RENAME TABLE umami.website_event_stats_hourly TO umami.website_event_stats_hourly_old; +RENAME TABLE umami.website_event_stats_hourly_new TO umami.website_event_stats_hourly; + +-- drop views +DROP TABLE umami.website_event_stats_hourly_mv; +DROP TABLE umami.website_event_stats_hourly_mv_new; + +-- recreate view +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostnames as hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + distinct_id, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + arrayFilter(x -> x != '', groupArray(hostname)) hostnames, + browser, + os, + device, + screen, + language, + country, + region, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + distinct_id, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + event_type, + distinct_id, + timestamp); diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql new file mode 100644 index 0000000..e916032 --- /dev/null +++ b/db/clickhouse/schema.sql @@ -0,0 +1,283 @@ +-- Create Event +CREATE TABLE umami.website_event +( + website_id UUID, + session_id UUID, + visit_id UUID, + event_id UUID, + --sessions + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + region LowCardinality(String), + city String, + --pageviews + url_path String, + url_query String, + utm_source String, + utm_medium String, + utm_campaign String, + utm_content String, + utm_term String, + referrer_path String, + referrer_query String, + referrer_domain String, + page_title String, + --clickIDs + gclid String, + fbclid String, + msclkid String, + ttclid String, + li_fat_id String, + twclid String, + --events + event_type UInt32, + event_name String, + tag String, + distinct_id String, + created_at DateTime('UTC'), + job_id Nullable(UUID) +) +ENGINE = MergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at) + PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id) + SETTINGS index_granularity = 8192; + +CREATE TABLE umami.event_data +( + website_id UUID, + session_id UUID, + event_id UUID, + url_path String, + event_name String, + data_key String, + string_value Nullable(String), + number_value Nullable(Decimal(22, 4)), + date_value Nullable(DateTime('UTC')), + data_type UInt32, + created_at DateTime('UTC'), + job_id Nullable(UUID) +) +ENGINE = MergeTree + ORDER BY (website_id, event_id, data_key, created_at) + SETTINGS index_granularity = 8192; + +CREATE TABLE umami.session_data +( + website_id UUID, + session_id UUID, + data_key String, + string_value Nullable(String), + number_value Nullable(Decimal(22, 4)), + date_value Nullable(DateTime('UTC')), + data_type UInt32, + distinct_id String, + created_at DateTime('UTC'), + job_id Nullable(UUID) +) +ENGINE = ReplacingMergeTree + ORDER BY (website_id, session_id, data_key) + SETTINGS index_granularity = 8192; + +-- stats hourly +CREATE TABLE umami.website_event_stats_hourly +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname SimpleAggregateFunction(groupArrayArray, Array(String)), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + region LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_source SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_medium SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_campaign SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_content SimpleAggregateFunction(groupArrayArray, Array(String)), + utm_term SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + gclid SimpleAggregateFunction(groupArrayArray, Array(String)), + fbclid SimpleAggregateFunction(groupArrayArray, Array(String)), + msclkid SimpleAggregateFunction(groupArrayArray, Array(String)), + ttclid SimpleAggregateFunction(groupArrayArray, Array(String)), + li_fat_id SimpleAggregateFunction(groupArrayArray, Array(String)), + twclid SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + tag SimpleAggregateFunction(groupArrayArray, Array(String)), + distinct_id String, + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY ( + website_id, + event_type, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id + ) + SAMPLE BY cityHash64(visit_id); + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostnames as hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + utm_source, + utm_medium, + utm_campaign, + utm_content, + utm_term, + referrer_domain, + page_title, + gclid, + fbclid, + msclkid, + ttclid, + li_fat_id, + twclid, + event_type, + event_name, + views, + min_time, + max_time, + tag, + distinct_id, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + arrayFilter(x -> x != '', groupArray(hostname)) hostnames, + browser, + os, + device, + screen, + language, + country, + region, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(utm_source)) utm_source, + arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium, + arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign, + arrayFilter(x -> x != '', groupArray(utm_content)) utm_content, + arrayFilter(x -> x != '', groupArray(utm_term)) utm_term, + arrayFilter(x -> x != '' and x != hostname, groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + arrayFilter(x -> x != '', groupArray(gclid)) gclid, + arrayFilter(x -> x != '', groupArray(fbclid)) fbclid, + arrayFilter(x -> x != '', groupArray(msclkid)) msclkid, + arrayFilter(x -> x != '', groupArray(ttclid)) ttclid, + arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id, + arrayFilter(x -> x != '', groupArray(twclid)) twclid, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type != 2) views, + min(created_at) min_time, + max(created_at) max_time, + arrayFilter(x -> x != '', groupArray(tag)) tag, + distinct_id, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + event_type, + distinct_id, + timestamp); + +-- projections +ALTER TABLE umami.website_event +ADD PROJECTION website_event_url_path_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at +); + +ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection; + +ALTER TABLE umami.website_event +ADD PROJECTION website_event_referrer_domain_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at +); + +ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection; + +-- revenue +CREATE TABLE umami.website_revenue +( + website_id UUID, + session_id UUID, + event_id UUID, + event_name String, + currency String, + revenue DECIMAL(18,4), + created_at DateTime('UTC') +) +ENGINE = MergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY (website_id, session_id, created_at) + SETTINGS index_granularity = 8192; + + +CREATE MATERIALIZED VIEW umami.website_revenue_mv +TO umami.website_revenue +AS +SELECT DISTINCT + ed.website_id, + ed.session_id, + ed.event_id, + ed.event_name, + c.currency, + coalesce(toDecimal64(ed.number_value, 2), toDecimal64(ed.string_value, 2)) revenue, + ed.created_at +FROM umami.event_data ed +JOIN (SELECT event_id, string_value as currency + FROM umami.event_data + WHERE positionCaseInsensitive(data_key, 'currency') > 0) c + ON c.event_id = ed.event_id +WHERE positionCaseInsensitive(data_key, 'revenue') > 0; diff --git a/db/postgresql/data-migrations/convert-utm-clid-columns.sql b/db/postgresql/data-migrations/convert-utm-clid-columns.sql new file mode 100644 index 0000000..fffe1dd --- /dev/null +++ b/db/postgresql/data-migrations/convert-utm-clid-columns.sql @@ -0,0 +1,49 @@ +----------------------------------------------------- +-- PostgreSQL +----------------------------------------------------- +UPDATE "website_event" we +SET fbclid = LEFT(url.fbclid, 255), + gclid = LEFT(url.gclid, 255), + li_fat_id = LEFT(url.li_fat_id, 255), + msclkid = LEFT(url.msclkid, 255), + ttclid = LEFT(url.ttclid, 255), + twclid = LEFT(url.twclid, 255), + utm_campaign = LEFT(url.utm_campaign, 255), + utm_content = LEFT(url.utm_content, 255), + utm_medium = LEFT(url.utm_medium, 255), + utm_source = LEFT(url.utm_source, 255), + utm_term = LEFT(url.utm_term, 255) +FROM (SELECT event_id, website_id, session_id, + (regexp_matches(url_query, '(?:[&?]|^)fbclid=([^&]+)', 'i'))[1] AS fbclid, + (regexp_matches(url_query, '(?:[&?]|^)gclid=([^&]+)', 'i'))[1] AS gclid, + (regexp_matches(url_query, '(?:[&?]|^)li_fat_id=([^&]+)', 'i'))[1] AS li_fat_id, + (regexp_matches(url_query, '(?:[&?]|^)msclkid=([^&]+)', 'i'))[1] AS msclkid, + (regexp_matches(url_query, '(?:[&?]|^)ttclid=([^&]+)', 'i'))[1] AS ttclid, + (regexp_matches(url_query, '(?:[&?]|^)twclid=([^&]+)', 'i'))[1] AS twclid, + (regexp_matches(url_query, '(?:[&?]|^)utm_campaign=([^&]+)', 'i'))[1] AS utm_campaign, + (regexp_matches(url_query, '(?:[&?]|^)utm_content=([^&]+)', 'i'))[1] AS utm_content, + (regexp_matches(url_query, '(?:[&?]|^)utm_medium=([^&]+)', 'i'))[1] AS utm_medium, + (regexp_matches(url_query, '(?:[&?]|^)utm_source=([^&]+)', 'i'))[1] AS utm_source, + (regexp_matches(url_query, '(?:[&?]|^)utm_term=([^&]+)', 'i'))[1] AS utm_term + FROM "website_event" + WHERE url_query IS NOT NULL) url +WHERE we.event_id = url.event_id + and we.session_id = url.session_id + and we.website_id = url.website_id; + +----------------------------------------------------- +-- MySQL +----------------------------------------------------- +UPDATE `website_event` +SET fbclid = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)fbclid=[^&]+'), '=', -1), '&', 1), 255), + gclid = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)gclid=[^&]+'), '=', -1), '&', 1), 255), + li_fat_id = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)li_fat_id=[^&]+'), '=', -1), '&', 1), 255), + msclkid = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)msclkid=[^&]+'), '=', -1), '&', 1), 255), + ttclid = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)ttclid=[^&]+'), '=', -1), '&', 1), 255), + twclid = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)twclid=[^&]+'), '=', -1), '&', 1), 255), + utm_campaign = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_campaign=[^&]+'), '=', -1), '&', 1), 255), + utm_content = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_content=[^&]+'), '=', -1), '&', 1), 255), + utm_medium = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_medium=[^&]+'), '=', -1), '&', 1), 255), + utm_source = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_source=[^&]+'), '=', -1), '&', 1), 255), + utm_term = LEFT(SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_term=[^&]+'), '=', -1), '&', 1), 255) +WHERE url_query IS NOT NULL; diff --git a/db/postgresql/data-migrations/populate-revenue-table.sql b/db/postgresql/data-migrations/populate-revenue-table.sql new file mode 100644 index 0000000..9df7518 --- /dev/null +++ b/db/postgresql/data-migrations/populate-revenue-table.sql @@ -0,0 +1,41 @@ +----------------------------------------------------- +-- PostgreSQL +----------------------------------------------------- +INSERT INTO "revenue" +SELECT gen_random_uuid() revenue_id, + ed.website_id, + we.session_id, + we.event_id, + we.event_name, + currency.string_value currency, + coalesce(ed.number_value, cast(ed.string_value as numeric(19,4))) revenue, + ed.created_at +FROM event_data ed +JOIN website_event we +ON we.event_id = ed.website_event_id +JOIN (SELECT website_event_id, string_value + FROM event_data + WHERE data_key ilike '%currency%') currency +ON currency.website_event_id = ed.website_event_id +WHERE ed.data_key ilike '%revenue%'; + +----------------------------------------------------- +-- MySQL +----------------------------------------------------- +INSERT INTO `revenue` +SELECT UUID() revenue_id, + ed.website_id, + we.session_id, + we.event_id, + we.event_name, + currency.string_value currency, + coalesce(ed.number_value, cast(ed.string_value as decimal(19,4))) revenue, + ed.created_at +FROM event_data ed +JOIN website_event we +ON we.event_id = ed.website_event_id +JOIN (SELECT website_event_id, string_value + FROM event_data + WHERE data_key like '%currency%') currency +ON currency.website_event_id = ed.website_event_id +WHERE ed.data_key like '%revenue%'; \ No newline at end of file -- cgit v1.2.3