diff options
Diffstat (limited to 'src/queries/sql')
48 files changed, 5910 insertions, 0 deletions
diff --git a/src/queries/sql/events/getEventData.ts b/src/queries/sql/events/getEventData.ts new file mode 100644 index 0000000..f12c95c --- /dev/null +++ b/src/queries/sql/events/getEventData.ts @@ -0,0 +1,63 @@ +import type { EventData } from '@/generated/prisma/client'; +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; + +const FUNCTION_NAME = 'getEventData'; + +export async function getEventData( + ...args: [websiteId: string, eventId: string] +): Promise<EventData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, eventId: string) { + const { rawQuery } = prisma; + + return rawQuery( + ` + select event_data.website_id as "websiteId", + event_data.website_event_id as "eventId", + website_event.event_name as "eventName", + event_data.data_key as "dataKey", + event_data.string_value as "stringValue", + event_data.number_value as "numberValue", + event_data.date_value as "dateValue", + event_data.data_type as "dataType", + event_data.created_at as "createdAt" + from event_data + join website_event on website_event.event_id = event_data.website_event_id + and website_event.website_id = {{websiteId::uuid}} + where event_data.website_id = {{websiteId::uuid}} + and event_data.website_event_id = {{eventId::uuid}} + `, + { websiteId, eventId }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, eventId: string): Promise<EventData[]> { + const { rawQuery } = clickhouse; + + return rawQuery( + ` + select website_id as websiteId, + event_id as eventId, + event_name as eventName, + data_key as dataKey, + string_value as stringValue, + number_value as numberValue, + date_value as dateValue, + data_type as dataType, + created_at as createdAt + from event_data + where website_id = {websiteId:UUID} + and event_id = {eventId:UUID} + `, + { websiteId, eventId }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventDataEvents.ts b/src/queries/sql/events/getEventDataEvents.ts new file mode 100644 index 0000000..6c8f12c --- /dev/null +++ b/src/queries/sql/events/getEventDataEvents.ts @@ -0,0 +1,139 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventDataEvents'; + +export interface WebsiteEventData { + eventName?: string; + propertyName: string; + dataType: number; + propertyValue?: string; + total: number; +} + +export async function getEventDataEvents( + ...args: [websiteId: string, filters: QueryFilters] +): Promise<WebsiteEventData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { rawQuery, parseFilters } = prisma; + const { event } = filters; + const { queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + if (event) { + return rawQuery( + ` + select + website_event.event_name as "eventName", + event_data.data_key as "propertyName", + event_data.data_type as "dataType", + event_data.string_value as "propertyValue", + count(*) as "total" + from event_data + inner join website_event + on website_event.event_id = event_data.website_event_id + where event_data.website_id = {{websiteId::uuid}} + and event_data.created_at between {{startDate}} and {{endDate}} + and website_event.event_name = {{event}} + group by website_event.event_name, event_data.data_key, event_data.data_type, event_data.string_value + order by 1 asc, 2 asc, 3 asc, 5 desc + `, + queryParams, + FUNCTION_NAME, + ); + } + + return rawQuery( + ` + select + website_event.event_name as "eventName", + event_data.data_key as "propertyName", + event_data.data_type as "dataType", + count(*) as "total" + from event_data + inner join website_event + on website_event.event_id = event_data.website_event_id + where event_data.website_id = {{websiteId::uuid}} + and event_data.created_at between {{startDate}} and {{endDate}} + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ eventName: string; propertyName: string; dataType: number; total: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { event } = filters; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + if (event) { + return rawQuery( + ` + select + event_name as eventName, + data_key as propertyName, + data_type as dataType, + string_value as propertyValue, + count(*) as total + from event_data + join website_event + on website_event.event_id = event_data.event_id + and website_event.website_id = event_data.website_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where event_data.website_id = {websiteId:UUID} + and event_data.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_data.event_name = {event:String} + ${filterQuery} + group by data_key, data_type, string_value, event_name + order by 1 asc, 2 asc, 3 asc, 5 desc + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); + } + + return rawQuery( + ` + select + event_name as eventName, + data_key as propertyName, + data_type as dataType, + count(*) as total + from event_data + join website_event + on website_event.event_id = event_data.event_id + and website_event.website_id = event_data.website_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where event_data.website_id = {websiteId:UUID} + and event_data.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by data_key, data_type, event_name + order by 1 asc, 2 asc + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventDataFields.ts b/src/queries/sql/events/getEventDataFields.ts new file mode 100644 index 0000000..9337769 --- /dev/null +++ b/src/queries/sql/events/getEventDataFields.ts @@ -0,0 +1,84 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventDataFields'; + +export async function getEventDataFields(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { rawQuery, parseFilters, getDateSQL } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + data_key as "propertyName", + data_type as "dataType", + case + when data_type = 2 then replace(string_value, '.0000', '') + when data_type = 4 then ${getDateSQL('date_value', 'hour')} + else string_value + end as "value", + count(*) as "total" + from event_data + join website_event on website_event.event_id = event_data.website_event_id + and website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${cohortQuery} + ${joinSessionQuery} + where event_data.website_id = {{websiteId::uuid}} + and event_data.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by data_key, data_type, value + order by 2 desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ propertyName: string; dataType: number; propertyValue: string; total: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ ...filters, websiteId }); + + return rawQuery( + ` + select + data_key as propertyName, + data_type as dataType, + multiIf(data_type = 2, replaceAll(string_value, '.0000', ''), + data_type = 4, toString(date_trunc('hour', date_value)), + string_value) as "value", + count(*) as "total" + from event_data + join website_event + on website_event.event_id = event_data.event_id + and website_event.website_id = event_data.website_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where event_data.website_id = {websiteId:UUID} + and event_data.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by data_key, data_type, value + order by 2 desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventDataProperties.ts b/src/queries/sql/events/getEventDataProperties.ts new file mode 100644 index 0000000..82c078f --- /dev/null +++ b/src/queries/sql/events/getEventDataProperties.ts @@ -0,0 +1,88 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventDataProperties'; + +export async function getEventDataProperties( + ...args: [websiteId: string, filters: QueryFilters & { propertyName?: string }] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + filters: QueryFilters & { propertyName?: string }, +) { + const { rawQuery, parseFilters } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters( + { ...filters, websiteId }, + { + columns: { propertyName: 'data_key' }, + }, + ); + + return rawQuery( + ` + select + website_event.event_name as "eventName", + event_data.data_key as "propertyName", + count(*) as "total" + from event_data + join website_event on website_event.event_id = event_data.website_event_id + and website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${cohortQuery} + ${joinSessionQuery} + where event_data.website_id = {{websiteId::uuid}} + and event_data.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by website_event.event_name, event_data.data_key + order by 3 desc + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters & { propertyName?: string }, +): Promise<{ eventName: string; propertyName: string; total: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters( + { ...filters, websiteId }, + { + columns: { propertyName: 'data_key' }, + }, + ); + + return rawQuery( + ` + select + event_name as eventName, + data_key as propertyName, + count(*) as total + from event_data + join website_event + on website_event.event_id = event_data.event_id + and website_event.website_id = event_data.website_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where event_data.website_id = {websiteId:UUID} + and event_data.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by event_name, data_key + order by 1, 3 desc + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventDataStats.ts b/src/queries/sql/events/getEventDataStats.ts new file mode 100644 index 0000000..89e1358 --- /dev/null +++ b/src/queries/sql/events/getEventDataStats.ts @@ -0,0 +1,90 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventDataStats'; + +export async function getEventDataStats( + ...args: [websiteId: string, filters: QueryFilters] +): Promise<{ + events: number; + properties: number; + records: number; +}> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }).then(results => results?.[0]); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { rawQuery, parseFilters } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + count(distinct t.website_event_id) as "events", + count(distinct t.data_key) as "properties", + sum(t.total) as "records" + from ( + select + website_event_id, + data_key, + count(*) as "total" + from event_data + join website_event on website_event.event_id = event_data.website_event_id + and website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${cohortQuery} + ${joinSessionQuery} + where event_data.website_id = {{websiteId::uuid}} + and event_data.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by website_event_id, data_key + ) as t + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ events: number; properties: number; records: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ ...filters, websiteId }); + + return rawQuery( + ` + select + count(distinct t.event_id) as "events", + count(distinct t.data_key) as "properties", + sum(t.total) as "records" + from ( + select + event_id, + data_key, + count(*) as "total" + from event_data + join website_event + on website_event.event_id = event_data.event_id + and website_event.website_id = event_data.website_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where event_data.website_id = {websiteId:UUID} + and event_data.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by event_id, data_key + ) as t + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventDataUsage.ts b/src/queries/sql/events/getEventDataUsage.ts new file mode 100644 index 0000000..50613a7 --- /dev/null +++ b/src/queries/sql/events/getEventDataUsage.ts @@ -0,0 +1,38 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, notImplemented, PRISMA, runQuery } from '@/lib/db'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventDataUsage'; + +export function getEventDataUsage(...args: [websiteIds: string[], filters: QueryFilters]) { + return runQuery({ + [PRISMA]: notImplemented, + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +function clickhouseQuery( + websiteIds: string[], + filters: QueryFilters, +): Promise<{ websiteId: string; count: number }[]> { + const { rawQuery } = clickhouse; + const { startDate, endDate } = filters; + + return rawQuery( + ` + select + website_id as websiteId, + count(*) as count + from event_data + where created_at between {startDate:DateTime64} and {endDate:DateTime64} + and website_id in {websiteIds:Array(UUID)} + group by website_id + `, + { + websiteIds, + startDate, + endDate, + }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventDataValues.ts b/src/queries/sql/events/getEventDataValues.ts new file mode 100644 index 0000000..0426e64 --- /dev/null +++ b/src/queries/sql/events/getEventDataValues.ts @@ -0,0 +1,93 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventDataValues'; + +interface WebsiteEventData { + value: string; + total: number; +} + +export async function getEventDataValues( + ...args: [websiteId: string, filters: QueryFilters & { propertyName?: string }] +): Promise<WebsiteEventData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + filters: QueryFilters & { propertyName?: string }, +) { + const { rawQuery, parseFilters, getDateSQL } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + case + when data_type = 2 then replace(string_value, '.0000', '') + when data_type = 4 then ${getDateSQL('date_value', 'hour')} + else string_value + end as "value", + count(*) as "total" + from event_data + join website_event on website_event.event_id = event_data.website_event_id + and website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${cohortQuery} + ${joinSessionQuery} + where event_data.website_id = {{websiteId::uuid}} + and event_data.created_at between {{startDate}} and {{endDate}} + and event_data.data_key = {{propertyName}} + ${filterQuery} + group by value + order by 2 desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters & { propertyName?: string }, +): Promise<{ value: string; total: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ ...filters, websiteId }); + + return rawQuery( + ` + select + multiIf(data_type = 2, replaceAll(string_value, '.0000', ''), + data_type = 4, toString(date_trunc('hour', date_value)), + string_value) as "value", + count(*) as "total" + from event_data + join website_event + on website_event.event_id = event_data.event_id + and website_event.website_id = event_data.website_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where event_data.website_id = {websiteId:UUID} + and event_data.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_data.data_key = {propertyName:String} + and event_data.event_name = {event:String} + ${filterQuery} + group by value + order by 2 desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventExpandedMetrics.ts b/src/queries/sql/events/getEventExpandedMetrics.ts new file mode 100644 index 0000000..f03a347 --- /dev/null +++ b/src/queries/sql/events/getEventExpandedMetrics.ts @@ -0,0 +1,132 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventExpandedMetrics'; + +export interface EventExpandedMetricParameters { + type: string; + limit?: string; + offset?: string; +} + +export interface EventExpandedMetricData { + name: string; + pageviews: number; + visitors: number; + visits: number; + bounces: number; + totaltime: number; +} + +export async function getEventExpandedMetrics( + ...args: [websiteId: string, parameters: EventExpandedMetricParameters, filters: QueryFilters] +): Promise<EventExpandedMetricData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: EventExpandedMetricParameters, + filters: QueryFilters, +) { + const { type, limit = 500, offset = 0 } = parameters; + const column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + eventType: EVENT_TYPE.customEvent, + }, + { joinSession: SESSION_COLUMNS.includes(type) }, + ); + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} name, + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by name, website_event.session_id, website_event.visit_id + ) as t + group by name + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: EventExpandedMetricParameters, + filters: QueryFilters, +): Promise<EventExpandedMetricData[]> { + const { type, limit = 500, offset = 0 } = parameters; + const column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + eventType: EVENT_TYPE.customEvent, + }); + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select + ${column} name, + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and name != '' + ${filterQuery} + group by name, session_id, visit_id + ) as t + group by name + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventMetrics.ts b/src/queries/sql/events/getEventMetrics.ts new file mode 100644 index 0000000..500c67e --- /dev/null +++ b/src/queries/sql/events/getEventMetrics.ts @@ -0,0 +1,97 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventMetrics'; + +export interface EventMetricParameters { + type: string; + limit?: string; + offset?: string; +} + +export interface EventMetricData { + x: string; + t: string; + y: number; +} + +export async function getEventMetrics( + ...args: [websiteId: string, parameters: EventMetricParameters, filters: QueryFilters] +): Promise<EventMetricData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: EventMetricParameters, + filters: QueryFilters, +) { + const { type, limit = 500, offset = 0 } = parameters; + const column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + eventType: EVENT_TYPE.customEvent, + }, + { joinSession: SESSION_COLUMNS.includes(type) }, + ); + + return rawQuery( + ` + select ${column} x, + count(*) as y + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by 1 + order by 2 desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: EventMetricParameters, + filters: QueryFilters, +): Promise<EventMetricData[]> { + const { type, limit = 500, offset = 0 } = parameters; + const column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + eventType: EVENT_TYPE.customEvent, + }); + + return rawQuery( + `select ${column} x, + count(*) as y + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by x + order by y desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getEventStats.ts b/src/queries/sql/events/getEventStats.ts new file mode 100644 index 0000000..81d12a0 --- /dev/null +++ b/src/queries/sql/events/getEventStats.ts @@ -0,0 +1,101 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventStats'; + +interface WebsiteEventMetric { + x: string; + t: string; + y: number; +} + +export async function getEventStats( + ...args: [websiteId: string, filters: QueryFilters] +): Promise<WebsiteEventMetric[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { timezone = 'utc', unit = 'day' } = filters; + const { rawQuery, getDateSQL, parseFilters } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + eventType: EVENT_TYPE.customEvent, + }); + + return rawQuery( + ` + select + event_name x, + ${getDateSQL('website_event.created_at', unit, timezone)} t, + count(*) y + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by 1, 2 + order by 2 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ x: string; t: string; y: number }[]> { + const { timezone = 'UTC', unit = 'day' } = filters; + const { rawQuery, getDateSQL, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + eventType: EVENT_TYPE.customEvent, + }); + + let sql = ''; + + if (filterQuery || cohortQuery) { + sql = ` + select + event_name x, + ${getDateSQL('created_at', unit, timezone)} t, + count(*) y + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by x, t + order by t + `; + } else { + sql = ` + select + event_name x, + ${getDateSQL('created_at', unit, timezone)} t, + count(*) y + from ( + select arrayJoin(event_name) as event_name, + created_at + from website_event_stats_hourly website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ) as g + group by x, t + order by t + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME); +} diff --git a/src/queries/sql/events/getEventUsage.ts b/src/queries/sql/events/getEventUsage.ts new file mode 100644 index 0000000..40f5a96 --- /dev/null +++ b/src/queries/sql/events/getEventUsage.ts @@ -0,0 +1,38 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, notImplemented, PRISMA, runQuery } from '@/lib/db'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getEventUsage'; + +export function getEventUsage(...args: [websiteIds: string[], filters: QueryFilters]) { + return runQuery({ + [PRISMA]: notImplemented, + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +function clickhouseQuery( + websiteIds: string[], + filters: QueryFilters, +): Promise<{ websiteId: string; count: number }[]> { + const { rawQuery } = clickhouse; + const { startDate, endDate } = filters; + + return rawQuery( + ` + select + website_id as websiteId, + count(*) as count + from website_event + where website_id in {websiteIds:Array(UUID)} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + group by website_id + `, + { + websiteIds, + startDate, + endDate, + }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/getWebsiteEvents.ts b/src/queries/sql/events/getWebsiteEvents.ts new file mode 100644 index 0000000..f11d3ff --- /dev/null +++ b/src/queries/sql/events/getWebsiteEvents.ts @@ -0,0 +1,119 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getWebsiteEvents'; + +export function getWebsiteEvents(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { pagedRawQuery, parseFilters } = prisma; + const { search } = filters; + const { filterQuery, dateQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + const searchQuery = search + ? `and ((event_name ilike {{search}} and event_type = 2) + or (url_path ilike {{search}} and event_type = 1))` + : ''; + + return pagedRawQuery( + ` + select + website_event.event_id as "id", + website_event.website_id as "websiteId", + website_event.session_id as "sessionId", + website_event.created_at as "createdAt", + website_event.hostname, + website_event.url_path as "urlPath", + website_event.url_query as "urlQuery", + website_event.referrer_path as "referrerPath", + website_event.referrer_query as "referrerQuery", + website_event.referrer_domain as "referrerDomain", + session.country as country, + city as city, + device as device, + os as os, + browser as browser, + page_title as "pageTitle", + website_event.event_type as "eventType", + website_event.event_name as "eventName", + event_id IN (select website_event_id + from event_data + where website_id = {{websiteId::uuid}} + and created_at between {{startDate}} and {{endDate}}) AS "hasData" + from website_event + ${cohortQuery} + join session on session.session_id = website_event.session_id + and session.website_id = website_event.website_id + where website_event.website_id = {{websiteId::uuid}} + ${dateQuery} + ${filterQuery} + ${searchQuery} + order by website_event.created_at desc + `, + queryParams, + filters, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, filters: QueryFilters) { + const { pagedRawQuery, parseFilters } = clickhouse; + const { search } = filters; + const { queryParams, dateQuery, cohortQuery, filterQuery } = parseFilters({ + ...filters, + websiteId, + }); + + const searchQuery = search + ? `and ((positionCaseInsensitive(event_name, {search:String}) > 0 and event_type = 2) + or (positionCaseInsensitive(url_path, {search:String}) > 0 and event_type = 1))` + : ''; + + return pagedRawQuery( + ` + select + event_id as id, + website_id as websiteId, + session_id as sessionId, + created_at as createdAt, + hostname, + url_path as urlPath, + url_query as urlQuery, + referrer_path as referrerPath, + referrer_query as referrerQuery, + referrer_domain as referrerDomain, + country as country, + city as city, + device as device, + os as os, + browser as browser, + page_title as pageTitle, + event_type as eventType, + event_name as eventName, + event_id IN (select event_id + from event_data + where website_id = {websiteId:UUID} + ${dateQuery}) as hasData + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + ${dateQuery} + ${filterQuery} + ${searchQuery} + order by created_at desc + `, + queryParams, + filters, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/events/saveEvent.ts b/src/queries/sql/events/saveEvent.ts new file mode 100644 index 0000000..7313fe4 --- /dev/null +++ b/src/queries/sql/events/saveEvent.ts @@ -0,0 +1,249 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_NAME_LENGTH, PAGE_TITLE_LENGTH, URL_LENGTH } from '@/lib/constants'; +import { uuid } from '@/lib/crypto'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import kafka from '@/lib/kafka'; +import prisma from '@/lib/prisma'; +import { saveEventData } from './saveEventData'; +import { saveRevenue } from './saveRevenue'; + +export interface SaveEventArgs { + websiteId: string; + sessionId: string; + visitId: string; + eventType: number; + createdAt?: Date; + + // Page + pageTitle?: string; + hostname?: string; + urlPath: string; + urlQuery?: string; + referrerPath?: string; + referrerQuery?: string; + referrerDomain?: string; + + // Session + distinctId?: string; + browser?: string; + os?: string; + device?: string; + screen?: string; + language?: string; + country?: string; + region?: string; + city?: string; + + // Events + eventName?: string; + eventData?: any; + tag?: string; + + // UTM + utmSource?: string; + utmMedium?: string; + utmCampaign?: string; + utmContent?: string; + utmTerm?: string; + + // Click IDs + gclid?: string; + fbclid?: string; + msclkid?: string; + ttclid?: string; + lifatid?: string; + twclid?: string; +} + +export async function saveEvent(args: SaveEventArgs) { + return runQuery({ + [PRISMA]: () => relationalQuery(args), + [CLICKHOUSE]: () => clickhouseQuery(args), + }); +} + +async function relationalQuery({ + websiteId, + sessionId, + visitId, + eventType, + createdAt, + pageTitle, + hostname, + urlPath, + urlQuery, + referrerPath, + referrerQuery, + referrerDomain, + eventName, + eventData, + tag, + utmSource, + utmMedium, + utmCampaign, + utmContent, + utmTerm, + gclid, + fbclid, + msclkid, + ttclid, + lifatid, + twclid, +}: SaveEventArgs) { + const websiteEventId = uuid(); + + await prisma.client.websiteEvent.create({ + data: { + id: websiteEventId, + websiteId, + sessionId, + visitId, + urlPath: urlPath?.substring(0, URL_LENGTH), + urlQuery: urlQuery?.substring(0, URL_LENGTH), + utmSource, + utmMedium, + utmCampaign, + utmContent, + utmTerm, + referrerPath: referrerPath?.substring(0, URL_LENGTH), + referrerQuery: referrerQuery?.substring(0, URL_LENGTH), + referrerDomain: referrerDomain?.substring(0, URL_LENGTH), + pageTitle: pageTitle?.substring(0, PAGE_TITLE_LENGTH), + gclid, + fbclid, + msclkid, + ttclid, + lifatid, + twclid, + eventType, + eventName: eventName ? eventName?.substring(0, EVENT_NAME_LENGTH) : null, + tag, + hostname, + createdAt, + }, + }); + + if (eventData) { + await saveEventData({ + websiteId, + sessionId, + eventId: websiteEventId, + urlPath: urlPath?.substring(0, URL_LENGTH), + eventName: eventName?.substring(0, EVENT_NAME_LENGTH), + eventData, + createdAt, + }); + + const { revenue, currency } = eventData; + + if (revenue > 0 && currency) { + await saveRevenue({ + websiteId, + sessionId, + eventId: websiteEventId, + eventName: eventName?.substring(0, EVENT_NAME_LENGTH), + currency, + revenue, + createdAt, + }); + } + } +} + +async function clickhouseQuery({ + websiteId, + sessionId, + visitId, + eventType, + createdAt, + pageTitle, + hostname, + urlPath, + urlQuery, + referrerPath, + referrerQuery, + referrerDomain, + distinctId, + browser, + os, + device, + screen, + language, + country, + region, + city, + eventName, + eventData, + tag, + utmSource, + utmMedium, + utmCampaign, + utmContent, + utmTerm, + gclid, + fbclid, + msclkid, + ttclid, + lifatid, + twclid, +}: SaveEventArgs) { + const { insert, getUTCString } = clickhouse; + const { sendMessage } = kafka; + const eventId = uuid(); + + const message = { + website_id: websiteId, + session_id: sessionId, + visit_id: visitId, + event_id: eventId, + country: country, + region: country && region ? (region.includes('-') ? region : `${country}-${region}`) : null, + city: city, + url_path: urlPath?.substring(0, URL_LENGTH), + url_query: urlQuery?.substring(0, URL_LENGTH), + utm_source: utmSource, + utm_medium: utmMedium, + utm_campaign: utmCampaign, + utm_content: utmContent, + utm_term: utmTerm, + referrer_path: referrerPath?.substring(0, URL_LENGTH), + referrer_query: referrerQuery?.substring(0, URL_LENGTH), + referrer_domain: referrerDomain?.substring(0, URL_LENGTH), + page_title: pageTitle?.substring(0, PAGE_TITLE_LENGTH), + gclid: gclid, + fbclid: fbclid, + msclkid: msclkid, + ttclid: ttclid, + li_fat_id: lifatid, + twclid: twclid, + event_type: eventType, + event_name: eventName ? eventName?.substring(0, EVENT_NAME_LENGTH) : null, + tag: tag, + distinct_id: distinctId, + created_at: getUTCString(createdAt), + browser, + os, + device, + screen, + language, + hostname, + }; + + if (kafka.enabled) { + await sendMessage('event', message); + } else { + await insert('website_event', [message]); + } + + if (eventData) { + await saveEventData({ + websiteId, + sessionId, + eventId, + urlPath: urlPath?.substring(0, URL_LENGTH), + eventName: eventName?.substring(0, EVENT_NAME_LENGTH), + eventData, + createdAt, + }); + } +} diff --git a/src/queries/sql/events/saveEventData.ts b/src/queries/sql/events/saveEventData.ts new file mode 100644 index 0000000..b8b0e02 --- /dev/null +++ b/src/queries/sql/events/saveEventData.ts @@ -0,0 +1,79 @@ +import clickhouse from '@/lib/clickhouse'; +import { DATA_TYPE } from '@/lib/constants'; +import { uuid } from '@/lib/crypto'; +import { flattenJSON, getStringValue } from '@/lib/data'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import kafka from '@/lib/kafka'; +import prisma from '@/lib/prisma'; +import type { DynamicData } from '@/lib/types'; + +export interface SaveEventDataArgs { + websiteId: string; + eventId: string; + sessionId?: string; + urlPath?: string; + eventName?: string; + eventData: DynamicData; + createdAt?: Date; +} + +export async function saveEventData(data: SaveEventDataArgs) { + return runQuery({ + [PRISMA]: () => relationalQuery(data), + [CLICKHOUSE]: () => clickhouseQuery(data), + }); +} + +async function relationalQuery(data: SaveEventDataArgs) { + const { websiteId, eventId, eventData, createdAt } = data; + + const jsonKeys = flattenJSON(eventData); + + // id, websiteEventId, eventStringValue + const flattenedData = jsonKeys.map(a => ({ + id: uuid(), + websiteEventId: eventId, + websiteId, + dataKey: a.key, + stringValue: getStringValue(a.value, a.dataType), + numberValue: a.dataType === DATA_TYPE.number ? a.value : null, + dateValue: a.dataType === DATA_TYPE.date ? new Date(a.value) : null, + dataType: a.dataType, + createdAt, + })); + + await prisma.client.eventData.createMany({ + data: flattenedData, + }); +} + +async function clickhouseQuery(data: SaveEventDataArgs) { + const { websiteId, sessionId, eventId, urlPath, eventName, eventData, createdAt } = data; + + const { insert, getUTCString } = clickhouse; + const { sendMessage } = kafka; + + const jsonKeys = flattenJSON(eventData); + + const messages = jsonKeys.map(({ key, value, dataType }) => { + return { + website_id: websiteId, + session_id: sessionId, + event_id: eventId, + url_path: urlPath, + event_name: eventName, + data_key: key, + data_type: dataType, + string_value: getStringValue(value, dataType), + number_value: dataType === DATA_TYPE.number ? value : null, + date_value: dataType === DATA_TYPE.date ? getUTCString(value) : null, + created_at: getUTCString(createdAt), + }; + }); + + if (kafka.enabled) { + await sendMessage('event_data', messages); + } else { + await insert('event_data', messages); + } +} diff --git a/src/queries/sql/events/saveRevenue.ts b/src/queries/sql/events/saveRevenue.ts new file mode 100644 index 0000000..a38df83 --- /dev/null +++ b/src/queries/sql/events/saveRevenue.ts @@ -0,0 +1,36 @@ +import { uuid } from '@/lib/crypto'; +import { PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; + +export interface SaveRevenueArgs { + websiteId: string; + sessionId: string; + eventId: string; + eventName: string; + currency: string; + revenue: number; + createdAt: Date; +} + +export async function saveRevenue(data: SaveRevenueArgs) { + return runQuery({ + [PRISMA]: () => relationalQuery(data), + }); +} + +async function relationalQuery(data: SaveRevenueArgs) { + const { websiteId, sessionId, eventId, eventName, currency, revenue, createdAt } = data; + + await prisma.client.revenue.create({ + data: { + id: uuid(), + websiteId, + sessionId, + eventId, + eventName, + currency, + revenue, + createdAt, + }, + }); +} diff --git a/src/queries/sql/getActiveVisitors.ts b/src/queries/sql/getActiveVisitors.ts new file mode 100644 index 0000000..d763c12 --- /dev/null +++ b/src/queries/sql/getActiveVisitors.ts @@ -0,0 +1,50 @@ +import { subMinutes } from 'date-fns'; +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; + +const FUNCTION_NAME = 'getActiveVisitors'; + +export async function getActiveVisitors(...args: [websiteId: string]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string) { + const { rawQuery } = prisma; + const startDate = subMinutes(new Date(), 5); + + const result = await rawQuery( + ` + select count(distinct session_id) as "visitors" + from website_event + where website_id = {{websiteId::uuid}} + and created_at >= {{startDate}} + `, + { websiteId, startDate }, + FUNCTION_NAME, + ); + + return result?.[0] ?? null; +} + +async function clickhouseQuery(websiteId: string): Promise<{ x: number }> { + const { rawQuery } = clickhouse; + const startDate = subMinutes(new Date(), 5); + + const result = await rawQuery( + ` + select + count(distinct session_id) as "visitors" + from website_event + where website_id = {websiteId:UUID} + and created_at >= {startDate:DateTime64} + `, + { websiteId, startDate }, + FUNCTION_NAME, + ); + + return result[0] ?? null; +} diff --git a/src/queries/sql/getChannelExpandedMetrics.ts b/src/queries/sql/getChannelExpandedMetrics.ts new file mode 100644 index 0000000..33640d5 --- /dev/null +++ b/src/queries/sql/getChannelExpandedMetrics.ts @@ -0,0 +1,190 @@ +import clickhouse from '@/lib/clickhouse'; +import { + EMAIL_DOMAINS, + PAID_AD_PARAMS, + SEARCH_DOMAINS, + SHOPPING_DOMAINS, + SOCIAL_DOMAINS, + VIDEO_DOMAINS, +} from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getChannelExpandedMetrics'; + +export interface ChannelExpandedMetricsParameters { + limit?: number | string; + offset?: number | string; +} + +export interface ChannelExpandedMetricsData { + name: string; + pageviews: number; + visitors: number; + visits: number; + bounces: number; + totaltime: number; +} + +export async function getChannelExpandedMetrics( + ...args: [websiteId: string, filters?: QueryFilters] +): Promise<ChannelExpandedMetricsData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + filters: QueryFilters, +): Promise<ChannelExpandedMetricsData[]> { + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; + const { queryParams, filterQuery, joinSessionQuery, cohortQuery, dateQuery } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + WITH prefix AS ( + select case when website_event.utm_medium LIKE 'p%' OR + website_event.utm_medium LIKE '%ppc%' OR + website_event.utm_medium LIKE '%retargeting%' OR + website_event.utm_medium LIKE '%paid%' then 'paid' else 'organic' end prefix, + website_event.referrer_domain, + website_event.url_query, + website_event.utm_medium, + website_event.utm_source, + website_event.session_id, + website_event.visit_id, + count(*) c, + min(website_event.created_at) min_time, + max(website_event.created_at) max_time + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.event_type != 2 + ${dateQuery} + ${filterQuery} + group by prefix, + website_event.referrer_domain, + website_event.url_query, + website_event.utm_medium, + website_event.utm_source, + website_event.session_id, + website_event.visit_id), + + channels as ( + select case + when referrer_domain = '' and url_query = '' then 'direct' + when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds' + when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' + when utm_medium ilike '%affiliate%' then 'affiliate' + when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms' + when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search') + when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') + when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email' + when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping') + when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video') + else '' end AS name, + session_id, + visit_id, + c, + min_time, + max_time + from prefix) + + select + name, + sum(c) as "pageviews", + count(distinct session_id) as "visitors", + count(distinct visit_id) as "visits", + sum(case when c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('min_time', 'max_time')}) as "totaltime" + from channels + where name != '' + group by name + order by visitors desc, visits desc + `, + queryParams, + FUNCTION_NAME, + ).then(results => results.map(item => ({ ...item, y: Number(item.y) }))); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<ChannelExpandedMetricsData[]> { + const { rawQuery, parseFilters } = clickhouse; + const { queryParams, filterQuery, cohortQuery } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select case when multiSearchAny(utm_medium, ['cp', 'ppc', 'retargeting', 'paid']) != 0 then 'paid' else 'organic' end prefix, + case + when referrer_domain = '' and url_query = '' then 'direct' + when multiSearchAny(url_query, [${toClickHouseStringArray( + PAID_AD_PARAMS, + )}]) != 0 then 'paidAds' + when multiSearchAny(utm_medium, ['referral', 'app','link']) != 0 then 'referral' + when position(utm_medium, 'affiliate') > 0 then 'affiliate' + when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms' + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + SEARCH_DOMAINS, + )}]) != 0 or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search') + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + SOCIAL_DOMAINS, + )}]) != 0 then concat(prefix, 'Social') + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + EMAIL_DOMAINS, + )}]) != 0 or position(utm_medium, 'mail') > 0 then 'email' + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + SHOPPING_DOMAINS, + )}]) != 0 or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping') + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + VIDEO_DOMAINS, + )}]) != 0 or position(utm_medium, 'video') > 0 then concat(prefix, 'Video') + else '' end AS name, + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + and name != '' + ${filterQuery} + group by prefix, name, session_id, visit_id + ) as t + group by name + order by visitors desc, visits desc; + `, + queryParams, + FUNCTION_NAME, + ); +} + +function toClickHouseStringArray(arr: string[]): string { + return arr.map(p => `'${p.replace(/'/g, "\\'")}'`).join(', '); +} + +function toPostgresPositionClause(column: string, arr: string[]) { + return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); +} diff --git a/src/queries/sql/getChannelMetrics.ts b/src/queries/sql/getChannelMetrics.ts new file mode 100644 index 0000000..78e4142 --- /dev/null +++ b/src/queries/sql/getChannelMetrics.ts @@ -0,0 +1,142 @@ +import clickhouse from '@/lib/clickhouse'; +import { + EMAIL_DOMAINS, + PAID_AD_PARAMS, + SEARCH_DOMAINS, + SHOPPING_DOMAINS, + SOCIAL_DOMAINS, + VIDEO_DOMAINS, +} from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getChannelMetrics'; + +export async function getChannelMetrics(...args: [websiteId: string, filters?: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { rawQuery, parseFilters } = prisma; + const { queryParams, filterQuery, joinSessionQuery, cohortQuery, dateQuery } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + WITH prefix AS ( + select case when website_event.utm_medium LIKE 'p%' OR + website_event.utm_medium LIKE '%ppc%' OR + website_event.utm_medium LIKE '%retargeting%' OR + website_event.utm_medium LIKE '%paid%' then 'paid' else 'organic' end prefix, + website_event.referrer_domain, + website_event.url_query, + website_event.utm_medium, + website_event.utm_source, + website_event.session_id + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.event_type != 2 + ${dateQuery} + ${filterQuery}), + + channels as ( + select case + when referrer_domain = '' and url_query = '' then 'direct' + when ${toPostgresLikeClause('url_query', PAID_AD_PARAMS)} then 'paidAds' + when ${toPostgresLikeClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' + when utm_medium ilike '%affiliate%' then 'affiliate' + when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms' + when ${toPostgresLikeClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search') + when ${toPostgresLikeClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') + when ${toPostgresLikeClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email' + when ${toPostgresLikeClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping') + when ${toPostgresLikeClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video') + else '' end AS x, + count(distinct session_id) y + from prefix + group by 1 + order by y desc) + + select x, sum(y) y + from channels + where x != '' + group by x + order by y desc; + `, + queryParams, + FUNCTION_NAME, + ).then(results => results.map(item => ({ ...item, y: Number(item.y) }))); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { queryParams, filterQuery, cohortQuery, dateQuery } = parseFilters({ + ...filters, + websiteId, + }); + + const sql = ` + WITH channels as ( + select case when multiSearchAny(utm_medium, ['cp', 'ppc', 'retargeting', 'paid']) != 0 then 'paid' else 'organic' end prefix, + case + when referrer_domain = '' and url_query = '' then 'direct' + when multiSearchAny(url_query, [${toClickHouseStringArray( + PAID_AD_PARAMS, + )}]) != 0 then 'paidAds' + when multiSearchAny(utm_medium, ['referral', 'app','link']) != 0 then 'referral' + when position(utm_medium, 'affiliate') > 0 then 'affiliate' + when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms' + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + SEARCH_DOMAINS, + )}]) != 0 or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search') + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + SOCIAL_DOMAINS, + )}]) != 0 then concat(prefix, 'Social') + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + EMAIL_DOMAINS, + )}]) != 0 or position(utm_medium, 'mail') > 0 then 'email' + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + SHOPPING_DOMAINS, + )}]) != 0 or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping') + when multiSearchAny(referrer_domain, [${toClickHouseStringArray( + VIDEO_DOMAINS, + )}]) != 0 or position(utm_medium, 'video') > 0 then concat(prefix, 'Video') + else '' end AS x, + count(distinct session_id) y + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and event_type != 2 + ${dateQuery} + ${filterQuery} + group by 1, 2 + order by y desc) + + select x, sum(y) y + from channels + where x != '' + group by x + order by y desc; + `; + + return rawQuery(sql, queryParams, FUNCTION_NAME); +} + +function toClickHouseStringArray(arr: string[]): string { + return arr.map(p => `'${p.replace(/'/g, "\\'")}'`).join(', '); +} + +function toPostgresLikeClause(column: string, arr: string[]) { + return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); +} diff --git a/src/queries/sql/getRealtimeActivity.ts b/src/queries/sql/getRealtimeActivity.ts new file mode 100644 index 0000000..075b65e --- /dev/null +++ b/src/queries/sql/getRealtimeActivity.ts @@ -0,0 +1,80 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getRealtimeActivity'; + +export async function getRealtimeActivity(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { rawQuery, parseFilters } = prisma; + const { queryParams, filterQuery, cohortQuery, dateQuery } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + website_event.session_id as "sessionId", + website_event.event_name as "eventName", + website_event.created_at as "createdAt", + session.browser, + session.os, + session.device, + session.country, + website_event.url_path as "urlPath", + website_event.referrer_domain as "referrerDomain" + from website_event + ${cohortQuery} + inner join session + on session.session_id = website_event.session_id + and session.website_id = website_event.website_id + where website_event.website_id = {{websiteId::uuid}} + ${filterQuery} + ${dateQuery} + order by website_event.created_at desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, filters: QueryFilters): Promise<{ x: number }> { + const { rawQuery, parseFilters } = clickhouse; + const { queryParams, filterQuery, cohortQuery, dateQuery } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + session_id as sessionId, + event_name as eventName, + created_at as createdAt, + browser, + os, + device, + country, + url_path as urlPath, + referrer_domain as referrerDomain + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + ${filterQuery} + ${dateQuery} + order by createdAt desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/getRealtimeData.ts b/src/queries/sql/getRealtimeData.ts new file mode 100644 index 0000000..4b97cb0 --- /dev/null +++ b/src/queries/sql/getRealtimeData.ts @@ -0,0 +1,78 @@ +import type { QueryFilters } from '@/lib/types'; +import { getRealtimeActivity } from '@/queries/sql/getRealtimeActivity'; +import { getPageviewStats } from '@/queries/sql/pageviews/getPageviewStats'; +import { getSessionStats } from '@/queries/sql/sessions/getSessionStats'; + +function increment(data: object, key: string) { + if (key) { + if (!data[key]) { + data[key] = 1; + } else { + data[key] += 1; + } + } +} + +export async function getRealtimeData(websiteId: string, filters: QueryFilters) { + const [activity, pageviews, sessions] = await Promise.all([ + getRealtimeActivity(websiteId, filters), + getPageviewStats(websiteId, filters), + getSessionStats(websiteId, filters), + ]); + + const uniques = new Set(); + + const { countries, urls, referrers, events } = activity.reverse().reduce( + ( + obj: { countries: any; urls: any; referrers: any; events: any }, + event: { + sessionId: string; + urlPath: string; + referrerDomain: string; + country: string; + eventName: string; + }, + ) => { + const { countries, urls, referrers, events } = obj; + const { sessionId, urlPath, referrerDomain, country, eventName } = event; + + if (!uniques.has(sessionId)) { + uniques.add(sessionId); + increment(countries, country); + + events.push({ __type: 'session', ...event }); + } + + increment(urls, urlPath); + increment(referrers, referrerDomain); + + events.push({ __type: eventName ? 'event' : 'pageview', ...event }); + + return obj; + }, + { + countries: {}, + urls: {}, + referrers: {}, + events: [], + }, + ); + + return { + countries, + urls, + referrers, + events: events.reverse(), + series: { + views: pageviews, + visitors: sessions, + }, + totals: { + views: pageviews.reduce((sum: number, { y }: { y: number }) => Number(sum) + Number(y), 0), + visitors: sessions.reduce((sum: number, { y }: { y: number }) => Number(sum) + Number(y), 0), + events: activity.filter(e => e.eventName).length, + countries: Object.keys(countries).length, + }, + timestamp: Date.now(), + }; +} diff --git a/src/queries/sql/getValues.ts b/src/queries/sql/getValues.ts new file mode 100644 index 0000000..cc6bb7d --- /dev/null +++ b/src/queries/sql/getValues.ts @@ -0,0 +1,129 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getValues'; + +export async function getValues( + ...args: [websiteId: string, column: string, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, column: string, filters: QueryFilters) { + const { rawQuery, getSearchSQL } = prisma; + const params = {}; + const { startDate, endDate, search } = filters; + + let searchQuery = ''; + let excludeDomain = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and website_event.referrer_domain != website_event.hostname + and website_event.referrer_domain != ''`; + } + + if (search) { + if (decodeURIComponent(search).includes(',')) { + searchQuery = `AND (${decodeURIComponent(search) + .split(',') + .slice(0, 5) + .map((value: string, index: number) => { + const key = `search${index}`; + + params[key] = value; + + return getSearchSQL(column, key).replace('and ', ''); + }) + .join(' OR ')})`; + } else { + searchQuery = getSearchSQL(column); + } + } + + return rawQuery( + ` + select ${column} as "value", count(*) as "count" + from website_event + inner join session + on session.session_id = website_event.session_id + and session.website_id = website_event.website_id + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${searchQuery} + ${excludeDomain} + group by 1 + order by 2 desc + limit 10 + `, + { + websiteId, + startDate, + endDate, + search: `%${search}%`, + ...params, + }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, column: string, filters: QueryFilters) { + const { rawQuery, getSearchSQL } = clickhouse; + const params = {}; + const { startDate, endDate, search } = filters; + + let searchQuery = ''; + let excludeDomain = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`; + } + + if (search) { + searchQuery = `and positionCaseInsensitive(${column}, {search:String}) > 0`; + } + + if (search) { + if (decodeURIComponent(search).includes(',')) { + searchQuery = `AND (${decodeURIComponent(search) + .split(',') + .slice(0, 5) + .map((value: string, index: number) => { + const key = `search${index}`; + + params[key] = value; + + return getSearchSQL(column, key).replace('and ', ''); + }) + .join(' OR ')})`; + } else { + searchQuery = getSearchSQL(column); + } + } + + return rawQuery( + ` + select ${column} as "value", count(*) as "count" + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${searchQuery} + ${excludeDomain} + group by 1 + order by 2 desc + limit 10 + `, + { + websiteId, + startDate, + endDate, + search, + ...params, + }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/getWebsiteDateRange.ts b/src/queries/sql/getWebsiteDateRange.ts new file mode 100644 index 0000000..d6333ad --- /dev/null +++ b/src/queries/sql/getWebsiteDateRange.ts @@ -0,0 +1,55 @@ +import clickhouse from '@/lib/clickhouse'; +import { DEFAULT_RESET_DATE } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; + +export async function getWebsiteDateRange(...args: [websiteId: string]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string) { + const { rawQuery, parseFilters } = prisma; + const { queryParams } = parseFilters({ + startDate: new Date(DEFAULT_RESET_DATE), + websiteId, + }); + + const result = await rawQuery( + ` + select + min(created_at) as "startDate", + max(created_at) as "endDate" + from website_event + where website_id = {{websiteId::uuid}} + and created_at >= {{startDate}} + `, + queryParams, + ); + + return result[0] ?? null; +} + +async function clickhouseQuery(websiteId: string) { + const { rawQuery, parseFilters } = clickhouse; + const { queryParams } = parseFilters({ + startDate: new Date(DEFAULT_RESET_DATE), + websiteId, + }); + + const result = await rawQuery( + ` + select + min(created_at) as startDate, + max(created_at) as endDate + from website_event_stats_hourly + where website_id = {websiteId:UUID} + and created_at >= {startDate:DateTime64} + `, + queryParams, + ); + + return result[0] ?? null; +} diff --git a/src/queries/sql/getWebsiteStats.ts b/src/queries/sql/getWebsiteStats.ts new file mode 100644 index 0000000..6906839 --- /dev/null +++ b/src/queries/sql/getWebsiteStats.ts @@ -0,0 +1,128 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getWebsiteStats'; + +export interface WebsiteStatsData { + pageviews: number; + visitors: number; + visits: number; + bounces: number; + totaltime: number; +} + +export async function getWebsiteStats( + ...args: [websiteId: string, filters: QueryFilters] +): Promise<WebsiteStatsData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + filters: QueryFilters, +): Promise<WebsiteStatsData[]> { + const { getTimestampDiffSQL, parseFilters, rawQuery } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + cast(coalesce(sum(t.c), 0) as bigint) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + coalesce(sum(case when t.c = 1 then 1 else 0 end), 0) as "bounces", + cast(coalesce(sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}), 0) as bigint) as "totaltime" + from ( + select + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by 1, 2 + ) as t + `, + queryParams, + FUNCTION_NAME, + ).then(result => result?.[0]); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<WebsiteStatsData[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + sum(t.c) as "pageviews", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by session_id, visit_id + ) as t; + `; + } else { + sql = ` + select + sum(t.c) as "pageviews", + uniq(session_id) as "visitors", + uniq(visit_id) as "visits", + sumIf(1, t.c = 1) as "bounces", + sum(max_time-min_time) as "totaltime" + from (select + session_id, + visit_id, + sum(views) c, + min(min_time) min_time, + max(max_time) max_time + from website_event_stats_hourly "website_event" + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by session_id, visit_id + ) as t; + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME).then(result => result?.[0]); +} diff --git a/src/queries/sql/getWeeklyTraffic.ts b/src/queries/sql/getWeeklyTraffic.ts new file mode 100644 index 0000000..7bbe78a --- /dev/null +++ b/src/queries/sql/getWeeklyTraffic.ts @@ -0,0 +1,97 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getWeeklyTraffic'; + +export async function getWeeklyTraffic(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const timezone = 'utc'; + const { rawQuery, getDateWeeklySQL, parseFilters } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + ${getDateWeeklySQL('website_event.created_at', timezone)} as time, + count(distinct website_event.session_id) as value + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by time + order by 2 + `, + queryParams, + FUNCTION_NAME, + ).then(formatResults); +} + +async function clickhouseQuery(websiteId: string, filters: QueryFilters) { + const { timezone = 'utc' } = filters; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = await parseFilters({ ...filters, websiteId }); + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + formatDateTime(toDateTime(created_at, '${timezone}'), '%w:%H') as time, + count(distinct session_id) as value + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by time + order by time + `; + } else { + sql = ` + select + formatDateTime(toDateTime(created_at, '${timezone}'), '%w:%H') as time, + count(distinct session_id) as value + from website_event_stats_hourly website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by time + order by time + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME).then(formatResults); +} + +function formatResults(data: any) { + const days = []; + + for (let i = 0; i < 7; i++) { + days.push([]); + + for (let j = 0; j < 24; j++) { + days[i].push( + Number( + data.find(({ time }) => time === `${i}:${j.toString().padStart(2, '0')}`)?.value || 0, + ), + ); + } + } + + return days; +} diff --git a/src/queries/sql/index.ts b/src/queries/sql/index.ts new file mode 100644 index 0000000..1573bde --- /dev/null +++ b/src/queries/sql/index.ts @@ -0,0 +1,41 @@ +export * from './events/getEventDataEvents'; +export * from './events/getEventDataFields'; +export * from './events/getEventDataProperties'; +export * from './events/getEventDataStats'; +export * from './events/getEventDataUsage'; +export * from './events/getEventDataValues'; +export * from './events/getEventExpandedMetrics'; +export * from './events/getEventMetrics'; +export * from './events/getEventStats'; +export * from './events/getEventUsage'; +export * from './events/getWebsiteEvents'; +export * from './events/saveEvent'; +export * from './getActiveVisitors'; +export * from './getChannelExpandedMetrics'; +export * from './getChannelMetrics'; +export * from './getRealtimeActivity'; +export * from './getRealtimeData'; +export * from './getValues'; +export * from './getWebsiteDateRange'; +export * from './getWebsiteStats'; +export * from './getWeeklyTraffic'; +export * from './pageviews/getPageviewExpandedMetrics'; +export * from './pageviews/getPageviewMetrics'; +export * from './pageviews/getPageviewStats'; +export * from './reports/getBreakdown'; +export * from './reports/getFunnel'; +export * from './reports/getJourney'; +export * from './reports/getRetention'; +export * from './reports/getUTM'; +export * from './sessions/createSession'; +export * from './sessions/getSessionActivity'; +export * from './sessions/getSessionData'; +export * from './sessions/getSessionDataProperties'; +export * from './sessions/getSessionDataValues'; +export * from './sessions/getSessionExpandedMetrics'; +export * from './sessions/getSessionMetrics'; +export * from './sessions/getSessionStats'; +export * from './sessions/getWebsiteSession'; +export * from './sessions/getWebsiteSessionStats'; +export * from './sessions/getWebsiteSessions'; +export * from './sessions/saveSessionData'; diff --git a/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts new file mode 100644 index 0000000..986d7d5 --- /dev/null +++ b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts @@ -0,0 +1,227 @@ +import clickhouse from '@/lib/clickhouse'; +import { FILTER_COLUMNS, GROUPED_DOMAINS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getPageviewExpandedMetrics'; + +export interface PageviewExpandedMetricsParameters { + type: string; + limit?: number | string; + offset?: number | string; +} + +export interface PageviewExpandedMetricsData { + name: string; + pageviews: number; + visitors: number; + visits: number; + bounces: number; + totaltime: number; +} + +export async function getPageviewExpandedMetrics( + ...args: [websiteId: string, parameters: PageviewExpandedMetricsParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: PageviewExpandedMetricsParameters, + filters: QueryFilters, +): Promise<PageviewExpandedMetricsData[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + }, + { joinSession: SESSION_COLUMNS.includes(type) }, + ); + + let entryExitQuery = ''; + let excludeDomain = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and website_event.referrer_domain != website_event.hostname + and website_event.referrer_domain != ''`; + if (type === 'domain') { + column = toPostgresGroupedReferrer(GROUPED_DOMAINS); + } + } + + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'min' : 'max'; + + entryExitQuery = ` + join ( + select visit_id, + ${aggregrate}(created_at) target_created_at + from website_event + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + group by visit_id + ) x + on x.visit_id = website_event.visit_id + and x.target_created_at = website_event.created_at + `; + } + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} as name, + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + ${entryExitQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${excludeDomain} + ${filterQuery} + group by ${column}, website_event.session_id, website_event.visit_id + ) as t + where name != '' + group by name + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: PageviewExpandedMetricsParameters, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let excludeDomain = ''; + let entryExitQuery = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`; + if (type === 'domain') { + column = toClickHouseGroupedReferrer(GROUPED_DOMAINS); + } + } + + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'argMin' : 'argMax'; + column = `x.${column}`; + + entryExitQuery = ` + JOIN (select visit_id, + ${aggregrate}(url_path, created_at) url_path + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + group by visit_id) x + ON x.visit_id = website_event.visit_id`; + } + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select + ${column} name, + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + ${entryExitQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + and name != '' + ${excludeDomain} + ${filterQuery} + group by name, session_id, visit_id + ) as t + group by name + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +export function toClickHouseGroupedReferrer( + domains: any[], + column: string = 'referrer_domain', +): string { + return [ + 'CASE', + ...domains.map(group => { + const matches = Array.isArray(group.match) ? group.match : [group.match]; + const formattedArray = matches.map(m => `'${m}'`).join(', '); + return ` WHEN multiSearchAny(${column}, [${formattedArray}]) != 0 THEN '${group.domain}'`; + }), + " ELSE 'Other'", + 'END', + ].join('\n'); +} + +export function toPostgresGroupedReferrer( + domains: any[], + column: string = 'referrer_domain', +): string { + return [ + 'CASE', + ...domains.map(group => { + const matches = Array.isArray(group.match) ? group.match : [group.match]; + + return `WHEN ${toPostgresLikeClause(column, matches)} THEN '${group.domain}'`; + }), + " ELSE 'Other'", + 'END', + ].join('\n'); +} + +function toPostgresLikeClause(column: string, arr: string[]) { + return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); +} diff --git a/src/queries/sql/pageviews/getPageviewMetrics.ts b/src/queries/sql/pageviews/getPageviewMetrics.ts new file mode 100644 index 0000000..9d4f627 --- /dev/null +++ b/src/queries/sql/pageviews/getPageviewMetrics.ts @@ -0,0 +1,191 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getPageviewMetrics'; + +export interface PageviewMetricsParameters { + type: string; + limit?: number | string; + offset?: number | string; +} + +export interface PageviewMetricsData { + x: string; + y: number; +} + +export async function getPageviewMetrics( + ...args: [websiteId: string, parameters: PageviewMetricsParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: PageviewMetricsParameters, + filters: QueryFilters, +): Promise<PageviewMetricsData[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + }, + { joinSession: SESSION_COLUMNS.includes(type) }, + ); + + let entryExitQuery = ''; + let excludeDomain = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and website_event.referrer_domain != website_event.hostname + and website_event.referrer_domain != ''`; + } + + if (type === 'entry' || type === 'exit') { + const order = type === 'entry' ? 'asc' : 'desc'; + column = `x.${column}`; + + entryExitQuery = ` + join ( + select distinct on (visit_id) + visit_id, + url_path + from website_event + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + order by visit_id, created_at ${order} + ) x + on x.visit_id = website_event.visit_id + `; + } + + return rawQuery( + ` + select ${column} x, + count(distinct website_event.session_id) as y + from website_event + ${cohortQuery} + ${joinSessionQuery} + ${entryExitQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${excludeDomain} + ${filterQuery} + group by 1 + order by 2 desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: PageviewMetricsParameters, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let sql = ''; + let excludeDomain = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + let entryExitQuery = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`; + } + + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'argMin' : 'argMax'; + column = `x.${column}`; + + entryExitQuery = ` + JOIN (select visit_id, + ${aggregrate}(url_path, created_at) url_path + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + group by visit_id) x + ON x.visit_id = website_event.visit_id`; + } + + sql = ` + select ${column} x, + uniq(website_event.session_id) as y + from website_event + ${cohortQuery} + ${entryExitQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${excludeDomain} + ${filterQuery} + group by x + order by y desc + limit ${limit} + offset ${offset} + `; + } else { + let groupByQuery = ''; + let columnQuery = `arrayJoin(${column})`; + + if (column === 'referrer_domain') { + excludeDomain = `and t != ''`; + } + + if (type === 'entry') { + columnQuery = `argMinMerge(entry_url)`; + } + + if (type === 'exit') { + columnQuery = `argMaxMerge(exit_url)`; + } + + if (type === 'entry' || type === 'exit') { + groupByQuery = 'group by s'; + } + + sql = ` + select g.t as x, + uniq(s) as y + from ( + select session_id s, + ${columnQuery} as t + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${excludeDomain} + ${filterQuery} + ${groupByQuery}) as g + group by x + order by y desc + limit ${limit} + offset ${offset} + `; + } + + return rawQuery(sql, { ...queryParams, ...parameters }, FUNCTION_NAME); +} diff --git a/src/queries/sql/pageviews/getPageviewStats.ts b/src/queries/sql/pageviews/getPageviewStats.ts new file mode 100644 index 0000000..251d5b1 --- /dev/null +++ b/src/queries/sql/pageviews/getPageviewStats.ts @@ -0,0 +1,98 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getPageviewStats'; + +export async function getPageviewStats(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { timezone = 'utc', unit = 'day' } = filters; + const { getDateSQL, parseFilters, rawQuery } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + ${getDateSQL('website_event.created_at', unit, timezone)} x, + count(*) y + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by 1 + order by 1 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { timezone = 'UTC', unit = 'day' } = filters; + const { parseFilters, rawQuery, getDateSQL } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') { + sql = ` + select + g.t as x, + g.y as y + from ( + select + ${getDateSQL('website_event.created_at', unit, timezone)} as t, + count(*) as y + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by t + ) as g + order by t + `; + } else { + sql = ` + select + g.t as x, + g.y as y + from ( + select + ${getDateSQL('website_event.created_at', unit, timezone)} as t, + sum(views) as y + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by t + ) as g + order by t + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME); +} diff --git a/src/queries/sql/reports/getAttribution.ts b/src/queries/sql/reports/getAttribution.ts new file mode 100644 index 0000000..1d04078 --- /dev/null +++ b/src/queries/sql/reports/getAttribution.ts @@ -0,0 +1,514 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface AttributionParameters { + startDate: Date; + endDate: Date; + model: string; + type: string; + step: string; + currency?: string; +} + +export interface AttributionResult { + referrer: { name: string; value: number }[]; + paidAds: { name: string; value: number }[]; + utm_source: { name: string; value: number }[]; + utm_medium: { name: string; value: number }[]; + utm_campaign: { name: string; value: number }[]; + utm_content: { name: string; value: number }[]; + utm_term: { name: string; value: number }[]; + total: { pageviews: number; visitors: number; visits: number }; +} + +export async function getAttribution( + ...args: [websiteId: string, parameters: AttributionParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: AttributionParameters, + filters: QueryFilters, +): Promise<AttributionResult> { + const { model, type, currency } = parameters; + const { rawQuery, parseFilters } = prisma; + const eventType = type === 'path' ? EVENT_TYPE.pageView : EVENT_TYPE.customEvent; + const column = type === 'path' ? 'url_path' : 'event_name'; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + ...parameters, + websiteId, + eventType, + }); + + function getUTMQuery(utmColumn: string) { + return ` + select + coalesce(we.${utmColumn}, '') name, + ${currency ? 'sum(e.value)' : 'count(distinct we.session_id)'} value + from model m + join website_event we + on we.created_at = m.created_at + and we.session_id = m.session_id + ${currency ? 'join events e on e.session_id = m.session_id' : ''} + where we.website_id = {{websiteId::uuid}} + and we.created_at between {{startDate}} and {{endDate}} + ${currency ? '' : `and we.${utmColumn} != ''`} + group by 1 + order by 2 desc + limit 20`; + } + + const eventQuery = `WITH events AS ( + select distinct + website_event.session_id, + max(website_event.created_at) max_dt + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.${column} = {{step}} + ${filterQuery} + group by 1),`; + + const revenueEventQuery = `WITH events AS ( + select + revenue.session_id, + max(revenue.created_at) max_dt, + sum(revenue.revenue) value + from revenue + join website_event + on website_event.website_id = revenue.website_id + and website_event.session_id = revenue.session_id + and website_event.event_id = revenue.event_id + and website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${cohortQuery} + ${joinSessionQuery} + where revenue.website_id = {{websiteId::uuid}} + and revenue.created_at between {{startDate}} and {{endDate}} + and revenue.${column} = {{step}} + and revenue.currency = {{currency}} + ${filterQuery} + group by 1),`; + + function getModelQuery(model: string) { + return model === 'first-click' + ? `\n + model AS (select e.session_id, + min(we.created_at) created_at + from events e + join website_event we + on we.session_id = e.session_id + where we.website_id = {{websiteId::uuid}} + and we.created_at between {{startDate}} and {{endDate}} + group by e.session_id)` + : `\n + model AS (select e.session_id, + max(we.created_at) created_at + from events e + join website_event we + on we.session_id = e.session_id + where we.website_id = {{websiteId::uuid}} + and we.created_at between {{startDate}} and {{endDate}} + and we.created_at < e.max_dt + group by e.session_id)`; + } + + const referrerRes = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + select coalesce(we.referrer_domain, '') name, + ${currency ? 'sum(e.value)' : 'count(distinct we.session_id)'} value + from model m + join website_event we + on we.created_at = m.created_at + and we.session_id = m.session_id + join session s + on s.session_id = m.session_id + ${currency ? 'join events e on e.session_id = m.session_id' : ''} + where we.website_id = {{websiteId::uuid}} + and we.created_at between {{startDate}} and {{endDate}} + ${ + currency + ? '' + : `and we.referrer_domain != hostname + and we.referrer_domain != ''` + } + group by 1 + order by 2 desc + limit 20 + `, + queryParams, + ); + + const paidAdsres = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)}, + + results AS ( + select case + when coalesce(gclid, '') != '' then 'Google Ads' + when coalesce(fbclid, '') != '' then 'Facebook / Meta' + when coalesce(msclkid, '') != '' then 'Microsoft Ads' + when coalesce(ttclid, '') != '' then 'TikTok Ads' + when coalesce(li_fat_id, '') != '' then 'LinkedIn Ads' + when coalesce(twclid, '') != '' then 'Twitter Ads (X)' + else '' + end name, + ${currency ? 'sum(e.value)' : 'count(distinct we.session_id)'} value + from model m + join website_event we + on we.created_at = m.created_at + and we.session_id = m.session_id + ${currency ? 'join events e on e.session_id = m.session_id' : ''} + where we.website_id = {{websiteId::uuid}} + and we.created_at between {{startDate}} and {{endDate}} + group by 1 + order by 2 desc + limit 20) + SELECT * + FROM results + ${currency ? '' : `WHERE name != ''`} + `, + queryParams, + ); + + const sourceRes = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_source')} + `, + queryParams, + ); + + const mediumRes = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_medium')} + `, + queryParams, + ); + + const campaignRes = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_campaign')} + `, + queryParams, + ); + + const contentRes = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_content')} + `, + queryParams, + ); + + const termRes = await rawQuery( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_term')} + `, + queryParams, + ); + + const totalRes = await rawQuery( + ` + select + count(*) as "pageviews", + count(distinct website_event.session_id) as "visitors", + count(distinct website_event.visit_id) as "visits" + from website_event + ${joinSessionQuery} + ${cohortQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.${column} = {{step}} + ${filterQuery} + `, + queryParams, + ).then(result => result?.[0]); + + return { + referrer: referrerRes, + paidAds: paidAdsres, + utm_source: sourceRes, + utm_medium: mediumRes, + utm_campaign: campaignRes, + utm_content: contentRes, + utm_term: termRes, + total: totalRes, + }; +} + +async function clickhouseQuery( + websiteId: string, + parameters: AttributionParameters, + filters: QueryFilters, +): Promise<AttributionResult> { + const { model, type, currency } = parameters; + const { rawQuery, parseFilters } = clickhouse; + const eventType = type === 'path' ? EVENT_TYPE.pageView : EVENT_TYPE.customEvent; + const column = type === 'path' ? 'url_path' : 'event_name'; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + ...parameters, + websiteId, + eventType, + }); + + function getUTMQuery(utmColumn: string) { + return ` + select + we.${utmColumn} name, + ${currency ? 'sum(e.value)' : 'uniqExact(we.session_id)'} value + from model m + join website_event we + on we.created_at = m.created_at + and we.session_id = m.session_id + ${currency ? 'join events e on e.session_id = m.session_id' : ''} + where we.website_id = {websiteId:UUID} + and we.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${currency ? '' : `and we.${utmColumn} != ''`} + group by 1 + order by 2 desc + limit 20 + `; + } + + function getModelQuery(model: string) { + if (model === 'first-click') { + return ` + model AS (select e.session_id, + min(we.created_at) created_at + from events e + join website_event we + on we.session_id = e.session_id + where we.website_id = {websiteId:UUID} + and we.created_at between {startDate:DateTime64} and {endDate:DateTime64} + group by e.session_id) + `; + } + + return ` + model AS (select e.session_id, + max(we.created_at) created_at + from events e + join website_event we + on we.session_id = e.session_id + where we.website_id = {websiteId:UUID} + and we.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and we.created_at < e.max_dt + group by e.session_id) + `; + } + + const eventQuery = `WITH events AS ( + select distinct + session_id, + max(created_at) max_dt + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and ${column} = {step:String} + ${filterQuery} + group by 1),`; + + const revenueEventQuery = `WITH events AS ( + select + website_revenue.session_id, + max(website_revenue.created_at) max_dt, + sum(website_revenue.revenue) as value + from website_revenue + join website_event + on website_event.website_id = website_revenue.website_id + and website_event.session_id = website_revenue.session_id + and website_event.event_id = website_revenue.event_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where website_revenue.website_id = {websiteId:UUID} + and website_revenue.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and website_revenue.${column} = {step:String} + and website_revenue.currency = {currency:String} + ${filterQuery} + group by 1),`; + + const referrerRes = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + select we.referrer_domain name, + ${currency ? 'sum(e.value)' : 'uniqExact(we.session_id)'} value + from model m + join website_event we + on we.created_at = m.created_at + and we.session_id = m.session_id + ${currency ? 'join events e on e.session_id = m.session_id' : ''} + where we.website_id = {websiteId:UUID} + and we.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${ + currency + ? '' + : `and we.referrer_domain != hostname + and we.referrer_domain != ''` + } + group by 1 + order by 2 desc + limit 20 + `, + queryParams, + ); + + const paidAdsres = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + select multiIf(gclid != '', 'Google Ads', + fbclid != '', 'Facebook / Meta', + msclkid != '', 'Microsoft Ads', + ttclid != '', 'TikTok Ads', + li_fat_id != '', 'LinkedIn Ads', + twclid != '', 'Twitter Ads (X)','') name, + ${currency ? 'sum(e.value)' : 'uniqExact(we.session_id)'} value + from model m + join website_event we + on we.created_at = m.created_at + and we.session_id = m.session_id + ${currency ? 'join events e on e.session_id = m.session_id' : ''} + where we.website_id = {websiteId:UUID} + and we.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${currency ? '' : `and name != ''`} + group by 1 + order by 2 desc + limit 20 + `, + queryParams, + ); + + const sourceRes = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_source')} + `, + queryParams, + ); + + const mediumRes = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_medium')} + `, + queryParams, + ); + + const campaignRes = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_campaign')} + `, + queryParams, + ); + + const contentRes = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_content')} + `, + queryParams, + ); + + const termRes = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + ${currency ? revenueEventQuery : eventQuery} + ${getModelQuery(model)} + ${getUTMQuery('utm_term')} + `, + queryParams, + ); + + const totalRes = await rawQuery<{ pageviews: number; visitors: number; visits: number }>( + ` + select + count(*) as "pageviews", + uniqExact(session_id) as "visitors", + uniqExact(visit_id) as "visits" + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and ${column} = {step:String} + ${filterQuery} + `, + queryParams, + ).then(result => result?.[0]); + + return { + referrer: referrerRes, + paidAds: paidAdsres, + utm_source: sourceRes, + utm_medium: mediumRes, + utm_campaign: campaignRes, + utm_content: contentRes, + utm_term: termRes, + total: totalRes, + }; +} diff --git a/src/queries/sql/reports/getBreakdown.ts b/src/queries/sql/reports/getBreakdown.ts new file mode 100644 index 0000000..51773d8 --- /dev/null +++ b/src/queries/sql/reports/getBreakdown.ts @@ -0,0 +1,135 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface BreakdownParameters { + startDate: Date; + endDate: Date; + fields: string[]; +} + +export interface BreakdownData { + x: string; + y: number; +} + +export async function getBreakdown( + ...args: [websiteId: string, parameters: BreakdownParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: BreakdownParameters, + filters: QueryFilters, +): Promise<BreakdownData[]> { + const { getTimestampDiffSQL, parseFilters, rawQuery } = prisma; + const { startDate, endDate, fields } = parameters; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + startDate, + endDate, + eventType: EVENT_TYPE.pageView, + }, + { + joinSession: !!fields.find((name: string) => SESSION_COLUMNS.includes(name)), + }, + ); + + return rawQuery( + ` + select + sum(t.c) as "views", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime", + ${parseFieldsByName(fields)} + from ( + select + ${parseFields(fields)}, + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by ${parseFieldsByName(fields)}, + website_event.session_id, website_event.visit_id + ) as t + group by ${parseFieldsByName(fields)} + order by 1 desc, 2 desc + limit 500 + `, + queryParams, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: BreakdownParameters, + filters: QueryFilters, +): Promise<BreakdownData[]> { + const { parseFilters, rawQuery } = clickhouse; + const { startDate, endDate, fields } = parameters; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + eventType: EVENT_TYPE.pageView, + }); + + return rawQuery( + ` + select + sum(t.c) as "views", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime", + ${parseFieldsByName(fields)} + from ( + select + ${parseFields(fields)}, + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by ${parseFieldsByName(fields)}, + session_id, visit_id + ) as t + group by ${parseFieldsByName(fields)} + order by 1 desc, 2 desc + limit 500 + `, + queryParams, + ); +} + +function parseFields(fields: string[]) { + return fields.map(name => `${FILTER_COLUMNS[name]} as "${name}"`).join(','); +} + +function parseFieldsByName(fields: string[]) { + return `${fields.map(name => name).join(',')}`; +} diff --git a/src/queries/sql/reports/getFunnel.ts b/src/queries/sql/reports/getFunnel.ts new file mode 100644 index 0000000..4840123 --- /dev/null +++ b/src/queries/sql/reports/getFunnel.ts @@ -0,0 +1,255 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface FunnelParameters { + startDate: Date; + endDate: Date; + window: number; + steps: { type: string; value: string }[]; +} + +export interface FunnelResult { + value: string; + visitors: number; + dropoff: number; +} + +export async function getFunnel( + ...args: [websiteId: string, parameters: FunnelParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: FunnelParameters, + filters: QueryFilters, +): Promise<FunnelResult[]> { + const { startDate, endDate, window, steps } = parameters; + const { rawQuery, getAddIntervalQuery, parseFilters } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + }); + const { levelOneQuery, levelQuery, sumQuery, params } = getFunnelQuery(steps, window); + + function getFunnelQuery( + steps: { type: string; value: string }[], + window: number, + ): { + levelOneQuery: string; + levelQuery: string; + sumQuery: string; + params: string[]; + } { + return steps.reduce( + (pv, cv, i) => { + const levelNumber = i + 1; + const startSum = i > 0 ? 'union ' : ''; + const isURL = cv.type === 'path'; + const column = isURL ? 'url_path' : 'event_name'; + + let operator = '='; + let paramValue = cv.value; + + if (cv.value.startsWith('*') || cv.value.endsWith('*')) { + operator = 'like'; + paramValue = cv.value.replace(/^\*|\*$/g, '%'); + } + + if (levelNumber === 1) { + pv.levelOneQuery = ` + WITH level1 AS ( + select distinct website_event.session_id, website_event.created_at + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and ${column} ${operator} {{${i}}} + ${filterQuery} + )`; + } else { + pv.levelQuery += ` + , level${levelNumber} AS ( + select distinct we.session_id, we.created_at + from level${i} l + join website_event we + on l.session_id = we.session_id + where we.website_id = {{websiteId::uuid}} + and we.created_at between l.created_at and ${getAddIntervalQuery( + `l.created_at `, + `${window} minute`, + )} + and we.${column} ${operator} {{${i}}} + and we.created_at <= {{endDate}} + )`; + } + + pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`; + pv.params.push(paramValue); + + return pv; + }, + { + levelOneQuery: '', + levelQuery: '', + sumQuery: '', + params: [], + }, + ); + } + + return rawQuery( + ` + ${levelOneQuery} + ${levelQuery} + ${sumQuery} + ORDER BY level; + `, + { + ...params, + ...queryParams, + }, + ).then(formatResults(steps)); +} + +async function clickhouseQuery( + websiteId: string, + parameters: FunnelParameters, + filters: QueryFilters, +): Promise< + { + value: string; + visitors: number; + dropoff: number; + }[] +> { + const { startDate, endDate, window, steps } = parameters; + const { rawQuery, parseFilters } = clickhouse; + const { levelOneQuery, levelQuery, sumQuery, stepFilterQuery, params } = getFunnelQuery( + steps, + window, + ); + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + }); + + function getFunnelQuery( + steps: { type: string; value: string }[], + window: number, + ): { + levelOneQuery: string; + levelQuery: string; + sumQuery: string; + stepFilterQuery: string; + params: Record<string, string>; + } { + return steps.reduce( + (pv, cv, i) => { + const levelNumber = i + 1; + const startSum = i > 0 ? 'union all ' : ''; + const startFilter = i > 0 ? 'or' : ''; + const isURL = cv.type === 'path'; + const column = isURL ? 'url_path' : 'event_name'; + + let operator = '='; + let paramValue = cv.value; + + if (cv.value.startsWith('*') || cv.value.endsWith('*')) { + operator = 'like'; + paramValue = cv.value.replace(/^\*|\*$/g, '%'); + } + + if (levelNumber === 1) { + pv.levelOneQuery = `\n + level1 AS ( + select * + from level0 + where ${column} ${operator} {param${i}:String} + )`; + } else { + pv.levelQuery += `\n + , level${levelNumber} AS ( + select distinct y.session_id as session_id, + y.url_path as url_path, + y.referrer_path as referrer_path, + y.event_name, + y.created_at as created_at + from level${i} x + join level0 y + on x.session_id = y.session_id + where y.created_at between x.created_at and x.created_at + interval ${window} minute + and y.${column} ${operator} {param${i}:String} + )`; + } + + pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`; + pv.stepFilterQuery += `${startFilter} ${column} ${operator} {param${i}:String} `; + pv.params[`param${i}`] = paramValue; + + return pv; + }, + { + levelOneQuery: '', + levelQuery: '', + sumQuery: '', + stepFilterQuery: '', + params: {}, + }, + ); + } + + return rawQuery( + ` + WITH level0 AS ( + select distinct session_id, url_path, referrer_path, event_name, created_at + from website_event + ${cohortQuery} + where (${stepFilterQuery}) + and website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + ), + ${levelOneQuery} + ${levelQuery} + select * + from ( + ${sumQuery} + ) ORDER BY level; + `, + { + ...params, + ...queryParams, + }, + ).then(formatResults(steps)); +} + +const formatResults = (steps: { type: string; value: string }[]) => (results: unknown) => { + return steps.map((step: { type: string; value: string }, i: number) => { + const visitors = Number(results[i]?.count) || 0; + const previous = Number(results[i - 1]?.count) || 0; + const dropped = previous > 0 ? previous - visitors : 0; + const dropoff = 1 - visitors / previous; + const remaining = visitors / Number(results[0].count); + + return { + ...step, + visitors, + previous, + dropped, + dropoff, + remaining, + }; + }); +}; diff --git a/src/queries/sql/reports/getGoal.ts b/src/queries/sql/reports/getGoal.ts new file mode 100644 index 0000000..7e790ff --- /dev/null +++ b/src/queries/sql/reports/getGoal.ts @@ -0,0 +1,105 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface GoalParameters { + startDate: Date; + endDate: Date; + type: string; + value: string; + operator?: string; + property?: string; +} + +export async function getGoal( + ...args: [websiteId: string, params: GoalParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: GoalParameters, + filters: QueryFilters, +) { + const { startDate, endDate, type, value } = parameters; + const { rawQuery, parseFilters } = prisma; + const eventType = type === 'path' ? EVENT_TYPE.pageView : EVENT_TYPE.customEvent; + const column = type === 'path' ? 'url_path' : 'event_name'; + const { filterQuery, dateQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + value, + startDate, + endDate, + eventType, + }); + + return rawQuery( + ` + select count(distinct website_event.session_id) as num, + ( + select count(distinct website_event.session_id) + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + ${dateQuery} + ${filterQuery} + ) as total + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and ${column} = {{value}} + ${dateQuery} + ${filterQuery} + `, + queryParams, + ).then(results => results?.[0]); +} + +async function clickhouseQuery( + websiteId: string, + parameters: GoalParameters, + filters: QueryFilters, +) { + const { startDate, endDate, type, value } = parameters; + const { rawQuery, parseFilters } = clickhouse; + const eventType = type === 'path' ? EVENT_TYPE.pageView : EVENT_TYPE.customEvent; + const column = type === 'path' ? 'url_path' : 'event_name'; + const { filterQuery, dateQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + value, + startDate, + endDate, + eventType, + }); + + return rawQuery( + ` + select count(distinct session_id) as num, + ( + select count(distinct session_id) + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + ${dateQuery} + ${filterQuery} + ) as total + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and ${column} = {value:String} + ${dateQuery} + ${filterQuery} + `, + queryParams, + ).then(results => results?.[0]); +} diff --git a/src/queries/sql/reports/getJourney.ts b/src/queries/sql/reports/getJourney.ts new file mode 100644 index 0000000..283e0fa --- /dev/null +++ b/src/queries/sql/reports/getJourney.ts @@ -0,0 +1,275 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface JourneyParameters { + startDate: Date; + endDate: Date; + steps: number; + startStep?: string; + endStep?: string; +} + +export interface JourneyResult { + e1: string; + e2: string; + e3: string; + e4: string; + e5: string; + e6: string; + e7: string; + count: number; +} + +export async function getJourney( + ...args: [websiteId: string, parameters: JourneyParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: JourneyParameters, + filters: QueryFilters, +): Promise<JourneyResult[]> { + const { startDate, endDate, steps, startStep, endStep } = parameters; + const { rawQuery, parseFilters } = prisma; + const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery( + steps, + startStep, + endStep, + ); + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + }); + + function getJourneyQuery( + steps: number, + startStep?: string, + endStep?: string, + ): { + sequenceQuery: string; + startStepQuery: string; + endStepQuery: string; + params: Record<string, string>; + } { + const params = {}; + let sequenceQuery = ''; + let startStepQuery = ''; + let endStepQuery = ''; + + // create sequence query + let selectQuery = ''; + let maxQuery = ''; + let groupByQuery = ''; + + for (let i = 1; i <= steps; i++) { + const endQuery = i < steps ? ',' : ''; + selectQuery += `s.e${i},`; + maxQuery += `\nmax(CASE WHEN event_number = ${i} THEN "event" ELSE NULL END) AS e${i}${endQuery}`; + groupByQuery += `s.e${i}${endQuery} `; + } + + sequenceQuery = `\nsequences as ( + select ${selectQuery} + count(*) count + FROM ( + select visit_id, + ${maxQuery} + FROM events + group by visit_id) s + group by ${groupByQuery}) + `; + + // create start Step params query + if (startStep) { + startStepQuery = `and e1 = {{startStep}}`; + params.startStep = startStep; + } + + // create end Step params query + if (endStep) { + for (let i = 1; i < steps; i++) { + const startQuery = i === 1 ? 'and (' : '\nor '; + endStepQuery += `${startQuery}(e${i} = {{endStep}} and e${i + 1} is null) `; + } + endStepQuery += `\nor (e${steps} = {{endStep}}))`; + + params.endStep = endStep; + } + + return { + sequenceQuery, + startStepQuery, + endStepQuery, + params, + }; + } + + return rawQuery( + ` + WITH events AS ( + select distinct + website_event.visit_id, + website_event.referrer_path, + coalesce(nullIf(website_event.event_name, ''), website_event.url_path) event, + row_number() OVER (PARTITION BY visit_id ORDER BY website_event.created_at) AS event_number + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery}), + ${sequenceQuery} + select * + from sequences + where 1 = 1 + ${startStepQuery} + ${endStepQuery} + order by count desc + limit 100 + `, + { + ...params, + ...queryParams, + }, + ).then(parseResult); +} + +async function clickhouseQuery( + websiteId: string, + parameters: JourneyParameters, + filters: QueryFilters, +): Promise<JourneyResult[]> { + const { startDate, endDate, steps, startStep, endStep } = parameters; + const { rawQuery, parseFilters } = clickhouse; + const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery( + steps, + startStep, + endStep, + ); + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + }); + + function getJourneyQuery( + steps: number, + startStep?: string, + endStep?: string, + ): { + sequenceQuery: string; + startStepQuery: string; + endStepQuery: string; + params: Record<string, string>; + } { + const params = {}; + let sequenceQuery = ''; + let startStepQuery = ''; + let endStepQuery = ''; + + // create sequence query + let selectQuery = ''; + let maxQuery = ''; + let groupByQuery = ''; + + for (let i = 1; i <= steps; i++) { + const endQuery = i < steps ? ',' : ''; + selectQuery += `s.e${i},`; + maxQuery += `\nmax(CASE WHEN event_number = ${i} THEN "event" ELSE NULL END) AS e${i}${endQuery}`; + groupByQuery += `s.e${i}${endQuery} `; + } + + sequenceQuery = `\nsequences as ( + select ${selectQuery} + count(*) count + FROM ( + select visit_id, + ${maxQuery} + FROM events + group by visit_id) s + group by ${groupByQuery}) + `; + + // create start Step params query + if (startStep) { + startStepQuery = `and e1 = {startStep:String}`; + params.startStep = startStep; + } + + // create end Step params query + if (endStep) { + for (let i = 1; i < steps; i++) { + const startQuery = i === 1 ? 'and (' : '\nor '; + endStepQuery += `${startQuery}(e${i} = {endStep:String} and e${i + 1} is null) `; + } + endStepQuery += `\nor (e${steps} = {endStep:String}))`; + + params.endStep = endStep; + } + + return { + sequenceQuery, + startStepQuery, + endStepQuery, + params, + }; + } + + return rawQuery( + ` + WITH events AS ( + select distinct + visit_id, + coalesce(nullIf(event_name, ''), url_path) "event", + row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + ${filterQuery} + and created_at between {startDate:DateTime64} and {endDate:DateTime64}), + ${sequenceQuery} + select * + from sequences + where 1 = 1 + ${startStepQuery} + ${endStepQuery} + order by count desc + limit 100 + `, + { + ...params, + ...queryParams, + }, + ).then(parseResult); +} + +function combineSequentialDuplicates(array: any) { + if (array.length === 0) return array; + + const result = [array[0]]; + + for (let i = 1; i < array.length; i++) { + if (array[i] !== array[i - 1]) { + result.push(array[i]); + } + } + + return result; +} + +function parseResult(data: any) { + return data.map(({ e1, e2, e3, e4, e5, e6, e7, count }) => ({ + items: combineSequentialDuplicates([e1, e2, e3, e4, e5, e6, e7]), + count: +Number(count), + })); +} diff --git a/src/queries/sql/reports/getRetention.ts b/src/queries/sql/reports/getRetention.ts new file mode 100644 index 0000000..87b55e0 --- /dev/null +++ b/src/queries/sql/reports/getRetention.ts @@ -0,0 +1,173 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface RetentionParameters { + startDate: Date; + endDate: Date; + timezone?: string; +} + +export interface RetentionResult { + date: string; + day: number; + visitors: number; + returnVisitors: number; + percentage: number; +} + +export async function getRetention( + ...args: [websiteId: string, parameters: RetentionParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: RetentionParameters, + filters: QueryFilters, +): Promise<RetentionResult[]> { + const { startDate, endDate, timezone } = parameters; + const { getDateSQL, getDayDiffQuery, getCastColumnQuery, rawQuery, parseFilters } = prisma; + const unit = 'day'; + + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + timezone, + }); + + return rawQuery( + ` + WITH cohort_items AS ( + select + min(${getDateSQL('website_event.created_at', unit, timezone)}) as cohort_date, + website_event.session_id + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by website_event.session_id + ), + user_activities AS ( + select distinct + website_event.session_id, + ${getDayDiffQuery(getDateSQL('created_at', unit, timezone), 'cohort_items.cohort_date')} as day_number + from website_event + join cohort_items + on website_event.session_id = cohort_items.session_id + where website_id = {{websiteId::uuid}} + and created_at between {{startDate}} and {{endDate}} + + ), + cohort_size as ( + select cohort_date, + count(*) as visitors + from cohort_items + group by 1 + order by 1 + ), + cohort_date as ( + select + c.cohort_date, + a.day_number, + count(*) as visitors + from user_activities a + join cohort_items c + on a.session_id = c.session_id + group by 1, 2 + ) + select + c.cohort_date as date, + c.day_number as day, + s.visitors, + c.visitors as "returnVisitors", + ${getCastColumnQuery('c.visitors', 'float')} * 100 / s.visitors as percentage + from cohort_date c + join cohort_size s + on c.cohort_date = s.cohort_date + where c.day_number <= 31 + order by 1, 2`, + queryParams, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: RetentionParameters, + filters: QueryFilters, +): Promise<RetentionResult[]> { + const { startDate, endDate, timezone } = parameters; + const { getDateSQL, rawQuery, parseFilters } = clickhouse; + const unit = 'day'; + + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + timezone, + }); + + return rawQuery( + ` + WITH cohort_items AS ( + select + min(${getDateSQL('created_at', unit, timezone)}) as cohort_date, + session_id + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + group by session_id + ), + user_activities AS ( + select distinct + website_event.session_id, + toInt32((${getDateSQL('created_at', unit, timezone)} - cohort_items.cohort_date) / 86400) as day_number + from website_event + join cohort_items + on website_event.session_id = cohort_items.session_id + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ), + cohort_size as ( + select cohort_date, + count(*) as visitors + from cohort_items + group by 1 + order by 1 + ), + cohort_date as ( + select + c.cohort_date, + a.day_number, + count(*) as visitors + from user_activities a + join cohort_items c + on a.session_id = c.session_id + group by 1, 2 + ) + select + c.cohort_date as date, + c.day_number as day, + s.visitors as visitors, + c.visitors returnVisitors, + c.visitors * 100 / s.visitors as percentage + from cohort_date c + join cohort_size s + on c.cohort_date = s.cohort_date + where c.day_number <= 31 + order by 1, 2`, + queryParams, + ); +} diff --git a/src/queries/sql/reports/getRevenue.ts b/src/queries/sql/reports/getRevenue.ts new file mode 100644 index 0000000..fa25078 --- /dev/null +++ b/src/queries/sql/reports/getRevenue.ts @@ -0,0 +1,217 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface RevenuParameters { + startDate: Date; + endDate: Date; + unit: string; + timezone: string; + currency: string; +} + +export interface RevenueResult { + chart: { x: string; t: string; y: number }[]; + country: { name: string; value: number }[]; + total: { sum: number; count: number; average: number; unique_count: number }; +} + +export async function getRevenue( + ...args: [websiteId: string, parameters: RevenuParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: RevenuParameters, + filters: QueryFilters, +): Promise<RevenueResult> { + const { startDate, endDate, unit = 'day', timezone = 'utc', currency } = parameters; + const { getDateSQL, rawQuery, parseFilters } = prisma; + const { queryParams, filterQuery, cohortQuery, joinSessionQuery } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + currency, + }); + + const joinQuery = filterQuery + ? `join website_event + on website_event.website_id = revenue.website_id + and website_event.session_id = revenue.session_id + and website_event.event_id = revenue.event_id + and website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}}` + : ''; + + const chart = await rawQuery( + ` + select + revenue.event_name x, + ${getDateSQL('revenue.created_at', unit, timezone)} t, + sum(revenue.revenue) y + from revenue + ${joinQuery} + ${cohortQuery} + ${joinSessionQuery} + where revenue.website_id = {{websiteId::uuid}} + and revenue.created_at between {{startDate}} and {{endDate}} + and revenue.currency = upper({{currency}}) + ${filterQuery} + group by x, t + order by t + `, + queryParams, + ); + + const country = await rawQuery( + ` + select + session.country as name, + sum(revenue) value + from revenue + ${joinQuery} + join session + on session.website_id = revenue.website_id + and session.session_id = revenue.session_id + ${cohortQuery} + where revenue.website_id = {{websiteId::uuid}} + and revenue.created_at between {{startDate}} and {{endDate}} + and revenue.currency = upper({{currency}}) + ${filterQuery} + group by session.country + `, + queryParams, + ); + + const total = await rawQuery( + ` + select + sum(revenue.revenue) as sum, + count(distinct revenue.event_id) as count, + count(distinct revenue.session_id) as unique_count + from revenue + ${joinQuery} + ${cohortQuery} + ${joinSessionQuery} + where revenue.website_id = {{websiteId::uuid}} + and revenue.created_at between {{startDate}} and {{endDate}} + and revenue.currency = upper({{currency}}) + ${filterQuery} + `, + queryParams, + ).then(result => result?.[0]); + + total.average = total.count > 0 ? Number(total.sum) / Number(total.count) : 0; + + return { chart, country, total }; +} + +async function clickhouseQuery( + websiteId: string, + parameters: RevenuParameters, + filters: QueryFilters, +): Promise<RevenueResult> { + const { startDate, endDate, unit = 'day', timezone = 'utc', currency } = parameters; + const { getDateSQL, rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + currency, + }); + + const joinQuery = filterQuery + ? `join website_event + on website_event.website_id = website_revenue.website_id + and website_event.session_id = website_revenue.session_id + and website_event.event_id = website_revenue.event_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64}` + : ''; + + const chart = await rawQuery< + { + x: string; + t: string; + y: number; + }[] + >( + ` + select + website_revenue.event_name x, + ${getDateSQL('website_revenue.created_at', unit, timezone)} t, + sum(website_revenue.revenue) y + from website_revenue + ${joinQuery} + ${cohortQuery} + where website_revenue.website_id = {websiteId:UUID} + and website_revenue.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and website_revenue.currency = upper({currency:String}) + ${filterQuery} + group by x, t + order by t + `, + queryParams, + ); + + const country = await rawQuery< + { + name: string; + value: number; + }[] + >( + ` + select + website_event.country as name, + sum(website_revenue.revenue) as value + from website_revenue + join website_event + on website_event.website_id = website_revenue.website_id + and website_event.session_id = website_revenue.session_id + and website_event.event_id = website_revenue.event_id + and website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${cohortQuery} + where website_revenue.website_id = {websiteId:UUID} + and website_revenue.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and website_revenue.currency = upper({currency:String}) + ${filterQuery} + group by website_event.country + order by value desc + `, + queryParams, + ); + + const total = await rawQuery<{ + sum: number; + count: number; + unique_count: number; + }>( + ` + select + sum(website_revenue.revenue) as sum, + uniqExact(website_revenue.event_id) as count, + uniqExact(website_revenue.session_id) as unique_count + from website_revenue + ${joinQuery} + ${cohortQuery} + where website_revenue.website_id = {websiteId:UUID} + and website_revenue.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and website_revenue.currency = upper({currency:String}) + ${filterQuery} + `, + queryParams, + ).then(result => result?.[0]); + + total.average = total.count > 0 ? total.sum / total.count : 0; + + return { chart, country, total }; +} diff --git a/src/queries/sql/reports/getUTM.ts b/src/queries/sql/reports/getUTM.ts new file mode 100644 index 0000000..4d43eb4 --- /dev/null +++ b/src/queries/sql/reports/getUTM.ts @@ -0,0 +1,84 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_TYPE } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +export interface UTMParameters { + column: string; + startDate: Date; + endDate: Date; +} + +export async function getUTM( + ...args: [websiteId: string, parameters: UTMParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: UTMParameters, + filters: QueryFilters, +) { + const { column, startDate, endDate } = parameters; + const { parseFilters, rawQuery } = prisma; + + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + eventType: EVENT_TYPE.pageView, + }); + + return rawQuery( + ` + select website_event.${column} utm, count(*) as views + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and coalesce(website_event.${column}, '') != '' + ${filterQuery} + group by 1 + order by 2 desc + `, + queryParams, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: UTMParameters, + filters: QueryFilters, +) { + const { column, startDate, endDate } = parameters; + const { parseFilters, rawQuery } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + startDate, + endDate, + eventType: EVENT_TYPE.pageView, + }); + + return rawQuery( + ` + select ${column} utm, count(*) as views + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and ${column} != '' + ${filterQuery} + group by 1 + order by 2 desc + `, + queryParams, + ); +} diff --git a/src/queries/sql/sessions/createSession.ts b/src/queries/sql/sessions/createSession.ts new file mode 100644 index 0000000..8d07a55 --- /dev/null +++ b/src/queries/sql/sessions/createSession.ts @@ -0,0 +1,44 @@ +import type { Prisma } from '@/generated/prisma/client'; +import prisma from '@/lib/prisma'; + +const FUNCTION_NAME = 'createSession'; + +export async function createSession(data: Prisma.SessionCreateInput) { + const { rawQuery } = prisma; + + await rawQuery( + ` + insert into session ( + session_id, + website_id, + browser, + os, + device, + screen, + language, + country, + region, + city, + distinct_id, + created_at + ) + values ( + {{id}}, + {{websiteId}}, + {{browser}}, + {{os}}, + {{device}}, + {{screen}}, + {{language}}, + {{country}}, + {{region}}, + {{city}}, + {{distinctId}}, + {{createdAt}} + ) + on conflict (session_id) do nothing + `, + data, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/sessions/getSessionActivity.ts b/src/queries/sql/sessions/getSessionActivity.ts new file mode 100644 index 0000000..af31fca --- /dev/null +++ b/src/queries/sql/sessions/getSessionActivity.ts @@ -0,0 +1,78 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getSessionActivity'; + +export async function getSessionActivity( + ...args: [websiteId: string, sessionId: string, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, sessionId: string, filters: QueryFilters) { + const { rawQuery } = prisma; + const { startDate, endDate } = filters; + + return rawQuery( + ` + select + created_at as "createdAt", + url_path as "urlPath", + url_query as "urlQuery", + referrer_domain as "referrerDomain", + event_id as "eventId", + event_type as "eventType", + event_name as "eventName", + visit_id as "visitId", + event_id IN (select website_event_id + from event_data + where website_id = {{websiteId::uuid}} + and created_at between {{startDate}} and {{endDate}}) AS "hasData" + from website_event + where website_id = {{websiteId::uuid}} + and session_id = {{sessionId::uuid}} + and created_at between {{startDate}} and {{endDate}} + order by created_at desc + limit 500 + `, + { websiteId, sessionId, startDate, endDate }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, sessionId: string, filters: QueryFilters) { + const { rawQuery } = clickhouse; + const { startDate, endDate } = filters; + + return rawQuery( + ` + select + created_at as createdAt, + url_path as urlPath, + url_query as urlQuery, + referrer_domain as referrerDomain, + event_id as eventId, + event_type as eventType, + event_name as eventName, + visit_id as visitId, + event_id IN (select event_id + from event_data + where website_id = {websiteId:UUID} + and session_id = {sessionId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64}) AS hasData + from website_event + where website_id = {websiteId:UUID} + and session_id = {sessionId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + order by created_at desc + limit 500 + `, + { websiteId, sessionId, startDate, endDate }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/sessions/getSessionData.ts b/src/queries/sql/sessions/getSessionData.ts new file mode 100644 index 0000000..8f1e493 --- /dev/null +++ b/src/queries/sql/sessions/getSessionData.ts @@ -0,0 +1,60 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; + +const FUNCTION_NAME = 'getSessionData'; + +export async function getSessionData(...args: [websiteId: string, sessionId: string]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, sessionId: string) { + const { rawQuery } = prisma; + + return rawQuery( + ` + select + website_id as "websiteId", + session_id as "sessionId", + data_key as "dataKey", + data_type as "dataType", + replace(string_value, '.0000', '') as "stringValue", + number_value as "numberValue", + date_value as "dateValue", + created_at as "createdAt" + from session_data + where website_id = {{websiteId::uuid}} + and session_id = {{sessionId::uuid}} + order by data_key asc + `, + { websiteId, sessionId }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, sessionId: string) { + const { rawQuery } = clickhouse; + + return rawQuery( + ` + select + website_id as websiteId, + session_id as sessionId, + data_key as dataKey, + data_type as dataType, + replace(string_value, '.0000', '') as stringValue, + number_value as numberValue, + date_value as dateValue, + created_at as createdAt + from session_data final + where website_id = {websiteId:UUID} + and session_id = {sessionId:UUID} + order by data_key asc + `, + { websiteId, sessionId }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/sessions/getSessionDataProperties.ts b/src/queries/sql/sessions/getSessionDataProperties.ts new file mode 100644 index 0000000..9b429f9 --- /dev/null +++ b/src/queries/sql/sessions/getSessionDataProperties.ts @@ -0,0 +1,75 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getSessionDataProperties'; + +export async function getSessionDataProperties( + ...args: [websiteId: string, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { rawQuery, parseFilters } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + data_key as "propertyName", + count(distinct session_data.session_id) as "total" + from website_event + ${cohortQuery} + ${joinSessionQuery} + join session_data + on session_data.session_id = website_event.session_id + and session_data.website_id = website_event.website_id + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by 1 + order by 2 desc + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ propertyName: string; total: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ ...filters, websiteId }); + + return rawQuery( + ` + select + data_key as propertyName, + count(distinct session_data.session_id) as total + from website_event + ${cohortQuery} + join session_data final + on session_data.session_id = website_event.session_id + and session_data.website_id = {websiteId:UUID} + where website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and session_data.data_key != '' + ${filterQuery} + group by 1 + order by 2 desc + limit 500 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/sessions/getSessionDataValues.ts b/src/queries/sql/sessions/getSessionDataValues.ts new file mode 100644 index 0000000..5790141 --- /dev/null +++ b/src/queries/sql/sessions/getSessionDataValues.ts @@ -0,0 +1,85 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getSessionDataValues'; + +export async function getSessionDataValues( + ...args: [websiteId: string, filters: QueryFilters & { propertyName?: string }] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + filters: QueryFilters & { propertyName?: string }, +) { + const { rawQuery, parseFilters, getDateSQL } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + case + when data_type = 2 then replace(string_value, '.0000', '') + when data_type = 4 then ${getDateSQL('date_value', 'hour')} + else string_value + end as "value", + count(distinct session_data.session_id) as "total" + from website_event + ${cohortQuery} + ${joinSessionQuery} + join session_data + on session_data.session_id = website_event.session_id + and session_data.website_id = website_event.website_id + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and session_data.data_key = {{propertyName}} + ${filterQuery} + group by value + order by 2 desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters & { propertyName?: string }, +): Promise<{ propertyName: string; dataType: number; propertyValue: string; total: number }[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ ...filters, websiteId }); + + return rawQuery( + ` + select + multiIf(data_type = 2, replaceAll(string_value, '.0000', ''), + data_type = 4, toString(date_trunc('hour', date_value)), + string_value) as "value", + uniq(session_data.session_id) as "total" + from website_event + ${cohortQuery} + join session_data final + on session_data.session_id = website_event.session_id + and session_data.website_id = {websiteId:UUID} + where website_event.website_id = {websiteId:UUID} + and website_event.created_at between {startDate:DateTime64} and {endDate:DateTime64} + and session_data.data_key = {propertyName:String} + ${filterQuery} + group by value + order by 2 desc + limit 100 + `, + queryParams, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/sessions/getSessionExpandedMetrics.ts b/src/queries/sql/sessions/getSessionExpandedMetrics.ts new file mode 100644 index 0000000..85c1293 --- /dev/null +++ b/src/queries/sql/sessions/getSessionExpandedMetrics.ts @@ -0,0 +1,152 @@ +import clickhouse from '@/lib/clickhouse'; +import { FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getSessionExpandedMetrics'; + +export interface SessionExpandedMetricsParameters { + type: string; + limit?: number | string; + offset?: number | string; +} + +export interface SessionExpandedMetricsData { + name: string; + pageviews: number; + visitors: number; + visits: number; + bounces: number; + totaltime: number; +} + +export async function getSessionExpandedMetrics( + ...args: [websiteId: string, parameters: SessionExpandedMetricsParameters, filters: QueryFilters] +): Promise<SessionExpandedMetricsData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: SessionExpandedMetricsParameters, + filters: QueryFilters, +): Promise<SessionExpandedMetricsData[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { parseFilters, rawQuery, getTimestampDiffSQL } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + }, + { + joinSession: SESSION_COLUMNS.includes(type), + }, + ); + const includeCountry = column === 'city' || column === 'region'; + + if (type === 'language') { + column = `lower(left(${type}, 2))`; + } + + return rawQuery( + ` + select + name, + ${includeCountry ? 'country,' : ''} + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} name, + ${includeCountry ? 'country,' : ''} + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by name, website_event.session_id, website_event.visit_id + ${includeCountry ? ', country' : ''} + ) as t + group by name + ${includeCountry ? ', country' : ''} + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: SessionExpandedMetricsParameters, + filters: QueryFilters, +): Promise<SessionExpandedMetricsData[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { parseFilters, rawQuery } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + const includeCountry = column === 'city' || column === 'region'; + + if (type === 'language') { + column = `lower(left(${type}, 2))`; + } + + return rawQuery( + ` + select + name, + ${includeCountry ? 'country,' : ''} + sum(t.c) as "pageviews", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select + ${column} name, + ${includeCountry ? 'country,' : ''} + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + and name != '' + ${filterQuery} + group by name, session_id, visit_id + ${includeCountry ? ', country' : ''} + ) as t + group by name + ${includeCountry ? ', country' : ''} + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} diff --git a/src/queries/sql/sessions/getSessionMetrics.ts b/src/queries/sql/sessions/getSessionMetrics.ts new file mode 100644 index 0000000..c519bdd --- /dev/null +++ b/src/queries/sql/sessions/getSessionMetrics.ts @@ -0,0 +1,130 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getSessionMetrics'; + +export interface SessionMetricsParameters { + type: string; + limit?: number | string; + offset?: number | string; +} + +export async function getSessionMetrics( + ...args: [websiteId: string, parameters: SessionMetricsParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: SessionMetricsParameters, + filters: QueryFilters, +) { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { parseFilters, rawQuery } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + }, + { + joinSession: SESSION_COLUMNS.includes(type), + }, + ); + const includeCountry = column === 'city' || column === 'region'; + + if (type === 'language') { + column = `lower(left(${type}, 2))`; + } + + return rawQuery( + ` + select + ${column} x, + count(distinct website_event.session_id) y + ${includeCountry ? ', country' : ''} + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by 1 + ${includeCountry ? ', 3' : ''} + order by 2 desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: SessionMetricsParameters, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { parseFilters, rawQuery } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + const includeCountry = column === 'city' || column === 'region'; + + if (type === 'language') { + column = `lower(left(${type}, 2))`; + } + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + ${column} x, + count(distinct session_id) y + ${includeCountry ? ', country' : ''} + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by x + ${includeCountry ? ', country' : ''} + order by y desc + limit ${limit} + offset ${offset} + `; + } else { + sql = ` + select + ${column} x, + uniq(session_id) y + ${includeCountry ? ', country' : ''} + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by x + ${includeCountry ? ', country' : ''} + order by y desc + limit ${limit} + offset ${offset} + `; + } + + return rawQuery(sql, { ...queryParams, ...parameters }, FUNCTION_NAME); +} diff --git a/src/queries/sql/sessions/getSessionStats.ts b/src/queries/sql/sessions/getSessionStats.ts new file mode 100644 index 0000000..fd45772 --- /dev/null +++ b/src/queries/sql/sessions/getSessionStats.ts @@ -0,0 +1,98 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getSessionStats'; + +export async function getSessionStats(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { timezone = 'utc', unit = 'day' } = filters; + const { getDateSQL, parseFilters, rawQuery } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + ${getDateSQL('website_event.created_at', unit, timezone)} x, + count(distinct website_event.session_id) y + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by 1 + order by 1 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { timezone = 'UTC', unit = 'day' } = filters; + const { parseFilters, rawQuery, getDateSQL } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') { + sql = ` + select + g.t as x, + g.y as y + from ( + select + ${getDateSQL('website_event.created_at', unit, timezone)} as t, + count(distinct session_id) as y + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by t + ) as g + order by t + `; + } else { + sql = ` + select + g.t as x, + g.y as y + from ( + select + ${getDateSQL('website_event.created_at', unit, timezone)} as t, + uniq(session_id) as y + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by t + ) as g + order by t + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME); +} diff --git a/src/queries/sql/sessions/getWebsiteSession.ts b/src/queries/sql/sessions/getWebsiteSession.ts new file mode 100644 index 0000000..3c16087 --- /dev/null +++ b/src/queries/sql/sessions/getWebsiteSession.ts @@ -0,0 +1,113 @@ +import clickhouse from '@/lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; + +const FUNCTION_NAME = 'getWebsiteSession'; + +export async function getWebsiteSession(...args: [websiteId: string, sessionId: string]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, sessionId: string) { + const { rawQuery, getTimestampDiffSQL } = prisma; + + return rawQuery( + ` + select id, + distinct_id as "distinctId", + website_id as "websiteId", + browser, + os, + device, + screen, + language, + country, + region, + city, + min(min_time) as "firstAt", + max(max_time) as "lastAt", + count(distinct visit_id) as visits, + sum(views) as views, + sum(events) as events, + sum(${getTimestampDiffSQL('min_time', 'max_time')}) as "totaltime" + from (select + session.session_id as id, + session.distinct_id, + website_event.visit_id, + session.website_id, + session.browser, + session.os, + session.device, + session.screen, + session.language, + session.country, + session.region, + session.city, + min(website_event.created_at) as min_time, + max(website_event.created_at) as max_time, + sum(case when website_event.event_type = 1 then 1 else 0 end) as views, + sum(case when website_event.event_type = 2 then 1 else 0 end) as events + from session + join website_event on website_event.session_id = session.session_id + where session.website_id = {{websiteId::uuid}} + and session.session_id = {{sessionId::uuid}} + group by session.session_id, session.distinct_id, visit_id, session.website_id, session.browser, session.os, session.device, session.screen, session.language, session.country, session.region, session.city) t + group by id, distinct_id, website_id, browser, os, device, screen, language, country, region, city; + `, + { websiteId, sessionId }, + FUNCTION_NAME, + ).then(result => result?.[0]); +} + +async function clickhouseQuery(websiteId: string, sessionId: string) { + const { rawQuery, getDateStringSQL } = clickhouse; + + return rawQuery( + ` + select id, + websiteId, + distinctId, + browser, + os, + device, + screen, + language, + country, + region, + city, + ${getDateStringSQL('min(min_time)')} as firstAt, + ${getDateStringSQL('max(max_time)')} as lastAt, + uniq(visit_id) visits, + sum(views) as views, + sum(events) as events, + sum(max_time-min_time) as totaltime + from (select + session_id as id, + distinct_id as distinctId, + visit_id, + website_id as websiteId, + browser, + os, + device, + screen, + language, + country, + region, + city, + min(min_time) as min_time, + max(max_time) as max_time, + sum(views) as views, + length(groupArrayArray(event_name)) as events + from website_event_stats_hourly + where website_id = {websiteId:UUID} + and session_id = {sessionId:UUID} + group by session_id, distinct_id, visit_id, website_id, browser, os, device, screen, language, country, region, city) t + group by id, websiteId, distinctId, browser, os, device, screen, language, country, region, city; + `, + { websiteId, sessionId }, + FUNCTION_NAME, + ).then(result => result?.[0]); +} diff --git a/src/queries/sql/sessions/getWebsiteSessionStats.ts b/src/queries/sql/sessions/getWebsiteSessionStats.ts new file mode 100644 index 0000000..a12e6c6 --- /dev/null +++ b/src/queries/sql/sessions/getWebsiteSessionStats.ts @@ -0,0 +1,97 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getWebsiteSessionStats'; + +export interface WebsiteSessionStatsData { + pageviews: number; + visitors: number; + visits: number; + countries: number; + events: number; +} + +export async function getWebsiteSessionStats( + ...args: [websiteId: string, filters: QueryFilters] +): Promise<WebsiteSessionStatsData[]> { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + filters: QueryFilters, +): Promise<WebsiteSessionStatsData[]> { + const { parseFilters, rawQuery } = prisma; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + count(*) as "pageviews", + count(distinct website_event.session_id) as "visitors", + count(distinct website_event.visit_id) as "visits", + count(distinct session.country) as "countries", + sum(case when website_event.event_type = 2 then 1 else 0 end) as "events" + from website_event + ${cohortQuery} + join session on website_event.session_id = session.session_id + and website_event.website_id = session.website_id + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<WebsiteSessionStatsData[]> { + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ ...filters, websiteId }); + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + sumIf(1, event_type = 1) as "pageviews", + uniq(session_id) as "visitors", + uniq(visit_id) as "visits", + uniq(country) as "countries", + sum(length(event_name)) as "events" + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + `; + } else { + sql = ` + select + sum(views) as "pageviews", + uniq(session_id) as "visitors", + uniq(visit_id) as "visits", + uniq(country) as "countries", + sum(length(event_name)) as "events" + from website_event_stats_hourly website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + ${filterQuery} + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME); +} diff --git a/src/queries/sql/sessions/getWebsiteSessions.ts b/src/queries/sql/sessions/getWebsiteSessions.ts new file mode 100644 index 0000000..df640d6 --- /dev/null +++ b/src/queries/sql/sessions/getWebsiteSessions.ts @@ -0,0 +1,156 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getWebsiteSessions'; + +export async function getWebsiteSessions(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { pagedRawQuery, parseFilters } = prisma; + const { search } = filters; + const { filterQuery, dateQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + search: search ? `%${search}%` : undefined, + }); + + const searchQuery = search + ? `and (distinct_id ilike {{search}} + or city ilike {{search}} + or browser ilike {{search}} + or os ilike {{search}} + or device ilike {{search}})` + : ''; + + return pagedRawQuery( + ` + select + session.session_id as "id", + session.website_id as "websiteId", + website_event.hostname, + session.browser, + session.os, + session.device, + session.screen, + session.language, + session.country, + session.region, + session.city, + min(website_event.created_at) as "firstAt", + max(website_event.created_at) as "lastAt", + count(distinct website_event.visit_id) as "visits", + sum(case when website_event.event_type = 1 then 1 else 0 end) as "views", + max(website_event.created_at) as "createdAt" + from website_event + ${cohortQuery} + join session on session.session_id = website_event.session_id + and session.website_id = website_event.website_id + where website_event.website_id = {{websiteId::uuid}} + ${dateQuery} + ${filterQuery} + ${searchQuery} + group by session.session_id, + session.website_id, + website_event.hostname, + session.browser, + session.os, + session.device, + session.screen, + session.language, + session.country, + session.region, + session.city + order by max(website_event.created_at) desc + `, + queryParams, + filters, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery(websiteId: string, filters: QueryFilters) { + const { pagedRawQuery, parseFilters, getDateStringSQL } = clickhouse; + const { search } = filters; + const { filterQuery, dateQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + const searchQuery = search + ? `and ((positionCaseInsensitive(distinct_id, {search:String}) > 0) + or (positionCaseInsensitive(city, {search:String}) > 0) + or (positionCaseInsensitive(browser, {search:String}) > 0) + or (positionCaseInsensitive(os, {search:String}) > 0) + or (positionCaseInsensitive(device, {search:String}) > 0))` + : ''; + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + session_id as id, + website_id as websiteId, + hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + ${getDateStringSQL('min(created_at)')} as firstAt, + ${getDateStringSQL('max(created_at)')} as lastAt, + uniq(visit_id) as visits, + sumIf(1, event_type = 1) as views, + lastAt as createdAt + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + ${dateQuery} + ${filterQuery} + ${searchQuery} + group by session_id, website_id, hostname, browser, os, device, screen, language, country, region, city + order by lastAt desc + `; + } else { + sql = ` + select + session_id as id, + website_id as websiteId, + arrayFirst(x -> 1, hostname) hostname, + browser, + os, + device, + screen, + language, + country, + region, + city, + ${getDateStringSQL('min(min_time)')} as firstAt, + ${getDateStringSQL('max(max_time)')} as lastAt, + uniq(visit_id) as visits, + sumIf(views, event_type = 1) as views, + lastAt as createdAt + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + ${dateQuery} + ${filterQuery} + ${searchQuery} + group by session_id, website_id, hostname, browser, os, device, screen, language, country, region, city + order by lastAt desc + `; + } + + return pagedRawQuery(sql, queryParams, filters, FUNCTION_NAME); +} diff --git a/src/queries/sql/sessions/saveSessionData.ts b/src/queries/sql/sessions/saveSessionData.ts new file mode 100644 index 0000000..7409317 --- /dev/null +++ b/src/queries/sql/sessions/saveSessionData.ts @@ -0,0 +1,112 @@ +import clickhouse from '@/lib/clickhouse'; +import { DATA_TYPE } from '@/lib/constants'; +import { uuid } from '@/lib/crypto'; +import { flattenJSON, getStringValue } from '@/lib/data'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import kafka from '@/lib/kafka'; +import prisma from '@/lib/prisma'; +import type { DynamicData } from '@/lib/types'; + +export interface SaveSessionDataArgs { + websiteId: string; + sessionId: string; + sessionData: DynamicData; + distinctId?: string; + createdAt?: Date; +} + +export async function saveSessionData(data: SaveSessionDataArgs) { + return runQuery({ + [PRISMA]: () => relationalQuery(data), + [CLICKHOUSE]: () => clickhouseQuery(data), + }); +} + +export async function relationalQuery({ + websiteId, + sessionId, + sessionData, + distinctId, + createdAt, +}: SaveSessionDataArgs) { + const { client } = prisma; + + const jsonKeys = flattenJSON(sessionData); + + const flattenedData = jsonKeys.map(a => ({ + id: uuid(), + websiteId, + sessionId, + dataKey: a.key, + stringValue: getStringValue(a.value, a.dataType), + numberValue: a.dataType === DATA_TYPE.number ? a.value : null, + dateValue: a.dataType === DATA_TYPE.date ? new Date(a.value) : null, + dataType: a.dataType, + distinctId, + createdAt, + })); + + const existing = await client.sessionData.findMany({ + where: { + sessionId, + }, + select: { + id: true, + sessionId: true, + dataKey: true, + }, + }); + + for (const data of flattenedData) { + const { sessionId, dataKey, ...props } = data; + const record = existing.find(e => e.sessionId === sessionId && e.dataKey === dataKey); + + if (record) { + await client.sessionData.update({ + where: { + id: record.id, + }, + data: { + ...props, + }, + }); + } else { + await client.sessionData.create({ + data, + }); + } + } +} + +async function clickhouseQuery({ + websiteId, + sessionId, + sessionData, + distinctId, + createdAt, +}: SaveSessionDataArgs) { + const { insert, getUTCString } = clickhouse; + const { sendMessage } = kafka; + + const jsonKeys = flattenJSON(sessionData); + + const messages = jsonKeys.map(({ key, value, dataType }) => { + return { + website_id: websiteId, + session_id: sessionId, + data_key: key, + data_type: dataType, + string_value: getStringValue(value, dataType), + number_value: dataType === DATA_TYPE.number ? value : null, + date_value: dataType === DATA_TYPE.date ? getUTCString(value) : null, + distinct_id: distinctId, + created_at: getUTCString(createdAt), + }; + }); + + if (kafka.enabled) { + await sendMessage('session_data', messages); + } else { + await insert('session_data', messages); + } +} |