diff options
Diffstat (limited to 'src/queries/sql/pageviews')
| -rw-r--r-- | src/queries/sql/pageviews/getPageviewExpandedMetrics.ts | 227 | ||||
| -rw-r--r-- | src/queries/sql/pageviews/getPageviewMetrics.ts | 191 | ||||
| -rw-r--r-- | src/queries/sql/pageviews/getPageviewStats.ts | 98 |
3 files changed, 516 insertions, 0 deletions
diff --git a/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts new file mode 100644 index 0000000..986d7d5 --- /dev/null +++ b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts @@ -0,0 +1,227 @@ +import clickhouse from '@/lib/clickhouse'; +import { FILTER_COLUMNS, GROUPED_DOMAINS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getPageviewExpandedMetrics'; + +export interface PageviewExpandedMetricsParameters { + type: string; + limit?: number | string; + offset?: number | string; +} + +export interface PageviewExpandedMetricsData { + name: string; + pageviews: number; + visitors: number; + visits: number; + bounces: number; + totaltime: number; +} + +export async function getPageviewExpandedMetrics( + ...args: [websiteId: string, parameters: PageviewExpandedMetricsParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: PageviewExpandedMetricsParameters, + filters: QueryFilters, +): Promise<PageviewExpandedMetricsData[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + }, + { joinSession: SESSION_COLUMNS.includes(type) }, + ); + + let entryExitQuery = ''; + let excludeDomain = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and website_event.referrer_domain != website_event.hostname + and website_event.referrer_domain != ''`; + if (type === 'domain') { + column = toPostgresGroupedReferrer(GROUPED_DOMAINS); + } + } + + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'min' : 'max'; + + entryExitQuery = ` + join ( + select visit_id, + ${aggregrate}(created_at) target_created_at + from website_event + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + group by visit_id + ) x + on x.visit_id = website_event.visit_id + and x.target_created_at = website_event.created_at + `; + } + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} as name, + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + ${entryExitQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${excludeDomain} + ${filterQuery} + group by ${column}, website_event.session_id, website_event.visit_id + ) as t + where name != '' + group by name + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: PageviewExpandedMetricsParameters, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let excludeDomain = ''; + let entryExitQuery = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`; + if (type === 'domain') { + column = toClickHouseGroupedReferrer(GROUPED_DOMAINS); + } + } + + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'argMin' : 'argMax'; + column = `x.${column}`; + + entryExitQuery = ` + JOIN (select visit_id, + ${aggregrate}(url_path, created_at) url_path + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + group by visit_id) x + ON x.visit_id = website_event.visit_id`; + } + + return rawQuery( + ` + select + name, + sum(t.c) as "pageviews", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select + ${column} name, + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + ${entryExitQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + and name != '' + ${excludeDomain} + ${filterQuery} + group by name, session_id, visit_id + ) as t + group by name + order by visitors desc, visits desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +export function toClickHouseGroupedReferrer( + domains: any[], + column: string = 'referrer_domain', +): string { + return [ + 'CASE', + ...domains.map(group => { + const matches = Array.isArray(group.match) ? group.match : [group.match]; + const formattedArray = matches.map(m => `'${m}'`).join(', '); + return ` WHEN multiSearchAny(${column}, [${formattedArray}]) != 0 THEN '${group.domain}'`; + }), + " ELSE 'Other'", + 'END', + ].join('\n'); +} + +export function toPostgresGroupedReferrer( + domains: any[], + column: string = 'referrer_domain', +): string { + return [ + 'CASE', + ...domains.map(group => { + const matches = Array.isArray(group.match) ? group.match : [group.match]; + + return `WHEN ${toPostgresLikeClause(column, matches)} THEN '${group.domain}'`; + }), + " ELSE 'Other'", + 'END', + ].join('\n'); +} + +function toPostgresLikeClause(column: string, arr: string[]) { + return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); +} diff --git a/src/queries/sql/pageviews/getPageviewMetrics.ts b/src/queries/sql/pageviews/getPageviewMetrics.ts new file mode 100644 index 0000000..9d4f627 --- /dev/null +++ b/src/queries/sql/pageviews/getPageviewMetrics.ts @@ -0,0 +1,191 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getPageviewMetrics'; + +export interface PageviewMetricsParameters { + type: string; + limit?: number | string; + offset?: number | string; +} + +export interface PageviewMetricsData { + x: string; + y: number; +} + +export async function getPageviewMetrics( + ...args: [websiteId: string, parameters: PageviewMetricsParameters, filters: QueryFilters] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + parameters: PageviewMetricsParameters, + filters: QueryFilters, +): Promise<PageviewMetricsData[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = prisma; + const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( + { + ...filters, + websiteId, + }, + { joinSession: SESSION_COLUMNS.includes(type) }, + ); + + let entryExitQuery = ''; + let excludeDomain = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and website_event.referrer_domain != website_event.hostname + and website_event.referrer_domain != ''`; + } + + if (type === 'entry' || type === 'exit') { + const order = type === 'entry' ? 'asc' : 'desc'; + column = `x.${column}`; + + entryExitQuery = ` + join ( + select distinct on (visit_id) + visit_id, + url_path + from website_event + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + order by visit_id, created_at ${order} + ) x + on x.visit_id = website_event.visit_id + `; + } + + return rawQuery( + ` + select ${column} x, + count(distinct website_event.session_id) as y + from website_event + ${cohortQuery} + ${joinSessionQuery} + ${entryExitQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${excludeDomain} + ${filterQuery} + group by 1 + order by 2 desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters }, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + parameters: PageviewMetricsParameters, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { type, limit = 500, offset = 0 } = parameters; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let sql = ''; + let excludeDomain = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + let entryExitQuery = ''; + + if (column === 'referrer_domain') { + excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`; + } + + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'argMin' : 'argMax'; + column = `x.${column}`; + + entryExitQuery = ` + JOIN (select visit_id, + ${aggregrate}(url_path, created_at) url_path + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + group by visit_id) x + ON x.visit_id = website_event.visit_id`; + } + + sql = ` + select ${column} x, + uniq(website_event.session_id) as y + from website_event + ${cohortQuery} + ${entryExitQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${excludeDomain} + ${filterQuery} + group by x + order by y desc + limit ${limit} + offset ${offset} + `; + } else { + let groupByQuery = ''; + let columnQuery = `arrayJoin(${column})`; + + if (column === 'referrer_domain') { + excludeDomain = `and t != ''`; + } + + if (type === 'entry') { + columnQuery = `argMinMerge(entry_url)`; + } + + if (type === 'exit') { + columnQuery = `argMaxMerge(exit_url)`; + } + + if (type === 'entry' || type === 'exit') { + groupByQuery = 'group by s'; + } + + sql = ` + select g.t as x, + uniq(s) as y + from ( + select session_id s, + ${columnQuery} as t + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${excludeDomain} + ${filterQuery} + ${groupByQuery}) as g + group by x + order by y desc + limit ${limit} + offset ${offset} + `; + } + + return rawQuery(sql, { ...queryParams, ...parameters }, FUNCTION_NAME); +} diff --git a/src/queries/sql/pageviews/getPageviewStats.ts b/src/queries/sql/pageviews/getPageviewStats.ts new file mode 100644 index 0000000..251d5b1 --- /dev/null +++ b/src/queries/sql/pageviews/getPageviewStats.ts @@ -0,0 +1,98 @@ +import clickhouse from '@/lib/clickhouse'; +import { EVENT_COLUMNS } from '@/lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; +import prisma from '@/lib/prisma'; +import type { QueryFilters } from '@/lib/types'; + +const FUNCTION_NAME = 'getPageviewStats'; + +export async function getPageviewStats(...args: [websiteId: string, filters: QueryFilters]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websiteId: string, filters: QueryFilters) { + const { timezone = 'utc', unit = 'day' } = filters; + const { getDateSQL, parseFilters, rawQuery } = prisma; + const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + return rawQuery( + ` + select + ${getDateSQL('website_event.created_at', unit, timezone)} x, + count(*) y + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by 1 + order by 1 + `, + queryParams, + FUNCTION_NAME, + ); +} + +async function clickhouseQuery( + websiteId: string, + filters: QueryFilters, +): Promise<{ x: string; y: number }[]> { + const { timezone = 'UTC', unit = 'day' } = filters; + const { parseFilters, rawQuery, getDateSQL } = clickhouse; + const { filterQuery, cohortQuery, queryParams } = parseFilters({ + ...filters, + websiteId, + }); + + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') { + sql = ` + select + g.t as x, + g.y as y + from ( + select + ${getDateSQL('website_event.created_at', unit, timezone)} as t, + count(*) as y + from website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by t + ) as g + order by t + `; + } else { + sql = ` + select + g.t as x, + g.y as y + from ( + select + ${getDateSQL('website_event.created_at', unit, timezone)} as t, + sum(views) as y + from website_event_stats_hourly as website_event + ${cohortQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type != 2 + ${filterQuery} + group by t + ) as g + order by t + `; + } + + return rawQuery(sql, queryParams, FUNCTION_NAME); +} |