aboutsummaryrefslogtreecommitdiff
path: root/src/queries/sql/pageviews
diff options
context:
space:
mode:
Diffstat (limited to 'src/queries/sql/pageviews')
-rw-r--r--src/queries/sql/pageviews/getPageviewExpandedMetrics.ts227
-rw-r--r--src/queries/sql/pageviews/getPageviewMetrics.ts191
-rw-r--r--src/queries/sql/pageviews/getPageviewStats.ts98
3 files changed, 516 insertions, 0 deletions
diff --git a/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts
new file mode 100644
index 0000000..986d7d5
--- /dev/null
+++ b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts
@@ -0,0 +1,227 @@
+import clickhouse from '@/lib/clickhouse';
+import { FILTER_COLUMNS, GROUPED_DOMAINS, SESSION_COLUMNS } from '@/lib/constants';
+import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
+import prisma from '@/lib/prisma';
+import type { QueryFilters } from '@/lib/types';
+
+const FUNCTION_NAME = 'getPageviewExpandedMetrics';
+
+export interface PageviewExpandedMetricsParameters {
+ type: string;
+ limit?: number | string;
+ offset?: number | string;
+}
+
+export interface PageviewExpandedMetricsData {
+ name: string;
+ pageviews: number;
+ visitors: number;
+ visits: number;
+ bounces: number;
+ totaltime: number;
+}
+
+export async function getPageviewExpandedMetrics(
+ ...args: [websiteId: string, parameters: PageviewExpandedMetricsParameters, filters: QueryFilters]
+) {
+ return runQuery({
+ [PRISMA]: () => relationalQuery(...args),
+ [CLICKHOUSE]: () => clickhouseQuery(...args),
+ });
+}
+
+async function relationalQuery(
+ websiteId: string,
+ parameters: PageviewExpandedMetricsParameters,
+ filters: QueryFilters,
+): Promise<PageviewExpandedMetricsData[]> {
+ const { type, limit = 500, offset = 0 } = parameters;
+ let column = FILTER_COLUMNS[type] || type;
+ const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma;
+ const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
+ {
+ ...filters,
+ websiteId,
+ },
+ { joinSession: SESSION_COLUMNS.includes(type) },
+ );
+
+ let entryExitQuery = '';
+ let excludeDomain = '';
+
+ if (column === 'referrer_domain') {
+ excludeDomain = `and website_event.referrer_domain != website_event.hostname
+ and website_event.referrer_domain != ''`;
+ if (type === 'domain') {
+ column = toPostgresGroupedReferrer(GROUPED_DOMAINS);
+ }
+ }
+
+ if (type === 'entry' || type === 'exit') {
+ const aggregrate = type === 'entry' ? 'min' : 'max';
+
+ entryExitQuery = `
+ join (
+ select visit_id,
+ ${aggregrate}(created_at) target_created_at
+ from website_event
+ where website_event.website_id = {{websiteId::uuid}}
+ and website_event.created_at between {{startDate}} and {{endDate}}
+ and website_event.event_type != 2
+ group by visit_id
+ ) x
+ on x.visit_id = website_event.visit_id
+ and x.target_created_at = website_event.created_at
+ `;
+ }
+
+ return rawQuery(
+ `
+ select
+ name,
+ sum(t.c) as "pageviews",
+ count(distinct t.session_id) as "visitors",
+ count(distinct t.visit_id) as "visits",
+ sum(case when t.c = 1 then 1 else 0 end) as "bounces",
+ sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime"
+ from (
+ select
+ ${column} as name,
+ website_event.session_id,
+ website_event.visit_id,
+ count(*) as "c",
+ min(website_event.created_at) as "min_time",
+ max(website_event.created_at) as "max_time"
+ from website_event
+ ${cohortQuery}
+ ${joinSessionQuery}
+ ${entryExitQuery}
+ where website_event.website_id = {{websiteId::uuid}}
+ and website_event.created_at between {{startDate}} and {{endDate}}
+ and website_event.event_type != 2
+ ${excludeDomain}
+ ${filterQuery}
+ group by ${column}, website_event.session_id, website_event.visit_id
+ ) as t
+ where name != ''
+ group by name
+ order by visitors desc, visits desc
+ limit ${limit}
+ offset ${offset}
+ `,
+ queryParams,
+ FUNCTION_NAME,
+ );
+}
+
+async function clickhouseQuery(
+ websiteId: string,
+ parameters: PageviewExpandedMetricsParameters,
+ filters: QueryFilters,
+): Promise<{ x: string; y: number }[]> {
+ const { type, limit = 500, offset = 0 } = parameters;
+ let column = FILTER_COLUMNS[type] || type;
+ const { rawQuery, parseFilters } = clickhouse;
+ const { filterQuery, cohortQuery, queryParams } = parseFilters({
+ ...filters,
+ websiteId,
+ });
+
+ let excludeDomain = '';
+ let entryExitQuery = '';
+
+ if (column === 'referrer_domain') {
+ excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`;
+ if (type === 'domain') {
+ column = toClickHouseGroupedReferrer(GROUPED_DOMAINS);
+ }
+ }
+
+ if (type === 'entry' || type === 'exit') {
+ const aggregrate = type === 'entry' ? 'argMin' : 'argMax';
+ column = `x.${column}`;
+
+ entryExitQuery = `
+ JOIN (select visit_id,
+ ${aggregrate}(url_path, created_at) url_path
+ from website_event
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ group by visit_id) x
+ ON x.visit_id = website_event.visit_id`;
+ }
+
+ return rawQuery(
+ `
+ select
+ name,
+ sum(t.c) as "pageviews",
+ uniq(t.session_id) as "visitors",
+ uniq(t.visit_id) as "visits",
+ sum(if(t.c = 1, 1, 0)) as "bounces",
+ sum(max_time-min_time) as "totaltime"
+ from (
+ select
+ ${column} name,
+ session_id,
+ visit_id,
+ count(*) c,
+ min(created_at) min_time,
+ max(created_at) max_time
+ from website_event
+ ${cohortQuery}
+ ${entryExitQuery}
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ and name != ''
+ ${excludeDomain}
+ ${filterQuery}
+ group by name, session_id, visit_id
+ ) as t
+ group by name
+ order by visitors desc, visits desc
+ limit ${limit}
+ offset ${offset}
+ `,
+ { ...queryParams, ...parameters },
+ FUNCTION_NAME,
+ );
+}
+
+export function toClickHouseGroupedReferrer(
+ domains: any[],
+ column: string = 'referrer_domain',
+): string {
+ return [
+ 'CASE',
+ ...domains.map(group => {
+ const matches = Array.isArray(group.match) ? group.match : [group.match];
+ const formattedArray = matches.map(m => `'${m}'`).join(', ');
+ return ` WHEN multiSearchAny(${column}, [${formattedArray}]) != 0 THEN '${group.domain}'`;
+ }),
+ " ELSE 'Other'",
+ 'END',
+ ].join('\n');
+}
+
+export function toPostgresGroupedReferrer(
+ domains: any[],
+ column: string = 'referrer_domain',
+): string {
+ return [
+ 'CASE',
+ ...domains.map(group => {
+ const matches = Array.isArray(group.match) ? group.match : [group.match];
+
+ return `WHEN ${toPostgresLikeClause(column, matches)} THEN '${group.domain}'`;
+ }),
+ " ELSE 'Other'",
+ 'END',
+ ].join('\n');
+}
+
+function toPostgresLikeClause(column: string, arr: string[]) {
+ return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n ');
+}
diff --git a/src/queries/sql/pageviews/getPageviewMetrics.ts b/src/queries/sql/pageviews/getPageviewMetrics.ts
new file mode 100644
index 0000000..9d4f627
--- /dev/null
+++ b/src/queries/sql/pageviews/getPageviewMetrics.ts
@@ -0,0 +1,191 @@
+import clickhouse from '@/lib/clickhouse';
+import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
+import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
+import prisma from '@/lib/prisma';
+import type { QueryFilters } from '@/lib/types';
+
+const FUNCTION_NAME = 'getPageviewMetrics';
+
+export interface PageviewMetricsParameters {
+ type: string;
+ limit?: number | string;
+ offset?: number | string;
+}
+
+export interface PageviewMetricsData {
+ x: string;
+ y: number;
+}
+
+export async function getPageviewMetrics(
+ ...args: [websiteId: string, parameters: PageviewMetricsParameters, filters: QueryFilters]
+) {
+ return runQuery({
+ [PRISMA]: () => relationalQuery(...args),
+ [CLICKHOUSE]: () => clickhouseQuery(...args),
+ });
+}
+
+async function relationalQuery(
+ websiteId: string,
+ parameters: PageviewMetricsParameters,
+ filters: QueryFilters,
+): Promise<PageviewMetricsData[]> {
+ const { type, limit = 500, offset = 0 } = parameters;
+ let column = FILTER_COLUMNS[type] || type;
+ const { rawQuery, parseFilters } = prisma;
+ const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
+ {
+ ...filters,
+ websiteId,
+ },
+ { joinSession: SESSION_COLUMNS.includes(type) },
+ );
+
+ let entryExitQuery = '';
+ let excludeDomain = '';
+
+ if (column === 'referrer_domain') {
+ excludeDomain = `and website_event.referrer_domain != website_event.hostname
+ and website_event.referrer_domain != ''`;
+ }
+
+ if (type === 'entry' || type === 'exit') {
+ const order = type === 'entry' ? 'asc' : 'desc';
+ column = `x.${column}`;
+
+ entryExitQuery = `
+ join (
+ select distinct on (visit_id)
+ visit_id,
+ url_path
+ from website_event
+ where website_event.website_id = {{websiteId::uuid}}
+ and website_event.created_at between {{startDate}} and {{endDate}}
+ and website_event.event_type != 2
+ order by visit_id, created_at ${order}
+ ) x
+ on x.visit_id = website_event.visit_id
+ `;
+ }
+
+ return rawQuery(
+ `
+ select ${column} x,
+ count(distinct website_event.session_id) as y
+ from website_event
+ ${cohortQuery}
+ ${joinSessionQuery}
+ ${entryExitQuery}
+ where website_event.website_id = {{websiteId::uuid}}
+ and website_event.created_at between {{startDate}} and {{endDate}}
+ and website_event.event_type != 2
+ ${excludeDomain}
+ ${filterQuery}
+ group by 1
+ order by 2 desc
+ limit ${limit}
+ offset ${offset}
+ `,
+ { ...queryParams, ...parameters },
+ FUNCTION_NAME,
+ );
+}
+
+async function clickhouseQuery(
+ websiteId: string,
+ parameters: PageviewMetricsParameters,
+ filters: QueryFilters,
+): Promise<{ x: string; y: number }[]> {
+ const { type, limit = 500, offset = 0 } = parameters;
+ let column = FILTER_COLUMNS[type] || type;
+ const { rawQuery, parseFilters } = clickhouse;
+ const { filterQuery, cohortQuery, queryParams } = parseFilters({
+ ...filters,
+ websiteId,
+ });
+
+ let sql = '';
+ let excludeDomain = '';
+
+ if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
+ let entryExitQuery = '';
+
+ if (column === 'referrer_domain') {
+ excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`;
+ }
+
+ if (type === 'entry' || type === 'exit') {
+ const aggregrate = type === 'entry' ? 'argMin' : 'argMax';
+ column = `x.${column}`;
+
+ entryExitQuery = `
+ JOIN (select visit_id,
+ ${aggregrate}(url_path, created_at) url_path
+ from website_event
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ group by visit_id) x
+ ON x.visit_id = website_event.visit_id`;
+ }
+
+ sql = `
+ select ${column} x,
+ uniq(website_event.session_id) as y
+ from website_event
+ ${cohortQuery}
+ ${entryExitQuery}
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ ${excludeDomain}
+ ${filterQuery}
+ group by x
+ order by y desc
+ limit ${limit}
+ offset ${offset}
+ `;
+ } else {
+ let groupByQuery = '';
+ let columnQuery = `arrayJoin(${column})`;
+
+ if (column === 'referrer_domain') {
+ excludeDomain = `and t != ''`;
+ }
+
+ if (type === 'entry') {
+ columnQuery = `argMinMerge(entry_url)`;
+ }
+
+ if (type === 'exit') {
+ columnQuery = `argMaxMerge(exit_url)`;
+ }
+
+ if (type === 'entry' || type === 'exit') {
+ groupByQuery = 'group by s';
+ }
+
+ sql = `
+ select g.t as x,
+ uniq(s) as y
+ from (
+ select session_id s,
+ ${columnQuery} as t
+ from website_event_stats_hourly as website_event
+ ${cohortQuery}
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ ${excludeDomain}
+ ${filterQuery}
+ ${groupByQuery}) as g
+ group by x
+ order by y desc
+ limit ${limit}
+ offset ${offset}
+ `;
+ }
+
+ return rawQuery(sql, { ...queryParams, ...parameters }, FUNCTION_NAME);
+}
diff --git a/src/queries/sql/pageviews/getPageviewStats.ts b/src/queries/sql/pageviews/getPageviewStats.ts
new file mode 100644
index 0000000..251d5b1
--- /dev/null
+++ b/src/queries/sql/pageviews/getPageviewStats.ts
@@ -0,0 +1,98 @@
+import clickhouse from '@/lib/clickhouse';
+import { EVENT_COLUMNS } from '@/lib/constants';
+import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
+import prisma from '@/lib/prisma';
+import type { QueryFilters } from '@/lib/types';
+
+const FUNCTION_NAME = 'getPageviewStats';
+
+export async function getPageviewStats(...args: [websiteId: string, filters: QueryFilters]) {
+ return runQuery({
+ [PRISMA]: () => relationalQuery(...args),
+ [CLICKHOUSE]: () => clickhouseQuery(...args),
+ });
+}
+
+async function relationalQuery(websiteId: string, filters: QueryFilters) {
+ const { timezone = 'utc', unit = 'day' } = filters;
+ const { getDateSQL, parseFilters, rawQuery } = prisma;
+ const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters({
+ ...filters,
+ websiteId,
+ });
+
+ return rawQuery(
+ `
+ select
+ ${getDateSQL('website_event.created_at', unit, timezone)} x,
+ count(*) y
+ from website_event
+ ${cohortQuery}
+ ${joinSessionQuery}
+ where website_event.website_id = {{websiteId::uuid}}
+ and website_event.created_at between {{startDate}} and {{endDate}}
+ and website_event.event_type != 2
+ ${filterQuery}
+ group by 1
+ order by 1
+ `,
+ queryParams,
+ FUNCTION_NAME,
+ );
+}
+
+async function clickhouseQuery(
+ websiteId: string,
+ filters: QueryFilters,
+): Promise<{ x: string; y: number }[]> {
+ const { timezone = 'UTC', unit = 'day' } = filters;
+ const { parseFilters, rawQuery, getDateSQL } = clickhouse;
+ const { filterQuery, cohortQuery, queryParams } = parseFilters({
+ ...filters,
+ websiteId,
+ });
+
+ let sql = '';
+
+ if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') {
+ sql = `
+ select
+ g.t as x,
+ g.y as y
+ from (
+ select
+ ${getDateSQL('website_event.created_at', unit, timezone)} as t,
+ count(*) as y
+ from website_event
+ ${cohortQuery}
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ ${filterQuery}
+ group by t
+ ) as g
+ order by t
+ `;
+ } else {
+ sql = `
+ select
+ g.t as x,
+ g.y as y
+ from (
+ select
+ ${getDateSQL('website_event.created_at', unit, timezone)} as t,
+ sum(views) as y
+ from website_event_stats_hourly as website_event
+ ${cohortQuery}
+ where website_id = {websiteId:UUID}
+ and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+ and event_type != 2
+ ${filterQuery}
+ group by t
+ ) as g
+ order by t
+ `;
+ }
+
+ return rawQuery(sql, queryParams, FUNCTION_NAME);
+}