Use the SQL code below and change {{projectId}}
and {{datasetName}}
to your own.
#Sessions object
WITH
#Set the date range for the calculation
dates AS
(
SELECT
'20220901' AS date_start,
'20220930' AS date_end,
7 AS tdw #Number of days for deduplicating transactions, for cases where the same transaction could be repeated during few days
),
#Transaction deduplication block
transaction_deduplication AS --deduplicating transactions with the revenue for days and sessions
(
SELECT
t.date,
t.sessionId,
COUNT(*) AS transactions,
SUM(t.transactionRevenue) AS transactionRevenue,
FROM
(--selected transactions for the specified period + a lookback window (tdw) for transaction deduplication
SELECT
t.date,
t.sessionId,
t.transactionId,
t.transactionRevenue,
ROW_NUMBER() OVER (PARTITION BY t.transactionId ORDER BY time ASC) AS RN
FROM
dates AS d, `{{projectId}}.{{datasetName}}._temp_transactions` AS t
WHERE
t.date BETWEEN DATE_SUB(PARSE_DATE('%Y%m%d', d.date_start), INTERVAL tdw DAY) AND PARSE_DATE('%Y%m%d', d.date_end)
) AS t, dates AS d
WHERE
RN=1
AND
t.date BETWEEN PARSE_DATE('%Y%m%d', d.date_start) AND PARSE_DATE('%Y%m%d', d.date_end)
GROUP BY 1,2
),
#Session data selection block
sessions_data AS
(
SELECT
#main attributes of the Sessions object
PARSE_DATE('%Y%m%d',_TABLE_SUFFIX) AS date,
CONCAT(fullvisitorId,'_',CAST(visitId as STRING)) as sessionId,
#landingPage и landingHost are taken from the first hit with isEntrance=1, or from the first hit in session, if there is no hits with isEntrance=1 (in case the session starts with event)
#from the landingPage is the part of the link before the first occurrence taken ? or # or ;
REGEXP_EXTRACT((SELECT * EXCEPT(RN) FROM (SELECT h.page.pagePath, ROW_NUMBER() OVER(ORDER BY IF(h.isEntrance=true, 1, 2) ASC, time ASC) AS RN FROM UNNEST(s.hits) AS h) WHERE RN=1), r'[^(?|#|;)]*') AS landingPage,
(SELECT * EXCEPT(RN) FROM (SELECT h.page.hostname, ROW_NUMBER() OVER(ORDER BY IF(h.isEntrance=true, 1, 2) ASC, time ASC) AS RN FROM UNNEST(s.hits) AS h) WHERE RN=1) AS landingHost,
visitStartTime,
visitNumber,
'WEB' AS touchpoint,
'{{projectId}}.{{datasetName}}.ga_sessions_*' AS dataSource,
totals.hits AS hits,
(SELECT SUM(IF(h.type='PAGE', 1, 0)) FROM UNNEST(s.hits) AS h) AS pageviews,
(SELECT SUM(IF(h.type='EVENT', 1, 0)) FROM UNNEST(s.hits) AS h) AS events,
IF((SELECT SUM(IF(h.type='PAGE' OR (h.type='EVENT' AND h.isInteraction=true), 1, 0)) FROM UNNEST(s.hits) AS h)<=1, 1, 0) AS bounces,
(SELECT MAX(h.time)-MIN(h.time) FROM UNNEST(hits) AS h) AS timeOnSite, --duration in seconds
#keys to connect to other model objects
CAST(FARM_FINGERPRINT(CONCAT( --userHash
IFNULL(clientId, ''),
IFNULL(CAST(null as STRING),""),
IFNULL(CAST(null as STRING),""),
IFNULL(CAST(null as STRING),""),
IFNULL(CAST(null as STRING),""),
IFNULL(CAST(null as STRING),"")
)) AS STRING) AS userHash,
CAST(FARM_FINGERPRINT(CONCAT( --geoHash
IFNULL(geoNetwork.country, ''),
IFNULL(geoNetwork.region, ''),
IFNULL(geoNetwork.city, '')
)) AS STRING) AS geoHash,
CAST(FARM_FINGERPRINT(CONCAT( --deviceHash
IFNULL(device.deviceCategory, ''),
IFNULL(device.browser, ''),
IFNULL(device.browserVersion, ''),
IFNULL(device.operatingSystem, ''),
IFNULL(device.operatingSystemVersion, '')
)) AS STRING) AS deviceHash,
CAST(FARM_FINGERPRINT(CONCAT( --trafficSourceHash
IFNULL(trafficSource.source, ''),
IFNULL(trafficSource.medium, ''),
IFNULL(trafficSource.campaign, ''),
IFNULL(trafficSource.keyword, ''),
IFNULL(trafficSource.adContent, '')
)) AS STRING) AS trafficSourceHash,
#List all session-level CDs, also name them
--(SELECT MAX(IF(index=1, value, null)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_1,
--(SELECT MAX(IF(index=2, value, null)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_2,
--(SELECT MAX(IF(index=3, value, null)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_3,
FROM
dates AS d, `{{projectId}}.{{datasetName}}.ga_sessions_*` AS s
WHERE
_TABLE_SUFFIX BETWEEN d.date_start and d.date_end
)
SELECT
s.*,
IFNULL(t.transactions,0) as transactions,
IFNULL(t.transactionRevenue,0) as transactionRevenue,
0 as impressions,
0 as clicks,
0.0 as cost
FROM
sessions_data AS s
LEFT JOIN
transaction_deduplication AS t
ON s.sessionId=t.sessionId AND s.date=t.date
You will get the data in the table 'sessions' with the schema below:
Field name | Type | |
---|---|---|
date |
DATE | |
sessionId |
STRING | |
landingPage |
STRING | |
landingHost |
STRING | |
visitStartTime |
INTEGER | |
visitNumber |
INTEGER | |
touchpoint |
STRING | |
dataSource |
STRING | |
hits |
INTEGER | |
pageviews |
INTEGER | |
events |
INTEGER | |
bounces |
INTEGER | |
timeOnSite |
INTEGER | |
userHash |
STRING | |
geoHash |
STRING | |
deviceHash |
STRING | |
trafficSourceHash |
STRING | |
transactions |
INTEGER | |
transactionRevenue |
FLOAT | |
impressions |
INTEGER | |
clicks |
INTEGER | |
cost |
FLOAT |
0 Comments