NoteThis transformation contains intermediate data preparation and creates a table ‘_temp_objects’. This table you will use in the following transformations.
Use the SQL code below and change {{projectId}}
and {{datasetName}}
to your own.
#Intermediate table with data for the pageviews, page, events, eventsInfo object as well as information about transactions for deduplication
WITH
#Set the date range for the calculation
dates AS
(
SELECT
'{{start_suffix}}' AS date_start,
'{{end_suffix}}' AS date_end,
),
#Preparing data for pageviews object
pageview AS
(
SELECT
STRUCT (
date AS date,
sessionId AS sessionId,
hitId,
time AS time,
pageviews AS pageview,
CAST(FARM_FINGERPRINT(CONCAT(IFNULL(hostName,""),IFNULL(pagePath,""),IFNULL(pageTitle,""))) AS STRING) AS pageHash
) AS pageviews,
STRUCT ( #empty
CAST(null AS STRING) AS pageHash,
CAST(null AS STRING) AS hostName,
CAST(null AS STRING) AS pagePath,
CAST(null AS STRING) AS pageTitle
) AS page,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS events,
CAST(null AS STRING) AS pageHash
) AS events,
STRUCT ( #empty
CAST(null AS STRING) AS eventHash,
CAST(null AS STRING) AS eventCategory,
CAST(null AS STRING) AS eventAction,
CAST(null AS STRING) AS eventLabel
) AS eventInfo,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS INTEGER) AS time,
CAST(null AS STRING) AS transactionId,
CAST(null AS FLOAT64) AS transactionRevenue
) AS transactions
FROM
(
SELECT
PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) AS date,
CONCAT(fullvisitorId,'_',CAST(visitorId as STRING)) as sessionId,
TO_HEX(MD5(CONCAT(fullvisitorId,'_',CAST(visitId as STRING),'_',hits.time,'_',ROW_NUMBER() OVER (partition by CONCAT(fullvisitorId,'_',CAST(visitId as STRING)))))) as hitId,
visitStartTime+hits.time AS time,
1 AS pageviews,
hits.page.hostName,
hits.page.pageTitle,
IF(STRPOS(hits.page.pagePath,'?')>0, SUBSTR(hits.page.pagePath,1,STRPOS(hits.page.pagePath,'?')-1),hits.page.pagePath) AS pagePath,
--ROW_NUMBER () OVER (PARTITION BY hitId ORDER BY hits.time asc) as RN
#List all CDs of the hit level, also name them
--(SELECT MAX(IF(index=1,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_1,
--(SELECT MAX(IF(index=2,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_2,
--(SELECT MAX(IF(index=3,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_3,
FROM
dates AS d, `{{project_id}}.{{dataset_id}}.ga_sessions_*`,
UNNEST (hits) AS hits
WHERE _TABLE_SUFFIX BETWEEN d.date_start AND d.date_end AND hits.type='PAGE'
)
),
#Preparing data for page object
p AS
(
SELECT
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS pageview,
CAST(null AS STRING) AS pageHash
) AS pageviews,
STRUCT (
CAST(FARM_FINGERPRINT(CONCAT(IFNULL(hostName,""),IFNULL(pagePath,""),IFNULL(pageTitle,""))) AS STRING) AS pageHash,
hostName AS hostName,
pagePath AS pagePath,
pageTitle AS pageTitle
) AS page,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS events,
CAST(null AS STRING) AS pageHash
) AS events,
STRUCT ( #empty
CAST(null AS STRING) AS eventHash,
CAST(null AS STRING) AS eventCategory,
CAST(null AS STRING) AS eventAction,
CAST(null AS STRING) AS eventLabel
) AS eventInfo,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS INTEGER) AS time,
CAST(null AS STRING) AS transactionId,
CAST(null AS FLOAT64) AS transactionRevenue
) AS transactions
FROM
(
SELECT
hits.page.hostName,
hits.page.pageTitle,
REGEXP_EXTRACT(hits.page.pagePath, r'[^(?|#|;)]*') AS pagePath,
#List all CDs of the hit level, also name them
--(SELECT MAX(IF(index=1,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_1,
--(SELECT MAX(IF(index=2,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_2,
--(SELECT MAX(IF(index=3,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_3,
FROM
dates AS d,`{{project_id}}.{{dataset_id}}.ga_sessions_*`,
UNNEST (hits) AS hits
WHERE _TABLE_SUFFIX BETWEEN d.date_start AND d.date_end AND LOWER(hits.type)='pageview'
GROUP BY 1,2,3
)
),
#Preparing data for events object
event AS
(
SELECT
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS pageview,
CAST(null AS STRING) AS pageHash
) AS pageviews,
STRUCT ( #empty
CAST(null AS STRING) AS pageHash,
CAST(null AS STRING) AS hostName,
CAST(null AS STRING) AS pagePath,
CAST(null AS STRING) AS pageTitle
) AS page,
STRUCT (
date,
sessionId,
hitId,
time,
events,
eventHash
) AS events,
STRUCT ( #empty
CAST(null AS STRING) AS eventHash,
CAST(null AS STRING) AS eventCategory,
CAST(null AS STRING) AS eventAction,
CAST(null AS STRING) AS eventLabel
) AS eventInfo,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS INTEGER) AS time,
CAST(null AS STRING) AS transactionId,
CAST(null AS FLOAT64) AS transactionRevenue
) AS transactions
#List all CDs of the hit level, also name them
--(SELECT MAX(IF(index=1,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_1,
--(SELECT MAX(IF(index=2,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_2,
--(SELECT MAX(IF(index=3,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_3,
FROM
(
SELECT
PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) AS date,
CONCAT(fullvisitorId,'_',CAST(visitId as STRING)) as sessionId,
TO_HEX(MD5(CONCAT(fullvisitorId,'_',CAST(visitId as STRING),'_',hits.time,'_',ROW_NUMBER() OVER (partition by CONCAT(fullvisitorId,'_',CAST(visitId as STRING)))))) as hitId,
visitStartTime+hits.time AS time,
1 as events,
CAST(FARM_FINGERPRINT(CONCAT(IFNULL(hits.eventInfo.eventCategory,""),IFNULL(hits.eventInfo.eventAction,""),IFNULL(hits.eventInfo.eventLabel,""))) AS STRING) AS eventHash,
--ROW_NUMBER () OVER (PARTITION BY hitId ORDER BY hits.time ASC) as RN
FROM
dates as d, `{{project_id}}.{{dataset_id}}.ga_sessions_*`,
UNNEST (hits) AS hits
WHERE _TABLE_SUFFIX BETWEEN d.date_start AND d.date_end AND hits.type='EVENT'
)
),
#Preparing data for eventInfo object
ei AS
(
SELECT
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS pageview,
CAST(null AS STRING) AS pageHash
) AS pageviews,
STRUCT ( #empty
CAST(null AS STRING) AS pageHash,
CAST(null AS STRING) AS hostName,
CAST(null AS STRING) AS pagePath,
CAST(null AS STRING) AS pageTitle
) AS page,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS events,
CAST(null AS STRING) AS pageHash
) AS events,
STRUCT (
eventHash,
eventCategory,
eventAction,
eventLabel
) AS eventInfo,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS INTEGER) AS time,
CAST(null AS STRING) AS transactionId,
CAST(null AS FLOAT64) AS transactionRevenue
) AS transactions
FROM
(
SELECT
CAST(FARM_FINGERPRINT(CONCAT(IFNULL(hits.eventInfo.eventCategory,""),IFNULL(hits.eventInfo.eventAction,""),IFNULL(hits.eventInfo.eventLabel,""))) AS STRING) AS eventHash,
hits.eventInfo.eventCategory AS eventCategory,
hits.eventInfo.eventAction AS eventAction,
hits.eventInfo.eventLabel AS eventLabel
#List all CDs of the hit level, also name them
--(SELECT MAX(IF(index=1,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_1,
--(SELECT MAX(IF(index=2,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_2,
--(SELECT MAX(IF(index=3,value,NULL)) FROM UNNEST(hits.customDimensions)) AS custom_dimension_3,
FROM
dates as d,`{{project_id}}.{{dataset_id}}.ga_sessions_*`,
UNNEST (hits) AS hits
WHERE _TABLE_SUFFIX BETWEEN d.date_start AND d.date_end AND LOWER(hits.type)='event'
GROUP BY 1,2,3,4
)
),
#Preparing data for transactions object
transaction AS
(
SELECT
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS pageview,
CAST(null AS STRING) AS pageHash
) AS pageviews,
STRUCT ( #empty
CAST(null AS STRING) AS pageHash,
CAST(null AS STRING) AS hostName,
CAST(null AS STRING) AS pagePath,
CAST(null AS STRING) AS pageTitle
) AS page,
STRUCT ( #empty
CAST(null AS DATE) AS date,
CAST(null AS STRING) AS sessionId,
CAST(null AS STRING) AS hitId,
CAST(null AS INTEGER) AS time,
CAST(null AS INTEGER) AS events,
CAST(null AS STRING) AS pageHash
) AS events,
STRUCT ( #empty
CAST(null AS STRING) AS eventHash,
CAST(null AS STRING) AS eventCategory,
CAST(null AS STRING) AS eventAction,
CAST(null AS STRING) AS eventLabel
) AS eventInfo,
STRUCT (
date,
sessionId,
time,
transactionId,
transactionRevenue
) AS transactions
FROM
(
SELECT
PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) AS date,
CONCAT(fullvisitorId,'_',CAST(visitId as STRING)) as sessionId,
visitStartTime+hits.time AS time,
hits.transaction.transactionId AS transactionId,
hits.transaction.transactionRevenue AS transactionRevenue
FROM
dates as d, `{{project_id}}.{{dataset_id}}.ga_sessions_*`,
UNNEST (hits) AS hits
WHERE _TABLE_SUFFIX BETWEEN d.date_start AND d.date_end AND hits.transaction.transactionId IS NOT null
GROUP BY 1,2,3,4,5
)
),
#Combining data for pageviews, page, events, eventInfo, transactions objects
Final as
(
SELECT pageviews.date as date, * FROM pageview
UNION ALL
SELECT CAST(null as DATE) as date, * FROM p
UNION ALL
SELECT events.date as date, * FROM event
UNION ALL
SELECT CAST(null as DATE) as date, * FROM ei
UNION ALL
SELECT transactions.date as date, * FROM transaction
)
SELECT
*
FROM
Final
You will get the data in the table '_temp_objects' with the schema below:
Field name | Type | |
---|---|---|
date |
DATE | |
pageviews |
RECORD | |
pageviews.date |
DATE | |
pageviews.sessionId |
STRING | |
pageviews.hitId |
STRING | |
pageviews.time |
INTEGER | |
pageviews.pageview |
INTEGER | |
pageviews.pageHash |
STRING | |
page |
RECORD | |
page.pageHash |
STRING | |
page.hostName |
STRING | |
page.pagePath |
STRING | |
page.pageTitle |
STRING | |
events |
RECORD | |
events.date |
DATE | |
events.sessionId |
STRING | |
events.hitId |
STRING | |
events.time |
INTEGER | |
events.events |
INTEGER | |
events.pageHash |
STRING | |
eventInfo |
RECORD | |
eventInfo.eventHash |
STRING | |
eventInfo.eventCategory |
STRING | |
eventInfo.eventAction |
STRING | |
eventInfo.eventLabel |
STRING | |
transactions |
RECORD | |
transactions.date |
DATE | |
transactions.sessionId |
STRING | |
transactions.time |
INTEGER | |
transactions.transactionId |
STRING | |
transactions.transactionRevenue |
FLOAT |
0 Comments