Sample SQL Queries: Event-Level Data

The following is a sample set of SQL queries you can use to advance beyond the basics in your use of the Worklytics dataset. This code block assumes the user has access to the employee events dataset, though the exact table names will have to be substituted for the ones in your company's environment.

If you only have access to the individual aggregates, individual groups, and collaboration graph datasets, we recommend focusing your efforts here.

/* 
SAMPLE SQL QUERIES FOR AGGREGATES AND EVENT-LEVEL DATA
DATE: 2024.09.19

INPUT TABLES: 
{weekly_individual_aggregates}
{weekly_individual_groups}
{collaboration_graph}
{employee_events}

SECTIONS:
SECTION 3: IDENTIFY THE TYPICAL WORKDAY SPAN AND NO. OF HOURS SPENT IN MEETINGS BY WEEKDAY USING THE EVENT-LEVEL TABLE
SECTION 4: IDENTIFY THE CO-LOCATION OF STRONG COLLABORATORS BASED ON BUILDING BADGED INTO

NOTE: QUERIES HAVE BEEN WRITTEN FOR USE IN BIGQUERY - THEREFORE CERTAIN FUNCTIONS MAY NEED TO BE ADJUSTED FOR USE IN ANOTHER ENVIRONMENT

*/

------------------------------------------------------------------------------------------------------------------------------
--QUERY 3.1: FOR EACH WEEKDAY WHAT IS THE TYPICAL WORKDAY SPAN OF THE AVERAGE EMPLOYEE
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, a.employeeId AS identifier
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
workday_span AS (
SELECT date(r.eventTime) AS day,
       FORMAT_DATE('%A',date(r.eventTime)) AS week_day,
       r.employeeId AS personid,
       MAX(time(eventTime)) - MIN(time(eventTime)) AS workday_span
FROM {employee_events} AS r
JOIN active_min_hours_emps AS g
ON r.employeeId = g.identifier
AND r.week = g.week
WHERE 1=1
AND r.employeeId IS NOT NULL
AND r.estimatedTimeSec > 0
AND FORMAT_DATE('%A',date(r.eventTime)) NOT IN ('Saturday', 'Sunday')
GROUP BY 1,2,3
),
total_time_disaggregated AS (
SELECT day,
       personid,
       week_day,
       workday_span,
       (EXTRACT(HOUR FROM workday_span) + EXTRACT(MINUTE FROM workday_span)/60) AS workday_span_hours
FROM workday_span d
),
averages AS (
SELECT personid,
       week_day,
       CAST(AVG(workday_span_hours) AS numeric) AS workday_span_hours
FROM total_time_disaggregated
GROUP BY 1,2
)
SELECT DISTINCT week_day,
       ROUND(PERCENTILE_CONT(workday_span_hours,0.5)  OVER (PARTITION BY week_day),1) AS median_workday_span
FROM averages
ORDER BY 1;


------------------------------------------------------------------------------------------------------------------------------
--QUERY 3.2: FOR EACH WEEKDAY WHAT IS THE TYPICAL NO. OF HOURS EMPLOYEES SPEND IN MEETINGS
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, a.employeeId
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
raw_events_data AS (
SELECT date(r.eventTime) AS day,
       r.employeeId,
       sourceKindId,
       eventType,
       COUNT(DISTINCT id) AS num_actions,
       SUM(estimatedTimeSec) AS exclusive_time
FROM {employee_events} AS r
JOIN active_min_hours_emps AS g
ON r.employeeId = g.employeeId
AND r.week = g.week
WHERE 1=1
AND r.employeeId IS NOT NULL
AND r.estimatedTimeSec > 0
GROUP BY 1,2,3,4
HAVING eventType IN ('attendance','occurrence')
AND sourceKindId IS NOT NULL
),
raw_data_consolidated AS (
SELECT day,
       employeeId,
       FORMAT_DATE('%A', day) AS week_day,
       CASE WHEN CONCAT(sourceKindId, ' | ', eventType) = 'gcal | attendance' THEN 'attending meetings'
       WHEN CONCAT(sourceKindId, ' | ', eventType) = 'zoom | occurrence' THEN 'attending meetings'
       ELSE 'other' END AS activity_type,
       num_actions,
       exclusive_time
FROM raw_events_data
WHERE 1=1 
AND FORMAT_DATE('%A', day) NOT IN ('Saturday', 'Sunday')
),
activities_grouped AS (
SELECT employeeId,
       day,
       week_day,
       activity_type,
       SUM(exclusive_time)/3600 AS time_activities_hours
FROM raw_data_consolidated
GROUP BY 1,2,3,4
),
averages AS (
SELECT employeeId,
       week_day,
       activity_type,
       CAST(AVG(time_activities_hours) AS numeric) AS activities_hours
FROM activities_grouped
GROUP BY 1,2,3
)
SELECT DISTINCT week_day,
       activity_type,
       ROUND(PERCENTILE_CONT(activities_hours,0.5)  OVER (PARTITION BY week_day, activity_type),1) AS activity_hours
FROM averages
WHERE 1=1
AND activity_type IS NOT NULL
ORDER BY 1;


------------------------------------------------------------------------------------------------------------------------------
--QUERY 4.1: IDENTIFY THE PERCENTAGE OF A TYPICAL EMPLOYEE'S CLOSE COLLABORATORS THAT ARE COLOCATED BASED ON BADGE DATA 
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
--NOTE THAT THE QUERY IS OPTIMIZED FOR WEEKLY DATA BUT CAN BE SWAPPED TO DAY
------------------------------------------------------------------------------------------------------------------------------
WITH building_badge_data AS (
SELECT week,
       person_id,
       time_first_visit,
       building_id
FROM (
       SELECT week,
              ARRAY(SELECT props FROM UNNEST(eventProperties) AS props WHERE props.name='buildingId')[OFFSET(0)].value AS building_id,
              employeeId AS person_id,
              MIN(eventTime) AS time_first_visit
       FROM {employee_events}
       WHERE 1=1 
       AND week >= '2024-01-01' AND week < '2024-03-31'
       AND sourceKindId ='badge' 
       AND employeeId IS NOT NULL
       GROUP BY 1,2,3)
),
building_colocating AS (
SELECT week,
       person_id,
       ARRAY_AGG(DISTINCT building_id IGNORE NULLS) AS building_ids,
       ARRAY_AGG(DISTINCT building_buddy_id IGNORE NULLS) AS building_buddy_ids
FROM (
       SELECT b.week,
              b.person_id,
              b.building_id,
              b2.person_id AS building_buddy_id
       FROM building_badge_data AS b
       LEFT JOIN building_badge_data AS b2
       ON b.week = b2.week
       AND b.building_id = b2.building_id
       AND b.person_id != b2.person_id)
GROUP BY 1,2
),
active_min_hours_emps AS (
SELECT DISTINCT a.week, a.employeeId AS identifier
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
collaborator_type AS (
SELECT c.week,
       employeeIdSource AS person_id_src,
       employeeIdTarget AS person_id_dst,
       SUM(weeklyCollaborationTimeSeconds)  AS time_spent_together_exclusive_ms,
       SUM(weeklyCollaborationTimeSeconds) >= (2 * 60 * 60) AS is_strong_connection,
       SUM(weeklyCollaborationTimeSeconds) <= (10 * 60) AS is_weak_connection
FROM {collaboration_graph} c
JOIN active_min_hours_emps e
ON c.employeeIdSource = e.identifier
AND DATE_TRUNC(c.week, ISOWEEK) = e.week
JOIN active_min_hours_emps e2
ON c.employeeIdTarget = e2.identifier
AND DATE_TRUNC(c.week, ISOWEEK) = e2.week
WHERE 1=1
GROUP BY 1,2,3 
),
people_collaborator_connections AS (
SELECT week,
       person_id_src AS person_id,
       ARRAY_AGG(DISTINCT person_id_dst IGNORE NULLS) AS connection_ids_all,
       ARRAY_AGG(DISTINCT IF(is_weak_connection, person_id_dst, NULL) IGNORE NULLS) AS connection_ids_weak,
       ARRAY_AGG(DISTINCT IF(is_strong_connection, person_id_dst, NULL) IGNORE NULLS) AS connection_ids_strong,
FROM collaborator_type  
GROUP BY 1,2 
),
people_colocating_badge AS (
SELECT onsite.week,
       onsite.person_id,
       onsite.building_id_first,
       ARRAY_LENGTH(connection_ids_strong) AS strong_collaborator_count,
       ARRAY_LENGTH(building_ids) AS building_count,
       ARRAY_LENGTH(building_buddy_ids) AS colocated_people_count,
       ARRAY_LENGTH(
       ARRAY(
       SELECT * FROM UNNEST(building_buddy_ids)
       INTERSECT DISTINCT
       SELECT * FROM UNNEST(connection_ids_strong)))
        AS strong_collaborator_colocated_count
FROM (SELECT week,
             person_id,
             ARRAY_AGG(bd.building_id IGNORE NULLS ORDER BY time_first_visit)[ORDINAL(1)] as building_id_first
      FROM building_badge_data bd 
      GROUP BY 1,2
) AS onsite 
LEFT JOIN building_colocating bc
ON onsite.week = bc.week
AND onsite.person_id = bc.person_id
LEFT JOIN people_collaborator_connections cc
ON onsite.week = cc.week
AND onsite.person_id = cc.person_id
WHERE 1=1
GROUP BY 1,2,3,4,5,6,7
),
people_colocating_badge_totals AS (
SELECT building_id_first,
       week,
       person_id,
       SUM(strong_collaborator_count) AS strong_collaborator,
       SUM(strong_collaborator_colocated_count) AS strong_collaborator_colocated
FROM people_colocating_badge
GROUP BY 1,2,3
)
SELECT DISTINCT building_id_first,
       ROUND(PERCENTILE_CONT(strong_collaborator,0.5) OVER (PARTITION BY building_id_first),0) AS strong_collaborator,
       ROUND(PERCENTILE_CONT(strong_collaborator_colocated,0.5) OVER (PARTITION BY building_id_first),0) AS strong_collaborator_colocated
FROM people_colocating_badge_totals 
ORDER BY 1;

Last updated