Sample SQL Queries: Aggregate-Level Data & Collab. Graph

The following is a sample set of SQL queries you can use to get started with the Worklytics dataset. This code block assumes the user has access to the individual aggregates, individual groups, and collaboration graph datasets, though the exact table names will have to be substituted for the ones in your company's environment.

/* 
SAMPLE SQL QUERIES FOR AGGREGATE-LEVEL DATA
DATE: 2024.09.19

INPUT TABLES: 
{weekly_individual_aggregates}
{weekly_individual_groups}
{collaboration_graph}
{collaboration_graph_split}


SECTION 1: IDENTIFY KEY METRICS ABOUT YOUR ORGANIZATION'S MEETING CULTURE, FOCUS TIME, WORK-LIFE BALANCE AND COLLABORATION CULTURE USING THE INDIVIDUAL AGGREGATES TABLE
SECTION 2: IDENTIFY THE TYPICAL NUMBER OF COLLABORATORS (AND STRONG COLLABORATORS) AND TYPICAL NUMBER OF SLACK COLLABORATORS

NOTE: QUERIES HAVE BEEN WRITTEN FOR USE IN BIGQUERY - THEREFORE CERTAIN FUNCTIONS MAY NEED TO BE ADJUSTED FOR USE IN ANOTHER ENVIRONMENT

*/

------------------------------------------------------------------------------------------------------------------------------
--QUERY 1.1: IDENTIFY KEY METRICS ABOUT YOUR ORGANIZATION'S MEETING CULTURE
--INCLUDES METRICS FOR MEDIAN NUMBER OF WEEKLY MEETING HOURS, % OF MEETING TIME FOCUSED AND DAILY FOCUS HOURS
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, a.employeeId
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
weekly_totals AS (
SELECT p.week, 
       p.employeeId, 
       CAST(SUM(IF(key='calendar:events:hours:meetings',value,0)) AS NUMERIC) AS weekly_meeting_hours,
       1.0*CAST(SUM(IF(key='calendar:events:focus',value,0)) AS NUMERIC)/100.0 AS percent_meeting_focused,      
       CAST(SUM(IF(key='worklytics:hours:in:focus:blocks:v3:flow',value,0)) AS NUMERIC) AS daily_focus_hours             
FROM {weekly_individual_aggregates} p 
JOIN active_min_hours_emps e 
ON p.employeeId = e.employeeId 
AND p.week = e. week
WHERE 1=1
AND key IN ('calendar:events:hours:meetings','calendar:events:focus','worklytics:hours:in:focus:blocks:v3:flow')
GROUP BY 1,2 
),
average_totals AS (
SELECT employeeId, 
       CAST(AVG(weekly_meeting_hours) AS NUMERIC) AS average_weekly_meeting_hours,
       CAST(AVG(percent_meeting_focused) AS NUMERIC) AS average_percent_meeting_focused,
       CAST(AVG(daily_focus_hours) AS NUMERIC) AS average_daily_focus_hours
FROM weekly_totals
GROUP BY 1
)
SELECT ROUND(PERCENTILE_CONT(average_weekly_meeting_hours,0.5) OVER (),1) AS weekly_meeting_hours,
       ROUND(PERCENTILE_CONT(average_percent_meeting_focused,0.5) OVER (),2) AS percent_meeting_focused,
       ROUND(PERCENTILE_CONT(average_daily_focus_hours,0.5) OVER (),1) AS daily_focus_hours
FROM average_totals
LIMIT 1; 


------------------------------------------------------------------------------------------------------------------------------
--QUERY 1.2: IDENTIFY KEY METRICS ABOUT YOUR ORGANIZATION'S WORK-LIFE BALANCE BY LEVEL OF EMPLOYEE
--INCLUDES METRICS FOR MEDIAN NUMBER OF DAILY WORK HOURS AND WORK HOURS PER WEEKEND 
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, a.employeeId, g.level
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
weekly_totals AS (
SELECT p.week, 
       p.employeeId, 
       e.level,
       CAST(SUM(IF(key='worklytics:weekdays:avg:timespan:hours',value,0)) AS NUMERIC) AS daily_work_hours, 
       CAST(SUM(IF(key='worklytics:weekends:total:time:worked:hours',value,0)) AS NUMERIC) AS weekly_hours_weekend
FROM {weekly_individual_aggregates} p 
JOIN active_min_hours_emps e 
ON p.employeeId = e.employeeId
AND p.week = e. week
WHERE 1=1
AND key IN ('worklytics:weekdays:avg:timespan:hours','worklytics:weekends:total:time:worked:hours')
GROUP BY 1,2,3 
),
average_totals AS (
SELECT employeeId, 
       level,
       CAST(AVG(daily_work_hours) AS NUMERIC) AS average_daily_work_hours,
       CAST(AVG(weekly_hours_weekend) AS NUMERIC) AS average_weekly_hours_weekend
FROM weekly_totals
GROUP BY 1,2
)
SELECT DISTINCT level, ROUND(PERCENTILE_CONT(average_daily_work_hours,0.5) OVER (PARTITION BY level),1) AS daily_work_hours,
       ROUND(PERCENTILE_CONT(average_weekly_hours_weekend,0.5) OVER (PARTITION BY level),1) AS weekly_hours_weekend
FROM average_totals
ORDER BY 1;


------------------------------------------------------------------------------------------------------------------------------
--QUERY 1.3: IDENTIFY KEY METRICS ABOUT YOUR ORGANIZATION'S COLLABORATION CULTURE
--INCLUDES METRICS FOR MEDIAN NUMBER OF WEEKLY COLLABORATORS, WEEKLY STRONG COLLABORATORS, NUMBER OF COLLABORATORS BETWEEN LINES OF BUSINESS
--AND THE NUMBER OF HOURS SPENT COLLABORATING BETWEEN LINES OF BUSINESS
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, a.employeeId
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
weekly_totals AS (
SELECT p.week, 
       p.employeeId, 
       CAST(SUM(IF(key='collaborators:count_distinct',value,0)) AS NUMERIC) AS weekly_collaborators,
       CAST(SUM(IF(key='collaborators:strong_count_distinct',value,0)) AS NUMERIC) AS weekly_strong_collaborators,
       CAST(SUM(IF(key='lineofbusiness_individual_collaboration_intrateam_hours',value,0)) AS NUMERIC) AS weekly_hours_within_lob_collaboration,
       CAST(SUM(IF(key='lineofbusiness_individual_collaboration_interteam_hours',value,0)) AS NUMERIC) AS weekly_hours_between_lob_collaboration,
       CAST(SUM(IF(key='lineofbusiness_individual_collaboration_intrateam_people_count',value,0)) AS NUMERIC) AS weekly_collaborators_within_lob,
       CAST(SUM(IF(key='lineofbusiness_individual_collaboration_interteam_people_count',value,0)) AS NUMERIC) AS weekly_collaborators_between_lob 
FROM {weekly_individual_aggregates} p 
JOIN active_min_hours_emps e 
ON p.employeeId = e.employeeId
AND p.week = e. week
WHERE 1=1
AND key IN ('collaborators:count_distinct','collaborators:strong_count_distinct','lineofbusiness_individual_collaboration_intrateam_hours','lineofbusiness_individual_collaboration_interteam_hours','lineofbusiness_individual_collaboration_intrateam_people_count','lineofbusiness_individual_collaboration_interteam_people_count')
GROUP BY 1,2 
),
average_totals AS (
SELECT employeeId, 
       CAST(AVG(weekly_collaborators) AS NUMERIC) AS average_weekly_collaborators,
       CAST(AVG(weekly_strong_collaborators) AS NUMERIC) AS average_weekly_strong_collaborators,
       CAST(AVG(weekly_hours_within_lob_collaboration) AS NUMERIC) AS average_weekly_hours_within_lob_collaboration,
       CAST(AVG(weekly_hours_between_lob_collaboration) AS NUMERIC) AS average_weekly_hours_between_lob_collaboration,
       CAST(AVG(weekly_collaborators_within_lob) AS NUMERIC) AS average_weekly_collaborators_within_lob,
       CAST(AVG(weekly_collaborators_between_lob ) AS NUMERIC) AS average_weekly_collaborators_between_lob
FROM weekly_totals
GROUP BY 1
)
SELECT ROUND(PERCENTILE_CONT(average_weekly_collaborators,0.5) OVER (),1) AS weekly_collaborators,
       ROUND(PERCENTILE_CONT(average_weekly_strong_collaborators,0.5) OVER (),1) AS weekly_strong_collaborators,
       ROUND(PERCENTILE_CONT(average_weekly_hours_within_lob_collaboration,0.5) OVER (),1) AS weekly_hours_within_lob_collaboration, 
       ROUND(PERCENTILE_CONT(average_weekly_hours_between_lob_collaboration,0.5) OVER (),1) AS weekly_hours_between_lob_collaboration, 
       ROUND(PERCENTILE_CONT(average_weekly_collaborators_within_lob,0.5) OVER (),1) AS weekly_collaborators_within_lob, 
       ROUND(PERCENTILE_CONT(average_weekly_collaborators_between_lob,0.5) OVER (),2) AS weekly_collaborators_between_lob
FROM average_totals
LIMIT 1; 


------------------------------------------------------------------------------------------------------------------------------
--QUERY 2.1 IDENTIFY THE MEDIAN NUMBER OF COLLABORATORS AND STRONG COLLABORATORS BASED ON LINE OF BUSINESS
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, 
       a.employeeId,
       g.custom_group_1,
       g.line_of_business
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
collaborator_type AS (
SELECT c.week,
       e.line_of_business,
       employeeIdSource AS person_id_src,
       employeeIdTarget AS person_id_dst,
       SUM(weeklyCollaborationTimeSeconds)  AS time_spent_together_exclusive_ms,
       SUM(weeklyCollaborationTimeSeconds) >= (2 * 60 * 60) AS is_strong_connection
FROM {collaboration_graph} c
JOIN active_min_hours_emps e
ON c.employeeIdSource = e.employeeId
AND DATE_TRUNC(c.week, ISOWEEK) = e.week
WHERE 1=1
GROUP BY 1,2,3,4 
),
people_connections AS (
SELECT week,
       person_id_src AS person_id,
       line_of_business,
       COUNT(DISTINCT person_id_dst) AS connections,
       COUNT(DISTINCT(IF(is_strong_connection=true, person_id_dst,NULL))) AS strong_connections
FROM collaborator_type  
GROUP BY 1,2,3
)
SELECT DISTINCT line_of_business,
       ROUND(PERCENTILE_CONT(connections,0.5) OVER (PARTITION BY line_of_business),0) AS connections,
       ROUND(PERCENTILE_CONT(strong_connections,0.5) OVER (PARTITION BY line_of_business),0) AS strong_connections
FROM people_connections
WHERE line_of_business IS NOT NULL
ORDER BY 1;


------------------------------------------------------------------------------------------------------------------------------
--QUERY 2.2 IDENTIFY THE MEDIAN NUMBER OF SLACK COLLABORATORS FOR THE TYPICAL EMPLOYEE BASED ON LINE OF BUSINESS
--NOTE THAT METRICS INCLUDE ONLY ACTIVE EMPLOYEES THAT WORKED A MINIMUM OF 4 HOURS EACH DAY AND INCLUDES ONLY FULL-TIME EMPLOYEES
------------------------------------------------------------------------------------------------------------------------------
WITH active_min_hours_emps AS (
SELECT DISTINCT a.week, 
       a.employeeId,
       g.custom_group_1,
       g.line_of_business
FROM {weekly_individual_aggregates} a
JOIN {weekly_individual_groups} g 
ON a.employeeId = g.employeeId 
AND a.week = g.week
WHERE 1=1
AND g.week >= '2024-01-01' AND g.week < '2024-03-31'
AND g.custom_group_1 IN ('Full-Time') -- exact filter will vary by company
AND g.active = true
AND (a.key = 'worklytics:weekdays:avg:timespan:hours' AND a.value >= 4) 
),
collaborator_type AS (
SELECT c.week,
       e.line_of_business,
       employeeIdSource AS person_id_src,
       employeeIdTarget AS person_id_dst
FROM {collaboration_graph_split} c
JOIN active_min_hours_emps e
ON c.employeeIdSource = e.employeeId
AND DATE_TRUNC(c.week, ISOWEEK) = e.week
WHERE 1=1
AND sourceType = 'slack'
GROUP BY 1,2,3,4 
),
people_connections_slack AS (
SELECT week,
       person_id_src AS person_id,
       line_of_business,
       COUNT(DISTINCT person_id_dst) AS slack_connections
FROM collaborator_type  
GROUP BY 1,2,3
)
SELECT DISTINCT line_of_business,
       ROUND(PERCENTILE_CONT(slack_connections,0.5) OVER (PARTITION BY line_of_business),0) AS slack_connections
FROM people_connections_slack
WHERE line_of_business IS NOT NULL
ORDER BY 1;

Last updated