-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
1,155 additions
and
0 deletions.
There are no files selected for viewing
130 changes: 130 additions & 0 deletions
130
queries/TAT-WOW-project/LCMB-ISC-X/find_lcmb_isc_samples_performance_analysis.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
-- Making the base sample query as a view would help with readability in the other queries. | ||
|
||
|
||
-- Version 1 - 529 samples, 14 seconds | ||
-- | ||
SELECT subject_uuid | ||
FROM | ||
( | ||
( | ||
SELECT DISTINCT(fev.subject_uuid) | ||
FROM metadata m | ||
JOIN flat_events_view fev ON m.event_id = fev.wh_event_id | ||
WHERE m.`key` = 'order_type' AND m.value = 'LCMB' | ||
AND m.created_at > '2022-05-10' | ||
AND fev.role_type = 'sample' | ||
) | ||
UNION ALL | ||
( | ||
SELECT DISTINCT(fev.subject_uuid) | ||
FROM metadata m | ||
JOIN flat_events_view fev ON m.event_id = fev.wh_event_id | ||
WHERE m.`key` = 'order_type' AND m.value = 'ReISC' | ||
AND m.created_at > '2022-05-10' | ||
AND fev.role_type = 'sample' | ||
) | ||
) AS all_lcmb_reisc_samples | ||
GROUP BY subject_uuid | ||
HAVING Count(*) > 1 | ||
; | ||
|
||
|
||
-- Version 2 - not using flat_events_view - 529 samples, 11 seconds | ||
-- | ||
SELECT id | ||
FROM | ||
( | ||
( | ||
SELECT DISTINCT(s.id) | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value = 'LCMB' | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
) | ||
UNION ALL | ||
( | ||
SELECT DISTINCT(s.id) | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value = 'ReISC' | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
) | ||
) AS all_lcmb_reisc_samples | ||
GROUP BY id | ||
HAVING Count(*) > 1 | ||
; | ||
|
||
|
||
-- Version 3 - querying both order_types in one - 529 samples, 6 seconds! :) | ||
-- | ||
SELECT all_lcmb_reisc_samples_by_pipeline.sample_id | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS sample_id, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.sample_id | ||
HAVING Count(*) > 1 | ||
; | ||
|
||
-- Version 4 - including event type filter - 529 samples, 2 seconds! :) | ||
-- | ||
SELECT all_lcmb_reisc_samples_by_pipeline.sample_id | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS sample_id, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.sample_id | ||
HAVING Count(*) > 1 | ||
; | ||
|
||
-- Version 5 - including more useful information - 529 samples, still 2ish seconds | ||
-- | ||
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS ewh_sample_id, | ||
s.uuid AS sample_uuid_bin, | ||
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, | ||
s.friendly_name AS sample_friendly_name, | ||
m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
|
||
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id | ||
HAVING Count(*) > 1 | ||
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
-- stock plate info in Events WH seems sketchy | ||
-- Following route seems much more reliable: | ||
-- Events WH sample uuid --> MLWH sample --> MLWH stock_resource | ||
-- This retrieves the stock plate barcode from when the sample manifest was uploaded | ||
-- Finds 8 distinct stock plates | ||
|
||
SELECT -- SQL_NO_CACHE | ||
relevant_samples.ewh_sample_id | ||
,relevant_samples.sample_uuid_bin | ||
,relevant_samples.sample_uuid | ||
,relevant_samples.sample_friendly_name | ||
,sr.labware_human_barcode | ||
FROM | ||
( | ||
-- Relevant (529) samples for LCMB-ISC pipeline | ||
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS ewh_sample_id, | ||
s.uuid AS sample_uuid_bin, | ||
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, | ||
s.friendly_name AS sample_friendly_name, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id | ||
HAVING Count(*) > 1 | ||
) AS relevant_samples | ||
|
||
JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid | ||
JOIN mlwhd_mlwarehouse_proddata.stock_resource sr USING (id_sample_tmp) | ||
; |
156 changes: 156 additions & 0 deletions
156
queries/TAT-WOW-project/LCMB-ISC-X/find_studies_and_projects.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
-- 2 separate queries, as each one individually takes ~2-3 seconds, but if I try to make them in a readable way into a combined query, it takes much longer. | ||
-- They can be joined in their entirety using UNION or JOIN, but it is probably cleaner to keep them as two separate queries and join using Tableau. | ||
-- If one sample is associated with multiple studies or projects, these appear as multiple rows. | ||
-- Doesn't filter events here by 'LCMB' or 'ReISC' (in the outer query), so we could theoretically find orders for the same samples to other pipelines. | ||
-- At time of writing, adding a metadata filter for 'LCMB' or 'ReISC' made the query slower and didn't change the results, so I left it out. | ||
|
||
-- Sample and study info | ||
SELECT -- SQL_NO_CACHE | ||
DISTINCT | ||
relevant_samples.ewh_sample_id | ||
,relevant_samples.sample_uuid_bin | ||
,relevant_samples.sample_uuid | ||
,relevant_samples.sample_friendly_name | ||
,sub.id AS ewh_study_id | ||
,sub.uuid AS study_uuid_bin | ||
,insert(insert(insert(insert(lower(hex(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS study_uuid | ||
,sub.friendly_name AS study_friendly_name | ||
FROM | ||
( | ||
-- Relevant (529) samples for LCMB-ISC pipeline | ||
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS ewh_sample_id, | ||
s.uuid AS sample_uuid_bin, | ||
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, | ||
s.friendly_name AS sample_friendly_name, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id | ||
HAVING Count(*) > 1 | ||
) AS relevant_samples | ||
|
||
-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events) | ||
LEFT JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id | ||
LEFT JOIN events e ON e.id = r_sample.event_id | ||
LEFT JOIN event_types et ON et.id = e.event_type_id | ||
|
||
-- Find any 'study' subjects associated with these events | ||
LEFT JOIN roles r ON r.event_id = e.id | ||
LEFT JOIN role_types rt ON rt.id = r.role_type_id | ||
LEFT JOIN subjects sub ON r.subject_id = sub.id | ||
|
||
WHERE et.`key` = 'order_made' | ||
AND rt.`key` = 'study' | ||
; | ||
|
||
|
||
-- Sample and project info | ||
SELECT -- SQL_NO_CACHE | ||
DISTINCT | ||
relevant_samples.ewh_sample_id | ||
,relevant_samples.sample_uuid_bin | ||
,relevant_samples.sample_uuid | ||
,relevant_samples.sample_friendly_name | ||
,sub.id AS ewh_project_id | ||
,sub.uuid AS project_uuid_bin | ||
,insert(insert(insert(insert(lower(hex(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS project_uuid | ||
,sub.friendly_name AS project_friendly_name | ||
FROM | ||
( | ||
-- Relevant (529) samples for LCMB-ISC pipeline | ||
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS ewh_sample_id, | ||
s.uuid AS sample_uuid_bin, | ||
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, | ||
s.friendly_name AS sample_friendly_name, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id | ||
HAVING Count(*) > 1 | ||
) AS relevant_samples | ||
|
||
-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events) | ||
LEFT JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id | ||
LEFT JOIN events e ON e.id = r_sample.event_id | ||
LEFT JOIN event_types et ON et.id = e.event_type_id | ||
|
||
-- Find any 'project' subjects associated with these events | ||
LEFT JOIN roles r ON r.event_id = e.id | ||
LEFT JOIN role_types rt ON rt.id = r.role_type_id | ||
LEFT JOIN subjects sub ON r.subject_id = sub.id | ||
|
||
WHERE et.`key` = 'order_made' | ||
AND rt.`key` = 'project' | ||
; | ||
|
||
|
||
|
||
|
||
-- Concatenating studies, so there's 1 row per sample | ||
SELECT relevant_samples.ewh_sample_id | ||
,relevant_samples.sample_uuid_bin | ||
,relevant_samples.sample_uuid | ||
,relevant_samples.sample_friendly_name | ||
,GROUP_CONCAT(DISTINCT(sub.friendly_name)) AS study_friendly_names | ||
FROM | ||
( | ||
-- Relevant (529) samples for LCMB-ISC pipeline | ||
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS ewh_sample_id, | ||
s.uuid AS sample_uuid_bin, | ||
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, | ||
s.friendly_name AS sample_friendly_name, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id | ||
HAVING Count(*) > 1 | ||
) AS relevant_samples | ||
|
||
-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events) | ||
LEFT JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id | ||
LEFT JOIN events e ON e.id = r_sample.event_id | ||
LEFT JOIN event_types et ON et.id = e.event_type_id | ||
|
||
-- Find any 'study' subjects associated with these events | ||
LEFT JOIN roles r ON r.event_id = e.id | ||
LEFT JOIN role_types rt ON rt.id = r.role_type_id | ||
LEFT JOIN subjects sub ON r.subject_id = sub.id | ||
|
||
WHERE et.`key` = 'order_made' | ||
AND rt.`key` = 'study' | ||
|
||
GROUP BY relevant_samples.ewh_sample_id | ||
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
-- 'submission' here means when the customer asks for a batch of work to be completed | ||
-- this happens via the RT system, via the SSRs | ||
-- here we're using sample manifest upload to SS as a proxy | ||
|
||
SELECT ewh_sample_id | ||
,sample_uuid_bin | ||
,sample_uuid | ||
,sample_friendly_name | ||
,e.id AS manifest_event_id | ||
,e.occured_at AS 'samples_created_by_manifest' | ||
FROM | ||
( | ||
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name | ||
FROM | ||
( | ||
SELECT DISTINCT s.id AS ewh_sample_id, | ||
s.uuid AS sample_uuid_bin, | ||
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, | ||
s.friendly_name AS sample_friendly_name, m.value AS pipeline | ||
FROM metadata m | ||
JOIN events e ON e.id = m.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
JOIN roles r ON r.event_id = e.id | ||
JOIN role_types rt ON r.role_type_id = rt.id | ||
JOIN subjects s ON s.id = r.subject_id | ||
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC') | ||
AND m.created_at > '2022-05-10' | ||
AND rt.`key` = 'sample' | ||
AND et.`key` = 'order_made' | ||
) AS all_lcmb_reisc_samples_by_pipeline | ||
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id | ||
HAVING Count(*) > 1 | ||
) AS relevant_samples | ||
|
||
JOIN roles r ON r.subject_id = relevant_samples.ewh_sample_id | ||
JOIN events e ON e.id = r.event_id | ||
JOIN event_types et ON et.id = e.event_type_id | ||
|
||
WHERE et.`key` = 'sample_manifest.updated' | ||
; |
Oops, something went wrong.