Skip to content

Commit

Permalink
Copied LCMB-ISC directory.
Browse files Browse the repository at this point in the history
  • Loading branch information
yoldas committed Nov 27, 2022
1 parent c40b337 commit 6757a65
Show file tree
Hide file tree
Showing 21 changed files with 1,155 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
-- Making the base sample query as a view would help with readability in the other queries.


-- Version 1 - 529 samples, 14 seconds
--
SELECT subject_uuid
FROM
(
(
SELECT DISTINCT(fev.subject_uuid)
FROM metadata m
JOIN flat_events_view fev ON m.event_id = fev.wh_event_id
WHERE m.`key` = 'order_type' AND m.value = 'LCMB'
AND m.created_at > '2022-05-10'
AND fev.role_type = 'sample'
)
UNION ALL
(
SELECT DISTINCT(fev.subject_uuid)
FROM metadata m
JOIN flat_events_view fev ON m.event_id = fev.wh_event_id
WHERE m.`key` = 'order_type' AND m.value = 'ReISC'
AND m.created_at > '2022-05-10'
AND fev.role_type = 'sample'
)
) AS all_lcmb_reisc_samples
GROUP BY subject_uuid
HAVING Count(*) > 1
;


-- Version 2 - not using flat_events_view - 529 samples, 11 seconds
--
SELECT id
FROM
(
(
SELECT DISTINCT(s.id)
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value = 'LCMB'
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
)
UNION ALL
(
SELECT DISTINCT(s.id)
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value = 'ReISC'
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
)
) AS all_lcmb_reisc_samples
GROUP BY id
HAVING Count(*) > 1
;


-- Version 3 - querying both order_types in one - 529 samples, 6 seconds! :)
--
SELECT all_lcmb_reisc_samples_by_pipeline.sample_id
FROM
(
SELECT DISTINCT s.id AS sample_id, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.sample_id
HAVING Count(*) > 1
;

-- Version 4 - including event type filter - 529 samples, 2 seconds! :)
--
SELECT all_lcmb_reisc_samples_by_pipeline.sample_id
FROM
(
SELECT DISTINCT s.id AS sample_id, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.sample_id
HAVING Count(*) > 1
;

-- Version 5 - including more useful information - 529 samples, still 2ish seconds
--
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
FROM
(
SELECT DISTINCT s.id AS ewh_sample_id,
s.uuid AS sample_uuid_bin,
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid,
s.friendly_name AS sample_friendly_name,
m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline

GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id
HAVING Count(*) > 1
;
40 changes: 40 additions & 0 deletions queries/TAT-WOW-project/LCMB-ISC-X/find_stock_plate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- stock plate info in Events WH seems sketchy
-- Following route seems much more reliable:
-- Events WH sample uuid --> MLWH sample --> MLWH stock_resource
-- This retrieves the stock plate barcode from when the sample manifest was uploaded
-- Finds 8 distinct stock plates

SELECT -- SQL_NO_CACHE
relevant_samples.ewh_sample_id
,relevant_samples.sample_uuid_bin
,relevant_samples.sample_uuid
,relevant_samples.sample_friendly_name
,sr.labware_human_barcode
FROM
(
-- Relevant (529) samples for LCMB-ISC pipeline
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
FROM
(
SELECT DISTINCT s.id AS ewh_sample_id,
s.uuid AS sample_uuid_bin,
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid,
s.friendly_name AS sample_friendly_name, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id
HAVING Count(*) > 1
) AS relevant_samples

JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid
JOIN mlwhd_mlwarehouse_proddata.stock_resource sr USING (id_sample_tmp)
;
156 changes: 156 additions & 0 deletions queries/TAT-WOW-project/LCMB-ISC-X/find_studies_and_projects.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
-- 2 separate queries, as each one individually takes ~2-3 seconds, but if I try to make them in a readable way into a combined query, it takes much longer.
-- They can be joined in their entirety using UNION or JOIN, but it is probably cleaner to keep them as two separate queries and join using Tableau.
-- If one sample is associated with multiple studies or projects, these appear as multiple rows.
-- Doesn't filter events here by 'LCMB' or 'ReISC' (in the outer query), so we could theoretically find orders for the same samples to other pipelines.
-- At time of writing, adding a metadata filter for 'LCMB' or 'ReISC' made the query slower and didn't change the results, so I left it out.

-- Sample and study info
SELECT -- SQL_NO_CACHE
DISTINCT
relevant_samples.ewh_sample_id
,relevant_samples.sample_uuid_bin
,relevant_samples.sample_uuid
,relevant_samples.sample_friendly_name
,sub.id AS ewh_study_id
,sub.uuid AS study_uuid_bin
,insert(insert(insert(insert(lower(hex(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS study_uuid
,sub.friendly_name AS study_friendly_name
FROM
(
-- Relevant (529) samples for LCMB-ISC pipeline
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
FROM
(
SELECT DISTINCT s.id AS ewh_sample_id,
s.uuid AS sample_uuid_bin,
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid,
s.friendly_name AS sample_friendly_name, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id
HAVING Count(*) > 1
) AS relevant_samples

-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
LEFT JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
LEFT JOIN events e ON e.id = r_sample.event_id
LEFT JOIN event_types et ON et.id = e.event_type_id

-- Find any 'study' subjects associated with these events
LEFT JOIN roles r ON r.event_id = e.id
LEFT JOIN role_types rt ON rt.id = r.role_type_id
LEFT JOIN subjects sub ON r.subject_id = sub.id

WHERE et.`key` = 'order_made'
AND rt.`key` = 'study'
;


-- Sample and project info
SELECT -- SQL_NO_CACHE
DISTINCT
relevant_samples.ewh_sample_id
,relevant_samples.sample_uuid_bin
,relevant_samples.sample_uuid
,relevant_samples.sample_friendly_name
,sub.id AS ewh_project_id
,sub.uuid AS project_uuid_bin
,insert(insert(insert(insert(lower(hex(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS project_uuid
,sub.friendly_name AS project_friendly_name
FROM
(
-- Relevant (529) samples for LCMB-ISC pipeline
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
FROM
(
SELECT DISTINCT s.id AS ewh_sample_id,
s.uuid AS sample_uuid_bin,
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid,
s.friendly_name AS sample_friendly_name, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id
HAVING Count(*) > 1
) AS relevant_samples

-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
LEFT JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
LEFT JOIN events e ON e.id = r_sample.event_id
LEFT JOIN event_types et ON et.id = e.event_type_id

-- Find any 'project' subjects associated with these events
LEFT JOIN roles r ON r.event_id = e.id
LEFT JOIN role_types rt ON rt.id = r.role_type_id
LEFT JOIN subjects sub ON r.subject_id = sub.id

WHERE et.`key` = 'order_made'
AND rt.`key` = 'project'
;




-- Concatenating studies, so there's 1 row per sample
SELECT relevant_samples.ewh_sample_id
,relevant_samples.sample_uuid_bin
,relevant_samples.sample_uuid
,relevant_samples.sample_friendly_name
,GROUP_CONCAT(DISTINCT(sub.friendly_name)) AS study_friendly_names
FROM
(
-- Relevant (529) samples for LCMB-ISC pipeline
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
FROM
(
SELECT DISTINCT s.id AS ewh_sample_id,
s.uuid AS sample_uuid_bin,
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid,
s.friendly_name AS sample_friendly_name, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id
HAVING Count(*) > 1
) AS relevant_samples

-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
LEFT JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
LEFT JOIN events e ON e.id = r_sample.event_id
LEFT JOIN event_types et ON et.id = e.event_type_id

-- Find any 'study' subjects associated with these events
LEFT JOIN roles r ON r.event_id = e.id
LEFT JOIN role_types rt ON rt.id = r.role_type_id
LEFT JOIN subjects sub ON r.subject_id = sub.id

WHERE et.`key` = 'order_made'
AND rt.`key` = 'study'

GROUP BY relevant_samples.ewh_sample_id
;
40 changes: 40 additions & 0 deletions queries/TAT-WOW-project/LCMB-ISC-X/find_submissions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- 'submission' here means when the customer asks for a batch of work to be completed
-- this happens via the RT system, via the SSRs
-- here we're using sample manifest upload to SS as a proxy

SELECT ewh_sample_id
,sample_uuid_bin
,sample_uuid
,sample_friendly_name
,e.id AS manifest_event_id
,e.occured_at AS 'samples_created_by_manifest'
FROM
(
SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
FROM
(
SELECT DISTINCT s.id AS ewh_sample_id,
s.uuid AS sample_uuid_bin,
insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid,
s.friendly_name AS sample_friendly_name, m.value AS pipeline
FROM metadata m
JOIN events e ON e.id = m.event_id
JOIN event_types et ON et.id = e.event_type_id
JOIN roles r ON r.event_id = e.id
JOIN role_types rt ON r.role_type_id = rt.id
JOIN subjects s ON s.id = r.subject_id
WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
AND m.created_at > '2022-05-10'
AND rt.`key` = 'sample'
AND et.`key` = 'order_made'
) AS all_lcmb_reisc_samples_by_pipeline
GROUP BY all_lcmb_reisc_samples_by_pipeline.ewh_sample_id
HAVING Count(*) > 1
) AS relevant_samples

JOIN roles r ON r.subject_id = relevant_samples.ewh_sample_id
JOIN events e ON e.id = r.event_id
JOIN event_types et ON et.id = e.event_type_id

WHERE et.`key` = 'sample_manifest.updated'
;
Loading

0 comments on commit 6757a65

Please sign in to comment.