From 6757a655342943cb783be46de7eb14ab5f5c5f9d Mon Sep 17 00:00:00 2001 From: yoldas Date: Sun, 27 Nov 2022 13:11:14 +0000 Subject: [PATCH] Copied LCMB-ISC directory. --- ..._lcmb_isc_samples_performance_analysis.sql | 130 +++++++++++++++ .../LCMB-ISC-X/find_stock_plate.sql | 40 +++++ .../LCMB-ISC-X/find_studies_and_projects.sql | 156 ++++++++++++++++++ .../LCMB-ISC-X/find_submissions.sql | 40 +++++ .../LCMB-ISC-X/timestamp_10_end_isc.sql | 43 +++++ .../LCMB-ISC-X/timestamp_11_start_seq.sql | 6 + .../LCMB-ISC-X/timestamp_11a_lib_complete.sql | 40 +++++ .../timestamp_11b_pool_released.sql | 53 ++++++ .../LCMB-ISC-X/timestamp_12_end_seq.sql | 46 ++++++ .../LCMB-ISC-X/timestamp_1_study_set_up.sql | 107 ++++++++++++ ...imestamp_2_faculty_submission_acted_on.sql | 1 + .../timestamp_3.1_quanthub_upload.sql | 40 +++++ .../timestamp_3.2_cherrypick_submission.sql | 54 ++++++ ...tamp_3_received_into_sample_management.sql | 92 +++++++++++ .../timestamp_4_end_sample_management.sql | 44 +++++ .../timestamp_5_start_pre_library.sql | 44 +++++ .../timestamp_6_end_pre_library.sql | 44 +++++ .../timestamp_7_start_post_library.sql | 52 ++++++ .../timestamp_8.1_isc_submission.sql | 40 +++++ .../timestamp_8_end_post_library.sql | 44 +++++ .../LCMB-ISC-X/timestamp_9_isc_start.sql | 39 +++++ 21 files changed, 1155 insertions(+) create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/find_lcmb_isc_samples_performance_analysis.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/find_stock_plate.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/find_studies_and_projects.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/find_submissions.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_10_end_isc.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11_start_seq.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11a_lib_complete.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11b_pool_released.sql create 
mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_12_end_seq.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_1_study_set_up.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_2_faculty_submission_acted_on.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.1_quanthub_upload.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.2_cherrypick_submission.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3_received_into_sample_management.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_4_end_sample_management.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_5_start_pre_library.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_6_end_pre_library.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_7_start_post_library.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8.1_isc_submission.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8_end_post_library.sql create mode 100644 queries/TAT-WOW-project/LCMB-ISC-X/timestamp_9_isc_start.sql diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/find_lcmb_isc_samples_performance_analysis.sql b/queries/TAT-WOW-project/LCMB-ISC-X/find_lcmb_isc_samples_performance_analysis.sql new file mode 100644 index 00000000..0466d7e8 --- /dev/null +++ b/queries/TAT-WOW-project/LCMB-ISC-X/find_lcmb_isc_samples_performance_analysis.sql @@ -0,0 +1,130 @@ +-- Making the base sample query as a view would help with readability in the other queries. 
+
+
+-- Version 1 - 529 samples, 14 seconds
+-- Sample uuids that carry both an 'LCMB' and a 'ReISC' order_type metadata row (via flat_events_view).
+SELECT subject_uuid
+FROM
+(
+    (
+        SELECT DISTINCT ev.subject_uuid
+        FROM metadata AS md
+        INNER JOIN flat_events_view AS ev ON ev.wh_event_id = md.event_id
+        WHERE md.`key` = 'order_type' AND md.value = 'LCMB'
+            AND md.created_at > '2022-05-10'
+            AND ev.role_type = 'sample'
+    )
+    UNION ALL
+    (
+        SELECT DISTINCT ev.subject_uuid
+        FROM metadata AS md
+        INNER JOIN flat_events_view AS ev ON ev.wh_event_id = md.event_id
+        WHERE md.`key` = 'order_type' AND md.value = 'ReISC'
+            AND md.created_at > '2022-05-10'
+            AND ev.role_type = 'sample'
+    )
+) AS lcmb_plus_reisc_hits
+GROUP BY subject_uuid
+HAVING COUNT(*) > 1 -- each branch is DISTINCT, so 2 rows means the uuid is in both branches
+;
+
+
+-- Version 2 - not using flat_events_view - 529 samples, 11 seconds
+-- Same idea as Version 1, but joins the base tables directly and returns subjects.id.
+SELECT id
+FROM
+(
+    (
+        SELECT DISTINCT subj.id
+        FROM metadata AS md
+        INNER JOIN events AS ev ON ev.id = md.event_id
+        INNER JOIN roles AS ro ON ro.event_id = ev.id
+        INNER JOIN role_types AS rty ON rty.id = ro.role_type_id
+        INNER JOIN subjects AS subj ON subj.id = ro.subject_id
+        WHERE md.`key` = 'order_type' AND md.value = 'LCMB'
+            AND md.created_at > '2022-05-10'
+            AND rty.`key` = 'sample'
+    )
+    UNION ALL
+    (
+        SELECT DISTINCT subj.id
+        FROM metadata AS md
+        INNER JOIN events AS ev ON ev.id = md.event_id
+        INNER JOIN roles AS ro ON ro.event_id = ev.id
+        INNER JOIN role_types AS rty ON rty.id = ro.role_type_id
+        INNER JOIN subjects AS subj ON subj.id = ro.subject_id
+        WHERE md.`key` = 'order_type' AND md.value = 'ReISC'
+            AND md.created_at > '2022-05-10'
+            AND rty.`key` = 'sample'
+    )
+) AS lcmb_plus_reisc_hits
+GROUP BY id
+HAVING COUNT(*) > 1 -- each branch is DISTINCT, so 2 rows means the id is in both branches
+;
+
+
+-- Version 3 - querying both order_types in one - 529 samples, 6 seconds!
:)
+-- One pass over metadata for both order types; DISTINCT (sample, pipeline) pairs are then counted per sample.
+SELECT all_lcmb_reisc_samples_by_pipeline.sample_id
+FROM
+(
+    SELECT DISTINCT s.id AS sample_id, m.value AS pipeline
+    FROM metadata m
+    INNER JOIN events e ON e.id = m.event_id
+    INNER JOIN roles r ON r.event_id = e.id
+    INNER JOIN role_types rt ON r.role_type_id = rt.id
+    INNER JOIN subjects s ON s.id = r.subject_id
+    WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+        AND m.created_at > '2022-05-10'
+        AND rt.`key` = 'sample'
+) AS all_lcmb_reisc_samples_by_pipeline
+GROUP BY all_lcmb_reisc_samples_by_pipeline.sample_id
+HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+;
+
+-- Version 4 - including event type filter - 529 samples, 2 seconds! :)
+-- As Version 3, additionally restricted to events whose type key is 'order_made'.
+SELECT all_lcmb_reisc_samples_by_pipeline.sample_id
+FROM
+(
+    SELECT DISTINCT s.id AS sample_id, m.value AS pipeline
+    FROM metadata m
+    INNER JOIN events e ON e.id = m.event_id
+    INNER JOIN event_types et ON et.id = e.event_type_id
+    INNER JOIN roles r ON r.event_id = e.id
+    INNER JOIN role_types rt ON r.role_type_id = rt.id
+    INNER JOIN subjects s ON s.id = r.subject_id
+    WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+        AND m.created_at > '2022-05-10'
+        AND rt.`key` = 'sample'
+        AND et.`key` = 'order_made'
+) AS all_lcmb_reisc_samples_by_pipeline
+GROUP BY all_lcmb_reisc_samples_by_pipeline.sample_id
+HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+;
+
+-- Version 5 - including more useful information - 529 samples, still 2ish seconds
+-- As Version 4, also returning the sample uuid (binary and text form) and friendly name.
+SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+FROM
+(
+    SELECT DISTINCT s.id AS ewh_sample_id,
+        s.uuid AS sample_uuid_bin,
+        INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+        s.friendly_name AS sample_friendly_name,
+        m.value AS pipeline
+    FROM metadata m
+    INNER JOIN events e ON e.id = m.event_id
+    INNER JOIN event_types et ON et.id = e.event_type_id
+    INNER JOIN roles r ON r.event_id = e.id
+    INNER JOIN role_types rt ON r.role_type_id = rt.id
+    INNER JOIN subjects s ON s.id = r.subject_id
+    WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+        AND m.created_at > '2022-05-10'
+        AND rt.`key` = 'sample'
+        AND et.`key` = 'order_made'
+) AS all_lcmb_reisc_samples_by_pipeline
+
+GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+;
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/find_stock_plate.sql b/queries/TAT-WOW-project/LCMB-ISC-X/find_stock_plate.sql
new file mode 100644
index 00000000..c22f30d4
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/find_stock_plate.sql
@@ -0,0 +1,40 @@
+-- stock plate info in Events WH seems sketchy
+-- Following route seems much more reliable:
+-- Events WH sample uuid --> MLWH sample --> MLWH stock_resource
+-- This retrieves the stock plate barcode from when the sample manifest was uploaded
+-- Finds 8 distinct stock plates
+
+SELECT -- SQL_NO_CACHE
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,sr.labware_human_barcode
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+INNER JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid -- text uuid is the cross-warehouse key
+INNER JOIN mlwhd_mlwarehouse_proddata.stock_resource sr USING (id_sample_tmp)
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/find_studies_and_projects.sql b/queries/TAT-WOW-project/LCMB-ISC-X/find_studies_and_projects.sql
new file mode 100644
index 00000000..3689350c
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/find_studies_and_projects.sql
@@ -0,0 +1,156 @@
+-- 2 separate queries, as each one individually takes ~2-3 seconds, but if I try to make them in a readable way into a combined query, it takes much longer.
+-- They can be joined in their entirety using UNION or JOIN, but it is probably cleaner to keep them as two separate queries and join using Tableau.
+-- If one sample is associated with multiple studies or projects, these appear as multiple rows.
+-- Doesn't filter events here by 'LCMB' or 'ReISC' (in the outer query), so we could theoretically find orders for the same samples to other pipelines.
+-- At time of writing, adding a metadata filter for 'LCMB' or 'ReISC' made the query slower and didn't change the results, so I left it out.
+
+-- Sample and study info
+SELECT -- SQL_NO_CACHE
+    DISTINCT
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,sub.id AS ewh_study_id
+    ,sub.uuid AS study_uuid_bin
+    ,INSERT(INSERT(INSERT(INSERT(LOWER(HEX(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS study_uuid -- hex + dashes: 8-4-4-4-12
+    ,sub.friendly_name AS study_friendly_name
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
+INNER JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r_sample.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id -- INNER: the WHERE on et.`key` discards unmatched rows anyway
+
+-- Find any 'study' subjects associated with these events
+INNER JOIN roles r ON r.event_id = e.id
+INNER JOIN role_types rt ON rt.id = r.role_type_id -- INNER: the WHERE on rt.`key` discards unmatched rows anyway
+LEFT JOIN subjects sub ON r.subject_id = sub.id -- still LEFT: no WHERE predicate touches sub
+
+WHERE et.`key` = 'order_made'
+    AND rt.`key` = 'study'
+;
+
+
+-- Sample and project info
+SELECT -- SQL_NO_CACHE
+    DISTINCT
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,sub.id AS ewh_project_id
+    ,sub.uuid AS project_uuid_bin
+    ,INSERT(INSERT(INSERT(INSERT(LOWER(HEX(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS project_uuid -- hex + dashes: 8-4-4-4-12
+    ,sub.friendly_name AS project_friendly_name
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
+INNER JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r_sample.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id -- INNER: the WHERE on et.`key` discards unmatched rows anyway
+
+-- Find any 'project' subjects associated with these events
+INNER JOIN roles r ON r.event_id = e.id
+INNER JOIN role_types rt ON rt.id = r.role_type_id -- INNER: the WHERE on rt.`key` discards unmatched rows anyway
+LEFT JOIN subjects sub ON r.subject_id = sub.id -- still LEFT: no WHERE predicate touches sub
+
+WHERE et.`key` = 'order_made'
+    AND rt.`key` = 'project'
+;
+
+
+
+
+-- Concatenating studies, so there's 1 row per sample
+SELECT relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,GROUP_CONCAT(DISTINCT sub.friendly_name) AS study_friendly_names -- NOTE(review): length is capped by group_concat_max_len; confirm no truncation
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
+INNER JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r_sample.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id -- INNER: the WHERE on et.`key` discards unmatched rows anyway
+
+-- Find any 'study' subjects associated with these events
+INNER JOIN roles r ON r.event_id = e.id
+INNER JOIN role_types rt ON rt.id = r.role_type_id -- INNER: the WHERE on rt.`key` discards unmatched rows anyway
+LEFT JOIN subjects sub ON r.subject_id = sub.id -- still LEFT: no WHERE predicate touches sub
+
+WHERE et.`key` = 'order_made'
+    AND rt.`key` = 'study'
+
+GROUP BY relevant_samples.ewh_sample_id, relevant_samples.sample_uuid_bin, relevant_samples.sample_uuid, relevant_samples.sample_friendly_name -- all non-aggregated columns
+;
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/find_submissions.sql b/queries/TAT-WOW-project/LCMB-ISC-X/find_submissions.sql
new file mode 100644
index 00000000..e5138f26
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/find_submissions.sql
@@ -0,0 +1,40 @@
+-- 'submission' here means when the customer asks for a batch of work to be completed
+-- this happens via the RT system, via the SSRs
+-- here we're using sample manifest upload to SS as a proxy
+
+SELECT ewh_sample_id
+    ,sample_uuid_bin
+    ,sample_uuid
+    ,sample_friendly_name
+    ,e.id AS manifest_event_id
+    ,e.occured_at AS samples_created_by_manifest -- plain identifier alias (was a quoted string); column name unchanged
+FROM
+(
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+INNER JOIN roles r ON r.subject_id = relevant_samples.ewh_sample_id -- any role the sample plays, not only 'sample'
+INNER JOIN events e ON e.id = r.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id
+
+WHERE et.`key` = 'sample_manifest.updated'
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_10_end_isc.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_10_end_isc.sql
new file mode 100644
index 00000000..d8123e85
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_10_end_isc.sql
@@ -0,0 +1,43 @@
+SELECT -- SQL_NO_CACHE
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,MAX(e.occured_at) AS latest_postmanpat_scan
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all labware.received events for the relevant samples
+INNER JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r_sample.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id -- INNER: the WHERE on et.`key` discards unmatched rows anyway
+INNER JOIN metadata m ON m.event_id = e.id -- INNER: the WHERE on m.`key`/m.value discards unmatched rows anyway
+
+WHERE et.`key` = 'labware.received'
+    AND m.`key` = 'location_barcode'
+    AND m.value = 'postmanpat'
+
+GROUP BY relevant_samples.ewh_sample_id, relevant_samples.sample_uuid_bin, relevant_samples.sample_uuid, relevant_samples.sample_friendly_name -- all non-aggregated columns
+;
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11_start_seq.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11_start_seq.sql
new file mode 100644
index 00000000..3242766b
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11_start_seq.sql
@@ -0,0 +1,6 @@
+-- timestamp 11 will be MAX out of:
+    -- timestamp 10 (postmanpat)
+    -- timestamp 11a (library_complete)
+    -- timestamp 11b (pool_released)
+
+-- bear in mind library_complete & pool_released changed meaning on 27/06/2022 (see docs in 'MLWH_Events 101 And Useful Queries' in Confluence)
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11a_lib_complete.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11a_lib_complete.sql
new file mode 100644
index 00000000..6b1a8ff6
--- /dev/null
+++ 
b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11a_lib_complete.sql
@@ -0,0 +1,40 @@
+-- bear in mind library_complete & pool_released changed meaning on 27/06/2022
+
+SELECT ewh_sample_id
+    ,sample_uuid_bin
+    ,sample_uuid
+    ,sample_friendly_name
+    ,e.occured_at
+FROM
+(
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+INNER JOIN roles r ON r.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id
+INNER JOIN metadata m ON m.event_id = e.id
+
+WHERE et.`key` = 'library_complete'
+    AND m.`key` = 'order_type' AND m.value = 'ReISC' -- only library_complete events tagged as ReISC orders
+ORDER BY sample_friendly_name
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11b_pool_released.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11b_pool_released.sql
new file mode 100644
index 00000000..ec42108b
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_11b_pool_released.sql
@@ -0,0 +1,53 @@
+-- bear in mind library_complete & pool_released changed meaning on 27/06/2022
+
+SELECT ewh_sample_id
+    ,sample_uuid_bin
+    ,sample_uuid
+    ,sample_friendly_name
+    ,pool_released
+FROM
+(
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+LEFT JOIN -- genuinely outer: samples without a pool_released event keep a NULL timestamp
+(
+    -- Grab all ReISC pool_released events, with their samples
+    -- and return the sample id and the latest event datetime
+    -- (there are multiple events per sample)
+    SELECT r.subject_id AS subject_id, MAX(e.occured_at) AS pool_released
+    FROM roles r
+    INNER JOIN role_types rt ON rt.id = r.role_type_id
+    INNER JOIN events e ON e.id = r.event_id
+    INNER JOIN event_types et ON et.id = e.event_type_id
+    INNER JOIN metadata m ON m.event_id = e.id
+    WHERE et.`key` = 'pool_released'
+        AND m.`key` = 'order_type' AND m.value = 'ReISC'
+        AND rt.`key` = 'sample'
+    GROUP BY r.subject_id
+) AS reisc_pool_released_events
+    ON relevant_samples.ewh_sample_id = reisc_pool_released_events.subject_id
+;
+
+-- only 453 rows have pool_released event(s) (missing for 76 samples)
+-- pool_released event not fired for NT1756559V, NT1756560O & NT1756561P (22 x 3 = 66 samples), presumably because they were made on 27 June, which is the day the deployment was done on
+-- pool_released event not present for NT1764717O (remaining 10 samples, and some which are not on the LCMB-ISC list), not sure why
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_12_end_seq.sql
b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_12_end_seq.sql
new file mode 100644
index 00000000..2fadd907
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_12_end_seq.sql
@@ -0,0 +1,46 @@
+SELECT relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,iseq_run_status_dict.description
+    ,MAX(iseq_run_status.date) -- latest date per (sample, status description)
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+INNER JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid -- 529 sample rows
+INNER JOIN mlwhd_mlwarehouse_proddata.iseq_flowcell iseq_flowcell USING (id_sample_tmp) -- 545 iseq_flowcell rows (1 for each 'Twist Pulldown' sample, 2 for each 'Agilent Pulldown' sample because they were run on two lanes)
+INNER JOIN mlwhd_mlwarehouse_proddata.iseq_product_metrics iseq_product_metrics USING (id_iseq_flowcell_tmp)
+INNER JOIN mlwhd_mlwarehouse_proddata.iseq_run_lane_metrics iseq_run_lane_metrics
+    ON iseq_product_metrics.id_run = iseq_run_lane_metrics.id_run
+    AND iseq_product_metrics.position = iseq_run_lane_metrics.position
+INNER JOIN mlwhd_mlwarehouse_proddata.iseq_run_status iseq_run_status
+    ON iseq_run_status.id_run = iseq_run_lane_metrics.id_run
+INNER JOIN mlwhd_mlwarehouse_proddata.iseq_run_status_dict iseq_run_status_dict
+    ON iseq_run_status_dict.id_run_status_dict = iseq_run_status.id_run_status_dict
+
+WHERE pipeline_id_lims IN ('Twist Pulldown', 'Agilent Pulldown') -- NOTE(review): unqualified column; presumably iseq_flowcell.pipeline_id_lims - confirm
+    AND iseq_run_status_dict.description IN ('run complete', 'analysis pending', 'analysis complete', 'qc review pending', 'run archived', 'qc complete')
+
+GROUP BY relevant_samples.sample_uuid, relevant_samples.sample_friendly_name, iseq_run_status_dict.description -- friendly name added: it is selected but was not grouped
+ORDER BY sample_friendly_name, MAX(iseq_run_status.date)
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_1_study_set_up.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_1_study_set_up.sql
new file mode 100644
index 00000000..22a1631e
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_1_study_set_up.sql
@@ -0,0 +1,107 @@
+SELECT -- SQL_NO_CACHE
+    DISTINCT
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,sub.id AS ewh_study_id
+    ,sub.uuid AS study_uuid_bin
+    ,INSERT(INSERT(INSERT(INSERT(LOWER(HEX(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS study_uuid -- hex + dashes: 8-4-4-4-12
+    ,sub.friendly_name AS study_friendly_name
+    ,studies.created AS study_set_up
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
+INNER JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r_sample.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id -- INNER: the WHERE on et.`key` discards unmatched rows anyway
+
+-- Find any 'study' subjects associated with these events
+INNER JOIN roles r ON r.event_id = e.id
+INNER JOIN role_types rt ON rt.id = r.role_type_id -- INNER: the WHERE on rt.`key` discards unmatched rows anyway
+LEFT JOIN subjects sub ON r.subject_id = sub.id -- still LEFT: no WHERE predicate touches sub
+
+-- Join to MLWH study table to get timestamp of Study creation in Sequencescape
+LEFT JOIN mlwhd_mlwarehouse_proddata.study studies ON INSERT(INSERT(INSERT(INSERT(LOWER(HEX(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') = studies.uuid_study_lims
+
+WHERE et.`key` = 'order_made'
+    AND rt.`key` = 'study'
+;
+
+
+
+
+
+-- study name and timestamp concatenated, so get one row per sample
+SELECT relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,GROUP_CONCAT(DISTINCT sub.friendly_name) AS study_friendly_names -- NOTE(review): length is capped by group_concat_max_len; confirm no truncation
+    ,GROUP_CONCAT(DISTINCT studies.created) AS studies_set_up
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all order_made events for the relevant samples (1,837 rows, 54 distinct events)
+INNER JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+INNER JOIN events e ON e.id = r_sample.event_id
+INNER JOIN event_types et ON et.id = e.event_type_id -- INNER: the WHERE on et.`key` discards unmatched rows anyway
+
+-- Find any 'study' subjects associated with these events
+INNER JOIN roles r ON r.event_id = e.id
+INNER JOIN role_types rt ON rt.id = r.role_type_id -- INNER: the WHERE on rt.`key` discards unmatched rows anyway
+LEFT JOIN subjects sub ON r.subject_id = sub.id -- still LEFT: no WHERE predicate touches sub
+
+-- Join to MLWH study table to get timestamp of Study creation in Sequencescape
+LEFT JOIN mlwhd_mlwarehouse_proddata.study studies ON INSERT(INSERT(INSERT(INSERT(LOWER(HEX(sub.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') = studies.uuid_study_lims
+
+WHERE et.`key` = 'order_made'
+    AND rt.`key` = 'study'
+
+GROUP BY relevant_samples.ewh_sample_id, relevant_samples.sample_uuid_bin, relevant_samples.sample_uuid, relevant_samples.sample_friendly_name -- all non-aggregated columns
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_2_faculty_submission_acted_on.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_2_faculty_submission_acted_on.sql
new file mode 100644
index 00000000..691e2b27
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_2_faculty_submission_acted_on.sql
@@ -0,0 +1 @@
+-- see `find_submissions.sql`
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.1_quanthub_upload.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.1_quanthub_upload.sql
new file mode 100644
index 00000000..d360e7fa
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.1_quanthub_upload.sql
@@ -0,0 +1,40 @@
+SELECT -- SQL_NO_CACHE
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,MIN(qc.date_created) AS sample_management_qc_result_date -- plain identifier alias (was a quoted string); column name unchanged
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            INSERT(INSERT(INSERT(INSERT(LOWER(HEX(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- hex + dashes: 8-4-4-4-12
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        INNER JOIN events e ON e.id = m.event_id
+        INNER JOIN event_types et ON et.id = e.event_type_id
+        INNER JOIN roles r ON r.event_id = e.id
+        INNER JOIN role_types rt ON r.role_type_id = rt.id
+        INNER JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- every selected column (ONLY_FULL_GROUP_BY-safe); assumes subjects.id is unique
+    HAVING COUNT(*) > 1 -- >1 distinct pipeline => sample is in both LCMB and ReISC
+) AS relevant_samples
+
+INNER JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid
+INNER JOIN mlwhd_mlwarehouse_proddata.qc_result qc USING (id_sample_tmp)
+
+WHERE qc.labware_purpose = 'Stock Plate' AND qc.assay = 'Stock - Plate Reader v1.0'
+
+GROUP BY relevant_samples.ewh_sample_id, relevant_samples.sample_uuid_bin, relevant_samples.sample_uuid, relevant_samples.sample_friendly_name
+;
+
+-- only returns 16 rows, as it looks like only one of our plates in the range we're checking went through Sample Management
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.2_cherrypick_submission.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.2_cherrypick_submission.sql
new file mode 100644
index 00000000..8bd77fde
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3.2_cherrypick_submission.sql
@@ -0,0 +1,54 @@
+-- We initially looked at Sequencescape cherrypick submissions (order_made events)
here, but this was misleading because:
+-- Here, we are looking for the cherrypick that happens during Sample Management
+-- In fact, there is also a 'dummy' cherrypick later in the process
+-- Most of the samples in our set did not actually go through Sample Management, they went straight to the RNA team
+-- Therefore, the order_made query was picking up the later cherrypick rather than the intended one.
+-- Instead, it is now using the 'slf_cherrypicking' event, which is issued by Asset Audits and therefore unique to Sample Management.
+
+-- find MIN date, to get just 1 row per sample
+SELECT relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,MIN(sr.labware_human_barcode) AS labware_human_barcode -- aggregated so the value is deterministic if a sample maps to more than one stock plate
+    ,MIN(sub_stock.id) AS id -- aggregated for the same reason; keeps the original column name
+    ,MIN(e.occured_at) AS cherrypick_first
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline: samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid
+JOIN mlwhd_mlwarehouse_proddata.stock_resource sr USING (id_sample_tmp)
+
+JOIN subjects sub_stock ON sub_stock.friendly_name = sr.labware_human_barcode -- trying to join on uuid is a lot slower
+JOIN roles r_stock ON r_stock.subject_id = sub_stock.id
+JOIN events e ON e.id = r_stock.event_id
+JOIN event_types et ON et.id = e.event_type_id
+
+WHERE et.`key` = 'slf_cherrypicking'
+
+GROUP BY relevant_samples.ewh_sample_id, relevant_samples.sample_uuid_bin, relevant_samples.sample_uuid, relevant_samples.sample_friendly_name
+;
+
+-- only returns 16 rows, as it looks like only one of our plates in the range we're checking went through Sample Management
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3_received_into_sample_management.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3_received_into_sample_management.sql
new file mode 100644
index 00000000..90c2ec23
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_3_received_into_sample_management.sql
@@ -0,0 +1,92 @@
+-- based on 'find_stock_plate.sql'
+
+SELECT -- SQL_NO_CACHE
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,sr.labware_human_barcode
+    ,sub_stock.id
+    ,e.occured_at AS stock_plate_received -- one row per 'slf_receive_plates' event on the sample's stock plate
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline: samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid
+JOIN mlwhd_mlwarehouse_proddata.stock_resource sr USING (id_sample_tmp)
+
+JOIN subjects sub_stock ON sub_stock.friendly_name = sr.labware_human_barcode -- trying to join on uuid is a lot slower
+JOIN roles r_stock ON r_stock.subject_id = sub_stock.id
+JOIN events e ON e.id = r_stock.event_id
+JOIN event_types et ON et.id = e.event_type_id
+
+WHERE et.`key` = 'slf_receive_plates'
+;
+
+
+-- find MAX date, to get just 1 row per sample
+SELECT relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,MIN(sr.labware_human_barcode) AS labware_human_barcode -- aggregated so the value is deterministic if a sample maps to more than one stock plate
+    ,MIN(sub_stock.id) AS id -- aggregated for the same reason; keeps the original column name
+    ,MAX(e.occured_at) AS stock_plate_received_latest
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline: samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN mlwhd_mlwarehouse_proddata.sample mlwh_sample ON mlwh_sample.uuid_sample_lims = relevant_samples.sample_uuid
+JOIN mlwhd_mlwarehouse_proddata.stock_resource sr USING (id_sample_tmp)
+
+JOIN subjects sub_stock ON sub_stock.friendly_name = sr.labware_human_barcode -- trying to join on uuid is a lot slower
+JOIN roles r_stock ON r_stock.subject_id = sub_stock.id
+JOIN events e ON e.id = r_stock.event_id
+JOIN event_types et ON et.id = e.event_type_id
+
+WHERE et.`key` = 'slf_receive_plates'
+
+GROUP BY relevant_samples.ewh_sample_id, relevant_samples.sample_uuid_bin, relevant_samples.sample_uuid, relevant_samples.sample_friendly_name
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_4_end_sample_management.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_4_end_sample_management.sql
new file mode 100644
index 00000000..33e0b466
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_4_end_sample_management.sql
@@ -0,0 +1,44 @@
+SELECT -- SQL_NO_CACHE
+    DISTINCT
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,e.occured_at AS post_cherrypick_labwhere_scan
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline: samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all labware.received events for the relevant samples (inner joins: the WHERE filters below discard unmatched samples anyway)
+JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+JOIN events e ON e.id = r_sample.event_id
+JOIN event_types et ON et.id = e.event_type_id
+JOIN metadata m ON m.event_id = e.id
+
+WHERE et.`key` = 'labware.received'
+    AND m.`key` = 'location_barcode'
+    AND m.value = 'lw-shelf-1-455'
+;
+
+-- only get these events for 16 samples...
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_5_start_pre_library.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_5_start_pre_library.sql
new file mode 100644
index 00000000..b4435d58
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_5_start_pre_library.sql
@@ -0,0 +1,44 @@
+SELECT -- SQL_NO_CACHE
+    DISTINCT
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,e.occured_at AS rna_team_labwhere_scan_out
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline: samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all labware.received events for the relevant samples (inner joins: the WHERE filters below discard unmatched samples anyway)
+JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+JOIN events e ON e.id = r_sample.event_id
+JOIN event_types et ON et.id = e.event_type_id
+JOIN metadata m ON m.event_id = e.id
+
+WHERE et.`key` = 'labware.received'
+    AND m.`key` = 'location_barcode'
+    AND m.value = '' -- TODO: location barcode has not been filled in; as written only an empty barcode can match
+;
+
+-- 0 rows
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_6_end_pre_library.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_6_end_pre_library.sql
new file mode 100644
index 00000000..1230d2ad
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_6_end_pre_library.sql
@@ -0,0 +1,44 @@
+-- Sequencescape
+
+SELECT relevant_samples.sample_id
+    ,relevant_samples.sample_name
+    ,sc.created_at AS 'tag_plate_used'
+FROM
+
+(
+    -- Equivalent of Events WH query identifying (529) LCMB-ISC samples, but using SS db.
+    SELECT sample_id, sample_name, sample_uuid
+    FROM
+
+    (
+        SELECT DISTINCT s.id AS 'sample_id', s.name AS 'sample_name', u.external_id AS 'sample_uuid', o_r.role AS 'pipeline'
+
+        FROM orders o
+
+        JOIN order_roles o_r ON o_r.id = o.order_role_id
+        JOIN asset_groups a_g ON a_g.id = o.asset_group_id
+        JOIN asset_group_assets a_g_a ON a_g_a.asset_group_id = a_g.id
+        JOIN receptacles r ON a_g_a.asset_id = r.id
+        JOIN aliquots a ON r.id = a.receptacle_id
+        JOIN samples s ON s.id = a.sample_id
+        JOIN uuids u ON u.resource_type = 'sample' AND u.resource_id = s.id
+
+        WHERE o_r.role IN ('LCMB', 'ReISC')
+            AND o.created_at > '2022-05-10'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+
+    GROUP BY sample_id, sample_name, sample_uuid -- group by every selected column so the result does not depend on a lenient sql_mode
+    HAVING Count(*) > 1 -- rows are DISTINCT per order role, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN aliquots a ON relevant_samples.sample_id = a.sample_id
+JOIN receptacles r ON r.id = a.receptacle_id
+JOIN labware l ON l.id = r.labware_id
+JOIN plate_purposes pp ON pp.id = l.plate_purpose_id
+JOIN state_changes sc ON sc.target_id = l.id
+
+WHERE pp.name = 'LB Lib PCR'
+    AND sc.target_state = 'exhausted' -- this marks when the tag plate is used. Can't use when it was created as they are created in batches far in advance.
+
+ORDER BY relevant_samples.sample_name
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_7_start_post_library.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_7_start_post_library.sql
new file mode 100644
index 00000000..6a1852bb
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_7_start_post_library.sql
@@ -0,0 +1,52 @@
+-- Sequencescape
+
+-- Creation of the LB Lib PCR-XP plate:
+-- table 'labware', 'created_at' field - linked to 'plate_purposes' table, where 'name' field is 'LB Lib PCR-XP'.
+
+
+SELECT relevant_samples.sample_id,
+    relevant_samples.sample_uuid,
+    relevant_samples.sample_name,
+    MIN(l.created_at) AS LB_Lib_PCR_XP_created
+FROM
+
+(
+    -- Equivalent of Events WH query identifying (529) LCMB-ISC samples, but using SS db.
+    SELECT sample_id, sample_name, sample_uuid
+    FROM
+
+    (
+        SELECT DISTINCT s.id AS 'sample_id', s.name AS 'sample_name', u.external_id AS 'sample_uuid', o_r.role AS 'pipeline'
+
+        FROM orders o
+
+        JOIN order_roles o_r ON o_r.id = o.order_role_id
+        JOIN asset_groups a_g ON a_g.id = o.asset_group_id
+        JOIN asset_group_assets a_g_a ON a_g_a.asset_group_id = a_g.id
+        JOIN receptacles r ON a_g_a.asset_id = r.id
+        JOIN aliquots a ON r.id = a.receptacle_id
+        JOIN samples s ON s.id = a.sample_id
+        JOIN uuids u ON u.resource_type = 'sample' AND u.resource_id = s.id
+
+        WHERE o_r.role IN ('LCMB', 'ReISC')
+            AND o.created_at > '2022-05-10'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+
+    GROUP BY sample_id, sample_name, sample_uuid -- group by every selected column so the result does not depend on a lenient sql_mode
+    HAVING Count(*) > 1 -- rows are DISTINCT per order role, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN aliquots a ON relevant_samples.sample_id = a.sample_id
+JOIN receptacles r ON r.id = a.receptacle_id
+JOIN labware l ON l.id = r.labware_id
+JOIN plate_purposes pp ON pp.id = l.plate_purpose_id
+
+WHERE pp.name = 'LB Lib PCR-XP'
+
+GROUP BY relevant_samples.sample_id, relevant_samples.sample_uuid, relevant_samples.sample_name
+ORDER BY relevant_samples.sample_name
+;
+-- 1,058 rows before grouping (2 per sample); the GROUP BY above reduces this to 1 row per sample
+-- All 529 samples represented
+-- 2 LB Lib PCR-XP plates for each sample - one seems to be cherrypicked into
the other
+-- Instructed to use the timestamp from the first of the two, hence the MIN()
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8.1_isc_submission.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8.1_isc_submission.sql
new file mode 100644
index 00000000..9ed09200
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8.1_isc_submission.sql
@@ -0,0 +1,40 @@
+SELECT ewh_sample_id
+    ,sample_uuid_bin
+    ,sample_uuid
+    ,sample_friendly_name
+    ,SUBSTRING_INDEX(GROUP_CONCAT(e.id ORDER BY e.occured_at, e.id), ',', 1) AS reisc_event_id -- id of the earliest ReISC order_made event, i.e. the one MIN() below reports (returned as text)
+    ,MIN(e.occured_at) AS 'ReISC_order_made'
+FROM
+(
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN roles r ON r.subject_id = relevant_samples.ewh_sample_id
+JOIN events e ON e.id = r.event_id
+JOIN event_types et ON et.id = e.event_type_id
+JOIN metadata m ON m.event_id = e.id
+
+WHERE et.`key` = 'order_made'
+    AND m.`key` = 'order_type' AND m.value = 'ReISC'
+
+GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+;
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8_end_post_library.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8_end_post_library.sql
new file mode 100644
index 00000000..4ff15796
--- /dev/null
+++
b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_8_end_post_library.sql
@@ -0,0 +1,44 @@
+SELECT -- SQL_NO_CACHE
+    DISTINCT
+    relevant_samples.ewh_sample_id
+    ,relevant_samples.sample_uuid_bin
+    ,relevant_samples.sample_uuid
+    ,relevant_samples.sample_friendly_name
+    ,e.occured_at AS end_post_library_labwhere_scan
+FROM
+(
+    -- Relevant (529) samples for LCMB-ISC pipeline: samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+-- Find all labware.received events for the relevant samples (inner joins: the WHERE filters below discard unmatched samples anyway)
+JOIN roles r_sample ON r_sample.subject_id = relevant_samples.ewh_sample_id
+JOIN events e ON e.id = r_sample.event_id
+JOIN event_types et ON et.id = e.event_type_id
+JOIN metadata m ON m.event_id = e.id
+
+WHERE et.`key` = 'labware.received'
+    AND m.`key` = 'location_barcode'
+    AND m.value = 'lw-rna-tube-archive-199-3199'
+;
+
+-- 0 rows
\ No newline at end of file
diff --git a/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_9_isc_start.sql b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_9_isc_start.sql
new file mode 100644
index 00000000..e0e36c30
--- /dev/null
+++ b/queries/TAT-WOW-project/LCMB-ISC-X/timestamp_9_isc_start.sql
@@ -0,0 +1,39 @@
+SELECT ewh_sample_id
+    ,sample_uuid_bin
+    ,sample_uuid
+    ,sample_friendly_name
+    ,MIN(e.occured_at) AS 'ReISC_library_start' -- earliest 'library_start' event carrying order_type = 'ReISC' for the sample
+FROM
+(
+    SELECT ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- samples with order_made events for both the 'LCMB' and 'ReISC' order types
+    FROM
+    (
+        SELECT DISTINCT s.id AS ewh_sample_id,
+            s.uuid AS sample_uuid_bin,
+            insert(insert(insert(insert(lower(hex(s.uuid)),9,0,'-'),14,0,'-'),19,0,'-'),24,0,'-') AS sample_uuid, -- binary uuid rendered as dashed lower-case text
+            s.friendly_name AS sample_friendly_name, m.value AS pipeline
+        FROM metadata m
+        JOIN events e ON e.id = m.event_id
+        JOIN event_types et ON et.id = e.event_type_id
+        JOIN roles r ON r.event_id = e.id
+        JOIN role_types rt ON r.role_type_id = rt.id
+        JOIN subjects s ON s.id = r.subject_id
+        WHERE m.`key` = 'order_type' AND m.value IN ('LCMB', 'ReISC')
+            AND m.created_at > '2022-05-10'
+            AND rt.`key` = 'sample'
+            AND et.`key` = 'order_made'
+    ) AS all_lcmb_reisc_samples_by_pipeline
+    GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name -- group by every selected column; all are attributes of the one subjects row
+    HAVING Count(*) > 1 -- rows are DISTINCT per pipeline, so > 1 means the sample has both LCMB and ReISC
+) AS relevant_samples
+
+JOIN roles r ON r.subject_id = relevant_samples.ewh_sample_id
+JOIN events e ON e.id = r.event_id
+JOIN event_types et ON et.id = e.event_type_id
+JOIN metadata m ON m.event_id = e.id
+
+WHERE et.`key` = 'library_start'
+    AND m.`key` = 'order_type' AND m.value = 'ReISC'
+
+GROUP BY ewh_sample_id, sample_uuid_bin, sample_uuid, sample_friendly_name
+;
\ No newline at end of file