From df6b3dbfe89430bb95e71e89a348cca9e4bc734d Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Feb 2021 11:47:08 -0800 Subject: [PATCH 1/8] adding workflow for XIC batch --- xicbatch/Makefile | 7 ++ xicbatch/tools/xicbatch/calculate_xic.py | 77 ++++++++++++ xicbatch/tools/xicbatch/demangle.py | 32 +++++ .../tools/xicbatch/ming_fileio_library.py | 1 + .../tools/xicbatch/ming_proteosafe_library.py | 1 + xicbatch/tools/xicbatch/msaccess | 1 + xicbatch/xicbatch/binding.xml | 47 ++++++++ xicbatch/xicbatch/flow.xml | 29 +++++ xicbatch/xicbatch/input.xml | 113 ++++++++++++++++++ xicbatch/xicbatch/result.xml | 32 +++++ xicbatch/xicbatch/tool.xml | 48 ++++++++ 11 files changed, 388 insertions(+) create mode 100644 xicbatch/Makefile create mode 100644 xicbatch/tools/xicbatch/calculate_xic.py create mode 100644 xicbatch/tools/xicbatch/demangle.py create mode 120000 xicbatch/tools/xicbatch/ming_fileio_library.py create mode 120000 xicbatch/tools/xicbatch/ming_proteosafe_library.py create mode 120000 xicbatch/tools/xicbatch/msaccess create mode 100644 xicbatch/xicbatch/binding.xml create mode 100644 xicbatch/xicbatch/flow.xml create mode 100644 xicbatch/xicbatch/input.xml create mode 100644 xicbatch/xicbatch/result.xml create mode 100644 xicbatch/xicbatch/tool.xml diff --git a/xicbatch/Makefile b/xicbatch/Makefile new file mode 100644 index 00000000..35257d08 --- /dev/null +++ b/xicbatch/Makefile @@ -0,0 +1,7 @@ +include ../Makefile.credentials +include ../Makefile.deploytemplate + +WORKFLOW_NAME=xicbatch +TOOL_FOLDER_NAME=xicbatch +WORKFLOW_VERSION=release_28 +WORKFLOW_DESCRIPTION="xicbatch" diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py new file mode 100644 index 00000000..1a11cc52 --- /dev/null +++ b/xicbatch/tools/xicbatch/calculate_xic.py @@ -0,0 +1,77 @@ +import os +import sys +import numpy as np +import pandas as pd + +import argparse +import uuid +import glob +import shutil +from scipy import integrate + +def calculate_xic(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, feature_name): + temp_result_folder = os.path.join(str(uuid.uuid4())) + + mz_lower = mz - mz_tolerance + mz_upper = mz + mz_tolerance + + command = 'export LC_ALL=C && {} {} -o {} -x "tic mz={},{} delimiter=tab" --filter "msLevel 1" --filter "scanTime ["{},{}"]"'.format( + msaccess_path, filename, temp_result_folder, mz_lower, mz_upper, rt_min, rt_max) + + print(command) + os.system(command) + + result_filename = glob.glob(os.path.join(temp_result_folder, "*"))[0] + result_df = pd.read_csv(result_filename, sep="\t", skiprows=1) + + xic_df = pd.DataFrame() + xic_df["rt"] = result_df["rt"] / 60.0 + xic_df["int"] = result_df["sumIntensity"] + + # Remove temp folder + shutil.rmtree(temp_result_folder) + + return xic_df + + +def main(): + parser = argparse.ArgumentParser(description='Creating XIC') + parser.add_argument('input_folder', help='input_mgf') + parser.add_argument('output_results', help='output_results') + parser.add_argument('msaccess_path', help='msaccess_path') + parser.add_argument('--mz', default=None, help='mz') + parser.add_argument('--rt', default=None, help='rt') + parser.add_argument('--mztol', default=None, help='mztol') + parser.add_argument('--rttol', default=None, help='rttol') + + args = parser.parse_args() + + all_input_files = glob.glob(os.path.join(args.input_folder, "*")) + + output_list = [] + + for filename in all_input_files: + mz = float(args.mz) + rt = float(args.rt) + + xic_df = calculate_xic(filename, + mz, rt, + float(args.mztol), + float(args.rt) - float(args.rttol), + float(args.rt) + float(args.rttol), args.msaccess_path, str(args.mz)) + + integration_value = integrate.trapz(xic_df["int"], x=xic_df["rt"]) + + output_dict = {} + output_dict["filename"] = os.path.basename(filename) + output_dict["integration_value"] = integration_value + output_dict["mz"] = mz + output_dict["rt"] = rt + + output_list.append(output_dict) + + results_df = pd.DataFrame(output_list) + results_df.to_csv(args.output_results, sep="\t", index=False) + +if __name__ == "__main__": + main() diff --git a/xicbatch/tools/xicbatch/demangle.py b/xicbatch/tools/xicbatch/demangle.py new file mode 100644 index 00000000..c0fb228b --- /dev/null +++ b/xicbatch/tools/xicbatch/demangle.py @@ -0,0 +1,32 @@ +import os +import sys +import pandas as pd + +import argparse +import ming_proteosafe_library + +def main(): + parser = argparse.ArgumentParser(description='Creating Demangling') + parser.add_argument('input_results', help='input_mgf') + parser.add_argument('output_results', help='output_results') + parser.add_argument('params', help='msaccess_path') + args = parser.parse_args() + + params_dict = ming_proteosafe_library.parse_xml_file(open(args.params)) + mangled_mapping = ming_proteosafe_library.get_mangled_file_mapping(params_dict) + + results_df = pd.read_csv(args.input_results, sep="\t") + results_list = results_df.to_dict(orient="records") + + for result in results_list: + filename = result["filename"] + full_ccms_path = mangled_mapping[filename] + result["full_ccms_path"] = full_ccms_path + + + demanged_results_df = pd.DataFrame(results_list) + demanged_results_df.to_csv(args.output_results, sep="\t", index=False) + + +if __name__ == "__main__": + main() diff --git a/xicbatch/tools/xicbatch/ming_fileio_library.py b/xicbatch/tools/xicbatch/ming_fileio_library.py new file mode 120000 index 00000000..64ef908d --- /dev/null +++ b/xicbatch/tools/xicbatch/ming_fileio_library.py @@ -0,0 +1 @@ +../../../shared_code/ming_fileio_library.py \ No newline at end of file diff --git a/xicbatch/tools/xicbatch/ming_proteosafe_library.py b/xicbatch/tools/xicbatch/ming_proteosafe_library.py new file mode 120000 index 00000000..9d4083af --- /dev/null +++ b/xicbatch/tools/xicbatch/ming_proteosafe_library.py @@ -0,0 +1 @@ +../../../shared_code/ming_proteosafe_library.py \ No newline at end of file diff --git a/xicbatch/tools/xicbatch/msaccess b/xicbatch/tools/xicbatch/msaccess new file mode 120000 index 00000000..14358c90 --- /dev/null +++ b/xicbatch/tools/xicbatch/msaccess @@ -0,0 +1 @@ +../../.././molecular-librarysearch-v2/tools/molecularsearch/msaccess \ No newline at end of file diff --git a/xicbatch/xicbatch/binding.xml b/xicbatch/xicbatch/binding.xml new file mode 100644 index 00000000..467d3daf --- /dev/null +++ b/xicbatch/xicbatch/binding.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/flow.xml b/xicbatch/xicbatch/flow.xml new file mode 100644 index 00000000..ad7c833e --- /dev/null +++ b/xicbatch/xicbatch/flow.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/input.xml b/xicbatch/xicbatch/input.xml new file mode 100644 index 00000000..d86a09b7 --- /dev/null +++ b/xicbatch/xicbatch/input.xml @@ -0,0 +1,113 @@ + + + xicbatch + xicbatch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Spectrum Files + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/result.xml b/xicbatch/xicbatch/result.xml new file mode 100644 index 00000000..09f0e0de --- /dev/null +++ b/xicbatch/xicbatch/result.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/tool.xml b/xicbatch/xicbatch/tool.xml new file mode 100644 index 00000000..29ae7ecb --- /dev/null +++ b/xicbatch/xicbatch/tool.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 23b13aafc0828dd7ca511a3de9477d2678b81146 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Feb 2021 15:46:32 -0800 Subject: [PATCH 2/8] updating xic batch to draw results --- xicbatch/tools/xicbatch/calculate_xic.py | 10 ++++++- xicbatch/tools/xicbatch/draw_results.py | 34 ++++++++++++++++++++++++ xicbatch/xicbatch/binding.xml | 17 ++++++++++++ xicbatch/xicbatch/flow.xml | 11 ++++++++ xicbatch/xicbatch/result.xml | 18 +++++++++++++ xicbatch/xicbatch/tool.xml | 24 +++++++++++++++++ 6 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 xicbatch/tools/xicbatch/draw_results.py diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py index 1a11cc52..0796fb51 100644 --- a/xicbatch/tools/xicbatch/calculate_xic.py +++ b/xicbatch/tools/xicbatch/calculate_xic.py @@ -38,6 +38,7 @@ def main(): parser = argparse.ArgumentParser(description='Creating XIC') parser.add_argument('input_folder', help='input_mgf') parser.add_argument('output_results', help='output_results') + parser.add_argument('extraction_results', help='extraction_results') parser.add_argument('msaccess_path', help='msaccess_path') parser.add_argument('--mz', default=None, help='mz') parser.add_argument('--rt', default=None, help='rt') @@ -49,7 +50,7 @@ def main(): all_input_files = glob.glob(os.path.join(args.input_folder, "*")) output_list = [] - + output_full_xic = [] for filename in all_input_files: mz = float(args.mz) rt = float(args.rt) @@ -60,6 +61,9 @@ def main(): float(args.rt) - float(args.rttol), float(args.rt) + float(args.rttol), args.msaccess_path, str(args.mz)) + xic_df["query"] = "{}:{}".format(mz, rt) + xic_df["filename"] = os.path.basename(filename) + integration_value = integrate.trapz(xic_df["int"], x=xic_df["rt"]) output_dict = {} @@ -67,11 +71,15 @@ def main(): output_dict["integration_value"] = integration_value output_dict["mz"] = mz output_dict["rt"] = rt + output_dict["drawing"] = "{}_{}_{}.png".format(os.path.basename(filename), mz, rt) + output_full_xic.append(xic_df) output_list.append(output_dict) results_df = pd.DataFrame(output_list) results_df.to_csv(args.output_results, sep="\t", index=False) + pd.concat(output_full_xic).to_csv(args.extraction_results, sep='\t', index=False) + if __name__ == "__main__": main() diff --git a/xicbatch/tools/xicbatch/draw_results.py b/xicbatch/tools/xicbatch/draw_results.py new file mode 100644 index 00000000..92f8ff4b --- /dev/null +++ b/xicbatch/tools/xicbatch/draw_results.py @@ -0,0 +1,34 @@ +import os +import sys +import pandas as pd + +import argparse +import ming_proteosafe_library + +from plotnine import * + + +def main(): + parser = argparse.ArgumentParser(description='Creating Demangling') + parser.add_argument('extracted_results', help='extracted_results') + parser.add_argument('output_folder', help='output_folder') + + args = parser.parse_args() + + extraction_df = pd.read_csv(args.extracted_results, sep="\t") + + + p = ( + ggplot(extraction_df, aes(x='rt', y='int', color='filename')) + + geom_line() # line plot + + labs(x='RT', y='Intensity') + ) + + p.save(os.path.join(args.output_folder, "merged.png")) + + # TODO: Drawing individual per file + + + +if __name__ == "__main__": + main() diff --git a/xicbatch/xicbatch/binding.xml b/xicbatch/xicbatch/binding.xml index 467d3daf..8f8520ca 100644 --- a/xicbatch/xicbatch/binding.xml +++ b/xicbatch/xicbatch/binding.xml @@ -21,6 +21,7 @@ + @@ -29,6 +30,14 @@ + + + + + + + + @@ -39,9 +48,17 @@ + + + + + + + + diff --git a/xicbatch/xicbatch/flow.xml b/xicbatch/xicbatch/flow.xml index ad7c833e..edea5b86 100644 --- a/xicbatch/xicbatch/flow.xml +++ b/xicbatch/xicbatch/flow.xml @@ -9,10 +9,12 @@ + + @@ -22,8 +24,17 @@ + + + + + + + + + diff --git a/xicbatch/xicbatch/result.xml b/xicbatch/xicbatch/result.xml index 09f0e0de..5152c8a5 100644 --- a/xicbatch/xicbatch/result.xml +++ b/xicbatch/xicbatch/result.xml @@ -1,6 +1,12 @@ + + + + + + @@ -28,5 +34,17 @@ + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/tool.xml b/xicbatch/xicbatch/tool.xml index 29ae7ecb..55de7c21 100644 --- a/xicbatch/xicbatch/tool.xml +++ b/xicbatch/xicbatch/tool.xml @@ -11,11 +11,13 @@ + + @@ -45,4 +47,26 @@ + + + + + + + + + + + + + + + + + + + + + + From bae71908f66de21702f61af4e595d4a5186607f3 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Feb 2021 15:53:40 -0800 Subject: [PATCH 3/8] adding size --- xicbatch/tools/xicbatch/calculate_xic.py | 6 ++++-- xicbatch/tools/xicbatch/draw_results.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py index 0796fb51..505b898b 100644 --- a/xicbatch/tools/xicbatch/calculate_xic.py +++ b/xicbatch/tools/xicbatch/calculate_xic.py @@ -16,9 +16,9 @@ def calculate_xic(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, mz_upper = mz + mz_tolerance command = 'export LC_ALL=C && {} {} -o {} -x "tic mz={},{} delimiter=tab" --filter "msLevel 1" --filter "scanTime ["{},{}"]"'.format( - msaccess_path, filename, temp_result_folder, mz_lower, mz_upper, rt_min, rt_max) + msaccess_path, filename, temp_result_folder, mz_lower, mz_upper, rt_min * 60, rt_max * 60) - print(command) + print(command, mz_lower, mz_upper, mz_tolerance) os.system(command) result_filename = glob.glob(os.path.join(temp_result_folder, "*"))[0] @@ -47,6 +47,8 @@ def main(): args = parser.parse_args() + print(args) + all_input_files = glob.glob(os.path.join(args.input_folder, "*")) output_list = [] diff --git a/xicbatch/tools/xicbatch/draw_results.py b/xicbatch/tools/xicbatch/draw_results.py index 92f8ff4b..1d249190 100644 --- a/xicbatch/tools/xicbatch/draw_results.py +++ b/xicbatch/tools/xicbatch/draw_results.py @@ -22,6 +22,7 @@ def main(): ggplot(extraction_df, aes(x='rt', y='int', color='filename')) + geom_line() # line plot + labs(x='RT', y='Intensity') + theme(figure_size=(30,20)) ) p.save(os.path.join(args.output_folder, "merged.png")) From 233371b6305fd6c099832116fd33557efc6f2dd8 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Feb 2021 16:01:22 -0800 Subject: [PATCH 4/8] Drawing v2 --- xicbatch/tools/xicbatch/calculate_xic.py | 2 +- xicbatch/tools/xicbatch/draw_results.py | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py index 505b898b..eb987b57 100644 --- a/xicbatch/tools/xicbatch/calculate_xic.py +++ b/xicbatch/tools/xicbatch/calculate_xic.py @@ -63,7 +63,7 @@ def main(): float(args.rt) - float(args.rttol), float(args.rt) + float(args.rttol), args.msaccess_path, str(args.mz)) - xic_df["query"] = "{}:{}".format(mz, rt) + xic_df["query"] = "{}_{}".format(mz, rt) xic_df["filename"] = os.path.basename(filename) integration_value = integrate.trapz(xic_df["int"], x=xic_df["rt"]) diff --git a/xicbatch/tools/xicbatch/draw_results.py b/xicbatch/tools/xicbatch/draw_results.py index 1d249190..62ac51b4 100644 --- a/xicbatch/tools/xicbatch/draw_results.py +++ b/xicbatch/tools/xicbatch/draw_results.py @@ -22,13 +22,31 @@ def main(): ggplot(extraction_df, aes(x='rt', y='int', color='filename')) + geom_line() # line plot + labs(x='RT', y='Intensity') - theme(figure_size=(30,20)) + + theme(figure_size=(30,20)) ) - p.save(os.path.join(args.output_folder, "merged.png")) + p.save(os.path.join(args.output_folder, "merged.png"), limitsize=False) # TODO: Drawing individual per file - + all_filenames = list(set(extraction_df["filename"])) + all_queries = list(set(extraction_df["query"])) + for filename in all_filenames: + for query in all_queries: + output_filename = "{}_{}.png".format(filename, query) + filtered_df = extraction_df[extraction_df["filename"] == filename] + filtered_df = extraction_df[extraction_df["query"] == query] + + p = ( + ggplot(filtered_df, aes(x='rt', y='int')) + + geom_line() # line plot + + labs(x='RT', y='Intensity') + + theme(figure_size=(30,20)) + ) + + p.save(os.path.join(args.output_folder, output_filename), limitsize=False) + + + if __name__ == "__main__": From 7fc21037b0cd1b2f316053f475b4ec518e9d56cf Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Feb 2021 16:34:10 -0800 Subject: [PATCH 5/8] updating batch --- xicbatch/tools/xicbatch/calculate_xic.py | 49 ++++++++++++++++++++++-- xicbatch/xicbatch/binding.xml | 5 +++ xicbatch/xicbatch/flow.xml | 3 ++ xicbatch/xicbatch/result.xml | 44 ++++++++++++++++++++- xicbatch/xicbatch/tool.xml | 3 ++ 5 files changed, 100 insertions(+), 4 deletions(-) diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py index eb987b57..385fa246 100644 --- a/xicbatch/tools/xicbatch/calculate_xic.py +++ b/xicbatch/tools/xicbatch/calculate_xic.py @@ -33,12 +33,38 @@ def calculate_xic(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, return xic_df +def calculate_ms2(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, feature_name): + temp_result_folder = os.path.join(str(uuid.uuid4())) + + command = 'export LC_ALL=C && {} {} -o {} -x "spectrum_table delimiter=tab" --filter "mzPrecursors [{}] mzTol={} Da" --filter "msLevel 2" --filter "scanTime ["{},{}"]"'.format( + msaccess_path, filename, temp_result_folder, mz, mz_tolerance, rt_min * 60, rt_max * 60) + + os.system(command) + + result_filename = glob.glob(os.path.join(temp_result_folder, "*"))[0] + result_df = pd.read_csv(result_filename, sep="\t", skiprows=1) + + formatted_df = pd.DataFrame() + formatted_df["scan"] = result_df["id"].apply(lambda x: x.split(".")[-1]) + formatted_df["filename"] = os.path.basename(filename) + formatted_df["rt"] = result_df["rt"] / 60.0 + formatted_df["tic"] = result_df["TIC"] + formatted_df["precursorMZ"] = result_df["precursorMZ"] + + print(formatted_df) + + # Remove temp folder + shutil.rmtree(temp_result_folder) + + return formatted_df + def main(): parser = argparse.ArgumentParser(description='Creating XIC') parser.add_argument('input_folder', help='input_mgf') parser.add_argument('output_results', help='output_results') parser.add_argument('extraction_results', help='extraction_results') + parser.add_argument('ms2_extraction_results', help='ms2_extraction_results') parser.add_argument('msaccess_path', help='msaccess_path') parser.add_argument('--mz', default=None, help='mz') parser.add_argument('--rt', default=None, help='rt') @@ -47,12 +73,11 @@ def main(): args = parser.parse_args() - print(args) - all_input_files = glob.glob(os.path.join(args.input_folder, "*")) output_list = [] output_full_xic = [] + output_ms2 = [] for filename in all_input_files: mz = float(args.mz) rt = float(args.rt) @@ -63,6 +88,16 @@ def main(): float(args.rt) - float(args.rttol), float(args.rt) + float(args.rttol), args.msaccess_path, str(args.mz)) + try: + formatted_df = calculate_ms2(filename, mz, rt, + float(args.mztol), + float(args.rt) - float(args.rttol), + float(args.rt) + float(args.rttol), args.msaccess_path, str(args.mz)) + formatted_df["query"] = "{}_{}".format(mz, rt) + output_ms2.append(formatted_df) + except: + pass + xic_df["query"] = "{}_{}".format(mz, rt) xic_df["filename"] = os.path.basename(filename) @@ -76,12 +111,20 @@ def main(): output_dict["drawing"] = "{}_{}_{}.png".format(os.path.basename(filename), mz, rt) output_full_xic.append(xic_df) + output_list.append(output_dict) results_df = pd.DataFrame(output_list) results_df.to_csv(args.output_results, sep="\t", index=False) pd.concat(output_full_xic).to_csv(args.extraction_results, sep='\t', index=False) - + + try: + pd.concat(output_ms2).to_csv(args.ms2_extraction_results, sep='\t', index=False) + except: + pd.DataFrame().to_csv(args.ms2_extraction_results, sep='\t', index=False) + pass + + if __name__ == "__main__": main() diff --git a/xicbatch/xicbatch/binding.xml b/xicbatch/xicbatch/binding.xml index 8f8520ca..39a48565 100644 --- a/xicbatch/xicbatch/binding.xml +++ b/xicbatch/xicbatch/binding.xml @@ -22,6 +22,7 @@ + @@ -52,6 +53,10 @@ + + + + diff --git a/xicbatch/xicbatch/flow.xml b/xicbatch/xicbatch/flow.xml index edea5b86..9458d153 100644 --- a/xicbatch/xicbatch/flow.xml +++ b/xicbatch/xicbatch/flow.xml @@ -10,11 +10,13 @@ + + @@ -34,6 +36,7 @@ + diff --git a/xicbatch/xicbatch/result.xml b/xicbatch/xicbatch/result.xml index 5152c8a5..3430e629 100644 --- a/xicbatch/xicbatch/result.xml +++ b/xicbatch/xicbatch/result.xml @@ -22,7 +22,7 @@ - + @@ -34,6 +34,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xicbatch/xicbatch/tool.xml b/xicbatch/xicbatch/tool.xml index 55de7c21..3ce8d6ea 100644 --- a/xicbatch/xicbatch/tool.xml +++ b/xicbatch/xicbatch/tool.xml @@ -12,12 +12,15 @@ + + + From 6e76d56a254b4b683140231863a0065c000815ec Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Feb 2021 17:30:19 -0800 Subject: [PATCH 6/8] adding linkouts --- xicbatch/tools/xicbatch/draw_results.py | 9 ++++++--- xicbatch/xicbatch/binding.xml | 22 ++++++++++++++++++++++ xicbatch/xicbatch/flow.xml | 21 ++++++++++++++++++++- xicbatch/xicbatch/result.xml | 14 +++++++++++--- 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/xicbatch/tools/xicbatch/draw_results.py b/xicbatch/tools/xicbatch/draw_results.py index 62ac51b4..5a466734 100644 --- a/xicbatch/tools/xicbatch/draw_results.py +++ b/xicbatch/tools/xicbatch/draw_results.py @@ -22,7 +22,7 @@ def main(): ggplot(extraction_df, aes(x='rt', y='int', color='filename')) + geom_line() # line plot + labs(x='RT', y='Intensity') - + theme(figure_size=(30,20)) + + theme(figure_size=(20,16)) ) p.save(os.path.join(args.output_folder, "merged.png"), limitsize=False) @@ -34,13 +34,16 @@ def main(): for query in all_queries: output_filename = "{}_{}.png".format(filename, query) filtered_df = extraction_df[extraction_df["filename"] == filename] - filtered_df = extraction_df[extraction_df["query"] == query] + filtered_df = filtered_df[filtered_df["query"] == query] + + print(filtered_df) + print(len(filtered_df)) p = ( ggplot(filtered_df, aes(x='rt', y='int')) + geom_line() # line plot + labs(x='RT', y='Intensity') - + theme(figure_size=(30,20)) + + theme(figure_size=(15,10)) ) p.save(os.path.join(args.output_folder, output_filename), limitsize=False) diff --git a/xicbatch/xicbatch/binding.xml b/xicbatch/xicbatch/binding.xml index 39a48565..6efa21b2 100644 --- a/xicbatch/xicbatch/binding.xml +++ b/xicbatch/xicbatch/binding.xml @@ -31,6 +31,20 @@ + + + + + + + + + + + + + + @@ -61,6 +75,14 @@ + + + + + + + + diff --git a/xicbatch/xicbatch/flow.xml b/xicbatch/xicbatch/flow.xml index 9458d153..c6a88e8a 100644 --- a/xicbatch/xicbatch/flow.xml +++ b/xicbatch/xicbatch/flow.xml @@ -26,10 +26,25 @@ + + + + + + + + + + + + + + + - + @@ -37,7 +52,11 @@ + + + + diff --git a/xicbatch/xicbatch/result.xml b/xicbatch/xicbatch/result.xml index 3430e629..25835e1e 100644 --- a/xicbatch/xicbatch/result.xml +++ b/xicbatch/xicbatch/result.xml @@ -32,6 +32,14 @@ + + + + + + + + @@ -40,7 +48,7 @@ - + @@ -49,12 +57,12 @@ - + - + From fd6ed83e13b43d4b62be93b2c8aa0a9f51ca7b2c Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Feb 2021 11:25:50 -0800 Subject: [PATCH 7/8] updating filename --- xicbatch/tools/xicbatch/draw_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xicbatch/tools/xicbatch/draw_results.py b/xicbatch/tools/xicbatch/draw_results.py index 5a466734..e92be6db 100644 --- a/xicbatch/tools/xicbatch/draw_results.py +++ b/xicbatch/tools/xicbatch/draw_results.py @@ -19,7 +19,7 @@ def main(): p = ( - ggplot(extraction_df, aes(x='rt', y='int', color='filename')) + ggplot(extraction_df, aes(x='rt', y='int', color='full_ccms_path')) + geom_line() # line plot + labs(x='RT', y='Intensity') + theme(figure_size=(20,16)) From 1f1392f9896f6e517315fa401030c597addc0b27 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 8 Jun 2021 09:24:59 -0700 Subject: [PATCH 8/8] adding max rt intentity --- xicbatch/tools/xicbatch/calculate_xic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xicbatch/tools/xicbatch/calculate_xic.py b/xicbatch/tools/xicbatch/calculate_xic.py index 385fa246..e2887aa0 100644 --- a/xicbatch/tools/xicbatch/calculate_xic.py +++ b/xicbatch/tools/xicbatch/calculate_xic.py @@ -103,11 +103,17 @@ def main(): integration_value = integrate.trapz(xic_df["int"], x=xic_df["rt"]) + xic_df = xic_df.sort_values(by=['int'], ascending=False) + max_int_rt = xic_df["rt"].iloc[0] + max_int = xic_df["int"].iloc[0] + output_dict = {} output_dict["filename"] = os.path.basename(filename) output_dict["integration_value"] = integration_value output_dict["mz"] = mz output_dict["rt"] = rt + output_dict["max_int_rt"] = max_int_rt + output_dict["max_int"] = max_int output_dict["drawing"] = "{}_{}_{}.png".format(os.path.basename(filename), mz, rt) output_full_xic.append(xic_df)