From 19d84fd5a372f1428e3e5670144881a56e8af8b2 Mon Sep 17 00:00:00 2001 From: Riccardo Massei <118002526+rmassei@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:35:55 +0200 Subject: [PATCH] New Tools: OMERO get and OMERO filter using ezomero (#61) * add new tools (omero get and omero filter) and updated the ci/pr pipeline to add metadat to the dummy set) * use conditionals to more explicitely indicate the id required. * fix annotation part * convert omero_get to omero_get_id * add get_value * add single quotes * Fix bad copy paste * add tests * remove unused f * remove unused function * fix function name * fix spaces * Updated all the scripts and xml files according to previous review * correct indentation of omero_filter and omero_get_id * Update omero_filter.py Fixed the error handling using sys * fixed code and xml from omero_filter for correct linting * correct image name * fixed indentation * fix the regex of omero_get_value and updated the sheed * fixed the regex to handle single number plus comma separated numbers * Fix testing in output and omero_get_value.xml * Correct the cheetah sintax in omero_get_value.xml * Correct the test in omero_get_value.xml - tsv output has 2 rows considering the header * Last corrections to the omero_get_value script to harmonize it with the other code style (writing tsv in write mode and error handling) * add macros * omero import do not expose password * add requirement macros * add host port token * bump * add macros file * fix macro expansion --------- Co-authored-by: Lucille Delisle Co-authored-by: Matthias Bernt --- .github/workflows/ci.yaml | 2 + .github/workflows/pr.yaml | 2 + tools/omero/.shed.yml | 6 +- tools/omero/macros.xml | 47 +++++ tools/omero/omero_filter.py | 77 ++++++++ tools/omero/omero_filter.xml | 115 +++++++++++ tools/omero/omero_get_id.py | 116 ++++++++++++ tools/omero/omero_get_id.xml | 178 ++++++++++++++++++ tools/omero/omero_get_value.py | 98 ++++++++++ tools/omero/omero_get_value.xml | 82 ++++++++ 
tools/omero/omero_import.xml | 23 +-- tools/omero/omero_metadata_import.xml | 32 +--- tools/omero/omero_roi_import.xml | 32 +--- tools/omero/test-data/output_KV_import.txt | 2 +- .../test-data/output_filter_filename.tsv | 1 + tools/omero/test-data/output_filter_tag.tsv | 1 + tools/omero/test-data/output_ids_dataset.tsv | 1 + tools/omero/test-data/output_ids_image.tsv | 2 + tools/omero/test-data/output_ids_project.tsv | 1 + 19 files changed, 747 insertions(+), 71 deletions(-) create mode 100644 tools/omero/macros.xml create mode 100644 tools/omero/omero_filter.py create mode 100644 tools/omero/omero_filter.xml create mode 100644 tools/omero/omero_get_id.py create mode 100644 tools/omero/omero_get_id.xml create mode 100644 tools/omero/omero_get_value.py create mode 100644 tools/omero/omero_get_value.xml create mode 100644 tools/omero/test-data/output_filter_filename.tsv create mode 100644 tools/omero/test-data/output_filter_tag.tsv create mode 100644 tools/omero/test-data/output_ids_dataset.tsv create mode 100644 tools/omero/test-data/output_ids_image.tsv create mode 100644 tools/omero/test-data/output_ids_project.tsv diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4a140ee5..e3d139d2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -188,6 +188,8 @@ jobs: DID=$(omero obj new Dataset name='test_dts') omero obj new ProjectDatasetLink parent=$PID child=$DID omero import -d $DID .github/dummy-dts-omero + omero tag create --name test_tag --desc 'description of my_tag' + omero tag link Image:1 1 echo "Created the dummy dataset into OMERO" # download or create large test data via script diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 908d4530..40ba89f7 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -329,6 +329,8 @@ jobs: DID=$(omero obj new Dataset name='test_dts') omero obj new ProjectDatasetLink parent=$PID child=$DID omero import -d $DID .github/dummy-dts-omero + omero 
tag create --name test_tag --desc 'description of my_tag' + omero tag link Image:1 1 echo "Created the dummy dataset into OMERO" # download or create large test data via script diff --git a/tools/omero/.shed.yml b/tools/omero/.shed.yml index 70fa0277..4e458f33 100644 --- a/tools/omero/.shed.yml +++ b/tools/omero/.shed.yml @@ -1,8 +1,8 @@ categories: - Imaging name: omero_upload -description: Import images, region of interest, metadata into an OMERO.server using omero-py -long_description: Tool to import and link different objects into OMERO +description: Interact with an OMERO.server using omero-py and ezomero. +long_description: This set of tools allows to import images and metadata into an OMERO.server. owner: ufz remote_repository_url: https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/omero homepage_url: https://github.com/ome/omero-py/ @@ -13,4 +13,4 @@ suite: name: "suite_omero_py" description: "A suite of tools that brings the omero-py and ezomero project into Galaxy." long_description: | - OMERO.py provides an interface to the OMERO.blitz server. + OMERO.py and ezomero provides an interface to the OMERO.blitz server. diff --git a/tools/omero/macros.xml b/tools/omero/macros.xml new file mode 100644 index 00000000..644a357c --- /dev/null +++ b/tools/omero/macros.xml @@ -0,0 +1,47 @@ + + + 5.18.0 + 3.0.1 + 23.0 + + + + ezomero + pandas + + + + + + + omero-py + + openjdk + + + + + + + + ^[a-zA-Z0-9._-]*$ + '..' 
def filter_ids_ezo(user, pws, host, port, filter, id, value1, value2=None, tsv_file="filter_list.tsv"):
    """Filter OMERO image IDs with ezomero and save the matches as a TSV file.

    NOTE: ``filter`` and ``id`` shadow Python builtins; the names are kept
    because they are part of the keyword interface used by the caller.

    Args:
        user: OMERO user name.
        pws: OMERO password.
        host: Address of the OMERO server.
        port: Port of the OMERO server.
        filter: Filter to apply: "filename", "KP" (key-value pair) or "tag".
        id: Comma-separated image IDs to filter, e.g. "1,2,3".
        value1: File name, key or tag value to search for (depends on filter).
        value2: Value of the key-value pair; only used by the "KP" filter.
        tsv_file: Path of the output file; one matching ID is written per row.

    Returns:
        The result of the ezomero filter call (the same items written to
        ``tsv_file``).

    Raises:
        ValueError: If ``id`` contains a non-integer entry, or if the "KP"
            filter is requested without ``value2``.
        SystemExit: If ``filter`` is not one of the supported filters.
    """
    # Transform the id input in a list of integer
    image_ids = [int(token) for token in id.split(',')]

    # Lambdas defer every ezomero call until a connection actually exists
    filters = {
        "filename": lambda conn: ez.filter_by_filename(conn, image_ids, value1),
        "KP": lambda conn: ez.filter_by_kv(conn, image_ids, value1, value2),
        "tag": lambda conn: ez.filter_by_tag_value(conn, image_ids, value1),
    }

    # Validate the request before logging in: a bad argument should not
    # cost a round trip to the OMERO server
    if filter not in filters:
        sys.exit(f"Unsupported object type: {filter}")
    if filter == "KP" and value2 is None:
        raise ValueError("value2 is required for the KP filter")

    with ez.connect(user, pws, "", host, port, secure=True) as conn:
        filtered_ids = filters[filter](conn)

        # Write tabular file from the ezomero output
        with open(tsv_file, 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            for item in filtered_ids:
                writer.writerow([item])  # Write each ID

        return filtered_ids


# Argument parsing
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetch and save data as TSV based on object type.")
    parser.add_argument("--credential-file", dest="credential_file", type=str, required=True,
                        help="Credential file (JSON file with username and password for OMERO)")
    parser.add_argument('--host', required=True,
                        help="Host server address.")
    parser.add_argument('--port', required=True, type=int,
                        help='OMERO port')
    parser.add_argument('--filter', required=True,
                        help="Filter type - Filename, Key-Value Pairs, Tag")
    parser.add_argument('--id', required=True,
                        help="List of images IDs")
    parser.add_argument('--value1', required=True,
                        help="First searching values - Filename, Key, Tag")
    parser.add_argument('--value2', required=False,
                        help="Second searching values - Value (necessary just for Key-Value Pairs filter)")
    parser.add_argument('--tsv_file', default='filter_list.tsv',
                        help="Output TSV file path.")
    args = parser.parse_args()

    # The message names the real flag (--value2) so the user can act on it
    if args.filter == "KP" and args.value2 is None:
        raise ValueError("'--value2' is necessary to retrieve KP")

    with open(args.credential_file, 'r') as f:
        crds = json.load(f)

    # Call the main function to get the object and save it as a TSV
    filter_ids_ezo(user=crds['username'], pws=crds['password'], host=args.host,
                   port=args.port,
                   filter=args.filter,
                   value1=args.value1,
                   value2=args.value2,
                   id=args.id,
                   tsv_file=args.tsv_file)
def get_ids_ezo(user, pws, host, port, final_obj_type, parent_obj_type, parent_id=None, tsv_file="id_list.tsv"):
    """Fetch OMERO object IDs with ezomero and save them as a TSV file.

    Args:
        user: OMERO user name.
        pws: OMERO password.
        host: Address of the OMERO server.
        port: Port of the OMERO server.
        final_obj_type: Type of object whose IDs are fetched: "Project",
            "Dataset", "Image", "Annotation", "Tag", "Roi" or "Table".
        parent_obj_type: Type of the object the IDs are fetched from, or
            "All" to query the whole server.
        parent_id: ID of the parent object; may be None with "All".
        tsv_file: Path of the output file; one ID is written per row.

    Returns:
        The IDs returned by the matching ezomero call (the same items
        written to ``tsv_file``).

    Raises:
        ValueError: If ``final_obj_type`` is "Image" and ``parent_obj_type``
            is not Project, Dataset, Plate, Well or All.
        SystemExit: If ``final_obj_type`` is not supported.
    """
    # parent_obj_type -> keyword argument of ez.get_image_ids
    image_parent_kwargs = {
        "Project": "project",
        "Dataset": "dataset",
        "Plate": "plate",
        "Well": "well",
    }

    def fetch_datasets(conn):
        project = parent_id if parent_obj_type == "Project" else None
        return ez.get_dataset_ids(conn, project=project)

    def fetch_images(conn):
        kwargs = dict.fromkeys(image_parent_kwargs.values())
        if parent_obj_type in image_parent_kwargs:
            kwargs[image_parent_kwargs[parent_obj_type]] = parent_id
        return ez.get_image_ids(conn, **kwargs)

    # Lambdas defer every ezomero call until a connection actually exists
    fetchers = {
        "Project": lambda conn: ez.get_project_ids(conn),
        "Dataset": fetch_datasets,
        "Image": fetch_images,
        "Annotation": lambda conn: ez.get_map_annotation_ids(conn, parent_obj_type, parent_id),
        "Tag": lambda conn: ez.get_tag_ids(conn, parent_obj_type, parent_id),
        "Roi": lambda conn: ez.get_roi_ids(conn, parent_id),
        "Table": lambda conn: ez.get_file_annotation_ids(conn, parent_obj_type, parent_id),
    }

    # Validate the request before logging in. The message reports the
    # rejected final_obj_type (it used to interpolate the builtin `filter`).
    if final_obj_type not in fetchers:
        sys.exit(f"Unsupported object type: {final_obj_type}")
    if (final_obj_type == "Image" and parent_obj_type != "All"
            and parent_obj_type not in image_parent_kwargs):
        raise ValueError("Object set as parent_obj_type is not compatible")

    with ez.connect(user, pws, "", host, port, secure=True) as conn:
        obj_ids = fetchers[final_obj_type](conn)

        # Write tabular file from the ezomero output
        with open(tsv_file, 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            for item in obj_ids:
                writer.writerow([item])  # Write each ID

        return obj_ids
def get_object_ezo(user, pws, host, port, obj_type, ids, tsv_file):
    """Fetch annotations, tags or a table from OMERO and save them as TSV.

    Args:
        user: OMERO user name.
        pws: OMERO password.
        host: Address of the OMERO server.
        port: Port of the OMERO server.
        obj_type: Type of object to fetch: "Annotation", "Tag" or "Table".
        ids: List of OMERO object IDs; exactly one ID for "Table".
        tsv_file: Path of the output TSV file.

    Returns:
        "Annotation": the merged dict of all fetched map annotations.
        "Tag": the sorted list of fetched tag values.
        "Table": the table object returned by ezomero.

    Raises:
        ValueError: If more than one ID is given for "Table".
        SystemExit: If ``obj_type`` is not supported.
    """
    # Function to write tabular file from the ezomero output
    def write_values_to_tsv(data, header):
        with open(tsv_file, 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow([header])  # Write the header
            for item in data:
                writer.writerow([item])  # Write each value

    # Function to write tabular file from a dictionary ezomero output
    def write_dict_to_tsv(data, headers):
        with open(tsv_file, 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow(headers)  # Write the headers
            for key, value in data.items():
                writer.writerow([key, value])  # Write each key-value pair

    # Function to write tabular file from the ezomero table output
    def write_table_to_tsv(data):
        # NOTE(review): ezomero documents that get_table returns a pandas
        # DataFrame when pandas is installed. Iterating a DataFrame yields
        # column labels, not rows, so it is written through its own
        # exporter -- confirm against the pinned ezomero version.
        if hasattr(data, "to_csv"):
            data.to_csv(tsv_file, sep='\t', index=False)
            return
        with open(tsv_file, 'w') as f:
            for row in data:
                f.write('\t'.join(str(val) for val in row) + '\n')

    # Validate the request before logging in. The message reports the
    # rejected obj_type (it used to interpolate the builtin `filter`).
    if obj_type not in ("Annotation", "Tag", "Table"):
        sys.exit(f"Unsupported object type: {obj_type}")
    if obj_type == "Table" and len(ids) > 1:
        raise ValueError("Only one table can be exported at a time")

    with ez.connect(user, pws, "", host, port, secure=True) as conn:
        if obj_type == "Annotation":
            ma_dict = {}
            for maid in ids:
                # Later annotations override earlier ones on duplicate keys
                ma_dict.update(ez.get_map_annotation(conn, maid))
            write_dict_to_tsv(ma_dict, ["Annotation ID", "Annotation Value"])
            return ma_dict

        if obj_type == "Tag":
            tags = [ez.get_tag(conn, tag_id) for tag_id in ids]
            # Sort the tags for consistency. The original `tags.sort` was
            # never called. The key keeps a missing value (None) from
            # breaking the comparison -- presumably returned for unknown
            # tag IDs, TODO confirm.
            tags.sort(key=lambda tag: (tag is None, tag or ""))
            write_values_to_tsv(tags, "Tags")
            return tags

        # obj_type == "Table"
        table = ez.get_table(conn, ids[0])
        write_table_to_tsv(table)
        return table


# Argument parsing
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetch and save data as TSV based on object type.")
    parser.add_argument("--credential-file", dest="credential_file", type=str,
                        required=True, help="Credential file (JSON file with username and password for OMERO)")
    parser.add_argument('--host', required=True,
                        help="Host server address.")
    parser.add_argument('--port', required=True, type=int,
                        help='OMERO port')
    parser.add_argument('--obj_type', required=True,
                        help="Type of object to fetch: Annotation, Table or Tag.")
    # One of --ids / --ids_path must be given, otherwise `ids` would be None
    # and the fetch would fail with an unrelated TypeError
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--ids', nargs='+', type=int,
                       help="IDs of the OMERO objects.")
    group.add_argument('--ids_path',
                       help="File with IDs of the OMERO objects (one per line).")
    # No default: the option is required, so a default could never apply
    parser.add_argument('--tsv_file', required=True,
                        help="Output TSV file path.")
    args = parser.parse_args()

    if args.ids_path:
        args.ids = []
        with open(args.ids_path, 'r') as f:
            for line in f:
                if not line.strip():
                    continue  # Blank lines are not IDs, skip them silently
                try:
                    args.ids.append(int(line))
                except ValueError:
                    print(f"{line.strip()} is not a valid ID.")
        if not args.ids:
            raise ValueError("Could not find a single ID in the file.")

    with open(args.credential_file, 'r') as f:
        crds = json.load(f)

    # Call the main function to get the object and save it as a TSV
    get_object_ezo(user=crds['username'], pws=crds['password'], host=args.host,
                   port=args.port,
                   obj_type=args.obj_type,
                   ids=args.ids,
                   tsv_file=args.tsv_file)
+ +The IDs can be obtained with the tool OMERO get IDs with ezomero + + + + 10.1038/nmeth.1896 + + \ No newline at end of file diff --git a/tools/omero/omero_import.xml b/tools/omero/omero_import.xml index 20247657..d83ed7bd 100644 --- a/tools/omero/omero_import.xml +++ b/tools/omero/omero_import.xml @@ -1,17 +1,13 @@ - + with omero-py - 5.18.0 - 2 + macros.xml + 3 omero - - omero-py - - openjdk - + $log && omero logout @@ -39,15 +34,9 @@ export OMERO_PASSWORD="$__user__.extra_preferences.get('omero_account|password', ]]> + - - ^[a-zA-Z0-9._-]*$ - '..' not in value - - - - diff --git a/tools/omero/omero_metadata_import.xml b/tools/omero/omero_metadata_import.xml index 590b4a69..9551f017 100644 --- a/tools/omero/omero_metadata_import.xml +++ b/tools/omero/omero_metadata_import.xml @@ -1,24 +1,17 @@ - + with ezomero - 5.18.0 - 2 + macros.xml + 3 omero - - ezomero - pandas - - openjdk - + - + - - ^[a-zA-Z0-9._-]*$ - '..' not in value - - + @@ -64,8 +48,6 @@ - - diff --git a/tools/omero/omero_roi_import.xml b/tools/omero/omero_roi_import.xml index 333fd813..6a12864d 100644 --- a/tools/omero/omero_roi_import.xml +++ b/tools/omero/omero_roi_import.xml @@ -1,46 +1,28 @@ - + with ezomero - 5.18.0 - 3 + macros.xml + 4 omero - - ezomero - pandas - - openjdk - + - + + - - ^[a-zA-Z0-9._-]*$ - '..' not in value - - - - diff --git a/tools/omero/test-data/output_KV_import.txt b/tools/omero/test-data/output_KV_import.txt index 44a790c8..028e491c 100644 --- a/tools/omero/test-data/output_KV_import.txt +++ b/tools/omero/test-data/output_KV_import.txt @@ -1 +1 @@ -SUCCESS: Successfully uploaded metadata for dataset with ID 2. Result: {'Key1': 'Value1', 'Key2': 'Value2'} +SUCCESS: Successfully uploaded metadata for dataset with ID 3. 
Result: {'Key1': 'Value1', 'Key2': 'Value2'} diff --git a/tools/omero/test-data/output_filter_filename.tsv b/tools/omero/test-data/output_filter_filename.tsv new file mode 100644 index 00000000..0cfbf088 --- /dev/null +++ b/tools/omero/test-data/output_filter_filename.tsv @@ -0,0 +1 @@ +2 diff --git a/tools/omero/test-data/output_filter_tag.tsv b/tools/omero/test-data/output_filter_tag.tsv new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tools/omero/test-data/output_filter_tag.tsv @@ -0,0 +1 @@ +1 diff --git a/tools/omero/test-data/output_ids_dataset.tsv b/tools/omero/test-data/output_ids_dataset.tsv new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tools/omero/test-data/output_ids_dataset.tsv @@ -0,0 +1 @@ +1 diff --git a/tools/omero/test-data/output_ids_image.tsv b/tools/omero/test-data/output_ids_image.tsv new file mode 100644 index 00000000..1191247b --- /dev/null +++ b/tools/omero/test-data/output_ids_image.tsv @@ -0,0 +1,2 @@ +1 +2 diff --git a/tools/omero/test-data/output_ids_project.tsv b/tools/omero/test-data/output_ids_project.tsv new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/tools/omero/test-data/output_ids_project.tsv @@ -0,0 +1 @@ +1