New Tools: OMERO get and OMERO filter using ezomero (#61)

* add new tools (omero get and omero filter) and updated the ci/pr pipeline to add metadata to the dummy set * use conditionals to more explicitly indicate the id required. * fix annotation part * convert omero_get to omero_get_id * add get_value * add single quotes * Fix bad copy paste * add tests * remove unused f * remove unused function * fix function name * fix spaces * Updated all the scripts and xml files according to previous review * correct indentation of omero_filter and omero_get_id * Update omero_filter.py Fixed the error handling using sys * fixed code and xml from omero_filter for correct linting * correct image name * fixed indentation * fix the regex of omero_get_value and updated the shed * fixed the regex to handle single number plus comma separated numbers * Fix testing in output and omero_get_value.xml * Correct the cheetah syntax in omero_get_value.xml * Correct the test in omero_get_value.xml - tsv output has 2 rows considering the header * Last corrections to the omero_get_value script to harmonize it with the other code style (writing tsv in write mode and error handling) * add macros * omero import do not expose password * add requirement macros * add host port token * bump * add macros file * fix macro expansion --------- Co-authored-by: Lucille Delisle <[email protected]> Co-authored-by: Matthias Bernt <[email protected]>
Helmholtz-UFZ · Oct 22, 2024 · 19d84fd · 19d84fd
1 parent d99e4d2
commit 19d84fd
Show file tree

Hide file tree

Showing 19 changed files with 747 additions and 71 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -188,6 +188,8 @@ jobs:
         DID=$(omero obj new Dataset name='test_dts')
         omero obj new ProjectDatasetLink parent=$PID child=$DID
         omero import -d $DID .github/dummy-dts-omero
+        omero tag create --name test_tag --desc 'description of my_tag'
+        omero tag link Image:1 1
         echo "Created the dummy dataset into OMERO"
 
     # download or create large test data via script

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -329,6 +329,8 @@ jobs:
         DID=$(omero obj new Dataset name='test_dts')
         omero obj new ProjectDatasetLink parent=$PID child=$DID
         omero import -d $DID .github/dummy-dts-omero
+        omero tag create --name test_tag --desc 'description of my_tag'
+        omero tag link Image:1 1
         echo "Created the dummy dataset into OMERO"
 
     # download or create large test data via script

diff --git a/tools/omero/.shed.yml b/tools/omero/.shed.yml
@@ -1,8 +1,8 @@
 categories:
 - Imaging
 name: omero_upload
-description: Import images, region of interest, metadata into an OMERO.server using omero-py
-long_description: Tool to import and link different objects into OMERO
+description: Interact with an OMERO.server using omero-py and ezomero.
+long_description: This set of tools allows importing images and metadata into an OMERO.server.
 owner: ufz
 remote_repository_url: https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/omero
 homepage_url: https://github.com/ome/omero-py/
@@ -13,4 +13,4 @@ suite:
   name: "suite_omero_py"
   description: "A suite of tools that brings the omero-py and ezomero project into Galaxy."
   long_description: |
-    OMERO.py provides an interface to the OMERO.blitz server.
+    OMERO.py and ezomero provide an interface to the OMERO.blitz server.
diff --git a/tools/omero/macros.xml b/tools/omero/macros.xml
@@ -0,0 +1,47 @@
+<macros>
+    <!-- for historic reasons the omero-py version is used as the version for all tools -->
+    <token name="@TOOL_VERSION@">5.18.0</token>
+    <token name="@EZOMERO_VERSION@">3.0.1</token>
+    <token name="@PROFILE@">23.0</token>
+
+    <!-- Requirements for the ezomero based tools (ezomero and pandas); the yield
+         element lets an expanding tool append further requirement elements. -->
+    <xml name="ezomero_requirements">
+        <requirements>
+            <requirement type="package" version="@EZOMERO_VERSION@">ezomero</requirement>
+            <requirement type="package" version="2.2.2">pandas</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <!-- Requirements for the omero-py based tools (omero-py and openjdk); the yield
+         element lets an expanding tool append further requirement elements. -->
+    <xml name="omeropy_requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">omero-py</requirement>
+            <!-- openjdk is needed: https://github.com/conda-forge/omero-py-feedstock/pull/16 -->
+            <requirement type="package" version="21.0.2">openjdk</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+
+    <!-- Shared connection inputs: the OMERO host and port, plus two hidden
+         parameters (empty by default) that the credentials config file below
+         falls back to; the tool tests set them to supply a login.
+         NOTE(review): the host regex ends in '*' and therefore also matches an
+         empty string; confirm whether an empty host should be rejected. -->
+    <xml name="host_port">
+        <param name="omero_host" type="text" label="OMERO host URL">
+            <validator type="regex" message="Enter a valid host location, for example, your.omero.server">^[a-zA-Z0-9._-]*$</validator>
+            <validator type="expression" message="No two dots (..) allowed">'..' not in value</validator>
+        </param>
+        <param argument="omero_port" type="integer" optional="false" value="4064" label="OMERO port"/>
+        <param name="test_username" type="hidden" value=""/>
+        <param name="test_password" type="hidden" value=""/>
+    </xml>
+    <!-- Command line fragment that passes the host and port inputs to a script -->
+    <token name="@HOST_PORT@">
+        --host '$omero_host'
+        --port $omero_port
+    </token>
+
+    <!-- JSON config file holding the OMERO login. Username and password are read
+         from the user's extra preferences (keys omero_account|username and
+         omero_account|password) and fall back to the hidden test parameters.
+         NOTE(review): the values are inserted into the JSON text as-is; presumably
+         a password containing a double quote or backslash would produce invalid
+         JSON; verify. -->
+    <xml name="credentials">
+        <configfile name="credentials"><![CDATA[
+{
+    "username": "$__user__.extra_preferences.get('omero_account|username', $test_username)",
+    "password": "$__user__.extra_preferences.get('omero_account|password', $test_password)"
+}
+        ]]></configfile>
+    </xml>
+</macros>
diff --git a/tools/omero/omero_filter.py b/tools/omero/omero_filter.py
@@ -0,0 +1,77 @@
+import argparse
+import csv
+import json
+import sys
+
+import ezomero as ez
+
+
+def filter_ids_ezo(user, pws, host, port, filter, id, value1, value2=None, tsv_file="filter_list.tsv"):
+
+    # Transform the id input in a list of integer
+    id = id.split(',')
+    id = list(map(int, id))
+
+    # Function to write tabular file from the ezomero output
+    def write_ids_to_tsv(data):
+        with open(tsv_file, 'w', newline='') as f:
+            writer = csv.writer(f, delimiter='\t')
+            for item in data:
+                writer.writerow([item])  # Write each ID
+
+    with ez.connect(user, pws, "", host, port, secure=True) as conn:
+
+        if filter == "filename":
+            fn_ids = ez.filter_by_filename(conn, id, value1)
+            write_ids_to_tsv(fn_ids)
+            return fn_ids
+
+        elif filter == "KP":
+            kp_ims = ez.filter_by_kv(conn, id, value1, value2)
+            write_ids_to_tsv(kp_ims)
+            return kp_ims
+
+        elif filter == "tag":
+            tg_dict = ez.filter_by_tag_value(conn, id, value1)
+            write_ids_to_tsv(tg_dict)
+            return tg_dict
+
+        else:
+            sys.exit(f"Unsupported object type: {filter}")
+
+
+# Argument parsing
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Fetch and save data as TSV based on object type.")
+    parser.add_argument("--credential-file", dest="credential_file", type=str, required=True,
+                        help="Credential file (JSON file with username and password for OMERO)")
+    parser.add_argument('--host', required=True,
+                        help="Host server address.")
+    parser.add_argument('--port', required=True, type=int,
+                        help='OMERO port')
+    parser.add_argument('--filter', required=True,
+                        help="Filter type - Filename, Key-Value Pairs, Tag")
+    parser.add_argument('--id', required=True,
+                        help="List of images IDs")
+    parser.add_argument('--value1', required=True,
+                        help="First searching values - Filename, Key, Tag")
+    parser.add_argument('--value2', required=False,
+                        help="Second searching values - Value (necessary just for Key-Value Pairs filter")
+    parser.add_argument('--tsv_file', default='filter_list.tsv',
+                        help="Output TSV file path.")
+    args = parser.parse_args()
+
+    if args.filter == "KP" and args.value2 is None:
+        raise ValueError("'--value 2' is necessary to retrieve KP")
+
+    with open(args.credential_file, 'r') as f:
+        crds = json.load(f)
+
+    # Call the main function to get the object and save it as a TSV
+    filter_ids_ezo(user=crds['username'], pws=crds['password'], host=args.host,
+                   port=args.port,
+                   filter=args.filter,
+                   value1=args.value1,
+                   value2=args.value2,
+                   id=args.id,
+                   tsv_file=args.tsv_file)
diff --git a/tools/omero/omero_filter.xml b/tools/omero/omero_filter.xml
@@ -0,0 +1,115 @@
+<tool id="omero_filter" name="OMERO IDs" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description> with ezomero </description>
+    <macros>
+        <import>macros.xml</import>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">omero</xref>
+    </xrefs>
+    <expand macro="ezomero_requirements"/>
+    <!-- Parameters declared inside the filter_type conditional are referenced
+         through the conditional name, and every user supplied value is quoted. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__'/omero_filter.py
+        --credential-file '$credentials'
+        @HOST_PORT@
+        --filter '$filter_type.filter'
+        --value1 '$filter_type.value1'
+        --id '$filter_type.did'
+        --tsv_file '$tsv'
+        #if $filter_type.filter == "KP"
+        --value2 '$filter_type.value2'
+        #end if
+
+    ]]></command>
+    <configfiles>
+        <expand macro="credentials"/>
+    </configfiles>
+    <inputs>
+        <expand macro="host_port"/>
+        <!-- Each case exposes the search value(s) it needs plus the list of image
+             IDs to filter; only the Key-Value case asks the user for value2. -->
+        <conditional name="filter_type">
+            <param name="filter" type="select" optional="false" label="Filter type to apply:">
+                <option value="filename">Filename</option>
+                <option value="KP">Key-Value</option>
+                <option value="tag">Tag</option>
+            </param>
+            <when value="filename">
+                <param name="value1" type="text" label="Filename to search among the image IDs">
+                    <validator type="regex" message="Enter a valid filename to search in the OMERO server">^[\w\-. ]+$</validator>
+                </param>
+                <param name="value2"  value="" type="hidden" label="Not necessary filter"/>
+                <param name="did" type="text" label="List of images IDs">
+                    <validator type="regex" message="Enter a valid list of IDs (i.e. 2,45,56,67)">^\d+(,\d+)*$</validator>
+                </param>
+            </when>
+            <when value="KP">
+                <param name="value1" type="text" label="Key Value to search among the image IDs">
+                    <validator type="regex" message="Enter a valid Key to search in the OMERO server">^[\w\-. ]+$</validator>
+                </param>
+                <!-- must be a visible text input: a hidden parameter cannot be filled in by the user -->
+                <param name="value2" type="text" label="Pair Values to search among images IDs">
+                    <validator type="regex" message="Enter a valid Value to search in the OMERO server">^[\w\-. ]+$</validator>
+                </param>
+                <param name="did" type="text" label="List of images IDs">
+                    <validator type="regex" message="Enter a valid list of IDs (i.e. 2,45,56,67)">^\d+(,\d+)*$</validator>
+                </param>
+            </when>
+            <when value="tag">
+                <param name="value1" type="text" label="Tag to search among the images IDs">
+                    <validator type="regex" message="Enter a valid Tag to search in the OMERO server">^[\w\-. ]+$</validator>
+                </param>
+                <param name="value2"  value="" optional="true" type="hidden" label="Not necessary filter"/>
+                <param name="did" type="text" label="List of images IDs">
+                    <validator type="regex" message="Enter a valid list of IDs (i.e. 2,45,56,67)">^\d+(,\d+)*$</validator>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="tsv" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="omero_host" value="host.docker.internal"/>
+            <param name="omero_port" value="6064"/>
+            <conditional name="filter_type">
+                <param name="filter" value="filename"/>
+                <param name="value1" value="sample_image_2.jpg"/>
+                <param name="did" value="1,2"/>
+            </conditional>
+            <param name="test_username" value="root"/>
+            <param name="test_password" value="omero"/>
+            <output name="tsv" value="output_filter_filename.tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="2"/>
+                    <has_n_columns n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="omero_host" value="host.docker.internal"/>
+            <param name="omero_port" value="6064"/>
+            <conditional name="filter_type">
+                <param name="filter" value="tag"/>
+                <param name="value1" value="test_tag"/>
+                <param name="did" value="1,2"/>
+            </conditional>
+            <param name="test_username" value="root"/>
+            <param name="test_password" value="omero"/>
+            <output name="tsv" value="output_filter_tag.tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="1"/>
+                    <has_n_columns n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+Description
+-----------
+
+Tool to filter image IDs by filename, key-value pair or tag value.
+For the Key-Value filter, two values are required (Value1 = Key, Value2 = Value).
+IDs are given as a comma-separated list of image IDs, which can be fetched using the omero_get tool.
+
+    </help>
+    <citations>
+        <citation type="doi">10.1038/nmeth.1896</citation>
+    </citations>
+</tool>
diff --git a/tools/omero/omero_get_id.py b/tools/omero/omero_get_id.py
@@ -0,0 +1,116 @@
+import argparse
+import csv
+import json
+import sys
+
+import ezomero as ez
+
+
+def get_ids_ezo(user, pws, host, port, final_obj_type, parent_obj_type, parent_id=None, tsv_file="id_list.tsv"):
+
+    # Function to write tabular file from the ezomero output
+    def write_ids_to_tsv(data):
+        with open(tsv_file, 'w', newline='') as f:
+            writer = csv.writer(f, delimiter='\t')
+            for item in data:
+                writer.writerow([item])  # Write each ID
+
+    with ez.connect(user, pws, "", host, port, secure=True) as conn:
+
+        if final_obj_type == "Project":
+            proj_ids = ez.get_project_ids(conn)
+            write_ids_to_tsv(proj_ids)
+            return proj_ids
+
+        elif final_obj_type == "Dataset":
+            args = {'project': None}
+            if parent_obj_type == "Project":
+                args['project'] = parent_id
+            ds_ids = ez.get_dataset_ids(conn, **args)
+            write_ids_to_tsv(ds_ids)
+            return ds_ids
+
+        elif final_obj_type == "Image":
+            args = {
+                'project': None,
+                'dataset': None,
+                'plate': None,
+                'well': None
+            }
+            if parent_obj_type == "Project":
+                args['project'] = parent_id
+            elif parent_obj_type == "Dataset":
+                args['dataset'] = parent_id
+            elif parent_obj_type == "Plate":
+                args['plate'] = parent_id
+            elif parent_obj_type == "Well":
+                args['well'] = parent_id
+            elif parent_obj_type != "All":
+                raise ValueError("Object set as parent_obj_type is not compatible")
+
+            ds_ims = ez.get_image_ids(conn, **args)
+            write_ids_to_tsv(ds_ims)
+            return ds_ims
+
+        elif final_obj_type == "Annotation":
+            map_annot_ids = ez.get_map_annotation_ids(conn, parent_obj_type, parent_id)
+            write_ids_to_tsv(map_annot_ids)
+            return map_annot_ids
+
+        elif final_obj_type == "Tag":
+            tag_ids = ez.get_tag_ids(conn, parent_obj_type, parent_id)
+            write_ids_to_tsv(tag_ids)
+            return tag_ids
+
+        elif final_obj_type == "Roi":
+            roi_ids = ez.get_roi_ids(conn, parent_id)
+            write_ids_to_tsv(roi_ids)
+            return roi_ids
+
+        elif final_obj_type == "Table":
+            file_ann_ids = ez.get_file_annotation_ids(conn, parent_obj_type, parent_id)
+            write_ids_to_tsv(file_ann_ids)
+            return file_ann_ids
+
+        else:
+            sys.exit(f"Unsupported object type: {filter}")
+
+
+# Argument parsing
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Fetch OMERO object IDs as TSV from parent object.")
+    parser.add_argument("--credential-file", dest="credential_file", type=str,
+                        required=True, help="Credential file (JSON file with username and password for OMERO)")
+    parser.add_argument('--host', required=True,
+                        help="Host server address.")
+    parser.add_argument('--port', required=True, type=int,
+                        help='OMERO port')
+    parser.add_argument('--final_obj_type', required=True,
+                        help="Type of object to fetch ID: Project, Dataset, Image, Annotation, Tag, Roi, or Table.")
+    parser.add_argument('--parent_obj_type', required=True,
+                        help="Type of object from which you fetch IDs: Project, Dataset, Plate, Well, Image (or 'All' if you want to get all objects).")
+    parser.add_argument('--parent_id', required=False, type=int,
+                        help="ID of the OMERO object in `--parent_obj_type`, not required if you used `--parent_obj_type All`.")
+    parser.add_argument('--tsv_file', default='id_list.tsv',
+                        help="Output TSV file path.")
+    args = parser.parse_args()
+
+    if args.parent_id is None and args.parent_obj_type != "All":
+        raise ValueError("ID is only optional is you use `--parent_obj_type All`")
+
+    if args.final_obj_type == "Roi" and args.parent_obj_type != "Image":
+        raise ValueError("Roi IDs can only be retrived from images, use `--parent_obj_type Image`")
+
+    if args.parent_obj_type == "All" and args.final_obj_type not in ["Image", "Dataset", "Project"]:
+        raise ValueError("Only Images, Datasets and Projects is compatible with `--parent_obj_type All`")
+
+    with open(args.credential_file, 'r') as f:
+        crds = json.load(f)
+
+    # Call the main function to get the object and save it as a TSV
+    get_ids_ezo(user=crds['username'], pws=crds['password'], host=args.host,
+                port=args.port,
+                final_obj_type=args.final_obj_type,
+                parent_obj_type=args.parent_obj_type,
+                parent_id=args.parent_id,
+                tsv_file=args.tsv_file)