Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Dataset Collections to Object Oriented API #179

Merged
merged 12 commits into from
Jan 13, 2016
86 changes: 85 additions & 1 deletion bioblend/galaxy/objects/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,6 @@ def run(self, input_map=None, history='', params=None, import_inputs=False,
res = self.gi.gi.workflows.run_workflow(self.id, **kwargs)
# res structure: {'history': HIST_ID, 'outputs': [DS_ID, DS_ID, ...]}
out_hist = self.gi.histories.get(res['history'])
assert set(res['outputs']).issubset(out_hist.dataset_ids)
outputs = [out_hist.get_dataset(_) for _ in res['outputs']]

if wait:
Expand Down Expand Up @@ -626,6 +625,63 @@ def delete(self):
self.refresh()


@six.add_metaclass(abc.ABCMeta)
class DatasetCollection(Wrapper):
"""
Abstract base class for Galaxy dataset collections.
"""
BASE_ATTRS = Wrapper.BASE_ATTRS + (
'state', 'deleted'
)
POLLING_INTERVAL = 1 # for state monitoring
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

POLLING_INTERVAL is unused, can be removed.


@abc.abstractmethod
def __init__(self, dsc_dict, container, gi=None):
    """
    Set up the wrapper and remember the container that owns it.

    :param dsc_dict: data describing the dataset collection; forwarded
      unchanged to the parent class initializer
    :param container: the object this collection belongs to; stored as
      the ``container`` attribute
    :param gi: forwarded to the parent class initializer as ``gi``
    """
    super(DatasetCollection, self).__init__(dsc_dict, gi=gi)
    # Store the attribute through ``object.__setattr__`` so that any
    # ``__setattr__`` defined in the class hierarchy is bypassed.
    object.__setattr__(self, 'container', container)

def refresh(self):
    """
    Reload this object's attributes and re-initialize it in place.

    The client module is looked up on ``self.gi.gi`` using the
    container's ``API_MODULE`` name, and its
    ``show_dataset_collection`` result is fed back into ``__init__``.

    Returns: self
    """
    client = getattr(self.gi.gi, self.container.API_MODULE)
    fresh_dict = client.show_dataset_collection(self.container.id, self.id)
    # Re-run the initializer so every wrapped attribute is replaced by
    # the freshly fetched values; container and gi are kept as they are.
    self.__init__(fresh_dict, self.container, self.gi)
    return self


class HistoryDatasetCollectionAssociation(DatasetCollection):
"""
Maps to a Galaxy ``HistoryDatasetCollectionAssociation``.
"""
BASE_ATTRS = DatasetCollection.BASE_ATTRS + ('tags', 'visible', 'elements')
SRC = 'hdca'

def __init__(self, dsc_dict, container, gi=None):
    """
    Initialize the wrapper by delegating to the parent class.

    All arguments are passed through unchanged; ``gi`` is forwarded as
    a keyword argument.
    """
    parent = super(HistoryDatasetCollectionAssociation, self)
    parent.__init__(dsc_dict, container, gi=gi)

@property
def gi_module(self):
    """
    The ``histories`` attribute of the ``gi`` object held by this wrapper.
    """
    histories_module = self.gi.histories
    return histories_module

@property
def _stream_url(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is not used and does not make sense for a dataset collection, please remove.

base_url = self.gi.gi._make_url(
self.gi.gi.histories, module_id=self.container.id, contents=True)
return "%s/%s/display" % (base_url, self.id)

def delete(self):
    """
    Delete this dataset collection.

    After the delete request, both the owning container and this
    object are refreshed.
    """
    histories_client = self.gi.gi.histories
    histories_client.delete_dataset_collection(self.container.id, self.id)
    # Refresh the container first, then this wrapper, in the same order
    # as the delete request is followed up in the original code path.
    self.container.refresh()
    self.refresh()


class LibRelatedDataset(Dataset):
"""
Base class for LibraryDatasetDatasetAssociation and LibraryDataset classes.
Expand Down Expand Up @@ -807,6 +863,7 @@ class History(DatasetContainer):
"""
BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('annotation', 'state', 'state_ids', 'state_details', 'tags')
DS_TYPE = HistoryDatasetAssociation
DSC_TYPE = HistoryDatasetCollectionAssociation
CONTENT_INFO_TYPE = HistoryContentInfo
API_MODULE = 'histories'

Expand Down Expand Up @@ -965,6 +1022,33 @@ def download(self, jeha_id, outf, chunk_size=bioblend.CHUNK_SIZE):
return self.gi.gi.histories.download_history(
self.id, jeha_id, outf, chunk_size=chunk_size)

def new_dataset_collection(self, collection_description):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about calling this create_dataset_collection instead? If you agree, please change also the corresponding test name.

"""
Create a new dataset collection in the history by providing the dataset ids.

:type collection_description: bioblend.galaxy.dataset_collections.CollectionDescription
:param collection_description: a description of the dataset collection

:rtype: :class:`~.HistoryDatasetCollectionAssociation`
:return: the new dataset collection
"""
dataset_collection = self.gi.gi.histories.create_dataset_collection(self.id, collection_description)
self.refresh()
return self.get_dataset_collection(dataset_collection['id'])

def get_dataset_collection(self, dsc_id):
    """
    Fetch the dataset collection with the given id and wrap it.

    :type dsc_id: str
    :param dsc_id: dataset collection id

    :rtype: :class:`~.HistoryDatasetCollectionAssociation`
    :return: the dataset collection corresponding to ``dsc_id``
    """
    histories_client = self.gi.gi.histories
    collection_dict = histories_client.show_dataset_collection(self.id, dsc_id)
    # The wrapper class is taken from ``DSC_TYPE``; this object is
    # passed as the new wrapper's container.
    wrapper_cls = self.DSC_TYPE
    return wrapper_cls(collection_dict, self, gi=self.gi)


class Library(DatasetContainer):
"""
Expand Down
51 changes: 51 additions & 0 deletions tests/TestGalaxyObjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import bioblend.galaxy.objects.wrappers as wrappers
import bioblend.galaxy.objects.galaxy_instance as galaxy_instance
from bioblend import ConnectionError
from bioblend.galaxy import dataset_collections as collections
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you not rename the import? I find it easier to grep the whole name.


import test_util
from test_util import unittest
Expand Down Expand Up @@ -558,6 +559,15 @@ def __check_dataset(self, hda):
self.assertEqual(len(self.hist.dataset_ids), 1)
self.assertEqual(self.hist.dataset_ids[0], hda.id)

def __check_dataset_collection(self, hdca):
    """
    Assert that ``hdca`` is a collection wrapper owned by ``self.hist``
    whose first two elements match the history's two datasets, in order.
    """
    self.assertIsInstance(hdca, wrappers.HistoryDatasetCollectionAssociation)
    self.assertIs(hdca.container, self.hist)
    self.assertEqual(len(self.hist.dataset_ids), 2)
    # Fetch both datasets before comparing, then check them pairwise
    # against the ids recorded in the collection's elements.
    datasets = [
        self.hist.get_dataset(self.hist.dataset_ids[position])
        for position in range(2)
    ]
    for position, dataset in enumerate(datasets):
        self.assertEqual(dataset.id, hdca.elements[position]['object']['id'])

def test_import_dataset(self):
lib = self.gi.libraries.create('test_%s' % uuid.uuid4().hex)
lds = lib.upload_data(FOO_DATA)
Expand Down Expand Up @@ -617,6 +627,29 @@ def test_update(self):
self.assertEqual(self.hist.annotation, new_annotation)
self.assertEqual(self.hist.tags, new_tags)

def test_new_dataset_collection(self):
    # Build a collection in the test history and validate the wrapper
    # that comes back.
    description = self._create_collection_description()
    hdca = self.hist.new_dataset_collection(description)
    self.__check_dataset_collection(hdca)

def test_delete_dataset_collection(self):
    # Create a collection, delete it, and check that the wrapper
    # reports itself as deleted afterwards.
    description = self._create_collection_description()
    hdca = self.hist.new_dataset_collection(description)
    hdca.delete()
    self.assertTrue(hdca.deleted)

def _create_collection_description(self):
    """
    Paste two datasets into the test history and return a collection
    description named "MyDatasetList" that references both of them.
    """
    # Pair each element name with the dataset pasted for it; the tuple
    # is evaluated left to right, so FOO_DATA is pasted first.
    labelled_datasets = (
        ("sample1", self.hist.paste_content(FOO_DATA)),
        ("sample2", self.hist.paste_content(FOO_DATA_2)),
    )
    elements = [
        collections.HistoryDatasetElement(name=label, id=dataset.id)
        for label, dataset in labelled_datasets
    ]
    return collections.CollectionDescription(
        name="MyDatasetList",
        elements=elements,
    )

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to add one more blank line here, flake8 is complaining:

./tests/TestGalaxyObjects.py:653:1: E302 expected 2 blank lines, found 1


@test_util.skip_unless_galaxy()
class TestHDAContents(GalaxyObjectsTestBase):
Expand Down Expand Up @@ -704,6 +737,24 @@ def test_new_history(self):
def test_params(self):
self.__test(params=True)

def test_run_workflow_with_dataset_collection(self):
    # Import the workflow definition shipped with the test data.
    workflow_path = os.path.join(THIS_DIR, 'data', 'dataset_collection_run.ga')
    with open(workflow_path) as handle:
        workflow_source = handle.read()
        wf = self.gi.workflows.import_new(workflow_source)
    history_name = "Run Workflow With Dataset Collection"
    outputhist = self.gi.histories.create(history_name)
    # Build a two-element collection from the first two input datasets.
    elements = [
        collections.HistoryDatasetElement(name=label, id=self.inputs[index].id)
        for index, label in enumerate(("sample1", "sample2"))
    ]
    description = collections.CollectionDescription(
        name="MyDatasetList",
        elements=elements,
    )
    hdca = outputhist.new_dataset_collection(description)
    # The workflow input is keyed by its label; the history is passed
    # to run() by name, not as the object created above.
    outputs = wf.run({"Input Dataset Collection": hdca}, history_name)
    self.assertEqual(len(outputs), 2)


@test_util.skip_unless_galaxy()
class TestJob(GalaxyObjectsTestBase):
Expand Down
65 changes: 65 additions & 0 deletions tests/data/dataset_collection_run.ga
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "DC Run",
"steps": {
"0": {
"annotation": "",
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "Input Dataset Collection"
}
],
"label": null,
"name": "Input dataset collection",
"outputs": [],
"position": {
"left": 338,
"top": 306
},
"tool_errors": null,
"tool_id": null,
"tool_state": "{\"collection_type\": \"list\", \"name\": \"Input Dataset Collection\"}",
"tool_version": null,
"type": "data_collection_input",
"user_outputs": [],
"uuid": "4a497708-240c-420e-9c7c-a2dc0200476e"
},
"1": {
"annotation": "",
"id": 1,
"input_connections": {
"input": {
"id": 0,
"output_name": "output"
}
},
"inputs": [],
"label": null,
"name": "Sort",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 676,
"top": 286
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "sort1",
"tool_state": "{\"__page__\": 0, \"style\": \"\\\"num\\\"\", \"column\": \"\\\"1\\\"\", \"__rerun_remap_job_id__\": null, \"order\": \"\\\"DESC\\\"\", \"input\": \"null\", \"column_set\": \"[]\"}",
"tool_version": "1.0.3",
"type": "tool",
"user_outputs": [],
"uuid": "3c33333f-08ce-43bf-bc09-fa0b5776845b"
}
},
"uuid": "fd34f048-e17a-4e06-85e8-ed391192d284"
}