Skip to content

Commit

Permalink
Implement paired_or_unpaired collections...
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Dec 22, 2024
1 parent a9cec6c commit 8fc2cfe
Show file tree
Hide file tree
Showing 25 changed files with 860 additions and 28 deletions.
10 changes: 10 additions & 0 deletions client/src/components/Collections/CollectionCreatorModal.vue
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import type { CollectionType, DatasetPair } from "../History/adapters/buildColle
import ListCollectionCreator from "./ListCollectionCreator.vue";
import PairCollectionCreator from "./PairCollectionCreator.vue";
import PairedListCollectionCreator from "./PairedListCollectionCreator.vue";
import PairedOrUnpairedListCollectionCreator from "./PairedOrUnpairedListCollectionCreator.vue";
import Heading from "@/components/Common/Heading.vue";
import GenericItem from "@/components/History/Content/GenericItem.vue";
import LoadingSpan from "@/components/LoadingSpan.vue";
Expand Down Expand Up @@ -304,6 +305,15 @@ function resetModal() {
:extensions="props.extensions"
@clicked-create="createListPairedCollection"
@on-cancel="hideModal" />
<PairedOrUnpairedListCollectionCreator
v-else-if="props.collectionType === 'list:paired_or_unpaired'"
:history-id="props.historyId"
:initial-elements="creatorItems || []"
:default-hide-source-items="props.defaultHideSourceItems"
:from-selection="fromSelection"
:extensions="props.extensions"
@clicked-create="createListPairedCollection"
@on-cancel="hideModal" />
<PairCollectionCreator
v-else-if="props.collectionType === 'paired'"
:history-id="props.historyId"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
<script setup lang="ts">
import { type ColDef } from "ag-grid-community";
import { BCol, BContainer, BRow } from "bootstrap-vue";
import { computed, ref } from "vue";
import type { HistoryItemSummary } from "@/api";
import { useAgGrid } from "@/composables/useAgGrid";
import type { DatasetPair } from "../History/adapters/buildCollectionModal";
interface Props {
historyId: string;
initialElements: HistoryItemSummary[];
defaultHideSourceItems?: boolean;
fromSelection?: boolean;
extensions?: string[];
height?: string;
width?: string;
}
const { gridApi, AgGridVue, onGridReady, theme } = useAgGrid(resize);
const generatedPairs = ref<DatasetPair[]>([]);
/**
 * Resize callback handed to `useAgGrid`: asks the grid to fit its columns
 * to the available width. Does nothing while the grid API is not yet set.
 */
function resize() {
    gridApi.value?.sizeColumnsToFit();
}
const props = defineProps<Props>();
// Inline sizing applied to the grid; falls back to 100% width and 500px
// height when the corresponding prop is empty or not provided.
const style = computed(() => ({
    width: props.width || "100%",
    height: props.height || "500px",
}));
// Default Column Properties
const defaultColDef = ref<ColDef>({
editable: false,
sortable: false,
filter: false,
resizable: true,
});
const rowData = ref<Record<string, unknown>[]>([]);
// Column layout for the grid: a single non-editable column bound to each
// row's "datasets" field.
const columnDefs = computed(() => {
    const columns: ColDef[] = [
        {
            headerName: "Dataset(s)",
            field: "datasets",
            editable: false,
        },
    ];
    return columns;
});
// Human-readable summary shown above the grid: how many pairs were generated,
// followed (when there are any input elements) by a sentence about unpaired
// datasets.
// NOTE(review): the "not paired" count is simply the number of initial
// elements, not the number left over after pairing - confirm this is intended.
const summaryText = computed(() => {
    const pairCount = generatedPairs.value.length;
    const remaining = props.initialElements.length;
    const matchedSentence = `Auto-matched ${pairCount} pair(s) of datasets from target datasets.`;
    const unmatchedSentence =
        remaining > 0
            ? `${remaining} dataset(s) were not paired and will not be included in the resulting list of pairs.`
            : "";
    return `${matchedSentence} ${unmatchedSentence}`;
});
/**
 * Seed the grid with one row per initial element, showing the element's name
 * in the "datasets" column.
 */
function initialize() {
    for (const dataset of props.initialElements) {
        // The stray per-element console.log debug output was removed here.
        rowData.value.push({ datasets: dataset.name });
    }
}
initialize();
</script>

<template>
<BContainer style="max-width: 100%">
<BRow>
<BCol>
<p>{{ summaryText }}</p>
</BCol>
</BRow>
<BRow>
<div :style="style" :class="theme">
<AgGridVue
:row-data="rowData"
:column-defs="columnDefs"
:default-col-def="defaultColDef"
:style="style"
@gridReady="onGridReady" />
</div>
</BRow>
</BContainer>
</template>
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,18 @@
<b-dropdown-item v-if="showBuildOptions" data-description="build list" @click="buildDatasetList">
<span v-localize>Build Dataset List</span>
</b-dropdown-item>
<b-dropdown-item v-if="showBuildOptions" data-description="build list of pairs" @click="buildListOfPairs">
<b-dropdown-item
v-if="showBuildOptions"
data-description="build list of pairs"
@click="buildListOfPairs">
<span v-localize>Build List of Dataset Pairs</span>
</b-dropdown-item>
<b-dropdown-item
v-if="showBuildOptions"
data-description="build list of paired_or_unpaired"
@click="buildListOfMixedPaired">
<span v-localize>Build List with Optional Pairing</span>
</b-dropdown-item>
<b-dropdown-item
v-if="showBuildOptions"
data-description="build collection from rules"
Expand Down Expand Up @@ -404,6 +413,11 @@ export default {
this.collectionSelection = Array.from(this.contentSelection.values());
this.collectionModalShow = true;
},
buildListOfMixedPaired() {
this.collectionModalType = "list:paired_or_unpaired";
this.collectionSelection = Array.from(this.contentSelection.values());
this.collectionModalShow = true;
},
createdCollection(collection) {
this.$emit("reset-selection");
},
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/model/dataset_collections/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
from .types import (
list,
paired,
paired_or_unpaired,
record,
)

PLUGIN_CLASSES = [
list.ListDatasetCollectionType,
paired.PairedDatasetCollectionType,
record.RecordDatasetCollectionType,
paired_or_unpaired.PairedOrUnpairedDatasetCollectionType,
]


Expand Down
11 changes: 7 additions & 4 deletions lib/galaxy/model/dataset_collections/structure.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
""" Module for reasoning about structure of and matching hierarchical collections of data.
"""

import logging
from typing import TYPE_CHECKING

log = logging.getLogger(__name__)
if TYPE_CHECKING:
from .type_description import CollectionTypeDescription


class Leaf:
Expand Down Expand Up @@ -149,7 +150,7 @@ def clone(self):
return Tree(cloned_children, self.collection_type_description)

def __str__(self):
return f"Tree[collection_type={self.collection_type_description},children={','.join(f'{identifier_and_element[0]}={identifier_and_element[1]}' for identifier_and_element in self.children)}]"
return f"Tree[collection_type={self.collection_type_description},children=({','.join(f'{identifier_and_element[0]}={identifier_and_element[1]}' for identifier_and_element in self.children)})]"


def tool_output_to_structure(get_sliced_input_collection_structure, tool_output, collections_manager):
Expand Down Expand Up @@ -190,7 +191,9 @@ def dict_map(func, input_dict):
return {k: func(v) for k, v in input_dict.items()}


def get_structure(dataset_collection_instance, collection_type_description, leaf_subcollection_type=None):
def get_structure(
dataset_collection_instance, collection_type_description: "CollectionTypeDescription", leaf_subcollection_type=None
):
if leaf_subcollection_type:
collection_type_description = collection_type_description.effective_collection_type_description(
leaf_subcollection_type
Expand Down
17 changes: 16 additions & 1 deletion lib/galaxy/model/dataset_collections/subcollections.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
from galaxy import exceptions
from .adapters import PromoteCollectionElementToCollectionAdapter


def split_dataset_collection_instance(dataset_collection_instance, collection_type):
    """Split the instance's underlying collection by ``collection_type``.

    Thin wrapper that unwraps ``dataset_collection_instance.collection`` and
    delegates to ``_split_dataset_collection``, returning its result.
    """
    underlying_collection = dataset_collection_instance.collection
    return _split_dataset_collection(underlying_collection, collection_type)


def _is_a_subcollection_type(this_collection_type: str, collection_type: str):
if collection_type == "single_datasets":
# can be a subcollection of anything effectively...
return True
if not this_collection_type.endswith(collection_type) or this_collection_type == collection_type:
return False
return True


def _split_dataset_collection(dataset_collection, collection_type):
this_collection_type = dataset_collection.collection_type
if not this_collection_type.endswith(collection_type) or this_collection_type == collection_type:
is_this_collection_nested = ":" in this_collection_type
if not _is_a_subcollection_type(this_collection_type, collection_type):
raise exceptions.MessageException("Cannot split collection in desired fashion.")

split_elements = []
for element in dataset_collection.elements:
if not is_this_collection_nested and collection_type == "single_datasets":
split_elements.append(PromoteCollectionElementToCollectionAdapter(element))
continue

child_collection = element.child_collection
if child_collection is None:
raise exceptions.MessageException("Cannot split collection in desired fashion.")
Expand Down
30 changes: 26 additions & 4 deletions lib/galaxy/model/dataset_collections/type_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,12 @@ def effective_collection_type(self, subcollection_type):
if not self.has_subcollections_of_type(subcollection_type):
raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}")

if subcollection_type == "single_datasets":
return self.collection_type

return self.collection_type[: -(len(subcollection_type) + 1)]

def has_subcollections_of_type(self, other_collection_type):
def has_subcollections_of_type(self, other_collection_type) -> bool:
"""Take in another type (either flat string or another
CollectionTypeDescription) and determine if this collection contains
subcollections matching that type.
Expand All @@ -65,18 +68,37 @@ def has_subcollections_of_type(self, other_collection_type):
if hasattr(other_collection_type, "collection_type"):
other_collection_type = other_collection_type.collection_type
collection_type = self.collection_type
return collection_type.endswith(other_collection_type) and collection_type != other_collection_type
if collection_type == other_collection_type:
return False
if collection_type.endswith(other_collection_type):
return True
if other_collection_type == "paired_or_unpaired":
# this can be thought of as a subcollection of anything except a pair
# since it would match a pair exactly
return collection_type != "paired"
if other_collection_type == "single_datasets":
# effectively any collection has unpaired subcollections
return True
return False

def is_subcollection_of_type(self, other_collection_type):
    """Return whether ``other_collection_type`` has subcollections of this type.

    ``other_collection_type`` may be an object exposing a ``collection_type``
    attribute, which is used as-is, or any other value, which is first turned
    into a description through ``self.collection_type_description_factory``.
    The answer is whatever that description's ``has_subcollections_of_type``
    returns for ``self``.
    """
    if hasattr(other_collection_type, "collection_type"):
        other_description = other_collection_type
    else:
        factory = self.collection_type_description_factory
        other_description = factory.for_collection_type(other_collection_type)
    return other_description.has_subcollections_of_type(self)

def can_match_type(self, other_collection_type):
def can_match_type(self, other_collection_type) -> bool:
if hasattr(other_collection_type, "collection_type"):
other_collection_type = other_collection_type.collection_type
collection_type = self.collection_type
return other_collection_type == collection_type
if other_collection_type == collection_type:
return True
elif other_collection_type == "paired" and collection_type == "paired_or_unpaired":
return True
elif other_collection_type == "paired_or_unpaired" and collection_type == "paired":
return True

# can we push this to the type registry somehow?
return False

def subcollection_type_description(self):
if not self.__has_subcollections:
Expand Down
8 changes: 8 additions & 0 deletions lib/galaxy/model/dataset_collections/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ def generate_elements(self, dataset_instances: dict, **kwds):
class BaseDatasetCollectionType(DatasetCollectionType):
    """Base class providing validation helpers for collection type plugins."""

    def _validation_failed(self, message):
        """Raise ``ObjectAttributeInvalidException`` carrying ``message``."""
        raise exceptions.ObjectAttributeInvalidException(message)

    def _ensure_dataset_with_identifier(self, dataset_instances: dict, name: str):
        """Return the entry of ``dataset_instances`` stored under ``name``.

        Raises ``ObjectAttributeInvalidException`` (through
        ``_validation_failed``) when there is no entry for ``name`` or the
        entry is ``None``.
        """
        dataset_instance = dataset_instances.get(name)
        if dataset_instance is None:
            # Report through the shared helper so every validation failure in
            # this class is raised the same way.
            self._validation_failed(
                f"An element with the identifier {name} is required to create this collection type"
            )
        return dataset_instance
11 changes: 9 additions & 2 deletions lib/galaxy/model/dataset_collections/types/paired.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from galaxy.exceptions import RequestParameterInvalidException
from galaxy.model import (
DatasetCollectionElement,
HistoryDatasetAssociation,
Expand All @@ -16,13 +17,19 @@ class PairedDatasetCollectionType(BaseDatasetCollectionType):
collection_type = "paired"

def generate_elements(self, dataset_instances, **kwds):
if forward_dataset := dataset_instances.get(FORWARD_IDENTIFIER):
num_datasets = len(dataset_instances)
if num_datasets != 2:
    # This is the "paired" collection type, which is built from a forward and
    # a reverse dataset; the message previously named a "single_or_paired"
    # collection.
    raise RequestParameterInvalidException(
        "Incorrect number of datasets - 2 datasets exactly are required to create a paired collection"
    )

if forward_dataset := self._ensure_dataset_with_identifier(dataset_instances, FORWARD_IDENTIFIER):
left_association = DatasetCollectionElement(
element=forward_dataset,
element_identifier=FORWARD_IDENTIFIER,
)
yield left_association
if reverse_dataset := dataset_instances.get(REVERSE_IDENTIFIER):
if reverse_dataset := self._ensure_dataset_with_identifier(dataset_instances, REVERSE_IDENTIFIER):
right_association = DatasetCollectionElement(
element=reverse_dataset,
element_identifier=REVERSE_IDENTIFIER,
Expand Down
46 changes: 46 additions & 0 deletions lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from galaxy.exceptions import RequestParameterInvalidException
from galaxy.model import (
DatasetCollectionElement,
HistoryDatasetAssociation,
)
from . import BaseDatasetCollectionType
from .paired import (
FORWARD_IDENTIFIER,
REVERSE_IDENTIFIER,
)

SINGLETON_IDENTIFIER = "unpaired"


class PairedOrUnpairedDatasetCollectionType(BaseDatasetCollectionType):
    """Collection type holding either a forward/reverse pair or one unpaired dataset.

    With two datasets the elements are identified by ``FORWARD_IDENTIFIER`` and
    ``REVERSE_IDENTIFIER``; with a single dataset the element is identified by
    ``SINGLETON_IDENTIFIER``.
    """

    collection_type = "paired_or_unpaired"

    def generate_elements(self, dataset_instances, **kwds):
        """Yield one ``DatasetCollectionElement`` per required identifier.

        ``dataset_instances`` maps element identifiers to datasets and must
        contain exactly one or two entries.

        Raises ``RequestParameterInvalidException`` for any other number of
        entries. A missing required identifier is reported by
        ``_ensure_dataset_with_identifier`` at the point the corresponding
        element would be produced.
        """
        num_datasets = len(dataset_instances)
        if num_datasets > 2 or num_datasets < 1:
            raise RequestParameterInvalidException(
                "Incorrect number of datasets - 1 or 2 datasets is required to create a paired_or_unpaired collection"
            )

        if num_datasets == 2:
            identifiers = (FORWARD_IDENTIFIER, REVERSE_IDENTIFIER)
        else:
            identifiers = (SINGLETON_IDENTIFIER,)

        for identifier in identifiers:
            # The helper raises when the identifier is absent, so no further
            # truthiness check on the returned dataset is needed.
            dataset = self._ensure_dataset_with_identifier(dataset_instances, identifier)
            yield DatasetCollectionElement(
                element=dataset,
                element_identifier=identifier,
            )
2 changes: 2 additions & 0 deletions lib/galaxy/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
from typing_extensions import (
Annotated,
Literal,
NotRequired,
TypedDict,
)

from galaxy.schema import partial_model
Expand Down
Loading

0 comments on commit 8fc2cfe

Please sign in to comment.