Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate pifaces #507

Merged
merged 13 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion looper/cli_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None):
# Check at the beginning if user wants to use pipestat and pipestat is configurable
is_pipestat_configured = (
prj._check_if_pipestat_configured(pipeline_type=PipelineLevel.PROJECT.value)
if getattr(subcommand_args, "project", None)
if getattr(subcommand_args, "project", None) or subcommand_name == "runp"
else prj._check_if_pipestat_configured()
)

Expand Down
9 changes: 8 additions & 1 deletion looper/conductor.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def __init__(

self.collate = collate
self.section_key = PROJECT_PL_KEY if self.collate else SAMPLE_PL_KEY
self.pipeline_interface_type = (
"project_interface" if self.collate else "sample_interface"
)
self.pl_iface = pipeline_interface
self.pl_name = self.pl_iface.pipeline_name
self.prj = prj
Expand Down Expand Up @@ -681,7 +684,11 @@ def write_script(self, pool, size):
pipeline=self.pl_iface,
compute=self.prj.dcc.compute,
)
templ = self.pl_iface["command_template"]

if self.pipeline_interface_type is None:
templ = self.pl_iface["command_template"]
else:
templ = self.pl_iface[self.pipeline_interface_type]["command_template"]
if not self.override_extra:
extras_template = (
EXTRA_PROJECT_CMD_TEMPLATE
Expand Down
9 changes: 0 additions & 9 deletions looper/pipeline_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,6 @@ def __init__(self, config, pipeline_type=None):
)
self.update(config)
self._validate(schema_src=PIFACE_SCHEMA_SRC)
if "path" in self:
warn(
message="'path' specification as a top-level pipeline "
"interface key is deprecated and will be removed with "
"the next release. Please use 'paths' section "
"from now on.",
category=DeprecationWarning,
)
self._expand_paths(["path"])
self._expand_paths(["compute", "dynamic_variables_script_path"])

@property
Expand Down
17 changes: 10 additions & 7 deletions looper/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,12 @@ def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
pipestat_config_path = self._check_for_existing_pipestat_config(piface)

if not pipestat_config_path:
self._create_pipestat_config(piface)
self._create_pipestat_config(piface, pipeline_type)
else:
piface.psm = PipestatManager(
config_file=pipestat_config_path, multi_pipelines=True
config_file=pipestat_config_path,
multi_pipelines=True,
pipeline_type="sample",
)

elif pipeline_type == PipelineLevel.PROJECT.value:
Expand All @@ -426,10 +428,12 @@ def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
)

if not pipestat_config_path:
self._create_pipestat_config(prj_piface)
self._create_pipestat_config(prj_piface, pipeline_type)
else:
prj_piface.psm = PipestatManager(
config_file=pipestat_config_path, multi_pipelines=True
config_file=pipestat_config_path,
multi_pipelines=True,
pipeline_type="project",
)
else:
_LOGGER.error(
Expand Down Expand Up @@ -469,7 +473,7 @@ def _check_for_existing_pipestat_config(self, piface):
else:
return None

def _create_pipestat_config(self, piface):
def _create_pipestat_config(self, piface, pipeline_type):
"""
Each piface needs its own config file and associated psm
"""
Expand Down Expand Up @@ -512,11 +516,10 @@ def _create_pipestat_config(self, piface):
pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
else:
pipeline_name = None
if "pipeline_type" in piface.data:
pipestat_config_dict.update({"pipeline_type": piface.data["pipeline_type"]})

# Warn user if there is a mismatch in pipeline_names from sources!!!
if pipeline_name != output_schema_pipeline_name:
# TODO Raise exception here.
donaldcampbelljr marked this conversation as resolved.
Show resolved Hide resolved
_LOGGER.warning(
msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name} Output schema: {output_schema_pipeline_name} Defaulting to pipeline_interface value."
)
Expand Down
20 changes: 14 additions & 6 deletions looper/schemas/pipeline_interface_schema_generic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,20 @@ properties:
type: string
enum: ["project", "sample"]
description: "type of the pipeline, either 'project' or 'sample'"
command_template:
type: string
description: "Jinja2-like template to construct the command to run"
path:
type: string
description: "path to the pipeline program. Relative to pipeline interface file or absolute."
sample_interface:
type: object
description: "Section that defines the sample-level pipeline interface, e.g. its command template"
properties:
command_template:
type: string
description: "Jinja2-like template to construct the command to run"
project_interface:
type: object
description: "Section that defines the project-level pipeline interface, e.g. its command template"
properties:
command_template:
type: string
description: "Jinja2-like template to construct the command to run"
compute:
type: object
description: "Section that defines compute environment settings"
Expand Down
65 changes: 34 additions & 31 deletions looper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .const import *
from .command_models.commands import SUPPORTED_COMMANDS
from .exceptions import MisconfigurationException
from .exceptions import MisconfigurationException, PipelineInterfaceConfigError

_LOGGER = getLogger(__name__)

Expand Down Expand Up @@ -538,7 +538,7 @@ def initiate_looper_config(

def determine_pipeline_type(piface_path: str, looper_config_path: str):
"""
Read pipeline interface from disk and determine if pipeline type is sample or project-level
Read pipeline interface from disk and determine if it contains "sample_interface", "project_interface" or both


:param str piface_path: path to pipeline_interface
Expand All @@ -558,9 +558,17 @@ def determine_pipeline_type(piface_path: str, looper_config_path: str):
except FileNotFoundError:
return None, None

pipeline_type = piface_dict.get("pipeline_type", None)
pipeline_types = []
if piface_dict.get("sample_interface", None):
pipeline_types.append(PipelineLevel.SAMPLE.value)
if piface_dict.get("project_interface", None):
pipeline_types.append(PipelineLevel.PROJECT.value)

return pipeline_type, piface_path
if pipeline_types == []:
# TODO: warn the user that a pipeline interface must define "sample_interface" and/or "project_interface"
return None, None

return pipeline_types, piface_path


def read_looper_config_file(looper_config_path: str) -> dict:
Expand Down Expand Up @@ -606,36 +614,31 @@ def read_looper_config_file(looper_config_path: str) -> dict:

dp_data.setdefault(PIPELINE_INTERFACES_KEY, {})

if isinstance(dp_data.get(PIPELINE_INTERFACES_KEY), dict) and (
dp_data.get(PIPELINE_INTERFACES_KEY).get("sample")
or dp_data.get(PIPELINE_INTERFACES_KEY).get("project")
):
# Support original nesting of pipeline interfaces under "sample" and "project"
return_dict[SAMPLE_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get(
"sample"
)
return_dict[PROJECT_PL_ARG] = dp_data.get(PIPELINE_INTERFACES_KEY).get(
"project"
all_pipeline_interfaces = dp_data.get(PIPELINE_INTERFACES_KEY)
sample_pifaces = []
project_pifaces = []
if isinstance(all_pipeline_interfaces, str):
all_pipeline_interfaces = [all_pipeline_interfaces]
for piface in all_pipeline_interfaces:
pipeline_types, piface_path = determine_pipeline_type(
piface, looper_config_path
)
else:
# infer pipeline type based from interface instead of nested keys: https://github.com/pepkit/looper/issues/465
all_pipeline_interfaces = dp_data.get(PIPELINE_INTERFACES_KEY)
sample_pifaces = []
project_pifaces = []
if isinstance(all_pipeline_interfaces, str):
all_pipeline_interfaces = [all_pipeline_interfaces]
for piface in all_pipeline_interfaces:
pipeline_type, piface_path = determine_pipeline_type(
piface, looper_config_path
)
if pipeline_type == PipelineLevel.SAMPLE.value:
# if pipeline_types is None:
# raise PipelineInterfaceConfigError(
# f"'sample_interface and/or project_interface must be defined in each pipeline interface."
# )
# This will append the same, consolidated piface to two different lists
# In reality only the command templates are the differentiator
if pipeline_types is not None:
# TODO should we raise an exception here? I guess you can amend samples with interfaces...
donaldcampbelljr marked this conversation as resolved.
Show resolved Hide resolved
if PipelineLevel.SAMPLE.value in pipeline_types:
sample_pifaces.append(piface_path)
elif pipeline_type == PipelineLevel.PROJECT.value:
if PipelineLevel.PROJECT.value in pipeline_types:
project_pifaces.append(piface_path)
if len(sample_pifaces) > 0:
return_dict[SAMPLE_PL_ARG] = sample_pifaces
if len(project_pifaces) > 0:
return_dict[PROJECT_PL_ARG] = project_pifaces
if len(sample_pifaces) > 0:
return_dict[SAMPLE_PL_ARG] = sample_pifaces
if len(project_pifaces) > 0:
return_dict[PROJECT_PL_ARG] = project_pifaces

else:
_LOGGER.warning(
Expand Down
10 changes: 4 additions & 6 deletions tests/data/hello_looper-dev/advanced/.looper.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
pep_config: project/project_config.yaml
output_dir: "results"
pipeline_interfaces:
sample:
- ../pipeline/pipeline_interface1_sample.yaml
- ../pipeline/pipeline_interface2_sample.yaml
project:
- ../pipeline/pipeline_interface1_project.yaml
- ../pipeline/pipeline_interface2_project.yaml
- pipeline/pipeline_interface1_sample.yaml
- pipeline/pipeline_interface2_sample.yaml
- pipeline/pipeline_interface1_project.yaml
- pipeline/pipeline_interface2_project.yaml

Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
pep_config: project/project_config.yaml
output_dir: "results"
pipeline_interfaces:
sample:
- ../pipeline/pipestat_pipeline_interface1_sample.yaml
- ../pipeline/pipestat_pipeline_interface2_sample.yaml
- pipeline/pipestat_pipeline_interface1_sample.yaml
- pipeline/pipestat_pipeline_interface2_sample.yaml
pipestat:
results_file_path: results.yaml
flag_file_dir: results/flags
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
pipeline_name: PIPELINE1
pipeline_type: project
output_schema: output_schema.yaml
var_templates:
path: "{looper.piface_dir}/col_pipeline1.py"
command_template: >
python3 {pipeline.var_templates.path} --project-name {project.name}
project_interface:
command_template: >
python3 {pipeline.var_templates.path} --project-name {project.name}


Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
pipeline_name: PIPELINE1
pipeline_type: sample
input_schema: https://schema.databio.org/pep/2.0.0.yaml
output_schema: output_schema.yaml
var_templates:
path: "{looper.piface_dir}/pipeline1.py"
pre_submit:
python_functions:
- looper.write_sample_yaml
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}
sample_interface:
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}


Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
pipeline_name: OTHER_PIPELINE2
pipeline_type: project
output_schema: output_schema.yaml
var_templates:
path: "{looper.piface_dir}/col_pipeline2.py"
command_template: >
python3 {pipeline.var_templates.path} --project-name {project.name}
project_interface:
command_template: >
python3 {pipeline.var_templates.path} --project-name {project.name}
compute:
size_dependent_variables: resources-project.tsv

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
pipeline_name: OTHER_PIPELINE2
pipeline_type: sample
output_schema: output_schema.yaml
var_templates:
path: "{looper.piface_dir}/other_pipeline2.py"
pre_submit:
python_functions:
- looper.write_sample_yaml
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}
sample_interface:
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}
compute:
size_dependent_variables: resources-sample.tsv

Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
pipeline_name: example_pipestat_pipeline
pipeline_type: sample
input_schema: https://schema.databio.org/pep/2.0.0.yaml
output_schema: pipestat_output_schema.yaml
var_templates:
path: "{looper.piface_dir}/pipeline1.py"
pre_submit:
python_functions:
- looper.write_sample_yaml
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}
sample_interface:
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}


Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
pipeline_name: example_pipestat_pipeline
pipeline_type: sample
input_schema: https://schema.databio.org/pep/2.0.0.yaml
output_schema: pipestat_output_schema.yaml
var_templates:
path: "{looper.piface_dir}/other_pipeline2.py"
pre_submit:
python_functions:
- looper.write_sample_yaml
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}
sample_interface:
command_template: >
python3 {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr}
compute:
size_dependent_variables: resources-sample.tsv

Expand Down
2 changes: 1 addition & 1 deletion tests/data/hello_looper-dev/csv/.looper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ pep_config: project/sample_annotation.csv # local path to CSV
# pep_config: pepkit/hello_looper:default # you can also use a pephub registry path
output_dir: "results"
pipeline_interfaces:
sample: pipeline/pipeline_interface.yaml
- pipeline/pipeline_interface.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,9 @@ pipeline_name: count_lines
pipeline_type: sample
var_templates:
pipeline: '{looper.piface_dir}/count_lines.sh'
command_template: >
{pipeline.var_templates.pipeline} {sample.file}
sample_interface:
command_template: >
{pipeline.var_templates.pipeline} {sample.file}
project_interface:
command_template: >
{pipeline.var_templates.pipeline} "data/*.txt"

This file was deleted.

2 changes: 1 addition & 1 deletion tests/data/hello_looper-dev/intermediate/.looper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ pep_config: project/project_config.yaml # local path to pep config
# pep_config: pepkit/hello_looper:default # you can also use a pephub registry path
output_dir: "results"
pipeline_interfaces:
sample: pipeline/pipeline_interface.yaml
- pipeline/pipeline_interface.yaml
4 changes: 0 additions & 4 deletions tests/data/hello_looper-dev/intermediate/.looper_project.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
pipeline_name: count_lines
pipeline_type: sample
var_templates:
pipeline: '{looper.piface_dir}/count_lines.sh'
command_template: >
{pipeline.var_templates.pipeline} {sample.file}
sample_interface:
command_template: >
{pipeline.var_templates.pipeline} {sample.file}
project_interface:
command_template: >
{pipeline.var_templates.pipeline} "data/*.txt"
Loading
Loading