Skip to content

Commit

Permalink
refact: Add suggestions from the third round of reviews (#15)
Browse files Browse the repository at this point in the history
* test: Add test for installing a module already installed from different org

* fix: Remove unnecessary install command

* test: Add test for updating cross-org subwfs

* fix: Change hash to sha

* fix: Add remote url

* refact: Create shared instance of subworkflowinstall

* test: to keep same named cross mods post subwf rm

* refact: Don't use sha for the alternative repo

* fix: Add else to sha check

* fix: Move check to cross-org stuff

* refact: Reverse check order

* test: Add update test (#19)

* wip: Add bug two

* refact: Try defaulting to a dict

* fix: Dont allow remote to be unbound

* refact: Check if not equal to component type

* refact: Re-add isinstance dict

* fix: Remove mention of component_name

* refact: Add special case for strings following dicts

* test: Try removing branch from test case

* refact: Remove isinstance for str

* refact: Use dict only with get_comps out

* Revert "refact: Use dict only with get_comps out"

This reverts commit 7ebc6dc.

* refact: Check if content has subwfs

* refact: Always set current remote

* refact: Check sha before resetting current_repo

* refact: Do the negative test

* refact: Use another variable for remote

* refact: Simplify check

* refact: Expand check once again

* refact: Roll back to previous check

* fix: self.sha must not be none to reset to none

* Revert "fix: self.sha must not be none to reset to none"

This reverts commit 65aa1c4.

* Try removing the section entirely

* refact: Try moving check to reset

* refact: Remove unnecessary current_sha

* refact: Change check in get_comps

* refact: Set git remote beforehand

* refact: Change indent so previous check is the same

* refact: Remove current repo check

* Revert "refact: Remove current repo check"

This reverts commit a956e04.

* refact: Try using name list

* fix: Remove break in loop

* refactor: Always set current_repo

* fix: Check if name is in updated

* refactor: Remove unused sections and logs

* test: Use the same subworkflow in all test cases

get_genome_annotation is not necessary, given we're using another
subworkflow for the other tests

* refact: Update cross_org remote to nf-core-test

* fix: Change remote name in install test

* refact: Remove none check from all_subworkflows

Co-authored-by: Matthieu Muffato <[email protected]>

* docs: Add hash comment

* refact: Use mod/subwf_name in both sections

* test: Check updated subwf content

---------

Co-authored-by: Matthieu Muffato <[email protected]>
  • Loading branch information
jvfe and muffato authored Dec 4, 2024
1 parent 1a8c3be commit 864eb7f
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 34 deletions.
28 changes: 20 additions & 8 deletions nf_core/components/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def __init__(
self.force = force
self.prompt = prompt
self.sha = sha
self.current_sha = sha
if installed_by is not None:
self.installed_by = installed_by
else:
Expand All @@ -57,6 +58,14 @@ def install(self, component: Union[str, Dict[str, str]], silent: bool = False) -
self.modules_repo = ModulesRepo(remote_url, branch)
component = component["name"]

if self.current_remote is None:
self.current_remote = self.modules_repo.remote_url

if self.current_remote == self.modules_repo.remote_url and self.sha is not None:
self.current_sha = self.sha
else:
self.current_sha = None

if self.repo_type == "modules":
log.error(f"You cannot install a {component} in a clone of nf-core/modules")
return False
Expand All @@ -80,8 +89,8 @@ def install(self, component: Union[str, Dict[str, str]], silent: bool = False) -
return False

# Verify SHA
if not self.modules_repo.verify_sha(self.prompt, self.sha):
err_msg = f"SHA '{self.sha}' is not a valid commit SHA for the repository '{self.modules_repo.remote_url}'"
if not self.modules_repo.verify_sha(self.prompt, self.current_sha):
err_msg = f"SHA '{self.current_sha}' is not a valid commit SHA for the repository '{self.modules_repo.remote_url}'"
log.error(err_msg)
return False

Expand Down Expand Up @@ -124,7 +133,7 @@ def install(self, component: Union[str, Dict[str, str]], silent: bool = False) -
modules_json.update(self.component_type, self.modules_repo, component, current_version, self.installed_by)
return False
try:
version = self.get_version(component, self.sha, self.prompt, current_version, self.modules_repo)
version = self.get_version(component, self.current_sha, self.prompt, current_version, self.modules_repo)
except UserWarning as e:
log.error(e)
return False
Expand Down Expand Up @@ -209,15 +218,17 @@ def collect_and_verify_name(
if component is None:
component = questionary.autocomplete(
f"{'Tool' if self.component_type == 'modules' else 'Subworkflow'} name:",
choices=sorted(modules_repo.get_avail_components(self.component_type, commit=self.sha)),
choices=sorted(modules_repo.get_avail_components(self.component_type, commit=self.current_sha)),
style=nf_core.utils.nfcore_question_style,
).unsafe_ask()

if component is None:
return ""

# Check that the supplied name is an available module/subworkflow
if component and component not in modules_repo.get_avail_components(self.component_type, commit=self.sha):
if component and component not in modules_repo.get_avail_components(
self.component_type, commit=self.current_sha
):
log.error(f"{self.component_type[:-1].title()} '{component}' not found in available {self.component_type}")
print(
Panel(
Expand All @@ -233,9 +244,10 @@ def collect_and_verify_name(

raise ValueError

if not modules_repo.component_exists(component, self.component_type, commit=self.sha):
warn_msg = f"{self.component_type[:-1].title()} '{component}' not found in remote '{modules_repo.remote_url}' ({modules_repo.branch})"
log.warning(warn_msg)
if self.current_remote == modules_repo.remote_url:
if not modules_repo.component_exists(component, self.component_type, commit=self.current_sha):
warn_msg = f"{self.component_type[:-1].title()} '{component}' not found in remote '{modules_repo.remote_url}' ({modules_repo.branch})"
log.warning(warn_msg)

return component

Expand Down
70 changes: 54 additions & 16 deletions nf_core/components/update.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import re
import shutil
import tempfile
from pathlib import Path
Expand Down Expand Up @@ -41,6 +42,8 @@ def __init__(
limit_output=False,
):
super().__init__(component_type, pipeline_dir, remote_url, branch, no_pull)
self.current_remote = remote_url
self.branch = branch
self.force = force
self.prompt = prompt
self.sha = sha
Expand Down Expand Up @@ -92,6 +95,13 @@ def update(self, component=None, silent=False, updated=None, check_diff_exist=Tr
Returns:
(bool): True if the update was successful, False otherwise.
"""
if isinstance(component, dict):
# Override modules_repo when the component to install is a dependency from a subworkflow.
remote_url = component.get("git_remote", self.current_remote)
branch = component.get("branch", self.branch)
self.modules_repo = ModulesRepo(remote_url, branch)
component = component["name"]

self.component = component
if updated is None:
updated = []
Expand Down Expand Up @@ -868,7 +878,17 @@ def get_components_to_update(self, component):
if self.component_type == "modules":
# All subworkflow names in the installed_by section of a module are subworkflows using this module
# We need to update them too
subworkflows_to_update = [subworkflow for subworkflow in installed_by if subworkflow != self.component_type]
git_remote = self.current_remote
for subworkflow in installed_by:
if subworkflow != component:
for remote_url, content in mods_json["repos"].items():
if all_subworkflows := content.get("subworkflows"):
for _, details in all_subworkflows.items():
if subworkflow in details:
git_remote = remote_url
if subworkflow != self.component_type:
subworkflows_to_update.append({"name": subworkflow, "git_remote": git_remote})

elif self.component_type == "subworkflows":
for repo, repo_content in mods_json["repos"].items():
for component_type, dir_content in repo_content.items():
Expand All @@ -879,9 +899,9 @@ def get_components_to_update(self, component):
# We need to update it too
if component in comp_content["installed_by"]:
if component_type == "modules":
modules_to_update.append(comp)
modules_to_update.append({"name": comp, "git_remote": repo, "org_path": dir})
elif component_type == "subworkflows":
subworkflows_to_update.append(comp)
subworkflows_to_update.append({"name": comp, "git_remote": repo, "org_path": dir})

return modules_to_update, subworkflows_to_update

Expand All @@ -896,7 +916,7 @@ def update_linked_components(
Update modules and subworkflows linked to the component being updated.
"""
for s_update in subworkflows_to_update:
if s_update in updated:
if s_update["name"] in updated:
continue
original_component_type, original_update_all = self._change_component_type("subworkflows")
self.update(
Expand All @@ -908,7 +928,7 @@ def update_linked_components(
self._reset_component_type(original_component_type, original_update_all)

for m_update in modules_to_update:
if m_update in updated:
if m_update["name"] in updated:
continue
original_component_type, original_update_all = self._change_component_type("modules")
try:
Expand All @@ -931,28 +951,44 @@ def update_linked_components(
def manage_changes_in_linked_components(self, component, modules_to_update, subworkflows_to_update):
"""Check for linked components added or removed in the new subworkflow version"""
if self.component_type == "subworkflows":
subworkflow_directory = Path(self.directory, self.component_type, self.modules_repo.repo_path, component)
org_path_match = re.search(r"(?:https://|git@)[\w\.]+[:/](.*?)/", self.current_remote)
if org_path_match:
org_path = org_path_match.group(1)

subworkflow_directory = Path(self.directory, self.component_type, org_path, component)
included_modules, included_subworkflows = get_components_to_install(subworkflow_directory)
# If a module/subworkflow has been removed from the subworkflow
for module in modules_to_update:
if module not in included_modules:
log.info(f"Removing module '{module}' which is not included in '{component}' anymore.")
module_name = module["name"]
included_modules_names = [m["name"] for m in included_modules]
if module_name not in included_modules_names:
log.info(f"Removing module '{module_name}' which is not included in '{component}' anymore.")
remove_module_object = ComponentRemove("modules", self.directory)
remove_module_object.remove(module, removed_by=component)
remove_module_object.remove(module_name, removed_by=component)
for subworkflow in subworkflows_to_update:
if subworkflow not in included_subworkflows:
log.info(f"Removing subworkflow '{subworkflow}' which is not included in '{component}' anymore.")
subworkflow_name = subworkflow["name"]
included_subworkflow_names = [m["name"] for m in included_subworkflows]
if subworkflow_name not in included_subworkflow_names:
log.info(
f"Removing subworkflow '{subworkflow_name}' which is not included in '{component}' anymore."
)
remove_subworkflow_object = ComponentRemove("subworkflows", self.directory)
remove_subworkflow_object.remove(subworkflow, removed_by=component)
remove_subworkflow_object.remove(subworkflow_name, removed_by=component)
# If a new module/subworkflow is included in the subworklfow and wasn't included before
for module in included_modules:
if module not in modules_to_update:
log.info(f"Installing newly included module '{module}' for '{component}'")
module_name = module["name"]
module["git_remote"] = module.get("git_remote", self.current_remote)
module["branch"] = module.get("branch", self.branch)
if module_name not in modules_to_update:
log.info(f"Installing newly included module '{module_name}' for '{component}'")
install_module_object = ComponentInstall(self.directory, "modules", installed_by=component)
install_module_object.install(module, silent=True)
for subworkflow in included_subworkflows:
if subworkflow not in subworkflows_to_update:
log.info(f"Installing newly included subworkflow '{subworkflow}' for '{component}'")
subworkflow_name = subworkflow["name"]
subworkflow["git_remote"] = subworkflow.get("git_remote", self.current_remote)
subworkflow["branch"] = subworkflow.get("branch", self.branch)
if subworkflow_name not in subworkflows_to_update:
log.info(f"Installing newly included subworkflow '{subworkflow_name}' for '{component}'")
install_subworkflow_object = ComponentInstall(
self.directory, "subworkflows", installed_by=component
)
Expand All @@ -971,3 +1007,5 @@ def _reset_component_type(self, original_component_type, original_update_all):
self.component_type = original_component_type
self.modules_json.pipeline_components = None
self.update_all = original_update_all
if self.current_remote is None:
self.current_remote = self.modules_repo.remote_url
28 changes: 20 additions & 8 deletions tests/subworkflows/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from ..test_subworkflows import TestSubworkflows
from ..utils import (
CROSS_ORGANIZATION_BRANCH,
CROSS_ORGANIZATION_URL,
GITLAB_BRANCH_TEST_BRANCH,
GITLAB_REPO,
Expand Down Expand Up @@ -87,16 +86,29 @@ def test_subworkflows_install_different_branch_fail(self):

def test_subworkflows_install_across_organizations(self):
"""Test installing a subworkflow with modules from different organizations"""
install_obj = SubworkflowInstall(
self.pipeline_dir, remote_url=CROSS_ORGANIZATION_URL, branch=CROSS_ORGANIZATION_BRANCH
)
# The hic_bwamem2 subworkflow contains modules from different organizations
install_obj.install("get_genome_annotation")
# The fastq_trim_fastp_fastqc subworkflow contains modules from different organizations
self.subworkflow_install_cross_org.install("fastq_trim_fastp_fastqc")
# Verify that the installed_by entry was added correctly
modules_json = ModulesJson(self.pipeline_dir)
mod_json = modules_json.get_modules_json()
assert mod_json["repos"][CROSS_ORGANIZATION_URL]["modules"]["nf-core-test"]["fastqc"]["installed_by"] == [
"fastq_trim_fastp_fastqc"
]

def test_subworkflow_install_with_same_module(self):
"""Test installing a subworkflow with a module from a different organization that is already installed from another org"""
# The fastq_trim_fastp_fastqc subworkflow contains the cross-org fastqc module, not the nf-core one
self.subworkflow_install_cross_org.install("fastq_trim_fastp_fastqc")
# Verify that the installed_by entry was added correctly
modules_json = ModulesJson(self.pipeline_dir)
mod_json = modules_json.get_modules_json()
assert mod_json["repos"][CROSS_ORGANIZATION_URL]["modules"]["jvfe"]["wget"]["installed_by"] == [
"get_genome_annotation"

assert mod_json["repos"]["https://github.com/nf-core/modules.git"]["modules"]["nf-core"]["fastqc"][
"installed_by"
] == ["modules"]

assert mod_json["repos"][CROSS_ORGANIZATION_URL]["modules"]["nf-core-test"]["fastqc"]["installed_by"] == [
"fastq_trim_fastp_fastqc"
]

def test_subworkflows_install_tracking(self):
Expand Down
25 changes: 25 additions & 0 deletions tests/subworkflows/test_remove.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path

from nf_core.modules.modules_json import ModulesJson
from tests.utils import CROSS_ORGANIZATION_URL

from ..test_subworkflows import TestSubworkflows

Expand Down Expand Up @@ -99,3 +100,27 @@ def test_subworkflows_remove_included_subworkflow(self):
assert Path.exists(samtools_index_path) is True
assert Path.exists(samtools_stats_path) is True
self.subworkflow_remove.remove("bam_sort_stats_samtools")

def test_subworkflows_remove_subworkflow_keep_installed_cross_org_module(self):
"""Test removing subworkflow and all it's dependencies after installing it, except for a separately installed module from another organisation"""
self.subworkflow_install_cross_org.install("fastq_trim_fastp_fastqc")
self.mods_install.install("fastqc")

subworkflow_path = Path(self.subworkflow_install.directory, "subworkflows", "jvfe")
fastq_trim_fastp_fastqc_path = Path(subworkflow_path, "fastq_trim_fastp_fastqc")
fastqc_path = Path(self.subworkflow_install.directory, "modules", "jvfe", "fastqc")
nfcore_fastqc_path = Path(self.subworkflow_install.directory, "modules", "nf-core", "fastqc")

mod_json_before = ModulesJson(self.pipeline_dir).get_modules_json()
assert self.subworkflow_remove_cross_org.remove("fastq_trim_fastp_fastqc")
mod_json_after = ModulesJson(self.pipeline_dir).get_modules_json()

assert Path.exists(fastq_trim_fastp_fastqc_path) is False
assert Path.exists(fastqc_path) is False
assert Path.exists(nfcore_fastqc_path) is True
assert mod_json_before != mod_json_after
# assert subworkflows key is removed from modules.json
assert CROSS_ORGANIZATION_URL not in mod_json_after["repos"].keys()
assert (
"fastqc" in mod_json_after["repos"]["https://github.com/nf-core/modules.git"]["modules"]["nf-core"].keys()
)
31 changes: 30 additions & 1 deletion tests/subworkflows/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
from nf_core.components.components_utils import NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE
from nf_core.modules.modules_json import ModulesJson
from nf_core.modules.update import ModuleUpdate
from nf_core.subworkflows.install import SubworkflowInstall
from nf_core.subworkflows.update import SubworkflowUpdate

from ..test_subworkflows import TestSubworkflows
from ..utils import OLD_SUBWORKFLOWS_SHA, cmp_component
from ..utils import CROSS_ORGANIZATION_URL, OLD_SUBWORKFLOWS_SHA, cmp_component


class TestSubworkflowsUpdate(TestSubworkflows):
Expand Down Expand Up @@ -372,3 +373,31 @@ def test_update_change_of_included_modules(self):
assert "ensemblvep" not in mod_json["repos"][NF_CORE_MODULES_REMOTE]["modules"][NF_CORE_MODULES_NAME]
assert "ensemblvep/vep" in mod_json["repos"][NF_CORE_MODULES_REMOTE]["modules"][NF_CORE_MODULES_NAME]
assert Path(self.pipeline_dir, "modules", NF_CORE_MODULES_NAME, "ensemblvep/vep").is_dir()

def test_update_subworkflow_across_orgs(self):
"""Install and update a subworkflow with modules from different organizations"""
install_obj = SubworkflowInstall(
self.pipeline_dir,
remote_url=CROSS_ORGANIZATION_URL,
# Hash for an old version of fastq_trim_fastp_fastqc
# A dummy code change was made in order to have a commit to compare with
sha="9627f4367b11527194ef14473019d0e1a181b741",
)
# The fastq_trim_fastp_fastqc subworkflow contains the cross-org fastqc module, not the nf-core one
install_obj.install("fastq_trim_fastp_fastqc")

patch_path = Path(self.pipeline_dir, "fastq_trim_fastp_fastqc.patch")
update_obj = SubworkflowUpdate(
self.pipeline_dir,
remote_url=CROSS_ORGANIZATION_URL,
save_diff_fn=patch_path,
update_all=False,
update_deps=True,
show_diff=False,
)
assert update_obj.update("fastq_trim_fastp_fastqc") is True

with open(patch_path) as fh:
content = fh.read()
assert "- fastqc_raw_html = FASTQC_RAW.out.html" in content
assert "+ ch_fastqc_raw_html = FASTQC_RAW.out.html" in content
9 changes: 9 additions & 0 deletions tests/test_subworkflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import nf_core.subworkflows

from .utils import (
CROSS_ORGANIZATION_BRANCH,
CROSS_ORGANIZATION_URL,
GITLAB_SUBWORKFLOWS_BRANCH,
GITLAB_SUBWORKFLOWS_ORG_PATH_BRANCH,
GITLAB_URL,
Expand Down Expand Up @@ -103,10 +105,17 @@ def setUp(self):
force=False,
sha="8c343b3c8a0925949783dc547666007c245c235b",
)
self.subworkflow_install_cross_org = nf_core.subworkflows.SubworkflowInstall(
self.pipeline_dir, remote_url=CROSS_ORGANIZATION_URL, branch=CROSS_ORGANIZATION_BRANCH
)

self.mods_install = nf_core.modules.install.ModuleInstall(self.pipeline_dir, prompt=False, force=True)

# Set up remove objects
self.subworkflow_remove = nf_core.subworkflows.SubworkflowRemove(self.pipeline_dir)
self.subworkflow_remove_cross_org = nf_core.subworkflows.SubworkflowRemove(
self.pipeline_dir, remote_url=CROSS_ORGANIZATION_URL, branch=CROSS_ORGANIZATION_BRANCH
)

@pytest.fixture(autouse=True)
def _use_caplog(self, caplog):
Expand Down
Loading

0 comments on commit 864eb7f

Please sign in to comment.