Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
supernord authored Mar 4, 2024
2 parents 4325997 + c4021f9 commit 3e151aa
Show file tree
Hide file tree
Showing 16 changed files with 4,506 additions and 2,756 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/filter_communities.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ on:
# the workflow is triggered when all_tools_tsv is changed
push:
paths:
- 'results/all_tools.tsv'
- 'results/**'
branches: ["main"]

# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/static.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
push:
paths:
- 'results/**'

branches: ["main"]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
Expand Down
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ The tools performs the following steps:
- Creates an interactive table for all tools: [All tools](https://galaxyproject.github.io/galaxy_tool_metadata_extractor/)
- Creates an interactive table for all registered communities, e.g. [microGalaxy](https://galaxyproject.github.io/galaxy_tool_metadata_extractor/microgalaxy/)



# Usage

## Prepare environment
Expand Down Expand Up @@ -103,8 +101,6 @@ The script will generate a TSV file with each tool found in the list of GitHub r
[--keep <Path to to-keep tool file category file>]
```
## Add your community
In order to add your community you need to:
Expand All @@ -115,3 +111,12 @@ In order to add your community you need to:
- Make a pull request to add your community.
- The workflow will run every Sunday, so on the next Monday, your community table should be added to `results/<your community name>`
## Development
To make a test run of the tool and check its functionality, follow [Usage](#Usage) to set up the environment and the API key, then run:
```bash
bash ./bin/extract_all_tools_test.sh test.list
```

This runs the tool, but only parses the test repository [Galaxy-Tool-Metadata-Extractor-Test-Wrapper](https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper)
3 changes: 2 additions & 1 deletion bin/create_interactive_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

# TODO maybe allow comunities to modify
# TODO maybe allow communities to modify
COLUMNS = [
"Expand",
"Galaxy wrapper id",
Expand All @@ -18,6 +18,7 @@
"EDAM topic",
"Description",
"bio.tool description",
"biii",
"Status",
"Source",
"ToolShed categories",
Expand Down
33 changes: 24 additions & 9 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ def get_tool_github_repositories(g: Github, RepoSelection: Optional[str], run_te
:param g: GitHub instance
:param RepoSelection: The selection to use from the repository (needed to split the process for CI jobs)
:run_test: for CI testing only use one repository
:run_test: for testing, only parse the test repository
"""

if run_test:
return ["https://github.com/TGAC/earlham-galaxytools"]
return ["https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper"]

repo = g.get_user("galaxyproject").get_repo("planemo-monitor")
repo_list: List[str] = []
Expand Down Expand Up @@ -180,17 +180,20 @@ def get_shed_attribute(attrib: str, shed_content: Dict[str, Any], empty_value: A
return empty_value


def get_biotools(el: et.Element) -> Optional[str]:
def get_xref(el: et.Element, attrib_type: str) -> Optional[str]:
"""
Get bio.tools information
Get xref information
:param el: Element object
:attrib_type: the type of the xref (e.g.: bio.tools or biii)
"""

xrefs = el.find("xrefs")
if xrefs is not None:
xref = xrefs.find("xref")
if xref is not None and xref.attrib["type"] == "bio.tools":
return xref.text
xref_items = xrefs.findall("xref") # check all xref items
for xref in xref_items:
if xref is not None and xref.attrib["type"] == attrib_type:
return xref.text
return None


Expand Down Expand Up @@ -248,6 +251,7 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
"Galaxy tool ids": [],
"Description": None,
"bio.tool id": None,
"biii": None,
"bio.tool name": None,
"bio.tool description": None,
"EDAM operation": [],
Expand Down Expand Up @@ -296,9 +300,14 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
metadata["Galaxy wrapper version"] = child.text
elif child.attrib["name"] == "requirements":
metadata["Conda id"] = get_conda_package(child)
biotools = get_biotools(child)
# bio.tools
biotools = get_xref(child, attrib_type="bio.tools")
if biotools is not None:
metadata["bio.tool id"] = biotools
# biii
biii = get_xref(child, attrib_type="biii")
if biii is not None:
metadata["biii"] = biii
# parse XML file and get meta data from there, also tool ids
for file in file_list:
if file.name.endswith("xml") and "macro" not in file.name:
Expand All @@ -324,9 +333,15 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
metadata["Galaxy wrapper version"] = child.text
# bio.tools
if metadata["bio.tool id"] is None:
biotools = get_biotools(root)
biotools = get_xref(root, attrib_type="bio.tools")
if biotools is not None:
metadata["bio.tool id"] = biotools
# bio.tools
# biii
if metadata["biii"] is None:
biii = get_xref(root, attrib_type="biii")
if biii is not None:
metadata["biii"] = biii
# conda package
if metadata["Conda id"] is None:
reqs = get_conda_package(root)
Expand Down
Loading

0 comments on commit 3e151aa

Please sign in to comment.