Skip to content

Commit

Permalink
Write extracted tools only once
Browse files Browse the repository at this point in the history
Also:
- Use `.extend()` instead of `+=` (slightly faster)
- Write error messages to `sys.stderr`
  • Loading branch information
nsoranzo committed Nov 1, 2023
1 parent e71310a commit 1bfbc2d
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import base64
import sys
import time
import xml.etree.ElementTree as et
from pathlib import Path
Expand Down Expand Up @@ -60,7 +61,7 @@ def get_tool_github_repositories(g: Github) -> List[str]:
for i in range(1, 5):
repo_f = repo.get_contents(f"repositories0{i}.list")
repo_l = get_string_content(repo_f).rstrip()
repo_list += repo_l.split("\n")
repo_list.extend(repo_l.split("\n"))
return repo_list


Expand Down Expand Up @@ -221,7 +222,7 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
try:
root = et.fromstring(file_content)
except Exception:
print(file_content)
print(file_content, file=sys.stderr)
else:
# version
if metadata["Galaxy wrapper version"] is None:
Expand Down Expand Up @@ -293,7 +294,7 @@ def parse_tools(repo: Repository) -> List[Dict[str, Any]]:
try:
repo_tools = repo.get_contents("wrappers")
except Exception:
print("No tool folder found")
print("No tool folder found", file=sys.stderr)
return []
assert isinstance(repo_tools, list)
tool_folders.append(repo_tools)
Expand Down Expand Up @@ -418,10 +419,12 @@ def filter_tools(tools: List[Dict], ts_cat: List[str], excluded_tools: List[str]
print(r)
if "github" not in r:
continue
repo = get_github_repo(r, g)
tools += parse_tools(repo)
export_tools(tools, args.all_tools, format_list_col=True)
print()
try:
repo = get_github_repo(r, g)
tools.extend(parse_tools(repo))
except Exception as e:
print(f"Error while extracting tools from repo {r}: {e}", file=sys.stderr)
export_tools(tools, args.all_tools, format_list_col=True)
elif args.command == "filtertools":
tools = pd.read_csv(Path(args.tools), sep="\t", keep_default_na=False).to_dict("records")
# get categories and tools to exclude
Expand Down

0 comments on commit 1bfbc2d

Please sign in to comment.