From 60a4a260f7952a768e06788a1e80468d90bc4cab Mon Sep 17 00:00:00 2001 From: paulzierep Date: Thu, 13 Jun 2024 12:52:06 +0200 Subject: [PATCH] Fix tool filtering (#118) * fix wrong json merging * allow empty or no tools_status file --- .github/workflows/fetch_all_tools.yaml | 4 ++-- bin/extract_galaxy_tools.py | 16 ++++++++++++++-- bin/get_community_tools.sh | 2 ++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/fetch_all_tools.yaml b/.github/workflows/fetch_all_tools.yaml index 218b3dfc..8e8ba610 100644 --- a/.github/workflows/fetch_all_tools.yaml +++ b/.github/workflows/fetch_all_tools.yaml @@ -77,9 +77,9 @@ jobs: python -m pip install -r requirements.txt sudo apt-get install jq - name: Merge all tools - run: | #merge files with only one header -> https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs + run: | #merge files with only one header -> https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs; map(.[]) -> https://stackoverflow.com/questions/42011086/merge-arrays-of-json (get flat array, one tool per entry) awk 'FNR==1 && NR!=1{next;}{print}' results/repositories*.list_tools.tsv > results/all_tools.tsv - jq -s '.' results/repositories*.list_tools.json > results/all_tools.json + jq -s 'map(.[])' results/repositories*.list_tools.json > results/all_tools.json - name: Wordcloud and interactive table run: | bash ./bin/extract_all_tools_downstream.sh diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py index aa71d70a..631e5f9d 100644 --- a/bin/extract_galaxy_tools.py +++ b/bin/extract_galaxy_tools.py @@ -769,8 +769,20 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List: tools = json.load(f) # get categories and tools to exclude categories = read_file(args.categories) - status = pd.read_csv(args.status, sep="\t", index_col=0, header=None).to_dict("index") + try: + status = pd.read_csv(args.status, sep="\t", index_col=0, header=None).to_dict("index") + except Exception as ex: + print(f"Failed to load tool_status.tsv file with:\n{ex}") + print("Not assigning tool status for this community !") + status = {} + # filter tool lists ts_filtered_tools, filtered_tools = filter_tools(tools, categories, status) + export_tools_to_tsv(ts_filtered_tools, args.ts_filtered_tools, format_list_col=True) - export_tools_to_tsv(filtered_tools, args.filtered_tools, format_list_col=True) + + # if there are no filtered tools return the ts filtered tools + if filtered_tools: + export_tools_to_tsv(filtered_tools, args.filtered_tools, format_list_col=True) + else: + export_tools_to_tsv(ts_filtered_tools, args.filtered_tools, format_list_col=True) diff --git a/bin/get_community_tools.sh b/bin/get_community_tools.sh index a0566eef..efb03f9c 100755 --- a/bin/get_community_tools.sh +++ b/bin/get_community_tools.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +# stop on error +set -e for com_data_fp in data/communities/* ; do if [[ -d "$com_data_fp" && ! -L "$com_data_fp" ]]; then