Skip to content

Commit

Permalink
Merge branch 'galaxyproject:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
nomadscientist authored Jun 26, 2024
2 parents 558923e + 5d360b8 commit d9f327b
Show file tree
Hide file tree
Showing 14 changed files with 20,157 additions and 115 deletions.
110 changes: 56 additions & 54 deletions .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
@@ -1,72 +1,74 @@
name: Run tests

on:
workflow_dispatch:

# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
# concurrency:
# group: "tools"
# cancel-in-progress: false

permissions:
contents: write
on: [push, pull_request]

jobs:
fetch-all-tools-stepwise:
test-tools:
runs-on: ubuntu-20.04
environment: fetch-tools
name: Fetch all tool stepwise
strategy:
matrix:
python-version: [3.8]
subset:
- test.list
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
python-version: '3.11'
- name: Install requirement
run: python -m pip install -r requirements.txt
- name: Test tool extraction
- name: Tool extraction
# run: bash bin/extract_all_tools.sh
run: |
bash ./bin/extract_all_tools_test.sh "${{ matrix.subset }}"
python bin/extract_galaxy_tools.py \
extractools \
--api $GITHUB_API_KEY \
--all-tools "results/test_tools.tsv" \
--all-tools-json "results/test_tools.json" \
--planemo-repository-list "test.list" \
--test
env:
GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }}
- name: Test tutorial extraction and filtering
- name: Tool filter
run: |
python bin/extract_galaxy_tools.py \
filtertools \
--tools "results/all_tools.json" \
--ts-filtered-tools "results/microgalaxy/tools_filtered_by_ts_categories.tsv" \
--filtered-tools "results/microgalaxy/tools.tsv" \
--categories "data/communities/microgalaxy/categories" \
--status "data/communities/microgalaxy/tool_status.tsv"
- name: Create interactive table
run: |
python bin/create_interactive_table.py \
--table "results/microgalaxy/tools.tsv" \
--template "data/interactive_table_template.html" \
--output "results/microgalaxy/index.html"
- name: Create wordcloud
run: |
python bin/create_wordcloud.py \
--table "results/microgalaxy/tools.tsv" \
--wordcloud_mask "data/usage_stats/wordcloud_mask.png" \
--output "results/microgalaxy/tools_wordcloud.png" \
--stats_column "No. of tool users (2022-2023) (usegalaxy.eu)"
test-tutorials:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install requirement
run: python -m pip install -r requirements.txt
- name: Tutorial extraction
run: |
bash ./bin/extract_filter_tutorial_test.sh
python bin/extract_gtn_tutorials.py \
extracttutorials \
--all_tutorials "results/test_tutorials.json" \
--tools "results/all_tools.json" \
--api $PLAUSIBLE_API_KEY \
--test
env:
PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }}
- name: Commit all tools
# add or commit any changes in results if there was a change, merge with main and push as bot
run: |
git config user.name github-actions
git config user.email [email protected]
git pull --no-rebase -s recursive -X ours
git add results
git status
git diff --quiet && git diff --staged --quiet || (git commit -m "fetch all tools bot - step fetch")
git push
# fetch-all-tools-merge:
# runs-on: ubuntu-20.04
# needs: fetch-all-tools-stepwise
# name: Fetch all tools merge
# steps:
# - uses: actions/checkout@v4
# - uses: actions/setup-python@v5
# - name: Install requirement
# run: python -m pip install -r requirements.txt
# - name: Run script
# run: |
# cat results/repositories*.list_tools.tsv > results/all_tools.tsv
# bash ./bin/extract_all_tools_downstream.sh
# - name: Commit all tools
# run: |
# git config user.name github-actions
# git config user.email [email protected]
# git diff --quiet || (git add results && git commit -m "fetch all tools bot - step merge")
# git push
- name: Tutorial filtering
run: |
python bin/extract_gtn_tutorials.py \
filtertutorials \
--all_tutorials "results/test_tutorials.json" \
--filtered_tutorials "results/microgalaxy/test_tutorials.tsv" \
--tags "data/communities/microgalaxy/tutorial_tags"
15 changes: 0 additions & 15 deletions bin/extract_all_tools_test.sh

This file was deleted.

32 changes: 0 additions & 32 deletions bin/extract_filter_tutorials_test.sh

This file was deleted.

18 changes: 13 additions & 5 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env python

import argparse
import base64
import json
import sys
import time
import traceback
import xml.etree.ElementTree as et
from functools import lru_cache
from pathlib import Path
Expand Down Expand Up @@ -98,22 +98,28 @@ def get_string_content(cf: ContentFile) -> str:
:param cf: GitHub ContentFile object
"""

return base64.b64decode(cf.content).decode("utf-8")


def get_tool_github_repositories(
g: Github, repository_list: Optional[str], run_test: bool, add_extra_repositories: bool = True
g: Github,
repository_list: Optional[str],
run_test: bool,
test_repository: str = "https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper",
add_extra_repositories: bool = True,
) -> List[str]:
"""
Get list of tool GitHub repositories to parse
:param g: GitHub instance
:param repository_list: The selection to use from the repository (needed to split the process for CI jobs)
:param run_test: if set, only the test repository is parsed (for testing)
:param test_repository: URL of the repository returned when run_test is set
"""

if run_test:
return ["https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper"]
return [test_repository]

repo = g.get_user("galaxyproject").get_repo("planemo-monitor")
repo_list: List[str] = []
Expand Down Expand Up @@ -318,11 +324,11 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
# parse XML file and get meta data from there
for file in file_list:
if file.name.endswith("xml") and "macro" not in file.name:
file_content = get_string_content(file)
try:
file_content = get_string_content(file)
root = et.fromstring(file_content)
except Exception:
print(file_content, sys.stderr)
print(traceback.format_exc())
else:
# version
if metadata["Galaxy wrapper version"] is None:
Expand Down Expand Up @@ -513,6 +519,7 @@ def export_tools_to_tsv(
:param output_fp: path to output file
:param format_list_col: boolean indicating if list columns should be formatted
"""

df = pd.DataFrame(tools).sort_values("Galaxy wrapper id")
if format_list_col:
df["ToolShed categories"] = shared_functions.format_list_column(df["ToolShed categories"])
Expand Down Expand Up @@ -698,6 +705,7 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List:
f"Error while extracting tools from repo {r}: {e}",
file=sys.stderr,
)
print(traceback.format_exc())

#######################################################
# add additional information to the List[Dict] object
Expand Down
22 changes: 13 additions & 9 deletions bin/extract_gtn_tutorials.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,19 @@ def get_youtube_stats(tuto: dict) -> None:
tuto["video_versions"] = 0
tuto["video_view"] = 0
ydl_opts = {"ignoreerrors": True, "quiet": True}
if "video_library" in tuto and tuto["video_library"]["tutorial"]:
tuto["video_versions"] = len(tuto["video_library"]["tutorial"]["versions"])
for v in tuto["video_library"]["tutorial"]["versions"]:
url = f"https://www.youtube.com/watch?v={v['link']}"
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=False)
info = ydl.sanitize_info(info)
if info:
tuto["video_view"] += info["view_count"]
recordings = []
if "recordings" in tuto and tuto["recordings"]:
recordings = tuto["recordings"]
elif "slides_recordings" in tuto and tuto["slides_recordings"]:
recordings = tuto["slides_recordings"]
tuto["video_versions"] = len(recordings)
for v in recordings:
url = f"https://www.youtube.com/watch?v={v['youtube_id']}"
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=False)
info = ydl.sanitize_info(info)
if info:
tuto["video_view"] += info["view_count"]


def format_tutorial(tuto: dict, edam_ontology: dict, tools: dict, feedback: dict, plausible_api: str) -> Dict:
Expand Down
5 changes: 5 additions & 0 deletions data/communities/assembly/categories
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Assembly
Fasta Manipulation
Fastq Manipulation
Nanopore

Loading

0 comments on commit d9f327b

Please sign in to comment.