
Commit

Fix lint
bebatut committed Jun 24, 2024
1 parent 9ff0c0a commit 2d12340
Showing 4 changed files with 103 additions and 108 deletions.
16 changes: 5 additions & 11 deletions bin/compare_tools.py
@@ -2,13 +2,9 @@

import argparse
from pathlib import Path
from typing import (
List,
Set,
)
from typing import Set

import pandas as pd

import shared_functions


@@ -34,13 +30,13 @@ def write_tool_list(tools: Set, fp: str) -> None:
"""
Write tool list with 1 element per row in a file
"""
tools = list(tools)
tools.sort()
tool_list = list(tools)
tool_list.sort()
with Path(fp).open("w") as f:
f.write("\n".join(tools))
f.write("\n".join(tool_list))


def update_excl_keep_tool_lists(tuto_tool_suites: Set, excl_tool_fp: str, keep_tool_fp: str) -> List:
def update_excl_keep_tool_lists(tuto_tool_suites: Set, excl_tool_fp: str, keep_tool_fp: str) -> None:
"""
Update the lists in to keep and exclude with tool suites in tutorials
"""
@@ -82,5 +78,3 @@ def update_excl_keep_tool_lists(tuto_tool_suites: Set, excl_tool_fp: str, keep_t

tuto_tools = get_tutorials_tool_suites(args.filtered_tutorials, args.all_tools)
update_excl_keep_tool_lists(tuto_tools, args.exclude, args.keep)
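
For context, the compare_tools.py change is purely a lint cleanup: the sorted list now gets its own name (tool_list) instead of rebinding the Set-typed tools parameter, and the now-unused List import and return annotation go away. A minimal sketch of the helper as it stands after this commit, with a made-up set of suite names and output path as a usage example:

from pathlib import Path
from typing import Set


def write_tool_list(tools: Set, fp: str) -> None:
    """
    Write tool list with 1 element per row in a file
    """
    # Sort into a separate list so the Set-typed parameter is never rebound
    # to a list, which is the kind of reassignment a type checker flags.
    tool_list = list(tools)
    tool_list.sort()
    with Path(fp).open("w") as f:
        f.write("\n".join(tool_list))


# Hypothetical usage (names and path are illustrative only):
write_tool_list({"bwa", "samtools", "fastqc"}, "community_tools.txt")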


11 changes: 4 additions & 7 deletions bin/extract_galaxy_tools.py
@@ -17,15 +17,13 @@

import pandas as pd
import requests
import shared_functions
import yaml
from github import Github
from github.ContentFile import ContentFile
from github.Repository import Repository
from owlready2 import get_ontology

import shared_functions


# Config variables
BIOTOOLS_API_URL = "https://bio.tools"
# BIOTOOLS_API_URL = "https://130.226.25.21"
@@ -94,9 +92,6 @@ def get_tool_stats_from_stats_file(tool_stats_df: pd.DataFrame, tool_ids: List[s
return int(agg_count)





def get_string_content(cf: ContentFile) -> str:
"""
Get string of the content from a ContentFile
@@ -524,7 +519,9 @@ def export_tools_to_tsv(
df["EDAM operation"] = shared_functions.format_list_column(df["EDAM operation"])
df["EDAM topic"] = shared_functions.format_list_column(df["EDAM topic"])

df["EDAM operation (no superclasses)"] = shared_functions.format_list_column(df["EDAM operation (no superclasses)"])
df["EDAM operation (no superclasses)"] = shared_functions.format_list_column(
df["EDAM operation (no superclasses)"]
)
df["EDAM topic (no superclasses)"] = shared_functions.format_list_column(df["EDAM topic (no superclasses)"])

df["bio.tool ids"] = shared_functions.format_list_column(df["bio.tool ids"])
178 changes: 91 additions & 87 deletions bin/extract_gtn_tutorials.py
@@ -1,21 +1,23 @@
#!/usr/bin/env python

import argparse
from datetime import datetime, date
import requests
from datetime import (
date,
datetime,
)
from typing import (
Dict,
List,
)

import pandas as pd
from owlready2 import get_ontology
import yt_dlp

import requests
import shared_functions
import yt_dlp
from owlready2 import get_ontology


def get_request_json(url: str, headers: dict = None) -> dict:
def get_request_json(url: str, headers: dict) -> dict:
"""
Return JSON output using request
@@ -46,7 +48,7 @@ def get_short_tool_ids(tuto: dict) -> None:
Get tool ids without toolshed URL
"""
tuto["short_tools"] = set()
if "tools" in tuto:
if "tools" in tuto:
for tool in tuto["tools"]:
if "toolshed" in tool:
tuto["short_tools"].add(tool.split("/")[-2])
@@ -55,7 +57,7 @@ def get_short_tool_ids(tuto: dict) -> None:
tuto["short_tools"] = list(tuto["short_tools"])


def get_edam_topics(tuto: dict, edam_ontology) -> None:
def get_edam_topics(tuto: dict, edam_ontology: dict) -> None:
"""
Get EDAM topics instead of EDAM ids
"""
@@ -97,9 +99,7 @@ def get_visit_results(url: str, tuto: dict, plausible_api: str) -> None:
"""
Extract visit results from Plausible URL
"""
headers = {
'Authorization' : f"Bearer {plausible_api}"
}
headers = {"Authorization": f"Bearer {plausible_api}"}
results = get_request_json(url, headers)
if "results" in results:
for metric in ["visitors", "pageviews", "visit_duration"]:
@@ -128,10 +128,7 @@ def get_youtube_stats(tuto: dict) -> None:
"""
tuto["video_versions"] = 0
tuto["video_view"] = 0
ydl_opts = {
"ignoreerrors": True,
"quiet": True
}
ydl_opts = {"ignoreerrors": True, "quiet": True}
if "video_library" in tuto and tuto["video_library"]["tutorial"]:
tuto["video_versions"] = len(tuto["video_library"]["tutorial"]["versions"])
for v in tuto["video_library"]["tutorial"]["versions"]:
@@ -142,8 +139,8 @@ def get_youtube_stats(tuto: dict) -> None:
if info:
tuto["video_view"] += info["view_count"]

def format_tutorial(tuto: dict, edam_ontology, tools: dict, feedback: dict, plausible_api: str) -> None:

def format_tutorial(tuto: dict, edam_ontology: dict, tools: dict, feedback: dict, plausible_api: str) -> Dict:
tuto["url"] = f'https://training.galaxyproject.org/{tuto["url"]}'
tuto["mod_date"] = format_date(tuto["mod_date"])
tuto["pub_date"] = format_date(tuto["pub_date"])
@@ -161,8 +158,8 @@ def get_feedback_per_tutorials() -> Dict:
"""
Get feedback from GTN API and group per tutorial
"""
feedback = get_request_json("https://training.galaxyproject.org/training-material/api/feedback2.json")
feedback_per_tuto = {}
feedback = get_request_json("https://training.galaxyproject.org/training-material/api/feedback2.json", {})
feedback_per_tuto = {} # type: dict
for tutorials in feedback.values():
for tuto, feedback in tutorials.items():
for f in feedback:
@@ -174,19 +171,25 @@
return feedback_per_tuto


def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dict]:
def get_tutorials(
tool_fp: str,
plausible_api: str,
run_test: bool,
) -> List[Dict]:
"""
Extract training material from the GTN API, format them, extract EDAM operations from tools, feedback stats, view stats, etc
"""
tools = shared_functions.read_suite_per_tool_id(tool_fp)
tools = shared_functions.read_suite_per_tool_id(tool_fp)
feedback = get_feedback_per_tutorials()
edam_ontology = get_ontology("https://edamontology.org/EDAM_unstable.owl").load()
topics = get_request_json("https://training.galaxyproject.org/training-material/api/topics.json")
topics = get_request_json("https://training.galaxyproject.org/training-material/api/topics.json", {})
if run_test:
topics = ["microbiome"]
topics = {"microbiome": topics["microbiome"]}
tutos = []
for topic in topics:
topic_information = get_request_json(f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json")
topic_information = get_request_json(
f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json", {}
)
for tuto in topic_information["materials"]:
if tuto is None:
continue
@@ -195,7 +198,7 @@ def get_tutorials(tool_fp: str, plausible_api: str, run_test: bool,) -> List[Dic
return tutos


def filter_tutorials(tutorials: List[Dict], tags: List) -> List[Dict]:
def filter_tutorials(tutorials: dict, tags: List) -> List:
"""
Filter training based on a list of tags
"""
@@ -211,72 +214,73 @@ def filter_tutorials(tutorials: List[Dict], tags: List) -> List[Dict]:
return filtered_tutorials


def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None:
def export_tutorials_to_tsv(tutorials: list, output_fp: str) -> None:
"""
Export tutorials to a TSV file
"""
df = (pd.DataFrame(tutorials)
.assign(
Workflows=lambda df: df.workflows.notna(),
exact_supported_servers= lambda df: df.exact_supported_servers.fillna("").apply(list),
inexact_supported_servers= lambda df: df.inexact_supported_servers.fillna("").apply(list),
visit_duration= lambda df: df.visit_duration/60
)
df = pd.DataFrame(tutorials).assign(
Workflows=lambda df: df.workflows.notna(),
exact_supported_servers=lambda df: df.exact_supported_servers.fillna("").apply(list),
inexact_supported_servers=lambda df: df.inexact_supported_servers.fillna("").apply(list),
visit_duration=lambda df: df.visit_duration / 60,
)

for col in ["exact_supported_servers", "inexact_supported_servers", "short_tools", "edam_operation", "edam_topic"]:
df[col] = shared_functions.format_list_column(df[col])

df = (df
.rename(columns = {
"title": "Title",
"hands_on": "Tutorial",
"url": "Link",
"slides": "Slides",
"mod_date": "Last modification",
"pub_date": "Creation",
"version": "Version",
"short_tools": "Tools",
"exact_supported_servers": "Servers with precise tool versions",
"inexact_supported_servers": "Servers with tool but different versions",
"topic_name_human": "Topic",
"video": "Video",
"edam_topic": "EDAM topic",
"edam_operation": "EDAM operation",
"feedback_number": "Feedback number",
"feedback_mean_note": "Feedback mean note",
"visitors": "Visitors",
"pageviews": "Page views",
"visit_duration": "Visit duration",
"video_versions": "Video versions",
"video_view": "Video views"
})

df = (
df.rename(
columns={
"title": "Title",
"hands_on": "Tutorial",
"url": "Link",
"slides": "Slides",
"mod_date": "Last modification",
"pub_date": "Creation",
"version": "Version",
"short_tools": "Tools",
"exact_supported_servers": "Servers with precise tool versions",
"inexact_supported_servers": "Servers with tool but different versions",
"topic_name_human": "Topic",
"video": "Video",
"edam_topic": "EDAM topic",
"edam_operation": "EDAM operation",
"feedback_number": "Feedback number",
"feedback_mean_note": "Feedback mean note",
"visitors": "Visitors",
"pageviews": "Page views",
"visit_duration": "Visit duration",
"video_versions": "Video versions",
"video_view": "Video views",
}
)
.fillna("")
.reindex(columns = [
"Topic",
"Title",
"Link",
"EDAM topic",
"EDAM operation",
"Creation",
"Last modification",
"Version",
"Tutorial",
"Slides",
"Video",
"Workflows",
"Tools",
"Servers with precise tool versions",
"Servers with tool but different versions",
"Feedback number",
"Feedback mean note",
"Visitors",
"Page views",
"Visit duration",
"Video views"
])
.reindex(
columns=[
"Topic",
"Title",
"Link",
"EDAM topic",
"EDAM operation",
"Creation",
"Last modification",
"Version",
"Tutorial",
"Slides",
"Video",
"Workflows",
"Tools",
"Servers with precise tool versions",
"Servers with tool but different versions",
"Feedback number",
"Feedback mean note",
"Visitors",
"Page views",
"Visit duration",
"Video views",
]
)
)

df.to_csv(output_fp, sep="\t", index=False)


@@ -287,7 +291,9 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None:
subparser = parser.add_subparsers(dest="command")
# Extract tutorials
extracttutorials = subparser.add_parser("extracttutorials", help="Extract all training materials")
extracttutorials.add_argument("--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials")
extracttutorials.add_argument(
"--all_tutorials", "-o", required=True, help="Filepath to JSON with all extracted training materials"
)
extracttutorials.add_argument(
"--tools",
"-t",
@@ -330,11 +336,9 @@ def export_tutorials_to_tsv(tutorials: List[Dict], output_fp: str) -> None:
shared_functions.export_to_json(tutorials, args.all_tutorials)

elif args.command == "filtertutorials":
tutorials = shared_functions.load_json(args.all_tutorials)
all_tutorials = shared_functions.load_json(args.all_tutorials)
# get categories and training to exclude
tags = shared_functions.read_file(args.tags)
# filter training lists
filtered_tutorials = filter_tutorials(tutorials, tags)
filtered_tutorials = filter_tutorials(all_tutorials, tags)
export_tutorials_to_tsv(filtered_tutorials, args.filtered_tutorials)
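
One behavioural thread in this file's cleanup is that get_request_json now takes headers as a required argument, so every caller passes an explicit {} when no Authorization header is needed (see the topics.json and feedback2.json calls above). The function body is collapsed in this view; a minimal sketch of what a requests-based implementation of that signature typically looks like (an assumption, not the project's actual code):

import requests


def get_request_json(url: str, headers: dict) -> dict:
    """
    Return JSON output using request
    """
    # Sketch only: raise on HTTP errors, then decode the JSON payload.
    r = requests.get(url, headers=headers)
    r.raise_for_status()
    return r.json()


# For the Plausible stats call above, the headers carry the API token:
# get_request_json(url, {"Authorization": f"Bearer {plausible_api}"})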


6 changes: 3 additions & 3 deletions bin/shared_functions.py
@@ -42,12 +42,12 @@ def export_to_json(data: List[Dict], output_fp: str) -> None:
json.dump(data, f, indent=4, sort_keys=True)


def load_json(input_df: str):
def load_json(input_df: str) -> Dict:
"""
Read a JSON file
"""
with Path(input_df).open("r") as t:
content = json.load(t)
content = json.load(t)
return content


@@ -64,4 +64,4 @@ def read_suite_per_tool_id(tool_fp: str) -> Dict:
"Galaxy wrapper owner": suite["Galaxy wrapper id"],
"EDAM operation": suite["EDAM operation"],
}
return tools
return tools
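
The shared_functions.py changes are annotation and whitespace fixes only; behaviour is unchanged. A small, hypothetical round trip with the helpers in this module (the file name and data are illustrative):

import shared_functions

# Export a list of suite records, then read it back.
data = [{"Galaxy wrapper id": "bwa", "EDAM operation": ["Sequence alignment"]}]
shared_functions.export_to_json(data, "all_tools.json")
loaded = shared_functions.load_json("all_tools.json")
# load_json is annotated -> Dict but returns whatever the JSON file holds
# (here, the list written above), so the round trip compares equal.
assert loaded == data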
