Skip to content

Commit

Permalink
Rename shared
Browse files Browse the repository at this point in the history
  • Loading branch information
bebatut committed Jul 8, 2024
1 parent 5d360b8 commit f91a42a
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 47 deletions.
8 changes: 4 additions & 4 deletions bin/compare_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from typing import Set

import pandas as pd
import shared_functions
import shared


def get_tutorials_tool_suites(tuto_fp: str, tool_fp: str) -> Set:
"""
Get tool suite ids for all tools in tutorials
"""
tutorials = pd.read_csv(tuto_fp, sep="\t", keep_default_na=False).to_dict("records")
all_tools = shared_functions.read_suite_per_tool_id(tool_fp)
all_tools = shared.read_suite_per_tool_id(tool_fp)
print(all_tools)
tuto_tool_suites = set()
for tuto in tutorials:
Expand Down Expand Up @@ -41,10 +41,10 @@ def update_excl_keep_tool_lists(tuto_tool_suites: Set, excl_tool_fp: str, keep_t
Update the lists in to keep and exclude with tool suites in tutorials
"""
# exclude from the list of tools to exclude the tools that are in tutorials
excl_tools = set(shared_functions.read_file(excl_tool_fp)) - tuto_tool_suites
excl_tools = set(shared.read_file(excl_tool_fp)) - tuto_tool_suites
write_tool_list(excl_tools, excl_tool_fp)
# add from the list of tools to keep the tools that are in tutorials
keep_tools = set(shared_functions.read_file(keep_tool_fp)) | tuto_tool_suites
keep_tools = set(shared.read_file(keep_tool_fp)) | tuto_tool_suites
write_tool_list(keep_tools, keep_tool_fp)


Expand Down
20 changes: 9 additions & 11 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import pandas as pd
import requests
import shared_functions
import shared
import yaml
from github import Github
from github.ContentFile import ContentFile
Expand Down Expand Up @@ -522,19 +522,17 @@ def export_tools_to_tsv(

df = pd.DataFrame(tools).sort_values("Galaxy wrapper id")
if format_list_col:
df["ToolShed categories"] = shared_functions.format_list_column(df["ToolShed categories"])
df["EDAM operation"] = shared_functions.format_list_column(df["EDAM operation"])
df["EDAM topic"] = shared_functions.format_list_column(df["EDAM topic"])
df["ToolShed categories"] = shared.format_list_column(df["ToolShed categories"])
df["EDAM operation"] = shared.format_list_column(df["EDAM operation"])
df["EDAM topic"] = shared.format_list_column(df["EDAM topic"])

df["EDAM operation (no superclasses)"] = shared_functions.format_list_column(
df["EDAM operation (no superclasses)"]
)
df["EDAM topic (no superclasses)"] = shared_functions.format_list_column(df["EDAM topic (no superclasses)"])
df["EDAM operation (no superclasses)"] = shared.format_list_column(df["EDAM operation (no superclasses)"])
df["EDAM topic (no superclasses)"] = shared.format_list_column(df["EDAM topic (no superclasses)"])

df["bio.tool ids"] = shared_functions.format_list_column(df["bio.tool ids"])
df["bio.tool ids"] = shared.format_list_column(df["bio.tool ids"])

# the Galaxy tools need to be formatted for the add_instances_to_table to work
df["Galaxy tool ids"] = shared_functions.format_list_column(df["Galaxy tool ids"])
df["Galaxy tool ids"] = shared.format_list_column(df["Galaxy tool ids"])

# if add_usage_stats:
# df = add_usage_stats_for_all_server(df)
Expand Down Expand Up @@ -752,7 +750,7 @@ def reduce_ontology_terms(terms: List, ontology: Any) -> List:
with Path(args.tools).open() as f:
tools = json.load(f)
# get categories and tools to exclude
categories = shared_functions.read_file(args.categories)
categories = shared.read_file(args.categories)
try:
status = pd.read_csv(args.status, sep="\t", index_col=0, header=None).to_dict("index")
except Exception as ex:
Expand Down
45 changes: 13 additions & 32 deletions bin/extract_gtn_tutorials.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,18 @@
#!/usr/bin/env python

import argparse
from datetime import (
date,
datetime,
)
from datetime import date
from typing import (
Dict,
List,
)

import pandas as pd
import requests
import shared_functions
import shared
import yt_dlp
from owlready2 import get_ontology


def get_request_json(url: str, headers: dict, timeout: float = 60.0) -> dict:
    """
    Return the JSON payload obtained with a GET request

    :param url: URL to query
    :param headers: HTTP headers to send with the request
    :param timeout: seconds to wait for the server (connection and read) before giving up
    :return: decoded JSON content of the response
    :raises requests.HTTPError: if the server answers with an error status
    :raises requests.Timeout: if the server does not answer within ``timeout`` seconds
    """
    # without an explicit timeout, requests waits forever on an unresponsive server
    r = requests.get(url, auth=None, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()


def format_date(date: str) -> str:
    """
    Format an ISO 8601 date or datetime string as ``YYYY-MM-DD``

    :param date: ISO 8601 string, e.g. ``2024-07-08`` or ``2024-07-08T12:30:00+00:00``
    :return: calendar date of the parsed value formatted as ``YYYY-MM-DD``
    :raises ValueError: if the string cannot be parsed
    """
    # datetime.fromisoformat only understands a trailing "Z" (UTC designator)
    # from Python 3.11 onwards: rewrite it as an explicit offset so that older
    # interpreters parse it too
    if date.endswith("Z"):
        date = f"{date[:-1]}+00:00"
    return datetime.fromisoformat(date).strftime("%Y-%m-%d")


def add_supported_servers(tuto: dict) -> None:
"""
Split supported_servers into 2 lists there
Expand Down Expand Up @@ -100,7 +81,7 @@ def get_visit_results(url: str, tuto: dict, plausible_api: str) -> None:
Extract visit results from Plausible URL
"""
headers = {"Authorization": f"Bearer {plausible_api}"}
results = get_request_json(url, headers)
results = shared.get_request_json(url, headers)
if "results" in results:
for metric in ["visitors", "pageviews", "visit_duration"]:
tuto[metric] += results["results"][metric]["value"]
Expand Down Expand Up @@ -146,8 +127,8 @@ def get_youtube_stats(tuto: dict) -> None:

def format_tutorial(tuto: dict, edam_ontology: dict, tools: dict, feedback: dict, plausible_api: str) -> Dict:
tuto["url"] = f'https://training.galaxyproject.org/{tuto["url"]}'
tuto["mod_date"] = format_date(tuto["mod_date"])
tuto["pub_date"] = format_date(tuto["pub_date"])
tuto["mod_date"] = shared.format_date(tuto["mod_date"])
tuto["pub_date"] = shared.format_date(tuto["pub_date"])
add_supported_servers(tuto)
get_short_tool_ids(tuto)
get_edam_topics(tuto, edam_ontology)
Expand All @@ -162,7 +143,7 @@ def get_feedback_per_tutorials() -> Dict:
"""
Get feedback from GTN API and group per tutorial
"""
feedback = get_request_json("https://training.galaxyproject.org/training-material/api/feedback2.json", {})
feedback = shared.get_request_json("https://training.galaxyproject.org/training-material/api/feedback2.json", {})
feedback_per_tuto = {} # type: dict
for tutorials in feedback.values():
for tuto, feedback in tutorials.items():
Expand All @@ -183,15 +164,15 @@ def get_tutorials(
"""
Extract training material from the GTN API, format them, extract EDAM operations from tools, feedback stats, view stats, etc
"""
tools = shared_functions.read_suite_per_tool_id(tool_fp)
tools = shared.read_suite_per_tool_id(tool_fp)
feedback = get_feedback_per_tutorials()
edam_ontology = get_ontology("https://edamontology.org/EDAM_unstable.owl").load()
topics = get_request_json("https://training.galaxyproject.org/training-material/api/topics.json", {})
topics = shared.get_request_json("https://training.galaxyproject.org/training-material/api/topics.json", {})
if run_test:
topics = {"microbiome": topics["microbiome"]}
tutos = []
for topic in topics:
topic_information = get_request_json(
topic_information = shared.get_request_json(
f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json", {}
)
for tuto in topic_information["materials"]:
Expand Down Expand Up @@ -230,7 +211,7 @@ def export_tutorials_to_tsv(tutorials: list, output_fp: str) -> None:
)

for col in ["exact_supported_servers", "inexact_supported_servers", "short_tools", "edam_operation", "edam_topic"]:
df[col] = shared_functions.format_list_column(df[col])
df[col] = shared.format_list_column(df[col])

df = (
df.rename(
Expand Down Expand Up @@ -337,12 +318,12 @@ def export_tutorials_to_tsv(tutorials: list, output_fp: str) -> None:

if args.command == "extracttutorials":
tutorials = get_tutorials(args.tools, args.api, args.test)
shared_functions.export_to_json(tutorials, args.all_tutorials)
shared.export_to_json(tutorials, args.all_tutorials)

elif args.command == "filtertutorials":
all_tutorials = shared_functions.load_json(args.all_tutorials)
all_tutorials = shared.load_json(args.all_tutorials)
# get categories and training to exclude
tags = shared_functions.read_file(args.tags)
tags = shared.read_file(args.tags)
# filter training lists
filtered_tutorials = filter_tutorials(all_tutorials, tags)
export_tutorials_to_tsv(filtered_tutorials, args.filtered_tutorials)
17 changes: 17 additions & 0 deletions bin/shared_functions.py → bin/shared.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

import json
from datetime import datetime
from pathlib import Path
from typing import (
Dict,
Expand All @@ -9,6 +10,7 @@
)

import pandas as pd
import requests


def format_list_column(col: pd.Series) -> pd.Series:
Expand Down Expand Up @@ -65,3 +67,18 @@ def read_suite_per_tool_id(tool_fp: str) -> Dict:
"EDAM operation": suite["EDAM operation"],
}
return tools


def get_request_json(url: str, headers: dict, timeout: float = 60.0) -> dict:
    """
    Return the JSON payload obtained with a GET request

    :param url: URL to query
    :param headers: HTTP headers to send with the request
    :param timeout: seconds to wait for the server (connection and read) before giving up
    :return: decoded JSON content of the response
    :raises requests.HTTPError: if the server answers with an error status
    :raises requests.Timeout: if the server does not answer within ``timeout`` seconds
    """
    # without an explicit timeout, requests waits forever on an unresponsive server
    r = requests.get(url, auth=None, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()


def format_date(date: str) -> str:
    """
    Format an ISO 8601 date or datetime string as ``YYYY-MM-DD``

    :param date: ISO 8601 string, e.g. ``2024-07-08`` or ``2024-07-08T12:30:00+00:00``
    :return: calendar date of the parsed value formatted as ``YYYY-MM-DD``
    :raises ValueError: if the string cannot be parsed
    """
    # datetime.fromisoformat only understands a trailing "Z" (UTC designator)
    # from Python 3.11 onwards: rewrite it as an explicit offset so that older
    # interpreters parse it too
    if date.endswith("Z"):
        date = f"{date[:-1]}+00:00"
    return datetime.fromisoformat(date).strftime("%Y-%m-%d")

0 comments on commit f91a42a

Please sign in to comment.