Skip to content

Commit

Permalink
only use usegalaxy atm (#36)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulzierep authored Jan 10, 2024
1 parent a1c7778 commit 0c02a2b
Show file tree
Hide file tree
Showing 2 changed files with 3,068 additions and 0 deletions.
52 changes: 52 additions & 0 deletions bin/extract_galaxy_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,56 @@
"https://usegalaxy.eu",
]

# Usage-statistics sources, one entry per Galaxy server:
#   key   -> name of the column added to the exported tool table
#   value -> path to the CSV file holding that server's tool usage counts
# NOTE(review): the path is relative, so it is presumably resolved against the
# working directory the script is started from -- confirm.
GALAXY_TOOL_STATS = {
    "https://usegalaxy.eu usage": "../data/usage_stats/tool_usage_per_user_2022_23_EU.csv",
}

def get_last_url_position(toot_id: str) -> str:
    """
    Return the last "/"-separated segment of a Galaxy tool id.

    An id that contains no "/" is returned unchanged, so the same call
    works for toolshed-installed tools (url-like ids) and for local tools
    (plain ids).

    :param toot_id: galaxy tool id
    :return: the text after the final "/", or the whole id if it has none
    """
    # rpartition yields ("", "", whole_string) when the separator is absent,
    # so the third element is always the segment we want.
    _, _, last_segment = toot_id.rpartition("/")
    return last_segment


def add_tool_stats_to_tools(tools_df: pd.DataFrame, tool_stats_path: str, column_name: str) -> pd.DataFrame:
    """
    Adds the usage statistics to the community tool table

    Every row of ``tools_df`` is kept, in its original order, and the usage
    count is appended as a new column. Tools that do not appear in the
    statistics file get a count of 0 instead of being dropped.

    :param tools_df: table with the tools
        (must include "Galaxy wrapper id")
    :param tool_stats_path: path to the table with
        the tool stats (csv,
        must include "tool_name" and "count")
    :param column_name: column to add for the tool stats,
        different columns could be added for the main servers
    :return: new table made of ``tools_df`` plus the ``column_name`` column
    """

    # parse csv
    tool_stats_df = pd.read_csv(tool_stats_path)

    # extract tool id
    tool_stats_df["Galaxy wrapper id"] = tool_stats_df["tool_name"].apply(get_last_url_position)

    # group local and toolshed tools into one entry; the result has exactly
    # one row per wrapper id, so the merge below cannot duplicate tool rows
    usage_per_tool = (
        tool_stats_df.groupby("Galaxy wrapper id", as_index=False)["count"]
        .sum()
        .rename(columns={"count": column_name})
    )

    # left join onto the tool table: the default inner join would silently
    # remove every tool that has no entry in the statistics file
    community_tool_stats = pd.merge(tools_df, usage_per_tool, on="Galaxy wrapper id", how="left")

    # tools without recorded usage come out of the join as NaN; report them as
    # 0 and restore the dtype of the aggregated counts (NaN forces float)
    community_tool_stats[column_name] = (
        community_tool_stats[column_name].fillna(0).astype(usage_per_tool[column_name].dtype)
    )

    return community_tool_stats


def add_usage_stats_for_all_server(tools_df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds one usage-statistics column per entry of GALAXY_TOOL_STATS

    :param tools_df: table with the tools
    :return: the table returned after the statistics of every configured
        server have been added in turn
    """
    enriched_df = tools_df
    for stats_column, stats_csv in GALAXY_TOOL_STATS.items():
        # each pass works on the result of the previous one, so the columns
        # accumulate in the order the servers are listed
        enriched_df = add_tool_stats_to_tools(
            enriched_df,
            tool_stats_path=stats_csv,
            column_name=stats_column,
        )
    return enriched_df


def read_file(filepath: Optional[str]) -> List[str]:
"""
Expand Down Expand Up @@ -453,6 +503,8 @@ def export_tools(tools: List[Dict], output_fp: str, format_list_col: bool = Fals
df["Galaxy tool ids"] = format_list_column(df["Galaxy tool ids"])
df = add_instances_to_table(df)

df = add_usage_stats_for_all_server(df)

df.to_csv(output_fp, sep="\t", index=False)


Expand Down
Loading

0 comments on commit 0c02a2b

Please sign in to comment.