-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update stats #78
Update stats #78
Changes from 2 commits
7b065a5
2c416c7
39540e0
27a26a6
92204fe
1d64d30
7fc8e15
07e072f
65be0b3
770d62f
8e4c152
bc366c9
e45242d
922d19d
05e6280
5c96e05
3d3edea
ea0499b
8ca74d3
5c5dd64
7d95919
97c0808
910a95f
136016f
3583806
c78bb38
7749357
725a5dc
a4f5666
fdecb33
6518e14
859e9b4
0de4d1e
f8839eb
5ed1176
b139c9d
bceb24e
aa160c0
8669668
98312ad
bffbc23
dfec6be
d30da25
9b13613
c7f1d95
660ab9c
6be6058
50bd451
c7acc06
a16d3f5
93db1e7
773f361
6a8db1d
24899a6
c680879
7b2ed06
b5023c6
22eb17e
97c5299
da19f3b
ecd9ec6
33a39f4
fd11c1b
3ea51b9
af02011
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -23,18 +23,45 @@ | |||||||||||||||||
from github.ContentFile import ContentFile | ||||||||||||||||||
from github.Repository import Repository | ||||||||||||||||||
|
||||||||||||||||||
COLUMN_ORDER = [ | ||||||||||||||||||
"Galaxy wrapper id", | ||||||||||||||||||
"Galaxy tool ids", | ||||||||||||||||||
"No. tools in the suite", | ||||||||||||||||||
"Description", | ||||||||||||||||||
"bio.tool id", | ||||||||||||||||||
"bio.tool ids", | ||||||||||||||||||
"bio.tool name", | ||||||||||||||||||
"biii", | ||||||||||||||||||
"bio.tool description", | ||||||||||||||||||
"EDAM operation", | ||||||||||||||||||
"EDAM topic", | ||||||||||||||||||
"Conda id", | ||||||||||||||||||
"Conda version", | ||||||||||||||||||
"Galaxy wrapper version", | ||||||||||||||||||
"Status", | ||||||||||||||||||
"ToolShed categories", | ||||||||||||||||||
"ToolShed id", | ||||||||||||||||||
"Source", | ||||||||||||||||||
"Galaxy wrapper owner", | ||||||||||||||||||
"Galaxy wrapper source", | ||||||||||||||||||
"Galaxy wrapper parsed folder", | ||||||||||||||||||
"Galaxy Star Availability", | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why have a separate column here? |
||||||||||||||||||
"All Server Availability", | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe "Public servers with at least one tool" |
||||||||||||||||||
"Tools available on: UseGalaxy.org", | ||||||||||||||||||
"Tools available on: UseGalaxy.org.au", | ||||||||||||||||||
"Tools available on: UseGalaxy.eu", | ||||||||||||||||||
"Tools available on: UseGalaxy.org.fr", | ||||||||||||||||||
Comment on lines
+50
to
+53
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
"No. of tool users (2022-2023) (usegalaxy.eu)", | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
"Total tool usage (usegalaxy.eu)", | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
] | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
# Config variables | ||||||||||||||||||
BIOTOOLS_API_URL = "https://bio.tools" | ||||||||||||||||||
# BIOTOOLS_API_URL = "https://130.226.25.21" | ||||||||||||||||||
|
||||||||||||||||||
# GALAXY_SERVER_URLS = [ | ||||||||||||||||||
# "https://usegalaxy.org", | ||||||||||||||||||
# "https://usegalaxy.org.au", | ||||||||||||||||||
# "https://usegalaxy.eu", | ||||||||||||||||||
# "https://usegalaxy.fr", | ||||||||||||||||||
# ] | ||||||||||||||||||
|
||||||||||||||||||
GALAXY_SERVER_URLS = { | ||||||||||||||||||
USEGALAXY_STAR_SERVER_URLS = { | ||||||||||||||||||
"UseGalaxy.org": "https://usegalaxy.org", | ||||||||||||||||||
"UseGalaxy.org.au": "https://usegalaxy.org.au", | ||||||||||||||||||
"UseGalaxy.eu": "https://usegalaxy.eu", | ||||||||||||||||||
|
@@ -632,7 +659,7 @@ def aggregate_servers(df: pd.DataFrame, server_names: list, column_name: str) -> | |||||||||||||||||
|
||||||||||||||||||
def extract_public_galaxy_servers_tools() -> Dict: | ||||||||||||||||||
""" | ||||||||||||||||||
Extract the tools from the public Galaxy servers using their API -> this is actually done | ||||||||||||||||||
Extract the tools from the public Galaxy servers using their API -> this is actually done in | ||||||||||||||||||
galaxy_tool_extractor/data/usage_stats/get_public_galaxy_servers.py | ||||||||||||||||||
Here we only load the list -> much faster | ||||||||||||||||||
TODO: run get_public_galaxy_servers.py as CI | ||||||||||||||||||
|
@@ -651,6 +678,14 @@ def format_list_column(col: pd.Series) -> pd.Series: | |||||||||||||||||
return col.apply(lambda x: ", ".join(str(i) for i in x)) | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
def order_output_columns(df: pd.DataFrame) -> pd.DataFrame: | ||||||||||||||||||
""" | ||||||||||||||||||
Reorder the columns to match COLUMN_ORDER, the preferred output layout | ||||||||||||||||||
""" | ||||||||||||||||||
df = df.reindex(columns=COLUMN_ORDER) | ||||||||||||||||||
return df | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
def export_tools( | ||||||||||||||||||
tools: List[Dict], output_fp: str, format_list_col: bool = False, add_usage_stats: bool = False | ||||||||||||||||||
) -> None: | ||||||||||||||||||
|
@@ -673,12 +708,17 @@ def export_tools( | |||||||||||||||||
df["Galaxy tool ids"] = format_list_column(df["Galaxy tool ids"]) | ||||||||||||||||||
|
||||||||||||||||||
# add availability of star servers | ||||||||||||||||||
df = add_instances_to_table(df, GALAXY_SERVER_URLS) | ||||||||||||||||||
df = aggregate_servers(df, list(GALAXY_SERVER_URLS.keys()), column_name="Galaxy Star Availability") | ||||||||||||||||||
df = add_instances_to_table(df, USEGALAXY_STAR_SERVER_URLS) | ||||||||||||||||||
df = aggregate_servers(df, list(USEGALAXY_STAR_SERVER_URLS.keys()), column_name="Galaxy Star Availability") | ||||||||||||||||||
|
||||||||||||||||||
# rename the columns for each server | ||||||||||||||||||
server_reindex_columns = {f"Tools available on: {k}": v for k, v in USEGALAXY_STAR_SERVER_URLS.items()} | ||||||||||||||||||
df = df.rename(columns=server_reindex_columns) | ||||||||||||||||||
|
||||||||||||||||||
print(df) | ||||||||||||||||||
|
||||||||||||||||||
# add availability of all public servers (not only the star servers) | ||||||||||||||||||
# only add the aggregated column | ||||||||||||||||||
|
||||||||||||||||||
server_list = extract_public_galaxy_servers_tools() | ||||||||||||||||||
|
||||||||||||||||||
df_selection = df.loc[:, ["Galaxy wrapper id", "Galaxy tool ids"]].copy() | ||||||||||||||||||
|
@@ -689,6 +729,8 @@ def export_tools( | |||||||||||||||||
if add_usage_stats: | ||||||||||||||||||
df = add_usage_stats_for_all_server(df) | ||||||||||||||||||
|
||||||||||||||||||
df = order_output_columns(df) | ||||||||||||||||||
|
||||||||||||||||||
df.to_csv(output_fp, sep="\t", index=False) | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we not call it "Galaxy suite id"?