From 9dfe539d074e4e1291f6a8fb7b2b2fafa4a02d99 Mon Sep 17 00:00:00 2001
From: Nicola Soranzo <nicola.soranzo@earlham.ac.uk>
Date: Tue, 31 Oct 2023 14:23:47 +0000
Subject: [PATCH] Fix type annotations + add `.isort.cfg`

---
 .isort.cfg                  | 11 +++++++++++
 bin/extract_galaxy_tools.py | 23 +++++++++++++++--------
 2 files changed, 26 insertions(+), 8 deletions(-)
 create mode 100644 .isort.cfg

diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 00000000..b7d1f8b2
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,11 @@
+[settings]
+combine_as_imports=true
+force_alphabetical_sort_within_sections=true
+# Override force_grid_wrap value from profile=black, but black is still happy
+force_grid_wrap=2
+# Same line length as for black
+line_length=120
+no_lines_before=LOCALFOLDER
+profile=black
+reverse_relative=true
+skip_gitignore=true
diff --git a/bin/extract_galaxy_tools.py b/bin/extract_galaxy_tools.py
index 3825c1b6..19694a0d 100644
--- a/bin/extract_galaxy_tools.py
+++ b/bin/extract_galaxy_tools.py
@@ -5,7 +5,12 @@
 import time
 import xml.etree.ElementTree as et
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+)
 
 import pandas as pd
 import requests
@@ -19,7 +24,7 @@
 BIOTOOLS_API_URL = "https://130.226.25.21"
 
 
-def read_file(filepath):
+def read_file(filepath) -> List[str]:
     """
     Read an optional file with 1 element per line
 
@@ -194,7 +199,9 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
     if metadata["ToolShed categories"] is None:
         metadata["ToolShed categories"] = []
     # find and parse macro file
-    for file in repo.get_contents(tool.path):
+    file_list = repo.get_contents(tool.path)
+    assert isinstance(file_list, list)
+    for file in file_list:
         if "macro" in file.name and file.name.endswith("xml"):
             file_content = get_string_content(file)
             root = et.fromstring(file_content)
@@ -208,7 +215,7 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
                         if biotools is not None:
                             metadata["bio.tool id"] = biotools
     # parse XML file and get meta data from there, also tool ids
-    for file in repo.get_contents(tool.path):
+    for file in file_list:
         if file.name.endswith("xml") and "macro" not in file.name:
             file_content = get_string_content(file)
             try:
@@ -272,7 +279,7 @@ def get_tool_metadata(tool: ContentFile, repo: Repository) -> Optional[Dict[str,
     return metadata
 
 
-def parse_tools(repo: Repository):
+def parse_tools(repo: Repository) -> List[Dict[str, Any]]:
     """
     Parse tools in a GitHub repository, extract them and their metadata
 
@@ -335,7 +342,7 @@ def format_list_column(col):
     return col
 
 
-def export_tools(tools: list, output_fp: str) -> None:
+def export_tools(tools: List[Dict], output_fp: str) -> None:
     """
     Export tool metadata to tsv output file
 
@@ -350,7 +357,7 @@ def export_tools(tools: list, output_fp: str) -> None:
     df.to_csv(output_fp, sep="\t", index=False)
 
 
-def filter_tools(tools, ts_cat: List[str], excluded_tools: List[str], keep_tools: List[str]):
+def filter_tools(tools: List[Dict], ts_cat: List[str], excluded_tools: List[str], keep_tools: List[str]) -> List[Dict]:
     """
     Filter tools for specific ToolShed categories and add information if to keep or to exclude
 
@@ -407,7 +414,7 @@ def filter_tools(tools, ts_cat: List[str], excluded_tools: List[str], keep_tools
     # get list of GitHub repositories to parse
     repo_list = get_tool_github_repositories(g)
     # parse tools in GitHub repositories to extract metada, filter by TS categories and export to output file
-    tools = []
+    tools: List[Dict] = []
     for r in repo_list:
         print(r)
         if "github" not in r:
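
Note (not part of the patch): a minimal sketch of what the new `.isort.cfg` settings do to the import block touched above, using isort's public Python API. It assumes isort >= 5 is installed and simply passes the same options as the config file via keyword arguments; in the repository itself the config is picked up automatically when isort runs over the tree.

import isort

# The one-line typing import from the old file. With profile=black plus
# force_grid_wrap=2, isort rewrites any from-import with two or more names
# into the parenthesised, one-name-per-line form seen in the diff hunk above.
messy = "from typing import Any, Dict, List, Optional\n"
print(
    isort.code(
        messy,
        profile="black",
        line_length=120,
        force_grid_wrap=2,
        combine_as_imports=True,
    )
)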
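
Note (not part of the patch): the `assert isinstance(file_list, list)` added in get_tool_metadata() narrows PyGithub's return type. Repository.get_contents() is annotated as returning either a single ContentFile (when the path is a file) or a list of ContentFile (when the path is a directory), so mypy rejects iterating over the union directly. A minimal sketch of the pattern; the repository and directory names here are only placeholders, and the real script authenticates with a token.

from typing import (
    List,
    Union,
)

from github import Github
from github.ContentFile import ContentFile

g = Github()  # anonymous client, placeholder for the authenticated one in the script
repo = g.get_repo("galaxyproject/tools-iuc")  # example repository only
contents: Union[ContentFile, List[ContentFile]] = repo.get_contents("tools")
assert isinstance(contents, list)  # narrow the union before iterating, as in the patch
for content_file in contents:
    print(content_file.path)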