From 1091c9b7bc8e017dc6eeb30188a25e0afbedeabd Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 6 May 2022 10:10:30 +0200 Subject: [PATCH 1/6] Test after decoding feature id --- chado/client.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/chado/client.py b/chado/client.py index 51ea42c..a32ddbf 100644 --- a/chado/client.py +++ b/chado/client.py @@ -6,6 +6,7 @@ from __future__ import unicode_literals import re +from urllib.parse import unquote from chado.exceptions import RecordNotFoundError @@ -116,15 +117,22 @@ def _match_feature(self, feature_id, re_name, query_type, organism_id, skip_miss re_res = re.search(re_name, feature_id) if re_res: feature_id = re_res.group(1) + else: + re_res = re.search(re_name, unquote(feature_id)) + if re_res: + feature_id = re_res.group(1) cache_id = (feature_id, organism_id, seqterm) if cache_id not in self._feature_cache: - if skip_missing: - warn('Could not find feature with name "%s", skipping it', feature_id) - return None - else: - raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) + # Check after decoding + cache_id = (unquote(feature_id), organism_id, seqterm) + if cache_id not in self._feature_cache: + if skip_missing: + warn('Could not find feature with name "%s", skipping it', feature_id) + return None + else: + raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) return self._feature_cache[cache_id]['feature_id'] From d5f254437ba9df82ae8e9c4e14272f656bfafb66 Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 6 May 2022 10:14:35 +0200 Subject: [PATCH 2/6] Blanket decode --- chado/client.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/chado/client.py b/chado/client.py index a32ddbf..9ae540e 100644 --- a/chado/client.py +++ b/chado/client.py @@ -112,27 +112,21 @@ def _init_feature_cache(self, organism_id, type_id=None, match_on_name=False, fo def _match_feature(self, feature_id, re_name, query_type, organism_id, skip_missing=False): seqterm = self.ci.get_cvterm_id(query_type, 'sequence') + feature_id = unquote(feature_id) if re_name: re_res = re.search(re_name, feature_id) if re_res: feature_id = re_res.group(1) - else: - re_res = re.search(re_name, unquote(feature_id)) - if re_res: - feature_id = re_res.group(1) cache_id = (feature_id, organism_id, seqterm) if cache_id not in self._feature_cache: - # Check after decoding - cache_id = (unquote(feature_id), organism_id, seqterm) - if cache_id not in self._feature_cache: - if skip_missing: - warn('Could not find feature with name "%s", skipping it', feature_id) - return None - else: - raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) + if skip_missing: + warn('Could not find feature with name "%s", skipping it', feature_id) + return None + else: + raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) return self._feature_cache[cache_id]['feature_id'] From 4f2a83d66ad8f68a7cca5dedfaadaa43126f3ef0 Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 6 May 2022 10:14:35 +0200 Subject: [PATCH 3/6] Blanket decode --- chado/client.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/chado/client.py b/chado/client.py index a32ddbf..9ae540e 100644 --- a/chado/client.py +++ b/chado/client.py @@ -112,27 +112,21 @@ def _init_feature_cache(self, organism_id, type_id=None, match_on_name=False, fo def _match_feature(self, feature_id, re_name, query_type, organism_id, skip_missing=False): seqterm = self.ci.get_cvterm_id(query_type, 'sequence') + feature_id = unquote(feature_id) if re_name: re_res = re.search(re_name, feature_id) if re_res: feature_id = re_res.group(1) - else: - re_res = re.search(re_name, unquote(feature_id)) - if re_res: - feature_id = re_res.group(1) cache_id = (feature_id, organism_id, seqterm) if cache_id not in self._feature_cache: - # Check after decoding - cache_id = (unquote(feature_id), organism_id, seqterm) - if cache_id not in self._feature_cache: - if skip_missing: - warn('Could not find feature with name "%s", skipping it', feature_id) - return None - else: - raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) + if skip_missing: + warn('Could not find feature with name "%s", skipping it', feature_id) + return None + else: + raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) return self._feature_cache[cache_id]['feature_id'] From 92d96f1f48ea6e7a1571f906501dfc4426bbaaac Mon Sep 17 00:00:00 2001 From: mboudet Date: Fri, 6 May 2022 10:23:43 +0200 Subject: [PATCH 4/6] Actually do it more subtly --- chado/client.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/chado/client.py b/chado/client.py index 9ae540e..a7f7739 100644 --- a/chado/client.py +++ b/chado/client.py @@ -112,16 +112,27 @@ def _init_feature_cache(self, organism_id, type_id=None, match_on_name=False, fo def _match_feature(self, feature_id, re_name, query_type, organism_id, skip_missing=False): seqterm = self.ci.get_cvterm_id(query_type, 'sequence') - feature_id = unquote(feature_id) if re_name: re_res = re.search(re_name, feature_id) if re_res: feature_id = re_res.group(1) + else: + re_res = re.search(re_name, unquote(feature_id)) + if re_res: + feature_id = re_res.group(1) cache_id = (feature_id, organism_id, seqterm) if cache_id not in self._feature_cache: + # Check after decoding + cache_id = (unquote(feature_id), organism_id, seqterm) + if cache_id not in self._feature_cache: + if skip_missing: + warn('Could not find feature with name "%s", skipping it', feature_id) + return None + else: + raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) if skip_missing: warn('Could not find feature with name "%s", skipping it', feature_id) return None From 8a53bafcd31d71b09e398dc89c606e73fe1a0317 Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 10 May 2022 09:05:33 +0200 Subject: [PATCH 5/6] typo --- chado/client.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/chado/client.py b/chado/client.py index a7f7739..a32ddbf 100644 --- a/chado/client.py +++ b/chado/client.py @@ -133,11 +133,6 @@ def _match_feature(self, feature_id, re_name, query_type, organism_id, skip_miss return None else: raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) - if skip_missing: - warn('Could not find feature with name "%s", skipping it', feature_id) - return None - else: - raise RecordNotFoundError('Could not find feature with name "%s"' % feature_id) return self._feature_cache[cache_id]['feature_id'] From c83db9e9d71dc4135a569c57df5e1312332de8c6 Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 10 May 2022 10:40:13 +0200 Subject: [PATCH 6/6] Changelog & setup version --- README.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 300755d..a9a5bbd 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,9 @@ $ chakin feature load_fasta \ ## History +- 2.3.9 + - URL decode GFF ids when loading blast/interpro/others + - 2.3.8 - Fix connection closed error when loading big interproscan files diff --git a/setup.py b/setup.py index 9aad9f2..bc2c4ed 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="chado", - version='2.3.8', + version='2.3.9', description="Chado library", author="Anthony Bretaudeau", author_email="anthony.bretaudeau@inrae.fr",