From 50b496e6a63ac554f0e259c6dbfb2debfde4f8a8 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 25 Feb 2021 22:00:29 -0500
Subject: [PATCH 01/21] Ajout fonctionnel de techniques avec plusieurs
 datasets.

---

Notes (review):
    * _get_DFP_mask assigns DFP_mask in both branches but has no return
      statement, so it always returns None; predict() forwards that value
      to _get_competences(). Add `return DFP_mask`.
    * frienemy_pruning_preprocessed is called in _get_DFP_mask but is not
      imported by this new module, so the DFP branch raises NameError.
      deslib/base.py imports it from deslib.util.dfp.
    * _get_base_proba_and_pred and _get_competences read self.needs_proba
      and self.DFP, i.e. the MultiDatasets wrapper's own settings, whereas
      predict() checks ds_classifier.needs_proba. If the two differ,
      base_probabilities is None when predict() indexes it. (PATCH 03/21
      rebinds self to ds_classifier inside these helpers.)
    * In predict(), the with_IH branch raises ValueError before the call
      to _calculate_hardness_level, so that call is unreachable.
    * pred_ds is only bound when ds_classifier is a BaseDCS or BaseDES
      subclass; any other template fails with UnboundLocalError at
      `predicted_labels[ind_ds_original_matrix] = pred_ds`. An explicit
      else branch raising a clear error would be safer.
    * hasattr(self.ds_classifier, "clustering_") tests the template object,
      while fit() only fits deep copies of it. Presumably the fitted
      ds_classifier was meant -- TODO confirm.
    * The disagreement loop calls _get_base_proba_and_pred(ds_classifier,
      X[i]) a second time on the full X[i]; the per-dataset results of the
      first loop could be kept and reused.

 deslib/multi_datasets.py | 318 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 318 insertions(+)
 create mode 100644 deslib/multi_datasets.py

diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
new file mode 100644
index 0000000..150a1fe
--- /dev/null
+++ b/deslib/multi_datasets.py
@@ -0,0 +1,318 @@
+# coding=utf-8
+
+# Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
+#
+# License: BSD 3 clause
+
+import copy
+import numpy as np
+from scipy.stats import mode
+from sklearn.utils.validation import (check_is_fitted, check_array)
+
+from deslib.base import BaseDS
+from deslib.dcs.base import BaseDCS
+from deslib.des.base import BaseDES
+from deslib.util.aggregation import (weighted_majority_voting_rule,
+                                     majority_voting_rule,
+                                     aggregate_proba_ensemble_weighted)
+from deslib.util.instance_hardness import hardness_region_competence
+
+# Créer à partir de KNORA-U
+class MultiDatasets(BaseDS):
+    def __init__(self, ds_classifier, pool_classifiers=None):
+        super(MultiDatasets, self).__init__(pool_classifiers)
+        self.ds_classifier = ds_classifier
+
+    def fit(self, X, y):
+        """
+        Parameters
+        ----------
+        X : array of shape (n_datasets, n_samples, n_features)
+            The input data.
+
+        y : array of shape (n_datasets, n_samples)
+            class labels of each example in X.
+        """
+        n_datasets = len(X)
+        self.ds_classifiers = []
+        for i in range(n_datasets):
+            ds_classifier = copy.deepcopy(self.ds_classifier)
+            ds_classifier.pool_classifiers = self.pool_classifiers[i]
+            ds_classifier.fit(X[i], y[i])
+            self.ds_classifiers.append(ds_classifier)
+        self._setup_label_encoder(y[0])
+
+    def predict(self, X):
+        merged_base_probabilities = []
+        merged_base_predictions = []
+        n_datasets = len(X)
+        for i in range(n_datasets):
+            base_probabilities, base_predictions = \
+                self._get_base_proba_and_pred(self.ds_classifiers[i], X[i])
+            merged_base_probabilities.append(base_probabilities)
+            merged_base_predictions.append(base_predictions)
+        
+        if merged_base_probabilities[0] is not None:
+            merged_base_probabilities = np.concatenate(
+                merged_base_probabilities, axis=1)
+        else:
+            merged_base_probabilities = None
+        merged_base_predictions = np.concatenate(
+                merged_base_predictions, axis=1)
+        
+        n_samples = len(X[0])
+        predicted_labels = np.empty(n_samples, dtype=np.intp)
+        
+        all_agree_vector = BaseDS._all_classifier_agree(merged_base_predictions)
+        ind_all_agree = np.where(all_agree_vector)[0]   
+        
+        # Since the predictions are always the same, get the predictions of the
+        # first base classifier.
+        if ind_all_agree.size:
+            predicted_labels[ind_all_agree] = merged_base_predictions[
+                ind_all_agree, 0]
+
+        # For the samples with disagreement, perform the dynamic selection
+        # steps. First step is to collect the samples with disagreement
+        # between base classifiers
+        ind_disagreement = np.where(~all_agree_vector)[0]
+        if ind_disagreement.size:
+            merged_left_base_predictions = []
+            merged_competences = []
+            for i in range(n_datasets):
+                ds_classifier = self.ds_classifiers[i]
+                X_DS = X[i][ind_disagreement, :]
+                base_probabilities, base_predictions = \
+                    self._get_base_proba_and_pred(ds_classifier, X[i])
+
+                # If the method is based on clustering and does not use IH there
+                # is no need to compute the Neighbors
+                if hasattr(self.ds_classifier, "clustering_") \
+                        and not ds_classifier.with_IH:
+                    distances = neighbors = None
+                else:
+                    # Then, we estimate the nearest neighbors for all samples
+                    # that we need to call DS routines
+                    distances, neighbors = \
+                        ds_classifier._get_region_competence(X_DS)
+
+                if ds_classifier.with_IH:
+                    raise ValueError("TODO: traiter avec tous les inputs.")
+                    ind_ds_classifier, predicted_labels, neighbors, \
+                    distances = \
+                        self._calculate_hardness_level(ds_classifier,
+                                                       ind_disagreement,
+                                                       predicted_labels,
+                                                       neighbors,
+                                                       distances)
+                else:
+                    # IH was not considered. So all samples with disagreement
+                    # are passed down to the DS algorithm
+                    ind_ds_classifier = np.arange(ind_disagreement.size)
+
+                # At this stage the samples which all base classifiers agrees or
+                # that are associated with low hardness were already classified.
+                # The remaining samples are now passed down to the DS techniques
+                # for classification.
+
+                #  First check whether there are still samples to be classified.
+                if ind_ds_classifier.size:
+                    # IF the DFP pruning is considered, calculate the DFP mask
+                    # for all samples in X
+                    DFP_mask = self._get_DFP_mask(
+                        ds_classifier, ind_ds_classifier, neighbors)
+
+                # Get the real indices_ of the samples that will be classified
+                # using a DS algorithm.
+                ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
+
+                if ds_classifier.needs_proba:
+                    selected_probabilities = base_probabilities[
+                        ind_ds_original_matrix]
+                else:
+                    selected_probabilities = None
+
+                competences = self._get_competences(
+                    ds_classifier,
+                    X_DS[ind_ds_classifier],
+                    base_predictions[ind_ds_original_matrix],
+                    selected_probabilities,
+                    neighbors=neighbors,
+                    distances=distances,
+                    DFP_mask=DFP_mask)
+
+                merged_competences.append(competences)
+                merged_left_base_predictions.append(base_predictions[
+                                                    ind_ds_original_matrix])
+            
+            merged_left_base_predictions = np.concatenate(
+                merged_left_base_predictions, axis=1)
+            merged_competences = np.concatenate(merged_competences, axis=1)
+            
+            if issubclass(type(self.ds_classifier), BaseDCS):
+                pred_ds = self._get_dcs_predicted_label(self.ds_classifier,
+                    merged_left_base_predictions, merged_competences)
+            elif issubclass(type(self.ds_classifier), BaseDES):
+                pred_ds = self._get_des_predicted_label(self.ds_classifier,
+                    merged_left_base_predictions, merged_competences)
+            
+            predicted_labels[ind_ds_original_matrix] = pred_ds
+        
+        return self.classes_.take(predicted_labels)
+
+    def predict_proba(self, X):
+        raise ValueError("Méthode incomplète!")
+
+    def _get_base_proba_and_pred(self, ds_classifier, X):
+        # Check if the DS model was trained
+        check_is_fitted(ds_classifier,
+                        ["DSEL_processed_", "DSEL_data_", "DSEL_target_"])
+
+        # Check if X is a valid input
+        X = check_array(X)
+        ds_classifier._check_num_features(X)
+
+        if self.needs_proba:
+            base_probabilities = ds_classifier._predict_proba_base(X)
+            base_predictions = base_probabilities.argmax(axis=2)
+        else:
+            base_probabilities = None
+            base_predictions = ds_classifier._predict_base(X)
+
+        return base_probabilities, base_predictions
+
+    def _calculate_hardness_level(self, ds_classifier, ind_disagreement,
+            predicted_labels, neighbors, distances):
+        """
+        This function exists so parameters represent all datasets.
+        This code has been copied, but "self" has been replaced by
+        "ds_classifier" because this function doesn't exist elsewhere.
+        """
+        self = ds_classifier
+        # if IH is used, calculate the hardness level associated with
+        # each sample
+        hardness = hardness_region_competence(neighbors,
+                                              self.DSEL_target_,
+                                              self.safe_k)
+
+        # Get the index associated with the easy and hard samples.
+        # Samples with low hardness are passed down to the knn
+        # classifier while samples with high hardness are passed down
+        # to the DS methods. So, here we split the samples that are
+        # passed to down to each stage by calculating their indices_.
+        easy_samples_mask = hardness < self.IH_rate
+        ind_knn_classifier = np.where(easy_samples_mask)[0]
+        ind_ds_classifier = np.where(~easy_samples_mask)[0]
+
+        if ind_knn_classifier.size:
+            # all samples with low hardness should be classified by
+            # the knn method here:
+            # First get the class associated with each neighbor
+            y_neighbors = self.DSEL_target_[
+                neighbors[ind_knn_classifier, :self.safe_k]]
+
+            # Accessing which samples in the original matrix are
+            # associated with the low instance hardness indices_. This
+            # is important since the low hardness indices
+            # ind_knn_classifier was estimated based on a subset
+            # of samples
+            ind_knn_original_matrix = ind_disagreement[ind_knn_classifier]
+            prediction_knn, _ = mode(y_neighbors, axis=1)
+            predicted_labels[
+                ind_knn_original_matrix] = prediction_knn.reshape(-1, )
+
+            # Remove from the neighbors and distance matrices the
+            # samples that were classified using the KNN
+            neighbors = np.delete(neighbors, ind_knn_classifier,axis=0)
+            distances = np.delete(distances, ind_knn_classifier,axis=0)
+
+        return ind_ds_classifier, predicted_labels, neighbors, distances
+
+    def _get_DFP_mask(self, ds_classifier, ind_ds_classifier, neighbors):
+        if ds_classifier.DFP:
+            DFP_mask = frienemy_pruning_preprocessed(
+                neighbors,
+                ds_classifier.DSEL_target_,
+                ds_classifier.DSEL_processed_)
+        else:
+            DFP_mask = np.ones(
+                (ind_ds_classifier.size, ds_classifier.n_classifiers_))
+
+    def _get_competences(self, ds_classifier, query, predictions, 
+                        probabilities=None, neighbors=None, distances=None,
+                        DFP_mask=None):
+        if query.ndim < 2:
+            query = query.reshape(1, -1)
+
+        if predictions.ndim < 2:
+            predictions = predictions.reshape(1, -1)
+
+        if query.shape[0] != predictions.shape[0]:
+            raise ValueError(
+                'The arrays query and predictions must have the same number'
+                ' of samples. query.shape is {}'
+                'and predictions.shape is {}'.format(query.shape,
+                                                     predictions.shape))
+
+        if self.needs_proba:
+            competences = ds_classifier.estimate_competence_from_proba(
+                query,
+                neighbors=neighbors,
+                distances=distances,
+                probabilities=probabilities)
+        else:
+            competences = ds_classifier.estimate_competence(
+                query,
+                neighbors=neighbors,
+                distances=distances,
+                predictions=predictions)
+
+        if self.DFP: competences = competences * DFP_mask
+
+        return competences
+
+    def _get_dcs_predicted_label(self, ds_classifier, predictions, competences):
+        """
+        This function exists so parameters represent all datasets.
+        This code has been copied, but "self" has been replaced by
+        "ds_classifier" because this function doesn't exist elsewhere.
+        """
+        self = ds_classifier
+        if self.selection_method != 'all':
+            # only one classifier is selected
+            clf_index = self.select(competences)
+            predicted_label = predictions[
+                np.arange(predictions.shape[0]), clf_index]
+        else:
+            # Selected ensemble of classifiers is combined using Majority
+            # Voting
+            indices = self.select(competences)
+            votes = np.ma.MaskedArray(predictions, ~indices)
+            predicted_label = majority_voting_rule(votes)
+
+        return predicted_label
+
+    def _get_des_predicted_label(self, ds_classifier, predictions, competences):
+        """
+        This function exists so parameters represent all datasets.
+        This code has been copied, but "self" has been replaced by
+        "ds_classifier" because this function doesn't exist elsewhere.
+        """
+        self = ds_classifier
+        if self.mode == "selection":
+            # The selected_classifiers matrix is used as a mask to remove
+            # the predictions of certain base classifiers.
+            selected_classifiers = ds_classifier.select(competences)
+            votes = np.ma.MaskedArray(predictions, ~selected_classifiers)
+            predicted_label = majority_voting_rule(votes)
+        elif self.mode == "weighting":
+            votes = np.atleast_2d(predictions)
+            predicted_label = weighted_majority_voting_rule(votes, competences,
+                np.arange(ds_classifier.n_classes_))
+        else:
+            selected_classifiers = ds_classifier.select(competences)
+            votes = np.ma.MaskedArray(predictions, ~selected_classifiers)
+            predicted_label = weighted_majority_voting_rule(votes, competences,
+                np.arange(ds_classifier.n_classes_))
+
+        return predicted_label

From e555206f76776b67797bae0fa3efa34ed4b6e82c Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Fri, 26 Feb 2021 19:41:48 -0500
Subject: [PATCH 02/21] Ajout de l'oracle avec plusieurs datasets.

---

Notes (review):
    * In _predict_oracle, `y = classifier.enc_.transform(y)` sits inside
      both the per-sample and the per-dataset loop, so y is re-encoded on
      every iteration and already-encoded labels are fed back into
      transform(). Encode y once, before the loops. (If enc_ is a
      LabelEncoder -- TODO confirm -- the repeated call raises on unseen
      labels unless the classes are already 0..n_classes-1.)
    * `X[i] = check_array(X[i])` is likewise repeated for every sample and
      writes the validated array back into the caller's X.
    * predict(X, y=None) dispatches to _predict_oracle, which dereferences
      y.size; omitting y with an Oracle template therefore fails with
      AttributeError. Validate y and raise an explicit error instead.
    * Each base classifier is invoked once per sample through
      clf.predict(x_sample.reshape(1, -1)); predicting the whole X[i] once
      per classifier would avoid n_samples separate calls.

 deslib/multi_datasets.py | 43 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 150a1fe..f034487 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -12,6 +12,7 @@
 from deslib.base import BaseDS
 from deslib.dcs.base import BaseDCS
 from deslib.des.base import BaseDES
+from deslib.static.oracle import Oracle
 from deslib.util.aggregation import (weighted_majority_voting_rule,
                                      majority_voting_rule,
                                      aggregate_proba_ensemble_weighted)
@@ -42,16 +43,20 @@ class labels of each example in X.
             self.ds_classifiers.append(ds_classifier)
         self._setup_label_encoder(y[0])
 
-    def predict(self, X):
+    def predict(self, X, y=None):
+        if issubclass(type(self.ds_classifier), Oracle):
+            return self._predict_oracle(X, y)
+
         merged_base_probabilities = []
         merged_base_predictions = []
         n_datasets = len(X)
+
         for i in range(n_datasets):
             base_probabilities, base_predictions = \
                 self._get_base_proba_and_pred(self.ds_classifiers[i], X[i])
             merged_base_probabilities.append(base_probabilities)
             merged_base_predictions.append(base_predictions)
-        
+
         if merged_base_probabilities[0] is not None:
             merged_base_probabilities = np.concatenate(
                 merged_base_probabilities, axis=1)
@@ -59,13 +64,13 @@ def predict(self, X):
             merged_base_probabilities = None
         merged_base_predictions = np.concatenate(
                 merged_base_predictions, axis=1)
-        
+
         n_samples = len(X[0])
         predicted_labels = np.empty(n_samples, dtype=np.intp)
-        
+
         all_agree_vector = BaseDS._all_classifier_agree(merged_base_predictions)
         ind_all_agree = np.where(all_agree_vector)[0]   
-        
+
         # Since the predictions are always the same, get the predictions of the
         # first base classifier.
         if ind_all_agree.size:
@@ -160,6 +165,34 @@ def predict(self, X):
         
         return self.classes_.take(predicted_labels)
 
+    def _predict_oracle(self, X, y):
+        n_datasets = len(X)
+        predicted_labels = -np.ones(y.size, dtype=int)
+
+        for sample_index in range(len(y)):
+            predictions = []
+
+            for i in range(n_datasets):
+                classifier = self.ds_classifiers[i]
+                X[i] = check_array(X[i])
+                y = classifier.enc_.transform(y)
+                x_sample = X[i][sample_index]
+                y_sample = y[sample_index]
+
+                for clf in classifier.pool_classifiers_:
+                    predictions.append(clf.predict(x_sample.reshape(1, -1))[0])
+
+            for p in predictions:
+                # If one base classifier predicts the correct answer,
+                # consider as a correct prediction
+                if p == y_sample:
+                    p = int(p)
+                    predicted_labels[sample_index] = p
+                    break
+                predicted_labels[sample_index] = p
+
+        return self.classes_.take(predicted_labels)
+
     def predict_proba(self, X):
         raise ValueError("Méthode incomplète!")
 

From 0daac6af8252ef8a65831508693cfd7ef7eb2360 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Fri, 26 Feb 2021 23:13:43 -0500
Subject: [PATCH 03/21] =?UTF-8?q?Ajout=20de=20commentaires,=20ds=5Fclassif?=
 =?UTF-8?q?iers=20remplac=C3=A9s=20par=20self.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---

Notes (review):
    * The rewritten _get_DFP_mask still ends without `return DFP_mask`, so
      it returns None. Because _get_competences now rebinds self to the DS
      classifier, `if self.DFP: competences = competences * DFP_mask` is
      taken whenever that classifier has DFP enabled and multiplies by
      None.
    * The new predict() docstring documents y as (n_datasets, n_samples),
      but _predict_oracle uses y.size, range(len(y)) and y[sample_index]
      as one label per sample, i.e. a 1-D array of n_samples labels.
    * Rebinding `self = ds_classifier` inside methods works, but it hides
      which object each attribute is read from; module-level helpers that
      take ds_classifier explicitly would be easier to follow.

 deslib/multi_datasets.py | 91 ++++++++++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 23 deletions(-)

diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index f034487..7808f0d 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -20,12 +20,24 @@
 
 # Créer à partir de KNORA-U
 class MultiDatasets(BaseDS):
-    def __init__(self, ds_classifier, pool_classifiers=None):
+    def __init__(self, ds_classifier, pool_classifiers):
+        """
+        Parameters
+        ----------
+        ds_classifier : classifier from the library
+            The DS model serves as a template for all the datasets.
+
+        pool_classifiers : array of shape (n_datasets, n_classifiers)
+            Classifiers of each dataset.
+        """
         super(MultiDatasets, self).__init__(pool_classifiers)
         self.ds_classifier = ds_classifier
 
     def fit(self, X, y):
-        """
+        """Prepare the DS models by setting the KNN algorithm and
+        pre-processing the information required to apply the DS
+        methods
+
         Parameters
         ----------
         X : array of shape (n_datasets, n_samples, n_features)
@@ -36,17 +48,33 @@ class labels of each example in X.
         """
         n_datasets = len(X)
         self.ds_classifiers = []
+
         for i in range(n_datasets):
             ds_classifier = copy.deepcopy(self.ds_classifier)
             ds_classifier.pool_classifiers = self.pool_classifiers[i]
             ds_classifier.fit(X[i], y[i])
             self.ds_classifiers.append(ds_classifier)
+
         self._setup_label_encoder(y[0])
+        return self
 
     def predict(self, X, y=None):
+        """
+        Parameters
+        ----------
+        X : array of shape (n_datasets, n_samples, n_features)
+            The input data.
+
+        y : array of shape (n_datasets, n_samples)
+            class labels of each example in X.
+            It's added as a parameter so Oracle can be used.
+        """
+        # Oracle is not a DS model, so there is no need to execute
+        # the code below.
         if issubclass(type(self.ds_classifier), Oracle):
             return self._predict_oracle(X, y)
 
+        # TODO: move the code below in a function called _predict_ds(X).
         merged_base_probabilities = []
         merged_base_predictions = []
         n_datasets = len(X)
@@ -149,20 +177,20 @@ def predict(self, X, y=None):
                 merged_competences.append(competences)
                 merged_left_base_predictions.append(base_predictions[
                                                     ind_ds_original_matrix])
-            
+
             merged_left_base_predictions = np.concatenate(
                 merged_left_base_predictions, axis=1)
             merged_competences = np.concatenate(merged_competences, axis=1)
-            
+
             if issubclass(type(self.ds_classifier), BaseDCS):
                 pred_ds = self._get_dcs_predicted_label(self.ds_classifier,
                     merged_left_base_predictions, merged_competences)
             elif issubclass(type(self.ds_classifier), BaseDES):
                 pred_ds = self._get_des_predicted_label(self.ds_classifier,
                     merged_left_base_predictions, merged_competences)
-            
+
             predicted_labels[ind_ds_original_matrix] = pred_ds
-        
+
         return self.classes_.take(predicted_labels)
 
     def _predict_oracle(self, X, y):
@@ -197,31 +225,36 @@ def predict_proba(self, X):
         raise ValueError("Méthode incomplète!")
 
     def _get_base_proba_and_pred(self, ds_classifier, X):
+        """
+        This code has been copied, but "self" has been replaced by
+        "ds_classifier" because this function doesn't exist elsewhere.
+        """
+        self = ds_classifier
         # Check if the DS model was trained
-        check_is_fitted(ds_classifier,
+        check_is_fitted(self,
                         ["DSEL_processed_", "DSEL_data_", "DSEL_target_"])
 
         # Check if X is a valid input
         X = check_array(X)
-        ds_classifier._check_num_features(X)
+        self._check_num_features(X)
 
         if self.needs_proba:
-            base_probabilities = ds_classifier._predict_proba_base(X)
+            base_probabilities = self._predict_proba_base(X)
             base_predictions = base_probabilities.argmax(axis=2)
         else:
             base_probabilities = None
-            base_predictions = ds_classifier._predict_base(X)
+            base_predictions = self._predict_base(X)
 
         return base_probabilities, base_predictions
 
     def _calculate_hardness_level(self, ds_classifier, ind_disagreement,
             predicted_labels, neighbors, distances):
         """
-        This function exists so parameters represent all datasets.
         This code has been copied, but "self" has been replaced by
         "ds_classifier" because this function doesn't exist elsewhere.
         """
         self = ds_classifier
+
         # if IH is used, calculate the hardness level associated with
         # each sample
         hardness = hardness_region_competence(neighbors,
@@ -262,18 +295,30 @@ def _calculate_hardness_level(self, ds_classifier, ind_disagreement,
         return ind_ds_classifier, predicted_labels, neighbors, distances
 
     def _get_DFP_mask(self, ds_classifier, ind_ds_classifier, neighbors):
-        if ds_classifier.DFP:
+        """
+        This code has been copied, but "self" has been replaced by
+        "ds_classifier" because this function doesn't exist elsewhere.
+        """
+        self = ds_classifier
+
+        if self.DFP:
             DFP_mask = frienemy_pruning_preprocessed(
                 neighbors,
-                ds_classifier.DSEL_target_,
-                ds_classifier.DSEL_processed_)
+                self.DSEL_target_,
+                self.DSEL_processed_)
         else:
             DFP_mask = np.ones(
-                (ind_ds_classifier.size, ds_classifier.n_classifiers_))
+                (ind_ds_classifier.size, self.n_classifiers_))
 
     def _get_competences(self, ds_classifier, query, predictions, 
                         probabilities=None, neighbors=None, distances=None,
                         DFP_mask=None):
+        """
+        This code has been copied, but "self" has been replaced by
+        "ds_classifier" because this function doesn't exist elsewhere.
+        """
+        self = ds_classifier
+
         if query.ndim < 2:
             query = query.reshape(1, -1)
 
@@ -288,13 +333,13 @@ def _get_competences(self, ds_classifier, query, predictions,
                                                      predictions.shape))
 
         if self.needs_proba:
-            competences = ds_classifier.estimate_competence_from_proba(
+            competences = self.estimate_competence_from_proba(
                 query,
                 neighbors=neighbors,
                 distances=distances,
                 probabilities=probabilities)
         else:
-            competences = ds_classifier.estimate_competence(
+            competences = self.estimate_competence(
                 query,
                 neighbors=neighbors,
                 distances=distances,
@@ -306,11 +351,11 @@ def _get_competences(self, ds_classifier, query, predictions,
 
     def _get_dcs_predicted_label(self, ds_classifier, predictions, competences):
         """
-        This function exists so parameters represent all datasets.
         This code has been copied, but "self" has been replaced by
         "ds_classifier" because this function doesn't exist elsewhere.
         """
         self = ds_classifier
+
         if self.selection_method != 'all':
             # only one classifier is selected
             clf_index = self.select(competences)
@@ -327,25 +372,25 @@ def _get_dcs_predicted_label(self, ds_classifier, predictions, competences):
 
     def _get_des_predicted_label(self, ds_classifier, predictions, competences):
         """
-        This function exists so parameters represent all datasets.
         This code has been copied, but "self" has been replaced by
         "ds_classifier" because this function doesn't exist elsewhere.
         """
         self = ds_classifier
+
         if self.mode == "selection":
             # The selected_classifiers matrix is used as a mask to remove
             # the predictions of certain base classifiers.
-            selected_classifiers = ds_classifier.select(competences)
+            selected_classifiers = self.select(competences)
             votes = np.ma.MaskedArray(predictions, ~selected_classifiers)
             predicted_label = majority_voting_rule(votes)
         elif self.mode == "weighting":
             votes = np.atleast_2d(predictions)
             predicted_label = weighted_majority_voting_rule(votes, competences,
-                np.arange(ds_classifier.n_classes_))
+                np.arange(self.n_classes_))
         else:
-            selected_classifiers = ds_classifier.select(competences)
+            selected_classifiers = self.select(competences)
             votes = np.ma.MaskedArray(predictions, ~selected_classifiers)
             predicted_label = weighted_majority_voting_rule(votes, competences,
-                np.arange(ds_classifier.n_classes_))
+                np.arange(self.n_classes_))
 
         return predicted_label

From dc2d0500a814557f59ee8e5d54dca4354f3b2690 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Wed, 3 Mar 2021 22:45:30 -0500
Subject: [PATCH 04/21] Ajout de StackedClassifier.

---

Notes (review):
    * _predict_stacked assigns `X[i] = check_array(X[i])`, which writes
      into the caller's container and fails when X is a tuple. Binding
      the validated array to a local name avoids both.
    * np.sum(merged_base_preds, 0) adds the predict_proba outputs of the
      per-dataset meta-classifiers column by column. This presumably
      relies on every meta-classifier exposing the same classes in the
      same order -- TODO confirm.

 deslib/multi_datasets.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 7808f0d..923703f 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -13,6 +13,7 @@
 from deslib.dcs.base import BaseDCS
 from deslib.des.base import BaseDES
 from deslib.static.oracle import Oracle
+from deslib.static.stacked import StackedClassifier
 from deslib.util.aggregation import (weighted_majority_voting_rule,
                                      majority_voting_rule,
                                      aggregate_proba_ensemble_weighted)
@@ -73,8 +74,12 @@ class labels of each example in X.
         # the code below.
         if issubclass(type(self.ds_classifier), Oracle):
             return self._predict_oracle(X, y)
+        elif issubclass(type(self.ds_classifier), StackedClassifier):
+            return self._predict_stacked(X)
+        else:
+            return self._predict_ds(X)
 
-        # TODO: move the code below in a function called _predict_ds(X).
+    def _predict_ds(self, X):
         merged_base_probabilities = []
         merged_base_predictions = []
         n_datasets = len(X)
@@ -221,6 +226,24 @@ def _predict_oracle(self, X, y):
 
         return self.classes_.take(predicted_labels)
 
+    def _predict_stacked(self, X):
+        merged_base_preds = []
+        n_datasets = len(X)
+
+        for i in range(n_datasets):
+            classifier = self.ds_classifiers[i]
+            X[i] = check_array(X[i])
+            check_is_fitted(classifier, "meta_classifier_")
+            base_preds = classifier._predict_proba_base(X[i])
+            X_meta = classifier._connect_input(X[i], base_preds)
+            preds = classifier.meta_classifier_.predict_proba(X_meta)
+            merged_base_preds.append(preds)
+
+        merged_base_preds = np.sum(merged_base_preds,0)
+        preds = np.argmax(merged_base_preds,axis=1)
+
+        return self.classes_.take(preds)
+
     def predict_proba(self, X):
         raise ValueError("Méthode incomplète!")
 

From 795a19b2c195ef280107595eb19aa68dd1d61ed5 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 4 Mar 2021 15:39:57 -0500
Subject: [PATCH 05/21] Fix de knorau avec multi-datasets.

---

Notes (review):
    * fit() now writes n_classes_ onto self.ds_classifier, the template
      object received in __init__; only deep copies of it are fitted. The
      attribute is needed because _predict_ds passes the template to
      _get_des_predicted_label, which reads ds_classifier.n_classes_.
      Passing a fitted copy (e.g. self.ds_classifiers[0]) to
      _get_dcs_predicted_label / _get_des_predicted_label would remove the
      need to mutate a constructor argument during fit().

 deslib/multi_datasets.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 923703f..379642b 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -56,7 +56,10 @@ class labels of each example in X.
             ds_classifier.fit(X[i], y[i])
             self.ds_classifiers.append(ds_classifier)
 
+        one_classifier = self.ds_classifiers[0]
+        self.ds_classifier.n_classes_ = one_classifier.n_classes_
         self._setup_label_encoder(y[0])
+
         return self
 
     def predict(self, X, y=None):

From 742bffc7eba1e44a55dc67d4361557ba84ffbaad Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 11 Mar 2021 14:10:02 -0500
Subject: [PATCH 06/21] =?UTF-8?q?Distances=20ajout=C3=A9s=20dans=20qq=20te?=
 =?UTF-8?q?chniques,=20stats=20=C3=A0=201=20prim=20ajout=C3=A9es.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/base.py        |  37 ++++++++++---
 deslib/dcs/base.py    |   3 +-
 deslib/dcs/ola.py     |   3 +-
 deslib/des/base.py    |   6 ++-
 deslib/des/knop.py    |   4 +-
 deslib/des/knora_e.py |   4 +-
 deslib/des/knora_u.py |   4 +-
 deslib/util/stats.py  | 122 ++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 170 insertions(+), 13 deletions(-)
 create mode 100644 deslib/util/stats.py

diff --git a/deslib/base.py b/deslib/base.py
index eeefef8..9358ac4 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -16,7 +16,7 @@
 from sklearn.ensemble import BaseEnsemble, BaggingClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import LabelEncoder, normalize
 from sklearn.utils.validation import (check_X_y, check_is_fitted, check_array,
                                       check_random_state)
 
@@ -24,6 +24,7 @@
 from deslib.util import faiss_knn_wrapper
 from deslib.util.dfp import frienemy_pruning_preprocessed
 from deslib.util.instance_hardness import hardness_region_competence
+from deslib.util.stats import stats
 
 
 class BaseDS(BaseEstimator, ClassifierMixin):
@@ -40,8 +41,8 @@ class BaseDS(BaseEstimator, ClassifierMixin):
     @abstractmethod
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, needs_proba=False,
-                 random_state=None, knn_classifier='knn', DSEL_perc=0.5,
-                 knne=False, n_jobs=-1):
+                 random_state=None, knn_classifier='knn',
+                 knn_metric='minkowski', DSEL_perc=0.5, knne=False, n_jobs=-1):
 
         self.pool_classifiers = pool_classifiers
         self.k = k
@@ -52,9 +53,11 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
         self.needs_proba = needs_proba
         self.random_state = random_state
         self.knn_classifier = knn_classifier
+        self.knn_metric = knn_metric
         self.DSEL_perc = DSEL_perc
         self.knne = knne
         self.n_jobs = n_jobs
+        self.stats = stats()
 
         # Check optional dependency
         if knn_classifier == 'faiss' and not faiss_knn_wrapper.is_available():
@@ -202,6 +205,7 @@ class labels of each example in X.
         self
         """
         self.random_state_ = check_random_state(self.random_state)
+        self.stats.true_labels = y
 
         # Check if the length of X and y are consistent.
         X, y = check_X_y(X, y)
@@ -244,7 +248,7 @@ class labels of each example in X.
 
         # validate the value of k
         self._validate_k()
-        self._set_region_of_competence_algorithm()
+        self._set_region_of_competence_algorithm(X_dsel)
         self._fit_region_competence(X_dsel, y_dsel)
 
         # validate the IH
@@ -315,6 +319,7 @@ def _fit_region_competence(self, X, y):
             class labels of each sample in X.
 
         """
+        if self.knn_metric == 'cosine': X = normalize(X)
         self.roc_algorithm_.fit(X, y)
 
     def _set_dsel(self, X, y):
@@ -337,17 +342,30 @@ class labels of each sample in X.
         self.n_samples_ = self.DSEL_target_.size
         self.DSEL_processed_, self.BKS_DSEL_ = self._preprocess_dsel()
 
-    def _set_region_of_competence_algorithm(self):
+    def _set_region_of_competence_algorithm(self, X):
+
+        algorithm = "auto"
+        metric = 'minkowski'
+        metric_params = None
+
+        if self.knn_metric == 'mahalanobis':
+            metric = 'mahalanobis'
+            metric_params = {'V': np.cov(X)}
+            algorithm = "brute"
 
         if self.knn_classifier is None or self.knn_classifier in ['knn',
                                                                   'sklearn']:
             knn_class = functools.partial(KNeighborsClassifier,
                                           n_jobs=self.n_jobs,
-                                          algorithm="auto")
+                                          algorithm=algorithm,
+                                          metric=metric,
+                                          metric_params=metric_params)
         elif self.knn_classifier == 'faiss':
             knn_class = functools.partial(
                 faiss_knn_wrapper.FaissKNNClassifier,
-                n_jobs=self.n_jobs, algorithm="brute")
+                n_jobs=self.n_jobs, algorithm="brute",
+                metric=self.knn_metric,
+                metric_params=metric_params)
         elif callable(self.knn_classifier):
             knn_class = self.knn_classifier
         else:
@@ -427,6 +445,7 @@ def predict(self, X):
             base_probabilities = None
             base_predictions = self._predict_base(X)
 
+        self.stats.bases_labels = base_predictions
         all_agree_vector = BaseDS._all_classifier_agree(base_predictions)
         ind_all_agree = np.where(all_agree_vector)[0]
 
@@ -435,6 +454,7 @@ def predict(self, X):
         if ind_all_agree.size:
             predicted_labels[ind_all_agree] = base_predictions[
                 ind_all_agree, 0]
+            self.stats.agree_ind = ind_all_agree
 
         # For the samples with disagreement, perform the dynamic selection
         # steps. First step is to collect the samples with disagreement
@@ -534,6 +554,9 @@ def predict(self, X):
                                                 distances=distances,
                                                 DFP_mask=DFP_mask)
                 predicted_labels[ind_ds_original_matrix] = pred_ds
+                self.stats.disagree_ind = ind_ds_original_matrix
+
+        self.stats.predicted_labels = predicted_labels
 
         return self.classes_.take(predicted_labels)
 
diff --git a/deslib/dcs/base.py b/deslib/dcs/base.py
index 974efd7..3ef54e7 100644
--- a/deslib/dcs/base.py
+++ b/deslib/dcs/base.py
@@ -21,7 +21,7 @@ class BaseDCS(BaseDS):
     def __init__(self, pool_classifiers=None, k=7, DFP=False, safe_k=None,
                  with_IH=False, IH_rate=0.30, selection_method='best',
                  diff_thresh=0.1, random_state=None, knn_classifier='knn',
-                 DSEL_perc=0.5,
+                 knn_metric='minkowski', DSEL_perc=0.5,
                  knne=False, n_jobs=-1):
 
         super(BaseDCS, self).__init__(pool_classifiers=pool_classifiers, k=k,
@@ -29,6 +29,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, safe_k=None,
                                       IH_rate=IH_rate,
                                       random_state=random_state,
                                       knn_classifier=knn_classifier,
+                                      knn_metric=knn_metric,
                                       DSEL_perc=DSEL_perc,
                                       knne=knne, n_jobs=n_jobs)
 
diff --git a/deslib/dcs/ola.py b/deslib/dcs/ola.py
index 3ea0f35..b3d81b5 100644
--- a/deslib/dcs/ola.py
+++ b/deslib/dcs/ola.py
@@ -111,7 +111,7 @@ class :class:`FaissKNNClassifier`
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, selection_method='best',
                  diff_thresh=0.1, random_state=None, knn_classifier='knn',
-                 knne=False, DSEL_perc=0.5, n_jobs=-1):
+                 knn_metric='minkowski', knne=False, DSEL_perc=0.5, n_jobs=-1):
         super(OLA, self).__init__(pool_classifiers=pool_classifiers, k=k,
                                   DFP=DFP, with_IH=with_IH, safe_k=safe_k,
                                   IH_rate=IH_rate,
@@ -119,6 +119,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                   diff_thresh=diff_thresh,
                                   random_state=random_state,
                                   knn_classifier=knn_classifier,
+                                  knn_metric=knn_metric,
                                   knne=knne,
                                   DSEL_perc=DSEL_perc, n_jobs=n_jobs)
 
diff --git a/deslib/des/base.py b/deslib/des/base.py
index 1530e64..f518bbf 100644
--- a/deslib/des/base.py
+++ b/deslib/des/base.py
@@ -21,7 +21,8 @@ class BaseDES(BaseDS):
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, mode='selection',
                  needs_proba=False, random_state=None,
-                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knn_metric='minkowski', knne=False,
+                 DSEL_perc=0.5, n_jobs=-1):
 
         super(BaseDES, self).__init__(pool_classifiers=pool_classifiers,
                                       k=k,
@@ -32,6 +33,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                       needs_proba=needs_proba,
                                       random_state=random_state,
                                       knn_classifier=knn_classifier,
+                                      knn_metric=knn_metric,
                                       knne=knne,
                                       DSEL_perc=DSEL_perc, n_jobs=n_jobs)
         self.mode = mode
@@ -191,6 +193,8 @@ def classify_with_ds(self, query, predictions, probabilities=None,
         if self.DFP:
             competences = competences * DFP_mask
 
+        self.stats.competences = competences
+
         if self.mode == "selection":
             # The selected_classifiers matrix is used as a mask to remove
             # the predictions of certain base classifiers.
diff --git a/deslib/des/knop.py b/deslib/des/knop.py
index adffb36..1cffc36 100644
--- a/deslib/des/knop.py
+++ b/deslib/des/knop.py
@@ -106,7 +106,8 @@ class :class:`FaissKNNClassifier`
     """
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, random_state=None,
-                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knn_metric='minkowski', knne=False,
+                 DSEL_perc=0.5, n_jobs=-1):
 
         super(KNOP, self).__init__(pool_classifiers, k,
                                    DFP=DFP,
@@ -117,6 +118,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                    needs_proba=True,
                                    random_state=random_state,
                                    knn_classifier=knn_classifier,
+                                   knn_metric=knn_metric,
                                    knne=knne,
                                    DSEL_perc=DSEL_perc,
                                    n_jobs=n_jobs)
diff --git a/deslib/des/knora_e.py b/deslib/des/knora_e.py
index fa0298d..6619b03 100644
--- a/deslib/des/knora_e.py
+++ b/deslib/des/knora_e.py
@@ -99,7 +99,8 @@ class :class:`FaissKNNClassifier`
 
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, random_state=None,
-                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knn_metric='minkowski', knne=False,
+                 DSEL_perc=0.5, n_jobs=-1):
 
         super(KNORAE, self).__init__(pool_classifiers=pool_classifiers,
                                      k=k,
@@ -109,6 +110,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                      IH_rate=IH_rate,
                                      random_state=random_state,
                                      knn_classifier=knn_classifier,
+                                     knn_metric=knn_metric,
                                      knne=knne,
                                      DSEL_perc=DSEL_perc,
                                      n_jobs=n_jobs)
diff --git a/deslib/des/knora_u.py b/deslib/des/knora_u.py
index e6cdc89..c4403fc 100644
--- a/deslib/des/knora_u.py
+++ b/deslib/des/knora_u.py
@@ -95,7 +95,8 @@ class :class:`FaissKNNClassifier`
 
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, random_state=None,
-                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knn_metric='minkowski', knne=False,
+                 DSEL_perc=0.5, n_jobs=-1):
         super(KNORAU, self).__init__(pool_classifiers, k,
                                      DFP=DFP,
                                      with_IH=with_IH,
@@ -104,6 +105,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                      mode='weighting',
                                      random_state=random_state,
                                      knn_classifier=knn_classifier,
+                                     knn_metric=knn_metric,
                                      knne=knne,
                                      DSEL_perc=DSEL_perc,
                                      n_jobs=n_jobs)
diff --git a/deslib/util/stats.py b/deslib/util/stats.py
new file mode 100644
index 0000000..e9f3194
--- /dev/null
+++ b/deslib/util/stats.py
@@ -0,0 +1,122 @@
+import numpy as np
+
+
+class stats():
+    def __init__(self):
+        self.agree_ind = []
+        self.disagree_ind = []
+        self.true_labels = []
+        self.bases_labels = []
+        self.predicted_labels = []
+        self.agree_labels = []
+        self.competences = []
+
+    def log_stats(self):
+        n_queries = len(self.true_labels)
+        n_classes = len(np.unique(self.predicted_labels))
+        n_bases = len(self.bases_labels[0])
+        n_agree = len(self.agree_ind)
+        n_disagree = len(self.disagree_ind)
+        
+        n_right_clf_by_query, n_right_clf_ind = \
+            self._get_n_right_clf_stats(n_classes)
+
+        predicted_dis = self._get_distribution()
+        agree_dis = self._get_distribution(ind=self.agree_ind)
+        n_right_clf_dis = self._get_distribution(n_right_clf_by_query)
+        
+        agree_score = self._get_score(self.agree_ind)
+        disagree_score = self._get_score(self.disagree_ind)
+        
+        competences_mean, competences_mean_by_clf, n_even_max_competence = \
+            self._get_competences_stats()
+
+        lines = []
+        lines.extend([
+            "Queries:",
+            n_queries,
+            "Nb of right classifiers from 0 to "+str(n_bases)+":",
+            n_right_clf_dis,
+            "--- Agreements",
+            "Instances, ratio on queries:",
+            n_agree,
+            round(n_agree / n_queries, 3),
+            "Classes distribution, ratio on predictions:",
+            agree_dis,
+            np.round(agree_dis / predicted_dis, 3),
+            "Score, ratio on agreements:",
+            agree_score,
+            round(agree_score / n_agree, 3),
+        ])
+
+        for i,n_right_clf in enumerate(n_right_clf_dis):
+            score = self._get_score(n_right_clf_ind[i])
+            lines.extend([
+                "--- "+str(i)+" right classifiers",
+                "Instances, ratio on queries:",
+                n_right_clf_dis[i],
+                round(n_right_clf / n_queries, 3),
+                "Score, ratio on "+str(i)+" right clf:",
+                score,
+                round(score / n_right_clf_dis[i], 3),
+            ])
+
+        lines.extend([
+            "--- Disagreements",
+            "Instances, ratio on queries:",
+            n_disagree,
+            round(n_disagree / n_queries, 3),
+            "Score, ratio on disagreements:",
+            disagree_score,
+            round(disagree_score / n_disagree, 3),
+            "--- Competences",
+            "Mean:",
+            round(competences_mean, 3),
+            "Mean by classifier:",
+            np.round(competences_mean_by_clf, 3),
+            "Even max competences times, ratio on disagreements:",
+            n_even_max_competence,
+            round(n_even_max_competence / n_disagree, 3),
+        ])
+        
+        with open("log.txt",'w') as f:
+            for line in lines:
+                f.write(str(line))
+                f.write("\n")
+
+    def _get_distribution(self, labels=None, ind=None):
+        labels = self.predicted_labels if labels is None else labels
+        if ind is not None: labels = labels[ind]
+        _, counts = np.unique(labels, return_counts=True)
+        return counts
+
+    def _get_n_right_clf_stats(self, n_classes):
+        n_right_clf_by_query = []
+        n_right_clf_ind = [[] for i in range(n_classes)]
+
+        for i,label in enumerate(self.true_labels):
+            row = self.bases_labels[i]
+            n_right_clf = np.count_nonzero(row == label)
+            n_right_clf_by_query.append(n_right_clf)
+            n_right_clf_ind[n_right_clf].append(i)
+
+        return n_right_clf_by_query, n_right_clf_ind
+
+    def _get_competences_stats(self):
+        competences_mean = np.mean(self.competences)
+        competences_mean_by_clf = np.mean(self.competences, axis=0)
+        n_even_max_competence = 0
+
+        for c in self.competences:
+            max_ = c[np.argmax(c)]
+            n_max = np.count_nonzero(c == max_)
+            if n_max > 1: n_even_max_competence += 1
+
+        return competences_mean, competences_mean_by_clf, n_even_max_competence
+
+    def _get_score(self, ind):
+        true_labels = self.true_labels[ind]
+        labels = self.predicted_labels[ind]
+        matches = np.equal(true_labels, labels)
+        score = np.sum(matches)
+        return score

From 6835d89c25525f4f4064cdd72379dd0d391614fc Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 11 Mar 2021 18:07:12 -0500
Subject: [PATCH 07/21] =?UTF-8?q?Support=20des=20stats=20=C3=A0=20multidat?=
 =?UTF-8?q?asets,=20fix=20avec=20les=20stats.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/base.py           | 11 +++++------
 deslib/multi_datasets.py |  7 +++++++
 deslib/util/stats.py     |  8 ++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index 9358ac4..041dbe5 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -24,7 +24,7 @@
 from deslib.util import faiss_knn_wrapper
 from deslib.util.dfp import frienemy_pruning_preprocessed
 from deslib.util.instance_hardness import hardness_region_competence
-from deslib.util.stats import stats
+from deslib.util.stats import Stats
 
 
 class BaseDS(BaseEstimator, ClassifierMixin):
@@ -57,7 +57,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
         self.DSEL_perc = DSEL_perc
         self.knne = knne
         self.n_jobs = n_jobs
-        self.stats = stats()
+        self.stats = Stats()
 
         # Check optional dependency
         if knn_classifier == 'faiss' and not faiss_knn_wrapper.is_available():
@@ -205,7 +205,6 @@ class labels of each example in X.
         self
         """
         self.random_state_ = check_random_state(self.random_state)
-        self.stats.true_labels = y
 
         # Check if the length of X and y are consistent.
         X, y = check_X_y(X, y)
@@ -445,7 +444,6 @@ def predict(self, X):
             base_probabilities = None
             base_predictions = self._predict_base(X)
 
-        self.stats.bases_labels = base_predictions
         all_agree_vector = BaseDS._all_classifier_agree(base_predictions)
         ind_all_agree = np.where(all_agree_vector)[0]
 
@@ -454,7 +452,6 @@ def predict(self, X):
         if ind_all_agree.size:
             predicted_labels[ind_all_agree] = base_predictions[
                 ind_all_agree, 0]
-            self.stats.agree_ind = ind_all_agree
 
         # For the samples with disagreement, perform the dynamic selection
         # steps. First step is to collect the samples with disagreement
@@ -539,6 +536,7 @@ def predict(self, X):
                 # Get the real indices_ of the samples that will be classified
                 # using a DS algorithm.
                 ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
+                self.stats.disagree_ind = ind_ds_original_matrix
 
                 if self.needs_proba:
                     selected_probabilities = base_probabilities[
@@ -554,8 +552,9 @@ def predict(self, X):
                                                 distances=distances,
                                                 DFP_mask=DFP_mask)
                 predicted_labels[ind_ds_original_matrix] = pred_ds
-                self.stats.disagree_ind = ind_ds_original_matrix
 
+        self.stats.bases_labels = base_predictions
+        self.stats.agree_ind = ind_all_agree
         self.stats.predicted_labels = predicted_labels
 
         return self.classes_.take(predicted_labels)
diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 379642b..4502277 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -19,6 +19,7 @@
                                      aggregate_proba_ensemble_weighted)
 from deslib.util.instance_hardness import hardness_region_competence
 
+
 # Créer à partir de KNORA-U
 class MultiDatasets(BaseDS):
     def __init__(self, ds_classifier, pool_classifiers):
@@ -166,6 +167,7 @@ def _predict_ds(self, X):
                 # Get the real indices_ of the samples that will be classified
                 # using a DS algorithm.
                 ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
+                self.stats.disagree_ind = ind_ds_original_matrix
 
                 if ds_classifier.needs_proba:
                     selected_probabilities = base_probabilities[
@@ -189,6 +191,7 @@ def _predict_ds(self, X):
             merged_left_base_predictions = np.concatenate(
                 merged_left_base_predictions, axis=1)
             merged_competences = np.concatenate(merged_competences, axis=1)
+            self.stats.competences = merged_competences
 
             if issubclass(type(self.ds_classifier), BaseDCS):
                 pred_ds = self._get_dcs_predicted_label(self.ds_classifier,
@@ -199,6 +202,10 @@ def _predict_ds(self, X):
 
             predicted_labels[ind_ds_original_matrix] = pred_ds
 
+        self.stats.bases_labels = merged_base_predictions
+        self.stats.agree_ind = ind_all_agree
+        self.stats.predicted_labels = predicted_labels
+
         return self.classes_.take(predicted_labels)
 
     def _predict_oracle(self, X, y):
diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index e9f3194..c8367da 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 
-class stats():
+class Stats():
     def __init__(self):
         self.agree_ind = []
         self.disagree_ind = []
@@ -19,7 +19,7 @@ def log_stats(self):
         n_disagree = len(self.disagree_ind)
         
         n_right_clf_by_query, n_right_clf_ind = \
-            self._get_n_right_clf_stats(n_classes)
+            self._get_n_right_clf_stats(n_bases)
 
         predicted_dis = self._get_distribution()
         agree_dis = self._get_distribution(ind=self.agree_ind)
@@ -90,9 +90,9 @@ def _get_distribution(self, labels=None, ind=None):
         _, counts = np.unique(labels, return_counts=True)
         return counts
 
-    def _get_n_right_clf_stats(self, n_classes):
+    def _get_n_right_clf_stats(self, n_bases):
         n_right_clf_by_query = []
-        n_right_clf_ind = [[] for i in range(n_classes)]
+        n_right_clf_ind = [[] for i in range(n_bases + 1)]
 
         for i,label in enumerate(self.true_labels):
             row = self.bases_labels[i]

From 3a764e4453ccaf0284d9194ea259c2ef83b557f7 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 11 Mar 2021 21:39:36 -0500
Subject: [PATCH 08/21] Segmentation du log.

---
 deslib/util/stats.py | 105 ++++++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 36 deletions(-)

diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index c8367da..3ab86da 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -10,44 +10,66 @@ def __init__(self):
         self.predicted_labels = []
         self.agree_labels = []
         self.competences = []
+        self.log_fname = "log.txt"
 
     def log_stats(self):
-        n_queries = len(self.true_labels)
-        n_classes = len(np.unique(self.predicted_labels))
-        n_bases = len(self.bases_labels[0])
-        n_agree = len(self.agree_ind)
-        n_disagree = len(self.disagree_ind)
+        self.n_queries = len(self.true_labels)
+        self.n_bases = len(self.bases_labels[0])
+        self.n_disagree = len(self.disagree_ind)
         
-        n_right_clf_by_query, n_right_clf_ind = \
-            self._get_n_right_clf_stats(n_bases)
+        with open(self.log_fname,'w') as f:
+            for line in self._get_all_lines():
+                f.write(str(line))
+                f.write("\n")
 
-        predicted_dis = self._get_distribution()
+    def _get_all_lines(self):
+        lines = []
+        lines.extend(self._get_general_lines())
+        lines.extend(self._get_agree_lines())
+        lines.extend(self._get_n_right_clf_lines())
+        lines.extend(self._get_disagree_lines())
+        lines.extend(self._get_competences_lines())
+        return lines
+
+    def _get_general_lines(self):
+        lines = [
+            "Queries:",
+            self.n_queries,
+        ]
+
+        return lines
+
+    def _get_agree_lines(self):
+        n_agree = len(self.agree_ind)
         agree_dis = self._get_distribution(ind=self.agree_ind)
-        n_right_clf_dis = self._get_distribution(n_right_clf_by_query)
-        
         agree_score = self._get_score(self.agree_ind)
-        disagree_score = self._get_score(self.disagree_ind)
+        predicted_dis = self._get_distribution()
         
-        competences_mean, competences_mean_by_clf, n_even_max_competence = \
-            self._get_competences_stats()
-
-        lines = []
-        lines.extend([
-            "Queries:",
-            n_queries,
-            "Nb of right classifiers from 0 to "+str(n_bases)+":",
-            n_right_clf_dis,
+        lines = [
             "--- Agreements",
             "Instances, ratio on queries:",
             n_agree,
-            round(n_agree / n_queries, 3),
+            round(n_agree / self.n_queries, 3),
             "Classes distribution, ratio on predictions:",
             agree_dis,
             np.round(agree_dis / predicted_dis, 3),
             "Score, ratio on agreements:",
             agree_score,
             round(agree_score / n_agree, 3),
-        ])
+        ]
+
+        return lines
+
+    def _get_n_right_clf_lines(self):
+        n_right_clf_by_query, n_right_clf_ind = \
+            self._get_n_right_clf_stats()
+        n_right_clf_dis = self._get_distribution(n_right_clf_by_query)
+
+        lines = [
+            "--- Right classifiers:",
+            "Distribution:",
+            n_right_clf_dis,
+        ]
 
         for i,n_right_clf in enumerate(n_right_clf_dis):
             score = self._get_score(n_right_clf_ind[i])
@@ -55,20 +77,34 @@ def log_stats(self):
                 "--- "+str(i)+" right classifiers",
                 "Instances, ratio on queries:",
                 n_right_clf_dis[i],
-                round(n_right_clf / n_queries, 3),
+                round(n_right_clf / self.n_queries, 3),
                 "Score, ratio on "+str(i)+" right clf:",
                 score,
                 round(score / n_right_clf_dis[i], 3),
             ])
 
-        lines.extend([
+        return lines
+
+    def _get_disagree_lines(self):
+        disagree_score = self._get_score(self.disagree_ind)
+
+        lines = [
             "--- Disagreements",
             "Instances, ratio on queries:",
-            n_disagree,
-            round(n_disagree / n_queries, 3),
+            self.n_disagree,
+            round(self.n_disagree / self.n_queries, 3),
             "Score, ratio on disagreements:",
             disagree_score,
-            round(disagree_score / n_disagree, 3),
+            round(disagree_score / self.n_disagree, 3),
+        ]
+
+        return lines
+
+    def _get_competences_lines(self):
+        competences_mean, competences_mean_by_clf, n_even_max_competence = \
+            self._get_competences_stats()
+
+        lines = [
             "--- Competences",
             "Mean:",
             round(competences_mean, 3),
@@ -76,13 +112,10 @@ def log_stats(self):
             np.round(competences_mean_by_clf, 3),
             "Even max competences times, ratio on disagreements:",
             n_even_max_competence,
-            round(n_even_max_competence / n_disagree, 3),
-        ])
-        
-        with open("log.txt",'w') as f:
-            for line in lines:
-                f.write(str(line))
-                f.write("\n")
+            round(n_even_max_competence / self.n_disagree, 3),
+        ]
+
+        return lines
 
     def _get_distribution(self, labels=None, ind=None):
         labels = self.predicted_labels if labels is None else labels
@@ -90,9 +123,9 @@ def _get_distribution(self, labels=None, ind=None):
         _, counts = np.unique(labels, return_counts=True)
         return counts
 
-    def _get_n_right_clf_stats(self, n_bases):
+    def _get_n_right_clf_stats(self):
         n_right_clf_by_query = []
-        n_right_clf_ind = [[] for i in range(n_bases + 1)]
+        n_right_clf_ind = [[] for i in range(self.n_bases + 1)]
 
         for i,label in enumerate(self.true_labels):
             row = self.bases_labels[i]

From 9a779a4d001c66ed45336f8dfacdc88caedaad01 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Mon, 15 Mar 2021 14:27:10 -0400
Subject: [PATCH 09/21] =?UTF-8?q?Ajout=20de=20logs=20multi-datasets,=20ges?=
 =?UTF-8?q?tion=20du=20cas=20o=C3=B9=20il=20y=20a=200=20d=C3=A9saccords,?=
 =?UTF-8?q?=20renommages.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/base.py           |  5 +++-
 deslib/multi_datasets.py |  6 +++-
 deslib/util/stats.py     | 63 +++++++++++++++++++++++++++++-----------
 3 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index 041dbe5..368c0e5 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -57,7 +57,7 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
         self.DSEL_perc = DSEL_perc
         self.knne = knne
         self.n_jobs = n_jobs
-        self.stats = Stats()
+        self._set_stats()
 
         # Check optional dependency
         if knn_classifier == 'faiss' and not faiss_knn_wrapper.is_available():
@@ -65,6 +65,9 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                 'Using knn_classifier="faiss" requires that the FAISS library '
                 'be installed.Please check the Installation Guide.')
 
+    def _set_stats(self):
+        self.stats = Stats()
+
     @abstractmethod
     def select(self, competences):
         """Select the most competent classifier for
diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 4502277..82a58fc 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -18,7 +18,7 @@
                                      majority_voting_rule,
                                      aggregate_proba_ensemble_weighted)
 from deslib.util.instance_hardness import hardness_region_competence
-
+from deslib.util.stats import MultiStats
 
 # Créer à partir de KNORA-U
 class MultiDatasets(BaseDS):
@@ -35,6 +35,9 @@ def __init__(self, ds_classifier, pool_classifiers):
         super(MultiDatasets, self).__init__(pool_classifiers)
         self.ds_classifier = ds_classifier
 
+    def _set_stats(self):
+        self.stats = MultiStats()
+
     def fit(self, X, y):
         """Prepare the DS models by setting the KNN algorithm and
         pre-processing the information required to apply the DS
@@ -202,6 +205,7 @@ def _predict_ds(self, X):
 
             predicted_labels[ind_ds_original_matrix] = pred_ds
 
+        self.stats.n_datasets = n_datasets
         self.stats.bases_labels = merged_base_predictions
         self.stats.agree_ind = ind_all_agree
         self.stats.predicted_labels = predicted_labels
diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index 3ab86da..267176d 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -27,8 +27,9 @@ def _get_all_lines(self):
         lines.extend(self._get_general_lines())
         lines.extend(self._get_agree_lines())
         lines.extend(self._get_n_right_clf_lines())
-        lines.extend(self._get_disagree_lines())
-        lines.extend(self._get_competences_lines())
+        if self.n_disagree > 0:
+            lines.extend(self._get_disagree_lines())
+            lines.extend(self._get_competences_lines())
         return lines
 
     def _get_general_lines(self):
@@ -50,7 +51,7 @@ def _get_agree_lines(self):
             "Instances, ratio on queries:",
             n_agree,
             round(n_agree / self.n_queries, 3),
-            "Classes distribution, ratio on predictions:",
+            "Distribution, ratio on predictions:",
             agree_dis,
             np.round(agree_dis / predicted_dis, 3),
             "Score, ratio on agreements:",
@@ -64,25 +65,19 @@ def _get_n_right_clf_lines(self):
         n_right_clf_by_query, n_right_clf_ind = \
             self._get_n_right_clf_stats()
         n_right_clf_dis = self._get_distribution(n_right_clf_by_query)
+        scores = [self._get_score(n_right_clf_ind[i]) \
+            for i in range(len(n_right_clf_dis))]
 
         lines = [
             "--- Right classifiers:",
-            "Distribution:",
+            "Distribution, ratio on queries:",
             n_right_clf_dis,
+            np.round(n_right_clf_dis / self.n_queries, 3),
+            "Scores, ratio on N right clf",
+            scores,
+            np.round(scores / n_right_clf_dis, 3),
         ]
 
-        for i,n_right_clf in enumerate(n_right_clf_dis):
-            score = self._get_score(n_right_clf_ind[i])
-            lines.extend([
-                "--- "+str(i)+" right classifiers",
-                "Instances, ratio on queries:",
-                n_right_clf_dis[i],
-                round(n_right_clf / self.n_queries, 3),
-                "Score, ratio on "+str(i)+" right clf:",
-                score,
-                round(score / n_right_clf_dis[i], 3),
-            ])
-
         return lines
 
     def _get_disagree_lines(self):
@@ -110,7 +105,7 @@ def _get_competences_lines(self):
             round(competences_mean, 3),
             "Mean by classifier:",
             np.round(competences_mean_by_clf, 3),
-            "Even max competences times, ratio on disagreements:",
+            "Even max competences times, \nratio on disagreements:",
             n_even_max_competence,
             round(n_even_max_competence / self.n_disagree, 3),
         ]
@@ -153,3 +148,37 @@ def _get_score(self, ind):
         matches = np.equal(true_labels, labels)
         score = np.sum(matches)
         return score
+
+
+class MultiStats(Stats):
+    def __init__(self):
+        super().__init__()
+        self.n_datasets = 1
+
+    def _get_all_lines(self):
+        lines = super()._get_all_lines()
+        lines.extend(self._get_multistats_lines())
+        return lines
+
+    def _get_competences_lines(self):
+        competences_mean, competences_mean_by_clf, n_even_max_competence = \
+            self._get_competences_stats()
+
+        means = competences_mean_by_clf.reshape(self.n_datasets, -1)
+        competences_mean_by_dataset = np.mean(means, axis=1)
+
+        lines = super()._get_competences_lines()
+        lines.extend([
+            "Mean by dataset:",
+            np.round(competences_mean_by_dataset, 3),
+        ])
+
+        return lines
+
+    def _get_multistats_lines(self):
+        lines = [
+            "--- Multidatasets",
+            self.n_datasets
+        ]
+
+        return lines

From bdccfc277459a65c24010bf2f2479ced678890fd Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Sun, 21 Mar 2021 21:13:37 -0400
Subject: [PATCH 10/21] Retrait du choix de distances avec fknn.

---
 deslib/base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index 368c0e5..1575528 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -365,9 +365,7 @@ def _set_region_of_competence_algorithm(self, X):
         elif self.knn_classifier == 'faiss':
             knn_class = functools.partial(
                 faiss_knn_wrapper.FaissKNNClassifier,
-                n_jobs=self.n_jobs, algorithm="brute",
-                metric=self.knn_metric,
-                metric_params=metric_params)
+                n_jobs=self.n_jobs, algorithm="brute")
         elif callable(self.knn_classifier):
             knn_class = self.knn_classifier
         else:

From 58502dfd0c14d839d4da2e1231408c4286523f5c Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Sun, 28 Mar 2021 21:03:34 -0400
Subject: [PATCH 11/21] =?UTF-8?q?Ajout=20de=20KNOP=20fusionn=C3=A9=20dans?=
 =?UTF-8?q?=20le=20profil.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/des/multi_knop.py | 256 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 256 insertions(+)
 create mode 100644 deslib/des/multi_knop.py

diff --git a/deslib/des/multi_knop.py b/deslib/des/multi_knop.py
new file mode 100644
index 0000000..c1dac6d
--- /dev/null
+++ b/deslib/des/multi_knop.py
@@ -0,0 +1,256 @@
+# coding=utf-8
+
+# Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
+#
+# License: BSD 3 clause
+
+import numpy as np
+
+from deslib.des.base import BaseDS
+from deslib.des.knop import KNOP
+from sklearn.utils.validation import (check_X_y, check_is_fitted, check_array,
+                                      check_random_state)
+
+
+class MultiKNOP(KNOP):
+    def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
+                 safe_k=None, IH_rate=0.30, random_state=None, voting='hard',
+                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
+
+        super(KNOP, self).__init__(None, k,
+                                   DFP=DFP,
+                                   with_IH=with_IH,
+                                   safe_k=safe_k,
+                                   IH_rate=IH_rate,
+                                   needs_proba=True,
+                                   random_state=random_state,
+                                   knn_classifier=knn_classifier,
+                                   knne=knne,
+                                   DSEL_perc=DSEL_perc,
+                                   n_jobs=n_jobs)
+        self.ds_classifiers = []
+        for i in range(len(pool_classifiers)):
+            knop = KNOP(pool_classifiers[i], k,
+                        DFP=DFP,
+                        with_IH=with_IH,
+                        safe_k=safe_k,
+                        IH_rate=IH_rate,
+                        random_state=random_state,
+                        knn_classifier=knn_classifier,
+                        knne=knne,
+                        DSEL_perc=DSEL_perc,
+                        n_jobs=n_jobs)
+            self.ds_classifiers.append(knop)
+
+
+    """Multi k-Nearest Output Profiles (MultiKNOP).
+    """
+    def fit(self, X, y):
+        """Train the DS model by setting the KNN algorithm and
+        pre-process the information required to apply the DS
+        methods. In this case, the scores of the base classifiers for
+        the dynamic selection dataset (DSEL) are pre-calculated to
+        transform each sample in DSEL into an output profile.
+
+        Parameters
+        ----------
+        X : array of shape (n_datasets, n_samples, n_features)
+            Data used to fit the model.
+
+        y : array of shape (n_datasets, n_samples)
+            class labels of each example in X.
+
+        Returns
+        -------
+        self
+        """
+        if len(X) == 1 or len(y) == 1:
+            raise ValueError("Error. MultiKNOP  does not accept one dataset!")
+        """
+        for predict()...
+        for i in range(len(y)-1):
+            if np.array_equal(y[0],y[i+1]):
+                raise ValueError(
+                    "Error. All datasets queries must match exactly!")
+        """
+
+        datasets_dsel_scores = []
+        datasets_DSEL_processed_ = []
+        # Process each dataset
+        for i in range(len(X)):
+            self.ds_classifiers[i].fit(X[i], y[i])
+            if self.ds_classifiers[i].n_classes_ == 1:
+                raise ValueError(
+                    "Error. MultiKNOP  does not accept one class datasets!")
+            self.ds_classifiers[i]._check_predict_proba()
+            self.ds_classifiers[i].dsel_scores_ = \
+                self.ds_classifiers[i]._preprocess_dsel_scores()
+            datasets_dsel_scores.append(self.ds_classifiers[i].dsel_scores_)
+            datasets_DSEL_processed_.append(
+                self.ds_classifiers[i].DSEL_processed_)
+        
+        self.dsel_scores_ = np.concatenate(datasets_dsel_scores, axis=1)
+        self.DSEL_processed_ = np.concatenate(datasets_DSEL_processed_, axis=1)
+        self.n_classifiers_ = self.dsel_scores_.shape[1]
+        
+        # Reassignment
+        self.DSEL_target_ = self.ds_classifiers[0].DSEL_target_
+        self.n_samples_ = self.ds_classifiers[0].n_samples_
+        self.n_classes_ = self.ds_classifiers[0].n_classes_
+        self.knn_class_ = self.ds_classifiers[0].knn_class_
+        self.k_ = self.ds_classifiers[0].k_
+        self.classes_ = self.ds_classifiers[0].classes_
+        
+        # Reshape DSEL_scores as a 2-D array for nearest neighbor calculations
+        dsel_output_profiles = self.dsel_scores_.reshape(self.n_samples_,
+                                                         self.n_classifiers_ *
+                                                         self.n_classes_)
+
+        self._fit_OP(dsel_output_profiles, self.DSEL_target_, self.k_)
+
+        return self
+
+    def _fit_OP(self, X_op, y_op, k):
+        """ Fit the set of output profiles.
+
+        Parameters
+        ----------
+        X_op : array of shape (n_samples, n_features)
+            Output profiles of the training data. n_features is equal
+            to (n_classifiers x n_classes).
+
+        y_op : array of shape (n_samples)
+               Class labels of each sample in X_op.
+
+        k : int
+            Number of output profiles used in the region of competence
+            estimation.
+
+        """
+        self.op_knn_ = self.knn_class_(k)
+
+        if self.n_classes_ == 2:
+            # Get only the scores for one class since they are complementary
+            X_temp = X_op[:, ::2]
+            self.op_knn_.fit(X_temp, y_op)
+        else:
+            self.op_knn_.fit(X_op, y_op)
+
+    def _get_similar_out_profiles(self, probabilities):
+        """Get the most similar output profiles of the query sample.
+
+        Parameters
+        ----------
+        probabilities : array of shape (n_samples, n_classifiers, n_classes)
+                        predictions of each base classifier for all samples.
+
+        Returns
+        -------
+        dists : list of shape = [n_samples, k]
+                The distances between the query and each sample in the region
+                of competence. The vector is ordered in an ascending fashion.
+
+        idx : list of shape = [n_samples, k]
+            Indices of the instances belonging to the region of competence of
+            the given query sample.
+        """
+
+        if self.n_classes_ == 2:
+            # Get only the scores for one class since they are complementary
+            query_op = probabilities[:, :, 0]
+        else:
+            query_op = probabilities.reshape((probabilities.shape[0],
+                                              self.n_classifiers_ *
+                                              self.n_classes_))
+
+        dists, idx = self.op_knn_.kneighbors(query_op, n_neighbors=self.k_,
+                                             return_distance=True)
+        return dists, np.atleast_2d(idx)
+
+    def predict(self, X):
+        """Predict the class label for each sample in X.
+        Parameters
+        ----------
+        X : array of shape (n_datasets, n_samples, n_features)
+            The input data, one array of queries per dataset.
+        Returns
+        -------
+        predicted_labels : array of shape (n_samples)
+                           Predicted class label for each sample in X.
+        """
+        # Check if the DS model was trained
+        #check_is_fitted(self,
+        #                ["DSEL_processed_", "DSEL_data_", "DSEL_target_"])
+
+        # Check if X is a valid input
+        #for i in range(len(X)):
+        #    X[i] = check_array(X[i])
+        #    self._check_num_features(X[i])
+
+        n_samples = X[0].shape[0]
+        predicted_labels = np.empty(n_samples, dtype=np.intp)
+
+        base_probabilities = []
+        for i in range(len(X)):
+            base_probabilities.append(
+                self.ds_classifiers[i]._predict_proba_base(X[i]))
+        base_probabilities = np.concatenate(base_probabilities,axis=1)
+        base_predictions = base_probabilities.argmax(axis=2)
+
+        all_agree_vector = BaseDS._all_classifier_agree(base_predictions)
+        ind_all_agree = np.where(all_agree_vector)[0]
+
+        # Since the predictions are always the same, get the predictions of the
+        # first base classifier.
+        if ind_all_agree.size:
+            predicted_labels[ind_all_agree] = base_predictions[
+                ind_all_agree, 0]
+
+        # For the samples with disagreement, perform the dynamic selection
+        # steps. First step is to collect the samples with disagreement
+        # between base classifiers
+        ind_disagreement = np.where(~all_agree_vector)[0]
+        if ind_disagreement.size:
+
+            X_DS = X[0][ind_disagreement, :]
+
+            # Feature-space neighbors are not estimated here (the call below
+            # is disabled); None is passed on to classify_with_ds instead.
+            distances, neighbors = None, None
+            #distances, neighbors = self._get_region_competence(X_DS)
+
+            # IH was not considered. So all samples with disagreement are
+            # passed down to the DS algorithm
+            ind_ds_classifier = np.arange(ind_disagreement.size)
+
+            # At this stage the samples on which all base classifiers agree
+            # were already classified (instance hardness is not considered).
+            # The remaining samples are now passed down to the DS techniques
+            # for classification.
+
+            #  First check whether there are still samples to be classified.
+            if ind_ds_classifier.size:
+
+                DFP_mask = np.ones(
+                    (ind_ds_classifier.size, self.n_classifiers_))
+
+                # Get the real indices_ of the samples that will be classified
+                # using a DS algorithm.
+                ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
+
+                if self.needs_proba or self.voting == 'soft':
+                    selected_probabilities = base_probabilities[
+                        ind_ds_original_matrix]
+                else:
+                    selected_probabilities = None
+
+                pred_ds = self.classify_with_ds(X_DS[ind_ds_classifier],
+                                                base_predictions[
+                                                    ind_ds_original_matrix],
+                                                selected_probabilities,
+                                                neighbors=neighbors,
+                                                distances=distances,
+                                                DFP_mask=DFP_mask)
+                predicted_labels[ind_ds_original_matrix] = pred_ds
+
+        return self.classes_.take(predicted_labels)

From 6d3ba91f980802f0faa9b266cd841d8b556110e2 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 24 Jun 2021 00:13:20 -0400
Subject: [PATCH 12/21] Fix: quand il y a 0 accord pour une classe.

---
 deslib/util/stats.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index 267176d..fdcb0e3 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -114,9 +114,13 @@ def _get_competences_lines(self):
 
     def _get_distribution(self, labels=None, ind=None):
         labels = self.predicted_labels if labels is None else labels
+        max_label = max(labels)
         if ind is not None: labels = labels[ind]
-        _, counts = np.unique(labels, return_counts=True)
-        return counts
+        unique_labels, unique_counts = np.unique(labels, return_counts=True)
+        distribution = np.full(max_label+1,0)
+        for i,l in enumerate(unique_labels):
+            distribution[l] = unique_counts[i]
+        return distribution
 
     def _get_n_right_clf_stats(self):
         n_right_clf_by_query = []

From 7d06fe919047ef94f90cbd950fcfcd458d45a652 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Thu, 1 Jul 2021 23:21:14 -0400
Subject: [PATCH 13/21] =?UTF-8?q?Patch:=20permet=20d'avoir=200=20ou=201=20?=
 =?UTF-8?q?base=20par=20mod=C3=A8le.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/base.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index 1575528..72a8653 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -826,9 +826,10 @@ def _validate_pool(self):
         ValueError
             If the pool of classifiers is empty.
         """
-        if self.n_classifiers_ <= 1:
-            raise ValueError("n_classifiers must be greater than one, "
-                             "got {}.".format(self.n_classifiers_))
+        # PATCH: allow 0 or 1 base for the multidatasets model.
+        #if self.n_classifiers_ <= 1:
+        #    raise ValueError("n_classifiers must be greater than one, "
+        #                     "got {}.".format(self.n_classifiers_))
 
     def _check_num_features(self, X):
         """ Verify if the number of features (n_features) of X is equals to

From 1390e67f597c8a44cc646889893c2eec9bb6c6c8 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Fri, 2 Jul 2021 09:38:46 -0400
Subject: [PATCH 14/21] PATCH: 1 base permise pour static

---
 deslib/static/base.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deslib/static/base.py b/deslib/static/base.py
index e72547e..993b909 100644
--- a/deslib/static/base.py
+++ b/deslib/static/base.py
@@ -134,6 +134,6 @@ def _validate_pool(self):
         ValueError
             If the pool of classifiers is empty or just a single model.
         """
-        if self.n_classifiers_ <= 1:
-            raise ValueError("n_classifiers must be greater than one, "
-                             "got {}.".format(len(self.pool_classifiers)))
+        #if self.n_classifiers_ <= 1:
+        #    raise ValueError("n_classifiers must be greater than one, "
+        #                     "got {}.".format(len(self.pool_classifiers)))

From 2905fb713684c6f8ec17bea356b8702dccd36dca Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Sun, 1 Aug 2021 14:18:11 -0400
Subject: [PATCH 15/21] =?UTF-8?q?Ajout=20des=20tats=20de=20la=20fiabilit?=
 =?UTF-8?q?=C3=A9=20de=20comp=C3=A9tence.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/base.py           |  2 +
 deslib/multi_datasets.py |  2 +
 deslib/util/stats.py     | 90 +++++++++++++++++++++++++++++++++-------
 3 files changed, 80 insertions(+), 14 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index 72a8653..4f20326 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -458,6 +458,7 @@ def predict(self, X):
         # steps. First step is to collect the samples with disagreement
         # between base classifiers
         ind_disagreement = np.where(~all_agree_vector)[0]
+        ind_disagreement = np.asarray(range(len(predicted_labels)))
         if ind_disagreement.size:
 
             X_DS = X[ind_disagreement, :]
@@ -557,6 +558,7 @@ def predict(self, X):
         self.stats.bases_labels = base_predictions
         self.stats.agree_ind = ind_all_agree
         self.stats.predicted_labels = predicted_labels
+        self.stats.k = self.k
 
         return self.classes_.take(predicted_labels)
 
diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 82a58fc..09e8624 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -121,6 +121,7 @@ def _predict_ds(self, X):
         # steps. First step is to collect the samples with disagreement
         # between base classifiers
         ind_disagreement = np.where(~all_agree_vector)[0]
+        ind_disagreement = np.asarray(range(len(merged_base_predictions)))
         if ind_disagreement.size:
             merged_left_base_predictions = []
             merged_competences = []
@@ -209,6 +210,7 @@ def _predict_ds(self, X):
         self.stats.bases_labels = merged_base_predictions
         self.stats.agree_ind = ind_all_agree
         self.stats.predicted_labels = predicted_labels
+        self.stats.k = self.k
 
         return self.classes_.take(predicted_labels)
 
diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index fdcb0e3..604e24b 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -30,6 +30,7 @@ def _get_all_lines(self):
         if self.n_disagree > 0:
             lines.extend(self._get_disagree_lines())
             lines.extend(self._get_competences_lines())
+            lines.extend(self._get_competences_reliability_lines())
         return lines
 
     def _get_general_lines(self):
@@ -96,18 +97,77 @@ def _get_disagree_lines(self):
         return lines
 
     def _get_competences_lines(self):
-        competences_mean, competences_mean_by_clf, n_even_max_competence = \
+        mean, mean_by_clf, var, var_by_clf, n_even_max = \
             self._get_competences_stats()
 
         lines = [
             "--- Competences",
             "Mean:",
-            round(competences_mean, 3),
+            round(mean, 3),
             "Mean by classifier:",
-            np.round(competences_mean_by_clf, 3),
+            np.round(mean_by_clf, 3),
+            "Var:",
+            round(var, 3),
+            "Var by classifier:",
+            np.round(var_by_clf, 3),
             "Even max competences times, \nratio on disagreements:",
-            n_even_max_competence,
-            round(n_even_max_competence / self.n_disagree, 3),
+            n_even_max,
+            round(n_even_max / self.n_disagree, 3),
+        ]
+
+        return lines
+
+    def _get_competences_reliability_lines(self):
+        true_labels_by_base = np.tile(self.true_labels,(self.n_bases,1))
+        correct_bases_bln_array = np.equal(
+            self.bases_labels.T,true_labels_by_base)
+        n_queries = self.bases_labels.shape[0]
+        n_correct_labels_by_base = np.sum(correct_bases_bln_array,axis=1)
+        acc_by_base = np.round(n_correct_labels_by_base/n_queries,3)
+
+        comp = self.competences/self.k
+        n_incorrect_labels_by_base = \
+            len(self.predicted_labels)-n_correct_labels_by_base
+
+        correct_comp = comp.T*correct_bases_bln_array
+        correct_comp_by_base = np.sum(correct_comp,axis=1)
+        correct_comp_by_base /= n_correct_labels_by_base
+        mean_correct_comp_by_base = np.round(correct_comp_by_base,3)
+        mean = mean_correct_comp_by_base
+        mean = np.repeat(mean,n_queries).reshape(self.n_bases,-1)
+        correct_comp_by_base = np.sum((correct_comp-mean)**2,axis=1)
+        correct_comp_by_base /= n_correct_labels_by_base
+        std_correct_comp_by_base = np.round(np.sqrt(correct_comp_by_base),3)
+
+        incorrect_comp = comp.T*~correct_bases_bln_array
+        incorrect_comp_by_base = np.sum(incorrect_comp,axis=1)
+        incorrect_comp_by_base /= n_incorrect_labels_by_base
+        mean_incorrect_comp_by_base = np.round(incorrect_comp_by_base,3)
+        mean = mean_incorrect_comp_by_base
+        mean = np.repeat(mean,n_queries).reshape(self.n_bases,-1)
+        incorrect_comp_by_base = np.sum((incorrect_comp-mean)**2,axis=1)
+        incorrect_comp_by_base /= n_incorrect_labels_by_base
+        std_incorrect_comp_by_base = np.round(
+            np.sqrt(incorrect_comp_by_base),3)
+
+        lines = [
+            "--- Competence reliability",
+            "Acc:",
+            round(np.mean(acc_by_base),3),
+            "(by base):",
+            acc_by_base,
+            "Competence mean & std when well clasified:",
+            round(np.mean(mean_correct_comp_by_base),3),
+            round(np.mean(std_correct_comp_by_base),3),
+            "(by base):",
+            mean_correct_comp_by_base,
+            std_correct_comp_by_base,
+            "Competence mean & std when not well clasified:",
+            round(np.mean(mean_incorrect_comp_by_base),3),
+            round(np.mean(std_incorrect_comp_by_base),3),
+            "(by base):",
+            mean_incorrect_comp_by_base,
+            std_incorrect_comp_by_base,
         ]
 
         return lines
@@ -135,16 +195,18 @@ def _get_n_right_clf_stats(self):
         return n_right_clf_by_query, n_right_clf_ind
 
     def _get_competences_stats(self):
-        competences_mean = np.mean(self.competences)
-        competences_mean_by_clf = np.mean(self.competences, axis=0)
-        n_even_max_competence = 0
+        mean = np.mean(self.competences)
+        var = np.var(self.competences)
+        mean_by_clf = np.mean(self.competences, axis=0)
+        var_by_clf = np.var(self.competences, axis=0)
+        n_even_max = 0
 
         for c in self.competences:
             max_ = c[np.argmax(c)]
             n_max = np.count_nonzero(c == max_)
-            if n_max > 1: n_even_max_competence += 1
+            if n_max > 1: n_even_max += 1
 
-        return competences_mean, competences_mean_by_clf, n_even_max_competence
+        return mean, mean_by_clf, var, var_by_clf, n_even_max
 
     def _get_score(self, ind):
         true_labels = self.true_labels[ind]
@@ -165,16 +227,16 @@ def _get_all_lines(self):
         return lines
 
     def _get_competences_lines(self):
-        competences_mean, competences_mean_by_clf, n_even_max_competence = \
+        _, mean_by_clf, _, var_by_clf, _ = \
             self._get_competences_stats()
 
-        means = competences_mean_by_clf.reshape(self.n_datasets, -1)
-        competences_mean_by_dataset = np.mean(means, axis=1)
+        means = mean_by_clf.reshape(self.n_datasets, -1)
+        mean_by_dataset = np.mean(means, axis=1)
 
         lines = super()._get_competences_lines()
         lines.extend([
             "Mean by dataset:",
-            np.round(competences_mean_by_dataset, 3),
+            np.round(mean_by_dataset, 3),
         ])
 
         return lines

From d40452ab6066a1ed11576938a6e17945304dcc71 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Wed, 1 Sep 2021 22:33:33 -0400
Subject: [PATCH 16/21] =?UTF-8?q?Lignes=20de=20comp=C3=A9tence=20avec=20do?=
 =?UTF-8?q?nn=C3=A9es=20rendues=20en=20pourcentage.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 deslib/util/stats.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index 604e24b..d48f6ab 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -195,13 +195,14 @@ def _get_n_right_clf_stats(self):
         return n_right_clf_by_query, n_right_clf_ind
 
     def _get_competences_stats(self):
-        mean = np.mean(self.competences)
-        var = np.var(self.competences)
-        mean_by_clf = np.mean(self.competences, axis=0)
-        var_by_clf = np.var(self.competences, axis=0)
+        comp = self.competences/self.k
+        mean = np.mean(comp)
+        var = np.var(comp)
+        mean_by_clf = np.mean(comp, axis=0)
+        var_by_clf = np.var(comp, axis=0)
         n_even_max = 0
 
-        for c in self.competences:
+        for c in comp:
             max_ = c[np.argmax(c)]
             n_max = np.count_nonzero(c == max_)
             if n_max > 1: n_even_max += 1

From 6b91780e8e6f36ff5a00b73de6e6dd7d13c305bd Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Mon, 3 Jan 2022 09:19:11 -0500
Subject: [PATCH 17/21] Sanitizing.

---
 deslib/util/stats.py | 67 ++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 37 deletions(-)

diff --git a/deslib/util/stats.py b/deslib/util/stats.py
index d48f6ab..7e87e21 100644
--- a/deslib/util/stats.py
+++ b/deslib/util/stats.py
@@ -13,10 +13,16 @@ def __init__(self):
         self.log_fname = "log.txt"
 
     def log_stats(self):
-        self.n_queries = len(self.true_labels)
+        bln_mat = np.equal(self.true_labels,self.predicted_labels)
+        self.wrong_true_labels = self.true_labels[~bln_mat]
+        self.wrong_bases_labels = self.bases_labels[~bln_mat]
+        self.wrong_predicted_labels = self.predicted_labels[~bln_mat]
+        self.wrong_competences = self.competences[~bln_mat]
+
+        self.n_queries = len(self.wrong_true_labels)
         self.n_bases = len(self.bases_labels[0])
         self.n_disagree = len(self.disagree_ind)
-        
+
         with open(self.log_fname,'w') as f:
             for line in self._get_all_lines():
                 f.write(str(line))
@@ -44,7 +50,8 @@ def _get_general_lines(self):
     def _get_agree_lines(self):
         n_agree = len(self.agree_ind)
         agree_dis = self._get_distribution(ind=self.agree_ind)
-        agree_score = self._get_score(self.agree_ind)
+        agree_score = self._get_score(
+            self.agree_ind, self.true_labels, self.predicted_labels)
         predicted_dis = self._get_distribution()
         
         lines = [
@@ -64,9 +71,12 @@ def _get_agree_lines(self):
 
     def _get_n_right_clf_lines(self):
         n_right_clf_by_query, n_right_clf_ind = \
-            self._get_n_right_clf_stats()
+            self._get_n_right_clf_stats(self.wrong_true_labels,self.wrong_bases_labels)
         n_right_clf_dis = self._get_distribution(n_right_clf_by_query)
-        scores = [self._get_score(n_right_clf_ind[i]) \
+        scores = [self._get_score(
+            n_right_clf_ind[i],
+            self.wrong_true_labels,
+            self.wrong_predicted_labels) \
             for i in range(len(n_right_clf_dis))]
 
         lines = [
@@ -82,8 +92,8 @@ def _get_n_right_clf_lines(self):
         return lines
 
     def _get_disagree_lines(self):
-        disagree_score = self._get_score(self.disagree_ind)
-
+        disagree_score = self._get_score(
+            self.disagree_ind, self.true_labels, self.predicted_labels)
         lines = [
             "--- Disagreements",
             "Instances, ratio on queries:",
@@ -98,7 +108,7 @@ def _get_disagree_lines(self):
 
     def _get_competences_lines(self):
         mean, mean_by_clf, var, var_by_clf, n_even_max = \
-            self._get_competences_stats()
+            self._get_competences_stats(self.wrong_competences)
 
         lines = [
             "--- Competences",
@@ -182,37 +192,35 @@ def _get_distribution(self, labels=None, ind=None):
             distribution[l] = unique_counts[i]
         return distribution
 
-    def _get_n_right_clf_stats(self):
+    def _get_n_right_clf_stats(self,true_labels,bases_labels):
         n_right_clf_by_query = []
         n_right_clf_ind = [[] for i in range(self.n_bases + 1)]
 
-        for i,label in enumerate(self.true_labels):
-            row = self.bases_labels[i]
+        for i,label in enumerate(true_labels):
+            row = bases_labels[i]
             n_right_clf = np.count_nonzero(row == label)
             n_right_clf_by_query.append(n_right_clf)
             n_right_clf_ind[n_right_clf].append(i)
 
         return n_right_clf_by_query, n_right_clf_ind
 
-    def _get_competences_stats(self):
-        comp = self.competences/self.k
-        mean = np.mean(comp)
-        var = np.var(comp)
-        mean_by_clf = np.mean(comp, axis=0)
-        var_by_clf = np.var(comp, axis=0)
+    def _get_competences_stats(self, competences):
+        competences = competences/self.k
+        mean = np.mean(competences)
+        var = np.var(competences)
+        mean_by_clf = np.mean(competences, axis=0)
+        var_by_clf = np.var(competences, axis=0)
         n_even_max = 0
 
-        for c in comp:
+        for c in competences:
             max_ = c[np.argmax(c)]
             n_max = np.count_nonzero(c == max_)
             if n_max > 1: n_even_max += 1
 
         return mean, mean_by_clf, var, var_by_clf, n_even_max
 
-    def _get_score(self, ind):
-        true_labels = self.true_labels[ind]
-        labels = self.predicted_labels[ind]
-        matches = np.equal(true_labels, labels)
+    def _get_score(self, ind, true_labels, predicted_labels):
+        matches = np.equal(true_labels[ind], predicted_labels[ind])
         score = np.sum(matches)
         return score
 
@@ -227,21 +235,6 @@ def _get_all_lines(self):
         lines.extend(self._get_multistats_lines())
         return lines
 
-    def _get_competences_lines(self):
-        _, mean_by_clf, _, var_by_clf, _ = \
-            self._get_competences_stats()
-
-        means = mean_by_clf.reshape(self.n_datasets, -1)
-        mean_by_dataset = np.mean(means, axis=1)
-
-        lines = super()._get_competences_lines()
-        lines.extend([
-            "Mean by dataset:",
-            np.round(mean_by_dataset, 3),
-        ])
-
-        return lines
-
     def _get_multistats_lines(self):
         lines = [
             "--- Multidatasets",

From 9fd1de4642aa114db54662ee4440ef787b332ca9 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Fri, 14 Jan 2022 16:44:39 -0500
Subject: [PATCH 18/21] Retrait du code sur les stats de la SD.

---
 deslib/base.py           |  11 --
 deslib/des/base.py       |   2 -
 deslib/multi_datasets.py |  12 --
 deslib/util/stats.py     | 244 ---------------------------------------
 4 files changed, 269 deletions(-)
 delete mode 100644 deslib/util/stats.py

diff --git a/deslib/base.py b/deslib/base.py
index 4f20326..e892659 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -24,7 +24,6 @@
 from deslib.util import faiss_knn_wrapper
 from deslib.util.dfp import frienemy_pruning_preprocessed
 from deslib.util.instance_hardness import hardness_region_competence
-from deslib.util.stats import Stats
 
 
 class BaseDS(BaseEstimator, ClassifierMixin):
@@ -57,7 +56,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
         self.DSEL_perc = DSEL_perc
         self.knne = knne
         self.n_jobs = n_jobs
-        self._set_stats()
 
         # Check optional dependency
         if knn_classifier == 'faiss' and not faiss_knn_wrapper.is_available():
@@ -65,9 +63,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                 'Using knn_classifier="faiss" requires that the FAISS library '
                 'be installed.Please check the Installation Guide.')
 
-    def _set_stats(self):
-        self.stats = Stats()
-
     @abstractmethod
     def select(self, competences):
         """Select the most competent classifier for
@@ -538,7 +533,6 @@ def predict(self, X):
                 # Get the real indices_ of the samples that will be classified
                 # using a DS algorithm.
                 ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
-                self.stats.disagree_ind = ind_ds_original_matrix
 
                 if self.needs_proba:
                     selected_probabilities = base_probabilities[
@@ -555,11 +549,6 @@ def predict(self, X):
                                                 DFP_mask=DFP_mask)
                 predicted_labels[ind_ds_original_matrix] = pred_ds
 
-        self.stats.bases_labels = base_predictions
-        self.stats.agree_ind = ind_all_agree
-        self.stats.predicted_labels = predicted_labels
-        self.stats.k = self.k
-
         return self.classes_.take(predicted_labels)
 
     def predict_proba(self, X):
diff --git a/deslib/des/base.py b/deslib/des/base.py
index f518bbf..a749155 100644
--- a/deslib/des/base.py
+++ b/deslib/des/base.py
@@ -193,8 +193,6 @@ def classify_with_ds(self, query, predictions, probabilities=None,
         if self.DFP:
             competences = competences * DFP_mask
 
-        self.stats.competences = competences
-
         if self.mode == "selection":
             # The selected_classifiers matrix is used as a mask to remove
             # the predictions of certain base classifiers.
diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 09e8624..2ca0b2e 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -18,7 +18,6 @@
                                      majority_voting_rule,
                                      aggregate_proba_ensemble_weighted)
 from deslib.util.instance_hardness import hardness_region_competence
-from deslib.util.stats import MultiStats
 
 # Créer à partir de KNORA-U
 class MultiDatasets(BaseDS):
@@ -35,9 +34,6 @@ def __init__(self, ds_classifier, pool_classifiers):
         super(MultiDatasets, self).__init__(pool_classifiers)
         self.ds_classifier = ds_classifier
 
-    def _set_stats(self):
-        self.stats = MultiStats()
-
     def fit(self, X, y):
         """Prepare the DS models by setting the KNN algorithm and
         pre-processing the information required to apply the DS
@@ -171,7 +167,6 @@ def _predict_ds(self, X):
                 # Get the real indices_ of the samples that will be classified
                 # using a DS algorithm.
                 ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
-                self.stats.disagree_ind = ind_ds_original_matrix
 
                 if ds_classifier.needs_proba:
                     selected_probabilities = base_probabilities[
@@ -195,7 +190,6 @@ def _predict_ds(self, X):
             merged_left_base_predictions = np.concatenate(
                 merged_left_base_predictions, axis=1)
             merged_competences = np.concatenate(merged_competences, axis=1)
-            self.stats.competences = merged_competences
 
             if issubclass(type(self.ds_classifier), BaseDCS):
                 pred_ds = self._get_dcs_predicted_label(self.ds_classifier,
@@ -206,12 +200,6 @@ def _predict_ds(self, X):
 
             predicted_labels[ind_ds_original_matrix] = pred_ds
 
-        self.stats.n_datasets = n_datasets
-        self.stats.bases_labels = merged_base_predictions
-        self.stats.agree_ind = ind_all_agree
-        self.stats.predicted_labels = predicted_labels
-        self.stats.k = self.k
-
         return self.classes_.take(predicted_labels)
 
     def _predict_oracle(self, X, y):
diff --git a/deslib/util/stats.py b/deslib/util/stats.py
deleted file mode 100644
index 7e87e21..0000000
--- a/deslib/util/stats.py
+++ /dev/null
@@ -1,244 +0,0 @@
-import numpy as np
-
-
-class Stats():
-    def __init__(self):
-        self.agree_ind = []
-        self.disagree_ind = []
-        self.true_labels = []
-        self.bases_labels = []
-        self.predicted_labels = []
-        self.agree_labels = []
-        self.competences = []
-        self.log_fname = "log.txt"
-
-    def log_stats(self):
-        bln_mat = np.equal(self.true_labels,self.predicted_labels)
-        self.wrong_true_labels = self.true_labels[~bln_mat]
-        self.wrong_bases_labels = self.bases_labels[~bln_mat]
-        self.wrong_predicted_labels = self.predicted_labels[~bln_mat]
-        self.wrong_competences = self.competences[~bln_mat]
-
-        self.n_queries = len(self.wrong_true_labels)
-        self.n_bases = len(self.bases_labels[0])
-        self.n_disagree = len(self.disagree_ind)
-
-        with open(self.log_fname,'w') as f:
-            for line in self._get_all_lines():
-                f.write(str(line))
-                f.write("\n")
-
-    def _get_all_lines(self):
-        lines = []
-        lines.extend(self._get_general_lines())
-        lines.extend(self._get_agree_lines())
-        lines.extend(self._get_n_right_clf_lines())
-        if self.n_disagree > 0:
-            lines.extend(self._get_disagree_lines())
-            lines.extend(self._get_competences_lines())
-            lines.extend(self._get_competences_reliability_lines())
-        return lines
-
-    def _get_general_lines(self):
-        lines = [
-            "Queries:",
-            self.n_queries,
-        ]
-
-        return lines
-
-    def _get_agree_lines(self):
-        n_agree = len(self.agree_ind)
-        agree_dis = self._get_distribution(ind=self.agree_ind)
-        agree_score = self._get_score(
-            self.agree_ind, self.true_labels, self.predicted_labels)
-        predicted_dis = self._get_distribution()
-        
-        lines = [
-            "--- Agreements",
-            "Instances, ratio on queries:",
-            n_agree,
-            round(n_agree / self.n_queries, 3),
-            "Distribution, ratio on predictions:",
-            agree_dis,
-            np.round(agree_dis / predicted_dis, 3),
-            "Score, ratio on agreements:",
-            agree_score,
-            round(agree_score / n_agree, 3),
-        ]
-
-        return lines
-
-    def _get_n_right_clf_lines(self):
-        n_right_clf_by_query, n_right_clf_ind = \
-            self._get_n_right_clf_stats(self.wrong_true_labels,self.wrong_bases_labels)
-        n_right_clf_dis = self._get_distribution(n_right_clf_by_query)
-        scores = [self._get_score(
-            n_right_clf_ind[i],
-            self.wrong_true_labels,
-            self.wrong_predicted_labels) \
-            for i in range(len(n_right_clf_dis))]
-
-        lines = [
-            "--- Right classifiers:",
-            "Distribution, ratio on queries:",
-            n_right_clf_dis,
-            np.round(n_right_clf_dis / self.n_queries, 3),
-            "Scores, ratio on N right clf",
-            scores,
-            np.round(scores / n_right_clf_dis, 3),
-        ]
-
-        return lines
-
-    def _get_disagree_lines(self):
-        disagree_score = self._get_score(
-            self.disagree_ind, self.true_labels, self.predicted_labels)
-        lines = [
-            "--- Disagreements",
-            "Instances, ratio on queries:",
-            self.n_disagree,
-            round(self.n_disagree / self.n_queries, 3),
-            "Score, ratio on disagreements:",
-            disagree_score,
-            round(disagree_score / self.n_disagree, 3),
-        ]
-
-        return lines
-
-    def _get_competences_lines(self):
-        mean, mean_by_clf, var, var_by_clf, n_even_max = \
-            self._get_competences_stats(self.wrong_competences)
-
-        lines = [
-            "--- Competences",
-            "Mean:",
-            round(mean, 3),
-            "Mean by classifier:",
-            np.round(mean_by_clf, 3),
-            "Var:",
-            round(var, 3),
-            "Var by classifier:",
-            np.round(var_by_clf, 3),
-            "Even max competences times, \nratio on disagreements:",
-            n_even_max,
-            round(n_even_max / self.n_disagree, 3),
-        ]
-
-        return lines
-
-    def _get_competences_reliability_lines(self):
-        true_labels_by_base = np.tile(self.true_labels,(self.n_bases,1))
-        correct_bases_bln_array = np.equal(
-            self.bases_labels.T,true_labels_by_base)
-        n_queries = self.bases_labels.shape[0]
-        n_correct_labels_by_base = np.sum(correct_bases_bln_array,axis=1)
-        acc_by_base = np.round(n_correct_labels_by_base/n_queries,3)
-
-        comp = self.competences/self.k
-        n_incorrect_labels_by_base = \
-            len(self.predicted_labels)-n_correct_labels_by_base
-
-        correct_comp = comp.T*correct_bases_bln_array
-        correct_comp_by_base = np.sum(correct_comp,axis=1)
-        correct_comp_by_base /= n_correct_labels_by_base
-        mean_correct_comp_by_base = np.round(correct_comp_by_base,3)
-        mean = mean_correct_comp_by_base
-        mean = np.repeat(mean,n_queries).reshape(self.n_bases,-1)
-        correct_comp_by_base = np.sum((correct_comp-mean)**2,axis=1)
-        correct_comp_by_base /= n_correct_labels_by_base
-        std_correct_comp_by_base = np.round(np.sqrt(correct_comp_by_base),3)
-
-        incorrect_comp = comp.T*~correct_bases_bln_array
-        incorrect_comp_by_base = np.sum(incorrect_comp,axis=1)
-        incorrect_comp_by_base /= n_incorrect_labels_by_base
-        mean_incorrect_comp_by_base = np.round(incorrect_comp_by_base,3)
-        mean = mean_incorrect_comp_by_base
-        mean = np.repeat(mean,n_queries).reshape(self.n_bases,-1)
-        incorrect_comp_by_base = np.sum((incorrect_comp-mean)**2,axis=1)
-        incorrect_comp_by_base /= n_incorrect_labels_by_base
-        std_incorrect_comp_by_base = np.round(
-            np.sqrt(incorrect_comp_by_base),3)
-
-        lines = [
-            "--- Competence reliability",
-            "Acc:",
-            round(np.mean(acc_by_base),3),
-            "(by base):",
-            acc_by_base,
-            "Competence mean & std when well clasified:",
-            round(np.mean(mean_correct_comp_by_base),3),
-            round(np.mean(std_correct_comp_by_base),3),
-            "(by base):",
-            mean_correct_comp_by_base,
-            std_correct_comp_by_base,
-            "Competence mean & std when not well clasified:",
-            round(np.mean(mean_incorrect_comp_by_base),3),
-            round(np.mean(std_incorrect_comp_by_base),3),
-            "(by base):",
-            mean_incorrect_comp_by_base,
-            std_incorrect_comp_by_base,
-        ]
-
-        return lines
-
-    def _get_distribution(self, labels=None, ind=None):
-        labels = self.predicted_labels if labels is None else labels
-        max_label = max(labels)
-        if ind is not None: labels = labels[ind]
-        unique_labels, unique_counts = np.unique(labels, return_counts=True)
-        distribution = np.full(max_label+1,0)
-        for i,l in enumerate(unique_labels):
-            distribution[l] = unique_counts[i]
-        return distribution
-
-    def _get_n_right_clf_stats(self,true_labels,bases_labels):
-        n_right_clf_by_query = []
-        n_right_clf_ind = [[] for i in range(self.n_bases + 1)]
-
-        for i,label in enumerate(true_labels):
-            row = bases_labels[i]
-            n_right_clf = np.count_nonzero(row == label)
-            n_right_clf_by_query.append(n_right_clf)
-            n_right_clf_ind[n_right_clf].append(i)
-
-        return n_right_clf_by_query, n_right_clf_ind
-
-    def _get_competences_stats(self, competences):
-        competences = competences/self.k
-        mean = np.mean(competences)
-        var = np.var(competences)
-        mean_by_clf = np.mean(competences, axis=0)
-        var_by_clf = np.var(competences, axis=0)
-        n_even_max = 0
-
-        for c in competences:
-            max_ = c[np.argmax(c)]
-            n_max = np.count_nonzero(c == max_)
-            if n_max > 1: n_even_max += 1
-
-        return mean, mean_by_clf, var, var_by_clf, n_even_max
-
-    def _get_score(self, ind, true_labels, predicted_labels):
-        matches = np.equal(true_labels[ind], predicted_labels[ind])
-        score = np.sum(matches)
-        return score
-
-
-class MultiStats(Stats):
-    def __init__(self):
-        super().__init__()
-        self.n_datasets = 1
-
-    def _get_all_lines(self):
-        lines = super()._get_all_lines()
-        lines.extend(self._get_multistats_lines())
-        return lines
-
-    def _get_multistats_lines(self):
-        lines = [
-            "--- Multidatasets",
-            self.n_datasets
-        ]
-
-        return lines

From a71dcd7dbc0796227e895e5df0b81ecb615f5c6b Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Fri, 14 Jan 2022 16:56:59 -0500
Subject: [PATCH 19/21] Retrait de multiknop.

---
 deslib/des/multi_knop.py | 256 ---------------------------------------
 1 file changed, 256 deletions(-)
 delete mode 100644 deslib/des/multi_knop.py

diff --git a/deslib/des/multi_knop.py b/deslib/des/multi_knop.py
deleted file mode 100644
index c1dac6d..0000000
--- a/deslib/des/multi_knop.py
+++ /dev/null
@@ -1,256 +0,0 @@
-# coding=utf-8
-
-# Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
-#
-# License: BSD 3 clause
-
-import numpy as np
-
-from deslib.des.base import BaseDS
-from deslib.des.knop import KNOP
-from sklearn.utils.validation import (check_X_y, check_is_fitted, check_array,
-                                      check_random_state)
-
-
-class MultiKNOP(KNOP):
-    def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
-                 safe_k=None, IH_rate=0.30, random_state=None, voting='hard',
-                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
-
-        super(KNOP, self).__init__(None, k,
-                                   DFP=DFP,
-                                   with_IH=with_IH,
-                                   safe_k=safe_k,
-                                   IH_rate=IH_rate,
-                                   needs_proba=True,
-                                   random_state=random_state,
-                                   knn_classifier=knn_classifier,
-                                   knne=knne,
-                                   DSEL_perc=DSEL_perc,
-                                   n_jobs=n_jobs)
-        self.ds_classifiers = []
-        for i in range(len(pool_classifiers)):
-            knop = KNOP(pool_classifiers[i], k,
-                        DFP=DFP,
-                        with_IH=with_IH,
-                        safe_k=safe_k,
-                        IH_rate=IH_rate,
-                        random_state=random_state,
-                        knn_classifier=knn_classifier,
-                        knne=knne,
-                        DSEL_perc=DSEL_perc,
-                        n_jobs=n_jobs)
-            self.ds_classifiers.append(knop)
-
-
-    """Multi k-Nearest Output Profiles (MultiKNOP).
-    """
-    def fit(self, X, y):
-        """Train the DS model by setting the KNN algorithm and
-        pre-process the information required to apply the DS
-        methods. In this case, the scores of the base classifiers for
-        the dynamic selection dataset (DSEL) are pre-calculated to
-        transform each sample in DSEL into an output profile.
-
-        Parameters
-        ----------
-        X : array of shape n_datasets, n_samples, n_features)
-            Data used to fit the model.
-
-        y : array of shape (n_datasets, n_samples)
-            class labels of each example in X.
-
-        Returns
-        -------
-        self
-        """
-        if len(X) == 1 or len(y) == 1:
-            raise ValueError("Error. MultiKNOP  does not accept one dataset!")
-        """
-        for predict()...
-        for i in range(len(y)-1):
-            if np.array_equal(y[0],y[i+1]):
-                raise ValueError(
-                    "Error. All datasets queries must match exactly!")
-        """
-
-        datasets_dsel_scores = []
-        datasets_DSEL_processed_ = []
-        # Process each dataset
-        for i in range(len(X)):
-            self.ds_classifiers[i].fit(X[i], y[i])
-            if self.ds_classifiers[i].n_classes_ == 1:
-                raise ValueError(
-                    "Error. MultiKNOP  does not accept one class datasets!")
-            self.ds_classifiers[i]._check_predict_proba()
-            self.ds_classifiers[i].dsel_scores_ = \
-                self.ds_classifiers[i]._preprocess_dsel_scores()
-            datasets_dsel_scores.append(self.ds_classifiers[i].dsel_scores_)
-            datasets_DSEL_processed_.append(
-                self.ds_classifiers[i].DSEL_processed_)
-        
-        self.dsel_scores_ = np.concatenate(datasets_dsel_scores, axis=1)
-        self.DSEL_processed_ = np.concatenate(datasets_DSEL_processed_, axis=1)
-        self.n_classifiers_ = self.dsel_scores_.shape[1]
-        
-        # Reassignment
-        self.DSEL_target_ = self.ds_classifiers[0].DSEL_target_
-        self.n_samples_ = self.ds_classifiers[0].n_samples_
-        self.n_classes_ = self.ds_classifiers[0].n_classes_
-        self.knn_class_ = self.ds_classifiers[0].knn_class_
-        self.k_ = self.ds_classifiers[0].k_
-        self.classes_ = self.ds_classifiers[0].classes_
-        
-        # Reshape DSEL_scores as a 2-D array for nearest neighbor calculations
-        dsel_output_profiles = self.dsel_scores_.reshape(self.n_samples_,
-                                                         self.n_classifiers_ *
-                                                         self.n_classes_)
-
-        self._fit_OP(dsel_output_profiles, self.DSEL_target_, self.k_)
-
-        return self
-
-    def _fit_OP(self, X_op, y_op, k):
-        """ Fit the set of output profiles.
-
-        Parameters
-        ----------
-        X_op : array of shape (n_samples, n_features)
-            Output profiles of the training data. n_features is equals
-            to (n_classifiers x n_classes).
-
-        y_op : array of shape (n_samples)
-               Class labels of each sample in X_op.
-
-        k : int
-            Number of output profiles used in the region of competence
-            estimation.
-
-        """
-        self.op_knn_ = self.knn_class_(k)
-
-        if self.n_classes_ == 2:
-            # Get only the scores for one class since they are complementary
-            X_temp = X_op[:, ::2]
-            self.op_knn_.fit(X_temp, y_op)
-        else:
-            self.op_knn_.fit(X_op, y_op)
-
-    def _get_similar_out_profiles(self, probabilities):
-        """Get the most similar output profiles of the query sample.
-
-        Parameters
-        ----------
-        probabilities : array of shape (n_samples, n_classifiers, n_classes)
-                        predictions of each base classifier for all samples.
-
-        Returns
-        -------
-        dists : list of shape = [n_samples, k]
-                The distances between the query and each sample in the region
-                of competence. The vector is ordered in an ascending fashion.
-
-        idx : list of shape = [n_samples, k]
-            Indices of the instances belonging to the region of competence of
-            the given query sample.
-        """
-
-        if self.n_classes_ == 2:
-            # Get only the scores for one class since they are complementary
-            query_op = probabilities[:, :, 0]
-        else:
-            query_op = probabilities.reshape((probabilities.shape[0],
-                                              self.n_classifiers_ *
-                                              self.n_classes_))
-
-        dists, idx = self.op_knn_.kneighbors(query_op, n_neighbors=self.k_,
-                                             return_distance=True)
-        return dists, np.atleast_2d(idx)
-
-    def predict(self, X):
-        """Predict the class label for each sample in X.
-        Parameters
-        ----------
-        X : array of shape (n_samples, n_features)
-            The input data.
-        Returns
-        -------
-        predicted_labels : array of shape (n_samples)
-                           Predicted class label for each sample in X.
-        """
-        # Check if the DS model was trained
-        #check_is_fitted(self,
-        #                ["DSEL_processed_", "DSEL_data_", "DSEL_target_"])
-
-        # Check if X is a valid input
-        #for i in range(len(X)):
-        #    X[i] = check_array(X[i])
-        #    self._check_num_features(X[i])
-
-        n_samples = X[0].shape[0]
-        predicted_labels = np.empty(n_samples, dtype=np.intp)
-
-        base_probabilities = []
-        for i in range(len(X)):
-            base_probabilities.append(
-                self.ds_classifiers[i]._predict_proba_base(X[i]))
-        base_probabilities = np.concatenate(base_probabilities,axis=1)
-        base_predictions = base_probabilities.argmax(axis=2)
-
-        all_agree_vector = BaseDS._all_classifier_agree(base_predictions)
-        ind_all_agree = np.where(all_agree_vector)[0]
-
-        # Since the predictions are always the same, get the predictions of the
-        # first base classifier.
-        if ind_all_agree.size:
-            predicted_labels[ind_all_agree] = base_predictions[
-                ind_all_agree, 0]
-
-        # For the samples with disagreement, perform the dynamic selection
-        # steps. First step is to collect the samples with disagreement
-        # between base classifiers
-        ind_disagreement = np.where(~all_agree_vector)[0]
-        if ind_disagreement.size:
-
-            X_DS = X[0][ind_disagreement, :]
-
-            # Then, we estimate the nearest neighbors for all samples that
-            # we need to call DS routines
-            distances, neighbors = None, None
-            #distances, neighbors = self._get_region_competence(X_DS)
-
-            # IH was not considered. So all samples with disagreement are
-            # passed down to the DS algorithm
-            ind_ds_classifier = np.arange(ind_disagreement.size)
-
-            # At this stage the samples which all base classifiers agrees or
-            # that are associated with low hardness were already classified.
-            # The remaining samples are now passed down to the DS techniques
-            # for classification.
-
-            #  First check whether there are still samples to be classified.
-            if ind_ds_classifier.size:
-
-                DFP_mask = np.ones(
-                    (ind_ds_classifier.size, self.n_classifiers_))
-
-                # Get the real indices_ of the samples that will be classified
-                # using a DS algorithm.
-                ind_ds_original_matrix = ind_disagreement[ind_ds_classifier]
-
-                if self.needs_proba or self.voting == 'soft':
-                    selected_probabilities = base_probabilities[
-                        ind_ds_original_matrix]
-                else:
-                    selected_probabilities = None
-
-                pred_ds = self.classify_with_ds(X_DS[ind_ds_classifier],
-                                                base_predictions[
-                                                    ind_ds_original_matrix],
-                                                selected_probabilities,
-                                                neighbors=neighbors,
-                                                distances=distances,
-                                                DFP_mask=DFP_mask)
-                predicted_labels[ind_ds_original_matrix] = pred_ds
-
-        return self.classes_.take(predicted_labels)

From 2e36290f67b42963572bc732ba431928182e9843 Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Wed, 23 Feb 2022 22:17:29 -0500
Subject: [PATCH 20/21] Sanitizing.

---
 deslib/base.py           | 26 ++++++--------------------
 deslib/dcs/base.py       |  3 +--
 deslib/dcs/ola.py        |  3 +--
 deslib/des/base.py       |  4 +---
 deslib/des/knop.py       |  4 +---
 deslib/des/knora_e.py    |  4 +---
 deslib/des/knora_u.py    |  4 +---
 deslib/multi_datasets.py |  4 ++--
 8 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index e892659..e8f51cf 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -16,7 +16,7 @@
 from sklearn.ensemble import BaseEnsemble, BaggingClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.preprocessing import LabelEncoder, normalize
+from sklearn.preprocessing import LabelEncoder
 from sklearn.utils.validation import (check_X_y, check_is_fitted, check_array,
                                       check_random_state)
 
@@ -40,8 +40,8 @@ class BaseDS(BaseEstimator, ClassifierMixin):
     @abstractmethod
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, needs_proba=False,
-                 random_state=None, knn_classifier='knn',
-                 knn_metric='minkowski', DSEL_perc=0.5, knne=False, n_jobs=-1):
+                 random_state=None, knn_classifier='knn', DSEL_perc=0.5,
+                 knne=False, n_jobs=-1):
 
         self.pool_classifiers = pool_classifiers
         self.k = k
@@ -52,7 +52,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
         self.needs_proba = needs_proba
         self.random_state = random_state
         self.knn_classifier = knn_classifier
-        self.knn_metric = knn_metric
         self.DSEL_perc = DSEL_perc
         self.knne = knne
         self.n_jobs = n_jobs
@@ -245,7 +244,7 @@ class labels of each example in X.
 
         # validate the value of k
         self._validate_k()
-        self._set_region_of_competence_algorithm(X_dsel)
+        self._set_region_of_competence_algorithm()
         self._fit_region_competence(X_dsel, y_dsel)
 
         # validate the IH
@@ -316,7 +315,6 @@ def _fit_region_competence(self, X, y):
             class labels of each sample in X.
 
         """
-        if self.knn_metric == 'cosine': X = normalize(X)
         self.roc_algorithm_.fit(X, y)
 
     def _set_dsel(self, X, y):
@@ -339,24 +337,13 @@ class labels of each sample in X.
         self.n_samples_ = self.DSEL_target_.size
         self.DSEL_processed_, self.BKS_DSEL_ = self._preprocess_dsel()
 
-    def _set_region_of_competence_algorithm(self, X):
-
-        algorithm = "auto"
-        metric = 'minkowski'
-        metric_params = None
-
-        if self.knn_metric == 'mahalanobis':
-            metric = 'mahalanobis'
-            metric_params = {'V': np.cov(X)}
-            algorithm = "brute"
+    def _set_region_of_competence_algorithm(self):
 
         if self.knn_classifier is None or self.knn_classifier in ['knn',
                                                                   'sklearn']:
             knn_class = functools.partial(KNeighborsClassifier,
                                           n_jobs=self.n_jobs,
-                                          algorithm=algorithm,
-                                          metric=metric,
-                                          metric_params=metric_params)
+                                          algorithm="auto")
         elif self.knn_classifier == 'faiss':
             knn_class = functools.partial(
                 faiss_knn_wrapper.FaissKNNClassifier,
@@ -453,7 +440,6 @@ def predict(self, X):
         # steps. First step is to collect the samples with disagreement
         # between base classifiers
         ind_disagreement = np.where(~all_agree_vector)[0]
-        ind_disagreement = np.asarray(range(len(predicted_labels)))
         if ind_disagreement.size:
 
             X_DS = X[ind_disagreement, :]
diff --git a/deslib/dcs/base.py b/deslib/dcs/base.py
index 3ef54e7..974efd7 100644
--- a/deslib/dcs/base.py
+++ b/deslib/dcs/base.py
@@ -21,7 +21,7 @@ class BaseDCS(BaseDS):
     def __init__(self, pool_classifiers=None, k=7, DFP=False, safe_k=None,
                  with_IH=False, IH_rate=0.30, selection_method='best',
                  diff_thresh=0.1, random_state=None, knn_classifier='knn',
-                 knn_metric='minkowski', DSEL_perc=0.5,
+                 DSEL_perc=0.5,
                  knne=False, n_jobs=-1):
 
         super(BaseDCS, self).__init__(pool_classifiers=pool_classifiers, k=k,
@@ -29,7 +29,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, safe_k=None,
                                       IH_rate=IH_rate,
                                       random_state=random_state,
                                       knn_classifier=knn_classifier,
-                                      knn_metric=knn_metric,
                                       DSEL_perc=DSEL_perc,
                                       knne=knne, n_jobs=n_jobs)
 
diff --git a/deslib/dcs/ola.py b/deslib/dcs/ola.py
index b3d81b5..3ea0f35 100644
--- a/deslib/dcs/ola.py
+++ b/deslib/dcs/ola.py
@@ -111,7 +111,7 @@ class :class:`FaissKNNClassifier`
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, selection_method='best',
                  diff_thresh=0.1, random_state=None, knn_classifier='knn',
-                 knn_metric='minkowski', knne=False, DSEL_perc=0.5, n_jobs=-1):
+                 knne=False, DSEL_perc=0.5, n_jobs=-1):
         super(OLA, self).__init__(pool_classifiers=pool_classifiers, k=k,
                                   DFP=DFP, with_IH=with_IH, safe_k=safe_k,
                                   IH_rate=IH_rate,
@@ -119,7 +119,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                   diff_thresh=diff_thresh,
                                   random_state=random_state,
                                   knn_classifier=knn_classifier,
-                                  knn_metric=knn_metric,
                                   knne=knne,
                                   DSEL_perc=DSEL_perc, n_jobs=n_jobs)
 
diff --git a/deslib/des/base.py b/deslib/des/base.py
index a749155..1530e64 100644
--- a/deslib/des/base.py
+++ b/deslib/des/base.py
@@ -21,8 +21,7 @@ class BaseDES(BaseDS):
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, mode='selection',
                  needs_proba=False, random_state=None,
-                 knn_classifier='knn', knn_metric='minkowski', knne=False,
-                 DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
 
         super(BaseDES, self).__init__(pool_classifiers=pool_classifiers,
                                       k=k,
@@ -33,7 +32,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                       needs_proba=needs_proba,
                                       random_state=random_state,
                                       knn_classifier=knn_classifier,
-                                      knn_metric=knn_metric,
                                       knne=knne,
                                       DSEL_perc=DSEL_perc, n_jobs=n_jobs)
         self.mode = mode
diff --git a/deslib/des/knop.py b/deslib/des/knop.py
index 1cffc36..adffb36 100644
--- a/deslib/des/knop.py
+++ b/deslib/des/knop.py
@@ -106,8 +106,7 @@ class :class:`FaissKNNClassifier`
     """
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, random_state=None,
-                 knn_classifier='knn', knn_metric='minkowski', knne=False,
-                 DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
 
         super(KNOP, self).__init__(pool_classifiers, k,
                                    DFP=DFP,
@@ -118,7 +117,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                    needs_proba=True,
                                    random_state=random_state,
                                    knn_classifier=knn_classifier,
-                                   knn_metric=knn_metric,
                                    knne=knne,
                                    DSEL_perc=DSEL_perc,
                                    n_jobs=n_jobs)
diff --git a/deslib/des/knora_e.py b/deslib/des/knora_e.py
index 6619b03..fa0298d 100644
--- a/deslib/des/knora_e.py
+++ b/deslib/des/knora_e.py
@@ -99,8 +99,7 @@ class :class:`FaissKNNClassifier`
 
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, random_state=None,
-                 knn_classifier='knn', knn_metric='minkowski', knne=False,
-                 DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
 
         super(KNORAE, self).__init__(pool_classifiers=pool_classifiers,
                                      k=k,
@@ -110,7 +109,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                      IH_rate=IH_rate,
                                      random_state=random_state,
                                      knn_classifier=knn_classifier,
-                                     knn_metric=knn_metric,
                                      knne=knne,
                                      DSEL_perc=DSEL_perc,
                                      n_jobs=n_jobs)
diff --git a/deslib/des/knora_u.py b/deslib/des/knora_u.py
index c4403fc..e6cdc89 100644
--- a/deslib/des/knora_u.py
+++ b/deslib/des/knora_u.py
@@ -95,8 +95,7 @@ class :class:`FaissKNNClassifier`
 
     def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                  safe_k=None, IH_rate=0.30, random_state=None,
-                 knn_classifier='knn', knn_metric='minkowski', knne=False,
-                 DSEL_perc=0.5, n_jobs=-1):
+                 knn_classifier='knn', knne=False, DSEL_perc=0.5, n_jobs=-1):
         super(KNORAU, self).__init__(pool_classifiers, k,
                                      DFP=DFP,
                                      with_IH=with_IH,
@@ -105,7 +104,6 @@ def __init__(self, pool_classifiers=None, k=7, DFP=False, with_IH=False,
                                      mode='weighting',
                                      random_state=random_state,
                                      knn_classifier=knn_classifier,
-                                     knn_metric=knn_metric,
                                      knne=knne,
                                      DSEL_perc=DSEL_perc,
                                      n_jobs=n_jobs)
diff --git a/deslib/multi_datasets.py b/deslib/multi_datasets.py
index 2ca0b2e..4ef4188 100644
--- a/deslib/multi_datasets.py
+++ b/deslib/multi_datasets.py
@@ -105,7 +105,7 @@ def _predict_ds(self, X):
         predicted_labels = np.empty(n_samples, dtype=np.intp)
 
         all_agree_vector = BaseDS._all_classifier_agree(merged_base_predictions)
-        ind_all_agree = np.where(all_agree_vector)[0]   
+        ind_all_agree = np.where(all_agree_vector)[0]
 
         # Since the predictions are always the same, get the predictions of the
         # first base classifier.
@@ -337,7 +337,7 @@ def _get_DFP_mask(self, ds_classifier, ind_ds_classifier, neighbors):
             DFP_mask = np.ones(
                 (ind_ds_classifier.size, self.n_classifiers_))
 
-    def _get_competences(self, ds_classifier, query, predictions, 
+    def _get_competences(self, ds_classifier, query, predictions,
                         probabilities=None, neighbors=None, distances=None,
                         DFP_mask=None):
         """

From 871abef20368c71db9dd846e377d30e86cbf2cfe Mon Sep 17 00:00:00 2001
From: Pierre-Marc Thibault <pierre-marc.thibault.1@etsmtl.qc.ca>
Date: Mon, 16 May 2022 23:40:41 -0400
Subject: [PATCH 21/21] Multidatasets tests.

---
 deslib/tests/test_multidatasets.py | 100 +++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 deslib/tests/test_multidatasets.py

diff --git a/deslib/tests/test_multidatasets.py b/deslib/tests/test_multidatasets.py
new file mode 100644
index 0000000..83ec0a1
--- /dev/null
+++ b/deslib/tests/test_multidatasets.py
@@ -0,0 +1,100 @@
+import numpy as np
+import pytest
+import math
+from sklearn.datasets import make_classification
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+from sklearn.metrics import precision_recall_fscore_support as prf
+from sklearn.calibration import CalibratedClassifierCV as CC
+from sklearn.linear_model import Perceptron
+from sklearn.naive_bayes import GaussianNB as GNB
+from deslib.base import BaseDS
+from deslib.multi_datasets import MultiDatasets
+# Static techniques
+from deslib.static.oracle import Oracle
+from deslib.static.stacked import StackedClassifier
+# DCS techniques
+from deslib.dcs.a_posteriori import APosteriori
+from deslib.dcs.a_priori import APriori
+from deslib.dcs.lca import LCA
+from deslib.dcs.mcb import MCB
+from deslib.dcs.mla import MLA
+from deslib.dcs.ola import OLA
+from deslib.dcs.rank import Rank
+from deslib.des import DESKL
+# DES techniques
+from deslib.des.des_knn import DESKNN
+from deslib.des.des_p import DESP
+from deslib.des.knop import KNOP
+from deslib.des.knora_e import KNORAE
+from deslib.des.knora_u import KNORAU
+from deslib.des.meta_des import METADES
+
+
+# ----- Integration tests -----
+
+def _make_split(seed, weights):
+    """Generate one scaled binary dataset split into train, DSEL and test.
+
+    One RandomState built from ``seed`` drives, in this order: the data
+    generation, the train/test split and the train/DSEL split.
+    """
+    rng = np.random.RandomState(seed)
+    X, y = make_classification(n_classes=2, n_samples=1000,
+                               weights=weights, random_state=rng)
+    X_fit, X_test, y_fit, y_test = train_test_split(
+        X, y, test_size=0.5, random_state=rng)
+    # Standardise with statistics taken from the non-test half only
+    scaler = StandardScaler()
+    X_fit = scaler.fit_transform(X_fit)
+    X_test = scaler.transform(X_test)
+    # Split that half again: base-classifier training data and DSEL
+    X_train, X_dsel, y_train, y_dsel = train_test_split(
+        X_fit, y_fit, test_size=0.5, random_state=rng)
+    return X_train, y_train, X_dsel, y_dsel, X_test, y_test
+
+
+def setup_classifiers():
+    """Return DSEL/test data for two datasets and a pool of fitted GNBs.
+
+    The pool lists each dataset's fitted classifier twice.
+    """
+    X_train, y_train, X_dsel, y_dsel, X_test, y_test = _make_split(
+        123456, [0.2, 0.8])
+    X_train2, y_train2, X_dsel2, y_dsel2, X_test2, y_test2 = _make_split(
+        654321, [0.3, 0.7])
+    # One Gaussian naive Bayes model per dataset, fitted on its training part
+    gnb1 = GNB().fit(X_train, y_train)
+    gnb2 = GNB().fit(X_train2, y_train2)
+    pool_classifiers = np.asarray([[gnb1, gnb1], [gnb2, gnb2]])
+    return (X_dsel, y_dsel, X_test, y_test,
+            X_dsel2, y_dsel2, X_test2, y_test2, pool_classifiers)
+
+@pytest.mark.parametrize('params', [
+    [Oracle(), 0.962],
+    [StackedClassifier(), 0.85],
+    [KNORAU(), 0.764],
+    [KNORAE(), 0.772],
+    [DESP(), 0.666],
+    [OLA(), 0.830],
+    [LCA(), 0.814],
+    [MLA(), 0.810],
+    [MCB(random_state=0), 0.806],
+    [APriori(random_state=0), 0.796],
+    [Rank(), 0.824],
+    [APosteriori(random_state=0), 0.782],
+    [METADES(), 0.690],
+    [KNOP(), 0.792],
+    [DESKL(), 0.680]
+])
+def test(params):
+    """Check the micro-averaged precision of one wrapped technique."""
+    ds_classifier, expected = params
+    X_d1, y_d1, X_t1, y_t1, X_d2, y_d2, X_t2, _, pool = setup_classifiers()
+    technique = MultiDatasets(ds_classifier, pool)
+    technique.fit(np.asarray([X_d1, X_d2]), np.asarray([y_d1, y_d2]))
+    pred = technique.predict(np.asarray([X_t1, X_t2]), y_t1)
+    assert math.isclose(prf(y_t1, pred, average='micro')[0], expected)
+