Merge branch 'josesho-v0.2.1' into v0.2.1

ACCLAB · Mar 27, 2019 · 1f52b08 · 1f52b08
2 parents a85a47b + 6842bd5
commit 1f52b08
Show file tree

Hide file tree

Showing 15 changed files with 181 additions and 31 deletions.
diff --git a/dabest/__init__.py b/dabest/__init__.py
@@ -23,4 +23,4 @@
 from ._stats_tools import effsize as effsize
 from ._classes import TwoGroupsEffectSize 
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
diff --git a/dabest/_api.py b/dabest/_api.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python
+# -*-coding: utf-8 -*-
+# Author: Joses Ho
+# Email : [email protected]
+
+
+def load(data, idx, x=None, y=None, paired=False, id_col=None,
+        ci=95, resamples=5000, random_seed=12345):
+    '''
+    Loads data in preparation for estimation statistics.
+
+    This is designed to work with pandas DataFrames.
+
+    Parameters
+    ----------
+    data : pandas DataFrame
+    idx : tuple
+        Column names (if 'x' is not supplied) or category names
+        (if 'x' is supplied). This can be expressed as a tuple of tuples,
+        with each individual tuple producing its own contrast plot.
+    x, y : strings, default None
+        Column names for data to be plotted on the x-axis and y-axis.
+    paired : boolean, default False.
+    id_col : default None.
+        Required if `paired` is True.
+    ci : integer, default 95
+        The confidence interval width. The default of 95 produces 95%
+        confidence intervals.
+    resamples : integer, default 5000.
+        The number of resamples taken to generate the bootstraps which are used
+        to generate the confidence intervals.
+    random_seed : int, default 12345
+        This integer is used to seed the random number generator during
+        bootstrap resampling, ensuring that the confidence intervals
+        reported are replicable.
+
+    Returns
+    -------
+    A `Dabest` object.
+
+    Examples
+    --------
+    Load libraries.
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> import dabest
+
+    Create dummy data for demonstration.
+
+    >>> np.random.seed(88888)
+    >>> N = 10
+    >>> c1 = np.random.normal(loc=100, scale=5, size=N)
+    >>> t1 = np.random.normal(loc=115, scale=5, size=N)
+    >>> df = pd.DataFrame({'Control 1' : c1, 'Test 1': t1})
+
+    Load the data.
+
+    >>> my_data = dabest.load(df, idx=("Control 1", "Test 1"))
+
+    '''
+    from ._classes import Dabest
+
+    return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed)
diff --git a/dabest/_archive/README.md b/dabest/_archive/README.md
diff --git a/dabest/tests/test_plotting.py → dabest/_archive/old_test_plotting.py b/dabest/tests/test_plotting.py → dabest/_archive/old_test_plotting.py
diff --git a/dabest/_classes.py b/dabest/_classes.py
@@ -325,7 +325,8 @@ def __init__(self, control, test, effect_size,
 
         Parameters
         ----------
-        control, test : array-like
+        control : array-like
+        test : array-like
             These should be numerical iterables.
         effect_size : string.
             Any one of the following are accepted inputs:
@@ -344,9 +345,43 @@ def __init__(self, control, test, effect_size,
 
         Returns
         -------
-        A `TwoGroupEffectSize` object.
-
-
+        A :py:class:`TwoGroupsEffectSize` object.
+        
+        difference : float
+            The effect size of the difference between the control and the test.
+        
+        effect_size : string
+            The type of effect size reported.
+        
+        is_paired : boolean
+            Whether or not the difference is paired (i.e. repeated measures).
+            
+        ci : float
+            Returns the width of the confidence interval, in percent.
+            
+        alpha : float
+            Returns the significance level of the statistical test as a float
+            between 0 and 1.
+            
+        resamples : int
+            The number of resamples performed during the bootstrap procedure.
+
+        bootstraps : numpy ndarray
+            The generated bootstraps of the effect size.
+            
+        random_seed : int
+            The number used to seed the numpy random number generator, i.e.
+            the `seed_value` passed to `numpy.random.seed(seed_value)`.
+            
+        bca_low, bca_high : float
+            The lower and upper limits, respectively, of the bias-corrected
+            and accelerated confidence interval.
+            
+        pct_low, pct_high : float
+            The lower and upper limits, respectively, of the percentile
+            confidence interval.
+            
+            
         Examples
         --------
         >>> import numpy as np
@@ -360,6 +395,36 @@ def __init__(self, control, test, effect_size,
         The unpaired mean difference is -0.253 [95%CI -0.782, 0.241]
         5000 bootstrap samples. The confidence interval is bias-corrected
         and accelerated.
+        >>> effsize.to_dict() 
+        {'alpha': 0.05,
+         'bca_high': 0.2413346581369784,
+         'bca_interval_idx': (109, 4858),
+         'bca_low': -0.7818088458343655,
+         'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ...,  0.66675324,
+                 0.75814087,  0.80848265]),
+         'ci': 95,
+         'difference': -0.25315417702752846,
+         'effect_size': 'mean difference',
+         'is_paired': False,
+         'pct_high': 0.25135646125431527,
+         'pct_interval_idx': (125, 4875),
+         'pct_low': -0.763588353717278,
+         'pvalue_brunner_munzel': nan,
+         'pvalue_kruskal': nan,
+         'pvalue_mann_whitney': 0.2600723060808019,
+         'pvalue_paired_students_t': nan,
+         'pvalue_students_t': 0.34743913903372836,
+         'pvalue_welch': 0.3474493875548965,
+         'pvalue_wilcoxon': nan,
+         'random_seed': 12345,
+         'resamples': 5000,
+         'statistic_brunner_munzel': nan,
+         'statistic_kruskal': nan,
+         'statistic_mann_whitney': 406.0,
+         'statistic_paired_students_t': nan,
+         'statistic_students_t': 0.9472545159069105,
+         'statistic_welch': 0.9472545159069105,
+         'statistic_wilcoxon': nan}
         """
 
         from numpy import array, isnan
@@ -532,7 +597,8 @@ def __init__(self, control, test, effect_size,
             # Mann-Whitney test: Non parametric,
             # does not assume normality of distributions
             try:
-                mann_whitney = spstats.mannwhitneyu(control, test)
+                mann_whitney = spstats.mannwhitneyu(control, test, 
+                                                    alternative='two-sided')
                 self.__pvalue_mann_whitney = mann_whitney.pvalue
                 self.__statistic_mann_whitney = mann_whitney.statistic
             except ValueError:

diff --git a/dabest/_misc_tools.py → dabest/misc_tools.py b/dabest/_misc_tools.py → dabest/misc_tools.py
diff --git a/dabest/_plot_tools.py → dabest/plot_tools.py b/dabest/_plot_tools.py → dabest/plot_tools.py
@@ -139,8 +139,15 @@ def gapped_lines(data, x, y, type='mean_sd', offset=0.2, ax=None,
     if 'lw' not in keys:
         kwargs['lw'] = 2.
 
-    # Grab the order in which the groups appear.
-    group_order = pd.unique(data[x])
+    # # Grab the order in which the groups appear.
+    # group_order = pd.unique(data[x])
+
+    # Grab the order in which the groups appear,
+    # depending on whether the x-column is categorical.
+    if isinstance(data[x].dtype, pd.CategoricalDtype):
+        group_order = pd.unique(data[x]).categories
+    else:
+        group_order = pd.unique(data[x])
 
     means    = data.groupby(x)[y].mean().reindex(index=group_order)
     sd       = data.groupby(x)[y].std().reindex(index=group_order)

diff --git a/dabest/pytest.ini b/dabest/pytest.ini
@@ -1,4 +1,4 @@
 [pytest]
 filterwarnings =
     ignore::UserWarning
-    ignore::DeprecationWarning
+    ignore::DeprecationWarning
diff --git a/dabest/tests/README.md b/dabest/tests/README.md
@@ -1,4 +1,11 @@
 # Testing
 
-
 We use [pytest](https://docs.pytest.org/en/latest) to execute the tests. More documentation of the testing paradigm will be added in the near future.
+
+To run the tests, go to the root directory of this repo and run:
+
+```shell
+pytest dabest
+```
+
+
diff --git a/dabest/tests/test_01_effsizes_pvals.py b/dabest/tests/test_01_effsizes_pvals.py
@@ -135,7 +135,7 @@ def test_unpaired_stats():
 
     unpaired_es = TwoGroupsEffectSize(c, t, "mean_diff", is_paired=False)
 
-    p1 = sp.stats.mannwhitneyu(c, t).pvalue
+    p1 = sp.stats.mannwhitneyu(c, t, alternative="two-sided").pvalue
     assert unpaired_es.pvalue_mann_whitney == pytest.approx(p1)
 
     p2 = sp.stats.ttest_ind(c, t, nan_policy='omit').pvalue

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -77,7 +77,7 @@
 # The short X.Y version.
 version = '0.2'
 # The full version, including alpha/beta/rc tags.
-release = '0.2.0'
+release = '0.2.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -9,7 +9,7 @@ DABEST
 -----------------------------------------------
 Data Analysis with Bootstrap-coupled ESTimation
 -----------------------------------------------
-*version 0.2.0*
+*version 0.2.1*
 
 Analyze your data with estimation statistics!
 ---------------------------------------------
@@ -20,8 +20,10 @@ Analyze your data with estimation statistics!
 News
 ----
 March 2019:
-  - Release of v0.2.0. This is a major update that makes several breaking changes to the API. See the :doc:`release-notes`.
-
+  - v0.2.1 released. This is a minor bugfix release that addresses an issue in gapped line plotting. See the :doc:`release-notes`.
+
+  - Release of v0.2.0. This is a major update that makes several breaking changes to the API. 
+
 January 2019:
   - Release of v0.1.7. Added `cumming_vertical_spacing` option.
 

diff --git a/docs/source/release-notes.rst b/docs/source/release-notes.rst
@@ -4,6 +4,13 @@
 Release Notes
 =============
 
+
+
+v0.2.1
+------
+
+This release fixes a bug that misplotted the gapped summary lines in Cumming plots when the *x*-variable was a :py:mod:`pandas` :py:class:`Categorical` object.
+
 v0.2.0
 ------
 

diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
@@ -18,7 +18,7 @@ Load Libraries
 
 .. parsed-literal::
 
-    We're using DABEST v0.2.0
+    We're using DABEST v0.2.1
 
 
 Create dataset for demo
@@ -217,7 +217,7 @@ the comparisons that can be computed.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -252,7 +252,7 @@ dataset that indicates the identity of each observation, using the
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -283,7 +283,7 @@ produced.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -318,7 +318,7 @@ Each of these are attributes of the `Dabest` object.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -500,7 +500,7 @@ Let's compute the Hedges' g for our comparison.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -743,7 +743,7 @@ to :keyword:`idx` has more than two data columns.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -770,7 +770,7 @@ to :keyword:`idx` has more than two data columns.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -836,7 +836,7 @@ complex visualizations and statistics.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -863,7 +863,7 @@ complex visualizations and statistics.
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -1013,7 +1013,7 @@ When your data is in this format, you will need to specify the :keyword:`x` and
 
 .. parsed-literal::
 
-    DABEST v0.2.0
+    DABEST v0.2.1
     =============
                  
     Good afternoon!
@@ -1249,7 +1249,7 @@ effect size differences.
 .. image:: _images/tutorial_72_0.png
 
 
-With v0.2.0, :keyword:`dabest` can now apply `matplotlib style
+Since v0.2.0, :keyword:`dabest` can apply `matplotlib style
 sheets <https://matplotlib.org/tutorials/introductory/customizing.html>`__
 to estimation plots. You can refer to this
 `gallery <https://matplotlib.org/3.0.3/gallery/style_sheets/style_sheets_reference.html>`__

diff --git a/setup.py b/setup.py
@@ -89,7 +89,7 @@ def check_dependencies():
         author_email='[email protected]',
         maintainer='Joses W. Ho',
         maintainer_email='[email protected]',
-        version='0.2.0',
+        version='0.2.1',
         description=DESCRIPTION,
         long_description=LONG_DESCRIPTION,
         packages=find_packages(),
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,13 @@ @@
     Release Notes
     =============
+    v0.2.1
+    ------
+    This release fixes a bug that misplotted the gapped summary lines in Cumming plots when the *x*-variable was a :py:mod:`pandas` :py:class:`Categorical` object.
     v0.2.0
     ------
@@ Expand Down @@