diff --git a/.travis.yml b/.travis.yml index b881431c..729f3420 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,8 +5,7 @@ env: - PYTHON=3.5 BACKEND=agg - PYTHON=3.6 BACKEND=agg - PYTHON=3.7 BACKEND=agg - # - PYTHON=3.8 BACKEND=agg - + - PYTHON=3.8 BACKEND=agg before_install: - wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh diff --git a/LICENSE b/LICENSE index 231101f6..b8fd4d93 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The Clear BSD License -Copyright (c) 2016-2019 Joses W. Ho +Copyright (c) 2016-2020 Joses W. Ho All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index d74f27cd..84b8baf5 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,20 @@ [![Free-to-view citation](https://zenodo.org/badge/DOI/10.1038/s41592-019-0470-3.svg)](https://rdcu.be/bHhJ4) [![License](https://img.shields.io/badge/License-BSD%203--Clause--Clear-orange.svg)](https://spdx.org/licenses/BSD-3-Clause-Clear.html) +## Contents + +- [About](#about) +- [Installation](#installation) +- [Usage](#usage) +- [How to cite](#how-to-cite) +- [Bugs](#bugs) +- [Contributing](#contributing) +- [Acknowledgements](#acknowledgements) +- [Testing](#testing) +- [DABEST in other languages](#dabest-in-other-languages) + + + ## About DABEST is a package for **D**ata **A**nalysis using **B**ootstrap-Coupled **EST**imation. @@ -25,7 +39,7 @@ DABEST powers [estimationstats.com](https://www.estimationstats.com/), allowing ## Installation -This package is tested on Python 3.5, 3.6, and 3.7. +This package is tested on Python 3.5, 3.6, 3.7, and 3.8. It is highly recommended to download the [Anaconda distribution](https://www.continuum.io/downloads) of Python in order to obtain the dependencies easily. You can install this package via `pip`. 
diff --git a/dabest/__init__.py b/dabest/__init__.py index 96ed26ac..d4a93d9c 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -21,6 +21,6 @@ from ._api import load from ._stats_tools import effsize as effsize -from ._classes import TwoGroupsEffectSize +from ._classes import TwoGroupsEffectSize, PermutationTest -__version__ = "0.2.8" +__version__ = "0.3.0" diff --git a/dabest/_classes.py b/dabest/_classes.py index 59ba3088..8329669a 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -14,7 +14,8 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples, """ Parses and stores pandas DataFrames in preparation for estimation - statistics. + statistics. You should not be calling this class directly; instead, + use `dabest.load()` to parse your DataFrame prior to analysis. """ # Import standard data science libraries. @@ -181,26 +182,26 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples, raise IndexError(err) EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired, - random_seed=random_seed, - resamples=resamples) + random_seed=random_seed, + resamples=resamples) - self.mean_diff = EffectSizeDataFrame(self, "mean_diff", + self.__mean_diff = EffectSizeDataFrame(self, "mean_diff", **EffectSizeDataFrame_kwargs) - self.median_diff = EffectSizeDataFrame(self, "median_diff", + self.__median_diff = EffectSizeDataFrame(self, "median_diff", **EffectSizeDataFrame_kwargs) - self.cohens_d = EffectSizeDataFrame(self, "cohens_d", + self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", **EffectSizeDataFrame_kwargs) - self.hedges_g = EffectSizeDataFrame(self, "hedges_g", + self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", **EffectSizeDataFrame_kwargs) if paired is False: - self.cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta", + self.__cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta", **EffectSizeDataFrame_kwargs) else: - self.cliffs_delta = "The data is paired; Cliff's delta is therefore undefined." 
+ self.__cliffs_delta = "The data is paired; Cliff's delta is therefore undefined." def __repr__(self): @@ -247,6 +248,205 @@ def __repr__(self): # @property # def variable_name(self): # return self.__variable_name() + + @property + def mean_diff(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + Example + ------- + >>> from scipy.stats import norm + >>> import pandas as pd + >>> import dabest + >>> control = norm.rvs(loc=0, size=30, random_state=12345) + >>> test = norm.rvs(loc=0.5, size=30, random_state=12345) + >>> my_df = pd.DataFrame({"control": control, + "test": test}) + >>> my_dabest_object = dabest.load(my_df, idx=("control", "test")) + >>> my_dabest_object.mean_diff + + Notes + ----- + This is simply the mean of the control group subtracted from + the mean of the test group. + + .. math:: + \\text{Mean difference} = \\overline{x}_{Test} - \\overline{x}_{Control} + + where :math:`\\overline{x}` is the mean for the group :math:`x`. + """ + return self.__mean_diff + + + @property + def median_diff(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + Example + ------- + >>> from scipy.stats import norm + >>> import pandas as pd + >>> import dabest + >>> control = norm.rvs(loc=0, size=30, random_state=12345) + >>> test = norm.rvs(loc=0.5, size=30, random_state=12345) + >>> my_df = pd.DataFrame({"control": control, + "test": test}) + >>> my_dabest_object = dabest.load(my_df, idx=("control", "test")) + >>> my_dabest_object.median_diff + + Notes + ----- + This is simply the median of the control group subtracted from + the median of the test group. + + .. 
math:: + \\text{Median difference} = \\widetilde{x}_{Test} - \\widetilde{x}_{Control} + + where :math:`\\widetilde{x}` is the median for the group :math:`x`. + """ + return self.__median_diff + + + @property + def cohens_d(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + Example + ------- + >>> from scipy.stats import norm + >>> import pandas as pd + >>> import dabest + >>> control = norm.rvs(loc=0, size=30, random_state=12345) + >>> test = norm.rvs(loc=0.5, size=30, random_state=12345) + >>> my_df = pd.DataFrame({"control": control, + "test": test}) + >>> my_dabest_object = dabest.load(my_df, idx=("control", "test")) + >>> my_dabest_object.cohens_d + + Notes + ----- + Cohen's `d` is the mean of the control group subtracted from + the mean of the test group, divided by a standard deviation. + + If the comparison(s) are unpaired, Cohen's `d` is computed with the following equation: + + .. math:: + + d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{pooled standard deviation}} + + + For paired comparisons, Cohen's d is given by + + .. math:: + d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{average standard deviation}} + + where :math:`\\overline{x}` is the mean of the respective group of observations, :math:`{Var}_{x}` denotes the variance of that group, + + .. math:: + + \\text{pooled standard deviation} = \\sqrt{ \\frac{(n_{control} - 1) * {Var}_{control} + (n_{test} - 1) * {Var}_{test} } {n_{control} + n_{test} - 2} } + + and + + .. math:: + + \\text{average standard deviation} = \\sqrt{ \\frac{{Var}_{control} + {Var}_{test}} {2}} + + The sample variance (and standard deviation) uses N-1 degrees of freedom. + This is an application of `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, and yields the unbiased + sample variance.
+ + References: + https://en.wikipedia.org/wiki/Effect_size#Cohen's_d + https://en.wikipedia.org/wiki/Bessel%27s_correction + https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation + """ + return self.__cohens_d + + + @property + def hedges_g(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + + Example + ------- + >>> from scipy.stats import norm + >>> import pandas as pd + >>> import dabest + >>> control = norm.rvs(loc=0, size=30, random_state=12345) + >>> test = norm.rvs(loc=0.5, size=30, random_state=12345) + >>> my_df = pd.DataFrame({"control": control, + "test": test}) + >>> my_dabest_object = dabest.load(my_df, idx=("control", "test")) + >>> my_dabest_object.hedges_g + + Notes + ----- + + Hedges' `g` is :py:attr:`cohens_d` corrected for bias via multiplication with the following correction factor: + + .. math:: + \\frac{ \\Gamma( \\frac{a} {2} )} {\\sqrt{ \\frac{a} {2} } \\times \\Gamma( \\frac{a - 1} {2} )} + + where + + .. math:: + a = {n}_{control} + {n}_{test} - 2 + + and :math:`\\Gamma(x)` is the `Gamma function `_. + + + + References: + https://en.wikipedia.org/wiki/Effect_size#Hedges'_g + https://journals.sagepub.com/doi/10.3102/10769986006002107 + """ + return self.__hedges_g + + + @property + def cliffs_delta(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. 
+ + + Example + ------- + >>> from scipy.stats import norm + >>> import pandas as pd + >>> import dabest + >>> control = norm.rvs(loc=0, size=30, random_state=12345) + >>> test = norm.rvs(loc=0.5, size=30, random_state=12345) + >>> my_df = pd.DataFrame({"control": control, + "test": test}) + >>> my_dabest_object = dabest.load(my_df, idx=("control", "test")) + >>> my_dabest_object.cliffs_delta + + + Notes + ----- + + Cliff's delta is a measure of ordinal dominance, i.e. how often the values from the test sample are larger than values from the control sample. + + .. math:: + \\text{Cliff's delta} = \\frac{\\#({x}_{test} > {x}_{control}) - \\#({x}_{test} < {x}_{control})} {{n}_{Test} \\times {n}_{Control}} + + + where :math:`\\#` denotes the number of times a value from the test sample exceeds (or is less than) values in the control sample. + + Cliff's delta ranges from -1 to 1; it can also be thought of as a measure of the degree of overlap between the two samples. An attractive aspect of this effect size is that it does not make any assumptions about the underlying distributions that the samples were drawn from. + + References: + https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data + https://psycnet.apa.org/record/1994-08169-001 + """ + return self.__cliffs_delta + @property @@ -273,7 +473,7 @@ def is_paired(self): @property def id_col(self): """ - Returns the ic column declared to `dabest.load()`. + Returns the id column declared to `dabest.load()`. """ return self.__id_col @@ -356,7 +556,9 @@ class TwoGroupsEffectSize(object): def __init__(self, control, test, effect_size, is_paired=False, ci=95, - resamples=5000, random_seed=12345): + resamples=5000, + permutation_count=5000, + random_seed=12345): """ Compute the effect size between two groups.
@@ -371,7 +573,11 @@ def __init__(self, control, test, effect_size, 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' is_paired : boolean, default False resamples : int, default 5000 - The number of bootstrap resamples to be taken. + The number of bootstrap resamples to be taken for the calculation + of the confidence interval limits. + permutation_count : int, default 5000 + The number of permutations (reshuffles) to perform for the + computation of the permutation p-value ci : float, default 95 The confidence interval width. The default of 95 produces 95% confidence intervals. @@ -435,25 +641,24 @@ def __init__(self, control, test, effect_size, and accelerated. >>> effsize.to_dict() {'alpha': 0.05, - 'bca_high': 0.2413346581369784, - 'bca_interval_idx': (109, 4858), - 'bca_low': -0.7818088458343655, - 'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ..., 0.66675324, - 0.75814087, 0.80848265]), + 'bca_high': 0.24951887238295106, + 'bca_interval_idx': (125, 4875), + 'bca_low': -0.7801782111071534, + 'bootstraps': array([-1.25579022, -1.20979484, -1.17604415, ..., 0.57700183, + 0.5902485 , 0.61043212]), 'ci': 95, 'difference': -0.25315417702752846, 'effect_size': 'mean difference', 'is_paired': False, - 'pct_high': 0.25135646125431527, + 'pct_high': 0.24951887238295106, 'pct_interval_idx': (125, 4875), - 'pct_low': -0.763588353717278, + 'pct_low': -0.7801782111071534, + 'permutation_count': 5000, 'pvalue_brunner_munzel': nan, 'pvalue_kruskal': nan, - 'pvalue_lqrt_paired': nan, - 'pvalue_lqrt_unpaired_equal_variance': 0.36, - 'pvalue_lqrt_unpaired_unequal_variance': 0.36, - 'pvalue_mann_whitney': 0.2600723060808019, + 'pvalue_mann_whitney': 0.5201446121616038, 'pvalue_paired_students_t': nan, + 'pvalue_permutation': 0.3484, 'pvalue_students_t': 0.34743913903372836, 'pvalue_welch': 0.3474493875548965, 'pvalue_wilcoxon': nan, @@ -461,22 +666,19 @@ def __init__(self, control, test, effect_size, 'resamples': 5000, 
'statistic_brunner_munzel': nan, 'statistic_kruskal': nan, - 'statistic_lqrt_paired': nan, - 'statistic_lqrt_unpaired_equal_variance': 0.8894980773231964, - 'statistic_lqrt_unpaired_unequal_variance': 0.8916901409507432, - 'statistic_mann_whitney': 406.0, + 'statistic_mann_whitney': 494.0, 'statistic_paired_students_t': nan, 'statistic_students_t': 0.9472545159069105, 'statistic_welch': 0.9472545159069105, 'statistic_wilcoxon': nan} """ - + + import numpy as np from numpy import array, isnan, isinf from numpy import sort as npsort from numpy.random import choice, seed import scipy.stats as spstats - import lqrt # import statsmodels.stats.power as power @@ -487,7 +689,6 @@ def __init__(self, control, test, effect_size, from ._stats_tools import effsize as es - self.__EFFECT_SIZE_DICT = {"mean_diff" : "mean difference", "median_diff" : "median difference", "cohens_d" : "Cohen's d", @@ -512,14 +713,15 @@ def __init__(self, control, test, effect_size, control = control[~isnan(control)] test = test[~isnan(test)] - self.__effect_size = effect_size - self.__control = control - self.__test = test - self.__is_paired = is_paired - self.__resamples = resamples - self.__random_seed = random_seed - self.__ci = ci - self.__alpha = ci2g._compute_alpha_from_ci(ci) + self.__effect_size = effect_size + self.__control = control + self.__test = test + self.__is_paired = is_paired + self.__resamples = resamples + self.__permutation_count = permutation_count + self.__random_seed = random_seed + self.__ci = ci + self.__alpha = ci2g._compute_alpha_from_ci(ci) self.__difference = es.two_group_difference( @@ -603,17 +805,24 @@ def __init__(self, control, test, effect_size, self.__pct_high = self.__bootstraps[pct_idx_high] # Perform statistical tests. + + self.__PermutationTest_result = PermutationTest(control, test, + effect_size, + is_paired, + permutation_count) + if is_paired is True: # Wilcoxon, a non-parametric version of the paired T-test. 
wilcoxon = spstats.wilcoxon(control, test) self.__pvalue_wilcoxon = wilcoxon.pvalue self.__statistic_wilcoxon = wilcoxon.statistic - lqrt_result = lqrt.lqrtest_rel(control, test, - random_state=random_seed) - - self.__pvalue_paired_lqrt = lqrt_result.pvalue - self.__statistic_paired_lqrt = lqrt_result.statistic + + # Introduced in v0.2.8, removed in v0.3.0 for performance issues. +# lqrt_result = lqrt.lqrtest_rel(control, test, +# random_state=random_seed) +# self.__pvalue_paired_lqrt = lqrt_result.pvalue +# self.__statistic_paired_lqrt = lqrt_result.statistic if effect_size != "median_diff": # Paired Student's t-test. @@ -670,21 +879,22 @@ def __init__(self, control, test, effect_size, # Occurs when the control and test are exactly identical # in terms of rank (eg. all zeros.) pass - - # Likelihood Q-Ratio test: - lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, - random_state=random_seed, - equal_var=True) + + # Introduced in v0.2.8, removed in v0.3.0 for performance issues. +# # Likelihood Q-Ratio test: +# lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, +# random_state=random_seed, +# equal_var=True) - self.__pvalue_lqrt_equal_var = lqrt_equal_var_result.pvalue - self.__statistic_lqrt_equal_var = lqrt_equal_var_result.statistic +# self.__pvalue_lqrt_equal_var = lqrt_equal_var_result.pvalue +# self.__statistic_lqrt_equal_var = lqrt_equal_var_result.statistic - lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, - random_state=random_seed, - equal_var=False) +# lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, +# random_state=random_seed, +# equal_var=False) - self.__pvalue_lqrt_unequal_var = lqrt_unequal_var_result.pvalue - self.__statistic_lqrt_unequal_var = lqrt_unequal_var_result.statistic +# self.__pvalue_lqrt_unequal_var = lqrt_unequal_var_result.pvalue +# self.__statistic_lqrt_unequal_var = lqrt_unequal_var_result.statistic standardized_es = es.cohens_d(control, test, is_paired=False) @@ -701,59 +911,77 @@ def __init__(self, 
control, test, effect_size, def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): - UNPAIRED_ES_TO_TEST = {"mean_diff" : "Mann-Whitney", - "median_diff" : "Kruskal", - "cohens_d" : "Mann-Whitney", - "hedges_g" : "Mann-Whitney", - "cliffs_delta" : "Brunner-Munzel"} - - TEST_TO_PVAL_ATTR = {"Mann-Whitney" : "pvalue_mann_whitney", - "Kruskal" : "pvalue_kruskal", - "Brunner-Munzel" : "pvalue_brunner_munzel", - "Wilcoxon" : "pvalue_wilcoxon"} - + + # # Deprecated in v0.3.0; permutation p-values will be reported by default. + # UNPAIRED_ES_TO_TEST = {"mean_diff" : "Mann-Whitney", + # "median_diff" : "Kruskal", + # "cohens_d" : "Mann-Whitney", + # "hedges_g" : "Mann-Whitney", + # "cliffs_delta" : "Brunner-Munzel"} + # + # TEST_TO_PVAL_ATTR = {"Mann-Whitney" : "pvalue_mann_whitney", + # "Kruskal" : "pvalue_kruskal", + # "Brunner-Munzel" : "pvalue_brunner_munzel", + # "Wilcoxon" : "pvalue_wilcoxon"} + PAIRED_STATUS = {True: 'paired', False: 'unpaired'} - + first_line = {"is_paired": PAIRED_STATUS[self.__is_paired], "es" : self.__EFFECT_SIZE_DICT[self.__effect_size]} - + out1 = "The {is_paired} {es} ".format(**first_line) - + base_string_fmt = "{:." + str(sigfig) + "}" if "." in str(self.__ci): ci_width = base_string_fmt.format(self.__ci) else: ci_width = str(self.__ci) - + ci_out = {"es" : base_string_fmt.format(self.__difference), "ci" : ci_width, "bca_low" : base_string_fmt.format(self.__bca_low), "bca_high" : base_string_fmt.format(self.__bca_high)} - + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) out = out1 + out2 - - if self.__is_paired: - stats_test = "Wilcoxon" - else: - stats_test = UNPAIRED_ES_TO_TEST[self.__effect_size] - pval_rounded = base_string_fmt.format(getattr(self, - TEST_TO_PVAL_ATTR[stats_test]) - ) - pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, - pval_rounded) - + + # # Deprecated in v0.3.0; permutation p-values will be reported by default. 
+ # if self.__is_paired: + # stats_test = "Wilcoxon" + # else: + # stats_test = UNPAIRED_ES_TO_TEST[self.__effect_size] + + + # pval_rounded = base_string_fmt.format(getattr(self, + # TEST_TO_PVAL_ATTR[stats_test]) + # ) + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + # # Deprecated in v0.3.0; permutation p-values will be reported by default. + # pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, + # pval_rounded) + + # pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, + # pval_rounded) + + + pvalue = "The p-value of the two-sided permutation t-test is {}. ".format(pval_rounded) + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) bs2 = "the confidence interval is bias-corrected and accelerated." bs = bs1 + bs2 - defined = "The p-value(s) reported are the likelihood(s) of observing the " + \ + pval_def1 = "The p-value(s) reported are the likelihood(s) of observing the " + \ "effect size(s),\nif the null hypothesis of zero difference is true." + pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ + "control and test labels were performed." + pval_def = pval_def1 + pval_def2 if show_resample_count and define_pval: - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, defined) + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) elif show_resample_count is False and define_pval is True: - return "{}\n{}\n\n{}".format(out, pvalue, defined) + return "{}\n{}\n\n{}".format(out, pvalue, pval_def) elif show_resample_count is True and define_pval is False: return "{}\n{}\n\n{}".format(out, pvalue, bs) else: @@ -995,65 +1223,76 @@ def statistic_mann_whitney(self): return self.__statistic_mann_whitney except AttributeError: return npnan - - - + + # Introduced in v0.3.0. 
+ @property + def pvalue_permutation(self): + return self.__PermutationTest_result.pvalue + # + # @property - def pvalue_lqrt_paired(self): - from numpy import nan as npnan - try: - return self.__pvalue_paired_lqrt - except AttributeError: - return npnan + def permutation_count(self): + return self.__PermutationTest_result.permutation_count - @property - def statistic_lqrt_paired(self): - from numpy import nan as npnan - try: - return self.__statistic_paired_lqrt - except AttributeError: - return npnan - - - @property - def pvalue_lqrt_unpaired_equal_variance(self): - from numpy import nan as npnan - try: - return self.__pvalue_lqrt_equal_var - except AttributeError: - return npnan + # Introduced in v0.2.8, removed in v0.3.0 for performance issues. +# @property +# def pvalue_lqrt_paired(self): +# from numpy import nan as npnan +# try: +# return self.__pvalue_paired_lqrt +# except AttributeError: +# return npnan - @property - def statistic_lqrt_unpaired_equal_variance(self): - from numpy import nan as npnan - try: - return self.__statistic_lqrt_equal_var - except AttributeError: - return npnan +# @property +# def statistic_lqrt_paired(self): +# from numpy import nan as npnan +# try: +# return self.__statistic_paired_lqrt +# except AttributeError: +# return npnan + +# @property +# def pvalue_lqrt_unpaired_equal_variance(self): +# from numpy import nan as npnan +# try: +# return self.__pvalue_lqrt_equal_var +# except AttributeError: +# return npnan + + + +# @property +# def statistic_lqrt_unpaired_equal_variance(self): +# from numpy import nan as npnan +# try: +# return self.__statistic_lqrt_equal_var +# except AttributeError: +# return npnan - @property - def pvalue_lqrt_unpaired_unequal_variance(self): - from numpy import nan as npnan - try: - return self.__pvalue_lqrt_unequal_var - except AttributeError: - return npnan - - - - @property - def statistic_lqrt_unpaired_unequal_variance(self): - from numpy import nan as npnan - try: - return 
self.__statistic_lqrt_unequal_var - except AttributeError: - return npnan + +# @property +# def pvalue_lqrt_unpaired_unequal_variance(self): +# from numpy import nan as npnan +# try: +# return self.__pvalue_lqrt_unequal_var +# except AttributeError: +# return npnan + + + +# @property +# def statistic_lqrt_unpaired_unequal_variance(self): +# from numpy import nan as npnan +# try: +# return self.__statistic_lqrt_unequal_var +# except AttributeError: +# return npnan @@ -1075,18 +1314,21 @@ class EffectSizeDataFrame(object): def __init__(self, dabest, effect_size, is_paired, ci=95, - resamples=5000, random_seed=12345): + resamples=5000, + permutation_count=5000, + random_seed=12345): """ Parses the data from a Dabest object, enabling plotting and printing capability for the effect size of interest. """ - self.__dabest_obj = dabest - self.__effect_size = effect_size - self.__is_paired = is_paired - self.__ci = ci - self.__resamples = resamples - self.__random_seed = random_seed + self.__dabest_obj = dabest + self.__effect_size = effect_size + self.__is_paired = is_paired + self.__ci = ci + self.__resamples = resamples + self.__permutation_count = permutation_count + self.__random_seed = random_seed def __pre_calc(self): @@ -1114,6 +1356,7 @@ def __pre_calc(self): self.__is_paired, self.__ci, self.__resamples, + self.__permutation_count, self.__random_seed) r_dict = result.to_dict() @@ -1156,9 +1399,11 @@ def __pre_calc(self): 'bca_low', 'bca_high', 'bca_interval_idx', 'pct_low', 'pct_high', 'pct_interval_idx', - + 'bootstraps', 'resamples', 'random_seed', - + + 'pvalue_permutation', 'permutation_count', + 'pvalue_welch', 'statistic_welch', @@ -1179,18 +1424,11 @@ def __pre_calc(self): 'pvalue_kruskal', 'statistic_kruskal', - - 'pvalue_lqrt_paired', - 'statistic_lqrt_paired', - - 'pvalue_lqrt_unpaired_equal_variance', - 'statistic_lqrt_unpaired_equal_variance', - - 'pvalue_lqrt_unpaired_unequal_variance', - 'statistic_lqrt_unpaired_unequal_variance'] + ] self.__results = 
out_.reindex(columns=columns_in_order) self.__results.dropna(axis="columns", how="all", inplace=True) + @@ -1200,7 +1438,63 @@ def __repr__(self): except AttributeError: self.__pre_calc() return self.__for_print + + + + def __calc_lqrt(self): + import lqrt + import pandas as pd + + rnd_seed = self.__random_seed + db_obj = self.__dabest_obj + dat = db_obj._plot_data + xvar = db_obj._xvar + yvar = db_obj._yvar + + out = [] + + for j, current_tuple in enumerate(db_obj.idx): + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + + for ix, tname in enumerate(current_tuple[1:]): + test = dat[dat[xvar] == tname][yvar].copy() + + if self.__is_paired is True: + # Refactored here in v0.3.0 for performance issues. + lqrt_result = lqrt.lqrtest_rel(control, test, + random_state=rnd_seed) + + out.append({"control": cname, "test": tname, + "control_N": int(len(control)), + "test_N": int(len(test)), + "pvalue_paired_lqrt": lqrt_result.pvalue, + "statistic_paired_lqrt": lqrt_result.statistic + }) + + else: + # Likelihood Q-Ratio test: + lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, + random_state=rnd_seed, + equal_var=True) + + + lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, + random_state=rnd_seed, + equal_var=False) + + out.append({"control": cname, "test": tname, + "control_N": int(len(control)), + "test_N": int(len(test)), + + "pvalue_lqrt_equal_var" : lqrt_equal_var_result.pvalue, + "statistic_lqrt_equal_var" : lqrt_equal_var_result.statistic, + "pvalue_lqrt_unequal_var" : lqrt_unequal_var_result.pvalue, + "statistic_lqrt_unequal_var" : lqrt_unequal_var_result.statistic, + }) + + self.__lqrt_results = pd.DataFrame(out) def plot(self, color_col=None, @@ -1485,4 +1779,169 @@ def dabest_obj(self): Returns the `dabest` object that invoked the current EffectSizeDataFrame class. 
""" - return self.__dabest_obj \ No newline at end of file + return self.__dabest_obj + + + @property + def lqrt(self): + """Returns all pairwise Lq-Likelihood Ratio Type test results + as a pandas DataFrame. + + For more information on LqRT tests, see https://arxiv.org/abs/1911.11922 + """ + try: + return self.__lqrt_results + except AttributeError: + self.__calc_lqrt() + return self.__lqrt_results + + + + +class PermutationTest: + """ + A class to compute and report permutation tests. + + Parameters + ---------- + control : array-like + test : array-like + These should be numerical iterables. + effect_size : string. + Any one of the following are accepted inputs: + 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' + is_paired : boolean, default False + permutation_count : int, default 10000 + The number of permutations (reshuffles) to perform. + random_seed : int, default 12345 + `random_seed` is used to seed the random number generator during + bootstrap resampling. This ensures that the generated permutations + are replicable. + + + Returns + ------- + A :py:class:`PermutationTest` object. + + difference : float + The effect size of the difference between the control and the test. + + effect_size : string + The type of effect size reported. + + + Notes + ----- + The basic concept of permutation tests is the same as that behind bootstrapping. + In an "exact" permutation test, all possible resuffles of the control and test + labels are performed, and the proportion of effect sizes that equal or exceed + the observed effect size is computed. This is the probability, under the null + hypothesis of zero difference between test and control groups, of observing the + effect size: the p-value of the Student's t-test. + + Exact permutation tests are impractical: computing the effect sizes for all reshuffles quickly exceeds trivial computational loads. 
A control group and a test group both with 10 observations each would have a total of :math:`20!` or :math:`2.43 \\times {10}^{18}` reshuffles. + Therefore, in practice, "approximate" permutation tests are performed, where a sufficient number of reshuffles are performed (5,000 or 10,000), from which the p-value is computed. + + More information can be found `here `_. + + + Example + ------- + >>> import numpy as np + >>> from scipy.stats import norm + >>> import dabest + >>> control = norm.rvs(loc=0, size=30, random_state=12345) + >>> test = norm.rvs(loc=0.5, size=30, random_state=12345) + >>> perm_test = dabest.PermutationTest(control, test, + ... effect_size="mean_diff", + ... is_paired=False) + >>> perm_test + 5000 permutations were taken. The p-value is 0.0758. + """ + + def __init__(self, control, test, + effect_size, is_paired, + permutation_count=5000, + random_seed=12345, + **kwargs): + + import numpy as np + from numpy.random import PCG64, RandomState + from ._stats_tools.effsize import two_group_difference + + self.__permutation_count = permutation_count + + # Run Sanity Check. + if is_paired and len(control) != len(test): + raise ValueError("The two arrays do not have the same length.") + + # Initialise random number generator. + # rng = np.random.default_rng(seed=random_seed) + rng = RandomState(PCG64(random_seed)) + + # Set required constants and variables + control = np.array(control) + test = np.array(test) + + control_sample = control.copy() + test_sample = test.copy() + + BAG = np.array([*control, *test]) + CONTROL_LEN = int(len(control)) + EXTREME_COUNT = 0. + THRESHOLD = np.abs(two_group_difference(control, test, + is_paired, effect_size)) + self.__permutations = [] + + for i in range(int(permutation_count)): + + if is_paired: + # Select which control-test pairs to swap. + random_idx = rng.choice(CONTROL_LEN, + rng.randint(0, CONTROL_LEN+1), + replace=False) + + # Perform swap.
+ for i in random_idx: + _placeholder = control_sample[i] + control_sample[i] = test_sample[i] + test_sample[i] = _placeholder + + else: + # Shuffle the bag and assign to control and test groups. + # NB. rng.shuffle didn't produce replicable results... + shuffled = rng.permutation(BAG) + control_sample = shuffled[:CONTROL_LEN] + test_sample = shuffled[CONTROL_LEN:] + + + es = two_group_difference(control_sample, test_sample, + False, effect_size) + + self.__permutations.append(es) + + if np.abs(es) > THRESHOLD: + EXTREME_COUNT += 1. + + self.pvalue = EXTREME_COUNT / permutation_count + + + def __repr__(self): + return("{} permutations were taken. The p-value is {}.".format(self.permutation_count, + self.pvalue)) + + + @property + def permutation_count(self): + """ + The number of permutations taken. + """ + return self.__permutation_count + + + @property + def permutations(self): + """ + The effect sizes of all the permutations in a list. + """ + return self.__permutations \ No newline at end of file diff --git a/dabest/_stats_tools/confint_1group.py b/dabest/_stats_tools/confint_1group.py index 4650ea5f..682a2c59 100644 --- a/dabest/_stats_tools/confint_1group.py +++ b/dabest/_stats_tools/confint_1group.py @@ -9,22 +9,22 @@ def create_bootstrap_indexes(array, resamples=5000, random_seed=12345): """Given an array-like, returns a generator of bootstrap indexes to be used for resampling. - """ + """ import numpy as np - - # Set seed. - np.random.seed(random_seed) + from numpy.random import PCG64, RandomState + rng = RandomState(PCG64(random_seed)) indexes = range(0, len(array)) - out = (np.random.choice(indexes, len(indexes), replace=True) + out = (rng.choice(indexes, len(indexes), replace=True) for i in range(0, resamples)) - - # Reset seed - np.random.seed() - + + # Reset RNG + # rng = RandomState(MT19937()) return out + + def compute_1group_jackknife(x, func, *args, **kwargs): """ Returns the jackknife bootstraps for func(x).
@@ -49,9 +49,6 @@ def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345, import numpy as np - # Instantiate random seed. - np.random.seed(random_seed) - # Create bootstrap indexes. boot_indexes = create_bootstrap_indexes(x, resamples=resamples, random_seed=random_seed) @@ -60,8 +57,6 @@ def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345, del boot_indexes - np.random.seed() - return out diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index fc01e87c..9282b1be 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -130,13 +130,15 @@ def _calc_accel(jack_dist): def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, - resamples=5000, random_seed=12345): + resamples=5000, random_seed=12345): """Bootstraps the effect_size for 2 groups.""" from . import effsize as __es import numpy as np - - np.random.seed(random_seed) + from numpy.random import PCG64, RandomState + + # rng = RandomState(default_rng(random_seed)) + rng = RandomState(PCG64(random_seed)) out = np.repeat(np.nan, resamples) x0_len = len(x0) @@ -147,18 +149,15 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, if is_paired: if x0_len != x1_len: raise ValueError("The two arrays do not have the same length.") - random_idx = np.random.choice(x0_len, x0_len, replace=True) + random_idx = rng.choice(x0_len, x0_len, replace=True) x0_sample = x0[random_idx] x1_sample = x1[random_idx] else: - x0_sample = np.random.choice(x0, x0_len, replace=True) - x1_sample = np.random.choice(x1, x1_len, replace=True) + x0_sample = rng.choice(x0, x0_len, replace=True) + x1_sample = rng.choice(x1, x1_len, replace=True) out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size) - - # reset seed - np.random.seed() # check whether there are any infinities in the bootstrap, # which likely indicates the sample sizes are too small as diff --git 
a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py index 69c9cd5b..1637085a 100644 --- a/dabest/_stats_tools/effsize.py +++ b/dabest/_stats_tools/effsize.py @@ -60,7 +60,7 @@ def two_group_difference(control, test, is_paired=False, J(n) = ------------------------------ sqrt(n/2) * gamma((n - 1) / 2) - where n = (n1 + n2 -2). + where n = (n1 + n2 - 2). median_diff: This is the median of `control` subtracted from the median of `test`. diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 745d89b5..377b9eda 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -19,13 +19,13 @@ def halfviolin(v, half='right', fill_color='k', alpha=1, mean_vertical = np.mean(V[:, 0]) mean_horizontal = np.mean(V[:, 1]) - if half is 'left': - V[:, 0] = np.clip(V[:, 0], -np.inf, mean_vertical) - if half is 'right': + if half == 'right': V[:, 0] = np.clip(V[:, 0], mean_vertical, np.inf) - if half is 'bottom': + elif half == 'left': + V[:, 0] = np.clip(V[:, 0], -np.inf, mean_vertical) + elif half == 'bottom': V[:, 1] = np.clip(V[:, 1], -np.inf, mean_horizontal) - if half is 'top': + elif half == 'top': V[:, 1] = np.clip(V[:, 1], mean_horizontal, np.inf) b.set_color(fill_color) diff --git a/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png b/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png index 1ba96441..efe7d87b 100644 Binary files a/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png and b/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png b/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png index f450293e..09aabb9a 100644 Binary files a/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png and b/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png differ diff --git 
a/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png b/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png index 4f44b926..8eb24366 100644 Binary files a/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png and b/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png differ diff --git a/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png b/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png index c25e627b..3efccad0 100644 Binary files a/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png and b/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png differ diff --git a/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png b/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png index f96fc50c..d440b849 100644 Binary files a/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png and b/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png b/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png index cc4ea3d9..8b289700 100644 Binary files a/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png and b/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png b/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png index 3961fb3e..265ae254 100644 Binary files a/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png and b/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png 
b/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png index 0a0e81cd..28db90fd 100644 Binary files a/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png and b/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png differ diff --git a/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png b/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png index 1812eabf..15905943 100644 Binary files a/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png and b/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png differ diff --git a/dabest/tests/baseline_images/test_09_cummings_shared_control.png b/dabest/tests/baseline_images/test_09_cummings_shared_control.png index 9c09b869..9f015ec2 100644 Binary files a/dabest/tests/baseline_images/test_09_cummings_shared_control.png and b/dabest/tests/baseline_images/test_09_cummings_shared_control.png differ diff --git a/dabest/tests/baseline_images/test_10_cummings_multi_groups.png b/dabest/tests/baseline_images/test_10_cummings_multi_groups.png index 33b2c8f8..7a287389 100644 Binary files a/dabest/tests/baseline_images/test_10_cummings_multi_groups.png and b/dabest/tests/baseline_images/test_10_cummings_multi_groups.png differ diff --git a/dabest/tests/baseline_images/test_11_inset_plots.png b/dabest/tests/baseline_images/test_11_inset_plots.png index 7f1ad97e..9c5b77b6 100644 Binary files a/dabest/tests/baseline_images/test_11_inset_plots.png and b/dabest/tests/baseline_images/test_11_inset_plots.png differ diff --git a/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png b/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png index e18a5250..598e10cc 100644 Binary files a/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png and b/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png differ diff --git a/dabest/tests/baseline_images/test_13_multi_2group_color.png 
b/dabest/tests/baseline_images/test_13_multi_2group_color.png index cffab938..943d13b0 100644 Binary files a/dabest/tests/baseline_images/test_13_multi_2group_color.png and b/dabest/tests/baseline_images/test_13_multi_2group_color.png differ diff --git a/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png b/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png index be5458e9..8c70d906 100644 Binary files a/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png and b/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png differ diff --git a/dabest/tests/baseline_images/test_15_change_palette_a.png b/dabest/tests/baseline_images/test_15_change_palette_a.png index 47480263..9aacd4b9 100644 Binary files a/dabest/tests/baseline_images/test_15_change_palette_a.png and b/dabest/tests/baseline_images/test_15_change_palette_a.png differ diff --git a/dabest/tests/baseline_images/test_16_change_palette_b.png b/dabest/tests/baseline_images/test_16_change_palette_b.png index 87b8b240..02ca2d59 100644 Binary files a/dabest/tests/baseline_images/test_16_change_palette_b.png and b/dabest/tests/baseline_images/test_16_change_palette_b.png differ diff --git a/dabest/tests/baseline_images/test_17_change_palette_c.png b/dabest/tests/baseline_images/test_17_change_palette_c.png index ee356f4a..d8e9fb21 100644 Binary files a/dabest/tests/baseline_images/test_17_change_palette_c.png and b/dabest/tests/baseline_images/test_17_change_palette_c.png differ diff --git a/dabest/tests/baseline_images/test_18_desat.png b/dabest/tests/baseline_images/test_18_desat.png index ea6e42f8..08b8b14f 100644 Binary files a/dabest/tests/baseline_images/test_18_desat.png and b/dabest/tests/baseline_images/test_18_desat.png differ diff --git a/dabest/tests/baseline_images/test_19_dot_sizes.png b/dabest/tests/baseline_images/test_19_dot_sizes.png index 6376e4d6..296c6189 100644 Binary files a/dabest/tests/baseline_images/test_19_dot_sizes.png and 
b/dabest/tests/baseline_images/test_19_dot_sizes.png differ diff --git a/dabest/tests/baseline_images/test_20_change_ylims.png b/dabest/tests/baseline_images/test_20_change_ylims.png index 2547312a..87a2e312 100644 Binary files a/dabest/tests/baseline_images/test_20_change_ylims.png and b/dabest/tests/baseline_images/test_20_change_ylims.png differ diff --git a/dabest/tests/baseline_images/test_21_invert_ylim.png b/dabest/tests/baseline_images/test_21_invert_ylim.png index 56c1d99b..a60712df 100644 Binary files a/dabest/tests/baseline_images/test_21_invert_ylim.png and b/dabest/tests/baseline_images/test_21_invert_ylim.png differ diff --git a/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png b/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png index 8189b550..199effa0 100644 Binary files a/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png and b/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png differ diff --git a/dabest/tests/baseline_images/test_23_ticker_cumming.png b/dabest/tests/baseline_images/test_23_ticker_cumming.png index d43068c5..fe76b9e5 100644 Binary files a/dabest/tests/baseline_images/test_23_ticker_cumming.png and b/dabest/tests/baseline_images/test_23_ticker_cumming.png differ diff --git a/dabest/tests/baseline_images/test_24_wide_df_nan.png b/dabest/tests/baseline_images/test_24_wide_df_nan.png index 9b6af3e4..7508c4dc 100644 Binary files a/dabest/tests/baseline_images/test_24_wide_df_nan.png and b/dabest/tests/baseline_images/test_24_wide_df_nan.png differ diff --git a/dabest/tests/baseline_images/test_25_long_df_nan.png b/dabest/tests/baseline_images/test_25_long_df_nan.png index 9b6af3e4..7508c4dc 100644 Binary files a/dabest/tests/baseline_images/test_25_long_df_nan.png and b/dabest/tests/baseline_images/test_25_long_df_nan.png differ diff --git a/dabest/tests/baseline_images/test_26_slopegraph_kwargs.png b/dabest/tests/baseline_images/test_26_slopegraph_kwargs.png index 41d80538..2a595b02 
100644 Binary files a/dabest/tests/baseline_images/test_26_slopegraph_kwargs.png and b/dabest/tests/baseline_images/test_26_slopegraph_kwargs.png differ diff --git a/dabest/tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png b/dabest/tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png index a46e8793..79aab0dc 100644 Binary files a/dabest/tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png and b/dabest/tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png differ diff --git a/dabest/tests/baseline_images/test_28_paired_cumming_slopegraph_reflines_kwargs.png b/dabest/tests/baseline_images/test_28_paired_cumming_slopegraph_reflines_kwargs.png index de720fbb..d1aca9d3 100644 Binary files a/dabest/tests/baseline_images/test_28_paired_cumming_slopegraph_reflines_kwargs.png and b/dabest/tests/baseline_images/test_28_paired_cumming_slopegraph_reflines_kwargs.png differ diff --git a/dabest/tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png b/dabest/tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png index 83c4280a..a26e642c 100644 Binary files a/dabest/tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png and b/dabest/tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png differ diff --git a/dabest/tests/baseline_images/test_99_style_sheets.png b/dabest/tests/baseline_images/test_99_style_sheets.png index 9827bfc0..46413f8b 100644 Binary files a/dabest/tests/baseline_images/test_99_style_sheets.png and b/dabest/tests/baseline_images/test_99_style_sheets.png differ diff --git a/dabest/tests/test_01_effsizes_pvals.py b/dabest/tests/test_01_effsizes_pvals.py index 151dbdc3..3b2fba7d 100644 --- a/dabest/tests/test_01_effsizes_pvals.py +++ b/dabest/tests/test_01_effsizes_pvals.py @@ -11,7 +11,7 @@ import scipy as sp import pandas as pd from .._stats_tools import effsize -from .._classes import TwoGroupsEffectSize +from .._classes import TwoGroupsEffectSize, PermutationTest, 
Dabest @@ -26,8 +26,9 @@ # from Cumming 2012 Table 11.2 Page 291 -paired_wb = {"pre": [43, 28, 54, 36, 31, 48, 50, 69, 29, 40], - "post": [51, 33, 58, 42, 39, 45, 54, 68, 35, 44]} +paired_wb = {"pre": [43, 28, 54, 36, 31, 48, 50, 69, 29, 40], + "post": [51, 33, 58, 42, 39, 45, 54, 68, 35, 44], + "ID": np.arange(10)} paired_wellbeing = pd.DataFrame(paired_wb) @@ -52,6 +53,9 @@ +# kwargs for Dabest class init. +dabest_default_kwargs = dict(x=None, y=None, ci=95, + resamples=5000, random_seed=12345) @@ -180,28 +184,50 @@ def test_ordinal_dominance(): assert es.pvalue_brunner_munzel == pytest.approx(p1) - + +def test_unpaired_permutation_test(): + perm_test = PermutationTest(wellbeing.control, wellbeing.expt, + effect_size="mean_diff", + is_paired=False) + assert perm_test.pvalue == pytest.approx(0.2976) + + + +def test_paired_permutation_test(): + perm_test = PermutationTest(paired_wellbeing.pre, + paired_wellbeing.post, + effect_size="mean_diff", + is_paired=True) + assert perm_test.pvalue == pytest.approx(0.0124) + + + def test_lqrt_unpaired(): - es = TwoGroupsEffectSize(wellbeing.control, wellbeing.expt, - "mean_diff", is_paired=False) - + unpaired_dabest = Dabest(wellbeing, idx=("control", "expt"), + paired=False, id_col=None, + **dabest_default_kwargs) + lqrt_result = unpaired_dabest.mean_diff.lqrt + p1 = lqrt.lqrtest_ind(wellbeing.control, wellbeing.expt, - equal_var=False, + equal_var=True, random_state=12345) p2 = lqrt.lqrtest_ind(wellbeing.control, wellbeing.expt, - equal_var=True, + equal_var=False, random_state=12345) - assert es.pvalue_lqrt_unpaired_unequal_variance == pytest.approx(p1.pvalue) - assert es.pvalue_lqrt_unpaired_equal_variance == pytest.approx(p2.pvalue) + assert lqrt_result.pvalue_lqrt_equal_var[0] == pytest.approx(p1.pvalue) + assert lqrt_result.pvalue_lqrt_unequal_var[0] == pytest.approx(p2.pvalue) + def test_lqrt_paired(): - es = TwoGroupsEffectSize(paired_wellbeing.pre, paired_wellbeing.post, - "mean_diff", is_paired=True) + 
paired_dabest = Dabest(paired_wellbeing, idx=("pre", "post"), + paired=True, id_col="ID", + **dabest_default_kwargs) + lqrt_result = paired_dabest.mean_diff.lqrt p1 = lqrt.lqrtest_rel(paired_wellbeing.pre, paired_wellbeing.post, random_state=12345) - assert es.pvalue_lqrt_paired == pytest.approx(p1.pvalue) \ No newline at end of file + assert lqrt_result.pvalue_paired_lqrt[0] == pytest.approx(p1.pvalue) \ No newline at end of file diff --git a/dabest/tests/test_02_edge_cases.py b/dabest/tests/test_02_edge_cases.py index c6e4d654..645f01c0 100644 --- a/dabest/tests/test_02_edge_cases.py +++ b/dabest/tests/test_02_edge_cases.py @@ -6,6 +6,7 @@ import sys import numpy as np +from numpy.random import PCG64, RandomState import scipy as sp import pytest import pandas as pd @@ -21,15 +22,15 @@ def test_unrelated_columns(N=60, random_seed=12345): Added in v0.2.5. """ - - np.random.seed(random_seed) + + # rng = RandomState(MT19937(random_seed)) + rng = RandomState(PCG64(12345)) + # rng = np.random.default_rng(seed=random_seed) df = pd.DataFrame( - {'groups': np.random.choice(['Group 1', 'Group 2', 'Group 3'], size=(N,)), - 'color' : np.random.choice(['green', 'red', 'purple'], size=(N,)), - 'value': np.random.random(size=(N,))}) - - np.random.seed() + {'groups': rng.choice(['Group 1', 'Group 2', 'Group 3'], size=(N,)), + 'color' : rng.choice(['green', 'red', 'purple'], size=(N,)), + 'value': rng.random(size=(N,))}) df['unrelated'] = np.nan @@ -38,6 +39,6 @@ def test_unrelated_columns(N=60, random_seed=12345): md = test.mean_diff.results - assert md.difference[0] == pytest.approx(0.1115, abs=1e-6) - assert md.bca_low[0] == pytest.approx(-0.042835, abs=1e-6) - assert md.bca_high[0] == pytest.approx(0.264542, abs=1e-6) \ No newline at end of file + assert md.difference[0] == pytest.approx(-0.0322, abs=1e-4) + assert md.bca_low[0] == pytest.approx(-0.2279, abs=1e-4) + assert md.bca_high[0] == pytest.approx(0.1613, abs=1e-4) \ No newline at end of file diff --git 
a/dabest/tests/test_03_plotting.py b/dabest/tests/test_03_plotting.py index af7c2c98..9723b009 100644 --- a/dabest/tests/test_03_plotting.py +++ b/dabest/tests/test_03_plotting.py @@ -28,22 +28,24 @@ paired=True, id_col="ID") multi_2group = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2")) - ) + ("Control 2", "Test 2")) + ) -multi_2group_paired = load(df, idx=(("Control 1", "Test 1"), - ("Control 2", "Test 2")), - paired=True, id_col="ID") +multi_2group_paired = load(df, + idx=(("Control 1", "Test 1"), + ("Control 2", "Test 2")), + paired=True, id_col="ID") shared_control = load(df, idx=("Control 1", "Test 1", - "Test 2", "Test 3", - "Test 4", "Test 5", "Test 6") - ) + "Test 2", "Test 3", + "Test 4", "Test 5", "Test 6") + ) multi_groups = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - )) + ("Control 2", "Test 2","Test 3"), + ("Control 3", "Test 4","Test 5", "Test 6") + ) + ) diff --git a/dabest/tests/test_99_confint.py b/dabest/tests/test_99_confint.py index 8f476aa0..293d0fae 100644 --- a/dabest/tests/test_99_confint.py +++ b/dabest/tests/test_99_confint.py @@ -27,8 +27,8 @@ def test_paired_mean_diff_ci(): paired=True, id_col="subject_id") paired_mean_diff = ex_bp.mean_diff.results - assert pytest.approx(3.625) == paired_mean_diff.bca_low[0] - assert pytest.approx(9.125) == paired_mean_diff.bca_high[0] + assert pytest.approx(3.875) == paired_mean_diff.bca_low[0] + assert pytest.approx(9.5) == paired_mean_diff.bca_high[0] # def test_paired_median_diff_ci(): @@ -122,28 +122,28 @@ def test_unpaired_ci(reps=30, ci=95): error_count_cliffs_delta = 0 for i in range(0, reps): + # print(i) # for debug. # pick a random seed rnd_sd = np.random.randint(0, 999999) load_kwargs = dict(ci=ci, random_seed=rnd_sd) - - std_diff_data = load(data=std_diff_df, idx=("Control", "Test"), **load_kwargs) - cd = std_diff_data.cohens_d.results + # print("cohen's d") # for debug. 
cd_low, cd_high = float(cd.bca_low), float(cd.bca_high) if cd_low < POP_D < cd_high is False: error_count_cohens_d += 1 hg = std_diff_data.hedges_g.results + # print("hedges' g") # for debug. hg_low, hg_high = float(hg.bca_low), float(hg.bca_high) if hg_low < POP_D < hg_high is False: error_count_hedges_g += 1 - mean_diff_data = load(data=mean_df, idx=("Control", "Test"), **load_kwargs) mean_d = mean_diff_data.mean_diff.results + # print("mean diff") # for debug. mean_d_low, mean_d_high = float(mean_d.bca_low), float(mean_d.bca_high) if mean_d_low < TRUE_DIFFERENCE < mean_d_high is False: error_count_mean_diff += 1 @@ -152,14 +152,16 @@ def test_unpaired_ci(reps=30, ci=95): median_diff_data = load(data=median_df, idx=("Control", "Test"), **load_kwargs) median_d = median_diff_data.median_diff.results + # print("median diff") # for debug. median_d_low, median_d_high = float(median_d.bca_low), float(median_d.bca_high) if median_d_low < MEDIAN_DIFFERENCE < median_d_high is False: error_count_median_diff += 1 cd_data = load(data=cd_df, idx=("Control", "Test"), **load_kwargs) - cd = cd_data.cliffs_delta.results - low, high = float(cd.bca_low), float(cd.bca_high) + cliffs = cd_data.cliffs_delta.results + # print("cliff's delta") # for debug. 
+ low, high = float(cliffs.bca_low), float(cliffs.bca_high) if low < 0.5 < high is False: error_count_cliffs_delta += 1 diff --git a/docs/source/_images/tutorial_27_0.png b/docs/source/_images/tutorial_27_0.png new file mode 100644 index 00000000..60cfb63a Binary files /dev/null and b/docs/source/_images/tutorial_27_0.png differ diff --git a/docs/source/_images/tutorial_28_0.png b/docs/source/_images/tutorial_28_0.png index cd801d51..8ad407fc 100644 Binary files a/docs/source/_images/tutorial_28_0.png and b/docs/source/_images/tutorial_28_0.png differ diff --git a/docs/source/_images/tutorial_29_0.png b/docs/source/_images/tutorial_29_0.png deleted file mode 100644 index 974d55dd..00000000 Binary files a/docs/source/_images/tutorial_29_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_30_0.png b/docs/source/_images/tutorial_30_0.png new file mode 100644 index 00000000..8b826056 Binary files /dev/null and b/docs/source/_images/tutorial_30_0.png differ diff --git a/docs/source/_images/tutorial_31_0.png b/docs/source/_images/tutorial_31_0.png deleted file mode 100644 index f121a936..00000000 Binary files a/docs/source/_images/tutorial_31_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_32_0.png b/docs/source/_images/tutorial_32_0.png new file mode 100644 index 00000000..20596dfc Binary files /dev/null and b/docs/source/_images/tutorial_32_0.png differ diff --git a/docs/source/_images/tutorial_33_0.png b/docs/source/_images/tutorial_33_0.png index 25b8c01c..27a23ae4 100644 Binary files a/docs/source/_images/tutorial_33_0.png and b/docs/source/_images/tutorial_33_0.png differ diff --git a/docs/source/_images/tutorial_34_0.png b/docs/source/_images/tutorial_34_0.png deleted file mode 100644 index 7ff93abf..00000000 Binary files a/docs/source/_images/tutorial_34_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_35_0.png b/docs/source/_images/tutorial_35_0.png new file mode 100644 index 00000000..ad0c08ad Binary 
files /dev/null and b/docs/source/_images/tutorial_35_0.png differ diff --git a/docs/source/_images/tutorial_36_0.png b/docs/source/_images/tutorial_36_0.png deleted file mode 100644 index 9ffa7426..00000000 Binary files a/docs/source/_images/tutorial_36_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_37_0.png b/docs/source/_images/tutorial_37_0.png new file mode 100644 index 00000000..274104bc Binary files /dev/null and b/docs/source/_images/tutorial_37_0.png differ diff --git a/docs/source/_images/tutorial_38_0.png b/docs/source/_images/tutorial_38_0.png deleted file mode 100644 index 17ecf4e8..00000000 Binary files a/docs/source/_images/tutorial_38_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_42_0.png b/docs/source/_images/tutorial_42_0.png new file mode 100644 index 00000000..77f051d2 Binary files /dev/null and b/docs/source/_images/tutorial_42_0.png differ diff --git a/docs/source/_images/tutorial_43_0.png b/docs/source/_images/tutorial_43_0.png deleted file mode 100644 index e01ec359..00000000 Binary files a/docs/source/_images/tutorial_43_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_47_0.png b/docs/source/_images/tutorial_47_0.png new file mode 100644 index 00000000..1eef3b47 Binary files /dev/null and b/docs/source/_images/tutorial_47_0.png differ diff --git a/docs/source/_images/tutorial_48_0.png b/docs/source/_images/tutorial_48_0.png deleted file mode 100644 index d5691bae..00000000 Binary files a/docs/source/_images/tutorial_48_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_52_0.png b/docs/source/_images/tutorial_52_0.png new file mode 100644 index 00000000..98a05a89 Binary files /dev/null and b/docs/source/_images/tutorial_52_0.png differ diff --git a/docs/source/_images/tutorial_53_0.png b/docs/source/_images/tutorial_53_0.png deleted file mode 100644 index 66b0a542..00000000 Binary files a/docs/source/_images/tutorial_53_0.png and /dev/null differ diff --git 
a/docs/source/_images/tutorial_55_0.png b/docs/source/_images/tutorial_55_0.png new file mode 100644 index 00000000..03060641 Binary files /dev/null and b/docs/source/_images/tutorial_55_0.png differ diff --git a/docs/source/_images/tutorial_56_0.png b/docs/source/_images/tutorial_56_0.png deleted file mode 100644 index d86bfc53..00000000 Binary files a/docs/source/_images/tutorial_56_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_57_0.png b/docs/source/_images/tutorial_57_0.png new file mode 100644 index 00000000..4ab22be3 Binary files /dev/null and b/docs/source/_images/tutorial_57_0.png differ diff --git a/docs/source/_images/tutorial_58_0.png b/docs/source/_images/tutorial_58_0.png index e3d0650e..4df9f6cf 100644 Binary files a/docs/source/_images/tutorial_58_0.png and b/docs/source/_images/tutorial_58_0.png differ diff --git a/docs/source/_images/tutorial_59_0.png b/docs/source/_images/tutorial_59_0.png deleted file mode 100644 index 2e407ae8..00000000 Binary files a/docs/source/_images/tutorial_59_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_60_0.png b/docs/source/_images/tutorial_60_0.png new file mode 100644 index 00000000..2c2ec38d Binary files /dev/null and b/docs/source/_images/tutorial_60_0.png differ diff --git a/docs/source/_images/tutorial_61_0.png b/docs/source/_images/tutorial_61_0.png index 045b2e95..54b83c1d 100644 Binary files a/docs/source/_images/tutorial_61_0.png and b/docs/source/_images/tutorial_61_0.png differ diff --git a/docs/source/_images/tutorial_62_0.png b/docs/source/_images/tutorial_62_0.png deleted file mode 100644 index 891ed76d..00000000 Binary files a/docs/source/_images/tutorial_62_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_63_0.png b/docs/source/_images/tutorial_63_0.png new file mode 100644 index 00000000..564bd9a8 Binary files /dev/null and b/docs/source/_images/tutorial_63_0.png differ diff --git a/docs/source/_images/tutorial_64_0.png 
b/docs/source/_images/tutorial_64_0.png deleted file mode 100644 index ff3fa6d5..00000000 Binary files a/docs/source/_images/tutorial_64_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_65_0.png b/docs/source/_images/tutorial_65_0.png new file mode 100644 index 00000000..aff0d6af Binary files /dev/null and b/docs/source/_images/tutorial_65_0.png differ diff --git a/docs/source/_images/tutorial_66_0.png b/docs/source/_images/tutorial_66_0.png deleted file mode 100644 index 0a494e37..00000000 Binary files a/docs/source/_images/tutorial_66_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_67_0.png b/docs/source/_images/tutorial_67_0.png new file mode 100644 index 00000000..875a6f65 Binary files /dev/null and b/docs/source/_images/tutorial_67_0.png differ diff --git a/docs/source/_images/tutorial_68_0.png b/docs/source/_images/tutorial_68_0.png deleted file mode 100644 index d9a3de61..00000000 Binary files a/docs/source/_images/tutorial_68_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_69_0.png b/docs/source/_images/tutorial_69_0.png new file mode 100644 index 00000000..281de08e Binary files /dev/null and b/docs/source/_images/tutorial_69_0.png differ diff --git a/docs/source/_images/tutorial_70_0.png b/docs/source/_images/tutorial_70_0.png deleted file mode 100644 index e9a9dde3..00000000 Binary files a/docs/source/_images/tutorial_70_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_71_0.png b/docs/source/_images/tutorial_71_0.png new file mode 100644 index 00000000..6b92bdac Binary files /dev/null and b/docs/source/_images/tutorial_71_0.png differ diff --git a/docs/source/_images/tutorial_72_0.png b/docs/source/_images/tutorial_72_0.png deleted file mode 100644 index 37ec0120..00000000 Binary files a/docs/source/_images/tutorial_72_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_73_0.png b/docs/source/_images/tutorial_73_0.png new file mode 100644 index 
00000000..dec597b8 Binary files /dev/null and b/docs/source/_images/tutorial_73_0.png differ diff --git a/docs/source/_images/tutorial_74_0.png b/docs/source/_images/tutorial_74_0.png index 21a8eec3..467dbcb0 100644 Binary files a/docs/source/_images/tutorial_74_0.png and b/docs/source/_images/tutorial_74_0.png differ diff --git a/docs/source/_images/tutorial_75_0.png b/docs/source/_images/tutorial_75_0.png deleted file mode 100644 index 64e60c8c..00000000 Binary files a/docs/source/_images/tutorial_75_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_76_0.png b/docs/source/_images/tutorial_76_0.png new file mode 100644 index 00000000..b47f0a07 Binary files /dev/null and b/docs/source/_images/tutorial_76_0.png differ diff --git a/docs/source/_images/tutorial_77_0.png b/docs/source/_images/tutorial_77_0.png deleted file mode 100644 index 5b0334c1..00000000 Binary files a/docs/source/_images/tutorial_77_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_78_0.png b/docs/source/_images/tutorial_78_0.png new file mode 100644 index 00000000..7f8e1516 Binary files /dev/null and b/docs/source/_images/tutorial_78_0.png differ diff --git a/docs/source/_images/tutorial_79_0.png b/docs/source/_images/tutorial_79_0.png deleted file mode 100644 index 38205d9c..00000000 Binary files a/docs/source/_images/tutorial_79_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_81_0.png b/docs/source/_images/tutorial_81_0.png new file mode 100644 index 00000000..471660ab Binary files /dev/null and b/docs/source/_images/tutorial_81_0.png differ diff --git a/docs/source/_images/tutorial_82_0.png b/docs/source/_images/tutorial_82_0.png deleted file mode 100644 index fe7d5ac3..00000000 Binary files a/docs/source/_images/tutorial_82_0.png and /dev/null differ diff --git a/docs/source/about.rst b/docs/source/about.rst index a3644da3..de38d8ed 100644 --- a/docs/source/about.rst +++ b/docs/source/about.rst @@ -7,7 +7,7 @@ About Authors -------- 
-DABEST is written in Python by `Joses W. Ho `_, with design and input from `Adam Claridge-Chang `_ and other lab members. +DABEST is written in Python by `Joses W. Ho `_, with design and input from `Adam Claridge-Chang `_ and other `lab members `__. To find out more about the authors’ research, please visit the `Claridge-Chang lab webpage `_. diff --git a/docs/source/api.rst b/docs/source/api.rst index 1a7cb8b4..3f197048 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -10,23 +10,27 @@ Loading Data .. autofunction:: load + +Computing Effect Sizes +---------------------- + +.. autoclass:: dabest._classes.Dabest + :members: mean_diff, median_diff, cohens_d, hedges_g, cliffs_delta + :member-order: bysource + +.. .. autoclass:: dabest._classes.TwoGroupsEffectSize + + Plotting Data ------------- -.. autofunction:: dabest._classes.EffectSizeDataFrame.plot -.. .. autoclass:: dabest._classes.EffectSizeDataFrame -.. :members: plot +.. autoclass:: dabest._classes.EffectSizeDataFrame + :members: plot, lqrt + :member-order: bysource -Computing Effect Sizes ----------------------- -.. autoclass:: dabest._classes.TwoGroupsEffectSize -.. -.. .. autofunction:: dabest._stats_tools.effsize.func_difference -.. -.. .. autofunction:: dabest._stats_tools.effsize.cohens_d -.. -.. .. autofunction:: dabest._stats_tools.effsize.hedges_g -.. -.. .. autofunction:: dabest._stats_tools.effsize.cliffs_delta +Permutation Tests +----------------- + +.. autoclass:: dabest._classes.PermutationTest \ No newline at end of file diff --git a/docs/source/citation.rst b/docs/source/citation.rst new file mode 100644 index 00000000..89d7615c --- /dev/null +++ b/docs/source/citation.rst @@ -0,0 +1,17 @@ +.. _Citing DABEST: + + +============= +Citing DABEST +============= + +If your publication features a graphic generated with this software library, please cite the following publication. 
+ +**Moving beyond P values: Everyday data analysis with estimation plots** +Joses Ho, Tayfun Tumkaya, Sameer Aryal, Hyungwon Choi, Adam Claridge-Chang + +`Nature Methods` 2019, 1548-7105. `doi:10.1038/s41592-019-0470-3 `__ + +`Free-to-view PDF `__ + +`Paywalled publisher site `__ diff --git a/docs/source/conf.py b/docs/source/conf.py index 78db6b42..63d707e4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -46,7 +46,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', - 'sphinx.ext.githubpages', 'sphinx.ext.intersphinx'] + 'sphinx.ext.mathjax', 'sphinx.ext.githubpages', 'sphinx.ext.intersphinx'] # Add mappings intersphinx_mapping = { @@ -67,7 +67,7 @@ # General information about the project. project = 'dabest' -copyright = '2019, Joses W. Ho' +copyright = '2017-2020, Joses W. Ho' author = 'Joses W. Ho' # The version info for the project you're documenting, acts as replacement for @@ -75,9 +75,9 @@ # built documents. # # The short X.Y version. -version = '0.2' +version = '0.3' # The full version, including alpha/beta/rc tags. -release = '0.2.8' +release = '0.3.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst index 5ab6c523..69df07ff 100644 --- a/docs/source/getting-started.rst +++ b/docs/source/getting-started.rst @@ -8,16 +8,16 @@ Getting Started Requirements ------------ -Python 3.7 is strongly recommended. DABEST has also been tested with Python 3.5 and 3.6. +Python 3.8 is strongly recommended. DABEST has also been tested with Python 3.5, 3.6, and 3.7. 
In addition, the following packages are also required (listed with their minimal versions): -* `numpy 1.15 `_ +* `numpy 1.17 `_ * `scipy 1.2 `_ * `matplotlib 3.0 `_ * `pandas 0.25.3 `_ * `seaborn 0.9 `_ -* `lqrt 0.3.2 `_ +* `lqrt 0.3 `_ To obtain these package dependencies easily, it is highly recommended to download the `Anaconda `_ distribution of Python. diff --git a/docs/source/index.rst b/docs/source/index.rst index 4ad2d1f3..7bbb3414 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,12 +18,15 @@ Analyze your data with estimation statistics! News ---- +January 2020: + - v0.3.0 released. Approximate permutation tests have been added, and are now the default p-values reported in the textual output. The LqRT tests were also refactored to a user-callable property. For more information, see the :doc:`release-notes`. + December 2019: - - v0.2.8 released. This release adds the `Lq-Likelihood-Ratio-Type Test `_ in the statistical output, and also a bugfix for slopegraph and reference line keyword parsing. For more information, see the :doc:`release-notes`. + - v0.2.8 released. This release adds the `Lq-Likelihood-Ratio-Type Test `_ in the statistical output, and also a bugfix for slopegraph and reference line keyword parsing. October 2019: - v0.2.7 released. A minor bugfix in the handling of wide datasets with unequal Ns in each group. - - v0.2.6 released. This release has one new feature (plotting of estimation plot inside any :py:mod:`matplotlib` :py:class:`Axes`; see the section on :ref:`inset_plot` in the :doc:`tutorial`). There are also two bug patches for the handling of bootstrap plotting, and of dependency installation. + - v0.2.6 released. This release has one new feature (plotting of estimation plot inside any :py:mod:`matplotlib` :py:class:`Axes`; see the section on :ref:`inset plot` in the :doc:`tutorial`). There are also two bug patches for the handling of bootstrap plotting, and of dependency installation. 
September 2019: - v0.2.5 released. This release addresses two feature requests, and also patches two bugs: one affecting the paired difference CIs, and one involving NaNs in unused/irrelevant columns. @@ -44,12 +47,13 @@ Contents -------- .. toctree:: - :maxdepth: 2 - - robust-beautiful - bootstraps - getting-started - tutorial - release-notes - api - about + :maxdepth: 1 + + robust-beautiful + bootstraps + getting-started + tutorial + release-notes + api + about + citation diff --git a/docs/source/release-notes.rst b/docs/source/release-notes.rst index 40a00d84..7f6dd5f7 100644 --- a/docs/source/release-notes.rst +++ b/docs/source/release-notes.rst @@ -4,10 +4,21 @@ Release Notes ============= + +v0.3.0 +------ +This is a new major release that refactors the statistical test output. All users are strongly encouraged to update to this release. + +Main Changes: + - the Lq-Likelihood Ratio-Type test results are now computed only when the user calls the `lqrt` property of the `dabest_object.effect_size` :py:class:`EffectSizeDataFrame` object. This refactor was done to mitigate the lengthy computation time the test took. See Issues `#94 `_ and `#91 `_. + - The default hypothesis test reported by calling the `dabest_object.effect_size` :py:class:`EffectSizeDataFrame` object is now the `non-parametric two-sided permutation t-test `_. Conceptually, this hypothesis test mirrors the usage of the bootstrap (to obtain the 95% confidence intervals). Read more at :doc:`tutorial`. See Issue `#92 `_ and PR `#93 `_. + - The minimum version of :py:mod:`numpy` is now v1.17, which has an updated method of generating random samples. The resampling code used in :py:mod:`dabest` has thus been updated as well. + + v0.2.8 ------ -This release fixes minor bugs, and implements a new statistical test. There are no known breaking changes to the API; hence **all users are strongly encouraged to upgrade to the latest version**. 
+This release fixes minor bugs, and implements a new statistical test. Feature Additions: - Implement `Lq-Likelihood-Ratio-Type Test `_ in statistical output with `PR #85 `_; thanks to Adam Li (`@adam2392 `_). @@ -28,7 +39,7 @@ v0.2.6 ------ Feature additions: - - It is now possible to specify a pre-determined :py:mod:`matplotlib` :py:class:`Axes` to create the estimation plot in. See the new section in the tutorial for more information. (`Pull request #73 `_; thanks to Adam Nekimken (`@anekimken `_). + - It is now possible to specify a pre-determined :py:mod:`matplotlib` :py:class:`Axes` to create the estimation plot in. See :ref:`inset plot` in the :doc:`tutorial` (`Pull request #73 `_; thanks to Adam Nekimken (`@anekimken `_)). - diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 5f6a91d0..62cb2201 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -4,12 +4,14 @@ Tutorial ======== + Load Libraries -------------- .. code-block:: python3 :linenos: + import numpy as np import pandas as pd import dabest @@ -19,18 +21,19 @@ Load Libraries .. parsed-literal:: - We're using DABEST v0.2.7 + We're using DABEST v0.3.0 Create dataset for demo ----------------------- -Here, we create a dataset to illustrate how DABEST functions. In +Here, we create a dataset to illustrate how ``dabest`` functions. In this dataset, each column corresponds to a group of observations. .. code-block:: python3 :linenos: + from scipy.stats import norm # Used in generation of populations. np.random.seed(9999) # Fix the seed so the results are replicable. @@ -70,13 +73,14 @@ Note that we have 9 groups (3 Control samples and 6 Test samples). Our dataset also has a non-numerical column indicating gender, and another column indicating the identity of each observation. -This is known as a 'wide' dataset. See this +This is known as a ‘wide’ dataset. See this `writeup `__ for more details. .. 
code-block:: python3 :linenos: + df.head() @@ -86,7 +90,7 @@ for more details.
@@ -408,6 +407,8 @@ You can access the results as a :py:mod:`pandas` :py:class:`DataFrame` as well. + + @@ -430,13 +431,15 @@ You can access the results as a :py:mod:`pandas` :py:class:`DataFrame` as well. - - - - - + + + + + - + + + @@ -455,6 +458,7 @@ You can access the results as a :py:mod:`pandas` :py:class:`DataFrame` as well. .. code-block:: python3 :linenos: + two_groups_unpaired.mean_diff.statistical_tests @@ -490,6 +494,7 @@ You can access the results as a :py:mod:`pandas` :py:class:`DataFrame` as well. + @@ -509,8 +514,9 @@ You can access the results as a :py:mod:`pandas` :py:class:`DataFrame` as well. - - + + + @@ -524,11 +530,12 @@ You can access the results as a :py:mod:`pandas` :py:class:`DataFrame` as well. -Let's compute the Hedges' g for our comparison. +Let’s compute the Hedges’ *g* for our comparison. .. code-block:: python3 :linenos: + two_groups_unpaired.hedges_g @@ -536,18 +543,19 @@ Let's compute the Hedges' g for our comparison. .. parsed-literal:: - DABEST v0.2.7 + DABEST v0.3.0 ============= Good afternoon! - The current time is Mon Oct 21 11:29:01 2019. + The current time is Mon Jan 20 17:12:46 2020. - The unpaired Hedges' g between Control 1 and Test 1 is 1.03 [95%CI 0.317, 1.62]. - The two-sided p-value of the Mann-Whitney test is 0.00163. + The unpaired Hedges' g between Control 1 and Test 1 is 1.03 [95%CI 0.349, 1.62]. + The p-value of the two-sided permutation t-test is 0.001. 5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated. The p-value(s) reported are the likelihood(s) of observing the effect size(s), if the null hypothesis of zero difference is true. + For each p-value, 5000 reshuffles of the control and test labels were performed. To get the results of all valid statistical tests, use `.hedges_g.statistical_tests` @@ -556,6 +564,7 @@ Let's compute the Hedges' g for our comparison. .. 
code-block:: python3 :linenos: + two_groups_unpaired.hedges_g.results @@ -595,6 +604,8 @@ Let's compute the Hedges' g for our comparison. + + @@ -617,13 +628,15 @@ Let's compute the Hedges' g for our comparison. - - - - - + + + + + - + + + @@ -644,37 +657,31 @@ Producing estimation plots To produce a **Gardner-Altman estimation plot**, simply use the ``.plot()`` method. You can read more about its genesis and design -inspiration here. +inspiration at :doc:`robust-beautiful`. Every effect size instance has access to the ``.plot()`` method. This means you can quickly create plots for different effect sizes easily. -.. .. code-block:: python3 -.. :linenos: -.. -.. import matplotlib as mpl -.. import warnings -.. # warnings.filterwarnings("ignore", category=mpl.cbook.mplDeprecation) -.. warnings.filterwarnings("ignore") - .. code-block:: python3 :linenos: - - two_groups_unpaired.mean_diff.plot(); + + + two_groups_unpaired.mean_diff.plot(); -.. image:: _images/tutorial_28_0.png +.. image:: _images/tutorial_27_0.png .. code-block:: python3 :linenos: + two_groups_unpaired.hedges_g.plot(); -.. image:: _images/tutorial_29_0.png +.. image:: _images/tutorial_28_0.png Instead of a Gardner-Altman plot, you can produce a **Cumming estimation @@ -682,16 +689,17 @@ plot** by setting ``float_contrast=False`` in the ``plot()`` method. This will plot the bootstrap effect sizes below the raw data, and also displays the the mean (gap) and ± standard deviation of each group (vertical ends) as gapped lines. This design was inspired by Edward -Tufte's dictum to maximise the data-ink ratio. +Tufte’s dictum to maximise the data-ink ratio. .. code-block:: python3 :linenos: + two_groups_unpaired.hedges_g.plot(float_contrast=False); -.. image:: _images/tutorial_31_0.png +.. image:: _images/tutorial_30_0.png For paired data, we use @@ -702,24 +710,26 @@ Both Gardner-Altman and Cumming plots support this. .. code-block:: python3 :linenos: + two_groups_paired.mean_diff.plot(); -.. 
image:: _images/tutorial_33_0.png +.. image:: _images/tutorial_32_0.png .. code-block:: python3 :linenos: + two_groups_paired.mean_diff.plot(float_contrast=False); -.. image:: _images/tutorial_34_0.png +.. image:: _images/tutorial_33_0.png -The DABEST package also implements a range of estimation plot +The ``dabest`` package also implements a range of estimation plot designs aimed at depicting common experimental designs. The **multi-two-group estimation plot** tiles two or more Cumming plots @@ -733,6 +743,7 @@ meta-analyses to aggregate and compare data from different experiments. .. code-block:: python3 :linenos: + multi_2group = dabest.load(df, idx=(("Control 1", "Test 1",), ("Control 2", "Test 2") )) @@ -741,7 +752,7 @@ meta-analyses to aggregate and compare data from different experiments. -.. image:: _images/tutorial_36_0.png +.. image:: _images/tutorial_35_0.png The multi-two-group design also accomodates paired comparisons. @@ -749,6 +760,7 @@ The multi-two-group design also accomodates paired comparisons. .. code-block:: python3 :linenos: + multi_2group_paired = dabest.load(df, idx=(("Control 1", "Test 1"), ("Control 2", "Test 2") ), @@ -759,7 +771,7 @@ The multi-two-group design also accomodates paired comparisons. -.. image:: _images/tutorial_38_0.png +.. image:: _images/tutorial_37_0.png The **shared control plot** displays another common experimental @@ -772,6 +784,7 @@ to ``idx`` has more than two data columns. .. code-block:: python3 :linenos: + shared_control = dabest.load(df, idx=("Control 1", "Test 1", "Test 2", "Test 3", "Test 4", "Test 5", "Test 6") @@ -780,6 +793,7 @@ to ``idx`` has more than two data columns. .. code-block:: python3 :linenos: + shared_control @@ -787,11 +801,11 @@ to ``idx`` has more than two data columns. .. parsed-literal:: - DABEST v0.2.7 + DABEST v0.3.0 ============= Good afternoon! - The current time is Mon Oct 21 11:29:24 2019. + The current time is Mon Jan 20 17:12:54 2020. 
Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -808,6 +822,7 @@ to ``idx`` has more than two data columns. .. code-block:: python3 :linenos: + shared_control.mean_diff @@ -815,33 +830,34 @@ to ``idx`` has more than two data columns. .. parsed-literal:: - DABEST v0.2.7 + DABEST v0.3.0 ============= Good afternoon! - The current time is Mon Oct 21 11:29:25 2019. + The current time is Mon Jan 20 17:12:58 2020. - The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.205, 0.774]. - The two-sided p-value of the Mann-Whitney test is 0.00163. + The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.221, 0.768]. + The p-value of the two-sided permutation t-test is 0.001. - The unpaired mean difference between Control 1 and Test 2 is -0.542 [95%CI -0.915, -0.206]. - The two-sided p-value of the Mann-Whitney test is 0.0114. + The unpaired mean difference between Control 1 and Test 2 is -0.542 [95%CI -0.914, -0.211]. + The p-value of the two-sided permutation t-test is 0.0042. - The unpaired mean difference between Control 1 and Test 3 is 0.174 [95%CI -0.273, 0.647]. - The two-sided p-value of the Mann-Whitney test is 0.409. + The unpaired mean difference between Control 1 and Test 3 is 0.174 [95%CI -0.295, 0.628]. + The p-value of the two-sided permutation t-test is 0.479. - The unpaired mean difference between Control 1 and Test 4 is 0.79 [95%CI 0.325, 1.33]. - The two-sided p-value of the Mann-Whitney test is 0.0531. + The unpaired mean difference between Control 1 and Test 4 is 0.79 [95%CI 0.306, 1.31]. + The p-value of the two-sided permutation t-test is 0.0042. - The unpaired mean difference between Control 1 and Test 5 is 0.265 [95%CI 0.0115, 0.497]. - The two-sided p-value of the Mann-Whitney test is 0.0411. + The unpaired mean difference between Control 1 and Test 5 is 0.265 [95%CI 0.0137, 0.497]. + The p-value of the two-sided permutation t-test is 0.0404. 
- The unpaired mean difference between Control 1 and Test 6 is 0.288 [95%CI 0.00913, 0.524]. - The two-sided p-value of the Mann-Whitney test is 0.0275. + The unpaired mean difference between Control 1 and Test 6 is 0.288 [95%CI -0.00441, 0.515]. + The p-value of the two-sided permutation t-test is 0.0324. 5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated. The p-value(s) reported are the likelihood(s) of observing the effect size(s), if the null hypothesis of zero difference is true. + For each p-value, 5000 reshuffles of the control and test labels were performed. To get the results of all valid statistical tests, use `.mean_diff.statistical_tests` @@ -850,19 +866,21 @@ to ``idx`` has more than two data columns. .. code-block:: python3 :linenos: + shared_control.mean_diff.plot(); -.. image:: _images/tutorial_43_0.png +.. image:: _images/tutorial_42_0.png -DABEST thus empowers you to robustly perform and elegantly present +``dabest`` thus empowers you to robustly perform and elegantly present complex visualizations and statistics. .. code-block:: python3 :linenos: + multi_groups = dabest.load(df, idx=(("Control 1", "Test 1",), ("Control 2", "Test 2","Test 3"), ("Control 3", "Test 4","Test 5", "Test 6") @@ -872,6 +890,7 @@ complex visualizations and statistics. .. code-block:: python3 :linenos: + multi_groups @@ -879,11 +898,11 @@ complex visualizations and statistics. .. parsed-literal:: - DABEST v0.2.7 + DABEST v0.3.0 ============= Good afternoon! - The current time is Mon Oct 21 11:29:26 2019. + The current time is Mon Jan 20 17:12:58 2020. Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -900,6 +919,7 @@ complex visualizations and statistics. .. code-block:: python3 :linenos: + multi_groups.mean_diff @@ -907,33 +927,34 @@ complex visualizations and statistics. .. parsed-literal:: - DABEST v0.2.7 + DABEST v0.3.0 ============= Good afternoon! 
- The current time is Mon Oct 21 11:29:27 2019. + The current time is Mon Jan 20 17:13:02 2020. - The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.205, 0.774]. - The two-sided p-value of the Mann-Whitney test is 0.00163. + The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.221, 0.768]. + The p-value of the two-sided permutation t-test is 0.001. - The unpaired mean difference between Control 2 and Test 2 is -1.38 [95%CI -1.93, -0.905]. - The two-sided p-value of the Mann-Whitney test is 2.6e-05. + The unpaired mean difference between Control 2 and Test 2 is -1.38 [95%CI -1.93, -0.895]. + The p-value of the two-sided permutation t-test is 0.0. - The unpaired mean difference between Control 2 and Test 3 is -0.666 [95%CI -1.29, -0.0788]. - The two-sided p-value of the Mann-Whitney test is 0.0439. + The unpaired mean difference between Control 2 and Test 3 is -0.666 [95%CI -1.3, -0.103]. + The p-value of the two-sided permutation t-test is 0.0352. - The unpaired mean difference between Control 3 and Test 4 is 0.362 [95%CI -0.111, 0.901]. - The two-sided p-value of the Mann-Whitney test is 0.365. + The unpaired mean difference between Control 3 and Test 4 is 0.362 [95%CI -0.114, 0.887]. + The p-value of the two-sided permutation t-test is 0.161. - The unpaired mean difference between Control 3 and Test 5 is -0.164 [95%CI -0.398, 0.0747]. - The two-sided p-value of the Mann-Whitney test is 0.156. + The unpaired mean difference between Control 3 and Test 5 is -0.164 [95%CI -0.404, 0.0742]. + The p-value of the two-sided permutation t-test is 0.208. - The unpaired mean difference between Control 3 and Test 6 is -0.14 [95%CI -0.4, 0.0937]. - The two-sided p-value of the Mann-Whitney test is 0.441. + The unpaired mean difference between Control 3 and Test 6 is -0.14 [95%CI -0.398, 0.102]. + The p-value of the two-sided permutation t-test is 0.282. 
5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated. The p-value(s) reported are the likelihood(s) of observing the effect size(s), if the null hypothesis of zero difference is true. + For each p-value, 5000 reshuffles of the control and test labels were performed. To get the results of all valid statistical tests, use `.mean_diff.statistical_tests` @@ -942,22 +963,23 @@ complex visualizations and statistics. .. code-block:: python3 :linenos: + multi_groups.mean_diff.plot(); -.. image:: _images/tutorial_48_0.png +.. image:: _images/tutorial_47_0.png -Using long (aka 'melted') data frames +Using long (aka ‘melted’) data frames ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -DABEST can also work with 'melted' or 'long' data. This term is -so used because each row will now correspond to a single datapoint, with -one column carrying the value and other columns carrying 'metadata' +``dabest`` can also work with ‘melted’ or ‘long’ data. This term is so +used because each row will now correspond to a single datapoint, with +one column carrying the value and other columns carrying ‘metadata’ describing that datapoint. -More details on wide vs long or 'melted' data can be found in this +More details on wide vs long or ‘melted’ data can be found in this `Wikipedia article `__. The `pandas @@ -967,6 +989,7 @@ gives recipes for melting dataframes. .. code-block:: python3 :linenos: + x='group' y='metric' @@ -1057,6 +1080,7 @@ When your data is in this format, you will need to specify the ``x`` and .. code-block:: python3 :linenos: + analysis_of_long_df = dabest.load(df_melted, idx=("Control 1", "Test 1"), x="group", y="metric") @@ -1067,11 +1091,11 @@ When your data is in this format, you will need to specify the ``x`` and .. parsed-literal:: - DABEST v0.2.7 + DABEST v0.3.0 ============= Good afternoon! - The current time is Mon Oct 21 11:29:27 2019. + The current time is Mon Jan 20 17:13:03 2020. 
Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -1083,11 +1107,12 @@ When your data is in this format, you will need to specify the ``x`` and .. code-block:: python3 :linenos: + analysis_of_long_df.mean_diff.plot(); -.. image:: _images/tutorial_53_0.png +.. image:: _images/tutorial_52_0.png Controlling plot aesthetics @@ -1098,12 +1123,13 @@ Changing the y-axes labels. .. code-block:: python3 :linenos: + two_groups_unpaired.mean_diff.plot(swarm_label="This is my\nrawdata", contrast_label="The bootstrap\ndistribtions!"); -.. image:: _images/tutorial_56_0.png +.. image:: _images/tutorial_55_0.png Color the rawdata according to another column in the dataframe. @@ -1111,21 +1137,23 @@ Color the rawdata according to another column in the dataframe. .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(color_col="Gender"); -.. image:: _images/tutorial_58_0.png +.. image:: _images/tutorial_57_0.png .. code-block:: python3 :linenos: + two_groups_paired.mean_diff.plot(color_col="Gender"); -.. image:: _images/tutorial_59_0.png +.. image:: _images/tutorial_58_0.png Changing the palette used with ``custom_palette``. Any valid matplotlib @@ -1134,21 +1162,23 @@ or seaborn color palette is accepted. .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(color_col="Gender", custom_palette="Dark2"); -.. image:: _images/tutorial_61_0.png +.. image:: _images/tutorial_60_0.png .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(custom_palette="Paired"); -.. image:: _images/tutorial_62_0.png +.. image:: _images/tutorial_61_0.png You can also create your own color palette. Create a dictionary where @@ -1162,6 +1192,7 @@ tuples. .. code-block:: python3 :linenos: + my_color_palette = {"Control 1" : "blue", "Test 1" : "purple", "Control 2" : "#cb4b16", # This is a hex string. @@ -1172,7 +1203,7 @@ tuples. -.. image:: _images/tutorial_64_0.png +.. 
image:: _images/tutorial_63_0.png By default, ``dabest.plot()`` will @@ -1186,13 +1217,14 @@ You can alter the default values with the ``swarm_desat`` and .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(custom_palette=my_color_palette, swarm_desat=0.75, halfviolin_desat=0.25); -.. image:: _images/tutorial_66_0.png +.. image:: _images/tutorial_65_0.png You can also change the sizes of the dots used in the rawdata swarmplot, @@ -1201,12 +1233,13 @@ and those used to indicate the effect sizes. .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(raw_marker_size=3, es_marker_size=12); -.. image:: _images/tutorial_68_0.png +.. image:: _images/tutorial_67_0.png Changing the y-limits for the rawdata axes, and for the contrast axes. @@ -1214,12 +1247,13 @@ Changing the y-limits for the rawdata axes, and for the contrast axes. .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(swarm_ylim=(0, 5), contrast_ylim=(-2, 2)); -.. image:: _images/tutorial_70_0.png +.. image:: _images/tutorial_69_0.png If your effect size is qualitatively inverted (ie. a smaller value is a @@ -1229,24 +1263,26 @@ better outcome), you can simply invert the tuple passed to .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(contrast_ylim=(2, -2), contrast_label="More negative is better!"); -.. image:: _images/tutorial_72_0.png +.. image:: _images/tutorial_71_0.png You can add minor ticks and also change the tick frequency by accessing the axes directly. -The estimation plot produced by ``dabest.plot()`` has 2 axes. The first one +Each estimation plot produced by ``dabest`` has 2 axes. The first one contains the rawdata swarmplot; the second one contains the bootstrap effect size differences. .. code-block:: python3 :linenos: + import matplotlib.ticker as Ticker f = two_groups_unpaired.mean_diff.plot() @@ -1262,12 +1298,13 @@ effect size differences. -.. image:: _images/tutorial_74_0.png +.. image:: _images/tutorial_73_0.png .. 
code-block:: python3 :linenos: + f = multi_2group.mean_diff.plot(swarm_ylim=(0,6), contrast_ylim=(-3, 1)) @@ -1282,14 +1319,15 @@ effect size differences. -.. image:: _images/tutorial_75_0.png +.. image:: _images/tutorial_74_0.png -.. _inset_plot: + +.. _inset plot: Creating estimation plots in existing axes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*Implemented in v0.2.6 by Adam Nekimken.* +*Implemented in v0.2.6 by Adam Nekimken*. ``dabest.plot`` has an ``ax`` keyword that accepts any Matplotlib ``Axes``. The entire estimation plot will be created in the specified @@ -1298,12 +1336,11 @@ Creating estimation plots in existing axes .. code-block:: python3 :linenos: + from matplotlib import pyplot as plt f, axx = plt.subplots(nrows=2, ncols=2, figsize=(15, 15), - - # ensure proper width-wise spacing. - gridspec_kw={'wspace': 0.25} + gridspec_kw={'wspace': 0.25} # ensure proper width-wise spacing. ) two_groups_unpaired.mean_diff.plot(ax=axx.flat[0]); @@ -1316,7 +1353,7 @@ Creating estimation plots in existing axes -.. image:: _images/tutorial_77_0.png +.. image:: _images/tutorial_76_0.png In this case, to access the individual rawdata axes, use @@ -1326,15 +1363,17 @@ In this case, to access the individual rawdata axes, use .. code-block:: python3 :linenos: + topleft_axes = axx.flat[0] topleft_axes.set_ylabel("New y-axis label for rawdata") topleft_axes.contrast_axes.set_ylabel("New y-axis label for effect size") + f -.. image:: _images/tutorial_79_0.png +.. image:: _images/tutorial_78_0.png @@ -1343,7 +1382,7 @@ Applying style sheets *Implemented in v0.2.0*. -DABEST can now apply `matplotlib style +``dabest`` can apply `matplotlib style sheets `__ to estimation plots. You can refer to this `gallery `__ @@ -1352,15 +1391,18 @@ of style sheets for reference. .. code-block:: python3 :linenos: + import matplotlib.pyplot as plt plt.style.use("dark_background") .. code-block:: python3 :linenos: + multi_2group.mean_diff.plot(); -.. image:: _images/tutorial_82_0.png +.. 
image:: _images/tutorial_81_0.png + diff --git a/setup.py b/setup.py index 5eccd2f3..e11f28fe 100644 --- a/setup.py +++ b/setup.py @@ -34,21 +34,33 @@ author_email='joseshowh@gmail.com', maintainer='Joses W. Ho', maintainer_email='joseshowh@gmail.com', - version='0.2.8', + version='0.3.0', description=DESCRIPTION, long_description=LONG_DESCRIPTION, packages=find_packages(), install_requires=[ - 'numpy~=1.15', + 'numpy~=1.17', 'scipy~=1.2', 'pandas~=0.25,!=0.25.2', 'matplotlib~=3.0', 'seaborn~=0.9', - 'lqrt~=0.3.2' + 'lqrt~=0.3' ], - extras_require={'dev': ['pytest~=5.2', 'pytest-mpl~=0.10']}, + extras_require={'dev': ['pytest~=5.3', 'pytest-mpl~=0.11']}, python_requires='~=3.5', + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "Intended Audience :: Education", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Visualization", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Operating System :: Unix", + "Operating System :: MacOS", + ], url='https://acclab.github.io/DABEST-python-docs', download_url='https://www.github.com/ACCLAB/DABEST-python', license='BSD 3-clause Clear License'
pct_low pct_high pct_interval_idxpvalue_permutationpermutation_count bootstraps resamples random_seedFalse 0.48029 950.2051610.773647(145, 4893)0.1974270.7587520.2208690.767721(140, 4889)0.2156970.761716 (125, 4875)[-0.05989473868674011, -0.018608309424335, 0.0...0.0015000[-0.157303571150468, -0.025932185794146356, 0.... 5000 12345 0.002094ci bca_low bca_highpvalue_permutation pvalue_welch statistic_welch pvalue_students_tFalse 0.48029 950.2051610.7736470.2208690.7677210.001 0.002094 -3.308806 0.002057pct_low pct_high pct_interval_idxpvalue_permutationpermutation_count bootstraps resamples random_seedFalse 1.025525 950.3165061.616235(42, 4725)0.444861.7451460.3493941.618579(42, 4724)0.4728441.74166 (125, 4875)[-0.1491709040527835, -0.0504066101302326, 0.0...0.0015000[-0.3617512915188043, -0.06120428036887727, 0.... 5000 12345 0.002094