Merge pull request #147 from Jacobluke-/vnbdev

∆∆ updates and Issue #144 fixes
ACCLAB · Aug 7, 2023 · 010711e · 010711e
2 parents f41f3d5 + 486005a
commit 010711e
Show file tree

Hide file tree

Showing 122 changed files with 1,866 additions and 560 deletions.
diff --git a/.github/workflows/test-image.yaml b/.github/workflows/test-image.yaml
@@ -8,11 +8,11 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
         with:
-          python-version: 3.9
+          python-version: 3.8
           cache: "pip"
           cache-dependency-path: settings.ini
       - name: Run pytest
         run: |
           python -m pip install --upgrade pip
-          pip install .[dev]
+          pip install -e '.[dev]'
           pytest nbs/tests/
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ Python**. This new version provided the following new features:
 2.  **Proportional data.** Generates proportional bar plots,
     proportional differences, and calculates Cohen’s h. Also enables
     plotting Sankey diagrams for paired binary data. This is the
-    estimation equivalent to a bar chart with Fischer’s exact test.
+    estimation equivalent to a bar chart with Fisher’s exact test.
 
 3.  **The $\Delta\Delta$ plot.** Calculates the delta-delta
     ($\Delta\Delta$) for 2 × 2 experimental designs and plots the four

diff --git a/bumpver.toml b/bumpver.toml
@@ -0,0 +1,23 @@
+# bumpver.toml
+# This file configures BumpVer. Do not use nbdev_bump_version to bump the version,
+# because it can only increment a single digit.
+# After finishing all the setup for this package, run through all the notebooks for version updates in docs.
+
+[bumpver]
+current_version = "2023.03.29"
+version_pattern = "YYYY.0M.0D"
+commit_message = "bump version {old_version} -> {new_version}"
+commit = true
+tag = true
+push = false
+
+[bumpver.file_patterns]
+"bumpver.toml" = [
+    'current_version = "{version}"',
+]
+"settings.ini" = [
+    'version = {version}'
+]
+"dabest/__init__.py" = [
+    '__version__ = "{version}"'
+]
diff --git a/dabest/__init__.py b/dabest/__init__.py
@@ -1,5 +1,5 @@
-from ._api import load
+from ._api import load, prop_dataset
 from ._stats_tools import effsize as effsize
 from ._classes import TwoGroupsEffectSize, PermutationTest
 
-__version__ = "2023.2.14"
+__version__ = "2023.03.29"
diff --git a/dabest/_api.py b/dabest/_api.py
@@ -1,7 +1,7 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/load.ipynb.
 
 # %% auto 0
-__all__ = ['load']
+__all__ = ['load', 'prop_dataset']
 
 # %% ../nbs/API/load.ipynb 4
 def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
@@ -77,3 +77,54 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
     return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta)
 
 
+
+# %% ../nbs/API/load.ipynb 5
+import numpy as np
+from typing import Union, Optional
+
+def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types.
+                 group_names: Optional[list] = None):
+    '''
+    Convenience function to generate a DataFrame of binary data.
+    '''
+    import pandas as pd
+
+    if isinstance(group, dict):
+        # If group_names is not provided, use the keys of the dict as group_names
+        if group_names is None:
+            group_names = list(group.keys())
+        elif not set(group_names) == set(group.keys()):
+            # Check if the group_names provided is the same as the keys of the dict
+            raise ValueError('group_names must be the same as the keys of the dict.')
+        # Check that each value in the dict is a list, tuple, or numpy ndarray
+        if not all([isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]):
+            raise ValueError('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.')
+        # Check that each value in the dict has exactly two elements
+        if not all([len(group[name]) == 2 for name in group_names]):
+            raise ValueError('Each parent key should have only two elements.')
+        group_val = group
+
+    else:
+        if group_names is None:
+            raise ValueError('group_names must be provided if group is not a dict.')
+        # Check that the length of group is twice the length of group_names
+        if not len(group) == 2 * len(group_names):
+            raise ValueError('The length of group must be two times of the length of group_names.')
+        group_val = {group_names[i]: [group[i*2], group[i*2+1]] for i in range(len(group_names))}
+
+    # Check that the sum of the values under each key of group_val is the same
+    if not all([sum(group_val[name]) == sum(group_val[group_names[0]]) for name in group_val.keys()]):
+        raise ValueError('The sum of values under each key must be the same.')
+
+    id_col = pd.Series(range(1, sum(group_val[group_names[0]])+1))
+
+    final_df = pd.DataFrame()
+
+    for name in group_val.keys():
+        col = np.repeat(0, group_val[name][0]).tolist() + np.repeat(1, group_val[name][1]).tolist()
+        df = pd.DataFrame({name:col})
+        final_df = pd.concat([final_df, df], axis=1)
+
+    final_df['ID'] = id_col
+
+    return final_df