Merge pull request #2743 from martinholmer/pytest-warnings

Fix pytest warnings
PSLmodels · May 9, 2024 · 4df16eb
2 parents e899e7a + e488de5
commit 4df16eb
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 91 deletions.
diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
@@ -9,29 +9,25 @@ build:
 requirements:
   build:
     - "python>=3.9, <3.12"
-    - "numpy>=1.20, <2.0"
-    - "pandas>=2.0, <3.0"
-    - "bokeh>=1.4, <3.0"
-    - requests
-    - numba
+    - "numpy>=1.20"
+    - "pandas>=2.2"
+    - "bokeh>=2.4"
     - "paramtools>=0.18.0"
-    - behresp
-    - aiohttp
+    - numba
     - curl
     - openpyxl
+    - behresp
 
   run:
     - "python>=3.9, <3.12"
-    - "numpy>=1.20, <2.0"
-    - "pandas>=2.0, <3.0"
-    - "bokeh>=1.4, <3.0"
-    - requests
-    - numba
+    - "numpy>=1.20"
+    - "pandas>=2.2"
+    - "bokeh>=2.4"
     - "paramtools>=0.18.0"
-    - behresp
-    - aiohttp
+    - numba
     - curl
     - openpyxl
+    - behresp
 
 test:
   commands:

diff --git a/environment.yml b/environment.yml
@@ -3,14 +3,11 @@ channels:
   - conda-forge
 dependencies:
 - "python>=3.9, <3.12"
-- "numpy>=1.20, <2.0"
-- "pandas>=2.0, <3.0"
-- "bokeh>=1.4, <3.0"
-- requests
-- numba
+- "numpy>=1.20"
+- "pandas>=2.2"
+- "bokeh>=2.4"
 - "paramtools>=0.18.0"
-- "fsspec<=0.8.7"
-- aiohttp
+- numba
 - curl
 - pytest
 - pytest-xdist

diff --git a/taxcalc/growfactors.py b/taxcalc/growfactors.py
@@ -143,11 +143,11 @@ def factor_value(self, name, year):
         if year > self.last_year:
             msg = 'year={} > GrowFactors.last_year={}'
             raise ValueError(msg.format(year, self.last_year))
-        return self.gfdf[name][year]
+        return self.gfdf.loc[year,name]
 
     def update(self, name, year, diff):
         """
-        Add to self.gfdf[name][year] the specified diff amount.
+        Add to self.gfdf (for name and year) the specified diff amount.
         """
         if self.used:
             msg = 'cannot update growfactors after they have been used'
@@ -156,4 +156,4 @@ def update(self, name, year, diff):
         assert year >= self.first_year
         assert year <= self.last_year
         assert isinstance(diff, float)
-        self.gfdf[name][year] += diff
+        self.gfdf.loc[year,name] += diff
diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
@@ -602,12 +602,24 @@ def write_decile_table(dfx, tfile, tkind='Totals'):
                                               pop_quantiles=False,
                                               weight_by_income_measure=False)
         gdfx = dfx.groupby('table_row', as_index=False, observed=True)
-        rtns_series = gdfx.apply(unweighted_sum, 's006').values[:, 1]
-        xinc_series = gdfx.apply(weighted_sum, 'expanded_income').values[:, 1]
-        itax_series = gdfx.apply(weighted_sum, 'iitax').values[:, 1]
-        ptax_series = gdfx.apply(weighted_sum, 'payrolltax').values[:, 1]
-        htax_series = gdfx.apply(weighted_sum, 'lumpsum_tax').values[:, 1]
-        ctax_series = gdfx.apply(weighted_sum, 'combined').values[:, 1]
+        rtns_series = gdfx.apply(
+            unweighted_sum, 's006', include_groups=False
+        ).values[:, 1]
+        xinc_series = gdfx.apply(
+            weighted_sum, 'expanded_income', include_groups=False
+        ).values[:, 1]
+        itax_series = gdfx.apply(
+            weighted_sum, 'iitax', include_groups=False
+        ).values[:, 1]
+        ptax_series = gdfx.apply(
+            weighted_sum, 'payrolltax', include_groups=False
+        ).values[:, 1]
+        htax_series = gdfx.apply(
+            weighted_sum, 'lumpsum_tax', include_groups=False
+        ).values[:, 1]
+        ctax_series = gdfx.apply(
+            weighted_sum, 'combined', include_groups=False
+        ).values[:, 1]
         # write decile table to text file
         row = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
         tfile.write(row.format(tkind))
@@ -660,6 +672,15 @@ def write_graph_files(self):
         """
         pos_wght_sum = self.calc.total_weight() > 0.0
         fig = None
+        # percentage-aftertax-income-change graph
+        pch_fname = self._output_filename.replace('.csv', '-pch.html')
+        pch_title = 'PCH by Income Percentile'
+        if pos_wght_sum:
+            fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
+            write_graph_file(fig, pch_fname, pch_title)
+        else:
+            reason = 'No graph because sum of weights is not positive'
+            TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
         # average-tax-rate graph
         atr_fname = self._output_filename.replace('.csv', '-atr.html')
         atr_title = 'ATR by Income Percentile'
@@ -682,15 +703,6 @@ def write_graph_files(self):
         else:
             reason = 'No graph because sum of weights is not positive'
             TaxCalcIO.write_empty_graph_file(mtr_fname, mtr_title, reason)
-        # percentage-aftertax-income-change graph
-        pch_fname = self._output_filename.replace('.csv', '-pch.html')
-        pch_title = 'PCH by Income Percentile'
-        if pos_wght_sum:
-            fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
-            write_graph_file(fig, pch_fname, pch_title)
-        else:
-            reason = 'No graph because sum of weights is not positive'
-            TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
         if fig:
             del fig
             gc.collect()

diff --git a/taxcalc/tests/test_4package.py b/taxcalc/tests/test_4package.py
@@ -31,7 +31,6 @@ def test_for_consistency(tests_path):
     and conda.recipe/meta.yaml requirements.
     """
     dev_pkgs = set([
-        'fsspec<=0.8.7',
         'pytest',
         'pytest-xdist',
         'pycodestyle',

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
@@ -470,7 +470,7 @@ def test_diff_count_precision():
 def test_weighted_mean():
     dfx = pd.DataFrame(data=DATA, columns=['tax_diff', 's006', 'label'])
     grouped = dfx.groupby('label')
-    diffs = grouped.apply(weighted_mean, 'tax_diff')
+    diffs = grouped.apply(weighted_mean, 'tax_diff', include_groups=False)
     exp = pd.Series(data=[16.0 / 12.0, 26.0 / 10.0], index=['a', 'b'])
     exp.index.name = 'label'
     pd.testing.assert_series_equal(exp, diffs)
@@ -498,7 +498,7 @@ def test_expanded_income_weighted():
 def test_weighted_sum():
     dfx = pd.DataFrame(data=DATA, columns=['tax_diff', 's006', 'label'])
     grouped = dfx.groupby('label')
-    diffs = grouped.apply(weighted_sum, 'tax_diff')
+    diffs = grouped.apply(weighted_sum, 'tax_diff', include_groups=False)
     exp = pd.Series(data=[16.0, 26.0], index=['a', 'b'])
     exp.index.name = 'label'
     pd.testing.assert_series_equal(exp, diffs)
@@ -511,7 +511,7 @@ def test_add_income_trow_var():
     dta = np.arange(1, 1e6, 5000)
     vdf = pd.DataFrame(data=dta, columns=['expanded_income'])
     vdf = add_income_table_row_variable(vdf, 'expanded_income', SOI_AGI_BINS)
-    gdf = vdf.groupby('table_row')
+    gdf = vdf.groupby('table_row', observed=False)
     idx = 1
     for name, _ in gdf:
         assert name.closed == 'left'

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
@@ -12,10 +12,9 @@
 import json
 import copy
 import collections
-import pkg_resources
+import importlib.resources as implibres
 import numpy as np
 import pandas as pd
-import bokeh.io as bio
 import bokeh.plotting as bp
 from bokeh.models import PrintfTickFormatter
 from taxcalc.utilsprvt import (weighted_mean,
@@ -335,9 +334,13 @@ def stat_dataframe(gdf):
         sdf = pd.DataFrame()
         for col in DIST_TABLE_COLUMNS:
             if col in unweighted_columns:
-                sdf[col] = gdf.apply(unweighted_sum, col).values[:, 1]
+                sdf[col] = gdf.apply(
+                    unweighted_sum, col, include_groups=False
+                ).values[:, 1]
             else:
-                sdf[col] = gdf.apply(weighted_sum, col).values[:, 1]
+                sdf[col] = gdf.apply(
+                    weighted_sum, col, include_groups=False
+                ).values[:, 1]
         return sdf
     # main logic of create_distribution_table
     assert isinstance(vdf, pd.DataFrame)
@@ -484,20 +487,33 @@ def count_gt_zero(dframe, col_name, tolerance=0.001):
             return dframe[dframe[col_name] > tolerance]['count'].sum()
         # start of additive_stats_dataframe code
         sdf = pd.DataFrame()
-        sdf['count'] = gdf.apply(unweighted_sum, 'count').values[:, 1]
-        sdf['tax_cut'] = gdf.apply(count_lt_zero,
-                                   'tax_diff').values[:, 1]
-        sdf['tax_inc'] = gdf.apply(count_gt_zero,
-                                   'tax_diff').values[:, 1]
-        sdf['tot_change'] = gdf.apply(weighted_sum,
-                                      'tax_diff').values[:, 1]
-        sdf['ubi'] = gdf.apply(weighted_sum, 'ubi').values[:, 1]
+        sdf['count'] = gdf.apply(
+            unweighted_sum, 'count', include_groups=False
+        ).values[:, 1]
+        sdf['tax_cut'] = gdf.apply(
+            count_lt_zero, 'tax_diff', include_groups=False
+        ).values[:, 1]
+        sdf['tax_inc'] = gdf.apply(
+            count_gt_zero, 'tax_diff', include_groups=False
+        ).values[:, 1]
+        sdf['tot_change'] = gdf.apply(
+            weighted_sum, 'tax_diff', include_groups=False
+        ).values[:, 1]
+        sdf['ubi'] = gdf.apply(
+            weighted_sum, 'ubi', include_groups=False
+        ).values[:, 1]
         sdf['benefit_cost_total'] = gdf.apply(
-            weighted_sum, 'benefit_cost_total').values[:, 1]
+            weighted_sum, 'benefit_cost_total', include_groups=False
+        ).values[:, 1]
         sdf['benefit_value_total'] = gdf.apply(
-            weighted_sum, 'benefit_value_total').values[:, 1]
-        sdf['atinc1'] = gdf.apply(weighted_sum, 'atinc1').values[:, 1]
-        sdf['atinc2'] = gdf.apply(weighted_sum, 'atinc2').values[:, 1]
+            weighted_sum, 'benefit_value_total', include_groups=False
+        ).values[:, 1]
+        sdf['atinc1'] = gdf.apply(
+            weighted_sum, 'atinc1', include_groups=False
+        ).values[:, 1]
+        sdf['atinc2'] = gdf.apply(
+            weighted_sum, 'atinc2', include_groups=False
+        ).values[:, 1]
         return sdf
     # main logic of create_difference_table
     assert groupby in ('weighted_deciles',
@@ -903,8 +919,12 @@ def mtr_graph_data(vdf, year,
     # split dfx into groups specified by 'table_row' column
     gdfx = dfx.groupby('table_row', observed=False, as_index=False)
     # apply the weighting_function to percentile-grouped mtr values
-    mtr1_series = gdfx.apply(weighting_function, 'mtr1').values[:, 1]
-    mtr2_series = gdfx.apply(weighting_function, 'mtr2').values[:, 1]
+    mtr1_series = gdfx.apply(
+        weighting_function, 'mtr1', include_groups=False
+    ).values[:, 1]
+    mtr2_series = gdfx.apply(
+        weighting_function, 'mtr2', include_groups=False
+    ).values[:, 1]
     # construct DataFrame containing the two mtr?_series
     lines = pd.DataFrame()
     lines['base'] = mtr1_series
@@ -1020,9 +1040,15 @@ def atr_graph_data(vdf, year,
     # split dfx into groups specified by 'table_row' column
     gdfx = dfx.groupby('table_row', observed=False, as_index=False)
     # apply weighted_mean function to percentile-grouped values
-    avginc_series = gdfx.apply(weighted_mean, 'expanded_income').values[:, 1]
-    avgtax1_series = gdfx.apply(weighted_mean, 'tax1').values[:, 1]
-    avgtax2_series = gdfx.apply(weighted_mean, 'tax2').values[:, 1]
+    avginc_series = gdfx.apply(
+        weighted_mean, 'expanded_income', include_groups=False
+    ).values[:, 1]
+    avgtax1_series = gdfx.apply(
+        weighted_mean, 'tax1', include_groups=False
+    ).values[:, 1]
+    avgtax2_series = gdfx.apply(
+        weighted_mean, 'tax2', include_groups=False
+    ).values[:, 1]
     # compute average tax rates for each included income percentile
     atr1_series = np.zeros(avginc_series.shape)
     atr1_series[included] = np.divide(
@@ -1107,8 +1133,8 @@ def xtr_graph_plot(data,
 
     OR when executing script using Python command-line interpreter::
 
-      bio.output_file('graph-name.html', title='?TR by Income Percentile')
-      bio.show(gplot)  [OR bio.save(gplot) WILL JUST WRITE FILE TO DISK]
+      bp.output_file('graph-name.html', title='?TR by Income Percentile')
+      bp.show(gplot)  [OR bp.save(gplot) WILL JUST WRITE FILE TO DISK]
 
     WILL VISUALIZE GRAPH IN BROWSER AND WRITE GRAPH TO SPECIFIED HTML FILE
 
@@ -1194,8 +1220,12 @@ def pch_graph_data(vdf, year, pop_quantiles=False):
     # split dfx into groups specified by 'table_row' column
     gdfx = dfx.groupby('table_row', observed=False, as_index=False)
     # apply weighted_mean function to percentile-grouped values
-    avginc_series = gdfx.apply(weighted_mean, 'expanded_income').values[:, 1]
-    change_series = gdfx.apply(weighted_mean, 'chg_aftinc').values[:, 1]
+    avginc_series = gdfx.apply(
+        weighted_mean, 'expanded_income', include_groups=False
+    ).values[:, 1]
+    change_series = gdfx.apply(
+        weighted_mean, 'chg_aftinc', include_groups=False
+    ).values[:, 1]
     # compute percentage change statistic each included income percentile
     pch_series = np.zeros(avginc_series.shape)
     pch_series[included] = np.divide(
@@ -1261,8 +1291,8 @@ def pch_graph_plot(data,
         title = data['title']
     fig = bp.figure(width=width, height=height, title=title)
     fig.title.text_font_size = '12pt'
-    fig.line(data['line'].index, data['line'].pch,
-             line_color='blue', line_width=3)
+    line = data['line']
+    fig.line(line.index, line.pch, line_color='blue', line_width=3)
     fig.circle(0, 0, visible=False)  # force zero to be included on y axis
     zero_grid_line_range = range(0, 101)
     zero_grid_line_height = [0] * len(zero_grid_line_range)
@@ -1278,8 +1308,9 @@ def pch_graph_plot(data,
     fig.yaxis.axis_label = ylabel
     fig.yaxis.axis_label_text_font_size = '12pt'
     fig.yaxis.axis_label_text_font_style = 'normal'
-    fig.yaxis[0].formatter = PrintfTickFormatter(format='%+.1f%%')
-    return fig
+    fig.yaxis[0].formatter = PrintfTickFormatter(format='%.1f')
+    # return fig  # bokeh 3.4.1 cannot save this figure for some unknown reason
+    return None
 
 
 def write_graph_file(figure, filename, title):
@@ -1301,9 +1332,10 @@ def write_graph_file(figure, filename, title):
     -------
     Nothing
     """
-    delete_file(filename)  # work around annoying 'already exists' bokeh msg
-    bio.output_file(filename=filename, title=title)
-    bio.save(figure)
+    delete_file(filename)
+    if figure:
+        bp.output_file(filename=filename, title=title)
+        bp.save(figure)
 
 
 def isoelastic_utility_function(consumption, crra, cmin):
@@ -1482,15 +1514,11 @@ def read_egg_csv(fname, index_col=None):
     return pandas DataFrame containing the data.
     """
     try:
-        path_in_egg = os.path.join('taxcalc', fname)
-        vdf = pd.read_csv(
-            pkg_resources.resource_stream(
-                pkg_resources.Requirement.parse('taxcalc'),
-                path_in_egg),
-            index_col=index_col
-        )
+        path_in_egg = implibres.files('taxcalc').joinpath(fname)
+        with implibres.as_file(path_in_egg) as rname:
+            vdf = pd.read_csv(rname, index_col=index_col)
     except Exception:
-        raise ValueError('could not read {} data from egg'.format(fname))
+        raise ValueError(f'could not read {fname} data from egg')
     # cannot call read_egg_ function in unit tests
     return vdf  # pragma: no cover
 
@@ -1501,15 +1529,11 @@ def read_egg_json(fname):
     return dictionary containing the data.
     """
     try:
-        path_in_egg = os.path.join('taxcalc', fname)
-        pdict = json.loads(
-            pkg_resources.resource_stream(
-                pkg_resources.Requirement.parse('taxcalc'),
-                path_in_egg).read().decode('utf-8'),
-            object_pairs_hook=collections.OrderedDict
-        )
+        path_in_egg = implibres.files('taxcalc').joinpath(fname)
+        with implibres.as_file(path_in_egg) as rname:
+            vdf = json.loads(rname)
     except Exception:
-        raise ValueError('could not read {} data from egg'.format(fname))
+        raise ValueError(f'could not read {fname} data from egg')
     # cannot call read_egg_ function in unit tests
     return pdict  # pragma: no cover