Skip to content

Commit

Permalink
Merge pull request #2743 from martinholmer/pytest-warnings
Browse files (browse the repository at this point in the history)
Fix pytest warnings
  • Loading branch information
martinholmer committed May 9, 2024
2 parents e899e7a + e488de5 commit 4df16eb
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 91 deletions.
24 changes: 10 additions & 14 deletions conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,25 @@ build:
requirements:
build:
- "python>=3.9, <3.12"
- "numpy>=1.20, <2.0"
- "pandas>=2.0, <3.0"
- "bokeh>=1.4, <3.0"
- requests
- numba
- "numpy>=1.20"
- "pandas>=2.2"
- "bokeh>=2.4"
- "paramtools>=0.18.0"
- behresp
- aiohttp
- numba
- curl
- openpyxl
- behresp

run:
- "python>=3.9, <3.12"
- "numpy>=1.20, <2.0"
- "pandas>=2.0, <3.0"
- "bokeh>=1.4, <3.0"
- requests
- numba
- "numpy>=1.20"
- "pandas>=2.2"
- "bokeh>=2.4"
- "paramtools>=0.18.0"
- behresp
- aiohttp
- numba
- curl
- openpyxl
- behresp

test:
commands:
Expand Down
11 changes: 4 additions & 7 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@ channels:
- conda-forge
dependencies:
- "python>=3.9, <3.12"
- "numpy>=1.20, <2.0"
- "pandas>=2.0, <3.0"
- "bokeh>=1.4, <3.0"
- requests
- numba
- "numpy>=1.20"
- "pandas>=2.2"
- "bokeh>=2.4"
- "paramtools>=0.18.0"
- "fsspec<=0.8.7"
- aiohttp
- numba
- curl
- pytest
- pytest-xdist
Expand Down
6 changes: 3 additions & 3 deletions taxcalc/growfactors.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ def factor_value(self, name, year):
if year > self.last_year:
msg = 'year={} > GrowFactors.last_year={}'
raise ValueError(msg.format(year, self.last_year))
return self.gfdf[name][year]
return self.gfdf.loc[year,name]

def update(self, name, year, diff):
"""
Add to self.gfdf[name][year] the specified diff amount.
Add to self.gfdf (for name and year) the specified diff amount.
"""
if self.used:
msg = 'cannot update growfactors after they have been used'
Expand All @@ -156,4 +156,4 @@ def update(self, name, year, diff):
assert year >= self.first_year
assert year <= self.last_year
assert isinstance(diff, float)
self.gfdf[name][year] += diff
self.gfdf.loc[year,name] += diff
42 changes: 27 additions & 15 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,12 +602,24 @@ def write_decile_table(dfx, tfile, tkind='Totals'):
pop_quantiles=False,
weight_by_income_measure=False)
gdfx = dfx.groupby('table_row', as_index=False, observed=True)
rtns_series = gdfx.apply(unweighted_sum, 's006').values[:, 1]
xinc_series = gdfx.apply(weighted_sum, 'expanded_income').values[:, 1]
itax_series = gdfx.apply(weighted_sum, 'iitax').values[:, 1]
ptax_series = gdfx.apply(weighted_sum, 'payrolltax').values[:, 1]
htax_series = gdfx.apply(weighted_sum, 'lumpsum_tax').values[:, 1]
ctax_series = gdfx.apply(weighted_sum, 'combined').values[:, 1]
rtns_series = gdfx.apply(
unweighted_sum, 's006', include_groups=False
).values[:, 1]
xinc_series = gdfx.apply(
weighted_sum, 'expanded_income', include_groups=False
).values[:, 1]
itax_series = gdfx.apply(
weighted_sum, 'iitax', include_groups=False
).values[:, 1]
ptax_series = gdfx.apply(
weighted_sum, 'payrolltax', include_groups=False
).values[:, 1]
htax_series = gdfx.apply(
weighted_sum, 'lumpsum_tax', include_groups=False
).values[:, 1]
ctax_series = gdfx.apply(
weighted_sum, 'combined', include_groups=False
).values[:, 1]
# write decile table to text file
row = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
tfile.write(row.format(tkind))
Expand Down Expand Up @@ -660,6 +672,15 @@ def write_graph_files(self):
"""
pos_wght_sum = self.calc.total_weight() > 0.0
fig = None
# percentage-aftertax-income-change graph
pch_fname = self._output_filename.replace('.csv', '-pch.html')
pch_title = 'PCH by Income Percentile'
if pos_wght_sum:
fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
write_graph_file(fig, pch_fname, pch_title)
else:
reason = 'No graph because sum of weights is not positive'
TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
# average-tax-rate graph
atr_fname = self._output_filename.replace('.csv', '-atr.html')
atr_title = 'ATR by Income Percentile'
Expand All @@ -682,15 +703,6 @@ def write_graph_files(self):
else:
reason = 'No graph because sum of weights is not positive'
TaxCalcIO.write_empty_graph_file(mtr_fname, mtr_title, reason)
# percentage-aftertax-income-change graph
pch_fname = self._output_filename.replace('.csv', '-pch.html')
pch_title = 'PCH by Income Percentile'
if pos_wght_sum:
fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
write_graph_file(fig, pch_fname, pch_title)
else:
reason = 'No graph because sum of weights is not positive'
TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
if fig:
del fig
gc.collect()
Expand Down
1 change: 0 additions & 1 deletion taxcalc/tests/test_4package.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def test_for_consistency(tests_path):
and conda.recipe/meta.yaml requirements.
"""
dev_pkgs = set([
'fsspec<=0.8.7',
'pytest',
'pytest-xdist',
'pycodestyle',
Expand Down
6 changes: 3 additions & 3 deletions taxcalc/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def test_diff_count_precision():
def test_weighted_mean():
dfx = pd.DataFrame(data=DATA, columns=['tax_diff', 's006', 'label'])
grouped = dfx.groupby('label')
diffs = grouped.apply(weighted_mean, 'tax_diff')
diffs = grouped.apply(weighted_mean, 'tax_diff', include_groups=False)
exp = pd.Series(data=[16.0 / 12.0, 26.0 / 10.0], index=['a', 'b'])
exp.index.name = 'label'
pd.testing.assert_series_equal(exp, diffs)
Expand Down Expand Up @@ -498,7 +498,7 @@ def test_expanded_income_weighted():
def test_weighted_sum():
dfx = pd.DataFrame(data=DATA, columns=['tax_diff', 's006', 'label'])
grouped = dfx.groupby('label')
diffs = grouped.apply(weighted_sum, 'tax_diff')
diffs = grouped.apply(weighted_sum, 'tax_diff', include_groups=False)
exp = pd.Series(data=[16.0, 26.0], index=['a', 'b'])
exp.index.name = 'label'
pd.testing.assert_series_equal(exp, diffs)
Expand All @@ -511,7 +511,7 @@ def test_add_income_trow_var():
dta = np.arange(1, 1e6, 5000)
vdf = pd.DataFrame(data=dta, columns=['expanded_income'])
vdf = add_income_table_row_variable(vdf, 'expanded_income', SOI_AGI_BINS)
gdf = vdf.groupby('table_row')
gdf = vdf.groupby('table_row', observed=False)
idx = 1
for name, _ in gdf:
assert name.closed == 'left'
Expand Down
120 changes: 72 additions & 48 deletions taxcalc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
import json
import copy
import collections
import pkg_resources
import importlib.resources as implibres
import numpy as np
import pandas as pd
import bokeh.io as bio
import bokeh.plotting as bp
from bokeh.models import PrintfTickFormatter
from taxcalc.utilsprvt import (weighted_mean,
Expand Down Expand Up @@ -335,9 +334,13 @@ def stat_dataframe(gdf):
sdf = pd.DataFrame()
for col in DIST_TABLE_COLUMNS:
if col in unweighted_columns:
sdf[col] = gdf.apply(unweighted_sum, col).values[:, 1]
sdf[col] = gdf.apply(
unweighted_sum, col, include_groups=False
).values[:, 1]
else:
sdf[col] = gdf.apply(weighted_sum, col).values[:, 1]
sdf[col] = gdf.apply(
weighted_sum, col, include_groups=False
).values[:, 1]
return sdf
# main logic of create_distribution_table
assert isinstance(vdf, pd.DataFrame)
Expand Down Expand Up @@ -484,20 +487,33 @@ def count_gt_zero(dframe, col_name, tolerance=0.001):
return dframe[dframe[col_name] > tolerance]['count'].sum()
# start of additive_stats_dataframe code
sdf = pd.DataFrame()
sdf['count'] = gdf.apply(unweighted_sum, 'count').values[:, 1]
sdf['tax_cut'] = gdf.apply(count_lt_zero,
'tax_diff').values[:, 1]
sdf['tax_inc'] = gdf.apply(count_gt_zero,
'tax_diff').values[:, 1]
sdf['tot_change'] = gdf.apply(weighted_sum,
'tax_diff').values[:, 1]
sdf['ubi'] = gdf.apply(weighted_sum, 'ubi').values[:, 1]
sdf['count'] = gdf.apply(
unweighted_sum, 'count', include_groups=False
).values[:, 1]
sdf['tax_cut'] = gdf.apply(
count_lt_zero, 'tax_diff', include_groups=False
).values[:, 1]
sdf['tax_inc'] = gdf.apply(
count_gt_zero, 'tax_diff', include_groups=False
).values[:, 1]
sdf['tot_change'] = gdf.apply(
weighted_sum, 'tax_diff', include_groups=False
).values[:, 1]
sdf['ubi'] = gdf.apply(
weighted_sum, 'ubi', include_groups=False
).values[:, 1]
sdf['benefit_cost_total'] = gdf.apply(
weighted_sum, 'benefit_cost_total').values[:, 1]
weighted_sum, 'benefit_cost_total', include_groups=False
).values[:, 1]
sdf['benefit_value_total'] = gdf.apply(
weighted_sum, 'benefit_value_total').values[:, 1]
sdf['atinc1'] = gdf.apply(weighted_sum, 'atinc1').values[:, 1]
sdf['atinc2'] = gdf.apply(weighted_sum, 'atinc2').values[:, 1]
weighted_sum, 'benefit_value_total', include_groups=False
).values[:, 1]
sdf['atinc1'] = gdf.apply(
weighted_sum, 'atinc1', include_groups=False
).values[:, 1]
sdf['atinc2'] = gdf.apply(
weighted_sum, 'atinc2', include_groups=False
).values[:, 1]
return sdf
# main logic of create_difference_table
assert groupby in ('weighted_deciles',
Expand Down Expand Up @@ -903,8 +919,12 @@ def mtr_graph_data(vdf, year,
# split dfx into groups specified by 'table_row' column
gdfx = dfx.groupby('table_row', observed=False, as_index=False)
# apply the weighting_function to percentile-grouped mtr values
mtr1_series = gdfx.apply(weighting_function, 'mtr1').values[:, 1]
mtr2_series = gdfx.apply(weighting_function, 'mtr2').values[:, 1]
mtr1_series = gdfx.apply(
weighting_function, 'mtr1', include_groups=False
).values[:, 1]
mtr2_series = gdfx.apply(
weighting_function, 'mtr2', include_groups=False
).values[:, 1]
# construct DataFrame containing the two mtr?_series
lines = pd.DataFrame()
lines['base'] = mtr1_series
Expand Down Expand Up @@ -1020,9 +1040,15 @@ def atr_graph_data(vdf, year,
# split dfx into groups specified by 'table_row' column
gdfx = dfx.groupby('table_row', observed=False, as_index=False)
# apply weighted_mean function to percentile-grouped values
avginc_series = gdfx.apply(weighted_mean, 'expanded_income').values[:, 1]
avgtax1_series = gdfx.apply(weighted_mean, 'tax1').values[:, 1]
avgtax2_series = gdfx.apply(weighted_mean, 'tax2').values[:, 1]
avginc_series = gdfx.apply(
weighted_mean, 'expanded_income', include_groups=False
).values[:, 1]
avgtax1_series = gdfx.apply(
weighted_mean, 'tax1', include_groups=False
).values[:, 1]
avgtax2_series = gdfx.apply(
weighted_mean, 'tax2', include_groups=False
).values[:, 1]
# compute average tax rates for each included income percentile
atr1_series = np.zeros(avginc_series.shape)
atr1_series[included] = np.divide(
Expand Down Expand Up @@ -1107,8 +1133,8 @@ def xtr_graph_plot(data,
OR when executing script using Python command-line interpreter::
bio.output_file('graph-name.html', title='?TR by Income Percentile')
bio.show(gplot) [OR bio.save(gplot) WILL JUST WRITE FILE TO DISK]
bp.output_file('graph-name.html', title='?TR by Income Percentile')
bp.show(gplot) [OR bp.save(gplot) WILL JUST WRITE FILE TO DISK]
WILL VISUALIZE GRAPH IN BROWSER AND WRITE GRAPH TO SPECIFIED HTML FILE
Expand Down Expand Up @@ -1194,8 +1220,12 @@ def pch_graph_data(vdf, year, pop_quantiles=False):
# split dfx into groups specified by 'table_row' column
gdfx = dfx.groupby('table_row', observed=False, as_index=False)
# apply weighted_mean function to percentile-grouped values
avginc_series = gdfx.apply(weighted_mean, 'expanded_income').values[:, 1]
change_series = gdfx.apply(weighted_mean, 'chg_aftinc').values[:, 1]
avginc_series = gdfx.apply(
weighted_mean, 'expanded_income', include_groups=False
).values[:, 1]
change_series = gdfx.apply(
weighted_mean, 'chg_aftinc', include_groups=False
).values[:, 1]
# compute percentage change statistic each included income percentile
pch_series = np.zeros(avginc_series.shape)
pch_series[included] = np.divide(
Expand Down Expand Up @@ -1261,8 +1291,8 @@ def pch_graph_plot(data,
title = data['title']
fig = bp.figure(width=width, height=height, title=title)
fig.title.text_font_size = '12pt'
fig.line(data['line'].index, data['line'].pch,
line_color='blue', line_width=3)
line = data['line']
fig.line(line.index, line.pch, line_color='blue', line_width=3)
fig.circle(0, 0, visible=False) # force zero to be included on y axis
zero_grid_line_range = range(0, 101)
zero_grid_line_height = [0] * len(zero_grid_line_range)
Expand All @@ -1278,8 +1308,9 @@ def pch_graph_plot(data,
fig.yaxis.axis_label = ylabel
fig.yaxis.axis_label_text_font_size = '12pt'
fig.yaxis.axis_label_text_font_style = 'normal'
fig.yaxis[0].formatter = PrintfTickFormatter(format='%+.1f%%')
return fig
fig.yaxis[0].formatter = PrintfTickFormatter(format='%.1f')
# return fig # bokeh 3.4.1 cannot save this figure for some unknown reason
return None


def write_graph_file(figure, filename, title):
Expand All @@ -1301,9 +1332,10 @@ def write_graph_file(figure, filename, title):
-------
Nothing
"""
delete_file(filename) # work around annoying 'already exists' bokeh msg
bio.output_file(filename=filename, title=title)
bio.save(figure)
delete_file(filename)
if figure:
bp.output_file(filename=filename, title=title)
bp.save(figure)


def isoelastic_utility_function(consumption, crra, cmin):
Expand Down Expand Up @@ -1482,15 +1514,11 @@ def read_egg_csv(fname, index_col=None):
return pandas DataFrame containing the data.
"""
try:
path_in_egg = os.path.join('taxcalc', fname)
vdf = pd.read_csv(
pkg_resources.resource_stream(
pkg_resources.Requirement.parse('taxcalc'),
path_in_egg),
index_col=index_col
)
path_in_egg = implibres.files('taxcalc').joinpath(fname)
with implibres.as_file(path_in_egg) as rname:
vdf = pd.read_csv(rname, index_col=index_col)
except Exception:
raise ValueError('could not read {} data from egg'.format(fname))
raise ValueError(f'could not read {fname} data from egg')
# cannot call read_egg_ function in unit tests
return vdf # pragma: no cover

Expand All @@ -1501,15 +1529,11 @@ def read_egg_json(fname):
return dictionary containing the data.
"""
try:
path_in_egg = os.path.join('taxcalc', fname)
pdict = json.loads(
pkg_resources.resource_stream(
pkg_resources.Requirement.parse('taxcalc'),
path_in_egg).read().decode('utf-8'),
object_pairs_hook=collections.OrderedDict
)
path_in_egg = implibres.files('taxcalc').joinpath(fname)
with implibres.as_file(path_in_egg) as rname:
vdf = json.loads(rname)
except Exception:
raise ValueError('could not read {} data from egg'.format(fname))
raise ValueError(f'could not read {fname} data from egg')
# cannot call read_egg_ function in unit tests
return pdict # pragma: no cover

Expand Down

0 comments on commit 4df16eb

Please sign in to comment.