PSLmodels · Amy-Xu · Dec 6, 2017 · Dec 7, 2017 · Dec 7, 2017 · Dec 7, 2017
diff --git a/cps_stage3/aggregates.txt b/cps_stage3/aggregates.txt
@@ -0,0 +1,27 @@
+Total benefits (billions)
+programs  2014  2015  2016  2017    2018    2019    2020    2021    2022    2023    2024
+      ss 849.2 901.7 935.5 976.7 1,049.1 1,124.0 1,203.6 1,283.0 1,372.0 1,468.1 1,570.3
+     ssi  54.1  54.8  54.9  55.3    57.0    58.9    60.9    62.8    64.8    66.9    69.1
+medicaid 368.6 412.8 384.1 380.7   392.3   391.6   391.7   391.2   391.1   390.9   391.0
+medicare 576.1 602.5 629.9 667.9   718.3   777.9   841.7   907.1   978.5 1,056.2 1,135.8
+      vb 146.8 152.3 158.2 159.0   159.8   160.7   161.7   162.7   163.8   164.9   166.2
+    snap  83.0  82.6  79.0  79.0    79.0    79.0    79.0    79.0    79.0    79.0    79.0
+
+Total participating tax units (millions)
+programs  2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024
+      ss  44.6  45.2  45.8  46.5  47.1  47.8  48.5  49.1  49.9  50.6  51.3
+     ssi   6.8   6.8   6.8   6.9   6.9   7.0   6.9   7.0   7.0   7.1   7.1
+medicaid  27.9  29.7  30.8  31.2  31.7  32.5  32.9  33.4  33.8  34.3  34.8
+medicare  38.6  39.6  41.0  42.1  43.3  44.6  46.1  47.6  49.1  50.5  51.9
+      vb   4.9   4.9   4.6   4.6   4.6   4.6   4.6   4.6   4.6   4.6   4.6
+    snap  28.5  28.0  26.8  26.7  26.6  26.4  26.3  26.2  26.1  26.0  25.8
+
+Total participants (millions)
+programs  2014  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024
+      ss  56.5  57.8  59.4  60.9  62.5  64.0  65.5  66.4  69.0  70.9  72.9
+     ssi   7.6   7.6   7.5   7.5   7.5   7.6   7.6   7.7   7.7   7.8   7.8
+medicaid  52.4  56.4  58.1  58.9  59.7  61.2  62.0  62.9  63.7  64.6  65.4
+medicare  49.6  50.7  52.4  53.8  55.4  57.0  58.7  60.4  62.2  63.8  65.5
+      vb   5.0   5.0   4.6   4.7   4.7   4.7   4.7   4.7   4.7   4.7   4.7
+    snap  43.0  42.2  40.8  40.8  40.8  40.8  40.8  40.8  40.8  40.8  40.8
+
diff --git a/cps_stage3/decile2015.csv b/cps_stage3/decile2015.csv
@@ -0,0 +1,11 @@
+2015_decile,ss_benefits,ss_taxunits,ss_average,ssi_benefits,ssi_taxunits,ssi_average,medicaid_benefits,medicaid_taxunits,medicaid_average,medicare_benefits,medicare_taxunits,medicare_average,vb_benefits,vb_taxunits,vb_average,snap_benefits,snap_taxunits,snap_average
+1.0,235648164574.3,11828872.7,19921.4,15446420820.6,1905326.1,8107.0,59859073657.8,4222520.3,14176.1,172522666333.8,10921985.5,15795.9,36577757385.0,1213400.6,30144.8,14599547226.0,5844984.0,2497.8
+2.0,241140803199.8,12047991.5,20015.0,15752573846.5,1994613.0,7897.6,82066261681.9,4168809.1,19685.8,186804345030.4,10958997.0,17045.8,32955416319.7,1025216.7,32144.8,13892541220.2,5804971.9,2393.2
+3.0,178323885867.7,9105893.6,19583.3,12778730438.7,1620057.0,7887.8,68752039985.8,4510126.1,15243.9,130756967800.6,8295777.6,15761.9,24826044904.9,787832.6,31511.8,12771442403.3,5127523.1,2490.8
+4.0,51941541201.6,2553402.2,20342.1,3151031056.1,392073.0,8036.8,49749892872.5,4130106.4,12045.7,28925090583.7,2106380.3,13732.1,6579280519.5,218581.0,30100.0,15534412690.7,4532945.8,3427.0
+5.0,40677288594.5,2082786.3,19530.2,2897197915.0,348485.8,8313.7,48182460457.7,3888821.7,12390.0,19615969305.1,1608914.5,12192.1,6757385855.2,226861.1,29786.4,13560422068.9,3822778.9,3547.3
+6.0,34212236748.2,1790789.2,19104.6,2368829570.4,289770.5,8174.8,37913747915.4,2989568.3,12682.0,16896754076.9,1367227.1,12358.4,7237855735.1,240994.4,30033.3,7453550856.0,1809022.0,4120.2
+7.0,34063165570.0,1693676.4,20112.0,1751105764.4,215732.5,8117.0,26736187395.1,2220411.7,12041.1,15458622689.6,1256250.2,12305.4,8096538973.5,268389.7,30167.1,3285332871.7,740924.7,4434.1
+8.0,33557347102.4,1669427.0,20101.1,600805574.6,69494.9,8645.3,18087766855.8,1583274.5,11424.3,14803643416.6,1247236.2,11869.2,10020136903.8,329978.9,30366.0,1260181184.6,247498.8,5091.7
+9.0,27446719012.6,1306899.4,21001.4,18709682.5,2015.5,9282.9,12981209061.1,1149838.7,11289.6,9036414388.8,996337.3,9069.6,10341242490.4,321022.8,32213.4,209709592.0,36699.4,5714.2
+10.0,24662603933.5,1082297.0,22787.3,0.0,0.0,0.0,8435236248.0,837370.0,10073.5,7633994376.3,853738.3,8941.8,8905775130.9,276498.0,32209.2,3008985.8,1575.0,1910.5
diff --git a/cps_stage3/tabs.txt b/cps_stage3/tabs.txt
@@ -0,0 +1,68 @@
+vb
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  440860  440860  441980  442081  442213  442327  442487  442570  442685  442755  442864
+1.0   15394   15394   14322   14222   14091   13978   13822   13741   13627   13557   13450
+2.0     210     210     162     161     160     159     156     154     153     153     151
+3.0       1       1       1       1       1       1       0       0       0       0       0
+
+ss
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  328317  326525  326111  325606  325403  325331  325242  325242  324970  324803  324728
+1.0   83781   84533   84328   84234   84211   84192   84185   84185   84139   84113   84097
+2.0   44108   44607   44462   44418   44394   44391   44389   44389   44366   44361   44340
+3.0     258     473     472     474     475     474     473     473     473     473     473
+4.0       1      68      68      68      69      69      69      69      66      65      65
+
+medicaid
+        2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0   392005  388485  387573  387573  387573  386957  386957  386957  386957  386957  386957
+1.0    32353   34347   34984   34984   34984   35390   35390   35390   35390   35390   35390
+2.0    16027   16313   16503   16503   16503   16602   16602   16602   16602   16602   16602
+3.0     7810    8183    8240    8240    8240    8271    8271    8271    8271    8271    8271
+4.0     4910    5337    5358    5358    5358    5408    5408    5408    5408    5408    5408
+5.0     2085    2346    2352    2352    2352    2371    2371    2371    2371    2371    2371
+6.0      775     871     872     872     872     877     877     877     877     877     877
+7.0      351     419     419     419     419     425     425     425     425     425     425
+8.0       86      97      97      97      97      96      96      96      96      96      96
+9.0       30      33      33      33      33      34      34      34      34      34      34
+11.0      20      21      21      21      21      21      21      21      21      21      21
+10.0      10       9       9       9       9       9       9       9       9       9       9
+12.0       2       3       3       3       3       3       3       3       3       3       3
+14.0       1       1       1       1       1       1       1       1       1       1       1
+
+medicare
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  344758  342573  339832  337965  336186  334385  332111  329739  327392  325338  323332
+1.0   71106   73109   75094   76324   77319   78511   80682   82909   85152   87003   88823
+2.0   40148   40310   41060   41693   42471   43073   43172   43311   43412   43605   43784
+3.0     349     369     359     363     369     376     380     386     388     395     402
+4.0      49      49      65      64      64      64      64      64      65      67      67
+7.0      31      31      31      31      31      31      31      31      31      32      32
+5.0      16      16      16      17      17      16      16      16      16      16      16
+6.0       7       7       7       7       7       8       8       8       8       8       8
+8.0       1       1       1       1       1       1       1       1       1       1       1
+
+snap
+        2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0   408953  410079  412791  413483  414187  414915  415626  416229  416892  417594  418304
+1.0    34020   33272   31160   30597   29967   29322   28736   28205   27631   26976   26322
+2.0     6932    6675    6251    6168    6113    6054    5952    5903    5853    5830    5796
+3.0     2811    2719    2608    2575    2563    2550    2534    2518    2503    2493    2477
+4.0     1919    1901    1859    1852    1848    1840    1836    1830    1810    1802    1798
+5.0     1083    1076    1056    1051    1050    1047    1044    1044    1042    1039    1037
+6.0      479     475     472     472     471     471     471     470     469     467     467
+7.0      175     175     175     174     174     174     174     174     173     172     172
+8.0       58      58      58      58      57      57      57      57      57      57      57
+9.0       26      26      26      26      26      26      26      26      26      26      26
+11.0       5       5       5       5       5       5       5       5       5       5       5
+10.0       3       3       3       3       3       3       3       3       3       3       3
+12.0       1       1       1       1       1       1       1       1       1       1       1
+
+ssi
+       2014    2015    2016    2017    2018    2019    2020    2021    2022    2023    2024
+0.0  443520  443520  443771  443995  444219  444219  444463  444463  444705  444705  444851
+1.0   11643   11643   11443   11262   11080   11080   10876   10876   10646   10646   10507
+2.0    1206    1206    1164    1134    1099    1099    1059    1059    1048    1048    1042
+3.0      89      89      81      69      62      62      62      62      61      61      60
+4.0       7       7       6       5       5       5       5       5       5       5       5
+
diff --git a/cps_stage3/test_cps_benefits.py b/cps_stage3/test_cps_benefits.py
@@ -0,0 +1,204 @@
+import sys
+import pandas as pd
+import numpy as np
+from pandas.util.testing import assert_frame_equal
+
+
+'''
+The tests in this script check distribution and aggregates for the benefit data.
+Whenever the benefit data gets updated, the tests create new statistics
+and compare them with the previous version.
+
+This file needs three inputs: CPS weights (cps_weights.csv.gz), CPS tax unit
+database (cps.csv.gz), and CPS benefit (cps_benefits_extrap_full.csv.gz).
+The first two input files are in their corresponding folders as indicated
+in the code, but the third benefit file is different from the current version
+in that it also includes recipient information for each tax unit. Because this
+recipient per tax unit information is not used in the tax-calculator, we have
+dropped those variables to save space. But the recipient information is essential
+for checking aggregates and distribution. So anyone who wants to run the tests
+needs to recreate the full version of benefit data.
+
+It is relatively simple to generate the full version:
+
+1. Find extrapolation.py in the current folder and comment out the six
+lines of code (lines 310 - 315) that drop all recipients
+2. Rename the output to cps_benefits_extrap_full.csv.gz
+3. Run the extrapolation script with 'python extrapolation.py'
+
+The tests will create three new files of summary statistics:
+
+1. decile2015_new.csv: participation, total benefit and average benefit
+by wage decile
+2. aggregates_new.txt: total participation and benefits for each program
+from 2014 to 2024
+3. tabs_new.txt: tabulations of tax unit participation for each program
+from 2014 to 2024
+
+If all three files are exactly the same as the previous version, then the tests
+will pass. If the tests fail, compare the new version with the previous version
+carefully and then replace the previous with the new version if the difference is
+reasonable.
+
+'''
+
+
+# benefit programs covered by the tests; the names are combined with a year to
+# form column names such as '<program>_benefits_<year>' and
+# '<program>_recipients_<year>'
+programs = ['ss', 'ssi', 'medicaid', 'medicare', 'vb', 'snap']
+# divisors used to report weighted totals in billions and millions
+billion = 1e09
+million = 1e06
+# small constant added to a denominator so that a zero total does not
+# cause a division by zero
+delta = 1e-06
+
+def read_files():
+    '''Load and merge the weights, benefit, and raw CPS files.
+
+    Three files are read using paths relative to the current working
+    directory:
+
+    * ../cps_stage2/cps_weights.csv.gz
+    * ../cps_data/cps.csv.gz (only e00200, s006 and RECID are kept)
+    * cps_benefits_extrap_full.csv.gz (benefits and recipients)
+
+    Returns a DataFrame sorted by e00200 in which the benefit file has been
+    left-merged on RECID (missing values filled with 0), the weights file
+    divided by 100 has been joined on the row index, s006 has been renamed to
+    WT2014, and the helper columns WT2015_cumsum and WT2015_decile have been
+    added.
+
+    Raises AssertionError if the CPS file and the weights file do not have
+    the same number of rows.
+    '''
+
+    # import from taxdata repo
+    # weights and wage are for 10-year and decile tables
+    weights = pd.read_csv('../cps_stage2/cps_weights.csv.gz', compression='gzip')
+    cps_income = pd.read_csv('../cps_data/cps.csv.gz',
+                             compression='gzip')[['e00200', 's006', 'RECID']]
+    # the benefit file that includes both benefits and recipients
+    cps_benefit = pd.read_csv('cps_benefits_extrap_full.csv.gz')
+
+    # the weights are joined by row position below, so both files must
+    # describe the same number of records
+    assert len(cps_income) == len(weights)
+
+    # merge all essential variables
+    cps = cps_income.merge(cps_benefit, on='RECID', how='left')
+    cps.fillna(0, inplace=True)
+    cps = cps.join(weights/100)
+
+    # rename to facilitate for loops
+    cps.rename(columns={'s006': 'WT2014'}, inplace=True)
+
+    # create decile ranks by wage
+    cps = cps.sort_values(by='e00200')
+    cps['WT2015_cumsum'] = cps.WT2015.cumsum()
+    # Each bin is (total weight / 9.99) wide, so the largest cumulative sum
+    # maps to ceil(9.99) = 10; presumably 9.99 rather than 10 is used to keep
+    # floating-point rounding from producing an 11th bin (TODO confirm).
+    # Series.max() replaces the builtin max(), which walks the Series one
+    # element at a time in Python.
+    total_weight = cps.WT2015_cumsum.max()
+    cps['WT2015_decile'] = np.ceil(cps.WT2015_cumsum/(total_weight/9.99))
+
+    return cps
+
+# the merged data is loaded once, when this module is imported, and shared
+# by all tests below
+cps = read_files()
+
+def test_decile_dist():
+
+    ''' total participation, total benefits and average benefits
+        by decile
+
+        For every program the 2015 weighted benefits and the weighted number
+        of participating tax units are summed within each WT2015_decile
+        group, and the average benefit per participating tax unit is derived
+        from the two. The resulting table is written to decile2015_new.csv
+        and compared, rounded to one decimal, with decile2015.csv.
+    '''
+    decile2015 = pd.DataFrame(np.linspace(1, 10, num=10), columns=['2015_decile'])
+
+    for program in programs:
+        benefit_var = program + '_benefits_2015'
+        recipient_var = program + '_recipients_2015'
+
+        # build the weighted columns in a scratch frame so that the shared
+        # cps data is not modified by this test
+        weighted = pd.DataFrame(index=cps.index)
+        weighted['benefits'] = cps[benefit_var] * cps['WT2015']
+        # a tax unit counts as participating when its recipient count is
+        # non-zero; it then contributes its full weight
+        weighted['taxunits'] = np.where(cps[recipient_var] != 0, cps['WT2015'], 0)
+
+        # calculate total benefits, participation (# tax units), and average
+        # per decile. The decile is used as the group index and then dropped,
+        # which leaves exactly the two summed columns on a 0..9 index
+        # regardless of how the installed pandas treats as_index=False for a
+        # grouping key that is not a column of the frame.
+        bp = weighted.groupby(cps['WT2015_decile']).sum().reset_index(drop=True)
+        bp['average'] = bp['benefits'] / (bp['taxunits'] + delta)
+
+        # rename and collect
+        bp.columns = [program + '_benefits', program + '_taxunits', program + '_average']
+        decile2015 = pd.concat([decile2015, bp], axis=1)
+
+    # save once, after all programs have been added
+    decile2015.to_csv('decile2015_new.csv', float_format='%.1f', index=False)
+
+    decile_old = pd.read_csv('decile2015.csv')
+    assert_frame_equal(decile2015.round(1), decile_old)
+
+
+def test_aggregates():
+
+    '''total individual & taxunit participation, total benefits from 2014-2024
+
+       Three tables with one row per program and one column per year are
+       built: weighted benefits in billions, weighted participating tax units
+       in millions and weighted participants in millions. They are written to
+       aggregates_new.txt and compared, rounded to one decimal, with the
+       tables stored in aggregates.txt.
+    '''
+    years = list(range(2014, 2025))
+
+    benefits = pd.DataFrame(programs, columns=['programs'])
+    taxunits = pd.DataFrame(programs, columns=['programs'])
+    participants = pd.DataFrame(programs, columns=['programs'])
+
+    for year in years:
+        weight = cps['WT' + str(year)]
+
+        #benefits
+        benefits_vars = [x + '_benefits_' + str(year) for x in programs]
+        weighted_benefits = cps.loc[:, benefits_vars].multiply(weight, axis='index')
+        benefits[year] = (weighted_benefits.sum()/billion).values
+
+        #participants
+        p_vars = [x + '_recipients_' + str(year) for x in programs]
+        raw_participants = cps.loc[:, p_vars]
+        weighted_par = raw_participants.multiply(weight, axis='index')
+        participants[year] = (weighted_par.sum()/million).values
+
+        # tax units: every tax unit with a non-zero recipient count
+        # contributes its weight once
+        weighted_taxunits = raw_participants.astype(bool).multiply(weight, axis='index')
+        taxunits[year] = (weighted_taxunits.sum()/million).values
+
+    # apply the display format only while writing, so that the global pandas
+    # option is restored afterwards and does not leak into other tests
+    with pd.option_context('display.float_format', '{:,.1f}'.format):
+        with open('aggregates_new.txt', 'w') as out:
+            out.write("Total benefits (billions)\n" + benefits.to_string(index=False) + '\n\n')
+            out.write('Total participating tax units (millions)\n' + taxunits.to_string(index=False) + '\n\n')
+            out.write('Total participants (millions)\n' + participants.to_string(index=False) + '\n\n')
+
+    # import the current version
+    # Lines 0, 9 and 18 of the file are the three table titles. The first
+    # header line becomes the column header; the header lines of the second
+    # and third tables are read as data rows 6 and 13, which is why the
+    # slices below skip those two labels.
+    agg_old = pd.read_csv('aggregates.txt', sep=r'\s+', skiprows=[0, 9, 18], thousands=',')
+    agg_old.columns = ['programs'] + years
+
+    benefits_old = agg_old.loc[0:5]
+    assert_frame_equal(benefits.round(1), benefits_old)
+
+    taxunits_old = agg_old.loc[7:12].reset_index(drop=True)
+    assert_frame_equal(taxunits.round(1), taxunits_old)
+
+    participants_old = agg_old.loc[14:19].reset_index(drop=True)
+    assert_frame_equal(participants.round(1), participants_old)
+
+
+def test_tabs():
+
+    ''' tabulation of number of participants per tax unit from 2014 to 2024
+
+        For every program and year, counts how many records have each
+        recipient-count value. The tables are written to tabs_new.txt and
+        compared with the tables stored in tabs.txt.
+    '''
+    years = list(range(2014, 2025))
+
+    tabs = {}
+
+    for program in programs:
+        yearly = [cps[program + "_recipients_" + str(year)].value_counts()
+                  for year in years]
+
+        # Keep the rows in the order of the first year's tabulation and
+        # append any value that first shows up in a later year. Assigning the
+        # later years as columns of a frame built from the first year alone
+        # would align them to the first year's index and silently drop such
+        # values.
+        row_order = list(yearly[0].index)
+        for counts in yearly[1:]:
+            for value in counts.index:
+                if value not in row_order:
+                    row_order.append(value)
+
+        program_tab = pd.DataFrame(dict(zip(years, yearly)),
+                                   index=row_order, columns=years)
+        # an unnamed index makes reset_index() below produce a column called
+        # 'index', matching the column name given to the stored tables
+        program_tab.index.name = None
+        tabs[program] = program_tab.fillna(0).astype(int)
+
+    # write the tables in the order of the programs list; dict.items()/
+    # iteritems() is avoided so the code runs on Python 2 and 3 and the
+    # output order is deterministic
+    with open('tabs_new.txt', 'w') as out:
+        for program in programs:
+            out.write(program + '\n')
+            out.write(tabs[program].to_string() + '\n\n')
+
+    tabs_old = pd.read_csv('tabs.txt', sep=r'\s+',
+                           names=['index'] + years)
+    # drop the per-table header lines; the original row labels are kept, so
+    # the label right after a program-name row is the (removed) header row
+    tabs_old = tabs_old[tabs_old['index']!='2014']
+
+    for program in programs:
+
+        unitmax = len(tabs[program])
+        start_row = (tabs_old.index[tabs_old['index']==program] + 1).values[0]
+        # .loc slicing is inclusive; start_row is the removed header label,
+        # so this range covers exactly unitmax data rows
+        end_row = start_row + unitmax
+
+        participation_old = tabs_old.loc[start_row: end_row]
+        participation_old = participation_old.reset_index(drop=True)
+
+        assert_frame_equal(participation_old.astype(float),
+                           tabs[program].reset_index().astype(float),
+                           check_column_type=False, check_index_type=False)
+
+