Skip to content

Commit

Permalink
Improved handling of realtime parameters in get_series().
Browse files Browse the repository at this point in the history
The realtime_start and realtime_end arguments of get_series() now cause a
pandas.DataFrame with a pandas.MultiIndex to be returned for realtime
data. Added a simple test for the new feature, plus documentation.

Added __init__.py to fredapi.tests so that it is correctly interpreted as
a package. We can now revert to `python setup.py test` in .travis.yml.

Fixed test_invalid_kwarg_in_get_series(), as we sometimes get a
TypeError and sometimes a ValueError. It seems that pandas passes through
whatever exception it gets; there might be a good reason for this, so we
follow the same policy.

Simplified comparison of dataframe output in tests.
  • Loading branch information
elmotec committed Oct 19, 2015
1 parent fb61173 commit aa1db9b
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 18 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ For instance, there has been three observations (data points) for the GDP of 201

This means the GDP value for Q1 2014 has been released three times. First release was on 4/30/2014 for a value of 17149.6, and then there have been two revisions on 5/29/2014 and 6/25/2014 for revised values of 17101.3 and 17016.0, respectively.

If you pass realtime_start and/or realtime_end to `get_series`, you will get a pandas.DataFrame with a pandas.MultiIndex instead of a pandas.Series.

For instance, with observation_start and observation_end set to 2015-01-01 and
realtime_start set to 2015-01-01, one will get:
```
GDP
obs_date rt_start rt_end
2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0
2015-05-29 2015-06-23 00:00:00 17665.0
2015-06-24 9999-12-31 17693.3
```

### Get first data release only (i.e. ignore revisions)

```python
Expand Down Expand Up @@ -83,6 +95,7 @@ this outputs:
2014-04-01 17294.7
dtype: float64
```

### Get latest data known on a given date

```python
Expand Down
51 changes: 41 additions & 10 deletions fredapi/fred.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,25 +98,35 @@ def get_series_info(self, series_id):
info = pd.Series(root.getchildren()[0].attrib)
return info

def get_series(self, series_id, observation_start=None, observation_end=None, **kwargs):
def get_series(self, series_id, observation_start=None,
observation_end=None, realtime_start=None,
realtime_end=None, **kwargs):
"""
Get data for a Fred series id. This fetches the latest known data, and is equivalent to get_series_latest_release()
Parameters
----------
series_id : str
Fred series id such as 'CPIAUCSL'
observation_start : datetime or datetime-like str such as '7/1/2014', optional
earliest observation date
observation_end : datetime or datetime-like str such as '7/1/2014', optional
latest observation date
observation_start : datetime or datetime-like str such as '7/1/2014'
earliest observation date (optional)
observation_end : datetime or datetime-like str such as '7/1/2014'
latest observation date (optional)
realtime_start : datetime or datetime-like str such as '7/1/2014'
earliest as-of date (optional)
realtime_end : datetime or datetime-like str such as '7/1/2014'
latest as-of date (optional)
kwargs : additional parameters
Any additional parameters supported by FRED. You can see https://api.stlouisfed.org/docs/fred/series_observations.html for the full list
Any additional parameters supported by FRED. You can see
https://api.stlouisfed.org/docs/fred/series_observations.html
for the full list
Returns
-------
data : Series
a Series where each index is the observation date and the value is the data for the Fred series
a pandas Series where each index is the observation date and the
value is the data for the Fred series
"""
url = "%s/series/observations?series_id=%s" % (self.root_url, series_id)
if observation_start is not None:
Expand All @@ -126,20 +136,41 @@ def get_series(self, series_id, observation_start=None, observation_end=None, **
if observation_end is not None:
observation_end = pd.to_datetime(observation_end, errors='raise')
url += '&observation_end=' + observation_end.strftime('%Y-%m-%d')
if realtime_start is not None:
realtime_start = pd.to_datetime(realtime_start, errors='raise')
url += '&realtime_start=' + realtime_start.strftime('%Y-%m-%d')
if realtime_end is not None:
realtime_end = pd.to_datetime(realtime_end, errors='raise')
url += '&realtime_end=' + realtime_end.strftime('%Y-%m-%d')
if kwargs.keys():
url += '&' + urlencode(kwargs)
root = self.__fetch_data(url)
if root is None:
raise ValueError('No data exists for series id: ' + series_id)
data = {}
realtime = (realtime_start or realtime_end)
values = []
obsdates = []
rtstarts = []
rtends = []
for child in root.getchildren():
val = child.get('value')
if val == self.nan_char:
val = float('NaN')
else:
val = float(val)
data[self._parse(child.get('date'))] = val
return pd.Series(data)
values.append(val)
obsdates.append(self._parse(child.get('date')))
if realtime:
rtstarts.append(self._parse(child.get('realtime_start')))
rtends.append(self._parse(child.get('realtime_end')))
if realtime:
names = ['obs_date', 'rt_start', 'rt_end']
index = pd.MultiIndex.from_arrays([obsdates, rtstarts, rtends],
names=names)
return pd.DataFrame(values, index=index, columns=[series_id])
else:
return pd.Series(values, index=obsdates)


def get_series_latest_release(self, series_id):
"""
Expand Down
70 changes: 62 additions & 8 deletions fredapi/tests/test_fred.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
import textwrap
import contextlib

import pandas as pd

import fredapi
import fredapi.fred

Expand Down Expand Up @@ -120,6 +118,39 @@ def __init__(self, rel_url, response=None, side_effect=None):
last_updated="2015-06-05 08:47:20-05"
popularity="86" notes="..." />
</seriess>'''))
gdp_obs_rt_call = HTTPCall('series/observations?{}&{}&{}&{}'.
format('series_id=GDP',
'observation_start=2014-07-01',
'observation_end=2015-01-01',
'realtime_start=2014-07-01'),
response=textwrap.dedent('''\
<?xml version="1.0" encoding="utf-8" ?>
<observations realtime_start="2014-07-01" realtime_end="9999-12-31"
observation_start="2014-07-01" observation_end="2015-01-01"
units="lin" output_type="1" file_type="xml"
order_by="observation_date" sort_order="asc" count="9"
offset="0" limit="100000">
<observation realtime_start="2014-10-30" realtime_end="2014-11-24"
date="2014-07-01" value="17535.4"/>
<observation realtime_start="2014-11-25" realtime_end="2014-12-22"
date="2014-07-01" value="17555.2"/>
<observation realtime_start="2014-12-23" realtime_end="9999-12-31"
date="2014-07-01" value="17599.8"/>
<observation realtime_start="2015-01-30" realtime_end="2015-02-26"
date="2014-10-01" value="17710.7"/>
<observation realtime_start="2015-02-27" realtime_end="2015-03-26"
date="2014-10-01" value="17701.3"/>
<observation realtime_start="2015-03-27" realtime_end="9999-12-31"
date="2014-10-01" value="17703.7"/>
<observation realtime_start="2015-04-29" realtime_end="2015-05-28"
date="2015-01-01" value="17710.0"/>
<observation realtime_start="2015-05-29" realtime_end="2015-06-23"
date="2015-01-01" value="17665.0"/>
<observation realtime_start="2015-06-24" realtime_end="9999-12-31"
date="2015-01-01" value="17693.3"/>
</observations>
'''))



class TestFred(unittest.TestCase):
Expand Down Expand Up @@ -232,9 +263,9 @@ def test_invalid_kwarg_in_get_series(self, urlopen):
fred_api_key)
side_effect = fredapi.fred.HTTPError(url, 400, '', '', sys.stderr)
self.prepare_urlopen(urlopen, side_effect=side_effect)
with self.assertRaises(ValueError) as context:
self.fred.get_series('SP500',
observation_start='invalid-datetime-str')
# FIXME: different environment throw ValueError or TypeError.
with self.assertRaises(Exception):
self.fred.get_series('SP500', observation_start='invalid')
self.assertFalse(urlopen.called)

@mock.patch('fredapi.fred.urlopen')
Expand All @@ -249,12 +280,35 @@ def test_search(self, urlopen):
'seasonal_adjustment_short']])
expected = textwrap.dedent('''\
popularity observation_start seasonal_adjustment_short
series id
series id
PCPI01001 0 1969-01-01 NSA
PCPI01003 0 1969-01-01 NSA
PCPI01005 0 1969-01-01 NSA''')
for aline, eline in zip(actual.split('\n'), expected.split('\n')):
self.assertEqual(aline.strip(), eline.strip())
self.assertEqual(actual.split('\n'), expected.split('\n'))

@mock.patch('fredapi.fred.urlopen')
def test_get_series_with_realtime(self, urlopen):
"""Test get_series with realtime argument."""
side_effects = [gdp_obs_rt_call.response]
self.prepare_urlopen(urlopen, side_effect=side_effects)
df = self.fred.get_series('GDP', observation_start='7/1/2014',
observation_end='1/1/2015',
realtime_start='7/1/2014')
urlopen.assert_called_with(gdp_obs_rt_call.url)
actual = str(df)
expected = textwrap.dedent('''\
GDP
obs_date rt_start rt_end
2014-07-01 2014-10-30 2014-11-24 00:00:00 17535.4
2014-11-25 2014-12-22 00:00:00 17555.2
2014-12-23 9999-12-31 17599.8
2014-10-01 2015-01-30 2015-02-26 00:00:00 17710.7
2015-02-27 2015-03-26 00:00:00 17701.3
2015-03-27 9999-12-31 17703.7
2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0
2015-05-29 2015-06-23 00:00:00 17665.0
2015-06-24 9999-12-31 17693.3''')
self.assertEqual(actual.split('\n'), expected.split('\n'))


if __name__ == '__main__':
Expand Down

0 comments on commit aa1db9b

Please sign in to comment.