Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get series with realtime #11

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ For instance, there has been three observations (data points) for the GDP of 201

This means the GDP value for Q1 2014 has been released three times. The first release, on 4/30/2014, reported a value of 17149.6; it was followed by two revisions, on 5/29/2014 and 6/25/2014, with revised values of 17101.3 and 17016.0, respectively.

If you pass `realtime_start` and/or `realtime_end` to `get_series`, you will get a `pandas.DataFrame` indexed by a `pandas.MultiIndex` (levels `obs_date`, `rt_start`, `rt_end`) instead of a `pandas.Series`.

For instance, with `observation_start` and `observation_end` both set to 2015-01-01 and
`realtime_start` set to 2015-01-01, you will get:
```
GDP
obs_date rt_start rt_end
2015-01-01 2015-04-29 2015-05-28 00:00:00 17710.0
2015-05-29 2015-06-23 00:00:00 17665.0
2015-06-24 9999-12-31 17693.3
```

### Get first data release only (i.e. ignore revisions)

```python
Expand Down Expand Up @@ -83,6 +95,7 @@ this outputs:
2014-04-01 17294.7
dtype: float64
```

### Get latest data known on a given date

```python
Expand Down
53 changes: 43 additions & 10 deletions fredapi/fred.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ def _parse(self, date_str, format='%Y-%m-%d'):
"""
helper function for parsing FRED date string into datetime
"""
if date_str == self.latest_realtime_end:
return None
rv = pd.to_datetime(date_str, format=format)
if hasattr(rv, 'to_datetime'):
rv = rv.to_datetime()
Expand All @@ -98,25 +100,35 @@ def get_series_info(self, series_id):
info = pd.Series(root.getchildren()[0].attrib)
return info

def get_series(self, series_id, observation_start=None, observation_end=None, **kwargs):
def get_series(self, series_id, observation_start=None,
observation_end=None, realtime_start=None,
realtime_end=None, **kwargs):
"""
Get data for a Fred series id. This fetches the latest known data, and is equivalent to get_series_latest_release()

Parameters
----------
series_id : str
Fred series id such as 'CPIAUCSL'
observation_start : datetime or datetime-like str such as '7/1/2014', optional
earliest observation date
observation_end : datetime or datetime-like str such as '7/1/2014', optional
latest observation date

observation_start : datetime or datetime-like str such as '7/1/2014'
earliest observation date (optional)
observation_end : datetime or datetime-like str such as '7/1/2014'
latest observation date (optional)
realtime_start : datetime or datetime-like str such as '7/1/2014'
earliest as-of date (optional)
realtime_end : datetime or datetime-like str such as '7/1/2014'
latest as-of date (optional)
kwargs : additional parameters
Any additional parameters supported by FRED. You can see https://api.stlouisfed.org/docs/fred/series_observations.html for the full list
Any additional parameters supported by FRED. You can see
https://api.stlouisfed.org/docs/fred/series_observations.html
for the full list

Returns
-------
data : Series
a Series where each index is the observation date and the value is the data for the Fred series
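a pandas Series where each index is the observation date and the
value is the data for the Fred series. If realtime_start or
realtime_end is given, a pandas DataFrame indexed by a MultiIndex
(obs_date, rt_start, rt_end) is returned instead.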
"""
url = "%s/series/observations?series_id=%s" % (self.root_url, series_id)
if observation_start is not None:
Expand All @@ -126,20 +138,41 @@ def get_series(self, series_id, observation_start=None, observation_end=None, **
if observation_end is not None:
observation_end = pd.to_datetime(observation_end, errors='raise')
url += '&observation_end=' + observation_end.strftime('%Y-%m-%d')
if realtime_start is not None:
realtime_start = pd.to_datetime(realtime_start, errors='raise')
url += '&realtime_start=' + realtime_start.strftime('%Y-%m-%d')
if realtime_end is not None:
realtime_end = pd.to_datetime(realtime_end, errors='raise')
url += '&realtime_end=' + realtime_end.strftime('%Y-%m-%d')
if kwargs.keys():
url += '&' + urlencode(kwargs)
root = self.__fetch_data(url)
if root is None:
raise ValueError('No data exists for series id: ' + series_id)
data = {}
realtime = (realtime_start or realtime_end)
values = []
obsdates = []
rtstarts = []
rtends = []
for child in root.getchildren():
val = child.get('value')
if val == self.nan_char:
val = float('NaN')
else:
val = float(val)
data[self._parse(child.get('date'))] = val
return pd.Series(data)
values.append(val)
obsdates.append(self._parse(child.get('date')))
if realtime:
rtstarts.append(self._parse(child.get('realtime_start')))
rtends.append(self._parse(child.get('realtime_end')))
if realtime:
names = ['obs_date', 'rt_start', 'rt_end']
index = pd.MultiIndex.from_arrays([obsdates, rtstarts, rtends],
names=names)
return pd.DataFrame(values, index=index, columns=[series_id])
else:
return pd.Series(values, index=obsdates)


def get_series_latest_release(self, series_id):
"""
Expand Down
91 changes: 75 additions & 16 deletions fredapi/tests/test_fred.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
import textwrap
import contextlib

import pandas as pd

import fredapi
import fredapi.fred

Expand Down Expand Up @@ -79,26 +77,32 @@ def __init__(self, rel_url, response=None, side_effect=None):
<seriess realtime_start="2015-07-19" realtime_end="2015-07-19"
order_by="series_id" sort_order="asc" count="6164"
offset="0" limit="1000">
<series id="PCPI01001" realtime_start="2015-07-19" realtime_end="2015-07-19"
<series id="PCPI01001" realtime_start="2015-07-19"
realtime_end="2015-07-19"
title="Per Capita Personal Income in Autauga County, AL"
observation_start="1969-01-01" observation_end="2013-01-01"
frequency="Annual" frequency_short="A" units="Dollars"
units_short="$" seasonal_adjustment="Not Seasonally Adjusted"
seasonal_adjustment_short="NSA" last_updated="2015-01-29 12:10:21-06"
seasonal_adjustment_short="NSA"
last_updated="2015-01-29 12:10:21-06"
popularity="0" notes="..." />
<series id="PCPI01003" realtime_start="2015-07-19" realtime_end="2015-07-19"
<series id="PCPI01003" realtime_start="2015-07-19"
realtime_end="2015-07-19"
title="Per Capita Personal Income in Baldwin County, AL"
observation_start="1969-01-01" observation_end="2013-01-01"
frequency="Annual" frequency_short="A" units="Dollars"
units_short="$" seasonal_adjustment="Not Seasonally Adjusted"
seasonal_adjustment_short="NSA" last_updated="2015-01-29 12:10:21-06"
seasonal_adjustment_short="NSA"
last_updated="2015-01-29 12:10:21-06"
popularity="0" notes="..." />
<series id="PCPI01005" realtime_start="2015-07-19" realtime_end="2015-07-19"
<series id="PCPI01005" realtime_start="2015-07-19"
realtime_end="2015-07-19"
title="Per Capita Personal Income in Barbour County, AL"
observation_start="1969-01-01" observation_end="2013-01-01"
frequency="Annual" frequency_short="A" units="Dollars"
units_short="$" seasonal_adjustment="Not Seasonally Adjusted"
seasonal_adjustment_short="NSA" last_updated="2015-01-29 12:10:21-06"
seasonal_adjustment_short="NSA"
last_updated="2015-01-29 12:10:21-06"
popularity="0" notes="..." />
<!-- more series come here, but not useful for the test... -->
</seriess>
Expand All @@ -120,6 +124,39 @@ def __init__(self, rel_url, response=None, side_effect=None):
last_updated="2015-06-05 08:47:20-05"
popularity="86" notes="..." />
</seriess>'''))
# Canned FRED exchange for a GDP observations query that sets realtime_start:
# the relative URL the client is expected to request, and the XML payload
# (three revisions for each of three observation dates) handed back to it.
gdp_obs_rt_call = HTTPCall(
    'series/observations?' + '&'.join(['series_id=GDP',
                                       'observation_start=2014-07-01',
                                       'observation_end=2015-01-01',
                                       'realtime_start=2014-07-01']),
    response=textwrap.dedent('''\
<?xml version="1.0" encoding="utf-8" ?>
<observations realtime_start="2014-07-01" realtime_end="9999-12-31"
observation_start="2014-07-01" observation_end="2015-01-01"
units="lin" output_type="1" file_type="xml"
order_by="observation_date" sort_order="asc" count="9"
offset="0" limit="100000">
<observation realtime_start="2014-10-30" realtime_end="2014-11-24"
date="2014-07-01" value="17535.4"/>
<observation realtime_start="2014-11-25" realtime_end="2014-12-22"
date="2014-07-01" value="17555.2"/>
<observation realtime_start="2014-12-23" realtime_end="9999-12-31"
date="2014-07-01" value="17599.8"/>
<observation realtime_start="2015-01-30" realtime_end="2015-02-26"
date="2014-10-01" value="17710.7"/>
<observation realtime_start="2015-02-27" realtime_end="2015-03-26"
date="2014-10-01" value="17701.3"/>
<observation realtime_start="2015-03-27" realtime_end="9999-12-31"
date="2014-10-01" value="17703.7"/>
<observation realtime_start="2015-04-29" realtime_end="2015-05-28"
date="2015-01-01" value="17710.0"/>
<observation realtime_start="2015-05-29" realtime_end="2015-06-23"
date="2015-01-01" value="17665.0"/>
<observation realtime_start="2015-06-24" realtime_end="9999-12-31"
date="2015-01-01" value="17693.3"/>
</observations>
'''))



class TestFred(unittest.TestCase):
Expand Down Expand Up @@ -150,7 +187,6 @@ def setUp(self):
self.fake_fred_call = fake_fred_call
self.__original_urlopen = fredapi.fred.urlopen


def tearDown(self):
"""Cleanup."""
pass
Expand Down Expand Up @@ -230,11 +266,11 @@ def test_invalid_kwarg_in_get_series(self, urlopen):
"""Test invalid keyword argument in call to get_series."""
url = '{}/series?series_id=invalid&api_key={}'.format(self.root_url,
fred_api_key)
side_effect = fredapi.fred.HTTPError(url, 400, '', '', sys.stderr)
side_effect = fredapi.fred.HTTPError(url, 400, '', '', io.StringIO())
self.prepare_urlopen(urlopen, side_effect=side_effect)
with self.assertRaises(ValueError) as context:
self.fred.get_series('SP500',
observation_start='invalid-datetime-str')
# FIXME: depending on the environment, this raises either ValueError or TypeError.
with self.assertRaises(Exception):
self.fred.get_series('SP500', observation_start='invalid')
self.assertFalse(urlopen.called)

@mock.patch('fredapi.fred.urlopen')
Expand All @@ -249,12 +285,35 @@ def test_search(self, urlopen):
'seasonal_adjustment_short']])
expected = textwrap.dedent('''\
popularity observation_start seasonal_adjustment_short
series id
series id
PCPI01001 0 1969-01-01 NSA
PCPI01003 0 1969-01-01 NSA
PCPI01005 0 1969-01-01 NSA''')
for aline, eline in zip(actual.split('\n'), expected.split('\n')):
self.assertEqual(aline.strip(), eline.strip())
self.assertEqual(actual.split('\n'), expected.split('\n'))

@mock.patch('fredapi.fred.urlopen')
def test_get_series_with_realtime(self, urlopen):
    """Check get_series output when a realtime_start bound is supplied.

    The canned ``gdp_obs_rt_call`` response is queued on the patched
    urlopen, GDP is requested with observation bounds plus realtime_start,
    and both the requested URL and the printed form of the result are
    compared with the expected values.
    """
    self.prepare_urlopen(urlopen, side_effect=[gdp_obs_rt_call.response])
    frame = self.fred.get_series('GDP', observation_start='7/1/2014',
                                 observation_end='1/1/2015',
                                 realtime_start='7/1/2014')
    urlopen.assert_called_with(gdp_obs_rt_call.url)
    # Compare line by line so that a mismatch is reported per row.
    expected_lines = textwrap.dedent('''\
GDP
obs_date rt_start rt_end
2014-07-01 2014-10-30 2014-11-24 17535.4
2014-11-25 2014-12-22 17555.2
2014-12-23 NaT 17599.8
2014-10-01 2015-01-30 2015-02-26 17710.7
2015-02-27 2015-03-26 17701.3
2015-03-27 NaT 17703.7
2015-01-01 2015-04-29 2015-05-28 17710.0
2015-05-29 2015-06-23 17665.0
2015-06-24 NaT 17693.3''').split('\n')
    self.assertEqual(str(frame).split('\n'), expected_lines)


if __name__ == '__main__':
Expand Down