Commit da481ad

Added more data vendor customization

saeedamen committed May 20, 2022
1 parent 5f619b3 commit da481ad
Showing 6 changed files with 52 additions and 19 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -115,6 +115,7 @@ individual data providers)

# Release Notes

+* 0.1.27 - findatapy (20 May 2022)
* 0.1.26 - findatapy (07 Oct 2021)
* 0.1.25 - findatapy (07 Oct 2021)
* 0.1.24 - findatapy (06 Oct 2021)
@@ -133,6 +134,8 @@ individual data providers)

# Coding log

+* 20 May 2022
+  * Added more customisation for data vendors
* 25 Jan 2022
* Fixed delete key problem with Redis
* 20 Jan 2022
54 changes: 38 additions & 16 deletions findatapy/market/ioengine.py
@@ -17,6 +17,7 @@

import io
import datetime
+import json
from dateutil.parser import parse

import codecs
@@ -316,15 +317,17 @@ def write_time_series_cache_to_disk(self, fname, data_frame,
3)

if mem_float < 500:
ser = io.BytesIO()


if use_cache_compression:
+        ser = io.BytesIO()
data_frame.to_pickle(ser,
compression="gzip")
ser.seek(0)

r.set('comp_' + fname, ser.read())
else:
+        ser = io.BytesIO()
data_frame.to_pickle(ser)
ser.seek(0)
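
The hunk above stores gzip-pickled frames in Redis under a "comp_" key prefix. A minimal sketch of reading one back outside findatapy (not part of this commit; the key name is hypothetical):

```python
# Sketch: recover a DataFrame that write_time_series_cache_to_disk
# cached with use_cache_compression=True. Key name is hypothetical.
import gzip
import pickle

import redis

r = redis.StrictRedis(host="localhost", port=6379)

raw = r.get("comp_" + "fx_eurusd")  # hypothetical cache key

if raw is not None:
    # to_pickle(..., compression="gzip") wrote gzip-wrapped pickle bytes
    df = pickle.loads(gzip.decompress(raw))
```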

@@ -1031,20 +1034,8 @@ def to_parquet(self, df, path, filename=None, cloud_credentials=None,
"""
logger = LoggerManager.getLogger(__name__)

-        if cloud_credentials is None: cloud_credentials = DataConstants().cloud_credentials
-
-        if isinstance(path, list):
-            pass
-        else:
-            path = [path]
-
-        if filename is not None:
-            new_path = []
-
-            for p in path:
-                new_path.append(self.path_join(p, filename))
-
-            path = new_path
+        path, cloud_credentials = self._get_cloud_path(
+            path, filename=filename, cloud_credentials=cloud_credentials)

constants = DataConstants()

@@ -1283,7 +1274,7 @@ def to_csv_parquet(self, df, path, filename=None, cloud_credentials=None,
parquet_compression=parquet_compression,
use_pyarrow_directly=use_pyarrow_directly)

-    def to_csv(self, df, path, filename=None, cloud_credentials=None):
+    def _get_cloud_path(self, path, filename=None, cloud_credentials=None):
if cloud_credentials is None: cloud_credentials = constants.cloud_credentials

if isinstance(path, list):
@@ -1299,6 +1290,13 @@ def to_csv(self, df, path, filename=None, cloud_credentials=None):

path = new_path

+        return path, cloud_credentials
+
+    def to_csv(self, df, path, filename=None, cloud_credentials=None):
+
+        path, cloud_credentials = self._get_cloud_path(
+            path, filename=filename, cloud_credentials=cloud_credentials)
+
for p in path:
if "s3://" in p:
s3 = self._create_cloud_filesystem(cloud_credentials,
@@ -1312,6 +1310,30 @@ def to_csv(self, df, path, filename=None, cloud_credentials=None):
else:
df.to_csv(p)

+    def to_json(self, dictionary, path, filename=None, cloud_credentials=None):
+
+        path, cloud_credentials = self._get_cloud_path(
+            path, filename=filename, cloud_credentials=cloud_credentials)
+
+        for p in path:
+            if "s3://" in p:
+                s3 = self._create_cloud_filesystem(cloud_credentials,
+                                                   's3_filesystem')
+
+                path_in_s3 = self.sanitize_path(p).replace("s3://", "")
+
+                # Use 'w' for py3, 'wb' for py2
+                with s3.open(path_in_s3, 'w') as f:
+                    if isinstance(dictionary, dict):
+                        json.dump(dictionary, f, indent=4)
+                    else:
+                        dictionary.to_json(f)
+            else:
+                if isinstance(dictionary, dict):
+                    # json.dump needs a file handle, not a path string
+                    with open(p, 'w') as f:
+                        json.dump(dictionary, f, indent=4)
+                elif isinstance(dictionary, pd.DataFrame):
+                    dictionary.to_json(p)
+
def path_exists(self, path, cloud_credentials=None):
if cloud_credentials is None: cloud_credentials = constants.cloud_credentials

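A minimal sketch of calling the new to_json from user code (not part of this commit), assuming the class is importable as findatapy.market.ioengine.IOEngine; the paths, tickers and values are illustrative:

```python
import pandas as pd

from findatapy.market.ioengine import IOEngine

io_engine = IOEngine()

# A plain dict goes through json.dump
io_engine.to_json({"ticker": "EURUSD", "source": "quandl"},
                  "/tmp", filename="md_request.json")

# A DataFrame goes through DataFrame.to_json
df = pd.DataFrame({"EURUSD.close": [1.05, 1.06]})
io_engine.to_json(df, "/tmp", filename="eurusd.json")

# An "s3://bucket/folder" path would instead be routed through the
# s3fs filesystem created from cloud_credentials
```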
2 changes: 2 additions & 0 deletions findatapy/market/marketdatagenerator.py
@@ -162,6 +162,8 @@ def get_data_vendor(self, md_request):
data_vendor = DataVendorHuobi()
elif data_source in self._data_vendor_dict:
data_vendor = self._data_vendor_dict[data_source]
+        elif data_source in md_request.data_vendor_custom:
+            data_vendor = md_request.data_vendor_custom[data_source]
else:
logger.warn(str(data_source) +
" is an unrecognized data source")
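The change above is the heart of the commit: get_data_vendor now also consults md_request.data_vendor_custom, so a request can carry its own vendor objects for data sources findatapy does not know about. A sketch of wiring one in, assuming the DataVendor base class with a load_ticker hook lives in findatapy.market.datavendor (as findatapy's built-in vendors suggest); the feed name, tickers and column convention here are hypothetical:

```python
import pandas as pd

from findatapy.market import Market, MarketDataGenerator, MarketDataRequest
from findatapy.market.datavendor import DataVendor


class DataVendorMyFeed(DataVendor):
    """Hypothetical vendor returning a constant daily close series."""

    def load_ticker(self, md_request):
        index = pd.bdate_range(md_request.start_date,
                               md_request.finish_date)

        # findatapy convention: columns named ticker.field
        return pd.DataFrame({md_request.tickers[0] + ".close": 1.0},
                            index=index)


md_request = MarketDataRequest(
    start_date="02 May 2022", finish_date="20 May 2022",
    data_source="myfeed",  # matched against the data_vendor_custom keys
    tickers=["EURUSD"], vendor_tickers=["EURUSD"],
    fields=["close"], vendor_fields=["close"],
    data_vendor_custom={"myfeed": DataVendorMyFeed()})

df = Market(market_data_generator=MarketDataGenerator()).fetch_market(
    md_request)
```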
8 changes: 6 additions & 2 deletions findatapy/market/marketdatarequest.py
@@ -79,7 +79,8 @@ def generate_key(self):
["logger",
"_MarketDataRequest__abstract_curve",
"_MarketDataRequest__cache_algo",
-"_MarketDataRequest__overrides"]) \
+"_MarketDataRequest__overrides",
+"_MarketDataRequest__data_vendor_custom"]) \
+ "_df"

def __init__(self, data_source=None,
@@ -110,7 +111,8 @@ def __init__(self, data_source=None,
eikon_api_key=data_constants.eikon_api_key,
push_to_cache=True,
overrides={},
-                 freeform_md_request={}):
+                 freeform_md_request={},
+                 data_vendor_custom=data_constants.data_vendor_custom):

# Can deep copy MarketDataRequest (use a lock, so can be used with
# threading when downloading time series)
@@ -174,6 +176,7 @@
copy.deepcopy(md_request.freeform_md_request)

self.tickers = copy.deepcopy(md_request.tickers) # Need this after category in case have wildcard
+            self.data_vendor_custom = copy.deepcopy(md_request.data_vendor_custom)
else:
self.freq_mult = freq_mult

@@ -228,6 +231,7 @@
self.tickers = vendor_tickers

self.old_tickers = self.tickers
+        self.data_vendor_custom = data_vendor_custom

def __str__(self):
return "MarketDataRequest summary - " + self.generate_key()
2 changes: 2 additions & 0 deletions findatapy/util/dataconstants.py
@@ -271,6 +271,8 @@ class DataConstants(object):
default_data_environment = 'backtest'
possible_data_environment = ['backtest', 'prod']

+    data_vendor_custom = {}
+
# Overwrite field variables with those listed in DataCred or user provided dictionary override_fields
def __init__(self, override_fields={}):
try:
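Because the MarketDataRequest default points at data_constants.data_vendor_custom, a vendor mapping could also be supplied once through the override mechanism mentioned in the comment above, rather than per request. A sketch, reusing the hypothetical DataVendorMyFeed from the earlier example; note that the per-request argument is the more direct route, since findatapy modules read their DataConstants defaults when they are imported:

```python
from findatapy.util.dataconstants import DataConstants

# Override the empty default mapping; DataVendorMyFeed is the
# hypothetical vendor from the sketch above
constants = DataConstants(
    override_fields={"data_vendor_custom": {"myfeed": DataVendorMyFeed()}})
```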
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@
tickers, using configuration files. There is also functionality which is particularly useful for those downloading FX market data."""

setup(name='findatapy',
-      version='0.1.26',
+      version='0.1.27',
description='Market data library',
author='Saeed Amen',
author_email='saeed@cuemacro.com',
