-
Notifications
You must be signed in to change notification settings - Fork 11
/
data_transfer.py
101 lines (76 loc) · 2.43 KB
/
data_transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
To run our analyses, the KxY backend needs your data. The methods below are the only methods involved in sharing your data with us. The :code:`kxy` package only uploads your data `if` and `when` needed.
"""
import hashlib
import logging
# Import-time side effect: requests INFO-level configuration of the root
# logger. Under this configuration the logging.debug(...) calls made in this
# module are below the threshold and are not emitted.
logging.basicConfig(level=logging.INFO)
from time import time
import requests
import pandas as pd
from .client import APIClient
# In-process record of uploads: maps a dataframe identifier (the SHA-256 hex
# digest computed in upload_data) to True once the file is known to be on the
# server. It lives only for the lifetime of the interpreter and is consulted by
# upload_data to skip repeated uploads of the same content.
UPLOADED_FILES = {}
def generate_upload_url(identifier):
    """
    Requests a pre-signed URL to upload a dataset.

    Parameters
    ----------
    identifier: str
        A string that uniquely identifies the content of the file.

    Returns
    -------
    d : dict or None
        * The value of the :code:`presigned_url` field of the API response
          when that field is present (:code:`upload_data` reads its
          :code:`'url'` and :code:`'fields'` entries).
        * An empty dictionary when the API reports, through a truthy
          :code:`file_already_exists` field, that the file was previously
          uploaded.
        * :code:`None` in every other case: a non-OK status code, an OK
          response carrying neither field, or a response body that is not
          valid JSON.
    """
    api_response = APIClient.route(
        path='/wk/generate-signed-upload-url', method='POST',
        file_identifier=identifier, timestamp=int(time()))
    succeeded = api_response.status_code == requests.codes.ok

    try:
        payload = api_response.json()
    except ValueError:
        # Response.json() raises a ValueError subclass when the body is not
        # JSON (e.g. an HTML error page returned by an intermediate proxy).
        # Treat that as a failed request instead of letting it propagate.
        logging.warning(
            'Could not decode the API response (status code %s) as JSON.',
            api_response.status_code)
        return None

    if not succeeded:
        # Surface the server-provided explanation, if there is one.
        if 'message' in payload:
            logging.warning("\n%s", payload['message'])
        return None

    if 'presigned_url' in payload:
        return payload['presigned_url']

    if payload.get('file_already_exists', False):
        logging.debug('This file was previously uploaded.')
        # The empty dict is the "nothing to upload" signal, distinct from the
        # None returned on failure.
        return {}

    return None
def upload_data(df):
    """
    Uploads a dataframe to kxy servers.

    The dataframe is identified by the SHA-256 hex digest of its
    :code:`to_string()` rendering. If that identifier is already recorded in
    :code:`UPLOADED_FILES`, or the API reports that the file already exists,
    nothing is sent. Otherwise the dataframe is serialized to CSV (without the
    index) and posted to the pre-signed URL.

    NOTE(review): the identifier depends on how pandas renders the dataframe
    as text; dataframes whose text renderings coincide would presumably share
    an identifier -- confirm this is acceptable.

    Parameters
    ----------
    df: pd.DataFrame
        The dataframe to upload.

    Returns
    -------
    d : str or None
        The identifier of the dataframe when it is available on the server
        (freshly uploaded or previously uploaded), :code:`None` when no
        pre-signed URL could be obtained or the upload request returned an
        unexpected status code.
    """
    digest = hashlib.sha256(df.to_string().encode()).hexdigest()

    # Fast path: this process already knows the file is on the server.
    if UPLOADED_FILES.get(digest, False):
        logging.debug('The file with identifier %s was previously uplooaded' % digest)
        return digest

    signed = generate_upload_url(digest)
    if signed is None:
        return None

    # An empty dict means the server already holds this file.
    if signed == {}:
        UPLOADED_FILES[digest] = True
        return digest

    attachment = {'file': (digest + '.csv', df.to_csv(index=False))}
    response = requests.post(signed['url'], data=signed['fields'], files=attachment)

    success_codes = (
        requests.codes.ok,
        requests.codes.created,
        requests.codes.accepted,
        requests.codes.no_content,
    )
    if response.status_code not in success_codes:
        logging.warning('Failed to upload the file. Received status code %s.' % (response.status_code))
        return None

    # Remember the successful upload so later calls can skip the round trip.
    UPLOADED_FILES[digest] = True
    return digest