-
Notifications
You must be signed in to change notification settings - Fork 0
/
combine_data.py
executable file
·92 lines (80 loc) · 2.94 KB
/
combine_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
from bs4 import BeautifulSoup
from datetime import datetime
import json
import os
import pandas as pd
import pytz
# Every input and output of this script lives under one data directory.
data_dir = 'data'

# Inputs: the directory of GPS track files and the dust readings CSV.
gps_path = os.path.join(data_dir, 'gps/')
dust_path = os.path.join(data_dir, 'dust/dusty.csv')

# Outputs share one base name and differ only in their extension.
_out_stem = os.path.join(data_dir, 'bm2019pm')
out_path_csv = _out_stem + '.csv'
out_path_geojsonp = _out_stem + '.geojsonp'
def get_gps_dataframe(gps_path):
    """Load every GPS track file in a directory into one time-indexed frame.

    Each entry in ``gps_path`` is parsed as XML and every ``trkpt`` element
    found contributes one row.

    Args:
        gps_path: Directory holding the track files. Every entry in the
            directory is opened and parsed.

    Returns:
        A DataFrame with ``lat`` and ``lon`` columns (floats), indexed by the
        parsed ``time`` text of each track point (index name ``time``). When
        no track points are found the frame is empty but still has both
        columns and a datetime index.
    """
    locs = []
    # Sorted so the row order does not depend on the directory listing order.
    for filename in sorted(os.listdir(gps_path)):
        # Read raw bytes so the parser can choose the encoding itself rather
        # than relying on the locale default; the context manager closes the
        # handle as soon as the file has been read.
        with open(os.path.join(gps_path, filename), 'rb') as gpx_file:
            soup = BeautifulSoup(gpx_file.read(), "lxml-xml")
        locs.extend(
            {
                'lat': float(location.attrs['lat']),
                'lon': float(location.attrs['lon']),
                'time': location.time.text,
            }
            for location in soup.find_all('trkpt')
        )
    # Naming the columns up front keeps the 'time' lookup valid even when no
    # track points were collected.
    df_loc = pd.DataFrame(locs, columns=['lat', 'lon', 'time'])
    # pop() removes the raw text column while handing it to the index.
    df_loc.index = pd.to_datetime(df_loc.pop('time'))
    return df_loc
def get_dust_dataframe(dust_path):
    """Read the dust CSV and index its rows by the parsed ``datetime`` column.

    Args:
        dust_path: Path (or file-like object) handed to ``pandas.read_csv``.
            The CSV must contain a ``datetime`` column.

    Returns:
        A DataFrame holding every column except ``datetime``, indexed by the
        parsed ``datetime`` values (index name ``datetime``).
    """
    readings = pd.read_csv(dust_path)
    # Pull the raw column out of the frame and promote it to the index.
    timestamps = pd.to_datetime(readings.pop('datetime'))
    readings.index = timestamps
    return readings
def _get_epoch_time_ms(this_time, local_tz='America/Los_Angeles'):
    """Return ``this_time`` as milliseconds since the Unix epoch (UTC).

    A naive timestamp is interpreted as wall-clock time in ``local_tz``; a
    timezone-aware timestamp is used as is.
    """
    if this_time.tzinfo is None:
        this_time = this_time.tz_localize(local_tz)
    # Shift to UTC, then drop the tz so it can be subtracted from the naive
    # epoch datetime below.
    utc_naive = this_time.tz_convert('UTC').tz_localize(None)
    return (utc_naive - datetime(1970, 1, 1)).total_seconds() * 1000.0


def create_geojson_feature(row):
    """Build one GeoJSON ``Feature`` dict for a single combined reading.

    Args:
        row: Mapping (for example a DataFrame row) with the keys
            ``datetime`` (a pandas ``Timestamp``), ``lat``, ``lon``,
            ``pm2.5`` and ``pm10``.

    Returns:
        A dict with ``type`` ``"Feature"``, a ``Point`` geometry at
        ``[lon, lat, 1]`` and these properties:
        ``pm25`` (PM2.5 rounded to one decimal), ``time`` (epoch
        milliseconds), ``iso_time`` (``isoformat()`` of the timestamp) and
        ``text`` (an HTML snippet with the time and both rounded PM values).
    """
    timestamp = row['datetime']
    pm25_value = round(float(row['pm2.5']), 1)
    pm10_value = round(float(row['pm10']), 1)
    iso_time = timestamp.isoformat()
    text = '{}<br>PM2.5: {}<br>PM10: {}'.format(iso_time, pm25_value, pm10_value)
    return {
        "type": "Feature",
        "properties": {
            "pm25": pm25_value,
            "time": _get_epoch_time_ms(timestamp),
            "iso_time": iso_time,
            "text": text,
        },
        "geometry": {
            "type": "Point",
            # GeoJSON positions are longitude first, then latitude.
            "coordinates": [row['lon'], row['lat'], 1],
        },
    }
def get_geojson_from_dataframe(df):
    """Serialize a combined readings frame as a JSONP-wrapped GeoJSON string.

    Args:
        df: DataFrame whose index holds the reading timestamps and whose
            columns supply what ``create_geojson_feature`` reads from each
            row. The frame is not modified.

    Returns:
        The string ``eqfeed_callback(<json>)`` where ``<json>`` is a GeoJSON
        ``FeatureCollection`` containing one feature per row of ``df``.
    """
    # assign() works on a copy, so the caller's frame does not gain a column.
    rows = df.assign(datetime=df.index)
    features = [create_geojson_feature(row) for _, row in rows.iterrows()]
    geojson = {
        # GeoJSON type names are case-sensitive.
        "type": "FeatureCollection",
        "metadata": {},
        "features": features,
    }
    return 'eqfeed_callback({})'.format(json.dumps(geojson))
def main():
    """Join the GPS and dust data per minute and write the CSV and JSONP files."""
    # Put both sources on the same one-minute grid so the index join lines
    # up. '1min' yields the same bins as the '60S' alias, which newer pandas
    # releases deprecate.
    df_gps = get_gps_dataframe(gps_path).resample('1min').mean()
    df_dust = get_dust_dataframe(dust_path).resample('1min').mean()
    df = df_gps.join(df_dust)

    # Slice for BM 2019 (remove test values) and drop minutes that are
    # missing either a position or a dust reading.
    df = df.loc['2019-08-23':'2019-09-02'].dropna()
    df.to_csv(out_path_csv, index_label='datetime')

    geojson = get_geojson_from_dataframe(df)
    with open(out_path_geojsonp, 'w') as outfile:
        outfile.write(geojson)


if __name__ == '__main__':
    main()