/
preprocessGas.py
executable file
·102 lines (66 loc) · 3.01 KB
/
preprocessGas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Author = Joseph Cameron
# preprocessGas.py
# Saves clean gas csv data, with boiler efficiency and COP values accounted for.
# USAGE
# python preprocessGas.py gasData.csv
# --------------------------------------------------
# IMPORT STATEMENTS
import csv
import os
import pickle
import sys
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
# ---------------------------------------------------------------------
# READ AND MODEL GAS DATA
# Fail early with the usage string when no input file is given
if len(sys.argv) < 2:
    sys.exit('USAGE: python preprocessGas.py gasData.csv')
headers = ['Time', 'Sensor Value']
# Read the .csv, unconditionally skipping the first and the last row, which
# are expected to hold the IDEAL data strings that surround the readings.
# NOTE: if a file has no such wrapper rows, real readings are dropped here;
# adjust skiprows / skipfooter in that case.
# skipfooter is only supported by the python parser engine, so it is requested
# explicitly instead of relying on pandas' fallback (which emits a warning).
gas_data = pd.read_csv(sys.argv[1], names=headers, skiprows=1, skipfooter=1, engine='python')
# Convert the time strings to datetimes in place. This avoids the deprecated
# dict form of parse_dates and a str -> strptime round trip that fails on
# timestamps carrying fractional seconds.
gas_data['Time'] = pd.to_datetime(gas_data['Time'])
def boilerFunc(x, efficiency=0.9):
    """Return the useful heat obtained from a gas reading.

    Args:
        x: Gas reading (any value supporting multiplication by a float).
        efficiency: Fraction of the gas energy that becomes useful heat.
            Defaults to 0.9, i.e. a boiler efficiency of 90%.

    Returns:
        ``x`` multiplied by ``efficiency``.
    """
    return x * efficiency
def copFunc(x, cop=2.1):
    """Return the electricity a heat pump needs to supply the heat ``x``.

    Args:
        x: Useful heat (any value supporting division by a float).
        cop: Coefficient of performance used as the divisor.
            Defaults to 2.1.

    Returns:
        ``x`` divided by ``cop``.

    Raises:
        ValueError: If ``cop`` is zero or negative.
    """
    if cop <= 0:
        raise ValueError("cop must be a positive number")
    return x / cop
# Apply the boiler efficiency to obtain useful heat, then the COP to obtain the
# ASHP electricity needed to provide that same useful heat
gas_data['Sensor Value'] = gas_data['Sensor Value'].apply(boilerFunc).apply(copFunc)
# Round the Datetimes to the nearest 5 minutes, in order to match electricity data
gas_data['Time'] = gas_data['Time'].dt.round('5min')
# Sum gas data within 5 minute buckets.
# Only 'Sensor Value' is aggregated: summing helper columns (a positional
# index or a datetime column) is meaningless and raises a TypeError for
# datetime columns on recent pandas versions. Buckets without any reading
# come out as 0.
gas_data = gas_data.set_index('Time')[['Sensor Value']]
gas_data = gas_data.resample('5min').sum()
# ----------------------------------------------------------------------
# DELETE OUTLIERS
# Step 1: keep only strictly positive readings. Negative values are treated as
# sensor faults; note that zero-valued rows are discarded by this test as well.
positive_mask = gas_data["Sensor Value"].gt(0)
gas_data = gas_data.loc[positive_mask]
# Step 2: discard unrealistically large readings, also treated as sensor faults.
# The cut-off is four times the 99th percentile of the readings left after step 1.
upper_limit = gas_data["Sensor Value"].quantile(0.99) * 4
gas_data = gas_data.loc[gas_data["Sensor Value"].lt(upper_limit)]
# ----------------------------------------------------------------------
# SAVE CSV
# The output name starts with the first 4 characters of the input file NAME;
# the basename is taken so a directory part in the argument cannot leak into
# (or redirect) the output path.
output_dir = 'Clean-Gas-Data'
# Create the output directory if it is missing, instead of failing on write
os.makedirs(output_dir, exist_ok=True)
file_prefix = os.path.basename(sys.argv[1])[:4]
gas_data.to_csv(os.path.join(output_dir, file_prefix + '_clean.csv'), columns=['Sensor Value'], index=True)
# ----------------------------------------------------------------------
# SAVE/SHOW RESULTS
# Plot
#gas_data.plot(y='Sensor Value')
# Variable used to create pickles
#pickleVar = ideal_data
# Properly format the x-labels
#plt.gcf().autofmt_xdate()
# Y axis label
#plt.ylabel("Watt Hours")
# Title
#plt.title(sys.argv[1][:4] + " Clean Gas Data")
#legendText = plt.legend()
#quantileVal = ideal_data["Energy"].quantile(0.99)
#legendText.get_texts()[0].set_text(quantileVal)
# Toggle comment if figure should be shown first
#plt.show()
# Saving Pickle for future interactivity in the 'Pickles' directory
#pickle.dump(pickleVar, file('Pickles/' + sys.argv[1] + '_Figure.pickle','w'))
# Save Figure in the 'Figures' directory
#plt.savefig('EnergyFigures/' + sys.argv[1][:4] + 'Energy_Figure.png', dpi=1000)