/
showTimeGaps.py
executable file
·137 lines (97 loc) · 4.11 KB
/
showTimeGaps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Author = Joseph Cameron
# showTimeGaps.py
# Looks for time gaps in data collection, and produces a graph to show how many gaps of varying sizes occur within the dataset.
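# USAGE
# python showTimeGaps.py
# Run from the directory that holds the gas data .csv files: every *.csv in the
# current working directory is processed; command-line arguments are not read.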
# --------------------------------------------------
# IMPORT STATEMENTS
import pandas as pd
from datetime import datetime
import csv
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pickle
import sys
import glob
import numpy as np
from numpy import diff
from matplotlib.pyplot import cm
# ---------------------------------------------------------------------
# Files within this directory
# Every *.csv in the current working directory is processed, in sorted order.
files = sorted(glob.glob('*.csv'))

# Line colours
# Only consumed by the per-file plot, which is currently disabled.
colors = iter(cm.rainbow(np.linspace(0, 1, 23)))

# Gap thresholds in seconds. For each threshold we count how many consecutive
# readings are separated by MORE than that many seconds.
# NOTE(review): the list jumps from 36000 to 48000 (no 42000) -- confirm whether
# that is intentional. The plotting section further down hard-codes the same
# list, so the two must be changed together.
gaps = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,2,3,4,5,6,7,8,9,10,20,30,40,50,60,120,180,240,300,360,420,480,540,600,1200,1800,2400,3000,3600,4200,4800,5400,6000,12000,18000,24000,30000,36000,48000,54000,60000]

# One row per input file; each row holds one count per entry of `gaps`.
overallCounts = []

for f in files:
    # READ AND MODEL GAS DATA
    headers = ['Time', 'Energy']
    # Read .csv while getting rid of IDEAL data strings surrounding the data on
    # the first and/or last rows.
    # Be careful with skiprows and skipfooter, maybe remove them.
    # skipfooter is not supported by the C parser, so the python engine is
    # requested explicitly instead of relying on a warning-emitting fallback.
    gas_data = pd.read_csv(f, names=headers, skiprows=1, skipfooter=1, engine='python')

    # Formatting dates
    # Parse timestamps after loading: the dict form of `parse_dates` is
    # deprecated in recent pandas, and round-tripping through str()/strptime
    # fails on timestamps that carry fractional seconds.
    gas_data['Time'] = pd.to_datetime(gas_data['Time'])
    # Kept under the old column name as well, for code that expects 'Datetime'.
    gas_data['Datetime'] = gas_data['Time']

    # ------------------------------------------------------------------
    # DELETE OUTLIERS
    # First, remove readings that are zero or negative, as this is clearly due
    # to a sensor fault.
    gas_data = gas_data[gas_data['Energy'] > 0]
    # Secondly, remove readings that are unrealistically large: anything at or
    # above four times the 99th percentile of the remaining readings.
    gas_data = gas_data[gas_data['Energy'] < (gas_data['Energy'].quantile(0.99) * 4)]

    # ------------------------------------------------------------------
    # SOLVE TIME GAPS
    # Elapsed seconds between each surviving reading and the previous one.
    # total_seconds() is required here: `.dt.seconds` only returns the seconds
    # *component* (0-86399) and silently discards whole days, so a gap of a day
    # or more would be counted as a much shorter one.
    # The first row has no predecessor (NaN) and never exceeds a threshold.
    gap_seconds = gas_data['Time'].diff().dt.total_seconds()

    # Computed once per file, then compared against every threshold.
    counts = [int((gap_seconds > threshold).sum()) for threshold in gaps]

    overallCounts.append(counts)
# SAVE/SHOW RESULTS
# Gap thresholds in seconds. This must stay identical to the `gaps` list used
# when the per-file counts were collected, otherwise x and y will not line up.
x = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,2,3,4,5,6,7,8,9,10,20,30,40,50,60,120,180,240,300,360,420,480,540,600,1200,1800,2400,3000,3600,4200,4800,5400,6000,12000,18000,24000,30000,36000,48000,54000,60000]

# Without any input files there is nothing to average: the mean of an empty
# array is a scalar NaN and diff() would then fail with an obscure error.
if not overallCounts:
    raise SystemExit('No .csv files found in the current directory; nothing to plot.')

# Average the per-threshold counts over all files (one row per file).
y = np.array(overallCounts).mean(axis=0)

# y[i] is the mean number of gaps longer than x[i], so the absolute difference
# of neighbouring entries is the mean number of gaps that fall between two
# consecutive thresholds. Each value is plotted at the lower threshold of its
# interval, hence the last threshold is dropped.
# NOTE(review): the title below describes the cumulative counts, not these
# per-interval differences -- confirm which one is intended.
dy = np.absolute(diff(y))
x = x[:-1]

plt.plot(x, dy)
plt.yscale('log')
plt.xscale('log')
plt.ylabel('Count')
plt.xlabel('Gaps (seconds)')
plt.title('Number of Time Gaps > Time Gap Value')
#plt.legend()
plt.grid()
plt.show()
# Plot
#gas_data.plot(x='Datetime', y='Energy')
# Variable used to create pickles
#pickleVar = ideal_data
# Properly format the x-labels
#plt.gcf().autofmt_xdate()
# Y axis label
#plt.ylabel("Watt Hours")
# Title
#plt.title(sys.argv[1][:4] + " Energy Data")
# Toggle comment if figure should be shown first
#plt.show()
# Saving Pickle for future interactivity in the 'Pickles' directory
#pickle.dump(pickleVar, file('Pickles/' + sys.argv[1] + '_Figure.pickle','w'))
# Save Figure in the 'Figures' directory
#plt.savefig('EnergyFigures/' + sys.argv[1][:4] + 'Energy_Figure.png', dpi=1000)