-
Notifications
You must be signed in to change notification settings - Fork 3
/
runExperiments.py
197 lines (195 loc) · 10.4 KB
/
runExperiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
from GraphNeuralNet import GGNN, printOrLog, deltaToTimeString
import os, argparse, pickle, time, numpy as np
import tensorflow as tf
import DNFGen
import time
def str2bool(v):
    """Parse a truthy/falsy command-line string into a bool.

    Accepts the usual spellings in any case ('yes'/'no', 'true'/'false',
    't'/'f', 'y'/'n', '1'/'0'); anything else is rejected with an
    argparse.ArgumentTypeError so argparse reports a clean usage error.
    """
    normalised = v.lower()
    if normalised in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalised in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
def log_or_print(v):
    """Parse the -logOpt command-line choice.

    Returns True for 'l'/'log' (write progress to a text file) and False
    for 'p'/'print' (print to console), case-insensitively; any other
    value raises argparse.ArgumentTypeError.
    """
    choice = v.lower()
    if choice in ('l', 'log'):
        return True
    if choice in ('p', 'print'):
        return False
    raise argparse.ArgumentTypeError("Invalid Option Returned for log_or_print")
def runExperiments_SynthData():
    """Command-line driver for testing the Neural#DNF system on synthetic data.

    Loads pickled batches of labelled DNF formulas from a directory, runs the
    GGNN forward pass on each batch, and accumulates:
      * the fraction of formulas whose network estimate falls within each
        absolute-difference threshold of the KLM approximation,
      * (optionally) the same fractions broken down by formula width,
      * per-batch run times and per-formula statistics.
    The collected statistics are pickled to the chosen output file; progress
    is written to text log files or printed to the console as the run goes.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow C++ log spam
    argParser = argparse.ArgumentParser(description='Test the Neural#DNF system')
    argParser.add_argument("dataDir", type=str,
                           help="The directory from which labelled test data is loaded")
    argParser.add_argument("-communicationProtocol", type=int, default=2, metavar='',
                           help="Communication Protocol to use for testing (Default is 2). ")
    argParser.add_argument("-embeddingDim", type=int, default=128, metavar='',
                           help="The embedding size to use (Default 128)")
    argParser.add_argument('-numIter', type=int, default=8, metavar='',
                           help="The number of message passing iterations to run (Default 8)")
    argParser.add_argument("-weightsDir", type=str, default=None, metavar='',
                           help="The directory from which to load network parameters")
    argParser.add_argument("-measureRunTime", type=str2bool, default=True, metavar='',
                           help="Specify whether timing tests should be conducted (Default True)")
    argParser.add_argument("-widthBasedAnalysis", type=str2bool, default=False, metavar='',
                           help="Measure Precision With Respect to Width (Default False)")
    argParser.add_argument("-outputFileName", type=str, default="Results.p", metavar='',
                           help="Set a custom output file name (Default Results.p)")
    argParser.add_argument("-logOpt", type=log_or_print, default=True, metavar='',
                           help="Choose whether to log progress in text file (L, Default) or print on console (P)")
    # Parse the arguments.
    args = argParser.parse_args()
    nbIter = args.numIter
    # None -> GGNN falls back to its default parameter initialisation.
    paramLocation = args.weightsDir + "/values.ckpt" if args.weightsDir is not None else None
    dataDir = args.dataDir
    commProt = args.communicationProtocol
    embDim = args.embeddingDim
    measureRunTime = args.measureRunTime
    fileName = args.outputFileName
    widthMeas = args.widthBasedAnalysis
    log_opt = args.logOpt  # True -> append to log file, False -> print to console

    # Initialise the graph neural net and load (or initialise) its parameters.
    GraphNet = GGNN(nbIterations=nbIter, communicationProtocol=commProt,
                    embeddingDim=embDim, widthExtract=widthMeas)
    GraphNet.loadParamsSession(paramLocation=paramLocation)

    # os.path.join works whether or not dataDir carries a trailing separator
    # (the original string concatenation silently required one).
    dataFiles = [os.path.join(dataDir, f) for f in os.listdir(dataDir) if f.endswith(".p")]

    thresholds = np.array([0.01, 0.02, 0.05, 0.1, 0.15, 0.2])
    # Column vector of thresholds so a single broadcast compares every formula
    # against every threshold at once.
    thresholdsComparable = np.expand_dims(thresholds.T, axis=-1)
    nbThresholds = thresholds.shape[0]
    counts = np.zeros(nbThresholds)  # per-threshold success counts over all formulas

    # Measured entities, accumulated across all batches.
    batchRunTimes = []
    batchSizes = []
    networkMus = []
    networkSigmas = []
    KLMMus = []
    AbsDiffs = []
    KLDivs = []
    nbFormulas = 0

    test_widths = [3, 5, 8, 13, 21, 34]
    if widthMeas:
        sess = tf.Session()  # NOTE(review): not referenced below — presumably needed by width extraction; confirm
        observedWidths = []
        # Per-width accumulators: an integer success count per threshold and a
        # scalar trial count. (The original stored Python lists, so '+=' with a
        # numpy array *extended* the list instead of adding element-wise, and
        # '+=' with a numpy scalar raised TypeError.)
        width_success_dict = {width: np.zeros(nbThresholds, dtype=np.int64) for width in test_widths}
        width_total_trial_dict = {width: 0 for width in test_widths}
    else:
        observedWidths = None
        sess = None
        width_success_dict = None
        width_total_trial_dict = None

    # Start experiments.
    print(" Thresholds: " + str(thresholds))
    try:
        filePath = "test_progress_log.txt"
        with open(filePath, "w") as logFile:
            logFile.write("Log File Started: \r\n")
        intermediate_resultsPath = "intermediate_results_log.txt"
        with open(intermediate_resultsPath, "w") as intermediate_results:
            intermediate_results.write("Log File Started: \r\n")

        start_time = time.time()
        for dataFile in dataFiles:
            # Progress report before each file, plus intermediate accuracies.
            intermediate_time = time.time()
            printOrLog(deltaToTimeString(intermediate_time - start_time)+")" + str(dataFile)+"||" +
                       str(nbFormulas) + " formulas tested", log_opt, filePath)
            printOrLog(deltaToTimeString(intermediate_time - start_time) + ")" + str(dataFile), log_opt,
                       intermediate_resultsPath)
            printOrLog("-----------", log_opt, intermediate_resultsPath)
            if nbFormulas > 0:
                printOrLog(str(np.divide(counts, nbFormulas)), log_opt, intermediate_resultsPath)
                if widthMeas:
                    for width in test_widths:
                        printOrLog("Width " + str(width) + ":", log_opt, intermediate_resultsPath)
                        printOrLog(str(np.divide(width_success_dict[width], width_total_trial_dict[width])),
                                   log_opt, intermediate_resultsPath)
            with open(dataFile, "rb") as file:
                batchSet = pickle.load(file)  # list of pre-built batches
            # NOTE: the original inner loop variable shadowed the outer 'index';
            # neither index was used, so plain iteration suffices.
            for batch in batchSet:
                nbC, posLit, disjConj, conjLit, approxKL, approxRA = batch
                widths = (GraphNet.extractWidthFromBatch(conjLit=conjLit, disjConj=disjConj)
                          if widthMeas else None)
                # Single forward pass; timed only when requested (the original
                # duplicated the identical call in both timing branches).
                tBefore = time.time()
                logMeans, logVariances = GraphNet.forwardPass(nbConjunctions=nbC, posLitProbs=posLit,
                                                              disjConj=disjConj, conjLit=conjLit,
                                                              createSession=False)
                if measureRunTime:
                    batchRunTimes.append(time.time() - tBefore)
                batchSize = approxKL.shape[0]
                batchSizes.append(batchSize)
                # Convert log-probabilities back to probabilities.
                approxProbValues = np.exp(approxKL)
                networkProbValues = np.exp(logMeans)
                approxLogValues = approxKL[:, 0]
                approxStDevs = approxKL[:, 1]
                networkLogValues = logMeans[:, 0]
                approxValues = approxProbValues[:, 0]
                networkValues = networkProbValues[:, 0]
                logStDevValues = logVariances[:, 0]
                absDifferences = computeAbsDiff(networkValues, approxValues)
                if widthMeas:
                    for width in test_widths:
                        mask = (widths == width)
                        nb_compat = int(np.count_nonzero(mask))
                        if nb_compat == 0:
                            continue  # no formulas of this width in the batch
                        absDifferenceForWidth = absDifferences[mask]
                        absDiffRep = np.repeat(np.expand_dims(absDifferenceForWidth, axis=0),
                                               nbThresholds, axis=0)
                        threshCompArray = (absDiffRep <= thresholdsComparable) * 1
                        width_success_dict[width] += np.sum(threshCompArray, axis=1)
                        width_total_trial_dict[width] += nb_compat
                networkMus.extend(networkValues)
                networkSigmas.extend(logStDevValues)
                KLMMus.extend(approxValues)
                AbsDiffs.extend(absDifferences)
                KLDivs.extend(computeKLDiv(networkLogValues, logStDevValues,
                                           approxLogValues, approxStDevs))
                if widthMeas:
                    observedWidths.extend(widths)
                # Check how many match the thresholds (aggregate across all widths).
                AbsDiffsRepeated = np.repeat(np.expand_dims(absDifferences, axis=0),
                                             nbThresholds, axis=0)
                threshCompArray = (AbsDiffsRepeated <= thresholdsComparable) * 1
                counts += np.sum(threshCompArray, axis=1)
                nbFormulas += batchSize
        # End of testing: report final proportions and pickle the statistics.
        with open(fileName, "wb") as f:
            print("Final Proportions:")
            print(np.divide(counts, nbFormulas))
            if widthMeas:
                for width in test_widths:
                    print("Width " + str(width) + ":")
                    print(np.divide(width_success_dict[width], width_total_trial_dict[width]))
                statsObject = (batchRunTimes, batchSizes, networkMus, networkSigmas,
                               KLMMus, AbsDiffs, KLDivs, observedWidths)
            else:
                statsObject = (batchRunTimes, batchSizes, networkMus, networkSigmas,
                               KLMMus, AbsDiffs, KLDivs)
            pickle.dump(statsObject, f)  # end of experiments
    except KeyboardInterrupt:
        # Make the run robust to Ctrl-C. NOTE(review): statistics accumulated
        # so far are discarded on interrupt — confirm whether they should be
        # saved instead.
        pass
def computeKLDiv(networkLogMean, networkLogStDev, approxMean, approxStDev):
    """Element-wise KL divergence between two Gaussians in log-space.

    Computes the closed form KL(N(networkLogMean, networkLogStDev) ||
    N(approxMean, approxStDev)):
        log(s2/s1) + (s1^2 + (m2 - m1)^2) / (2 s2^2) - 1/2
    Operates element-wise on numpy arrays (or plain floats).
    """
    log_stdev_ratio = np.log(approxStDev / networkLogStDev)
    quadratic_term = np.divide(networkLogStDev ** 2 + (approxMean - networkLogMean) ** 2,
                               2 * approxStDev ** 2)
    return log_stdev_ratio + quadratic_term - 0.5
def computeAbsDiff(networkMean, approxMean):
    """Return the element-wise absolute difference |approxMean - networkMean|."""
    difference = approxMean - networkMean
    return np.abs(difference)
def saveExcelSheet(book, fileName):
    """Announce and persist a workbook-like object to fileName.

    `book` is any object exposing a .save(fileName) method (e.g. an
    openpyxl/xlwt workbook).
    """
    notice = "Saving Results so far and quitting... "
    print(notice)
    book.save(fileName)
# Script entry point: parse CLI arguments and run the synthetic-data experiments.
if __name__ == "__main__":
    runExperiments_SynthData()