-
Notifications
You must be signed in to change notification settings - Fork 3
/
runExperimentsBySize.py
123 lines (120 loc) · 6.62 KB
/
runExperimentsBySize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from GraphNeuralNet import GGNN
import os, argparse, pickle, time, numpy as np
def str2bool(v):
    """Interpret a command-line value as a boolean (for use as an argparse ``type``).

    Args:
        v: Either an actual ``bool`` (returned unchanged) or a string.
            Strings are compared case-insensitively, ignoring surrounding
            whitespace, against ``yes/true/t/y/1`` and ``no/false/f/n/0``.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the string is not one of the accepted
            spellings.
    """
    # A real bool has no .lower(); pass it through instead of crashing.
    if isinstance(v, bool):
        return v
    normalised = v.strip().lower()
    if normalised in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalised in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
def runExperiments_SynthData():
    """Run the GGNN over pickled test batches, one pass per formula size.

    Command-line arguments (read from ``sys.argv``) select the data directory,
    the network hyper-parameters, an optional weights directory, whether the
    forward passes are timed, and the output file name.

    For every size in a fixed list, the matching ``.p`` files in ``dataDir``
    are unpickled and every batch is pushed through ``GraphNet.forwardPass``.
    Per size, the fraction of formulas whose absolute difference (network
    value vs. the value stored in the batch) is within each threshold is
    printed, and the tuple ``(batchRunTimes, batchSizes, networkMus,
    networkSigmas, KLMMus, AbsDiffs, KLDivs)`` is pickled to
    ``"<size>_<outputFileName>"`` in the current working directory.

    A ``KeyboardInterrupt`` abandons the size currently being processed (its
    results are not written if the interrupt arrives before the dump) and the
    loop continues with the next size.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    argParser = argparse.ArgumentParser(description='Test the Neural#DNF system')
    argParser.add_argument("dataDir",type=str,help="The directory from which labelled test data is loaded")
    argParser.add_argument("-communicationProtocol", type=int, default=2, metavar='',
                           help="Which Communication Protocol to use for testing (Default 2)")
    argParser.add_argument("-embeddingDim", type=int, default=128, metavar='',
                           help="The embedding size to use (Default 128)")
    argParser.add_argument('-numIter',type=int, metavar='',
                           help="The number of message passing iterations to run (Default 8)",default=8)
    argParser.add_argument("-weightsDir",type=str,default=None, metavar='',
                           help="The directory from which to load network parameters")
    argParser.add_argument("-measureRunTime",type=str2bool,default=True, metavar='',
                           help="Specify whether timing tests should be conducted (Default True)")
    argParser.add_argument("-outputFileName", type=str, default="Results.p", metavar='',
                           help="Set a custom output file name (Default Results.p)")
    # Parse the Arguments
    args = argParser.parse_args()
    nbIter = args.numIter
    if args.weightsDir is not None:
        paramLocation = args.weightsDir+"/values.ckpt"
    else:
        paramLocation = None
    dataDir = args.dataDir
    commProt = args.communicationProtocol
    embDim = args.embeddingDim
    measureRunTime = args.measureRunTime
    fileName = args.outputFileName
    thresholds = np.array([0.01, 0.02, 0.05, 0.1, 0.15, 0.2])
    # Loop-invariant: thresholds as a column so they broadcast against a row
    # of per-formula differences.
    thresholdsComparable = np.expand_dims(thresholds, axis=-1)
    nbThresholds = thresholds.shape[0]
    GraphNet = GGNN(nbIterations=nbIter,communicationProtocol=commProt,embeddingDim=embDim)  # Initialise the Graph Neural Net
    sizes = [50,100,250,500,750,1000,2500,5000]
    print(" Thresholds: " + str(thresholds))
    for size in sizes:
        # Load data by size. os.path.join keeps the path valid whether or not
        # dataDir was given with a trailing separator.
        # NOTE(review): the suffix is "_<size>" followed by a literal "0.p", so
        # e.g. a file ending "_2500.p" is picked up for size 250, not 2500 -
        # confirm this matches the data files' naming scheme.
        dataFiles = [os.path.join(dataDir, f) for f in os.listdir(dataDir)
                     if f.endswith("_"+str(size)+"0.p")]
        counts = np.zeros(nbThresholds)
        # Measured Entities
        batchRunTimes = []
        batchSizes = []
        networkMus = []
        networkSigmas = []
        KLMMus = []
        AbsDiffs = []
        KLDivs = []
        nbFormulas = 0
        # Start experiments
        print("Size "+str(size)+":")
        try:
            for dataFile in dataFiles:
                # SECURITY: pickle.load can execute arbitrary code; only point
                # dataDir at trusted files.
                with open(dataFile, "rb") as file:
                    batchSet = pickle.load(file)  # Load batch set
                # NOTE(review): called once per data file, as in the original;
                # whether a single call before the loops would suffice depends
                # on GGNN.loadParamsSession, which is not visible here.
                GraphNet.loadParamsSession(paramLocation=paramLocation)
                for batch in batchSet:
                    nbC, posLit, disjConj, conjLit, approxKL, _approxRA = batch  # Load individual batch
                    # Single forward-pass call site; the elapsed time is only
                    # recorded when timing was requested.
                    tBefore = time.perf_counter()
                    logMeans, logVariances = GraphNet.forwardPass(nbConjunctions=nbC, posLitProbs=posLit,
                                                                  disjConj=disjConj, conjLit=conjLit,
                                                                  createSession=False)
                    runTime = time.perf_counter() - tBefore
                    batchSize = approxKL.shape[0]
                    batchSizes.append(batchSize)
                    # Now log the individual batches
                    if measureRunTime:
                        batchRunTimes.append(runTime)
                    # Column 0 of each array is treated as a log-value and
                    # exponentiated; column 1 of approxKL is passed on as a
                    # standard deviation.
                    approxLogValues = approxKL[:, 0]
                    approxStDevs = approxKL[:, 1]
                    networkLogValues = logMeans[:, 0]
                    logStDevValues = logVariances[:, 0]
                    approxValues = np.exp(approxLogValues)
                    networkValues = np.exp(networkLogValues)
                    absDifferences = computeAbsDiff(networkValues, approxValues)
                    networkMus.extend(networkValues)
                    networkSigmas.extend(logStDevValues)
                    KLMMus.extend(approxValues)
                    AbsDiffs.extend(absDifferences)
                    KLDivs.extend(computeKLDiv(networkLogValues, logStDevValues, approxLogValues, approxStDevs))
                    # Check how many match the thresholds: broadcasting the
                    # row of differences against the threshold column gives a
                    # (nbThresholds, batch) boolean grid.
                    withinThreshold = np.expand_dims(absDifferences, axis=0) <= thresholdsComparable
                    counts += np.sum(withinThreshold, axis=1)
                    nbFormulas += batchSize
            # End of testing
            if nbFormulas > 0:
                print(np.divide(counts, nbFormulas))
            else:
                # Avoid a 0/0 division (NaNs plus a RuntimeWarning) when no
                # data file matched this size.
                print("No formulas found for size " + str(size) + ".")
            statsObject = (batchRunTimes, batchSizes, networkMus, networkSigmas, KLMMus, AbsDiffs, KLDivs)
            with open(str(size)+"_"+fileName, "wb") as f:  # Make it file-specific
                pickle.dump(statsObject, f)  # End of experiments
        except KeyboardInterrupt:  # Make robust to interruption
            # Deliberate best-effort: give up on this size and carry on with
            # the next one, but say so instead of failing silently.
            print("Interrupted: skipping the rest of size " + str(size) + ".")
def computeKLDiv(networkLogMean, networkLogStDev, approxMean, approxStDev):
    """Elementwise Gaussian KL divergence from the network's estimate to the approximation.

    Evaluates ``log(s_a / s_n) - 1/2 + (s_n**2 + (m_a - m_n)**2) / (2 * s_a**2)``
    where ``m_n, s_n`` are ``networkLogMean, networkLogStDev`` and
    ``m_a, s_a`` are ``approxMean, approxStDev``. Both ``*StDev`` arguments
    are used as standard deviations.

    Args:
        networkLogMean: Mean(s) of the network distribution.
        networkLogStDev: Standard deviation(s) of the network distribution.
        approxMean: Mean(s) of the reference distribution.
        approxStDev: Standard deviation(s) of the reference distribution.

    Returns:
        The divergence, with the broadcast shape of the inputs.
    """
    logScaleRatio = np.log(approxStDev / networkLogStDev)
    squaredMeanGap = (approxMean - networkLogMean)**2
    numerator = networkLogStDev**2 + squaredMeanGap
    denominator = 2*approxStDev**2
    return logScaleRatio - 0.5 + np.divide(numerator, denominator)
def computeAbsDiff(networkMean, approxMean):
    """Return the elementwise absolute difference ``|approxMean - networkMean|``.

    Args:
        networkMean: Value(s) produced by the network.
        approxMean: Reference value(s) to compare against.

    Returns:
        The non-negative difference, with the broadcast shape of the inputs.
    """
    signedGap = approxMean - networkMean
    return np.absolute(signedGap)
def saveExcelSheet(book,fileName):
    """Announce the save on stdout, then write ``book`` to ``fileName``.

    Args:
        book: Any object exposing a ``save(fileName)`` method.
        fileName: Destination handed straight to ``book.save``.
    """
    notice = "Saving Results so far and quitting... "
    print(notice)
    persist = book.save
    persist(fileName)
# Script entry point: run the experiments only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    runExperiments_SynthData()