/
main.py
56 lines (42 loc) · 2.37 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#run outlying aspect mining for your own dataset
import argparse
import numpy as np
import pandas as pd
import sys
sys.path.append("SPFlow/src/")
import spnOutlierDetection as spnc
def build_parser():
    """Create the command-line parser for the SOAM script.

    Every numeric option carries an explicit ``type`` so that a value given
    on the command line arrives as a number, exactly like its default,
    instead of as a string. ``--search`` and ``--explanation`` are limited
    to the values this script actually dispatches on, so a typo is rejected
    by argparse before any work is done.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        prog='SOAM',
        description='Outlying Aspect Mining using Sum-Product Networks')
    parser.add_argument(
        'filename',
        help="(unlabeled) dataset as CSV file. last column indicates whether you want to get outlying aspects for the sample (1) or not (0)")
    parser.add_argument(
        "-t", "--threshold", type=float, default=2.7,
        help="threshold of elbow method")
    parser.add_argument(
        "-w", "--width", type=int, default=10,
        help="beam width of search algorithm")
    # forward -> spnc.explain_beamsearch, backward -> spnc.explain_simplify
    parser.add_argument(
        "-s", "--search", choices=("forward", "backward"), default="forward",
        help="search strategy (forward/backward)")
    parser.add_argument(
        "-e", "--explanation", choices=("threshold", "zscore"),
        default="threshold",
        help="explanation strategy (threshold aka elbow / zscore)")
    # Parsed as int to match the type of the default value.
    parser.add_argument(
        '-m', "--mparameter", type=int, default=200,
        help="SPN regularization parameter")
    parser.add_argument(
        "-d", "--dim", type=int, default=5,
        help="maximum dimension of the explanation")
    return parser


def split_queries(dat):
    """Split a loaded data matrix into features and queried samples.

    Args:
        dat: 2-D array whose last column is the query flag and whose
            remaining columns are the features.

    Returns:
        tuple: ``(X, Xoutlier)`` where ``X`` holds all rows without the flag
        column and ``Xoutlier`` holds the rows of ``X`` whose flag is not 0.
    """
    X = dat[:, :-1]
    y = dat[:, -1]
    # Any row whose flag is not exactly 0 counts as a queried sample.
    return X, X[y != 0]


def main(argv=None):
    """Run outlying aspect mining on a CSV dataset and print the result.

    Args:
        argv: optional list of command-line arguments; ``None`` makes
            argparse read ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # load data (the first line of the CSV is skipped as a header)
    dat = np.genfromtxt(args.filename, delimiter=',', skip_header=1)
    X, Xoutlier = split_queries(dat)

    # fit SPN
    # NOTE(review): the elbow threshold is also forwarded as the last
    # positional argument of fit_spn, as in the original call - confirm
    # that this is intended.
    spn = spnc.fit_spn(
        X, args.mparameter, "gmm", "rdc", "gaussian", args.threshold)

    # do the search, then extract the actual explanations from the search
    # results; argparse ``choices`` guarantees one of the four combinations
    if args.search == "forward":
        allresults = spnc.explain_beamsearch(
            spn, Xoutlier, maxdim=args.dim, beamwidth=args.width)
        if args.explanation == "threshold":
            explanations = spnc.extract_explanation(
                allresults, args.threshold)
        else:
            explanations = spnc.extract_explanation_zscore(
                allresults, X, spn)
    else:
        allresults = spnc.explain_simplify(spn, Xoutlier, maxdim=args.dim)
        if args.explanation == "threshold":
            explanations = spnc.extract_explanation_simplify2(
                allresults, args.threshold)
        else:
            explanations = spnc.extract_explanation_simplify4(
                allresults, args.threshold, X, spn)

    # return outlying aspects (=subspaces) for each of the queried samples
    # (which had a 1 in the last column)
    print(explanations)


if __name__ == "__main__":
    main()