/
novelty.cpp
116 lines (89 loc) · 3.19 KB
/
novelty.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "includes/simsim_func.h"
//based on instructions from http://eplex.cs.ucf.edu/noveltysearch/userspage/
void printHouseMap(map<House *, float> v){
map<House *, float>::iterator i;
for(i=v.begin();i!=v.end();++i){
cout << i->first->getName() << ": " << i->second << endl;
}
}
void printHouseVec(vector<pair<House *, float>> v){
int i;
for(i=0;i<v.size();i++){
cout << v[i].first->getName() << ": " << v[i].second << endl;
}
}
bool isNovel(list<House *> novelSet, House* h, float simFitness){
//if nothing in the novel set, then it's the novel-est!
if(novelSet.size() == 0)
return true;
//check minimum criteria
if(simFitness < MINIMUM_SIM_FITNESS_CRITERIA){ //did not pass
return false;
}
int real_k = (K_VALUE > novelSet.size() ? novelSet.size() : K_VALUE);
float nd = avg_knn_dist(novelSet, h, real_k);
if(nd >= MINIMUM_NOVEL_DISTANCE) //if distance is big enough, return as novel
return true;
else
return false;
}
bool sortAsc(const pair<House *, float> &a, const pair<House *, float> &b){return (a.second < b.second); }
// Returns the Euclidean distance between two object-count maps
// (object name -> count).
//
// A key that appears in only one of the maps is treated as having a count of 0
// in the other, so it contributes (x-0)^2 to the squared distance. A key that
// is missing from both contributes nothing.
//
//   a, b    - the two count maps to compare (taken by value; neither the
//             caller's maps nor the local copies are modified)
//   returns - sqrt of the summed squared per-key differences
float euclidDist(std::map<std::string, int> a, std::map<std::string, int> b){
    float d_2 = 0;

    // Keys of a: paired with b's count when b has the key, otherwise with 0.
    // The lookup result must be compared against b.end() (the container that
    // was searched); comparing iterators of different containers is undefined.
    for (const auto& entry : a) {
        const auto match = b.find(entry.first);
        const int other = (match != b.end()) ? match->second : 0;
        const double diff = static_cast<double>(entry.second) - other;
        d_2 += diff * diff;
    }

    // Keys of b that a does not have: shared keys were already counted above.
    for (const auto& entry : b) {
        if (a.find(entry.first) != a.end())
            continue;
        const double diff = static_cast<double>(entry.second);
        d_2 += diff * diff;
    }

    return std::sqrt(d_2);
}
//returns the average distance to the k nearest neighbors from a set
float avg_knn_dist(list<House*> neighbors, House* noob, int k){
/// get the distances ///
map<House *, float> neighborDist;
list<House *>::iterator n;
for(n=neighbors.begin();n != neighbors.end();n++){
neighborDist.insert(pair<House *, float>((*n), euclidDist(noob->getObjectCt(), (*n)->getObjectCt())));
}
if(SHOW_DEBUG){
cout << "--Original--" << endl;
printHouseMap(neighborDist);
}
/// sort in ascending order ///
vector<pair<House *, float>> kvec;
// copy rom the map to the vector
map<House *, float>::iterator i2;
for (i2=neighborDist.begin(); i2!=neighborDist.end(); i2++){
kvec.push_back(make_pair(i2->first, i2->second));
}
//sort the vector
sort(kvec.begin(), kvec.end(), sortAsc);
if(SHOW_DEBUG){
cout << "--Sorted--" << endl;
printHouseVec(kvec);
}
/// average the first k distances ///
float d_sum = 0;
int i;
int k_min = std::min((int)kvec.size(), k);
for(i=0;i<k_min;i++){
d_sum += kvec[i].second;
}
return d_sum/k_min;
}
//int main(){return 0;}