// ClassCharacteristicSet.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Whetstone;
namespace TextCharacteristicLearner
{
/// <summary>
/// Represents the set of k-mers that are characteristic for a certain class.
/// Each k-mer key maps to a score; <see cref="BuildSubtractiveDifference"/>
/// is the factory that populates those scores.
/// </summary>
/// <typeparam name="A">Type parameter passed through to <c>Kmer&lt;A&gt;</c>.</typeparam>
public class ClassCharacteristicSetKmer<A> : Dictionary<Kmer<A>, double>
{
    /// <summary>Name given to this characteristic set at construction.</summary>
    public string name;

    // The k supplied at construction. Stored only; nothing in this class reads it back.
    readonly uint k;

    /// <summary>
    /// Creates an empty characteristic set with the default dictionary capacity.
    /// </summary>
    /// <param name="name">Name of the set.</param>
    /// <param name="k">The k value associated with the set.</param>
    public ClassCharacteristicSetKmer (string name, uint k) {
        this.name = name;
        this.k = k;
    }

    /// <summary>
    /// Creates an empty characteristic set, pre-sizing the underlying dictionary.
    /// </summary>
    /// <param name="name">Name of the set.</param>
    /// <param name="k">The k value associated with the set.</param>
    /// <param name="initialCapacity">
    /// Initial capacity handed to the base dictionary. The base constructor takes
    /// an <c>int</c>, so values above <c>int.MaxValue</c> are not representable.
    /// </param>
    public ClassCharacteristicSetKmer (string name, uint k, uint initialCapacity) : base((int)initialCapacity) {
        this.name = name;
        this.k = k;
    }

    /// <summary>
    /// Builds a characteristic set holding every k-mer of <paramref name="thisClass"/>
    /// whose count is strictly greater than <paramref name="countCutoff"/> and whose
    /// <c>GetKeyFrac</c> value in <paramref name="thisClass"/> exceeds its
    /// <c>GetKeyFrac</c> value in <paramref name="baselineClass"/>. The stored score
    /// is the difference between the two values.
    /// </summary>
    /// <param name="name">Name given to the resulting set.</param>
    /// <param name="baselineClass">Multiset used as the baseline for comparison.</param>
    /// <param name="thisClass">Multiset whose characteristic k-mers are extracted.</param>
    /// <param name="countCutoff">K-mers with a count at or below this value are ignored.</param>
    /// <returns>A new set whose k is the smaller of the two inputs' <c>maxK</c>.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="baselineClass"/> or <paramref name="thisClass"/> is null.
    /// </exception>
    public static ClassCharacteristicSetKmer<A> BuildSubtractiveDifference(string name, MultisetKmer<A> baselineClass, MultisetKmer<A> thisClass, uint countCutoff)
    {
        // Fail early with the offending parameter name instead of a bare
        // NullReferenceException from the first member access below.
        if (baselineClass == null) {
            throw new ArgumentNullException ("baselineClass");
        }
        if (thisClass == null) {
            throw new ArgumentNullException ("thisClass");
        }

        ClassCharacteristicSetKmer<A> newSet = new ClassCharacteristicSetKmer<A>(name, Math.Min (baselineClass.maxK, thisClass.maxK));

        //TODO statistically significant?
        //TODO difference amount?
        foreach (Kmer<A> key in thisClass.Keys) {
            // Skip k-mers that are not above the count cutoff.
            if (thisClass.getCount (key) <= countCutoff) {
                continue;
            }

            double thisFrac = thisClass.GetKeyFrac (key);
            double baseFrac = baselineClass.GetKeyFrac (key);

            // Keep only k-mers over-represented relative to the baseline.
            if (thisFrac > baseFrac) {
                newSet.Add (key, thisFrac - baseFrac);
            }
        }

        //TODO select top x?
        //double[] function?
        return newSet;
    }
}