/
Multiset.cs
115 lines (87 loc) · 2.71 KB
/
Multiset.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
using System;
using System.Collections.Generic;
using System.Collections;
using System.Linq;
using System.Text;
using Whetstone;
namespace TextCharacteristicLearner
{
// A bag of Tyvar values, stored as a dictionary that maps each distinct value
// to the number of times it has been recorded.
public class Multiset<Tyvar> : Dictionary<Tyvar, int>, EventSeriesConsumer<Tyvar>
{
    // Sum of every count recorded through Add / AddMulti.
    // putVal and the inherited Dictionary members do not modify this field.
    public int size = 0;

    // Creates an empty multiset.
    public Multiset()
    {
    }

    // Creates an empty multiset; initialCapacity is forwarded to the Dictionary constructor.
    public Multiset(int initialCapacity) : base(initialCapacity)
    {
    }

    // Creates a multiset and records one occurrence for each element of t.
    public Multiset(IEnumerable<Tyvar> t)
    {
        t.ForEach (item => Add (item));
    }

    // Records a single occurrence of s.
    public void Add(Tyvar s)
    {
        AddMulti (s, 1);
    }

    // Records count occurrences of s: raises both the stored count for s and size by count.
    public void AddMulti(Tyvar s, int count)
    {
        // TryGetValue leaves 'existing' at 0 when s has not been seen yet.
        int existing;
        TryGetValue (s, out existing);
        this[s] = existing + count;
        size += count;
    }

    // Records one occurrence for each element of s.
    public void Add(IEnumerable<Tyvar> s)
    {
        s.ForEach (item => Add (item));
    }

    // Feeds every element of the series into this multiset (same as Add(IEnumerable)).
    public void ConsumeEventSeries(IEnumerable<Tyvar> s)
    {
        Add (s);
    }

    // Returns the stored count for s, or 0 when s is not present.
    public int getCount(Tyvar s)
    {
        int tally;
        if (TryGetValue (s, out tally))
        {
            return tally;
        }
        return 0;
    }

    // Returns the count of v divided by size, as a double.
    // With size == 0 this is a floating-point division by zero (no exception is thrown).
    public double GetKeyFrac(Tyvar v)
    {
        double hits = getCount (v);
        double total = size;
        return hits / total;
    }

    // Smoothed fraction with a smoothing term of 1: (count + 1) / (size + 1).
    public double GetKeyFracLaplace(Tyvar val)
    {
        return GetKeyFracLaplace (val, 1.0); //TODO is this laplacian smoothing?
    }

    // Smoothed fraction: (count + smooth) / (size + smooth).
    // NOTE(review): textbook additive smoothing adds the term to the denominator once per
    // distinct category; here it is added once in total -- confirm this is intended.
    public double GetKeyFracLaplace(Tyvar val, double smooth)
    {
        double numerator = (double)getCount (val) + smooth;
        double denominator = (double)size + smooth;
        return numerator / denominator; //TODO is this laplacian smoothing?
    }

    // Overwrites the stored count for s with val.
    // NOTE(review): size is not adjusted here, so it can drift from the sum of counts
    // unless callers compensate -- confirm against callers.
    public void putVal(Tyvar s, int val)
    {
        base[s] = val;
    }

    // Renders every entry as "key:count" and combines them with FoldToString.
    public override string ToString()
    {
        return Keys.FoldToString (k => k + ":" + this[k]);
    }

    // Same rendering as ToString(), limited to the 'count' keys with the highest counts,
    // in descending order of count.
    public string ToString(int count)
    {
        IEnumerable<Tyvar> mostFrequent = Keys.OrderByDescending (k => this[k]).Take (count);
        return mostFrequent.FoldToString (k => k + ":" + this[k]);
    }
}
// Extension helpers for merging multisets and for filling a multiset with k-mers.
public static class Multiset_Extensions
{
    // Builds a new multiset whose count for each key is the sum of that key's counts
    // across every multiset in 'sets'. Returns an empty multiset when 'sets' is empty.
    public static Multiset<A> MultisetUnion<A>(this IEnumerable<Multiset<A>> sets)
    {
        //TODO: Lower bound on size, requires multienumeration.
        Multiset<A> d = new Multiset<A>();
        foreach (Multiset<A> aset in sets)
        {
            foreach (KeyValuePair<A, int> kvp in aset)
            {
                // AddMulti accumulates into an existing entry and keeps d.size in step.
                // The two-argument Add is the inherited Dictionary.Add, which throws
                // ArgumentException on a repeated key and never touches size.
                d.AddMulti (kvp.Key, kvp.Value);
            }
        }
        return d;
    }

    // Builds a new MultisetKmer by feeding every (key, value) entry of every input set
    // to AddKmer. The result is constructed with the maxK of the first set.
    // sets.First() throws InvalidOperationException when 'sets' is empty, and 'sets'
    // is enumerated twice (once for First, once for the merge).
    public static MultisetKmer<A> MultisetKmerUnion<A>(this IEnumerable<MultisetKmer<A>> sets)
    {
        //TODO check they all have the same k?
        MultisetKmer<A> d = new MultisetKmer<A>(sets.First ().maxK);
        //TODO add number
        sets.ForEach (aset => aset.ForEach (kvp => d.AddKmer (kvp.Key, kvp.Value)));
        return d;
    }

    // Adds one Kmer to thisSet for each start index i in [0, length - k), built from
    // the materialised elements of toAdd.
    // NOTE(review): if the Kmer constructor takes (array, start, length), the window
    // starting at index length - k is never added (a sequence of exactly k elements
    // adds nothing) -- looks like an off-by-one; confirm against Kmer before changing.
    public static void AddKmers<A>(this Multiset<Kmer<A>> thisSet, IEnumerable<A> toAdd, int k)
    {
        A[] toAddArr = toAdd.ToArray ();
        for (int i = 0; i < toAddArr.Length - k; i++)
        {
            thisSet.Add (new Kmer<A>(toAddArr, i, k));
        }
    }
}
}