/
mix_gaussian.py
85 lines (71 loc) · 3.66 KB
/
mix_gaussian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import numpy as np
from scipy.integrate import quad, dblquad
from scipy.special import xlogy
class MixedGaussian():
    """Mixture of two bivariate Gaussians with unit variances.

    The first component is centred at ``(mean1, mean2)`` and has correlation
    ``rho1``; the second is centred at the mirrored point
    ``(-mean1, -mean2)`` and has correlation ``rho2``. ``mix`` is the weight
    of the first component and ``1 - mix`` the weight of the second.
    """

    def __init__(self, sample_size=400, mean1=0, mean2=0, rho1=0.9, rho2=-0.9, mix=0.5, mix2=0.5):
        """Store the mixture parameters.

        Args:
            sample_size: number of samples returned by ``data``.
            mean1: x-coordinate of the first component's mean.
            mean2: y-coordinate of the first component's mean.
            rho1: correlation of the first bivariate Gaussian.
            rho2: correlation of the second bivariate Gaussian.
            mix: mixing ratio (weight of the first component), between 0 and 1.
            mix2: stored as ``self.mix2``; not read anywhere else in this class.
        """
        self.sample_size = sample_size
        # Unit variances on the diagonal, so the off-diagonal entry is the
        # correlation coefficient itself.
        self.covMat1 = np.array([[1, rho1], [rho1, 1]])
        self.covMat2 = np.array([[1, rho2], [rho2, 1]])
        self.mix = mix
        self.mix2 = mix2
        self.mu = np.array([mean1, mean2])
        self.name = 'bimodal'

    @property
    def data(self):
        """Draw a fresh, shuffled sample from the mixture.

        Returns:
            np.ndarray of shape (sample_size, 2). The first component
            contributes ``floor(mix * sample_size)`` rows and the second
            component the remainder.
        """
        # Round away floating-point noise before truncating; otherwise e.g.
        # 0.29 * 100 == 28.999999999999996 would yield 28 instead of 29.
        n_first = int(round(self.mix * self.sample_size, 9))
        n_second = self.sample_size - n_first
        first = np.random.multivariate_normal(mean=self.mu,
                                              cov=self.covMat1,
                                              size=n_first)
        second = np.random.multivariate_normal(mean=-self.mu,
                                               cov=self.covMat2,
                                               size=n_second)
        samples = np.concatenate((first, second), axis=0)
        # Shuffle rows in place so the component of origin is not encoded in
        # the row order.
        np.random.shuffle(samples)
        return samples

    @property
    def ground_truth(self):
        """Mutual information I(X; Y) of the mixture by numerical integration.

        Computed as H(X) + H(Y) - H(X, Y), where each (differential) entropy
        is the integral of ``-p * log(p)`` of the corresponding density over
        the whole real line / plane.

        Returns:
            The mutual information estimate (natural-log units).
        """
        mix, mu = self.mix, self.mu
        cov1, cov2 = self.covMat1, self.covMat2

        # These depend only on the covariance matrices, so compute them once
        # rather than on every evaluation of the joint density.
        precision1 = np.linalg.inv(cov1)
        precision2 = np.linalg.inv(cov2)
        norm1 = 2 * np.pi * np.sqrt(np.linalg.det(cov1))
        norm2 = 2 * np.pi * np.sqrt(np.linalg.det(cov2))

        def fxy(x, y):
            # Joint pdf of the mixture at the point (x, y).
            point = np.array([x, y])
            diff1 = point - mu
            diff2 = point + mu  # second component is centred at -mu
            quad_form1 = diff1 @ precision1 @ diff1
            quad_form2 = diff2 @ precision2 @ diff2
            return mix * np.exp(-.5 * quad_form1) / norm1 \
                + (1 - mix) * np.exp(-.5 * quad_form2) / norm2

        def marginal(t, axis):
            # Marginal pdf along ``axis`` (0 for x, 1 for y): a mixture of two
            # univariate Gaussians centred at +mu[axis] and -mu[axis].
            var1, var2 = cov1[axis, axis], cov2[axis, axis]
            return mix * np.exp(-(t - mu[axis])**2 / (2 * var1)) / np.sqrt(2 * np.pi * var1) \
                + (1 - mix) * np.exp(-(t + mu[axis])**2 / (2 * var2)) / np.sqrt(2 * np.pi * var2)

        def neg_plogp(p):
            # xlogy returns 0 where p == 0, avoiding 0 * log(0) = nan in the
            # far tails of the densities.
            return -xlogy(p, p)

        lim = np.inf
        hx = quad(lambda x: neg_plogp(marginal(x, 0)), -lim, lim)
        hy = quad(lambda y: neg_plogp(marginal(y, 1)), -lim, lim)
        # dblquad passes the inner integration variable first; here x is
        # integrated innermost and y outermost, both over the whole real line.
        hxy = dblquad(lambda x, y: neg_plogp(fxy(x, y)),
                      -lim, lim, lambda y: -lim, lambda y: lim)
        return hx[0] + hy[0] - hxy[0]