Skip to content

Here I illustrate how GGM selection methods work better in network hub and cluster detection when the false discover rate (FDR) control is ignored.

Notifications You must be signed in to change notification settings

markkukuismin/ROPEGGMSelectionNoFDR

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

21 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

ROPEGGMSelectionNoFDR

You need following R packages to run the script:

install.packages("ggplot2")
install.packages("reshape")
install.packages("camel")
install.packages("Matrix")
install.packages("spatstat")
install.packages("huge")
install.packages("igraph")
install.packages("BDgraph")
install.packages("GGMridge")
install.packages("rags2ridges")
install.packages("FastGGM")
install.packages("fdrtool")

Example

Explore network clusters from penalized precision matrix estimates:

library(huge)
library(igraph)
library(rags2ridges)
library(fdrtool)

source("ROPE.txt")
source("crossvalidationROPE.txt")
source("ROPEEEDKernelMethod.r")
source("ROPEEEDEmpiricPoisson.r")
source("Scale.r")

lambda = seq(0.01,10,length.out=40) # The largest value should be even larger when p increases

alpha = 0.05 # 100*(1-alpha)% confidence level

B = NULL # Number of breaks in the histogram. Default "Sturges"

p = 200
n = 100

nmb = p*(p-1)/2

I = diag(1,p)

#Simulate data:
set.seed(242377)
L = huge.generator(n=n,d=p,g=4,graph="cluster")  # Generate data with hub structures

Sigma = L$sigma
Theta = L$theta
Theta = as.matrix(Theta)

TrueA = L$theta
TrueA = as.matrix(TrueA)
TrueG = graph.adjacency(TrueA,mode="undirected",diag=F)

ClustersTrue = walktrap.community(TrueG)
TrueNmbOfClusters = length(table(membership(ClustersTrue)))

X = mvrnorm(2*n,rep(0,p),Sigma)
X = huge.npn(X)

Yvalid = X[1:n,]
Y = X[(n+1):(2*n),]

# Choose optimal value for the tuning parameter from the validation set

lambdaROPE = ROPEPoissonEED(Yvalid,lambda=lambda,alpha=alpha,target=I)$lambda

ROPEPoisson = ROPEPoissonEED(Y,lambda=lambdaROPE,alpha=alpha,target=I,cv=F)
ROPEKer = ROPEKernelEED(Y,lambda=lambdaROPE,alpha=alpha,target=I,cv=F)

AROPEPoisson = ROPEPoisson$IndROPEPoisson
AROPEKernel = ROPEKer$IndROPEKernel

#####################################################################################

# Method similar to ROPE, "RagsRidges" which utilizes partial correlation and Strimmer method:

OPT = optPenalty.LOOCV(Yvalid, lambdaMin = .5, lambdaMax = 30, step = 40,verbose = F,graph=F,target = I)

## Determine support regularized (standardized) precision under optimal penalty

ThetaRagsRidge = ridgeP(cov(Y),lambda=OPT$optLambda,target=I)

SparseThetaRagsRidge = sparsify(ThetaRagsRidge, threshold = "localFDR",verbose = F)

ARagsRidge = ifelse(SparseThetaRagsRidge$sparsePrecision != 0, 1, 0); diag(ARagsRidge) = 0

###################################################

PoissonG = graph.adjacency(AROPEPoisson,mode="undirected",diag=F)
ClustersPoisson = walktrap.community(PoissonG)

KernelG = graph.adjacency(AROPEKernel,mode="undirected",diag=F)
ClustersKernel = walktrap.community(KernelG)

RagsRidgeG = graph.adjacency(ARagsRidge,mode="undirected",diag=F)
ClustersRagsRidge = walktrap.community(RagsRidgeG)

#Plot graphs and clusters:
  
par(mfrow=c(2,2))

plot(ClustersTrue, TrueG, vertex.label=NA, vertex.size=5,main="Ground truth graph")
plot(ClustersPoisson, PoissonG, vertex.label=NA, vertex.size=5,main="ROPE (Poisson)")
plot(ClustersKernel, KernelG, vertex.label=NA, vertex.size=5,main="ROPE (Kernel)")
plot(ClustersRagsRidge, RagsRidgeG, vertex.label=NA, vertex.size=5,main="Rags2Ridges")

graph1

# Larger sample size:

n = 200

X = mvrnorm(2*n,rep(0,p),Sigma)
X = huge.npn(X)

Yvalid = X[1:n,]
Y = X[(n+1):(2*n),]

lambdaROPE = ROPEPoissonEED(Yvalid,lambda=lambda,alpha=alpha,target=I)$lambda

ROPEPoisson = ROPEPoissonEED(Y,lambda=lambdaROPE,alpha=alpha,target=I,cv=F)
ROPEKer = ROPEKernelEED(Y,lambda=lambdaROPE,alpha=alpha,target=I,cv=F)

AROPEPoisson = ROPEPoisson$IndROPEPoisson
AROPEKernel = ROPEKer$IndROPEKernel

#####################################################################################

OPT = optPenalty.LOOCV(Yvalid, lambdaMin = .5, lambdaMax = 30, step = 40,verbose = F,graph=F,target = I)

ThetaRagsRidge = ridgeP(cov(Y),lambda=OPT$optLambda,target=I)

SparseThetaRagsRidge = sparsify(ThetaRagsRidge, threshold = "localFDR",verbose = F)

ARagsRidge = ifelse(SparseThetaRagsRidge$sparsePrecision != 0, 1, 0); diag(ARagsRidge) = 0

###################################################

PoissonG = graph.adjacency(AROPEPoisson,mode="undirected",diag=F)
ClustersPoisson = walktrap.community(PoissonG)

KernelG = graph.adjacency(AROPEKernel,mode="undirected",diag=F)
ClustersKernel = walktrap.community(KernelG)

RagsRidgeG = graph.adjacency(ARagsRidge,mode="undirected",diag=F)
ClustersRagsRidge = walktrap.community(RagsRidgeG)

par(mfrow=c(2,2))

plot(ClustersTrue, TrueG, vertex.label=NA, vertex.size=5,main="Ground truth graph")
plot(ClustersPoisson, PoissonG, vertex.label=NA, vertex.size=5,main="ROPE (Poisson)")
plot(ClustersKernel, KernelG, vertex.label=NA, vertex.size=5,main="ROPE (Kernel)")
plot(ClustersRagsRidge, RagsRidgeG, vertex.label=NA, vertex.size=5,main="Rags2Ridges")

graph2

# RagsRidge without FDR control:

# Calculate p-values from the RagsRdige Regularized partial correlations

R = pcor(ThetaRagsRidge) # Compute the partial correlation matrix

R = R[upper.tri(R)]

EmpiricDistribution = fdrtool(R,statistic = "correlation",verbose = F,plot=F) 

pvalues = EmpiricDistribution$pval # p-values determined from the partical correlation coefficients

ARagsRidgeNoFDR = matrix(0,p,p)

pvalues[pvalues > alpha] = 0 # Not significant p-values
pvalues[pvalues != 0] = 1

ARagsRidgeNoFDR[upper.tri(ARagsRidgeNoFDR)] = pvalues

ARagsRidgeNoFDR = ARagsRidgeNoFDR + t(ARagsRidgeNoFDR)

RagsRidgeGNoFDR = graph.adjacency(ARagsRidgeNoFDR,mode="undirected",diag=F)

ClustersRagsRidgeNoFDR = walktrap.community(RagsRidgeGNoFDR)

par(mfrow=c(1,2))

plot(ClustersRagsRidge, RagsRidgeG, vertex.label=NA, vertex.size=5,main="Rags2Ridges")
plot(ClustersRagsRidgeNoFDR, RagsRidgeGNoFDR, vertex.label=NA, vertex.size=5,main="Rags2Ridges (No FDR)")

graph3

About

Here I illustrate how GGM selection methods work better in network hub and cluster detection when the false discover rate (FDR) control is ignored.

Topics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages