Skip to content

Commit

Permalink
updated formatting based on styler
Browse files Browse the repository at this point in the history
  • Loading branch information
tetomonti committed Aug 29, 2023
1 parent ba5b8d7 commit a142320
Showing 1 changed file with 80 additions and 93 deletions.
173 changes: 80 additions & 93 deletions R/revealer_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,55 +40,49 @@
#' @return return a vector of row-wise scores where its labels or names
#' must match the row names of \code{FS} object
#'
revealer_rowscore <- function
(
FS,
input_score,
meta_feature = NULL,
assoc_metric = c("IC", "COR")
)
{

revealer_rowscore <- function(
FS,
input_score,
meta_feature = NULL,
assoc_metric = c("IC", "COR")
) {
assoc_metric <- match.arg(assoc_metric)

# Check if meta_feature is provided
if(is.null(meta_feature)){
if (is.null(meta_feature)) {
meta_vector <- as.vector(rep(0, ncol(FS)))
}else{
} else {
# Getting the position of the known meta features
locs <- match(meta_feature, row.names(FS))
# Taking the union across the known meta features
if(length(locs) > 1) {
meta_vector <- as.numeric(ifelse(colSums(FS[locs,]) == 0, 0, 1))
}else{
meta_vector <- as.numeric(FS[locs,])
if (length(locs) > 1) {
meta_vector <- as.numeric(ifelse(colSums(FS[locs, ]) == 0, 0, 1))
} else {
meta_vector <- as.numeric(FS[locs, ])
}
names(meta_vector) <- names(input_score)
# Remove the seeds from the binary feature matrix
FS <- FS[-locs, , drop=FALSE]
FS <- FS[-locs, , drop = FALSE]
}

# Assume the score direction is always positive
# So we need to sort input_score from highest to lowest values
input_score <- sort(input_score, decreasing=TRUE)
input_score <- sort(input_score, decreasing = TRUE)

# Re-order the matrix based on the order of input_score
FS <- FS[, names(input_score), drop=FALSE]
FS <- FS[, names(input_score), drop = FALSE]

# Compute CMI given known seed features
cmi_1 <- apply(X=FS, MARGIN=1, function(x){
cmi_1 <- apply(X = FS, MARGIN = 1, function(x) {
revealer_score(
x = input_score,
y = x,
z = meta_vector,
assoc_metric = assoc_metric
)
})

names(cmi_1) <- rownames(FS)

return(cmi_1)

return(cmi_1)
}

#' Compute Conditional Mutual Information of x and y given z from
Expand All @@ -112,39 +106,30 @@ revealer_rowscore <- function
#' @importFrom misc3d kde3d
#' @importFrom stats cor median sd
#' @importFrom ppcor pcor.test
revealer_score <- function
(
x,
y,
z,
assoc_metric = c("IC", "COR")
)
{

revealer_score <- function(
x, y, z,
assoc_metric = c("IC", "COR")
) {
assoc_metric <- match.arg(assoc_metric)

# Compute CMI
cmi <- suppressWarnings(
cond_assoc(x=x, y=y, z=z, metric=assoc_metric)
cond_assoc(x = x, y = y, z = z, metric = assoc_metric)
)

# Revealer only returns score statistics value
return(c(score=cmi))

return(c(score = cmi))
}



# Compute the conditional mutual information of x and y given z
cond_assoc <- function(x, y, z, metric) {

cond_assoc <- function(x, y, z, metric) {
# Association of x and y given z
#
# Conditional mutual information I(x, y | z)

if (length(unique(x)) == 1 || length(unique(y)) == 1) return(0)

if (length(unique(z)) == 1) { # e.g. for NULLSEED
if (length(unique(x)) == 1 || length(unique(y)) == 1) {
return(0)
}
if (length(unique(z)) == 1) { # e.g. for NULLSEED
if (metric == "IC") {
return(mutual_inf_v2(x = x, y = y, n.grid = 25)$IC)
} else if (metric == "COR") {
Expand All @@ -157,15 +142,16 @@ cond_assoc <- function(x, y, z, metric) {
return(pcor.test(x, y, z)$estimate)
}
}

}

# Computes the conditional mutual information x, y | z
cond_mutual_inf <- function(x, y, z,
n.grid=25,
delta = 0.25*c(MASS::bcv(x), MASS::bcv(y),
MASS::bcv(z))) {

cond_mutual_inf <- function(
x, y, z,
n.grid = 25,
delta = 0.25 * c(
MASS::bcv(x), MASS::bcv(y),
MASS::bcv(z))
) {
# Computes the Conditional mutual information:
# I(X, Y | X) = H(X, Z) + H(Y, Z) - H(X, Y, Z) - H(Z)
# The 0.25 in front of the bandwidth is because different conventions
Expand All @@ -175,11 +161,11 @@ cond_mutual_inf <- function(x, y, z,

rho <- cor(x, y)
rho2 <- ifelse(rho < 0, 0, rho)
delta <- delta*(1 + (-0.75)*rho2)
delta <- delta * (1 + (-0.75) * rho2)

# Kernel-based prob. density

kde3d.xyz <- misc3d::kde3d(x=x, y=y, z=z, h=delta, n = n.grid)
kde3d.xyz <- misc3d::kde3d(x = x, y = y, z = z, h = delta, n = n.grid)
X <- kde3d.xyz$x
Y <- kde3d.xyz$y
Z <- kde3d.xyz$z
Expand All @@ -190,49 +176,51 @@ cond_mutual_inf <- function(x, y, z,

# Normalize density and calculate marginal densities and entropies

PXYZ <- PXYZ/(sum(PXYZ)*dx*dy*dz)
PXZ <- colSums(aperm(PXYZ, c(2,1,3)))*dy
PYZ <- colSums(PXYZ)*dx
PZ <- rowSums(aperm(PXYZ, c(3,1,2)))*dx*dy
PXY <- colSums(aperm(PXYZ, c(3,1,2)))*dz
PX <- rowSums(PXYZ)*dy*dz
PY <- rowSums(aperm(PXYZ, c(2,1,3)))*dx*dz

HXYZ <- - sum(PXYZ * log(PXYZ))*dx*dy*dz
HXZ <- - sum(PXZ * log(PXZ))*dx*dz
HYZ <- - sum(PYZ * log(PYZ))*dy*dz
HZ <- - sum(PZ * log(PZ))*dz
HXY <- - sum(PXY * log(PXY))*dx*dy
HX <- - sum(PX * log(PX))*dx
HY <- - sum(PY * log(PY))*dy
PXYZ <- PXYZ / (sum(PXYZ) * dx * dy * dz)
PXZ <- colSums(aperm(PXYZ, c(2, 1, 3))) * dy
PYZ <- colSums(PXYZ) * dx
PZ <- rowSums(aperm(PXYZ, c(3, 1, 2))) * dx * dy
PXY <- colSums(aperm(PXYZ, c(3, 1, 2))) * dz
PX <- rowSums(PXYZ) * dy * dz
PY <- rowSums(aperm(PXYZ, c(2, 1, 3))) * dx * dz

HXYZ <- -sum(PXYZ * log(PXYZ)) * dx * dy * dz
HXZ <- -sum(PXZ * log(PXZ)) * dx * dz
HYZ <- -sum(PYZ * log(PYZ)) * dy * dz
HZ <- -sum(PZ * log(PZ)) * dz
HXY <- -sum(PXY * log(PXY)) * dx * dy
HX <- -sum(PX * log(PX)) * dx
HY <- -sum(PY * log(PY)) * dy

MI <- HX + HY - HXY
CMI <- HXZ + HYZ - HXYZ - HZ

SMI <- sign(rho) * MI
SCMI <- sign(rho) * CMI

IC <- sign(rho) * sqrt(1 - exp(- 2 * MI))
CIC <- sign(rho) * sqrt(1 - exp(- 2 * CMI))

return(list(CMI=CMI, MI=MI,
SCMI=SCMI, SMI=SMI,
HXY=HXY, HXYZ=HXYZ,
IC=IC, CIC=CIC))
IC <- sign(rho) * sqrt(1 - exp(-2 * MI))
CIC <- sign(rho) * sqrt(1 - exp(-2 * CMI))

return(list(
CMI = CMI, MI = MI,
SCMI = SCMI, SMI = SMI,
HXY = HXY, HXYZ = HXYZ,
IC = IC, CIC = CIC
))
}

# Computes the Mutual Information/Information Coefficient IC(x, y)
mutual_inf_v2 <- function(x, y, n.grid=25,
delta = c(MASS::bcv(x), MASS::bcv(y))) {

mutual_inf_v2 <- function(
x, y, n.grid = 25,
delta = c(MASS::bcv(x), MASS::bcv(y))
) {
# Computes the Mutual Information/Information Coefficient IC(x, y)
#
# Compute correlation-dependent bandwidth

rho <- cor(x, y)
rho2 <- abs(rho)
delta <- delta*(1 + (-0.75)*rho2)
delta <- delta * (1 + (-0.75) * rho2)

# Kernel-based prob. density

Expand All @@ -241,28 +229,27 @@ mutual_inf_v2 <- function(x, y, n.grid=25,
FXY <- kde2d.xy$z + .Machine$double.eps
dx <- kde2d.xy$x[2] - kde2d.xy$x[1]
dy <- kde2d.xy$y[2] - kde2d.xy$y[1]
PXY <- FXY/(sum(FXY)*dx*dy)
PX <- rowSums(PXY)*dy
PY <- colSums(PXY)*dx
HXY <- -sum(PXY * log(PXY))*dx*dy
HX <- -sum(PX * log(PX))*dx
HY <- -sum(PY * log(PY))*dy
PXY <- FXY / (sum(FXY) * dx * dy)
PX <- rowSums(PXY) * dy
PY <- colSums(PXY) * dx
HXY <- -sum(PXY * log(PXY)) * dx * dy
HX <- -sum(PX * log(PX)) * dx
HY <- -sum(PY * log(PY)) * dy

PX <- matrix(PX, nrow=n.grid, ncol=n.grid)
PY <- matrix(PY, byrow = TRUE, nrow=n.grid, ncol=n.grid)
PX <- matrix(PX, nrow = n.grid, ncol = n.grid)
PY <- matrix(PY, byrow = TRUE, nrow = n.grid, ncol = n.grid)

MI <- sum(PXY * log(PXY/(PX*PY)))*dx*dy
MI <- sum(PXY * log(PXY / (PX * PY))) * dx * dy
rho <- cor(x, y)
SMI <- sign(rho) * MI

# Use pearson correlation the get the sign (directionality)

IC <- sign(rho) * sqrt(1 - exp(- 2 * MI))

NMI <- sign(rho) * ((HX + HY)/HXY - 1)
IC <- sign(rho) * sqrt(1 - exp(-2 * MI))

return(list(MI=MI, SMI=SMI, HXY=HXY, HX=HX, HY=HY, NMI=NMI, IC=IC))
NMI <- sign(rho) * ((HX + HY) / HXY - 1)

return(list(MI = MI, SMI = SMI, HXY = HXY, HX = HX, HY = HY, NMI = NMI, IC = IC))
}


0 comments on commit a142320

Please sign in to comment.