# metaqap.R

# QAP over several groups
#
# The main function. For each group it computes the correlation coefficient, over dyads, between
# sharing a parameter value and making the same choice. It uses the Quadratic Assignment
# Procedure (QAP) to control for network structures. The per-group results can be combined into
# a single measure using the function metaqap.stats.
#
# param - column in data defining the parameter of interest
# data - the data frame containing the necessary columns; rows with NA in param are removed
#        before gf is called (data is not used when g is supplied)
# reps - number of QAP rearrangements to produce for each group
# gf - function to produce adjacency matrices from the data frame; it is called as
#      gf(data, class, param, choice, subclass)
# g - adjacency matrices (a list with elements gs and info), in case they have already been
#     produced
# qapf - function for the QAP tests; it is called as
#        qapf(x, statistic, g1 = 1, g2 = 2, mode = "graph", reps = reps, ...)
# class - column in data defining the groups
# choice - column in data defining the choices
# subclass - subcategory to class, which is used for the control case
# statistic - the test statistic, defaults to graph correlation
# ... - further arguments passed on to qapf
#
# Groups whose param adjacency matrix contains no ties at all are dropped before testing.
#
# Returns a metaqap object containing, for each retained group:
# g - adjacency matrices based on param and choice
# info - the input data
# q - results from the QAP test
# param - the parameter of interest
metaqap <- function (param, data, reps = 1000, gf = pairwise.m, g = NULL, qapf = qaptest, class = "class", choice = "choice", subclass = "year", statistic = gcor, ...) {
  # The default qapf and statistic (qaptest, gcor) are looked up on the search path, so attach
  # sna when it is installed. Fail early with a clear message when the default QAP function is
  # needed but the package is missing, instead of a late "object not found" error.
  if (requireNamespace("sna", quietly = TRUE)) {
    library(sna)
  } else if (missing(qapf)) {
    stop("package 'sna' is required for the default 'qapf' (qaptest)", call. = FALSE)
  }
  out <- list()
  if (is.null(g)) {
    g <- gf(data[!is.na(data[, param]), ], class, param, choice, subclass)
  }
  # Keep only groups with at least one tie in the param matrix. vapply (unlike sapply) still
  # returns a logical vector when there are no groups, so the subsetting below stays valid.
  has.ties <- vapply(g$gs, function(x) sum(x[1, , ], na.rm = TRUE) > 0, logical(1))
  g$gs <- g$gs[has.ties]
  g$info <- g$info[has.ties]
  q <- lapply(g$gs, function(x) qapf(x, statistic, g1 = 1, g2 = 2, mode = "graph", reps = reps, ...))
  out$g <- g$gs; out$info <- g$info; out$q <- q; out$param <- param
  #out$size <- sapply(g$info,function(x)mean(x$size)); out$size2 <- sapply(g$info,function(x)mean(x$size2))
  class(out) <- "metaqap"
  out
}

# Metaanalysis over QAP tests
#
# Combines effect sizes and distributions from QAP tests into one measure and mean distribution
# using metaanalytic methods (inverse-variance weights, with the variance of each group's QAP
# distribution used as that group's variance).
#
# Input parameters are generated by metaqap:
# q - lists of QAP test results; each element must contain testval and dist
# info - the input data (optional); only used to count subjects
#
# A group is included when its effect size and QAP variance are not NA, its weight 1/V is below
# 10^5, and fewer than 100 values of its QAP distribution are NA.
#
# Returns a metaqap.stats object containing:
# sel - the groups that were included in the test (based on sufficient amount of data)
# Y - effect sizes for each included group
# dist - QAP distribution for each included group (one column per group)
# V - QAP variance from each included group
# k - number of included groups
# T2 - tau^2 for random-effects metaanalysis (0 when fewer than two groups are included)
# m - combined measure
# p - combined p-value (proportion of mean.dist greater than m)
# mean.dist - a mean distribution from the QAP tests
# m.fixed - combined measure using fixed-effect metaanalysis
# p.fixed - combined p-value using fixed-effect metaanalysis
# mean.dist.fixed - a mean distribution from the QAP tests using fixed-effect metaanalysis
# N - number of subjects included (only when info is given)
metaqap.stats <- function (q,info=NULL) {
  Y <- vapply(q, function(x) x[["testval"]], numeric(1))
  dist <- sapply(q,"[[","dist")
  V <- apply(dist, 2, var, na.rm = TRUE)
  W <- 1 / V
  sel <- !is.na(W) & !is.na(Y) & W < 10^5 & colSums(is.na(dist)) < 100
  # drop = FALSE keeps dist a matrix when exactly one group is selected; otherwise the row-wise
  # apply() calls below would fail on a plain vector.
  dist <- dist[, sel, drop = FALSE]; Y <- Y[sel]; V <- V[sel]; W <- W[sel]
  k <- length(Y)
  # Estimate tau^2 to perform a random-effects meta-analysis. With fewer than two groups the
  # estimator is 0/0, so no between-group variance is assumed in that case.
  if (k < 2) {
    T2 <- 0
  } else {
    T2 <- ( sum(W * Y^2) - (sum(W * Y))^2 / sum(W) - k + 1 ) / ( sum(W) - sum(W^2) / sum(W) )
    T2 <- max(0, T2)
  }
  w.fixed <- 1 / V
  w.random <- 1 / (V + T2)
  m.fixed <- weighted.mean(Y, w.fixed)
  m <- weighted.mean(Y, w.random)
  # Row i of dist holds the i-th simulated value of every group; average across groups
  mean.dist.fixed <- apply(dist, 1, function(x) weighted.mean(x, w.fixed, na.rm = TRUE))
  mean.dist <- apply(dist, 1, function(x) weighted.mean(x, w.random, na.rm = TRUE))
  p.fixed <- sum(m.fixed < mean.dist.fixed) / length(mean.dist.fixed)
  p <- sum(m < mean.dist) / length(mean.dist)
  out <- list()
  out$sel <- sel; out$Y <- Y; out$dist <- dist; out$V <- V; out$k <- k; out$T2 <- T2
  out$m <- m; out$p <- p; out$mean.dist <- mean.dist
  out$m.fixed <- m.fixed; out$p.fixed <- p.fixed; out$mean.dist.fixed <- mean.dist.fixed
  if (!is.null(info)) {
    # vapply returns integer(0) for an empty selection, so the sum is then 0
    out$N <- sum(vapply(info[sel], nrow, integer(1)))
  }
  class(out) <- c("metaqap.stats","metaqap")
  out
}

# Summary statistics from metaanalytic QAP
#
# Prints N, k, the combined measure and p-value, a density summary of the weighted mean
# distribution, and the corresponding fixed-effect results together with T2.
#
# q - a metaqap.stats object
# plotting - whether to output a plot of weighted mean distribution and effect size
# ... - ignored; accepted so the method can be called through the summary generic
#
# Returns (invisibly, via print) the density of the fixed-effect mean distribution.
summary.metaqap.stats <- function (q,plotting=TRUE,...) {
  cat("N =",q$N,", k =",q$k,"\n")
  cat("m =",q$m,", p =",q$p,"\n")
  print(density(q$mean.dist, na.rm = TRUE))
  if (plotting) {
    sims <- q$mean.dist
    # The x range always covers both the simulated values and +/- the observed measure;
    # NAs are ignored here just as they are by density().
    plot(density(sims, na.rm = TRUE), main = "Weighted simulated correlations",
         xlim = c(min(-abs(q$m), sims, na.rm = TRUE), max(abs(q$m), sims, na.rm = TRUE)))
    # Mark the observed measure; only meaningful when the plot above was drawn
    abline(v = q$m, col = 2, lty = 2)
  }
  cat("\nFixed effect model:\n")
  cat("m =",q$m.fixed,", p =",q$p.fixed,"\n")
  cat("T2 =",q$T2,"\n")
  print(density(q$mean.dist.fixed, na.rm = TRUE))
}

# Metaanalysis over QAP tests using Fisher transformed values
#
# A wrapper function to metaqap.stats that first Fisher transforms the measures (z = atanh(r)),
# carries out the metaanalysis on the transformed scale, and then transforms the combined
# measures and mean distributions back (r = tanh(z)).
#
# Input and output variables are the same as for metaqap.stats, with these differences:
# Y, dist - the untransformed input values for all groups in q (not restricted to sel)
# V, T2 - remain on the Fisher transformed scale, as computed by metaqap.stats
# Output is a metaqap.stats.fisher object.
metaqap.stats.fisher <- function (q,info=NULL) {
  Y <- sapply(q,"[[","testval")
  dist <- sapply(q,"[[","dist")
  # Correlations of exactly +/-1 would transform to +/-Inf, so pull them in slightly first.
  # 0.9999 is arbitrarily chosen. which() skips NA entries.
  clamp <- function(r) {
    r[which(r >= 1)] <- 0.9999
    r[which(r <= -1)] <- -0.9999
    r
  }
  Y.z <- atanh(clamp(Y))
  dist.z <- atanh(clamp(dist))
  # Run the ordinary metaanalysis on copies of the test results holding transformed values
  q.z <- q
  for (i in seq_along(q.z)) { q.z[[i]]$testval <- Y.z[i]; q.z[[i]]$dist <- dist.z[,i] }
  out <- metaqap.stats(q.z,info)
  # Report the original (untransformed) inputs
  out$Y <- Y; out$dist <- dist
  # Back-transform the combined results; tanh(z) equals (exp(2z)-1)/(exp(2z)+1) but does not
  # overflow to NaN for large |z|
  out$m <- tanh(out$m)
  out$mean.dist <- tanh(out$mean.dist)
  out$m.fixed <- tanh(out$m.fixed)
  out$mean.dist.fixed <- tanh(out$mean.dist.fixed)
  class(out) <- c("metaqap.stats.fisher","metaqap.stats")
  out
}

# Difference between two metaanalytic QAP measures
#
# Subtracts the second result from the first: both the combined effect sizes and, element by
# element, the associated metaanalytic QAP distributions. This is done for the random-effects
# and the fixed-effect versions alike.
#
# q1, q2 - two metaqap.stats objects
#
# Returns a metaqap.diff object containing:
# m - difference between measures (q1 minus q2)
# p - proportion of values in dist greater than m
# dist - difference between metaQAP distribution values (paired in stored order)
# m.fixed - difference between fixed-effect measures
# p.fixed - proportion of values in dist.fixed greater than m.fixed
# dist.fixed - difference between metaQAP distribution values using fixed-effect measures
metaqap.diff <- function (q1,q2) {
  # Share of simulated differences that lie strictly above the observed difference
  share.above <- function(observed, simulated) sum(observed < simulated) / length(simulated)

  delta <- q1$m - q2$m
  delta.sims <- q1$mean.dist - q2$mean.dist
  delta.fixed <- q1$m.fixed - q2$m.fixed
  delta.sims.fixed <- q1$mean.dist.fixed - q2$mean.dist.fixed

  structure(
    list(
      m = delta,
      p = share.above(delta, delta.sims),
      dist = delta.sims,
      m.fixed = delta.fixed,
      p.fixed = share.above(delta.fixed, delta.sims.fixed),
      dist.fixed = delta.sims.fixed
    ),
    class = c("metaqap.diff","metaqap.stats")
  )
}

# Summary statistics from differences between two metaanalytic QAP
#
# Prints the difference in measures with its p-value and a density summary of the differences
# between the two distributions, followed by the same for the fixed-effect model.
#
# q - a metaqap.diff object
# plotting - whether to output a plot of weighted mean distribution and effect size
# ... - ignored; accepted so the method can be called through the summary generic
#
# Returns (invisibly, via print) the density of the fixed-effect differences.
summary.metaqap.diff <- function (q,plotting=TRUE,...) {
  cat("m =",q$m,", p =",q$p,"\n")
  print(density(q$dist))
  if (plotting) {
    # The x range always covers both the simulated differences and +/- the observed difference
    plot(density(q$dist), main = "Difference in weighted simulated correlations",
         xlim = c(min(-abs(q$m), q$dist), max(abs(q$m), q$dist)))
    # Mark the observed difference; only meaningful when the plot above was drawn
    abline(v = q$m, col = 2, lty = 2)
  }
  cat("\nFixed effect model:\n")
  cat("m =",q$m.fixed,", p =",q$p.fixed,"\n")
  print(density(q$dist.fixed))
}

# Construct adjacency matrices from data
#
# Constructs one adjacency matrix based on having the same value(s) for predictors, and one on
# having the same value(s) for outcomes, for each group in the data. A group is a run of
# consecutive rows with the same classifier value, so the data must be sorted on classifiers.
#
# data - a data frame sorted on classifiers
# classifiers - a classifier (single column) defining group membership
# predictors - names of columns in data containing values of predictors
# outcomes - names of columns in data containing values of outcomes
# ... - ignored; lets the function accept the extra subclass argument passed by metaqap
#
# For a group of m rows the result is a 2 x m x m array: slice [1,,] for predictors and slice
# [2,,] for outcomes. A cell is 1 when the two rows agree on every listed column, 0 when they
# differ on any of them, and NA when that cannot be decided because of missing values. The
# diagonal is NA.
#
# Returns a list containing:
# gs - list of two adjacency matrices per group
# info - list of data frames divided into groups
pairwise.m <- function(data,classifiers,predictors,outcomes,...) { # data sorted on classifiers
  n <- nrow(data)
  out <- list()
  if (n == 0) {
    # No rows means no groups
    out$gs <- list()
    out$info <- list()
    return(out)
  }
  # First row of every group, plus n+1 as a sentinel. Negative indices are used instead of
  # 2:n and 1:(n-1) because those ranges run backwards when n is 1.
  changing <- c(1, which(data[-1, classifiers] != data[-n, classifiers]) + 1, n + 1)
  classes <- length(changing) - 1
  gs <- vector("list", classes)
  infos <- vector("list", classes)
  # all() collapses the comparison over several columns into a single value
  same <- function(a, b, cols) 1 * all(data[a, cols] == data[b, cols])
  for (k in seq_len(classes)) {
    m <- changing[k+1] - changing[k]
    g <- array(NA, c(2, m, m))
    info <- data[changing[k]:(changing[k+1]-1), ]
    if (m > 1) {
      start <- changing[k] - 1
      for (i in 1:(m-1)) {
        for (j in (i+1):m) {
          # The relation is symmetric: compute once, store in both triangles
          g[1,i,j] <- g[1,j,i] <- same(start+i, start+j, predictors)
          g[2,i,j] <- g[2,j,i] <- same(start+i, start+j, outcomes)
        }
      }
    }
    gs[[k]] <- g
    infos[[k]] <- info
  }
  out$gs <- gs
  out$info <- infos
  out
}

# Construct adjacency matrices from data between subgroups
#
# Constructs one adjacency matrix based on having the same value(s) for predictors, and one on
# having the same value(s) for outcomes, for each group in the data. Elements are matched to other
# elements in the same group, excluding elements in the same subgroup. A group is a run of
# consecutive rows with the same classifier value, so the data must be sorted on classifiers.
#
# data - a data frame sorted on classifiers
# classifiers - a classifier (single column) defining group membership
# predictors - names of columns in data containing values of predictors
# outcomes - names of columns in data containing values of outcomes
# subclass - column in data defining the subgroups
#
# For a group of m rows the result is a 2 x m x m array: slice [1,,] for predictors and slice
# [2,,] for outcomes. For two rows in different subgroups a cell is 1 when they agree on every
# listed column, 0 when they differ on any of them, and NA when that cannot be decided because
# of missing values. Cells for rows in the same subgroup, rows whose subgroup is missing, and
# the diagonal are NA.
#
# Returns a list containing:
# gs - list of two adjacency matrices per group
# info - list of data frames divided into groups
pairwise.m.b <- function(data,classifiers,predictors,outcomes,subclass) { # data sorted on classifiers
  n <- nrow(data)
  out <- list()
  if (n == 0) {
    # No rows means no groups
    out$gs <- list()
    out$info <- list()
    return(out)
  }
  # First row of every group, plus n+1 as a sentinel. Negative indices are used instead of
  # 2:n and 1:(n-1) because those ranges run backwards when n is 1.
  changing <- c(1, which(data[-1, classifiers] != data[-n, classifiers]) + 1, n + 1)
  classes <- length(changing) - 1
  gs <- vector("list", classes)
  infos <- vector("list", classes)
  # all() collapses the comparison over several columns into a single value
  same <- function(a, b, cols) 1 * all(data[a, cols] == data[b, cols])
  for (k in seq_len(classes)) {
    m <- changing[k+1] - changing[k]
    g <- array(NA, c(2, m, m))
    info <- data[changing[k]:(changing[k+1]-1), ]
    if (m > 1) {
      start <- changing[k] - 1
      for (i in 1:(m-1)) {
        for (j in (i+1):m) {
          # isTRUE() makes a missing subclass leave the pair as NA rather than raising an error
          if (isTRUE(data[start+i,subclass] != data[start+j,subclass])) {
            # The relation is symmetric: compute once, store in both triangles
            g[1,i,j] <- g[1,j,i] <- same(start+i, start+j, predictors)
            g[2,i,j] <- g[2,j,i] <- same(start+i, start+j, outcomes)
          }
        }
      }
    }
    gs[[k]] <- g
    infos[[k]] <- info
  }
  out$gs <- gs
  out$info <- infos
  out
}