paul-buerkner · yananlong · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,4 @@ tests/local/models_0.8.0.Rda
 tests/local/models_0.10.0.Rda
 tests/local/models_1.2.0.Rda
 tests/local/Rplots.pdf
+.ipynb*
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,7 +16,8 @@ Authors@R:
       person("Mattan S.", "Ben-Shachar", role = c("ctb")),
       person("Hayden", "Rabel", role = c("ctb")),
       person("Simon C.", "Mills", role = c("ctb")),
-      person("Stephen", "Wild", role = c("ctb")))
+      person("Stephen", "Wild", role = c("ctb")),
+      person("Yanan", "Long",  role = c("ctb")))
 Depends:
     R (>= 3.5.0),
     Rcpp (>= 0.12.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -382,6 +382,8 @@ export(dhurdle_poisson)
 export(dinv_gaussian)
 export(dirichlet)
 export(dlogistic_normal)
+export(dmixcure_lognormal)
+export(dmixcure_weibull)
 export(dmulti_normal)
 export(dmulti_student_t)
 export(do_call)
@@ -461,6 +463,8 @@ export(marginal_smooths)
 export(mcmc_plot)
 export(me)
 export(mi)
+export(mixcure_lognormal)
+export(mixcure_weibull)
 export(mixture)
 export(mm)
 export(mmc)
@@ -493,6 +497,8 @@ export(phurdle_lognormal)
 export(phurdle_negbinomial)
 export(phurdle_poisson)
 export(pinv_gaussian)
+export(pmixcure_lognormal)
+export(pmixcure_weibull)
 export(post_prob)
 export(posterior_average)
 export(posterior_epred)

diff --git a/R/distributions.R b/R/distributions.R
@@ -2052,6 +2052,107 @@ phurdle_lognormal <- function(q, mu, sigma, hu, lower.tail = TRUE,
   out
 }
 
+#' @rdname Mixcure
+#' @export
+dmixcure_lognormal <- function(x, mu, sigma, inc, log = FALSE) {
+    # the helper calls the function named paste0("d", dist), i.e. "dlnorm",
+    # so the latency parameters carry dlnorm's argument names
+    .dmixcure(
+        x, dist = "lnorm", inc = inc,
+        pars = list(meanlog = mu, sdlog = sigma), log = log
+    )
+}
+
+#' @rdname Mixcure
+#' @export
+pmixcure_lognormal <- function(q, mu, sigma, inc, lower.tail = TRUE, log.p = FALSE) {
+    # the helper calls the function named paste0("p", dist), i.e. "plnorm",
+    # so the latency parameters carry plnorm's argument names
+    .pmixcure(
+        q, dist = "lnorm", inc = inc,
+        pars = list(meanlog = mu, sdlog = sigma),
+        lower.tail = lower.tail, log.p = log.p
+    )
+}
+
+#' @rdname Mixcure
+#' @export
+dmixcure_weibull <- function(x, shape, scale, inc, log = FALSE) {
+    # the helper calls the function named paste0("d", dist), i.e. "dweibull",
+    # which takes 'shape' and 'scale'
+    .dmixcure(
+        x, dist = "weibull", inc = inc,
+        pars = list(shape = shape, scale = scale), log = log
+    )
+}
+
+#' @rdname Mixcure
+#' @export
+pmixcure_weibull <- function(q, shape, scale, inc, lower.tail = TRUE, log.p = FALSE) {
+    # the helper calls the function named paste0("p", dist), i.e. "pweibull",
+    # which takes 'shape' and 'scale'
+    .pmixcure(
+        q, dist = "weibull", inc = inc,
+        pars = list(shape = shape, scale = scale),
+        lower.tail = lower.tail, log.p = log.p
+    )
+}
+
+# Density of a mixcure distribution
+# @param x values at which to evaluate the density
+# @param dist name of the distribution; "d" is prepended to obtain the
+#   name of the density function that is called
+# @param inc bernoulli incidence parameter, i.e. pi(z) in the formula below
+# @param pars named list of parameters passed to the density function
+# @param log if TRUE, the density is returned on the log scale
+# @return inc * f(x) on the requested scale
+.dmixcure <- function(x, dist, inc, pars, log) {
+    stopifnot(is.list(pars))
+    dens_fun <- paste0("d", as_one_character(dist))
+    want_log <- as_one_logical(log)
+    # NOTE(review): expand() presumably recycles all inputs to a common
+    # length; confirm against its definition
+    expanded <- expand(dots = c(nlist(x, inc), pars))
+    dens_args <- c(list(expanded$x), expanded[names(pars)], log = TRUE)
+    # incidence part (not censored): pi(z) * f(t | x), built on the log scale
+    log_dens <- log(expanded$inc) + do_call(dens_fun, dens_args)
+    if (want_log) {
+        return(log_dens)
+    }
+    exp(log_dens)
+}
+
+# CDF of a mixcure distribution
+# The complementary CDF is [1 - pi(z)] + pi(z) * S(q | x), where S is the
+# survival function of the latency distribution 'dist'.
+# @param q values at which to evaluate the CDF
+# @param dist name of the distribution; "p" is prepended to obtain the
+#   name of the CDF function that is called
+# @param inc bernoulli incidence parameter pi(z)
+# @param pars list of parameters passed to the cdf
+# @param lower.tail if TRUE return P(Y <= q), otherwise P(Y > q)
+# @param log.p if TRUE return probabilities on the log scale
+# @param lb lower bound of the conditional distribution
+# @param ub upper bound of the conditional distribution
+# @return one (log-)probability per element of the expanded inputs
+.pmixcure <- function(q, dist, inc, pars, lower.tail, log.p, lb = 0, ub = Inf) {
+    stopifnot(is.list(pars))
+    dist <- as_one_character(dist)
+    lower.tail <- as_one_logical(lower.tail)
+    log.p <- as_one_logical(log.p)
+    args <- expand(dots = c(nlist(q, inc), pars))
+    q <- args$q
+    inc <- args$inc
+    pars <- args[names(pars)]
+    cdf <- paste0("p", dist)
+    # log survival function of the latency distribution
+    log_surv <- do_call(
+        cdf, c(list(q), pars, lower.tail = FALSE, log.p = TRUE)
+    )
+    # compute log CCDF values elementwise (one value per observation):
+    # latency part (right-censored): [1 - pi(z)] + pi(z) * S(t | x)
+    log_cured <- log1p(-inc)
+    log_latent <- log(inc) + log_surv
+    # stable log-sum-exp of the two parts; a vector-reducing logSumExp
+    # would collapse all observations into a single number
+    log_max <- pmax(log_cured, log_latent)
+    out <- log_max + log(exp(log_cured - log_max) + exp(log_latent - log_max))
+    # both parts have zero mass (inc = 1 and S = 0): avoid -Inf - (-Inf) = NaN
+    out <- ifelse(log_max == -Inf, -Inf, out)
+    # take the limits of the distribution into account
+    out <- ifelse(q < lb, 0, out)
+    out <- ifelse(q > ub, -Inf, out)
+    # convert the log CCDF to the requested scale exactly once
+    if (lower.tail) {
+        out <- -expm1(out)
+        if (log.p) {
+            out <- log(out)
+        }
+    } else if (!log.p) {
+        out <- exp(out)
+    }
+    out
+}
+
 # density of the categorical distribution with the softmax transform
 # @param x positive integers not greater than ncat
 # @param eta the linear predictor (of length or ncol ncat)

diff --git a/R/families.R b/R/families.R
@@ -35,6 +35,7 @@
 #' @param link_beta Link of auxiliary parameter \code{beta} if being predicted.
 #' @param link_zi Link of auxiliary parameter \code{zi} if being predicted.
 #' @param link_hu Link of auxiliary parameter \code{hu} if being predicted.
+#' @param link_inc Link of auxiliary parameter \code{inc} if being predicted.
 #' @param link_zoi Link of auxiliary parameter \code{zoi} if being predicted.
 #' @param link_coi Link of auxiliary parameter \code{coi} if being predicted.
 #' @param link_disc Link of auxiliary parameter \code{disc} if being predicted.
@@ -192,7 +193,8 @@ brmsfamily <- function(family, link = NULL, link_sigma = "log",
                        link_shape = "log", link_nu = "logm1",
                        link_phi = "log", link_kappa = "log",
                        link_beta = "log", link_zi = "logit",
-                       link_hu = "logit", link_zoi = "logit",
+                       link_hu = "logit", link_inc = "logit",
+                       link_zoi = "logit",
                        link_coi = "logit", link_disc = "log",
                        link_bs = "log", link_ndt = "log",
                        link_bias = "logit", link_xi = "log1p",
@@ -206,7 +208,7 @@ brmsfamily <- function(family, link = NULL, link_sigma = "log",
     link_sigma = link_sigma, link_shape = link_shape,
     link_nu = link_nu, link_phi = link_phi,
     link_kappa = link_kappa, link_beta = link_beta,
-    link_zi = link_zi, link_hu = link_hu,
+    link_zi = link_zi, link_hu = link_hu, link_inc = link_inc,
     link_zoi = link_zoi, link_coi = link_coi,
     link_disc = link_disc, link_bs = link_bs,
     link_ndt = link_ndt, link_bias = link_bias,
@@ -735,6 +737,24 @@ hurdle_cumulative <- function(link = "logit", link_hu = "logit",
                threshold = threshold)
 }
 
+#' @rdname brmsfamily
+#' @export
+mixcure_lognormal <- function(link = "identity", link_sigma = "log",
+                              link_inc = "logit") {
+  # capture the 'link' argument as an unevaluated expression and pass it
+  # on as 'slink' next to the evaluated link
+  link_expr <- substitute(link)
+  .brmsfamily(
+    "mixcure_lognormal",
+    link = link, slink = link_expr,
+    link_sigma = link_sigma, link_inc = link_inc
+  )
+}
+
+#' @rdname brmsfamily
+#' @export
+mixcure_weibull <- function(link = "log", link_shape = "log",
+                            link_inc = "logit") {
+  # capture the 'link' argument as an unevaluated expression and pass it
+  # on as 'slink' next to the evaluated link
+  link_expr <- substitute(link)
+  .brmsfamily(
+    "mixcure_weibull",
+    link = link, slink = link_expr,
+    link_shape = link_shape, link_inc = link_inc
+  )
+}
+
 #' @rdname brmsfamily
 #' @export
 zero_inflated_beta <- function(link = "logit", link_phi = "log",
@@ -1333,6 +1353,7 @@ links_dpars <- function(dpar) {
     beta = c("log", "identity", "softplus", "squareplus"),
     zi = c("logit", "identity"),
     hu = c("logit", "identity"),
+    inc = c("logit", "identity"),
     zoi = c("logit", "identity"),
     coi = c("logit", "identity"),
     disc = c("log", "identity", "softplus", "squareplus"),
@@ -1867,8 +1888,8 @@ family_bounds.brmsterms <- function(x, ...) {
     "gamma", "weibull", "exponential", "lognormal",
     "frechet", "inverse.gaussian",
     "hurdle_poisson", "hurdle_negbinomial", "hurdle_gamma",
-    "hurdle_lognormal", "zero_inflated_poisson",
-    "zero_inflated_negbinomial"
+    "hurdle_lognormal", "mixcure_lognormal", "mixcure_weibull",
+    "zero_inflated_poisson", "zero_inflated_negbinomial"
   )
   beta_families <- c("beta", "zero_inflated_beta", "zero_one_inflated_beta")
   ordinal_families <- c("cumulative", "cratio", "sratio", "acat")

diff --git a/R/family-lists.R b/R/family-lists.R
@@ -497,6 +497,30 @@
   )
 }
 
+# Static description of the 'mixcure_lognormal' family
+# @return a named list with elements links, dpars, type, ybounds, closed,
+#   ad, include, specials and normalized
+.family_mixcure_lognormal <- function() {
+  mu_links <- c("identity", "inverse")
+  ad_terms <- c("weights", "subset", "cens", "trunc", "index")
+  list(
+    links = mu_links,
+    dpars = c("mu", "sigma", "inc"),
+    type = "real",
+    # response bounds and whether each bound is closed
+    ybounds = c(0, Inf),
+    closed = c(TRUE, NA),
+    ad = ad_terms,
+    # Stan chunk defining the mixcure_lognormal functions
+    include = "fun_mixcure_lognormal.stan",
+    specials = c("logscale", "sbi_inc_logit"),
+    normalized = ""
+  )
+}
+
+# Static description of the 'mixcure_weibull' family
+# @return a named list with elements links, dpars, type, ybounds, closed,
+#   ad, include, specials and normalized
+.family_mixcure_weibull <- function() {
+  mu_links <- c("log", "identity", "inverse", "softplus", "squareplus")
+  ad_terms <- c("weights", "subset", "cens", "trunc", "index")
+  list(
+    links = mu_links,
+    dpars = c("mu", "shape", "inc"),
+    type = "real",
+    # response bounds and whether each bound is closed
+    ybounds = c(0, Inf),
+    closed = c(TRUE, NA),
+    ad = ad_terms,
+    include = "fun_mixcure_weibull.stan",
+    specials = c("logscale", "sbi_inc_logit"),
+    normalized = ""
+  )
+}
+
 .family_zero_inflated_poisson <- function() {
   list(
     links = c("log", "identity", "sqrt", "softplus", "squareplus"),

diff --git a/R/log_lik.R b/R/log_lik.R
@@ -742,6 +742,30 @@ log_lik_hurdle_cumulative <- function(i, prep) {
   log_lik_weight(out, i = i, prep = prep)
 }
 
+# Pointwise log-likelihood for the 'mixcure_lognormal' family
+# @param i index passed on to get_dpar() and the log_lik_* helpers
+# @param prep object from which the distributional parameters are extracted
+log_lik_mixcure_lognormal <- function(i, prep) {
+  # distributional parameters, named as the arguments of
+  # d/pmixcure_lognormal
+  args <- nlist(
+    mu = get_dpar(prep, "mu", i),
+    sigma = get_dpar(prep, "sigma", i = i),
+    inc = get_dpar(prep, "inc", i)
+  )
+  out <- log_lik_censor(
+    dist = "mixcure_lognormal", args = args, i = i, prep = prep
+  )
+  out <- log_lik_truncate(
+    out, cdf = pmixcure_lognormal, args = args, i = i, prep = prep
+  )
+  log_lik_weight(out, i = i, prep = prep)
+}
+
+# Pointwise log-likelihood for the 'mixcure_weibull' family
+# @param i index passed on to get_dpar() and the log_lik_* helpers
+# @param prep object from which the distributional parameters are extracted
+log_lik_mixcure_weibull <- function(i, prep) {
+  shape <- get_dpar(prep, "shape", i = i)
+  mu <- get_dpar(prep, "mu", i = i)
+  inc <- get_dpar(prep, "inc", i)
+  # turn 'mu' into the Weibull scale parameter using
+  # mean = scale * gamma(1 + 1 / shape)
+  args <- list(shape = shape, scale = mu / gamma(1 + 1 / shape), inc = inc)
+  out <- log_lik_censor("mixcure_weibull", args, i, prep)
+  out <- log_lik_truncate(out, pmixcure_weibull, args, i, prep)
+  log_lik_weight(out, i = i, prep = prep)
+}
+
 log_lik_zero_inflated_poisson <- function(i, prep) {
   zi <- get_dpar(prep, "zi", i)
   lambda <- get_dpar(prep, "mu", i)

diff --git a/R/posterior_epred.R b/R/posterior_epred.R
@@ -485,6 +485,16 @@ posterior_epred_hurdle_lognormal <- function(prep) {
   with(prep$dpars, exp(mu + sigma^2 / 2) * (1 - hu))
 }
 
+# Expected values are not available for this family: always throws an error
+# @param prep unused
+posterior_epred_mixcure_lognormal <- function(prep) {
+  stop2("Cannot compute expected values of the posterior predictive ",
+        "distribution for family 'mixcure_lognormal'.")
+}
+
+# Expected values are not available for this family: always throws an error
+# @param prep unused
+posterior_epred_mixcure_weibull <- function(prep) {
+  stop2("Cannot compute expected values of the posterior predictive ",
+        "distribution for family 'mixcure_weibull'.")
+}
+
 posterior_epred_hurdle_cumulative <- function(prep) {
   adjust <- ifelse(prep$family$link == "identity", 0, 1)
   ncat_max <- max(prep$data$nthres) + adjust

diff --git a/R/priors.R b/R/priors.R
@@ -1062,6 +1062,7 @@ def_dprior <- function(x, dpar, data = NULL) {
       beta = "gamma(1, 0.1)",
       zi = "beta(1, 1)",
       hu = "beta(1, 1)",
+      inc = "beta(1, 1)",
       zoi = "beta(1, 1)",
       coi = "beta(1, 1)",
       bs = "gamma(1, 1)",
@@ -1085,6 +1086,7 @@ def_dprior <- function(x, dpar, data = NULL) {
       beta = "normal(1.7, 1.3)",
       zi = "logistic(0, 1)",
       hu = "logistic(0, 1)",
+      inc = "logistic(0, 1)",
       zoi = "logistic(0, 1)",
       coi = "logistic(0, 1)",
       bs = "normal(-0.6, 1.3)",
@@ -1679,6 +1681,7 @@ dpar_bounds <- function(dpar, suffix = "", family = NULL) {
     beta = list(lb = "0", ub = ""),
     zi = list(lb = "0", ub = "1"),
     hu = list(lb = "0", ub = "1"),
+    inc = list(lb = "0", ub = "1"),
     zoi = list(lb = "0", ub = "1"),
     coi = list(lb = "0", ub = "1"),
     bs = list(lb = "0", ub = ""),

diff --git a/R/stan-likelihood.R b/R/stan-likelihood.R
@@ -283,9 +283,9 @@ stan_log_lik_simple_lpdf <- function(lpdf, link, bterms, sep = "_") {
 }
 
 # prepare _logit suffix for distributional parameters
-# used in zero-inflated and hurdle models
+# used in zero-inflated, hurdle and mixcure models
 stan_log_lik_dpar_usc_logit <- function(dpar, bterms) {
-  stopifnot(dpar %in% c("zi", "hu"))
+  stopifnot(dpar %in% c("zi", "hu", "inc"))
   stopifnot(is.brmsterms(bterms))
   cens_or_trunc <- stan_log_lik_adj(bterms, c("cens", "trunc"))
   usc_logit <- isTRUE(bterms$dpars[[dpar]]$family$link == "logit")
@@ -889,6 +889,25 @@ stan_log_lik_hurdle_cumulative <- function(bterms, resp = "", mix = "",
   sdist(lpdf, p$mu, p$hu, p$disc, p$thres, p$Jthres)
 }
 
+# Stan likelihood statement for the 'mixcure_lognormal' family
+stan_log_lik_mixcure_lognormal <- function(bterms, resp = "", mix = "", ...) {
+  p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)
+  # suffix for the Stan function name as prepared for 'inc'
+  inc_suffix <- stan_log_lik_dpar_usc_logit("inc", bterms)
+  stan_fun <- paste0("mixcure_lognormal", inc_suffix)
+  sdist(stan_fun, p$mu, p$sigma, p$inc)
+}
+
+# Stan likelihood statement for the 'mixcure_weibull' family
+stan_log_lik_mixcure_weibull <- function(bterms, resp = "", mix = "", ...) {
+  # TRUE if stan_log_lik_adj() is TRUE for bterms or 'mix' is non-empty
+  adj_or_mix <- stan_log_lik_adj(bterms) || nzchar(mix)
+  p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)
+  inc_suffix <- stan_log_lik_dpar_usc_logit("inc", bterms)
+  stan_fun <- paste0("mixcure_weibull", inc_suffix)
+  # Stan uses shape-scale parameterization for weibull, so 'mu' is
+  # converted to a scale; use " ./ " only if 'shape' is among the
+  # distributional parameters and neither condition above holds
+  use_dot_div <- !adj_or_mix && paste0("shape", mix) %in% names(bterms$dpars)
+  div_op <- str_if(use_dot_div, " ./ ", " / ")
+  p$scale <- paste0(p$mu, div_op, "tgamma(1 + 1", div_op, p$shape, ")")
+  sdist(stan_fun, p$scale, p$shape, p$inc)
+}
+
 stan_log_lik_zero_inflated_poisson <- function(bterms, resp = "", mix = "",
                                                ...) {
   p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)

diff --git a/inst/chunks/fun_mixcure_lognormal.stan b/inst/chunks/fun_mixcure_lognormal.stan
@@ -0,0 +1,30 @@
+/* mixcure lognormal (AFT) log-PDF of a single (uncensored) response
+ * identity parameterization of the incidence part:
+ * 'inc' is passed to bernoulli_lpmf as a probability
+ */
+real mixcure_lognormal_lpdf(real y, real mu, real sigma, real inc) {
+    // log(inc) plus the lognormal log density of y
+    real lp = bernoulli_lpmf(1 | inc);
+    lp += lognormal_lpdf(y | mu, sigma);
+    return lp;
+}
+real mixcure_lognormal_lccdf(real y, real mu, real sigma, real inc) {
+    // log of (1 - inc) + inc * S(y), with S the lognormal survival function
+    real lp_cured = bernoulli_lpmf(0 | inc);
+    real lp_latent = bernoulli_lpmf(1 | inc) + lognormal_lccdf(y | mu, sigma);
+    return log_sum_exp(lp_cured, lp_latent);
+}
+real mixcure_lognormal_lcdf(real y, real mu, real sigma, real inc) {
+    // log CDF obtained as log(1 - CCDF)
+    real log_ccdf = mixcure_lognormal_lccdf(y | mu, sigma, inc);
+    return log1m_exp(log_ccdf);
+}
+/* mixcure lognormal (AFT) log-PDF of a single (uncensored) response
+ * logit parameterization of the incidence part:
+ * 'inc' is passed to bernoulli_logit_lpmf, i.e. it is on the logit scale
+ */
+real mixcure_lognormal_logit_lpdf(real y, real mu, real sigma, real inc) {
+    // log incidence probability plus the lognormal log density of y
+    real lp = bernoulli_logit_lpmf(1 | inc);
+    lp += lognormal_lpdf(y | mu, sigma);
+    return lp;
+}
+real mixcure_lognormal_logit_lccdf(real y, real mu, real sigma, real inc) {
+    // log of (1 - p) + p * S(y), where p is the incidence probability
+    // implied by the logit-scale 'inc' and S the lognormal survival function
+    real lp_cured = bernoulli_logit_lpmf(0 | inc);
+    real lp_latent = bernoulli_logit_lpmf(1 | inc)
+                     + lognormal_lccdf(y | mu, sigma);
+    return log_sum_exp(lp_cured, lp_latent);
+}
+real mixcure_lognormal_logit_lcdf(real y, real mu, real sigma, real inc) {
+    // log CDF obtained as log(1 - CCDF)
+    real log_ccdf = mixcure_lognormal_logit_lccdf(y | mu, sigma, inc);
+    return log1m_exp(log_ccdf);
+}