Skip to content

Commit

Permalink
Merge pull request #2 from AndrewLawrence/dev
Browse files Browse the repository at this point in the history
Version 1.07
  • Loading branch information
AndrewLawrence committed Oct 7, 2020
2 parents 13f6a27 + aeadfa3 commit 3be9cc7
Show file tree
Hide file tree
Showing 21 changed files with 931 additions and 123 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: dCVnet
Type: Package
Title: doubly cross-validated elastic-net regularised generalised linear models
Version: 1.0.6
Version: 1.0.7
Authors@R: c(person("Andrew J.", "Lawrence",
email = "lawrenceajl@gmail.com",
role = c("aut", "cre")))
Expand All @@ -27,12 +27,13 @@ Imports:
methods,
stats
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
Suggests:
knitr,
rmarkdown,
openxlsx,
car,
dplyr,
psych,
boot,
tidyverse,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(calibration,default)
S3method(coef,dCVnet)
S3method(coef,multialpha.repeated.cv.glmnet)
S3method(performance,dCVnet)
Expand All @@ -12,17 +13,20 @@ S3method(plot,multialpha.repeated.cv.glmnet)
S3method(plot,rocdata)
S3method(predict,dCVnet)
S3method(predict,multialpha.repeated.cv.glmnet)
S3method(print,calcoefs)
S3method(print,dCVnet)
S3method(print,multialpha.repeated.cv.glmnet)
S3method(print,performance)
S3method(print,reflogreg)
S3method(reflogreg,dCVnet)
S3method(reflogreg,default)
S3method(summary,calcoefs)
S3method(summary,dCVnet)
S3method(summary,multialpha.repeated.cv.glmnet)
S3method(summary,performance)
export(amalgamate_cv.glmnet)
export(average_rocdata)
export(calibration)
export(casesummary.performance)
export(coef_reflogreg)
export(coefficients_summary)
Expand Down Expand Up @@ -71,6 +75,7 @@ importFrom(stats,model.matrix)
importFrom(stats,prcomp)
importFrom(stats,predict)
importFrom(stats,predict.glm)
importFrom(stats,qlogis)
importFrom(stats,relevel)
importFrom(stats,sd)
importFrom(stats,setNames)
Expand Down
130 changes: 130 additions & 0 deletions R/dCVnet_calibration.R
@@ -0,0 +1,130 @@

# calibration S3 generic ---------------------------------------------

# implements 'weak' (i.e. linear) calibration.


#' calibration
#'
#' calculates 'weak' (i.e. intercept + slope) calibration equivalent to the
#' values returned by the val.prob function in the rms package
#' (which accompanies Frank Harrell's Regression Modelling Strategies book).
#'
#' Calibration is not returned via \code{\link{performance}} because of the
#' computational overhead of model fitting.
#'
#' @name calibration
#'
#' @param x an object containing predicted probabilities and observed outcomes,
#' from which calibration can be extracted.
#' @param ... arguments to pass on
#'
#' @return calibration intercept and calibration slope
#'
#' @importFrom stats qlogis
#'
#' @export
calibration <- function(x, ...) {
UseMethod("calibration")
}

# Internal function to run the actual calculation:
calibration_ <- function(predictedprobability,
observedoutcome) {
logit <- qlogis(predictedprobability)
finite <- is.finite(logit)
if ( any(!finite) ) {
# remove from fit:
logit <- logit[finite]
observedoutcome <- observedoutcome[finite]
# warn:
n <- length(finite)
i <- sum(!finite)
warning(paste0(i, "/", n,
"values removed due to predictions equal to 0 / 1"))
}
# xx is data.frame with reference & prediction (no group)
m <- glm(formula = observedoutcome ~ logit,
family = "binomial")
cc <- coef(m)
return(c(Intercept = cc[[1]], Slope = cc[[2]]))
}


#' calibration.default
#'
#' @rdname calibration
#' @description Default function behaviour assumes input is a
#' list/data.frame with required vectors as elements.
#' @param ppid indicator for predicted probability element in x
#' (column "name" or index)
#' @param ooid indicator for observed outcome element in x
#' (column "name" or index)
#' @param gid indicator for grouping variable in x
#' (column "name" or index). Set to NA to force no grouping.
#' @export
calibration.default <- function(x,
ppid = "prediction",
ooid = "reference",
gid = "label",
...) {
if ( length(unique(c(ppid, ooid, gid))) != 3 ) {
stop("Predictions and observations must be distinct elements in x")
}
pp <- x[[ppid]]
oo <- x[[ooid]]

# No implicit recyling
if ( length(pp) != length(oo) ) {
stop("numbers of predicted probabilities and
observed outcomes must match")
}

# grouping variable?
if ( is.na(gid) ) {
g <- rep(deparse(substitute(x)), times = length(pp))
} else {
g <- x[[gid]]
}

# iterable for groups:
glist <- setNames(unique(g), unique(g))

# calibration for each group:
result <- vapply(
glist,
FUN = function(i) {
calibration_(predictedprobability = pp[g %in% i],
observedoutcome = oo[g %in% i])
},
FUN.VALUE = c(Intercept = 0.0,
Slope = 1.0),
USE.NAMES = TRUE)

class(result) <- c("calcoefs")
result
}

# Simple print function for calibration result
#' @export
print.calcoefs <- function(x, ...) {
cat("Calibration coefficents\n")
print(unclass(x))
invisible(x)
}

# Simple summary function for calibration result
#' @export
summary.calcoefs <- function(object,
FUNS = c(mean = mean,
sd = sd,
min = min,
max = max),
...) {
if ( NCOL(object) < 2 ) return(object)
return(sapply(FUNS, function(f) {

apply(object, 1, f)

}))
}
2 changes: 2 additions & 0 deletions R/dCVnet_main.R
Expand Up @@ -14,6 +14,8 @@
#' and \bold{alpha} (the balance of L1 and L2 regularisation types).
#'
#' @inheritParams parse_dCVnet_input
#' @inheritSection parse_dCVnet_input Factor Outcomes
#' @inheritSection parse_dCVnet_input Notes
#' @inheritParams multialpha.repeated.cv.glmnet
#'
#' @param k_inner an integer, the k in the inner k-fold CV.
Expand Down
17 changes: 14 additions & 3 deletions R/dCVnet_performance.R
Expand Up @@ -334,7 +334,9 @@ summary.performance <- function(object,
# However glm / model.matrix uses treatment-coding: the
# first level of y is the reference class (e.g. control subjects).
# This behaviour is helpful when interpreting model coefficients
# as they represent the deviation from the reference.
# as they represent the deviation from the reference such that
# a positive coefficient indicates an increased probability of
# the 'positive' class.
# glmnet: to add to the confusion glmnet will reorder binary factors
# such that levels are alphabetical.
# dCVnet coerces input data into an alphabetical factor with the
Expand All @@ -348,8 +350,18 @@ summary.performance <- function(object,
# Next add the AUC:
B <- ModelMetrics::auc(actual = performance$reference,
predicted = performance$prediction)
# and the Brier Score:
if ( is.numeric(performance$reference) ) {
Bs <- ModelMetrics::brier(actual = performance$reference,
predicted = performance$prediction)
} else {
Bs <- ModelMetrics::brier(actual = as.numeric(performance$reference) - 1,
predicted = performance$prediction)
}
# following hack removed: B <- pmax(B, 1 - B)
B <- data.frame(Measure = "AUROC", Value = B, stringsAsFactors = FALSE)
B <- data.frame(Measure = c("AUROC", "Brier"),
Value = c(B, Bs),
stringsAsFactors = FALSE)

B$label <- A$label <- unique(performance$label)
return(rbind(A, B))
Expand Down Expand Up @@ -386,7 +398,6 @@ summary.performance <- function(object,
return(R)
}


#' report_performance_summary
#'
#' extracts performance from a dCVnet object
Expand Down
80 changes: 68 additions & 12 deletions R/dCVnet_plotting.R
Expand Up @@ -132,15 +132,35 @@ plot.dCVnet <- function(x, type = "tuning", ...) {
#' @param legend logical. Display a legend?
#' @param alphalabel should certain alpha values (probability thresholds)
#' on the curve be highlighted with symbols indicating threshold?
#' @param guide_labels a named list of 3 labels used in the legend:
#' \itemize{
#' \item{group = label for the Grouping factor}
#' \item{threshold = label for the Threshold factor}
#' \item{refline = label for the Reference line}
#' }
#' @param ... additional arguments (unused)
#' @return a ROC plot, as above.
#'
#' @export
plot.rocdata <- function(x,
legend = TRUE,
alphalabel = c(0.25, 0.5, 0.75),
guide_labels = c(group = "Model",
threshold = expression(P[Threshold]),
refline = "Chance\nPerformance"),
...) {
hasalphas <- any(!is.na(alphalabel))
guide_labels_expect <- c("group", "threshold", "refline")
if ( ! all( guide_labels_expect %in% names(guide_labels)) ) {
warning(paste("guide_labels must contain named elements:",
"\t\t'group', 'threshold' and 'refline'",
" Using defaults instead", sep = "\n"))
guide_labels <- c(group = "Model",
threshold = expression(P[Threshold]),
refline = "Chance\nPerformance")
}
# were thresholds supplied?
hasalphas <- !any(is.na(alphalabel)) && !identical(FALSE, alphalabel)

.closest <- function(vals, x) {
locs <- vapply(vals, function(v) which.min(abs(x - v)), FUN.VALUE = 1L)
r <- rep(NA_character_, NROW(x))
Expand Down Expand Up @@ -169,32 +189,68 @@ plot.rocdata <- function(x,
x$PThreshold <- factor(x$PThreshold, levels = names(alphalabel))
}

p <- ggplot2::ggplot(x, ggplot2::aes_string(y = "Sens",
x = "InvSpec",
group = "run",
colour = "run")) +
ggplot2::geom_abline(slope = 1, intercept = 0,
colour = "black") +
ggplot2::geom_line(show.legend = legend) +
ggplot2::xlab("False positive rate") +
ggplot2::ylab("True positive rate") +
refline <- data.frame(Sens = 0,
InvSpec = 0,
SensEnd = 1,
InvSpecEnd = 1,
lty = guide_labels$refline)

p <- ggplot2::ggplot(x,
ggplot2::aes_string(y = "Sens",
x = "InvSpec")) +
ggplot2::geom_segment(
ggplot2::aes_(
x = ~InvSpec,
y = ~Sens,
xend = ~InvSpecEnd,
yend = ~SensEnd,
lty = ~lty),
show.legend = legend,
data = refline,
inherit.aes = FALSE) +
ggplot2::geom_line(ggplot2::aes_string(colour = "run"),
show.legend = legend) +
ggplot2::xlab("False Positive Rate") +
ggplot2::ylab("True Positive Rate") +
ggplot2::scale_x_continuous(breaks = seq(0, 1, 0.1), minor_breaks = NULL) +
ggplot2::scale_y_continuous(breaks = seq(0, 1, 0.1), minor_breaks = NULL) +
ggplot2::scale_size_manual(values = c(0.7)) +
ggplot2::coord_equal() +
ggplot2::theme_light()

if ( hasalphas ) {
if ( legend ) {
p <- p + ggplot2::guides(
colour = ggplot2::guide_legend(title = guide_labels$group, order = 1),
lty = ggplot2::guide_legend(title = NULL,
order = 3,
override.aes = list(
linetype = 1,
shape = 32,
alpha = 1)))
}

if ( hasalphas ) {
p <- p +
ggplot2::geom_point(data = x[!is.na(x$PThreshold), ],
mapping = ggplot2::aes_string(shape = "PThreshold"),
mapping = ggplot2::aes_string(shape = "PThreshold",
colour = "run"),
show.legend = legend)

if ( legend ) {
p <- p + ggplot2::guides(
shape = ggplot2::guide_legend(title = guide_labels$threshold,
order = 2,
override.aes = list(linetype = NA))
)
}
}
print(p)
return(invisible(list(plot = p,
data = x)))
}



# ~ non-S3 plotters ------------------------------------------------------


Expand Down

0 comments on commit 3be9cc7

Please sign in to comment.