
Commit 0.3-0
* GitHub Actions.
* Roxygen documentation.
* Package logo.
fbertran committed Mar 21, 2021
1 parent a4be893 commit 1f98d70
Showing 120 changed files with 6,316 additions and 3,563 deletions.
9 changes: 6 additions & 3 deletions .Rbuildignore
@@ -1,17 +1,20 @@
^genlogo.R$
#All packages
^.*\.Rproj$
^\.Rproj\.user$
^\.gitignore$
^NEWS$
#For the pkgdown website
^pkgdown*$
^docs*$
#For the cached chunks of README.Rmd
^cache*$
^README_cache*$
#If README too big, not on CRAN but only on git
^man/figures*$
^README\.Rmd$
^README\.md$
#If README too big, not on CRAN but only on git
#^man/figures*$
#^README\.md$
#Specific
^fullrespdf*$
^inst/animation*$
^\.github$
1 change: 1 addition & 0 deletions .github/.gitignore
@@ -0,0 +1 @@
*.html
32 changes: 32 additions & 0 deletions .github/workflows/R-CMD-check.yaml
@@ -0,0 +1,32 @@
# For help debugging build failures, open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- main
- master
pull_request:
branches:
- main
- master

name: R-CMD-check

jobs:
R-CMD-check:
runs-on: macOS-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@v1
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
shell: Rscript {0}
- name: Check
run: |
options(crayon.enabled = TRUE)
rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
shell: Rscript {0}
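For reference, the same check can be reproduced in a local R session. The sketch below simply mirrors the two Rscript steps of the workflow and assumes the package source tree is the current working directory:

# Mirror the workflow steps locally (sketch; assumes network access to CRAN)
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
options(crayon.enabled = TRUE)  # coloured check output, as in the CI step
rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")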
10 changes: 5 additions & 5 deletions DESCRIPTION
@@ -3,8 +3,8 @@ Type: Package
Title: Degrees of Freedom and Statistical Inference for Partial Least
Squares Regression
Depends: MASS
Version: 0.2-9
Date: 2019-01-31
Version: 0.3-0
Date: 2021-03-13
Author: Nicole Kraemer, Mikio L. Braun
Maintainer: Frederic Bertrand <frederic.bertrand@math.unistra.fr>
Description: The plsdof package provides Degrees of Freedom estimates
@@ -28,6 +28,6 @@ X-CRAN-Original-Maintainer: Nicole Kraemer
<kraemer_r_packages@yahoo.de>
X-CRAN-Comment: Orphaned and corrected on 2018-07-21 as check problems
were not corrected despite reminders.
RoxygenNote: 6.1.1
URL: https://github.com/fbertran/plsdof
BugReports: https://github.com/fbertran/plsdof/issues
RoxygenNote: 7.1.1
URL: https://github.com/fbertran/plsdof/, https://fbertran.github.io/plsdof/
BugReports: https://github.com/fbertran/plsdof/issues/
39 changes: 29 additions & 10 deletions NAMESPACE
@@ -1,11 +1,30 @@
import(MASS)

importFrom("graphics", "plot")
importFrom("stats", "coef", "cor", "sd")

## and exported functions
export(benchmark.pls,coef.plsdof,dA,dvvtz,information.criteria,krylov,normalize,pcr,pls.dof,pls.model,tr,vvtz,benchmark.regression,compute.lower.bound,dnormalize,first.local.minimum, kernel.pls.fit, linear.pls.fit, pcr.cv,pls.cv, pls.ic, ridge.cv,vcov.plsdof)

S3method(coef, plsdof)
S3method(vcov, plsdof)
# Generated by roxygen2: do not edit by hand

S3method(coef,plsdof)
S3method(vcov,plsdof)
export(benchmark.pls)
export(benchmark.regression)
export(compute.lower.bound)
export(dA)
export(dnormalize)
export(dvvtz)
export(first.local.minimum)
export(information.criteria)
export(kernel.pls.fit)
export(krylov)
export(linear.pls.fit)
export(normalize)
export(pcr)
export(pcr.cv)
export(pls.cv)
export(pls.dof)
export(pls.ic)
export(pls.model)
export(ridge.cv)
export(tr)
export(vvtz)
import(MASS)
importFrom(graphics,plot)
importFrom(stats,coef)
importFrom(stats,cor)
importFrom(stats,sd)
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,3 +1,8 @@
# plsdof 0.3-0

* GitHub Actions.
* Roxygen documentation.

# plsdof 0.2-9

* Added a website for the package.
Expand Down
73 changes: 73 additions & 0 deletions R/benchmark.pls.R
@@ -1,3 +1,76 @@
#' Comparison of model selection criteria for Partial Least Squares Regression.
#'
#' This function computes the test error over several runs for different model
#' selection strategies.
#'
#' The function estimates the optimal number of PLS components based on four
#' different criteria: (1) cross-validation, (2) information criteria with the
#' naive Degrees of Freedom DoF(m)=m+1, (3) information criteria with the
#' Degrees of Freedom computed via a Lanczos representation of PLS, and (4)
#' information criteria with the Degrees of Freedom computed via a Krylov
#' representation of PLS. Note that the latter two options only differ with
#' respect to the estimation of the model error.
#'
#' In addition, the function computes the test error of the "zero model", i.e.
#' the model that uses \code{mean(y)} on the training data for prediction.
#'
#' If \code{true.coefficients} are available, the function also computes the
#' model error for the different methods, i.e. the sum of squared differences
#' between the true and the estimated regression coefficients.
#'
#' @param X matrix of predictor observations.
#' @param y vector of response observations. The length of \code{y} is the same
#' as the number of rows of \code{X}.
#' @param m maximal number of Partial Least Squares components. Default is
#' \code{m=ncol(X)}.
#' @param R number of runs. Default is 20.
#' @param ratio ratio of the number of training examples to the total number
#' of (training + test) examples. Default is 0.8.
#' @param verbose If \code{TRUE}, the function reports its progress. Default
#' is \code{TRUE}.
#' @param k number of cross-validation splits. Default is 10.
#' @param ratio.samples ratio of the total number of (training + test)
#' examples to \code{nrow(X)}. Default is 1.
#' @param use.kernel Use kernel representation? Default is
#' \code{use.kernel=FALSE}.
#' @param criterion choice of the model selection criterion, one of "aic",
#' "bic" or "gmdl". Default is "bic".
#' @param true.coefficients The vector of true regression coefficients (without
#' intercept), if available. Default is \code{NULL}.
#' @return \item{MSE}{data frame of size R x 5. It contains the test error for
#' the five different methods for each of the R runs.} \item{M}{data frame of
#' size R x 5. It contains the optimal number of components for the five
#' different methods for each of the R runs.} \item{DoF}{data frame of size R x
#' 5. It contains the Degrees of Freedom (corresponding to \code{M}) for the
#' five different methods for each of the R runs.} \item{TIME}{data frame of
#' size R x 4. It contains the runtime for all methods (apart from the zero
#' model) for each of the R runs.} \item{M.CRASH}{data frame of size R x 2. It
#' contains the number of components for which the Krylov representation and
#' the Lanczos representation return negative Degrees of Freedom, hereby
#' indicating numerical problems.} \item{ME}{if \code{true.coefficients} are
#' available, this is a data frame of size R x 5. It contains the model error
#' for the five different methods for each of the R runs.} \item{SIGMAHAT}{data
#' frame of size R x 5. It contains the estimation of the noise level provided
#' by the five different methods for each of the R runs.}
#' @author Nicole Kraemer
#' @seealso \code{\link{pls.ic}}, \code{\link{pls.cv}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#' @keywords multivariate
#' @examples
#'
#' # generate artificial data
#' n<-50 # number of examples
#' p<-5 # number of variables
#' X<-matrix(rnorm(n*p),ncol=p)
#' true.coefficients<-runif(p,1,3)
#' y<-X%*%true.coefficients + rnorm(n,0,5)
#' my.benchmark<-benchmark.pls(X,y,R=10,true.coefficients=true.coefficients)
#'
#' @export benchmark.pls
benchmark.pls<-function(X,y,m=ncol(X),R=20,ratio=0.8,verbose=TRUE,k=10,ratio.samples=1,use.kernel=FALSE,criterion="bic",true.coefficients=NULL){
n<-floor(nrow(X)*ratio.samples)
m.crash.krylov<-m.crash.lanczos<-vector(length=R)
Expand Down
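To compare the five selection strategies across runs, one might summarize the returned data frames along these lines (an illustrative sketch, assuming the my.benchmark object from the example above):

colMeans(my.benchmark$MSE)   # average test error per selection strategy
colMeans(my.benchmark$M)     # average number of selected components
boxplot(my.benchmark$MSE, outline = FALSE)   # spread of the test errors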
68 changes: 68 additions & 0 deletions R/benchmark.regression.R
@@ -1,3 +1,71 @@
#' Comparison of Partial Least Squares Regression, Principal Components
#' Regression and Ridge Regression.
#'
#' This function computes the test error over several runs for (a) PLS, (b)
#' PCR, (c) Ridge Regression, and (d) the null model, that is, the mean of
#' \code{y}. In the first three cases, the optimal model is selected via
#' cross-validation.
#'
#' The function computes the test error, the cross-validation-optimal model
#' parameters, their corresponding Degrees of Freedom, and the
#' sum-of-squared-residuals (SSR) for PLS and PCR.
#'
#' @param X matrix of predictor observations.
#' @param y vector of response observations. The length of \code{y} is the same
#' as the number of rows of \code{X}.
#' @param m maximal number of components for PLS. Default is \code{m=ncol(X)}.
#' @param R number of runs. Default is 20.
#' @param ratio ratio of the number of training examples to the total number
#' of (training + test) examples. Default is 0.8.
#' @param verbose If \code{TRUE}, the function reports its progress. Default
#' is \code{TRUE}.
#' @param k number of cross-validation splits. Default is 10.
#' @param nsamples number of data points. Default is \code{nrow(X)}.
#' @param use.kernel Use kernel representation for PLS? Default is
#' \code{use.kernel=FALSE}.
#' @param supervised Should the principal components be sorted by decreasing
#' squared correlation to the response? Default is \code{FALSE}.
#' @return \item{MSE}{data frame of size R x 4. It contains the test error for
#' the four different methods for each of the R runs.} \item{M}{data frame of
#' size R x 4. It contains the optimal model parameters for the four different
#' methods for each of the R runs.} \item{DoF}{data frame of size R x 4. It
#' contains the Degrees of Freedom (corresponding to \code{M}) for the four
#' different methods for each of the R runs.} \item{res.pls}{matrix of size R x
#' (ncol(X)+1). It contains the SSR for PLS for each of the R runs.}
#' \item{res.pcr}{matrix of size R x (ncol(X)+1). It contains the SSR for PCR
#' for each of the R runs.} \item{DoF.all}{matrix of size R x (ncol(X)+1). It
#' contains the Degrees of Freedom for PLS for all components for each of the R
#' runs.}
#' @author Nicole Kraemer
#' @seealso \code{\link{pls.cv}}, \code{\link{pcr.cv}},
#' \code{\link{benchmark.pls}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#' @keywords multivariate
#' @examples
#'
#' \donttest{
#' # Boston Housing data
#' library(MASS)
#' data(Boston)
#' X<-as.matrix(Boston[,1:4]) # select the first 4 columns as predictor variables
#' y<-as.vector(Boston[,14])
#'
#' my.benchmark<-benchmark.regression(X,y,ratio=0.5,R=10,k=5)
#'
#' # boxplot of the mean squared error
#'
#' boxplot(my.benchmark$MSE,outline=FALSE)
#'
#' # boxplot of the degrees of freedom, without the null model
#'
#' boxplot(my.benchmark$DoF[,-4])
#' }
#'
#' @export benchmark.regression
benchmark.regression=function (X, y, m = ncol(X), R = 20, ratio = 0.8, verbose = TRUE,k = 10, nsamples = nrow(X), use.kernel = FALSE,supervised=FALSE) {
n <- nsamples
m.pls <- m.pcr<-lambda.ridge<-vector(length = R) # vector of optimal model parameters
Expand Down
37 changes: 37 additions & 0 deletions R/coef.plsdof.R
@@ -1,3 +1,40 @@
#' Regression coefficients
#'
#' This function returns the regression coefficients of a plsdof-object.
#'
#' The function returns the regression coefficients (without intercept) for the
#' optimal number of components.
#'
#' @param object an object of class "plsdof" that is returned by the functions
#' \code{pls.ic} and \code{pls.cv}.
#' @param ... additional parameters
#' @return regression coefficients.
#' @author Nicole Kraemer
#' @seealso \code{\link{vcov.plsdof}}, \code{\link{pls.model}},
#' \code{\link{pls.ic}}, \code{\link{pls.cv}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#'
#' Kraemer, N., Braun, M.L. (2007) "Kernelizing PLS, Degrees of Freedom, and
#' Efficient Model Selection", Proceedings of the 24th International Conference
#' on Machine Learning, Omni Press, 441 - 448
#' @keywords models
#' @examples
#'
#'
#' n<-50 # number of observations
#' p<-5 # number of variables
#' X<-matrix(rnorm(n*p),ncol=p)
#' y<-rnorm(n)
#'
#'
#' pls.object<-pls.ic(X,y,criterion="bic")
#' mycoef<-coef(pls.object)
#'
#' @export
coef.plsdof=function(object,...){
return(object$coefficients)
}
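Because coef.plsdof is typically paired with vcov.plsdof, a minimal sketch of combining the two into approximate 95% confidence intervals may be useful (illustrative only; it reuses the simulated data from the example above, and the interval construction is not part of the package):

# Approximate 95% confidence intervals from coef() and vcov() on a plsdof object
set.seed(1)
X<-matrix(rnorm(50*5),ncol=5)
y<-rnorm(50)
pls.object<-pls.ic(X,y,criterion="bic")
b<-coef(pls.object)                # regression coefficients, without intercept
se<-sqrt(diag(vcov(pls.object)))   # standard errors from the covariance matrix
cbind(lower=b-1.96*se, estimate=b, upper=b+1.96*se)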
31 changes: 31 additions & 0 deletions R/compute.lower.bound.R
@@ -1,3 +1,34 @@
#' Lower bound for the Degrees of Freedom
#'
#' This function computes the lower bound for the Degrees of Freedom of PLS
#' with 1 component.
#'
#' If the decay of the eigenvalues of \code{cor(X)} is not too fast, we can
#' lower-bound the Degrees of Freedom of PLS with 1 component. Note that we
#' implicitly assume that we use scaled predictor variables to compute the PLS
#' solution.
#'
#' @param X matrix of predictor observations.
#' @return \item{bound}{logical. \code{bound} is \code{TRUE} if the decay of
#' the eigenvalues is slow enough.} \item{lower.bound}{if \code{bound} is
#' \code{TRUE}, this is the lower bound; otherwise, it is set to -1.}
#' @author Nicole Kraemer
#' @seealso \code{\link{pls.model}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#' @keywords math
#' @examples
#'
#' # Boston Housing data
#' library(MASS)
#' data(Boston)
#' X<-Boston[,-14]
#' my.lower<-compute.lower.bound(X)
#'
#' @export compute.lower.bound
compute.lower.bound=function(X){
S=cor(X)
lower.bound=-1
Expand Down
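The bound is easiest to interpret next to an actual DoF estimate. A sketch, assuming scaled predictors (as the bound requires) and that pls.model's DoF vector starts at 0 components, so that entry 2 corresponds to 1 component:

# Compare the lower bound with the estimated DoF of PLS with 1 component
library(MASS)
data(Boston)
X<-scale(as.matrix(Boston[,-14]))
y<-Boston[,14]
my.lower<-compute.lower.bound(X)
my.model<-pls.model(X,y,compute.DoF=TRUE)
my.lower$lower.bound   # lower bound (or -1 if the eigenvalue decay is too fast)
my.model$DoF[2]        # estimated DoF for PLS with 1 component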
37 changes: 36 additions & 1 deletion R/dA.R
@@ -1,5 +1,40 @@
#' Derivative of normalization function
#'
#' This function computes the derivative of the function \deqn{w\mapsto
#' \frac{w}{\|w\|_A}} with respect to y.
#'
#' The first derivative of the normalization operator is
#' \deqn{\frac{\partial}{\partial y}\left(w\mapsto
#' \frac{w}{\|w\|_A}\right)=\frac{1}{\|w\|_A}\left(I_n - \frac{w w^\top
#' A}{w^\top A w}\right) \frac{\partial w}{\partial y}}
#'
#' @param w vector of length n.
#' @param A square matrix that defines the norm.
#' @param dw derivative of w with respect to y. As y is a vector of length n,
#' the derivative is a matrix of size n x n.
#' @return the Jacobian matrix of the normalization function. This is a matrix
#' of size n x n.
#' @author Nicole Kraemer
#' @seealso \code{\link{normalize}}, \code{\link{dnormalize}}
#' @references Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of
#' Partial Least Squares Regression". Journal of the American Statistical
#' Association 106 (494)
#' \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#'
#' Kraemer, N., Braun, M.L. (2007) "Kernelizing PLS, Degrees of Freedom, and
#' Efficient Model Selection", Proceedings of the 24th International Conference
#' on Machine Learning, Omni Press, 441 - 448
#' @keywords math
#' @examples
#'
#' w<-rnorm(15)
#' dw<-diag(15)
#' A<-diag(1:15)
#' d.object<-dA(w,A,dw)
#'
#' @export dA
dA<-function(w,A,dw){
wa<-sqrt(sum((w*(A%*%w))))
dummy<-(1/wa)*(diag(length(w))- w%*%t(w)%*%A/(wa^2))%*%dw
return(dummy)
}
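A quick numerical sanity check of this Jacobian compares dA against central finite differences; the sketch below is verification code only, assuming dA as defined above and taking y = w so that dw/dy is the identity:

# Compare the analytic Jacobian of w -> w/||w||_A with finite differences
set.seed(1)
n<-5
A<-diag(1:n)
w<-rnorm(n)
dw<-diag(n)                          # y = w, so dw/dy is the identity
norm.A<-function(w) w/sqrt(sum(w*(A%*%w)))
eps<-1e-6
J.num<-sapply(seq_len(n), function(j){
  e<-rep(0,n); e[j]<-eps
  (norm.A(w+e)-norm.A(w-e))/(2*eps)   # j-th column of the Jacobian
})
max(abs(dA(w,A,dw)-J.num))           # should be of order 1e-9 or smaller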
