
Commit 0.3-0
* GitHub Actions.
* Roxygen documentation.
* Package logo.
fbertran committed Mar 21, 2021
1 parent a4be893 commit 1f98d70
Showing 120 changed files with 6,316 additions and 3,563 deletions.
9 changes: 6 additions & 3 deletions .Rbuildignore
@@ -1,17 +1,20 @@
^genlogo.R$
#All packages
^.*\.Rproj$
^\.Rproj\.user$
^\.gitignore$
^NEWS$
#For the pkgdown website
^pkgdown*$
^docs*$
#For the cached chunks of README.Rmd
^cache*$
^README_cache*$
#If README too big, not on CRAN but only on git
^man/figures*$
^README\.Rmd$
^README\.md$
#If README too big, not on CRAN but only on git
#^man/figures*$
#^README\.md$
#Specific
^fullrespdf*$
^inst/animation*$
^\.github$
1 change: 1 addition & 0 deletions .github/.gitignore
@@ -0,0 +1 @@
*.html
32 changes: 32 additions & 0 deletions .github/workflows/R-CMD-check.yaml
@@ -0,0 +1,32 @@
# For help debugging build failures, open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- main
- master
pull_request:
branches:
- main
- master

name: R-CMD-check

jobs:
R-CMD-check:
runs-on: macOS-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@v1
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
shell: Rscript {0}
- name: Check
run: |
options(crayon.enabled = TRUE)
rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
shell: Rscript {0}
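For reference, the same check can be reproduced in a local R session. The sketch below simply mirrors the two Rscript steps of the workflow and assumes the package source tree is the current working directory:

# Mirror the workflow steps locally (sketch; assumes network access to CRAN)
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
options(crayon.enabled = TRUE)  # coloured check output, as in the CI step
rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")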
10 changes: 5 additions & 5 deletions DESCRIPTION
@@ -3,8 +3,8 @@ Type: Package
Title: Degrees of Freedom and Statistical Inference for Partial Least
Squares Regression
Depends: MASS
Version: 0.2-9
Date: 2019-01-31
Version: 0.3-0
Date: 2021-03-13
Author: Nicole Kraemer, Mikio L. Braun
Maintainer: Frederic Bertrand <frederic.bertrand@math.unistra.fr>
Description: The plsdof package provides Degrees of Freedom estimates
@@ -28,6 +28,6 @@ X-CRAN-Original-Maintainer: Nicole Kraemer
<kraemer_r_packages@yahoo.de>
X-CRAN-Comment: Orphaned and corrected on 2018-07-21 as check problems
were not corrected despite reminders.
RoxygenNote: 6.1.1
URL: https://github.com/fbertran/plsdof
BugReports: https://github.com/fbertran/plsdof/issues
RoxygenNote: 7.1.1
URL: https://github.com/fbertran/plsdof/, https://fbertran.github.io/plsdof/
BugReports: https://github.com/fbertran/plsdof/issues/
39 changes: 29 additions & 10 deletions NAMESPACE
@@ -1,11 +1,30 @@
import(MASS)

importFrom("graphics", "plot")
importFrom("stats", "coef", "cor", "sd")

## and exported functions
export(benchmark.pls,coef.plsdof,dA,dvvtz,information.criteria,krylov,normalize,pcr,pls.dof,pls.model,tr,vvtz,benchmark.regression,compute.lower.bound,dnormalize,first.local.minimum, kernel.pls.fit, linear.pls.fit, pcr.cv,pls.cv, pls.ic, ridge.cv,vcov.plsdof)

S3method(coef, plsdof)
S3method(vcov, plsdof)
# Generated by roxygen2: do not edit by hand

S3method(coef,plsdof)
S3method(vcov,plsdof)
export(benchmark.pls)
export(benchmark.regression)
export(compute.lower.bound)
export(dA)
export(dnormalize)
export(dvvtz)
export(first.local.minimum)
export(information.criteria)
export(kernel.pls.fit)
export(krylov)
export(linear.pls.fit)
export(normalize)
export(pcr)
export(pcr.cv)
export(pls.cv)
export(pls.dof)
export(pls.ic)
export(pls.model)
export(ridge.cv)
export(tr)
export(vvtz)
import(MASS)
importFrom(graphics,plot)
importFrom(stats,coef)
importFrom(stats,cor)
importFrom(stats,sd)
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,3 +1,8 @@
# plsdof 0.3-0

* GitHub Actions.
* Roxygen documentation.

# plsdof 0.2-9

* Added a website for the package.
Expand Down
73 changes: 73 additions & 0 deletions R/benchmark.pls.R
@@ -1,3 +1,76 @@
#' Comparison of model selection criteria for Partial Least Squares Regression.
#'
#' This function computes the test error over several runs for different model
#' selection strategies.
#'
#' The function estimates the optimal number of PLS components based on four
#' different criteria: (1) cross-validation, (2) information criteria with the
#' naive Degrees of Freedom DoF(m)=m+1, (3) information criteria with the
#' Degrees of Freedom computed via a Lanczos representation of PLS, and (4)
#' information criteria with the Degrees of Freedom computed via a Krylov
#' representation of PLS. Note that the latter two options only differ with
#' respect to the estimation of the model error.
#'
#' In addition, the function computes the test error of the "zero model", i.e.
#' the model that uses \code{mean(y)} on the training data for prediction.
#'
#' If \code{true.coefficients} are available, the function also computes the
#' model error for the different methods, i.e. the sum of squared differences
#' between the true and the estimated regression coefficients.
#'
#' @param X matrix of predictor observations.
#' @param y vector of response observations. The length of \code{y} is the same
#' as the number of rows of \code{X}.
#' @param m maximal number of Partial Least Squares components. Default is
#' \code{m=ncol(X)}.
#' @param R number of runs. Default is 20.
#' @param ratio ratio of the number of training examples to the total number
#' of (training + test) examples. Default is 0.8.
#' @param verbose If \code{TRUE}, the function reports its progress. Default
#' is \code{TRUE}.
#' @param k number of cross-validation splits. Default is 10.
#' @param ratio.samples ratio of the total number of (training + test)
#' examples to \code{nrow(X)}. Default is 1.
#' @param use.kernel Use kernel representation? Default is
#' \code{use.kernel=FALSE}.
#' @param criterion choice of the model selection criterion, one of "aic",
#' "bic" or "gmdl". Default is "bic".
#' @param true.coefficients The vector of true regression coefficients (without
#' intercept), if available. Default is \code{NULL}.
#' @return \item{MSE}{data frame of size R x 5. It contains the test error for
#' the five different methods for each of the R runs.} \item{M}{data frame of
#' size R x 5. It contains the optimal number of components for the five
#' different methods for each of the R runs.} \item{DoF}{data frame of size R x
#' 5. It contains the Degrees of Freedom (corresponding to \code{M}) for the
#' five different methods for each of the R runs.} \item{TIME}{data frame of
#' size R x 4. It contains the runtime for all methods (apart from the zero
#' model) for each of the R runs.} \item{M.CRASH}{data frame of size R x 2. It
#' contains the number of components for which the Krylov representation and
#' the Lanczos representation return negative Degrees of Freedom, hereby
#' indicating numerical problems.} \item{ME}{if \code{true.coefficients} are
#' available, this is a data frame of size R x 5. It contains the model error
#' for the five different methods for each of the R runs.} \item{SIGMAHAT}{data
#' frame of size R x 5. It contains the estimation of the noise level provided
#' by the five different methods for each of the R runs.}
#' @author Nicole Kraemer
#' @seealso \code{\link{pls.ic}}, \code{\link{pls.cv}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#' @keywords multivariate
#' @examples
#'
#' # generate artificial data
#' n<-50 # number of examples
#' p<-5 # number of variables
#' X<-matrix(rnorm(n*p),ncol=p)
#' true.coefficients<-runif(p,1,3)
#' y<-X%*%true.coefficients + rnorm(n,0,5)
#' my.benchmark<-benchmark.pls(X,y,R=10,true.coefficients=true.coefficients)
#'
#' @export benchmark.pls
benchmark.pls<-function(X,y,m=ncol(X),R=20,ratio=0.8,verbose=TRUE,k=10,ratio.samples=1,use.kernel=FALSE,criterion="bic",true.coefficients=NULL){
n<-floor(nrow(X)*ratio.samples)
m.crash.krylov<-m.crash.lanczos<-vector(length=R)
Expand Down
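To compare the five selection strategies across runs, one might summarize the returned data frames along these lines (an illustrative sketch, assuming the my.benchmark object from the example above):

colMeans(my.benchmark$MSE)   # average test error per selection strategy
colMeans(my.benchmark$M)     # average number of selected components
boxplot(my.benchmark$MSE, outline = FALSE)   # spread of the test errors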
68 changes: 68 additions & 0 deletions R/benchmark.regression.R
@@ -1,3 +1,71 @@
#' Comparison of Partial Least Squares Regression, Principal Components
#' Regression and Ridge Regression.
#'
#' This function computes the test error over several runs for (a) PLS, (b)
#' PCR, (c) Ridge Regression, and (d) the null model, that is, the mean of
#' \code{y}. In the first three cases, the optimal model is selected via
#' cross-validation.
#'
#' The function computes the test error, the cross-validation-optimal model
#' parameters, their corresponding Degrees of Freedom, and the
#' sum-of-squared-residuals (SSR) for PLS and PCR.
#'
#' @param X matrix of predictor observations.
#' @param y vector of response observations. The length of \code{y} is the same
#' as the number of rows of \code{X}.
#' @param m maximal number of components for PLS. Default is \code{m=ncol(X)}.
#' @param R number of runs. Default is 20.
#' @param ratio ratio of the number of training examples to the total number
#' of (training + test) examples. Default is 0.8.
#' @param verbose If \code{TRUE}, the function reports its progress. Default
#' is \code{TRUE}.
#' @param k number of cross-validation splits. Default is 10.
#' @param nsamples number of data points. Default is \code{nrow(X)}.
#' @param use.kernel Use kernel representation for PLS? Default is
#' \code{use.kernel=FALSE}.
#' @param supervised Should the principal components be sorted by decreasing
#' squared correlation to the response? Default is \code{FALSE}.
#' @return \item{MSE}{data frame of size R x 4. It contains the test error for
#' the four different methods for each of the R runs.} \item{M}{data frame of
#' size R x 4. It contains the optimal model parameters for the four different
#' methods for each of the R runs.} \item{DoF}{data frame of size R x 4. It
#' contains the Degrees of Freedom (corresponding to \code{M}) for the four
#' different methods for each of the R runs.} \item{res.pls}{matrix of size R x
#' (ncol(X)+1). It contains the SSR for PLS for each of the R runs.}
#' \item{res.pcr}{matrix of size R x (ncol(X)+1). It contains the SSR for PCR
#' for each of the R runs.} \item{DoF.all}{matrix of size R x (ncol(X)+1). It
#' contains the Degrees of Freedom for PLS for all components for each of the R
#' runs.}
#' @author Nicole Kraemer
#' @seealso \code{\link{pls.cv}}, \code{\link{pcr.cv}},
#' \code{\link{benchmark.pls}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#' @keywords multivariate
#' @examples
#'
#' \donttest{
#' # Boston Housing data
#' library(MASS)
#' data(Boston)
#' X<-as.matrix(Boston[,1:4]) # select the first 4 columns as predictor variables
#' y<-as.vector(Boston[,14])
#'
#' my.benchmark<-benchmark.regression(X,y,ratio=0.5,R=10,k=5)
#'
#' # boxplot of the mean squared error
#'
#' boxplot(my.benchmark$MSE,outline=FALSE)
#'
#' # boxplot of the degrees of freedom, without the null model
#'
#' boxplot(my.benchmark$DoF[,-4])
#' }
#'
#' @export benchmark.regression
benchmark.regression=function (X, y, m = ncol(X), R = 20, ratio = 0.8, verbose = TRUE,k = 10, nsamples = nrow(X), use.kernel = FALSE,supervised=FALSE) {
n <- nsamples
m.pls <- m.pcr<-lambda.ridge<-vector(length = R) # vector of optimal model parameters
Expand Down
37 changes: 37 additions & 0 deletions R/coef.plsdof.R
@@ -1,3 +1,40 @@
#' Regression coefficients
#'
#' This function returns the regression coefficients of a plsdof-object.
#'
#' The function returns the regression coefficients (without intercept) for the
#' optimal number of components.
#'
#' @param object an object of class "plsdof" that is returned by the functions
#' \code{pls.ic} and \code{pls.cv}.
#' @param ... additional parameters
#' @return regression coefficients.
#' @author Nicole Kraemer
#' @seealso \code{\link{vcov.plsdof}}, \code{\link{pls.model}},
#' \code{\link{pls.ic}}, \code{\link{pls.cv}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#'
#' Kraemer, N., Braun, M.L. (2007) "Kernelizing PLS, Degrees of Freedom, and
#' Efficient Model Selection", Proceedings of the 24th International Conference
#' on Machine Learning, Omni Press, 441 - 448
#' @keywords models
#' @examples
#'
#'
#' n<-50 # number of observations
#' p<-5 # number of variables
#' X<-matrix(rnorm(n*p),ncol=p)
#' y<-rnorm(n)
#'
#'
#' pls.object<-pls.ic(X,y,criterion="bic")
#' mycoef<-coef(pls.object)
#'
#' @export
coef.plsdof=function(object,...){
return(object$coefficients)
}
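Because coef.plsdof is typically paired with vcov.plsdof, a minimal sketch of combining the two into approximate 95% confidence intervals may be useful (illustrative only; it reuses the simulated data from the example above, and the interval construction is not part of the package):

# Approximate 95% confidence intervals from coef() and vcov() on a plsdof object
set.seed(1)
X<-matrix(rnorm(50*5),ncol=5)
y<-rnorm(50)
pls.object<-pls.ic(X,y,criterion="bic")
b<-coef(pls.object)                # regression coefficients, without intercept
se<-sqrt(diag(vcov(pls.object)))   # standard errors from the covariance matrix
cbind(lower=b-1.96*se, estimate=b, upper=b+1.96*se)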
31 changes: 31 additions & 0 deletions R/compute.lower.bound.R
@@ -1,3 +1,34 @@
#' Lower bound for the Degrees of Freedom
#'
#' This function computes the lower bound for the Degrees of Freedom of PLS
#' with 1 component.
#'
#' If the decay of the eigenvalues of \code{cor(X)} is not too fast, we can
#' lower-bound the Degrees of Freedom of PLS with 1 component. Note that we
#' implicitly assume that we use scaled predictor variables to compute the PLS
#' solution.
#'
#' @param X matrix of predictor observations.
#' @return \item{bound}{logical. \code{bound} is \code{TRUE} if the decay of
#' the eigenvalues is slow enough.} \item{lower.bound}{if \code{bound} is
#' \code{TRUE}, this is the lower bound; otherwise, it is set to -1.}
#' @author Nicole Kraemer
#' @seealso \code{\link{pls.model}}
#' @references
#'
#' Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of Partial Least
#' Squares Regression". Journal of the American Statistical Association 106
#' (494) \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#' @keywords math
#' @examples
#'
#' # Boston Housing data
#' library(MASS)
#' data(Boston)
#' X<-Boston[,-14]
#' my.lower<-compute.lower.bound(X)
#'
#' @export compute.lower.bound
compute.lower.bound=function(X){
S=cor(X)
lower.bound=-1
Expand Down
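The bound is easiest to interpret next to an actual DoF estimate. A sketch, assuming scaled predictors (as the bound requires) and that pls.model's DoF vector starts at 0 components, so that entry 2 corresponds to 1 component:

# Compare the lower bound with the estimated DoF of PLS with 1 component
library(MASS)
data(Boston)
X<-scale(as.matrix(Boston[,-14]))
y<-Boston[,14]
my.lower<-compute.lower.bound(X)
my.model<-pls.model(X,y,compute.DoF=TRUE)
my.lower$lower.bound   # lower bound (or -1 if the eigenvalue decay is too fast)
my.model$DoF[2]        # estimated DoF for PLS with 1 component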
37 changes: 36 additions & 1 deletion R/dA.R
@@ -1,5 +1,40 @@
#' Derivative of normalization function
#'
#' This function computes the derivative of the function \deqn{w\mapsto
#' \frac{w}{\|w\|_A}} with respect to y.
#'
#' The first derivative of the normalization operator is
#' \deqn{\frac{\partial}{\partial y}\left(w\mapsto
#' \frac{w}{\|w\|_A}\right)=\frac{1}{\|w\|_A}\left(I_n - \frac{w w^\top
#' A}{w^\top A w}\right) \frac{\partial w}{\partial y}}
#'
#' @param w vector of length n.
#' @param A square matrix that defines the norm.
#' @param dw derivative of w with respect to y. As y is a vector of length n,
#' the derivative is a matrix of size n x n.
#' @return the Jacobian matrix of the normalization function. This is a matrix
#' of size n x n.
#' @author Nicole Kraemer
#' @seealso \code{\link{normalize}}, \code{\link{dnormalize}}
#' @references Kraemer, N., Sugiyama M. (2011). "The Degrees of Freedom of
#' Partial Least Squares Regression". Journal of the American Statistical
#' Association 106 (494)
#' \url{https://www.tandfonline.com/doi/abs/10.1198/jasa.2011.tm10107}
#'
#' Kraemer, N., Braun, M.L. (2007) "Kernelizing PLS, Degrees of Freedom, and
#' Efficient Model Selection", Proceedings of the 24th International Conference
#' on Machine Learning, Omni Press, 441 - 448
#' @keywords math
#' @examples
#'
#' w<-rnorm(15)
#' dw<-diag(15)
#' A<-diag(1:15)
#' d.object<-dA(w,A,dw)
#'
#' @export dA
dA<-function(w,A,dw){
wa<-sqrt(sum((w*(A%*%w))))
dummy<-(1/wa)*(diag(length(w))- w%*%t(w)%*%A/(wa^2))%*%dw
return(dummy)
}
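A quick numerical sanity check of this Jacobian compares dA against central finite differences; the sketch below is verification code only, assuming dA as defined above and taking y = w so that dw/dy is the identity:

# Compare the analytic Jacobian of w -> w/||w||_A with finite differences
set.seed(1)
n<-5
A<-diag(1:n)
w<-rnorm(n)
dw<-diag(n)                          # y = w, so dw/dy is the identity
norm.A<-function(w) w/sqrt(sum(w*(A%*%w)))
eps<-1e-6
J.num<-sapply(seq_len(n), function(j){
  e<-rep(0,n); e[j]<-eps
  (norm.A(w+e)-norm.A(w-e))/(2*eps)   # j-th column of the Jacobian
})
max(abs(dA(w,A,dw)-J.num))           # should be of order 1e-9 or smaller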
