diff --git a/.project b/.project
deleted file mode 100644
index a807d01..0000000
--- a/.project
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
- ccaPP
-
-
-
-
-
- de.walware.statet.r.builders.RSupport
-
-
-
-
-
- de.walware.statet.base.StatetNature
- de.walware.statet.r.RNature
- de.walware.statet.r.RPkgNature
-
-
diff --git a/DESCRIPTION b/DESCRIPTION
index 91fbad3..c1fccdf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
Package: ccaPP
Type: Package
Title: (Robust) Canonical Correlation Analysis via Projection Pursuit
-Version: 0.3.3
-Date: 2019-12-09
+Version: 0.4.0
+Date: 2021-10-14
Depends:
R (>= 3.2.0),
parallel,
@@ -26,4 +26,4 @@ Author: Andreas Alfons [aut, cre],
David Simcha [ctb]
Maintainer: Andreas Alfons
Encoding: UTF-8
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.2
diff --git a/NAMESPACE b/NAMESPACE
index cec708f..596a150 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,10 @@
# Generated by roxygen2: do not edit by hand
S3method(print,cca)
+S3method(print,cvFolds)
S3method(print,maxCor)
S3method(print,permTest)
+S3method(print,sMaxCor)
export(CCAgrid)
export(CCAproj)
export(ccaGrid)
@@ -12,11 +14,13 @@ export(corM)
export(corPearson)
export(corQuadrant)
export(corSpearman)
+export(cvFolds)
export(fastMAD)
export(fastMedian)
export(maxCorGrid)
export(maxCorProj)
export(permTest)
+export(sMaxCorGrid)
import(parallel)
import(pcaPP)
import(robustbase)
diff --git a/NEWS b/NEWS
index e524dc1..9c741b7 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,9 @@
+Changes in ccaPP version 0.4.0
+
+ + New function sMaxCorGrid() for sparse maximum correlation estimators.
+
+
+
Changes in ccaPP version 0.3.3
+ C++ functions are now properly registered.
diff --git a/R/cca.R b/R/cca.R
index c791338..395e3fd 100644
--- a/R/cca.R
+++ b/R/cca.R
@@ -1,7 +1,7 @@
-# ------------------------------------
+# --------------------------------------
# Author: Andreas Alfons
-# Erasmus University Rotterdam
-# ------------------------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' (Robust) CCA via alternating series of grid searches
#'
@@ -154,7 +154,33 @@ ccaGrid <- function(x, y, k = 1,
cca
}
-## wrapper function for more compatibility with package pcaPP
+
+## @rdname ccaGrid
+## @export
+
+#sccaGrid <- function(x, y, k = 1, lambda = 0,
+# method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+# control = list(...), nIterations = 10, nAlternate = 10, nGrid = 25,
+# select = NULL, tol = 1e-06, fallback = FALSE, seed = NULL, ...) {
+# ## initializations
+# matchedCall <- match.call()
+# ## define list of control arguments for algorithm
+# lambda <- rep(as.numeric(lambda), length.out=2)
+# lambda[is.na(lambda)] <- formals()$lambda
+# nIterations <- as.integer(nIterations)
+# nAlternate <- as.integer(nAlternate)
+# nGrid <- as.integer(nGrid)
+# tol <- as.numeric(tol)
+# ppControl <- list(lambda=lambda, nIterations=nIterations,
+# nAlternate=nAlternate, nGrid=nGrid, select=select, tol=tol)
+# ## call workhorse function
+# cca <- ccaPP(x, y, k, method=method, corControl=control, algorithm="sparse",
+# ppControl=ppControl, fallback=fallback, seed=seed)
+# cca$call <- matchedCall
+# cca
+#}
+
+
#' @rdname ccaGrid
#' @export
@@ -175,6 +201,24 @@ CCAgrid <- function(x, y, k = 1,
}
+## @rdname ccaGrid
+## @export
+
+#sCCAgrid <- function(x, y, k = 1, lambda = 0,
+# method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+# maxiter = 10, maxalter = 10, splitcircle = 25, select=NULL,
+# zero.tol = 1e-06, fallback = FALSE, seed = NULL, ...) {
+# ## initializations
+# matchedCall <- match.call()
+# ## call ccaGrid()
+# cca <- sccaGrid(x, y, k=k, lambda=lambda, method=method, nIterations=maxiter,
+# nAlternate=maxalter, nGrid=splitcircle, select=select, tol=zero.tol,
+# fallback=fallback, seed=seed, ...)
+# cca$call <- matchedCall
+# cca
+#}
+
+
#' (Robust) CCA via projections through the data points
#'
#' Perform canoncial correlation analysis via projection pursuit based on
@@ -280,7 +324,7 @@ ccaProj <- function(x, y, k = 1,
cca
}
-## wrapper function for more compatibility with package pcaPP
+
#' @rdname ccaProj
#' @export
diff --git a/R/cor.R b/R/cor.R
index f7f3fdb..261dfa3 100644
--- a/R/cor.R
+++ b/R/cor.R
@@ -1,7 +1,7 @@
-# ----------------------
+# --------------------------------------
# Author: Andreas Alfons
-# KU Leuven
-# ----------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' Fast implementations of (robust) correlation estimators
#'
diff --git a/R/cv.R b/R/cv.R
new file mode 100644
index 0000000..353a5af
--- /dev/null
+++ b/R/cv.R
@@ -0,0 +1,169 @@
+# --------------------------------------
+# Author: Andreas Alfons
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
+
+## perform cross-validation
+cvMaxCor <- function(call, x, y, folds, corFun = corSpearman, corArgs = list(),
+ envir = parent.frame(), cl = NULL) {
+ # initializations
+ R <- folds$R
+ useParallel <- !is.null(cl)
+ # obtain list of predictions for all replications
+ if(R == 1) {
+ s <- getIndices(folds)
+ if(useParallel) xy <- pcvXY(s, call=call, x=x, y=y, envir=envir, cl=cl)
+ else xy <- cvXY(s, call=call, x=x, y=y, envir=envir)
+ corXY(xy, corFun=corFun, corArgs=corArgs)
+ } else {
+ cvFun <- function(r) {
+ s <- getIndices(folds, r)
+ xy <- cvXY(s, call=call, x=x, y=y, envir=envir)
+ corXY(xy, corFun=corFun, corArgs=corArgs)
+ }
+ if(useParallel) {
+ cv <- parLapply(cl, seq_len(R), cvFun)
+ cv <- do.call(cbind, cv)
+ }
+ else cv <- sapply(seq_len(R), cvFun)
+ apply(cv, 1, mean)
+ }
+}
+
+# compute the weighting vectors for the training data and obtain linear
+# combinations of the test data
+rsXY <- function(i, call, x, y, envir) {
+ # plug training data into function call
+ call$x <- x[-i, , drop=FALSE]
+ call$y <- y[-i, , drop=FALSE]
+ # evaluate function call in supplied environment to make sure
+ # that other arguments are evaluated correctly
+ fit <- eval(call, envir)
+ # obtain linear combinations for test data
+ cbind(x=x[i, , drop=FALSE] %*% fit$a, y=y[i, , drop=FALSE] %*% fit$b)
+}
+
+# compute linear combinations for each left-out block in one replication of
+# cross validation
+cvXY <- function(folds, call, x, y, envir) {
+ tmp <- lapply(folds, rsXY, call=call, x=x, y=y, envir=envir)
+ # instead of collecting the results from the folds in the original order
+ # of the observations, they are simply stacked on top of each other
+ do.call(rbind, tmp)
+}
+
+# compute linear combinations for each left-out block in one replication of
+# cross validation via parallel computing
+pcvXY <- function(folds, call, x, y, envir, cl) {
+ # parLapply() already has an argument 'x', so don't use the argument names
+ # for the data or it will throw an error
+ tmp <- parLapply(cl, folds, rsXY, call, x, y, envir=envir)
+ # instead of collecting the results from the folds in the original order
+ # of the observations, they are simply stacked on top of each other
+ do.call(rbind, tmp)
+}
+
+# compute the correlation between the linear combinations obtained via
+# cross-validation
+corXY <- function(xy, corFun = corSpearman, corArgs = list()) {
+ # number of linear combinations of each data set
+ p <- ncol(xy) / 2
+ # split the data into the linear combinations of x and y, respectively, and
+ # compute the correlation between each pair of linear combinations
+ if(p == 1) doCall(corFun, xy[, 1], xy[, 2], args=corArgs)
+ else {
+ sapply(seq_len(p), function(j) {
+ doCall(corFun, xy[, j], xy[, p+j], args=corArgs)
+ })
+ }
+}
+
+
+#' Cross-validation folds
+#'
+#' Split observations or groups of observations into \eqn{K} folds to be used
+#' for (repeated) \eqn{K}-fold cross-validation. \eqn{K} should thereby be
+#' chosen such that all folds are of approximately equal size.
+#'
+#' @aliases print.cvFolds
+#'
+#' @param n an integer giving the number of observations to be split into
+#' folds. This is ignored if \code{grouping} is supplied in order to split
+#' groups of observations into folds.
+#' @param K an integer giving the number of folds into which the observations
+#' should be split (the default is five). Setting \code{K} equal to the number
+#' of observations or groups yields leave-one-out cross-validation.
+#' @param R an integer giving the number of replications for repeated
+#' \eqn{K}-fold cross-validation. This is ignored for leave-one-out
+#' cross-validation and other non-random splits of the data.
+#' @param type a character string specifying the type of folds to be
+#' generated. Possible values are \code{"random"} (the default),
+#' \code{"consecutive"} or \code{"interleaved"}.
+#' @param grouping a factor specifying groups of observations. If supplied,
+#' the data are split according to the groups rather than individual
+#' observations such that all observations within a group belong to the same
+#' fold.
+#'
+#' @return An object of class \code{"cvFolds"} with the following components:
+#' \item{n}{an integer giving the number of observations or groups.}
+#' \item{K}{an integer giving the number of folds.}
+#' \item{R}{an integer giving the number of replications.}
+#' \item{subsets}{an integer matrix in which each column contains a permutation
+#' of the indices of the observations or groups.}
+#' \item{which}{an integer vector giving the fold for each permuted observation
+#' or group.}
+#' \item{grouping}{a list giving the indices of the observations belonging to
+#' each group. This is only returned if a grouping factor has been supplied.}
+#'
+#' @author Andreas Alfons
+#'
+#' @seealso \code{\link{sMaxCorGrid}}
+#'
+#' @examples
+#' set.seed(1234) # set seed for reproducibility
+#' cvFolds(20, K = 5)
+#' cvFolds(20, K = 5, R = 10)
+#'
+#' @keywords utilities
+#'
+#' @export
+
+cvFolds <- function(n, K = 5, R = 1,
+ type = c("random", "consecutive", "interleaved"),
+ grouping = NULL) {
+ # check arguments
+ n <- if(is.null(grouping)) round(rep(n, length.out=1)) else nlevels(grouping)
+ if(!isTRUE(n > 0)) stop("'n' must be positive")
+ if(!isTRUE(K <= n)) stop(sprintf("'K' must be smaller or equal to %d", n))
+ if(K == n) type <- "leave-one-out"
+ else type <- match.arg(type)
+ # obtain CV folds
+ if(type == "random") {
+ # random K-fold splits with R replications
+ subsets <- replicate(R, sample.int(n))
+ } else {
+ # leave-one-out CV or non-random splits, replication not meaningful
+ R <- 1
+ subsets <- as.matrix(seq_len(n))
+ }
+ which <- as.factor(rep(seq_len(K), length.out=n))
+ if(type == "consecutive") which <- rep.int(seq_len(K), tabulate(which))
+ # construct and return object
+ folds <- list(n=n, K=K, R=R, subsets=subsets, which=which)
+ if(!is.null(grouping)) folds$grouping <- split(seq_along(grouping), grouping)
+ class(folds) <- "cvFolds"
+ folds
+}
+
+# retrieve list of indices for r-th replication
+getIndices <- function(x, r = 1) {
+ # split permuted items according to the folds
+ subsets <- split(x$subsets[, r], x$which)
+ # in case of grouped data, the list contains the group indices in each CV
+ # fold, so the indices of the respective observations need to be extracted
+ if(!is.null(grouping <- x$grouping))
+ subsets <- lapply(subsets, function(s) unlist(grouping[s], use.names=FALSE))
+ # return list of indices for CV folds
+ names(subsets) <- NULL
+ subsets
+}
diff --git a/R/fastMAD.R b/R/fastMAD.R
index c4fadb8..2a2fac5 100644
--- a/R/fastMAD.R
+++ b/R/fastMAD.R
@@ -1,7 +1,7 @@
-# ----------------------
+# --------------------------------------
# Author: Andreas Alfons
-# KU Leuven
-# ----------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' Fast implementation of the median absolute deviation
#'
diff --git a/R/fastMedian.R b/R/fastMedian.R
index 52f8a75..f3d29f0 100644
--- a/R/fastMedian.R
+++ b/R/fastMedian.R
@@ -1,7 +1,7 @@
-# ----------------------
+# --------------------------------------
# Author: Andreas Alfons
-# KU Leuven
-# ----------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' Fast implementation of the median
#'
diff --git a/R/maxCor.R b/R/maxCor.R
index 29c57b3..4f2ba3b 100644
--- a/R/maxCor.R
+++ b/R/maxCor.R
@@ -1,7 +1,7 @@
-# ------------------------------------
+# --------------------------------------
# Author: Andreas Alfons
-# Erasmus University Rotterdam
-# ------------------------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' (Robust) maximum correlation via alternating series of grid searches
#'
@@ -37,9 +37,19 @@
#' absolute correlation with the respective other data set to ensure symmetry
#' of the algorithm.
#'
+#' \code{sMaxCorGrid} enforces sparsity by adding \eqn{L_{1}}{L1} penalties on
+#' the weighting vectors to the objective function (i.e., the correlation of
+#' the resulting projections). The optimal combination of penalty parameters is
+#' thereby determined via cross-validation over a grid of values. To increase
+#' the computational performance of this cross-validation procedure, parallel
+#' computing is implemented via package \pkg{parallel}.
+#'
#' @aliases print.maxCor
#'
#' @param x,y each can be a numeric vector, matrix or data frame.
+#' @param lambdaX,lambdaY numeric vectors of non-negative values giving the
+#' penalty parameters for the weighting vectors of \code{x} and \code{y},
+#' respectively.
#' @param method a character string specifying the correlation functional to
#' maximize. Possible values are \code{"spearman"} for the Spearman
#' correlation, \code{"kendall"} for the Kendall correlation, \code{"quadrant"}
@@ -74,6 +84,19 @@
#' zero (e.g., dummy variables) are standardized via mean and standard
#' deviation. Note that if the Pearson correlation is maximized,
#' standardization is always done via mean and standard deviation.
+#' @param K,R,type,grouping additional arguments for generating
+#' cross-validation folds (see \code{\link{cvFolds}}).
+#' @param folds an object of class \code{"cvFolds"} (as returned by
+#' \code{\link{cvFolds}}) defining the folds of the data for cross-validation.
+#' If supplied, this is preferred over the arguments for generating
+#' cross-validation folds.
+#' @param nCores a positive integer giving the number of processor cores to be
+#' used for parallel computing in cross-validation (the default is 1 for no
+#' parallelization). If this is set to \code{NA}, all available processor
+#' cores are used.
+#' @param cl a \pkg{parallel} cluster for parallel computing to be used in
+#' cross-validation, as generated by \code{\link[parallel]{makeCluster}}. If
+#' supplied, this is preferred over \code{nCores}.
#' @param seed optional initial seed for the random number generator (see
#' \code{\link{.Random.seed}}). This is only used if \code{select} specifies
#' the numbers of variables of each data set to be randomly selected for
@@ -93,6 +116,16 @@
#' standardization of \code{x}.}
#' \item{scaleY}{a numeric vector giving the scale estimates used in
#' standardization of \code{y}.}
+#' \item{lambdaX}{a numeric giving the optimal penalty parameter for the
+#' weighting vector of \code{x} (only \code{sMaxCorGrid}).}
+#' \item{lambdaY}{a numeric giving the optimal penalty parameter for the
+#' weighting vector of \code{y} (only \code{sMaxCorGrid}).}
+#' \item{objective}{a numeric giving the value of the objective function
+#' for the optimal combination of penalty parameters (only \code{sMaxCorGrid}).}
+#' \item{cv}{a numeric matrix containing the (average) results from
+#' cross-validation for each combination of penalty parameters (only
+#' \code{sMaxCorGrid} and if a grid of values for the penalty parameters has
+#' been supplied).}
#' \item{call}{the matched function call.}
#'
#' @author Andreas Alfons
@@ -106,8 +139,8 @@
#' Statistics}, \bold{45}(1), 71--79.
#'
#' A. Alfons, C. Croux and P. Filzmoser (2016) Robust maximum association
-#' estimators. \emph{Journal of the American Statistical Association}. DOI
-#' 10.1080/01621459.2016.1148609. In press.
+#' estimators. \emph{Journal of the American Statistical Association}.
+#' \bold{112}(517), 436--445.
#'
#' @examples
#' data("diabetes")
@@ -152,6 +185,41 @@ maxCorGrid <- function(x, y,
}
+#' @rdname maxCorGrid
+#' @export
+
+sMaxCorGrid <- function(x, y, lambdaX = 0, lambdaY = 0,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ control = list(...), nIterations = 10, nAlternate = 10,
+ nGrid = 25, select = NULL, tol = 1e-06,
+ standardize = TRUE, fallback = FALSE, K = 5, R = 1,
+ type = c("random", "consecutive", "interleaved"),
+ grouping = NULL, folds = NULL, nCores = 1, cl = NULL,
+ seed = NULL, ...) {
+ ## initializations
+ matchedCall <- match.call()
+ ## define list of control arguments for algorithm
+ lambdaX <- as.numeric(lambdaX)
+ lambdaX[is.na(lambdaX)] <- formals()$lambdaX
+ lambdaY <- as.numeric(lambdaY)
+ lambdaY[is.na(lambdaY)] <- formals()$lambdaY
+ nIterations <- as.integer(nIterations)
+ nAlternate <- as.integer(nAlternate)
+ nGrid <- as.integer(nGrid)
+ tol <- as.numeric(tol)
+ ppControl <- list(lambdaX=lambdaX, lambdaY=lambdaY, nIterations=nIterations,
+ nAlternate=nAlternate, nGrid=nGrid, select=select, tol=tol)
+ ## call workhorse function
+ maxCor <- sMaxCorPP(x, y, method=method, corControl=control,
+ algorithm="grid", ppControl=ppControl,
+ standardize=standardize, fallback=fallback, K=K,
+ R=R, type=type, grouping=grouping, folds=folds,
+ nCores=nCores, cl=cl, seed=seed)
+ maxCor$call <- matchedCall
+ maxCor
+}
+
+
#' (Robust) maximum correlation via projections through the data points
#'
#' Compute the maximum correlation between two data sets via projection pursuit
@@ -246,7 +314,7 @@ maxCorProj <- function(x, y,
}
-## workhorse function
+## workhorse function for maximum correlation
maxCorPP <- function(x, y, ...) {
## call workhorse function for canonical correlation analysis
maxCor <- ccaPP(x, y, forceConsistency=FALSE, ...)
@@ -257,3 +325,103 @@ maxCorPP <- function(x, y, ...) {
class(maxCor) <- "maxCor"
maxCor
}
+
+
+## workhorse function for sparse maximum correlation
+#' @import parallel
+sMaxCorPP <- function(x, y,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ corControl, algorithm = "grid", ppControl,
+ standardize = TRUE, fallback = FALSE, folds = NULL,
+ nCores = 1, cl = NULL, seed = NULL, ...) {
+ ## initializations
+ x <- as.matrix(x)
+ y <- as.matrix(y)
+ n <- nrow(x)
+ if(nrow(y) != n) stop("'x' and 'y' must have the same number of observations")
+ p <- ncol(x)
+ q <- ncol(y)
+ ## prepare the data and call C++ function
+ if(n == 0 || p == 0 || q == 0) {
+ # zero dimension
+ maxCor <- list(cor=NA, a=numeric(), b=numeric(), lambdaX=NA,
+ lambdaY=NA, objective=NA)
+ } else {
+ # get list of control arguments
+ method <- match.arg(method)
+ corControl <- getCorControl(method, corControl, forceConsistency=FALSE)
+ standardize <- isTRUE(standardize)
+ fallback <- isTRUE(fallback)
+ if(!is.null(seed)) set.seed(seed)
+ ppControl <- getPPControl(algorithm, ppControl, p=p, q=q)
+ # check grid of values for the penalty parameters
+ lambdaX <- ppControl$lambdaX
+ lambdaY <- ppControl$lambdaY
+ ppControl$lambdaX <- NULL
+ ppControl$lambdaY <- NULL
+ lambdaX <- if(p == 1) 0 else sort(unique(lambdaX))
+ lambdaY <- if(q == 1) 0 else sort(unique(lambdaY))
+ lambda <- as.matrix(expand.grid(lambdaX=lambdaX, lambdaY=lambdaY))
+ ppControl$lambda <- lambda
+ # check whether cross-validation over grid of tuning parameters is necessary
+ useCV <- nrow(lambda) > 1
+ if(useCV) {
+ if(is.null(folds)) folds <- cvFolds(n, ...)
+ # set up parallel computing if requested
+ haveCl <- inherits(cl, "cluster")
+ if(haveCl) haveNCores <- FALSE
+ else {
+ if(is.na(nCores)) nCores <- detectCores() # use all available cores
+ if(!is.numeric(nCores) || is.infinite(nCores) || nCores < 1) {
+ nCores <- 1 # use default value
+ warning("invalid value of 'nCores'; using default value")
+ } else nCores <- as.integer(nCores)
+ R <- folds$R
+ nCores <- if(R == 1) min(nCores, folds$K) else min(nCores, R)
+ haveNCores <- nCores > 1
+ }
+ # check whether parallel computing should be used
+ useParallel <- haveNCores || haveCl
+ # set up multicore or snow cluster if not supplied
+ if(haveNCores) {
+ if(.Platform$OS.type == "windows") {
+ cl <- makePSOCKcluster(rep.int("localhost", nCores))
+ } else cl <- makeForkCluster(nCores)
+ on.exit(stopCluster(cl))
+ }
+ # perform cross-validation over grid of tuning parameters
+ call <- call("sMaxCorPPFit", method=method, corControl=corControl,
+ algorithm=algorithm, ppControl=ppControl,
+ standardize=standardize, fallback=fallback)
+ call[[1]] <- sMaxCorPPFit # necessary to work with parallel computing
+ corFun <- switch(method, spearman=corSpearman, kendall=corKendall,
+ quadrant=corQuadrant, M=corM, pearson=corPearson)
+ cv <- cvMaxCor(call, x, y, folds=folds, corFun=corFun,
+ corArgs=corControl, cl=cl)
+ # determine the optimal tuning parameters
+ ppControl$lambda <- lambda[which.max(cv), , drop=FALSE]
+ }
+ # compute the final solution
+ maxCor <- sMaxCorPPFit(x, y, method=method, corControl=corControl,
+ algorithm=algorithm, ppControl=ppControl,
+ standardize=standardize, fallback=fallback)
+ maxCor$a <- drop(maxCor$a)
+ maxCor$b <- drop(maxCor$b)
+ if(useCV) maxCor$cv <- cbind(lambda, CV=cv)
+ }
+ ## assign class and return results
+ class(maxCor) <- c("sMaxCor", "maxCor")
+ maxCor
+}
+
+## simple wrapper for calling the C++ function
+sMaxCorPPFit <- function(x, y,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ corControl, algorithm = "grid", ppControl,
+ standardize = TRUE, fallback = FALSE) {
+ # call C++ function
+ maxCor <- .Call("R_sMaxCorPP", R_x=x, R_y=y, R_method=method,
+ R_corControl=corControl, R_algorithm=algorithm,
+ R_ppControl=ppControl, R_standardize=standardize,
+ R_fallback=fallback, PACKAGE="ccaPP")
+}
diff --git a/R/permTest.R b/R/permTest.R
index 0ebd57b..f9a0ac8 100644
--- a/R/permTest.R
+++ b/R/permTest.R
@@ -1,7 +1,7 @@
-# ----------------------
+# --------------------------------------
# Author: Andreas Alfons
-# KU Leuven
-# ----------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' (Robust) permutation test for no association
#'
diff --git a/R/print.R b/R/print.R
index 840ea76..0e558e8 100644
--- a/R/print.R
+++ b/R/print.R
@@ -1,7 +1,7 @@
-# ----------------------
+# --------------------------------------
# Author: Andreas Alfons
-# KU Leuven
-# ----------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
#' @export
print.cca <- function(x, ...) {
@@ -17,6 +17,29 @@ print.cca <- function(x, ...) {
invisible(x)
}
+#' @export
+print.cvFolds <- function(x, ...) {
+ # print general information
+ cvText <- if(x$n == x$K) "Leave-one-out CV" else sprintf("%d-fold CV", x$K)
+ if(x$R > 1) cvText <- paste(cvText, "with", x$R, "replications")
+ cat(paste("\n", cvText, ":", sep=""))
+ # print information on folds (add space between folds and subsets)
+ subsets <- x$subsets
+ if(x$R == 1) {
+ cn <- if("grouping" %in% names(x)) "Group index" else "Index"
+ nblanks <- 2
+ } else {
+ cn <- as.character(seq_len(x$R))
+ nblanks <- 3
+ }
+ nblanks <- max(nchar(as.character(subsets[, 1]))-nchar(cn[1]), 0) + nblanks
+ cn[1] <- paste(c(rep.int(" ", nblanks), cn[1]), collapse="")
+ dimnames(subsets) <- list(Fold=x$which, cn)
+ print(subsets, ...)
+ # return object invisibly
+ invisible(x)
+}
+
#' @export
print.maxCor <- function(x, ...) {
# print function call
@@ -44,3 +67,19 @@ print.permTest <- function(x, ...) {
# return object invisibly
invisible(x)
}
+
+#' @export
+print.sMaxCor <- function(x, ...) {
+ print.maxCor(x, ...)
+ # print optimal tuning parameters
+ if(!is.null(cv <- x$cv)) {
+ haveGridX <- any(cv[, "lambdaX"] != 0)
+ haveGridY <- any(cv[, "lambdaY"] != 0)
+ if(haveGridX && haveGridY) cat("\nOptimal tuning parameters:\n")
+ else cat("\nOptimal tuning parameter:\n")
+ if(haveGridX) cat(sprintf("lambdaX: %f\n", x$lambdaX))
+ if(haveGridY) cat(sprintf("lambdaY: %f\n", x$lambdaY))
+ }
+ # return object invisibly
+ invisible(x)
+}
diff --git a/R/utils.R b/R/utils.R
index b300e20..a65f277 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,7 +1,7 @@
-# ----------------------
+# --------------------------------------
# Author: Andreas Alfons
-# KU Leuven
-# ----------------------
+# Erasmus Universiteit Rotterdam
+# --------------------------------------
### transform canonical vectors back to original scale
#backtransform <- function(A, scale) {
@@ -18,6 +18,17 @@ checkIndices <- function(indices, max) {
indices[which(indices > 0 & indices <= max)]
}
+## call a function by either
+# 1) simply evaluating a supplied function for the basic arguments if there are
+# no additional arguments in list format
+# 2) evaluating a supplied function with 'do.call' if there are additional
+# arguments in list format
+doCall <- function(fun, ..., args = list()) {
+ if(length(args) == 0) {
+ fun(...)
+ } else do.call(fun, c(list(...), args))
+}
+
## call C++ to compute ranks of observations in a vector (for testing)
fastRank <- function(x) {
x <- as.numeric(x)
@@ -61,6 +72,49 @@ getCorControl <- function(method, control, forceConsistency = TRUE) {
out
}
+## get list of control arguments for projection pursuit algorithm
+getPPControl <- function(algorithm, control, p, q, seed = NULL) {
+ # additional checks for grid search algorithm
+ if(algorithm == "grid") {
+ # check subset of variables to be used for determining the order of
+ # the variables from the respective other data set
+ select <- control$select
+ control$select <- NULL
+ if(!is.null(select)) {
+ if(is.list(select)) {
+ # make sure select is a list with two index vectors and
+ # drop invalid indices from each vector
+ select <- rep(select, length.out=2)
+ select <- mapply(function(indices, max) {
+ indices <- as.integer(indices)
+ indices[which(indices > 0 & indices <= max)] - 1
+ }, select, c(p, q))
+ valid <- sapply(select, length) > 0
+ # add the two index vectors to control object
+ if(all(valid)) {
+ control$selectX <- select[[1]]
+ control$selectY <- select[[2]]
+ } else select <- NULL
+ } else {
+ # check number of indices to sample
+ select <- rep(as.integer(select), length.out=2)
+ valid <- !is.na(select) & select > 0 & select < c(p, q)
+ if(all(valid)) {
+ # generate index vectors and add them to control object
+ if(!is.null(seed)) set.seed(seed)
+ control$selectX <- sample.int(p, select[1]) - 1
+ control$selectY <- sample.int(q, select[2]) - 1
+ } else select <- NULL
+ }
+ }
+ if(is.null(select)) {
+ control$selectX <- control$selectY <- integer()
+ }
+ }
+ # return list of control arguments
+ control
+}
+
## L1 median (for testing)
l1Median <- function(x) {
# initializations
diff --git a/inst/CITATION b/inst/CITATION
index aca49ca..13e23e2 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -3,7 +3,7 @@ citHeader("To cite package 'ccaPP' in publications use:")
citEntry(entry = "Article",
title = "Robust maximum association between data sets: The {R} package {ccaPP}",
author = personList(as.person("Andreas Alfons"),
- as.person("Christophe Croux"),
+ as.person("Christophe Croux"),
as.person("Peter Filzmoser")),
journal = "Austrian Journal of Statistics",
year = "2016",
@@ -20,16 +20,17 @@ citEntry(entry = "Article",
citEntry(entry = "Article",
title = "Robust maximum association estimators",
author = personList(as.person("Andreas Alfons"),
- as.person("Christophe Croux"),
+ as.person("Christophe Croux"),
as.person("Peter Filzmoser")),
journal = "Journal of the American Statistical Association",
- year = "2016",
- note = "In press",
- doi = "10.1080/01621459.2016.1148609",
+ year = "2017",
+ volume = "112",
+ number = "517",
+ pages = "436--445",
textVersion =
- paste("Andreas Alfons, Christophe Croux, Peter Filzmoser (2016).",
+ paste("Andreas Alfons, Christophe Croux, Peter Filzmoser (2017).",
"Robust maximum association estimators.",
- "Journal of the American Statistical Association.",
- "DOI 10.1080/01621459.2016.1148609. In press.")
+ "Journal of the American Statistical Association,",
+ "112(517), 436-445.")
)
diff --git a/man/ccaGrid.Rd b/man/ccaGrid.Rd
index 74b53fa..2e08759 100644
--- a/man/ccaGrid.Rd
+++ b/man/ccaGrid.Rd
@@ -6,15 +6,38 @@
\alias{CCAgrid}
\title{(Robust) CCA via alternating series of grid searches}
\usage{
-ccaGrid(x, y, k = 1, method = c("spearman", "kendall", "quadrant", "M",
- "pearson"), control = list(...), nIterations = 10, nAlternate = 10,
- nGrid = 25, select = NULL, tol = 1e-06, standardize = TRUE,
- fallback = FALSE, seed = NULL, ...)
-
-CCAgrid(x, y, k = 1, method = c("spearman", "kendall", "quadrant", "M",
- "pearson"), maxiter = 10, maxalter = 10, splitcircle = 25,
- select = NULL, zero.tol = 1e-06, standardize = TRUE,
- fallback = FALSE, seed = NULL, ...)
+ccaGrid(
+ x,
+ y,
+ k = 1,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ control = list(...),
+ nIterations = 10,
+ nAlternate = 10,
+ nGrid = 25,
+ select = NULL,
+ tol = 1e-06,
+ standardize = TRUE,
+ fallback = FALSE,
+ seed = NULL,
+ ...
+)
+
+CCAgrid(
+ x,
+ y,
+ k = 1,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ maxiter = 10,
+ maxalter = 10,
+ splitcircle = 25,
+ select = NULL,
+ zero.tol = 1e-06,
+ standardize = TRUE,
+ fallback = FALSE,
+ seed = NULL,
+ ...
+)
}
\arguments{
\item{x, y}{each can be a numeric vector, matrix or data frame.}
diff --git a/man/ccaPP-package.Rd b/man/ccaPP-package.Rd
index ba3e53c..4dc30f2 100644
--- a/man/ccaPP-package.Rd
+++ b/man/ccaPP-package.Rd
@@ -20,12 +20,12 @@ The DESCRIPTION file:
Maintainer: \packageMaintainer{ccaPP}
}
\references{
-A. Alfons, C. Croux and P. Filzmoser (2016) Robust maximum association between
-data sets: The \R Package \pkg{ccaPP}. \emph{Austrian Journal of Statistics},
+A. Alfons, C. Croux and P. Filzmoser (2016) Robust maximum association between
+data sets: The \R Package \pkg{ccaPP}. \emph{Austrian Journal of Statistics},
\bold{45}(1), 71--79.
-A. Alfons, C. Croux and P. Filzmoser (2016) Robust maximum association
-estimators. \emph{Journal of the American Statistical Association}. DOI
-10.1080/01621459.2016.1148609. In press.
+A. Alfons, C. Croux and P. Filzmoser (2017) Robust maximum association
+estimators. \emph{Journal of the American Statistical Association}.
+\bold{112}(517), 436--445.
}
\keyword{package}
diff --git a/man/ccaProj.Rd b/man/ccaProj.Rd
index 175119a..4df9768 100644
--- a/man/ccaProj.Rd
+++ b/man/ccaProj.Rd
@@ -5,13 +5,28 @@
\alias{CCAproj}
\title{(Robust) CCA via projections through the data points}
\usage{
-ccaProj(x, y, k = 1, method = c("spearman", "kendall", "quadrant", "M",
- "pearson"), control = list(...), standardize = TRUE,
- useL1Median = TRUE, fallback = FALSE, ...)
+ccaProj(
+ x,
+ y,
+ k = 1,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ control = list(...),
+ standardize = TRUE,
+ useL1Median = TRUE,
+ fallback = FALSE,
+ ...
+)
-CCAproj(x, y, k = 1, method = c("spearman", "kendall", "quadrant", "M",
- "pearson"), standardize = TRUE, useL1Median = TRUE,
- fallback = FALSE, ...)
+CCAproj(
+ x,
+ y,
+ k = 1,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ standardize = TRUE,
+ useL1Median = TRUE,
+ fallback = FALSE,
+ ...
+)
}
\arguments{
\item{x, y}{each can be a numeric vector, matrix or data frame.}
diff --git a/man/corFunctions.Rd b/man/corFunctions.Rd
index 90c7191..0d118cf 100644
--- a/man/corFunctions.Rd
+++ b/man/corFunctions.Rd
@@ -17,8 +17,13 @@ corKendall(x, y, consistent = FALSE)
corQuadrant(x, y, consistent = FALSE)
-corM(x, y, prob = 0.9, initial = c("quadrant", "spearman", "kendall",
- "pearson"), tol = 1e-06)
+corM(
+ x,
+ y,
+ prob = 0.9,
+ initial = c("quadrant", "spearman", "kendall", "pearson"),
+ tol = 1e-06
+)
}
\arguments{
\item{x, y}{numeric vectors.}
diff --git a/man/cvFolds.Rd b/man/cvFolds.Rd
new file mode 100644
index 0000000..35fc925
--- /dev/null
+++ b/man/cvFolds.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cv.R
+\name{cvFolds}
+\alias{cvFolds}
+\alias{print.cvFolds}
+\title{Cross-validation folds}
+\usage{
+cvFolds(
+ n,
+ K = 5,
+ R = 1,
+ type = c("random", "consecutive", "interleaved"),
+ grouping = NULL
+)
+}
+\arguments{
+\item{n}{an integer giving the number of observations to be split into
+folds. This is ignored if \code{grouping} is supplied in order to split
+groups of observations into folds.}
+
+\item{K}{an integer giving the number of folds into which the observations
+should be split (the default is five). Setting \code{K} equal to the number
+of observations or groups yields leave-one-out cross-validation.}
+
+\item{R}{an integer giving the number of replications for repeated
+\eqn{K}-fold cross-validation. This is ignored for leave-one-out
+cross-validation and other non-random splits of the data.}
+
+\item{type}{a character string specifying the type of folds to be
+generated. Possible values are \code{"random"} (the default),
+\code{"consecutive"} or \code{"interleaved"}.}
+
+\item{grouping}{a factor specifying groups of observations. If supplied,
+the data are split according to the groups rather than individual
+observations such that all observations within a group belong to the same
+fold.}
+}
+\value{
+An object of class \code{"cvFolds"} with the following components:
+\item{n}{an integer giving the number of observations or groups.}
+\item{K}{an integer giving the number of folds.}
+\item{R}{an integer giving the number of replications.}
+\item{subsets}{an integer matrix in which each column contains a permutation
+of the indices of the observations or groups.}
+\item{which}{an integer vector giving the fold for each permuted observation
+or group.}
+\item{grouping}{a list giving the indices of the observations belonging to
+each group. This is only returned if a grouping factor has been supplied.}
+}
+\description{
+Split observations or groups of observations into \eqn{K} folds to be used
+for (repeated) \eqn{K}-fold cross-validation. \eqn{K} should thereby be
+chosen such that all folds are of approximately equal size.
+}
+\examples{
+set.seed(1234) # set seed for reproducibility
+cvFolds(20, K = 5)
+cvFolds(20, K = 5, R = 10)
+
+}
+\seealso{
+\code{\link{sMaxCorGrid}}
+}
+\author{
+Andreas Alfons
+}
+\keyword{utilities}
diff --git a/man/maxCorGrid.Rd b/man/maxCorGrid.Rd
index 8933c84..5c4b93d 100644
--- a/man/maxCorGrid.Rd
+++ b/man/maxCorGrid.Rd
@@ -3,12 +3,49 @@
\name{maxCorGrid}
\alias{maxCorGrid}
\alias{print.maxCor}
+\alias{sMaxCorGrid}
\title{(Robust) maximum correlation via alternating series of grid searches}
\usage{
-maxCorGrid(x, y, method = c("spearman", "kendall", "quadrant", "M",
- "pearson"), control = list(...), nIterations = 10, nAlternate = 10,
- nGrid = 25, select = NULL, tol = 1e-06, standardize = TRUE,
- fallback = FALSE, seed = NULL, ...)
+maxCorGrid(
+ x,
+ y,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ control = list(...),
+ nIterations = 10,
+ nAlternate = 10,
+ nGrid = 25,
+ select = NULL,
+ tol = 1e-06,
+ standardize = TRUE,
+ fallback = FALSE,
+ seed = NULL,
+ ...
+)
+
+sMaxCorGrid(
+ x,
+ y,
+ lambdaX = 0,
+ lambdaY = 0,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ control = list(...),
+ nIterations = 10,
+ nAlternate = 10,
+ nGrid = 25,
+ select = NULL,
+ tol = 1e-06,
+ standardize = TRUE,
+ fallback = FALSE,
+ K = 5,
+ R = 1,
+ type = c("random", "consecutive", "interleaved"),
+ grouping = NULL,
+ folds = NULL,
+ nCores = 1,
+ cl = NULL,
+ seed = NULL,
+ ...
+)
}
\arguments{
\item{x, y}{each can be a numeric vector, matrix or data frame.}
@@ -63,6 +100,27 @@ determining the order of the variables of the respective other data set.}
\item{\dots}{additional arguments to be passed to the specified correlation
functional.}
+
+\item{lambdaX, lambdaY}{numeric vectors of non-negative values giving the
+penalty parameters for the weighting vectors of \code{x} and \code{y},
+respectively.}
+
+\item{K, R, type, grouping}{additional arguments for generating
+cross-validation folds (see \code{\link{cvFolds}}).}
+
+\item{folds}{an object of class \code{"cvFolds"} (as returned by
+\code{\link{cvFolds}}) defining the folds of the data for cross-validation.
+If supplied, this is preferred over the arguments for generating
+cross-validation folds.}
+
+\item{nCores}{a positive integer giving the number of processor cores to be
+used for parallel computing in cross-validation (the default is 1 for no
+parallelization). If this is set to \code{NA}, all available processor
+cores are used.}
+
+\item{cl}{a \pkg{parallel} cluster for parallel computing to be used in
+cross-validation, as generated by \code{\link[parallel]{makeCluster}}. If
+supplied, this is preferred over \code{nCores}.}
}
\value{
An object of class \code{"maxCor"} with the following components:
@@ -77,6 +135,16 @@ standardization of \code{y}.}
standardization of \code{x}.}
\item{scaleY}{a numeric vector giving the scale estimates used in
standardization of \code{y}.}
+\item{lambdaX}{a numeric giving the optimal penalty parameter for the
+weighting vector of \code{x} (only \code{sMaxCorGrid}).}
+\item{lambdaY}{a numeric giving the optimal penalty parameter for the
+weighting vector of \code{y} (only \code{sMaxCorGrid}).}
+\item{objective}{a numeric giving the value of the objective function
+for the optimal combination of penalty parameters (only \code{sMaxCorGrid}).}
+\item{cv}{a numeric matrix containing the (average) results from
+cross-validation for each combination of penalty parameters (only
+\code{sMaxCorGrid} and if a grid of values for the penalty parameters has
+been supplied).}
\item{call}{the matched function call.}
}
\description{
@@ -112,6 +180,13 @@ directly.
Note that also the data sets are ordered according to the maximum average
absolute correlation with the respective other data set to ensure symmetry
of the algorithm.
+
+\code{sMaxCorGrid} enforces sparsity by adding \eqn{L_{1}}{L1} penalties on
+the weighting vectors to the objective function (i.e., the correlation of
+the resulting projections). The optimal combination of penalty parameters is
+thereby determined via cross-validation over a grid of values. To increase
+the computational performance of this cross-validation procedure, parallel
+computing is implemented via package \pkg{parallel}.
}
\examples{
data("diabetes")
@@ -132,8 +207,8 @@ between data sets: The \R Package \pkg{ccaPP}. \emph{Austrian Journal of
Statistics}, \bold{45}(1), 71--79.
A. Alfons, C. Croux and P. Filzmoser (2016) Robust maximum association
-estimators. \emph{Journal of the American Statistical Association}. DOI
-10.1080/01621459.2016.1148609. In press.
+estimators. \emph{Journal of the American Statistical Association}.
+\bold{112}(517), 436--445.
}
\seealso{
\code{\link{maxCorProj}}, \code{\link{ccaGrid}},
diff --git a/man/maxCorProj.Rd b/man/maxCorProj.Rd
index d3de5c1..f6ae6ce 100644
--- a/man/maxCorProj.Rd
+++ b/man/maxCorProj.Rd
@@ -4,9 +4,16 @@
\alias{maxCorProj}
\title{(Robust) maximum correlation via projections through the data points}
\usage{
-maxCorProj(x, y, method = c("spearman", "kendall", "quadrant", "M",
- "pearson"), control = list(...), standardize = TRUE,
- useL1Median = TRUE, fallback = FALSE, ...)
+maxCorProj(
+ x,
+ y,
+ method = c("spearman", "kendall", "quadrant", "M", "pearson"),
+ control = list(...),
+ standardize = TRUE,
+ useL1Median = TRUE,
+ fallback = FALSE,
+ ...
+)
}
\arguments{
\item{x, y}{each can be a numeric vector, matrix or data frame.}
diff --git a/man/permTest.Rd b/man/permTest.Rd
index 3b7e771..c5bbb38 100644
--- a/man/permTest.Rd
+++ b/man/permTest.Rd
@@ -4,8 +4,17 @@
\alias{permTest}
\title{(Robust) permutation test for no association}
\usage{
-permTest(x, y, R = 1000, fun = maxCorGrid, permutations = NULL,
- nCores = 1, cl = NULL, seed = NULL, ...)
+permTest(
+ x,
+ y,
+ R = 1000,
+ fun = maxCorGrid,
+ permutations = NULL,
+ nCores = 1,
+ cl = NULL,
+ seed = NULL,
+ ...
+)
}
\arguments{
\item{x, y}{each can be a numeric vector, matrix or data frame.}
diff --git a/src/cca.cpp b/src/cca.cpp
index 1658da5..31d5b68 100644
--- a/src/cca.cpp
+++ b/src/cca.cpp
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * Erasmus University Rotterdam
+ * Erasmus Universiteit Rotterdam
*/
#include
@@ -173,7 +173,7 @@ double CorMControl::cor(const vec& x, const vec& y) {
// for testing purposes
template
-double fastCor(const vec& x, const vec& y, CorControl control) {
+double fastCor(const vec& x, const vec& y, CorControl& control) {
return control.cor(x, y);
}
@@ -241,22 +241,24 @@ class GridControl {
GridControl(List&);
// get the order in which to update the elements of a weighting vector
template
- void findOrder(const mat&, const vec&, CorControl, uvec&, double&, vec&);
+ void findOrder(const mat&, const vec&, CorControl&, uvec&, double&, vec&);
// get the order in which to update the elements of the weighting vectors
template
- void findOrder(const mat&, const mat&, CorControl, uvec&, uvec&, double&,
+ void findOrder(const mat&, const mat&, CorControl&, uvec&, uvec&, double&,
vec&, vec&, bool&);
// get equispaced grid of angles in a plane
vec getGrid(const uword&);
+ // compute updated weighting vector in grid search
+ vec getVector(const vec&, const double&, const uword&);
// grid search to update one weighting vector
template
- void gridSearch(const mat&, const uvec&, const vec&, CorControl, vec&,
+ void gridSearch(const mat&, const uvec&, const vec&, CorControl&, vec&,
double&, vec&);
// set counter of consecutive iterations without improvement
void setCounter(uword&, const double&, const double&);
// maximum correlation between multivariate data sets x and y
template
- double maxCor(const mat&, const mat&, CorControl, vec&, vec&);
+ double maxCor(const mat&, const mat&, CorControl&, vec&, vec&);
};
// constructors
@@ -295,7 +297,7 @@ inline GridControl::GridControl(List& control) {
// maxCor ....... maximum correlation to get initial value
// a ............ weighting vector to get initial value
template
-void GridControl::findOrder(const mat& x, const vec& y, CorControl corControl,
+void GridControl::findOrder(const mat& x, const vec& y, CorControl& corControl,
uvec& orderX, double& maxCor, vec& a) {
// compute columnwise absolute correlations of x with y
const uword p = x.n_cols;
@@ -325,7 +327,7 @@ void GridControl::findOrder(const mat& x, const vec& y, CorControl corControl,
// startWithX ... logical to be computed that indicates whether to start with
// the first data set in the alternate grid searches
template
-void GridControl::findOrder(const mat& x, const mat& y, CorControl corControl,
+void GridControl::findOrder(const mat& x, const mat& y, CorControl& corControl,
uvec& orderX, uvec& orderY, double& maxCor, vec& a, vec& b,
bool& startWithX) {
const uword p = x.n_cols, q = y.n_cols;
@@ -418,32 +420,39 @@ vec GridControl::getGrid(const uword& i) {
return grid;
}
+// compute updated weighting vector in grid search
+vec GridControl::getVector(const vec& a, const double& angle, const uword& j) {
+ // fast computation of cos(angle) * a + sin(angle) * ej
+ vec b = cos(angle) * a;
+ b(j) += sin(angle);
+ return b;
+}
+
// grid search to update one weighting vector
// x ............ data matrix for which to update the weighting vector
+// orderX ......... order in which to browse through the variables
// y ............ linear combination of the other data matrix according to the
// other weighting vector, which is kept fixed
// corControl ... control object to compute correlation
-// grid ........ grid points to be used for grid search
+// grid ......... grid points to be used for grid search
// maxCor ....... maximum correlation to be updated
// a ............ weighting vector to be updated
template
void GridControl::gridSearch(const mat& x, const uvec& orderX, const vec& y,
- CorControl corControl, vec& grid, double& maxCor, vec& a) {
+ CorControl& corControl, vec& grid, double& maxCor, vec& a) {
// initializations
const uword p = x.n_cols, nGrid = grid.n_elem;
// perform grid searches for each canonical basis vector
for(uword j = 0; j < p; j++) {
- // define current canonical basis vector according to order of columns
- vec ej = zeros(p);
- ej(orderX(j)) = 1;
+ // current coordinate to be updated (in the order of columns)
+ uword orderJ = orderX(j);
// perform grid search for the current canonical basis vector
vec corY(nGrid);
for(uword k = 0; k < nGrid; k++) {
- double angle = grid(k);
- vec currentA = cos(angle) * a + sin(angle) * ej;
+ vec currentA = getVector(a, grid(k), orderJ);
corY(k) = abs(corControl.cor(x * currentA, y));
}
- // find grid point that maximizes correlation functional and keep
+ // find grid point that maximizes the correlation functional and keep
// maximum correlation of current grid search
uword whichMax;
double currentMaxCor = corY.max(whichMax);
@@ -452,8 +461,7 @@ void GridControl::gridSearch(const mat& x, const uvec& orderX, const vec& y,
// of the current grid search may be smaller than the previous maximum
if(currentMaxCor > maxCor) {
maxCor = currentMaxCor;
- double optAngle = grid(whichMax);
- a = cos(optAngle) * a + sin(optAngle) * ej;
+ a = getVector(a, grid(whichMax), orderJ);
}
}
}
@@ -479,7 +487,7 @@ void GridControl::setCounter(uword& counter, const double& current,
// a ............. first weighting vector to be updated
// b ............. second weighting vector to be updated
template
-double GridControl::maxCor(const mat& x, const mat& y, CorControl corControl,
+double GridControl::maxCor(const mat& x, const mat& y, CorControl& corControl,
vec& a, vec& b) {
// initializations
uword p = x.n_cols, q = y.n_cols;
@@ -490,7 +498,12 @@ double GridControl::maxCor(const mat& x, const mat& y, CorControl corControl,
// both data sets are univariate
a.ones(p); b.ones(q);
vec xx = x.unsafe_col(0), yy = y.unsafe_col(0); // reuse memory
- maxCor = abs(corControl.cor(xx, yy)); // compute correlation
+ maxCor = corControl.cor(xx, yy); // compute correlation
+ // check sign of correlation
+ if(maxCor < 0) {
+ maxCor = -maxCor;
+ b = -b;
+ }
} else {
double previousMaxCor = R_NegInf;
if((p > 1) && (q == 1)) {
@@ -580,17 +593,17 @@ double GridControl::maxCor(const mat& x, const mat& y, CorControl corControl,
} else {
return NA_REAL; // should never happen
}
- }
- // ensure that norm of weighting vectors is 1
- a = a / norm(a, 2);
- b = b / norm(b, 2);
- // check direction
- double r = corControl.cor(x * a, y * b);
- if(r < 0) {
- if((p > 1) && (q == 1)) {
- a = -a;
- } else {
- b = -b;
+ // ensure that norm of weighting vectors is 1
+ a = a / norm(a, 2);
+ b = b / norm(b, 2);
+ // check direction
+ double r = corControl.cor(x * a, y * b);
+ if(r < 0) {
+ if((p > 1) && (q == 1)) {
+ a = -a;
+ } else {
+ b = -b;
+ }
}
}
// return maximum correlation
@@ -598,6 +611,320 @@ double GridControl::maxCor(const mat& x, const mat& y, CorControl corControl,
}
+// ------------------------------------------------
+// control class for sparse alternate grid searches
+// ------------------------------------------------
+
+class SparseGridControl: public GridControl {
+public:
+ vec lambda; // grid of values for penalty parameters
+ // constructors
+ SparseGridControl();
+ SparseGridControl(List&);
+ // compute updated weighting vector in grid search
+ vec getVector(const vec&, const double&, const uword&);
+ // grid search to update weighting vector for multivariate x and univariate y
+ template
+ void gridSearch(const mat&, const uvec&, const double&, const vec&,
+ CorControl&, vec&, vec&, double&);
+ // grid search to update one weighting vector for multivariate x and y
+ template
+ void gridSearch(const mat&, const uvec&, const double&, const vec&,
+ const double&, CorControl&, vec&, double&, vec&, double&, double&);
+ // workhorse function for maximum correlation between multivariate x and
+ // univariate y
+ template
+ void maxCorFit(const mat&, const uvec&, const double&, const vec&,
+ CorControl&, double&, vec&, double&);
+ // workhorse function for maximum correlation between multivariate x and y
+ template
+ void maxCorFit(const mat&, const uvec&, const double&, const mat&,
+ const uvec&, const double&, CorControl&, double&, vec&, vec&,
+ double&, double&, double&);
+ // maximum correlation between multivariate data sets x and y
+ template
+ vec maxCor(const mat&, const mat&, CorControl&, mat&, mat&, vec&);
+};
+
+// constructors
+inline SparseGridControl::SparseGridControl() {
+ lambda = zeros(1, 2);
+}
+inline SparseGridControl::SparseGridControl(List& control)
+: GridControl(control) {
+ SEXP R_lambda = control["lambda"];
+ lambda = as(R_lambda);
+}
+
+// compute updated weighting vector in grid search
+vec SparseGridControl::getVector(const vec& a,
+ const double& angle, const uword& j) {
+ // fast computation of b / ||b|| with b = cos(angle) * a + sin(angle) * ej
+ double tanAngle = tan(angle);
+ double denominator = sqrt(1 + 2*tanAngle*a(j) + tanAngle*tanAngle);
+ vec b = a / denominator;
+ b(j) += tanAngle / denominator;
+ return b;
+}
+
+// sparse grid search to update weighting vector for multivariate x and
+// univariate y
+// x .............. data matrix for which to update the weighting vector
+// orderX ......... order in which to browse through the variables
+// lambda ......... penalty parameter for the weighting vector to be updated
+// y .............. other data vector
+// corControl ..... control object to compute correlation
+// grid ........... grid points to be used for grid search
+// a .............. weighting vector to be updated
+// maxObjective ... maximum value of the objective function to be updated
+template
+void SparseGridControl::gridSearch(const mat& x, const uvec& orderX,
+ const double& lambda, const vec& y, CorControl& corControl,
+ vec& grid, vec& a, double& maxObjective) {
+ // initializations
+ const uword p = x.n_cols, nGrid = grid.n_elem;
+ // perform grid searches for each canonical basis vector
+ for(uword j = 0; j < p; j++) {
+ // current coordinate to be updated (in the order of columns)
+ uword orderJ = orderX(j);
+ // perform grid search for the current canonical basis vector
+ vec objective(nGrid);
+ for(uword k = 0; k < nGrid; k++) {
+ vec currentA = getVector(a, grid(k), orderJ);
+ double currentCorY = abs(corControl.cor(x * currentA, y));
+ objective(k) = currentCorY - lambda * norm(currentA, 1);
+ }
+ // find grid point that maximizes the penalized correlation functional
+ // and keep maximum of current grid search
+ uword whichMax;
+ double currentMaxObjective = objective.max(whichMax);
+ // update maximum and weighting vector
+ // if 0 degree angle is not part of the grid, the penalized maximum
+ // correlation of the current grid search may be smaller than the
+ // previous maximum
+ if(currentMaxObjective > maxObjective) {
+ maxObjective = currentMaxObjective;
+ a = getVector(a, grid(whichMax), orderJ);
+ }
+ }
+}
+
+// sparse grid search to update one weighting vector for multivariate x and y
+// x .............. data matrix for which to update the weighting vector
+// orderX ......... order in which to browse through the variables
+// lambda ......... penalty parameter for the weighting vector to be updated
+// y .............. linear combination of the other data matrix according to
+// the other weighting vector, which is kept fixed
+// penaltyY ....... penalty for the weighting vector to be kept fixed
+// corControl ..... control object to compute correlation
+// grid ........... grid points to be used for grid search
+// maxCor ......... maximum correlation to be updated
+// a .............. weighting vector to be updated
+// penaltyX ....... penalty for the weighting vector to be updated
+// maxObjective ... maximum value of the objective function to be updated
+template
+void SparseGridControl::gridSearch(const mat& x, const uvec& orderX,
+ const double& lambda, const vec& y, const double& penaltyY,
+ CorControl& corControl, vec& grid, double& maxCor, vec& a,
+ double& penaltyX, double& maxObjective) {
+ // initializations
+ const uword p = x.n_cols, nGrid = grid.n_elem;
+ // perform grid searches for each canonical basis vector
+ for(uword j = 0; j < p; j++) {
+ // current coordinate to be updated (in the order of columns)
+ uword orderJ = orderX(j);
+ // perform grid search for the current canonical basis vector
+ vec corY(nGrid), objective(nGrid), penalties(nGrid);
+ for(uword k = 0; k < nGrid; k++) {
+ vec currentA = getVector(a, grid(k), orderJ);
+ corY(k) = abs(corControl.cor(x * currentA, y));
+ penalties(k) = lambda * norm(currentA, 1);
+ objective(k) = corY(k) - penalties(k) - penaltyY;
+ }
+ // find grid point that maximizes the penalized correlation functional
+ // and keep maximum of current grid search
+ uword whichMax;
+ double currentMaxObjective = objective.max(whichMax);
+ // update maximum and weighting vector
+ // if 0 degree angle is not part of the grid, the penalized maximum
+ // correlation of the current grid search may be smaller than the
+ // previous maximum
+ if(currentMaxObjective > maxObjective) {
+ maxCor = corY(whichMax);
+ a = getVector(a, grid(whichMax), orderJ);
+ penaltyX = penalties(whichMax);
+ maxObjective = currentMaxObjective;
+ }
+ }
+}
+
+// workhorse function for maximum correlation between multivariate x and
+// univariate y
+template
+void SparseGridControl::maxCorFit(const mat& x, const uvec& orderX,
+ const double& lambda, const vec& y, CorControl& corControl,
+ double& maxCor, vec& a, double& maxObjective) {
+ // initializations
+ double previousMaxObjective = R_NegInf;
+ // stop if there are two consecutive iterations without improvement
+ uword i = 0, convCounter = 0;
+ while((i < nIterations) && (convCounter < 2)) {
+ previousMaxObjective = maxObjective;
+ vec grid = getGrid(i+1); // define vector of grid points
+ gridSearch(x, orderX, lambda, y, corControl, grid, a, maxObjective);
+ i++;
+ setCounter(convCounter, maxObjective, previousMaxObjective);
+ }
+ // check direction
+ maxCor = corControl.cor(x * a, y);
+ if(maxCor < 0) {
+ maxCor = -maxCor;
+ a = -a;
+ }
+}
+
+// workhorse function for maximum correlation between multivariate x and y
+template
+void SparseGridControl::maxCorFit(const mat& x, const uvec& orderX,
+ const double& lambdaX, const mat& y, const uvec& orderY,
+ const double& lambdaY, CorControl& corControl, double& maxCor, vec& a,
+ vec& b, double& penaltyX, double& penaltyY, double& maxObjective) {
+ // initializations
+ double previousMaxObjective = R_NegInf;
+ // stop if there are two consecutive iterations without improvement
+ uword i = 0, convCounter = 0;
+ while((i < nIterations) && (convCounter < 2)) {
+ previousMaxObjective = maxObjective;
+ vec grid = getGrid(i+1); // define vector of grid points
+ uword j = 0;
+ double altMaxObjective = R_NegInf;
+ while((j < nAlternate) && ((maxObjective - altMaxObjective) > tol)) {
+ altMaxObjective = maxObjective;
+ // maximize correlation functional over a keeping b fixed
+ vec yb = y * b; // linear combination of columns of y
+ gridSearch(x, orderX, lambdaX, yb, penaltyY, corControl,
+ grid, maxCor, a, penaltyX, maxObjective);
+ // maximize correlation functional over b keeping a fixed
+ vec xa = x * a; // linear combination of columns of x
+ gridSearch(y, orderY, lambdaY, xa, penaltyX, corControl,
+ grid, maxCor, b, penaltyY, maxObjective);
+ j++;
+ }
+ i++;
+ setCounter(convCounter, maxObjective, previousMaxObjective);
+ }
+ // check direction
+ maxCor = corControl.cor(x * a, y * b);
+ if(maxCor < 0) {
+ maxCor = -maxCor;
+ b = -b;
+ }
+}
+
+// maximum correlation between multivariate data sets x and y based on
+// sparse alternate grid searches
+// x ............. first data matrix
+// y ............. second data matrix
+// corControl .... control object to compute correlation
+// a ............. first weighting vector to be updated
+// b ............. second weighting vector to be updated
+template
+vec SparseGridControl::maxCor(const mat& x, const mat& y,
+ CorControl& corControl, mat& A, mat& B, vec& objective) {
+ // initializations
+ uword p = x.n_cols, q = y.n_cols;
+ vec r; // initialize vector of maximum correlations
+ // perform alternate grid searches if both data sets are multivariate
+ // if one data set is univariate, alternate grid searches are not necessary
+ if((p == 1) && (q == 1)) {
+ // both data sets are univariate
+ r.set_size(1); objective.set_size(1);
+ A.ones(p, 1); B.ones(q, 1);
+ vec xx = x.unsafe_col(0), yy = y.unsafe_col(0); // reuse memory
+ // compute correlation
+ r(0) = corControl.cor(xx, yy);
+ // check sign of correlation
+ if(r(0) < 0) {
+ r(0) = -r(0);
+ B(0, 0) = -B(0, 0);
+ }
+ objective(0) = r(0);
+ } else {
+ double initialMaxCor, maxCor, maxObjective;
+ uword nLambda = lambda.n_rows;
+ r.set_size(nLambda); objective.set_size(nLambda);
+ if((p > 1) && (q == 1)) {
+ // x is multivariate, y is univariate
+ A.set_size(p, nLambda); B.ones(q, nLambda);
+ vec yy = y.unsafe_col(0); // reuse memory
+ // find order of x variables
+ uvec orderX(p);
+ vec initialA = zeros(p);
+ findOrder(x, yy, corControl, orderX, initialMaxCor, initialA);
+ // compute maximum correlation for each penalty parameter
+ for(uword k = 0; k < nLambda; k++) {
+ maxCor = initialMaxCor;
+ vec a = A.unsafe_col(k); a = initialA;
+ maxObjective = maxCor - lambda(k,0); // L1 norm is 1
+ maxCorFit(x, orderX, lambda(k,0), yy, corControl,
+ maxCor, a, maxObjective);
+ r(k) = maxCor;
+ objective(k) = maxObjective;
+ }
+ } else if((p == 1) && (q > 1)) {
+ // x is univariate, y is multivariate
+ A.ones(p, nLambda); B.set_size(q, nLambda);
+ vec xx = x.unsafe_col(0); // reuse memory
+ // find order of y variables
+ uvec orderY(q);
+ vec initialB = zeros(q);
+ findOrder(y, xx, corControl, orderY, initialMaxCor, initialB);
+ // compute maximum correlation for each penalty parameter
+ for(uword k = 0; k < nLambda; k++) {
+ maxCor = initialMaxCor;
+ vec b = B.unsafe_col(k); b = initialB;
+ maxObjective = maxCor - lambda(k,1); // L1 norm is 1
+ maxCorFit(y, orderY, lambda(k,1), xx, corControl,
+ maxCor, b, maxObjective);
+ r(k) = maxCor;
+ objective(k) = maxObjective;
+ }
+ } else if((p > 1) && (q > 1)) {
+ // both data sets are multivariate
+ A.set_size(p, nLambda), B.set_size(q, nLambda);
+ // find order of x and y variables
+ uvec orderX(p), orderY(q);
+ vec initialA = zeros(p), initialB = zeros(q);
+ bool startWithX;
+ findOrder(x, y, corControl, orderX, orderY, initialMaxCor,
+ initialA, initialB, startWithX);
+ // compute maximum correlation for each combination of penalty parameters
+ for(uword k = 0; k < nLambda; k++) {
+ maxCor = initialMaxCor;
+ vec a = A.unsafe_col(k), b = B.unsafe_col(k);
+ a = initialA; b = initialB;
+ double penaltyX = lambda(k,0), penaltyY = lambda(k,1); // L1 norms are 1
+ maxObjective = maxCor - penaltyX - penaltyY;
+ // perform alternate grid searches
+ if(startWithX) {
+ // start with grid search for x
+ maxCorFit(x, orderX, lambda(k,0), y, orderY, lambda(k,1), corControl,
+ maxCor, a, b, penaltyX, penaltyY, maxObjective);
+ } else {
+ // start with grid search for y
+ maxCorFit(y, orderY, lambda(k,1), x, orderX, lambda(k,0), corControl,
+ maxCor, b, a, penaltyY, penaltyX, maxObjective);
+ }
+ r(k) = maxCor;
+ objective(k) = maxObjective;
+ }
+ }
+ }
+ // return maximum correlation
+ return r;
+}
+
+
// -------------------------------------------------
// control class for projections through data points
// -------------------------------------------------
@@ -612,7 +939,7 @@ class ProjControl {
mat getDirections(const mat&);
// maximum correlation between multivariate data sets x and y
template
- double maxCor(const mat&, const mat&, CorControl, vec&, vec&);
+ double maxCor(const mat&, const mat&, CorControl&, vec&, vec&);
};
// constructors
@@ -654,7 +981,7 @@ mat ProjControl::getDirections(const mat& x) {
// a ............ first weighting vector to be updated
// b ............ second weighting vector to be updated
template
-double ProjControl::maxCor(const mat& x, const mat& y, CorControl corControl,
+double ProjControl::maxCor(const mat& x, const mat& y, CorControl& corControl,
vec& a, vec& b) {
// initializations
uword n = x.n_rows, p = x.n_cols, q = y.n_cols;
@@ -822,6 +1149,7 @@ mat householder(const vec& a) {
return P;
}
+
// canonical correlation analysis via projection pursuit
// x ............ first data matrix
// y ............ second data matrix
@@ -1018,3 +1346,108 @@ SEXP R_ccaPP(SEXP R_x, SEXP R_y, SEXP R_k, SEXP R_method, SEXP R_corControl,
Named("scaleY") = scaleY
);
}
+
+
+// sparse maximum correlation via projection pursuit
+// x ............ first data matrix
+// y ............ second data matrix
+// corControl ... control object to compute correlation
+// ppControl .... control object for sparse algorithm
+// standard ..... should the data be standardized?
+// robust ....... should robust standardization be used?
+// fallback ..... should the fallback mode for robust standardization be used?
+// A ............ matrix of weighting vectors for first matrix to be updated
+// B ............ matrix of weighting vectors for second matrix to be updated
+template
+vec sMaxCorPP(const mat& x, const mat& y, CorControl& corControl,
+ PPControl& ppControl, const bool& standard, const bool& robust,
+ const bool& fallback, mat& A, mat& B, vec& centerX, vec& centerY,
+ vec& scaleX, vec& scaleY, vec& objective) {
+ // initializations
+ vec r;
+ // standardize the data if requested
+ mat xs, ys;
+ if(standard) {
+ xs = standardize(x, robust, fallback, centerX, scaleX);
+ ys = standardize(y, robust, fallback, centerY, scaleY);
+ // compute maximum correlations with standardized data
+ r = ppControl.maxCor(xs, ys, corControl, A, B, objective);
+ // transform weighting vectors back to original scale
+ for(uword k = 0; k < A.n_cols; k++) {
+ vec a = A.unsafe_col(k); backtransform(a, scaleX);
+ }
+ for(uword k = 0; k < B.n_cols; k++) {
+ vec b = B.unsafe_col(k); backtransform(b, scaleY);
+ }
+ } else {
+ uword p = x.n_cols, q = y.n_cols;
+ centerX = zeros(p); centerY = zeros(q);
+ scaleX = ones(p); scaleY = ones(q);
+ // compute maximum correlations with original data
+ r = ppControl.maxCor(x, y, corControl, A, B, objective);
+ }
+ // return vector of maximum correlations
+ return r;
+}
+
+// R interface
+SEXP R_sMaxCorPP(SEXP R_x, SEXP R_y, SEXP R_method, SEXP R_corControl,
+ SEXP R_algorithm, SEXP R_ppControl, SEXP R_standardize, SEXP R_fallback) {
+ // initializations
+ NumericMatrix Rcpp_x(R_x), Rcpp_y(R_y); // convert data to Rcpp types
+ mat x(Rcpp_x.begin(), Rcpp_x.nrow(), Rcpp_x.ncol(), false); // convert data
+ mat y(Rcpp_y.begin(), Rcpp_y.nrow(), Rcpp_y.ncol(), false); // to arma types
+ string method = as(R_method); // convert character string
+ List Rcpp_corControl(R_corControl); // list of control parameters
+ string algorithm = as(R_algorithm); // convert character string
+ List Rcpp_ppControl(R_ppControl); // list of control parameters
+ bool standard = as(R_standardize); // convert to boolean
+ bool fallback = as(R_fallback); // convert to boolean
+ // initialize results
+ vec r, centerX, centerY, scaleX, scaleY, lambdaX, lambdaY, objective;
+ mat A, B;
+ if(algorithm == "grid") {
+ // define control object for alternate grid searches
+ SparseGridControl ppControl(Rcpp_ppControl);
+ // define control object for the correlations and call the arma version
+ if(method == "spearman") {
+ CorSpearmanControl corControl(Rcpp_corControl);
+ r = sMaxCorPP(x, y, corControl, ppControl, standard, true, fallback,
+ A, B, centerX, centerY, scaleX, scaleY, objective);
+ } else if(method == "kendall") {
+ CorKendallControl corControl(Rcpp_corControl);
+ r = sMaxCorPP(x, y, corControl, ppControl, standard, true, fallback,
+ A, B, centerX, centerY, scaleX, scaleY, objective);
+ } else if(method == "quadrant") {
+ CorQuadrantControl corControl(Rcpp_corControl);
+ r = sMaxCorPP(x, y, corControl, ppControl, standard, true, fallback,
+ A, B, centerX, centerY, scaleX, scaleY, objective);
+ } else if(method == "M") {
+ CorMControl corControl(Rcpp_corControl);
+ r = sMaxCorPP(x, y, corControl, ppControl, standard, true, fallback,
+ A, B, centerX, centerY, scaleX, scaleY, objective);
+ } else if(method == "pearson") {
+ CorPearsonControl corControl;
+ r = sMaxCorPP(x, y, corControl, ppControl, standard, false, false,
+ A, B, centerX, centerY, scaleX, scaleY, objective);
+ } else {
+ error("method not available");
+ }
+ lambdaX = ppControl.lambda.col(0); lambdaY = ppControl.lambda.col(1);
+ } else {
+ error("algorithm not available");
+ }
+ // wrap and return result
+ return List::create(
+ Named("cor") = wrap(r.begin(), r.end()),
+ Named("a") = A,
+ Named("b") = B,
+ Named("centerX") = centerX,
+ Named("centerY") = centerY,
+ Named("scaleX") = scaleX,
+ Named("scaleY") = scaleY,
+ Named("lambdaX") = wrap(lambdaX.begin(), lambdaX.end()),
+ Named("lambdaY") = wrap(lambdaY.begin(), lambdaY.end()),
+ Named("objective") = wrap(objective.begin(), objective.end())
+ );
+}
diff --git a/src/cca.h b/src/cca.h
index fd8f99a..06a4c4a 100644
--- a/src/cca.h
+++ b/src/cca.h
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * Erasmus University Rotterdam
+ * Erasmus Universiteit Rotterdam
*/
#ifndef _ccaPP_CCA_H
@@ -17,5 +17,7 @@ using namespace Rcpp;
RcppExport SEXP R_fastCor(SEXP R_x, SEXP R_y, SEXP R_method, SEXP R_control); // for testing
RcppExport SEXP R_ccaPP(SEXP R_x, SEXP R_y, SEXP R_k, SEXP R_method, SEXP R_corControl,
SEXP R_algorithm, SEXP R_ppControl, SEXP R_standardize, SEXP R_fallback);
+RcppExport SEXP R_sMaxCorPP(SEXP R_x, SEXP R_y, SEXP R_method, SEXP R_corControl,
+ SEXP R_algorithm, SEXP R_ppControl, SEXP R_standardize, SEXP R_fallback);
#endif
diff --git a/src/ccaPP_init.c b/src/ccaPP_init.c
index f364117..9ec30e5 100644
--- a/src/ccaPP_init.c
+++ b/src/ccaPP_init.c
@@ -10,10 +10,12 @@ extern SEXP R_corM(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP R_corPearson(SEXP, SEXP);
extern SEXP R_corQuadrant(SEXP, SEXP, SEXP);
extern SEXP R_corSpearman(SEXP, SEXP, SEXP);
+extern SEXP R_fastCor(SEXP, SEXP, SEXP, SEXP);
extern SEXP R_fastMAD(SEXP, SEXP);
extern SEXP R_fastMedian(SEXP);
extern SEXP R_l1Median(SEXP);
extern SEXP R_rank(SEXP);
+extern SEXP R_sMaxCorPP(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
static const R_CallMethodDef CallEntries[] = {
{"R_ccaPP", (DL_FUNC) &R_ccaPP, 9},
@@ -22,10 +24,12 @@ static const R_CallMethodDef CallEntries[] = {
{"R_corPearson", (DL_FUNC) &R_corPearson, 2},
{"R_corQuadrant", (DL_FUNC) &R_corQuadrant, 3},
{"R_corSpearman", (DL_FUNC) &R_corSpearman, 3},
+ {"R_fastCor", (DL_FUNC) &R_fastCor, 4},
{"R_fastMAD", (DL_FUNC) &R_fastMAD, 2},
{"R_fastMedian", (DL_FUNC) &R_fastMedian, 1},
{"R_l1Median", (DL_FUNC) &R_l1Median, 1},
{"R_rank", (DL_FUNC) &R_rank, 1},
+ {"R_sMaxCorPP", (DL_FUNC) &R_sMaxCorPP, 8},
{NULL, NULL, 0}
};
diff --git a/src/cor.cpp b/src/cor.cpp
index 27eff5d..c144901 100644
--- a/src/cor.cpp
+++ b/src/cor.cpp
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * KU Leuven
+ * Erasmus Universiteit Rotterdam
*/
#include
diff --git a/src/cor.h b/src/cor.h
index 8d646fc..22179a4 100644
--- a/src/cor.h
+++ b/src/cor.h
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * KU Leuven
+ * Erasmus Universiteit Rotterdam
*/
#ifndef _ccaPP_COR_H
diff --git a/src/fastCorKendall.cpp b/src/fastCorKendall.cpp
index 3be9dc4..537f9aa 100755
--- a/src/fastCorKendall.cpp
+++ b/src/fastCorKendall.cpp
@@ -36,7 +36,7 @@
/*
* Modifications: Andreas Alfons
- * KU Leuven
+ * Erasmus Universiteit Rotterdam
*/
diff --git a/src/fastCorKendall.h b/src/fastCorKendall.h
index 1dd033a..405dd9a 100644
--- a/src/fastCorKendall.h
+++ b/src/fastCorKendall.h
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * KU Leuven
+ * Erasmus Universiteit Rotterdam
*/
#ifndef _ccaPP_FASTCORKENDALL_H
diff --git a/src/utils.cpp b/src/utils.cpp
index 83470b6..0255f8f 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * Erasmus University Rotterdam
+ * Erasmus Universiteit Rotterdam
*/
#include
diff --git a/src/utils.h b/src/utils.h
index 2186d60..53c84e8 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -1,6 +1,6 @@
/*
* Author: Andreas Alfons
- * Erasmus University Rotterdam
+ * Erasmus Universiteit Rotterdam
*/
#ifndef _ccaPP_UTILS_H