## file biplot.R
## copyright (C) 1998 W. N. Venables and B. D. Ripley
##
biplot <- function(x, ...) UseMethod("biplot")
biplot.default <-
    function(x, y, var.axes = TRUE, col, cex = rep(par("cex"), 2),
	     xlabs = NULL, ylabs = NULL, expand=1, xlim = NULL, ylim = NULL,
	     arrow.len = 0.1, ...)
{
    n <- nrow(x)
    p <- nrow(y)
    if(missing(xlabs)) {
	xlabs <- dimnames(x)[[1]]
	if(is.null(xlabs)) xlabs <- 1:n
    }
    xlabs <- as.character(xlabs)
    dimnames(x) <- list(xlabs, dimnames(x)[[2]])
    if(missing(ylabs)) {
	ylabs <- dimnames(y)[[1]]
	if(is.null(ylabs)) ylabs <- paste("Var", 1:p)
    }
    ylabs <- as.character(ylabs)
    dimnames(y) <- list(ylabs, dimnames(y)[[2]])
    if(length(cex) == 1) cex <- c(cex, cex)
    if(missing(col)) {
	col <- par("col")
	if (!is.numeric(col)) col <- match(col, palette())
	col <- c(col, col + 1)
    }
    else if(length(col) == 1) col <- c(col, col)
    unsigned.range <- function(x) c(-abs(min(x)), abs(max(x)))
    rangx1 <- unsigned.range(x[, 1])
    rangx2 <- unsigned.range(x[, 2])
    rangy1 <- unsigned.range(y[, 1])
    rangy2 <- unsigned.range(y[, 2])
    if(missing(xlim) && missing(ylim))
	xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
    else if(missing(xlim)) xlim <- rangx1 else ylim <- rangx2
    ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
    on.exit(par(oldpar))
    oldpar <- par(pty = "s")
    plot(x, type = "n", xlim = xlim, ylim = ylim, col = col[1], ...)
    text(x, xlabs, cex = cex[1], col = col[1], ...)
    par(new = TRUE)
    plot(y, axes = FALSE, type = "n", xlim = xlim*ratio, ylim = ylim*ratio,
	 xlab = "", ylab = "", col = col[1], ...)
    axis(3, col = col[2])
    axis(4, col = col[2])
    box(col = col[1])
    text(y, labels=ylabs, cex = cex[2], col = col[2], ...)
    if(var.axes)
	arrows(0, 0, y[,1] * 0.8, y[,2] * 0.8, col = col[2], length=arrow.len)
    invisible()
}
biplot.princomp <- function(x, choices = 1:2, scale = 1, pc.biplot=FALSE, ...)
{
    if(length(choices) != 2) stop("length of choices must be 2")
    if(!length(scores <- x$scores))
	stop(paste("object", deparse(substitute(x)), "has no scores"))
    lam <- x$sdev[choices]
    if(is.null(n <- x$n.obs)) n <- 1
    lam <- lam * sqrt(n)
    if(scale < 0 || scale > 1) warning("scale is outside [0, 1]")
    if(scale != 0) lam <- lam^scale
    if(pc.biplot) lam <- lam / sqrt(n)
    biplot.default(t(t(scores[, choices]) / lam),
		   t(t(x$loadings[, choices]) * lam), ...)
    invisible()
}
## Seber pages 506-507, after a Golub original
cancor <- function(x, y, xcenter=TRUE, ycenter=TRUE)
{
    x <- as.matrix(x)
    y <- as.matrix(y)
    if((nr <- nrow(x)) != nrow(y)) stop("unequal number of rows in cancor")
    ncx <- ncol(x)
    ncy <- ncol(y)
    if(!nr || !ncx || !ncy) stop("dimension 0 in x or y")
    if(is.logical(xcenter)) {
	if(xcenter) {
	    xcenter <- apply(x, 2, mean)
	    x <- x - rep(xcenter, rep(nr, ncx))
	}
	else xcenter <- rep(0, ncx)
    }
    else {
	xcenter <- rep(xcenter, length = ncx)
	x <- x - rep(xcenter, rep(nr, ncx))
    }
    if(is.logical(ycenter)) {
	if(ycenter) {
	    ycenter <- apply(y, 2, mean)
	    y <- y - rep(ycenter, rep(nr, ncy))
	}
	else ycenter <- rep(0, ncy)
    }
    else {
	ycenter <- rep(ycenter, length = ncy)
	y <- y - rep(ycenter, rep(nr,ncy))
    }
    qx <- qr(x)
    qy <- qr(y)
    dx <- qx$rank;	if(!dx) stop("`x' has rank 0")
    dy <- qy$rank;	if(!dy) stop("`y' has rank 0")
    ## compute svd(Qx'Qy)
    z <- svd(qr.qty(qx, qr.qy(qy, diag(1, nr, dy)))[1:dx,, drop = FALSE],
	     dx, dy)
    list(cor = z$d,
	 xcoef = backsolve((qx$qr)[1:dx, 1:dx, drop = FALSE], z$u),
	 ycoef = backsolve((qy$qr)[1:dy, 1:dy, drop = FALSE], z$v),
	 xcenter = xcenter,
	 ycenter = ycenter)
}
cmdscale <- function (d, k = 2, eig = FALSE) {
    if (any(is.na(d)))
	stop("NA values not allowed in d")
    if (is.null(n <- attr(d, "Size"))) {
	x <- as.matrix(d^2)
	if ((n <- nrow(x)) != ncol(x))
	    stop("Distances must be result of dist or a square matrix")
    }
    else {
	x <- matrix(0, n, n)
	x[row(x) > col(x)] <- d^2
	x <- x + t(x)
    }
    storage.mode(x) <- "double"
    Tmat <- -0.5 * .C("dblcen", x, as.integer(n), PACKAGE="mva")[[1]]
    e <- eigen(Tmat, symmetric = TRUE)
    ev <- e$values[1:k]
    points <- e$vectors[, 1:k] %*% diag(sqrt(ev))
    dimnames(points) <- list(dimnames(d)[[1]], NULL)
    if (eig) list(points = points, eig = ev)
    else points
}
cutree <- function(tree, k=NULL, h=NULL)
{
    if(is.null(k) & is.null(h))
        stop("Either k or h must be specified")
    else if(is.null(k)){
        k <- integer(length(h))
        myh <- h
        myh[h<min(tree$height)] <- min(tree$height)
        myh[h>max(tree$height)] <- max(tree$height)
        for(n in 1:length(h))
            k[n] <- min(which(rev(tree$height) <= myh[n]))
    }
    else{
        k <- as.integer(k)
        if(min(k) < 2 | max(k) > nrow(tree$merge))
            stop(paste("Elements of k must be between 2 and",
                       nrow(tree$merge)))
    }
    ans <- .Call("R_cutree", tree$merge, k)
    if(length(k)==1){
        ans <- as.vector(ans)
        names(ans) <- tree$labels
    }
    else{
        if(! is.null(h))
            colnames(ans) <- h
        else
            colnames(ans) <- k
        rownames(ans) <- tree$labels
    }
    return(ans)
}
dist <- function(x, method="euclidean", diag=FALSE, upper=FALSE)
{
    ## account for possible spellings of euclid?an
    if(!is.na(pmatch(method, "euclidian")))
	method <- "euclidean"
    METHODS <- c("euclidean", "maximum",
                 "manhattan", "canberra", "binary")
    method <- pmatch(method, METHODS)
    if(is.na(method))
	stop("invalid distance method")
    if(method == -1)
	stop("ambiguous distance method")
    N <- nrow(x <- as.matrix(x))
    d <- .C("R_distance",
	    x = as.double(x),
	    nr= N,
	    nc= ncol(x),
	    d = double(N*(N - 1)/2),
	    diag  = as.integer(FALSE),
	    method= as.integer(method),
	    DUP = FALSE, PACKAGE="base")$d
    attr(d, "Size") <- N
    attr(d, "Labels") <- dimnames(x)[[1]]
    attr(d, "Diag") <- diag
    attr(d, "Upper") <- upper
    attr(d, "method") <- METHODS[method]
    attr(d, "call") <- match.call()
    class(d) <- "dist"
    return(d)
}
names.dist <- function(d) attr(d, "Labels")
"names<-.dist" <- function(d, n)
{
    if(length(n) != attr(d, "Size"))
	stop("invalid names for dist object")
    attr(d, "Labels") <- n
    d
}
as.matrix.dist <- function(obj)
{
    size <- attr(obj, "Size")
    df <- matrix(0, size, size)
    df[row(df) > col(df)] <- obj
    df <- df + t(df)
    labels <- attr(obj, "Labels")
    dimnames(df) <-
	if(is.null(labels)) list(1:size,1:size) else list(labels,labels)
    df
}
as.dist <- function(m, diag = FALSE, upper=FALSE)
{
    m <- as.matrix(m)
    retval <-  m[row(m) > col(m)]
    attributes(retval) <- NULL
    if(!is.null(rownames(m)))
        attr(retval,"Labels") <- rownames(m)
    else if(!is.null(colnames(m)))
        attr(retval,"Labels") <- colnames(m)
    attr(retval,"Size") <- nrow(m)
    attr(retval,"Diag") <- diag
    attr(retval,"Upper") <- upper
    attr(retval, "call") <- match.call()
    class(retval) <- "dist"
    retval
}
print.dist <- function(obj, diag=NULL, upper=NULL)
{
    if(is.null(diag))
	diag <- if(is.null(attr(obj, "Diag"))) FALSE else attr(obj, "Diag")
    if(is.null(upper))
	upper <- if(is.null(attr(obj,"Upper")))FALSE else attr(obj, "Upper")
    size <- attr(obj, "Size")
    df <- as.matrix.dist(obj)
    if(!upper)
	df[row(df) < col(df)] <- NA
    if(!diag)
	df[row(df) == col(df)] <- NA
    print(if(diag || upper) df else df[-1,-size], na="")
    invisible(obj)
}
## Hierarchical clustering, on raw input data; we will use Euclidean
## distance.  A range of criteria are supported; also there is a
## storage-economic option.
##
## We use the general routine, `hc', which caters for 7 criteria,
## using a half dissimilarity matrix; (BTW, this uses the very efficient
## nearest neighbor chain algorithm, which makes this algorithm of
## O(n^2) computational time, and differentiates it from the less
## efficient -- i.e. O(n^3) -- implementations in all commercial
## statistical packages -- as far as I am aware -- except Clustan.)
##
## Clustering Methods:
##
## 1. Ward's minimum variance or error sum of squares method.
## 2. single linkage or nearest neighbor method.
## 3. complete linkage or diameter.
## 4. average linkage, group average, or UPGMA method.
## 5. McQuitty's or WPGMA method.
## 6. median, Gower's or WPGMC method.
## 7. centroid or UPGMC method (7).
##
## Original author: F. Murtagh, May 1992
## R Modifications: Ross Ihaka, Dec 1996
##		    Friedrich Leisch, Apr 1998
hclust <- function(d, method="complete")
{
    METHODS <- c("ward", "single",
                 "complete", "average", "mcquitty",
                 "median", "centroid")
    method <-  pmatch(method, METHODS)
    if(is.na(method))
	stop("invalid clustering method")
    if(method == -1)
	stop("ambiguous clustering method")
    n <- attr(d, "Size")
    if(is.null(n))
	stop("invalid dissimilarities")
    labels <- attr(d, "Labels")
    len <- n*(n-1)/2
    hcl <- .Fortran("hclust",
		    n = as.integer(n),
		    len = as.integer(len),
		    method = as.integer(method),
		    ia = integer(n),
		    ib = integer(n),
		    crit = double(n),
		    membr = double(n),
		    nn = integer(n),
		    disnn = double(n),
		    flag = logical(n),
		    diss = as.double(d), PACKAGE="mva")
    ## 2nd step: interpret the information that we now have
    ## as merge, height, and order lists.
    hcass <- .Fortran("hcass2",
		      n = as.integer(n),
		      ia = as.integer(hcl$ia),
		      ib = as.integer(hcl$ib),
		      order = integer(n),
		      iia = integer(n),
		      iib = integer(n), PACKAGE="mva")
    tree <- list(merge=cbind(hcass$iia[1:(n-1)], hcass$iib[1:(n-1)]),
		 height=hcl$crit[1:(n-1)],
		 order=hcass$order,
		 labels=attr(d, "Labels"),
                 method=METHODS[method],
                 call=match.call())
    if(!is.null(attr(d, "method"))){
        tree$dist.method <- attr(d, "method")
    }
    class(tree) <- "hclust"
    tree
}
plot.hclust <-
    function (tree, hang = 0.1, labels=NULL, ...)
{
    merge <- tree$merge
    if (!is.matrix(merge) || ncol(merge) != 2)
	stop("invalid dendrogram")
    n <- nrow(merge)
    height <- as.double(tree$height)
    order <- as.double(order(tree$order))
    labels <-
	if(missing(labels)){
	    if (is.null(tree$labels))
		paste(1:(n+1))
	    else
		as.character(tree$labels)
	} else {
	    if(labels==FALSE)
		character(n+1)
	    else
		as.character(labels)
	}
    plot.new()
    .Internal(dend.window(n, merge, height, order, hang, labels, ...))
    .Internal(dend(n, merge, height, order, hang, labels, ...))
    axis(2, at=pretty(range(height)))
    invisible()
}
as.hclust <- function(x, ...) UseMethod("as.hclust")
as.hclust.twins <- function(x)
{
    retval <- list(merge = x$merge,
                   height = sort(x$height),
                   order = x$order,
                   call = match.call(),
                   dist.method = attr(x$diss, "Metric"),
                   labels = rownames(x$data))
    class(retval) <- "hclust"
    retval
}
print.hclust <- function(tree)
{
    if(!is.null(tree$call))
        cat("\nCall:\n",deparse(tree$call),"\n\n",sep="")
    if(!is.null(tree$method))
        cat("Cluster method   :", tree$method, "\n") 
    if(!is.null(tree$dist.method))
        cat("Distance         :", tree$dist.method, "\n")
        cat("Number of objects:", length(tree$height)+1, "\n")
    cat("\n")
}
rect.hclust <- function(hclust.obj, k=NULL, which=NULL,
                        x=NULL, h=NULL, border=2, cluster=NULL)
{
    if(length(h)>1 | length(k)>1)
        stop("k and h must be a scalar")
    if(!is.null(h)){
        if(!is.null(k))
            stop("specify exactly one of k and h")
        k <- min(which(rev(hclust.obj$height)<h))
        k <- max(k, 2)
    }
    else
        if(is.null(k)) 
            stop("specify exactly one of k and h")
    if(k < 2 | k > length(hclust.obj$height))
        stop(paste("k must be between 2 and", length(hclust.obj$height)))
    if(is.null(cluster))
        cluster <- cutree(hclust.obj, k=k)
    ## cutree returns classes sorted by data, we need classes
    ## as occurring in the tree (from left to right)
    clustab <- table(cluster)[unique(cluster[hclust.obj$order])]
    m <- c(0, cumsum(clustab))
    if(!is.null(x)){
        if(!is.null(which))
            stop("specify exactly one of which and x")        
        which <- x
        for(n in 1:length(x))
            which[n] <- max(which(m<x[n]))
    }
    else
        if(is.null(which))
            which <- 1:k
    if(any(which>k))
        stop(paste("all elements of which must be between 1 and", k))
    border <- rep(border, length=length(which))
    retval <- list()
    for(n in 1:length(which)){
        rect(m[which[n]]+0.66, par("usr")[3],
             m[which[n]+1]+0.33, mean(rev(hclust.obj$height)[(k-1):k]),
             border = border[n])
        retval[[n]] <- which(cluster==as.integer(names(clustab)[which[n]]))
    }
    invisible(retval)
}
identify.hclust <- function(HCOBJ, FUN=NULL, N=20, MAXCLUSTER=20,
                            DEV.FUN=NULL, ...)
{
  cluster <- cutree(HCOBJ, k=2:MAXCLUSTER)
  retval <- list()
  oldk <- NULL
  oldx <- NULL
  DEV.HCOBJ <- dev.cur()
  for(n in 1:N){
    dev.set(DEV.HCOBJ)
    x <- locator(1)
    if(is.null(x))
      break
    k <- min(which(rev(HCOBJ$height)<x$y), MAXCLUSTER)
    k <- max(k, 2)
    if(!is.null(oldx)){
      rect.hclust(HCOBJ, k=oldk, x=oldx, cluster=cluster[,oldk-1],
                  border="grey")
    }
    retval[[n]] <- unlist(rect.hclust(HCOBJ, k=k, x=x$x,
                                      cluster=cluster[,k-1],
                                      border="red"))
    if(!is.null(FUN)){
      if(!is.null(DEV.FUN)){
        dev.set(DEV.FUN)
      }
      retval[[n]] <- FUN(retval[[n]], ...)
    }
    oldx <- x$x
    oldk <- k
  }
  dev.set(DEV.HCOBJ)
  invisible(retval)
}
kmeans <- function(x, centers, iter.max = 10)
{
    x <- as.matrix(x)
    m <- nrow(x)
    if(missing(centers))
	stop("centers must be a number or a matrix")
    if(length(centers) == 1) {
	k <- centers
	if(m < k)
	    stop("more cluster centers than data points.")
	centers <- x[sample(1:m, k), ]
    } else {
	centers <- as.matrix(centers)
	k <- nrow(centers)
    }
    if(iter.max < 1) stop("iter.max must be positive.")
    if(m < k)
	stop("more cluster centers than data points.")
    if(ncol(x) != ncol(centers))
	stop("must have same number of columns in x and centers.")
    Z <- .Fortran("kmns",
		  as.double(x),
		  as.integer(m),
		  as.integer(ncol(x)),
		  centers = as.double(centers),
		  as.integer(k),
		  c1 = integer(m),
		  integer(m),
		  nc =integer(k),
		  double(k),
		  double(k),
		  integer(k),
		  double(m),
		  integer(k),
		  integer(k),
		  as.integer(iter.max),
		  wss = double(k),
		  ifault = as.integer(0), PACKAGE="mva")
    switch(Z$ifault,
	   stop("empty cluster: try a better set of initial centers"),
	   warning("did not converge in iter.max iterations"),
	   stop("number of cluster centres must lie between 1 and nrow(x)")
	   )
    centers <- matrix(Z$centers, k)
    dimnames(centers) <- list(1:k, dimnames(x)[[2]])
    list(cluster = Z$c1, centers = centers, withinss = Z$wss, size = Z$nc)
}
plot.prcomp <- function(x, ...) { screeplot(x, ...) }
prcomp <- function(x, retx = TRUE, center = TRUE, scale. = FALSE,
                   tol = NULL) {
    x <- as.matrix(x)
    s <- svd(scale(x, center = center, scale = scale.), nu = 0)
    if (!is.null(tol)) {
        rank <- sum(s$d > (s$d[1]*tol))
        if (rank < ncol(x))
            s$v <- s$v[, 1:rank, drop = FALSE]
    }
    s$d <- s$d / sqrt(max(1, nrow(x) - 1))
    dimnames(s$v) <-
        list(colnames(x), paste("PC", seq(len = ncol(s$v)), sep = ""))
    r <- list(sdev = s$d, rotation = s$v)
    if (retx) r$x <- x %*% s$v
    class(r) <- "prcomp"
    r
}
print.prcomp <- function(x, print.x = FALSE, ...) {
    cat("Standard deviations:\n")
    print(x$sdev)
    cat("\nRotation:\n")
    print(x$rotation)
    if (print.x && length(x$x)) {
        cat("\nRotated variables:\n")
        print(x$x)
    }
    invisible(x)
}
summary.prcomp <- function(object) {
    vars <- object$sdev^2
    vars <- vars/sum(vars)
    importance <- rbind("Standard deviation" = object$sdev,
                        "Proportion of Variance" = round(vars, 5),
                        "Cumulative Proportion" = round(cumsum(vars), 5))
    colnames(importance) <- colnames(object$rotation)
    object$importance <- importance
    class(object) <- "summary.prcomp"
    object
}
print.summary.prcomp <- function(x, digits = min(3, getOption("digits")-3),
                                 ...) {
    cat("Importance of components:\n")
    print(x$importance, digits = digits)
    invisible(x)
}
## copyright (C) 1998 W. N. Venables and B. D. Ripley
##
predict.princomp <- function(object, newdata, ...) {
    if (missing(newdata)) return(object$scores)
    scale(newdata, object$center, object$scale) %*% object$loadings
}
summary.princomp <-
function(object, loadings = FALSE, cutoff = 0.1, digits = 3, ...) {
    vars <- object$sdev^2
    vars <- vars/sum(vars)
    cat("Importance of components:\n")
    print(rbind("Standard deviation" = object$sdev,
                "Proportion of Variance" = vars,
                "Cumulative Proportion" = cumsum(vars)))
    if(loadings) {
        cat("\nLoadings:\n")
        cx <- format(round(object$loadings, digits = digits))
        cx[abs(object$loadings) < cutoff] <-
            substring("       ", 1, nchar(cx[1,1]))
        print(cx, quote = FALSE, ...)
    }
    invisible(object)
}
plot.princomp <- function(x, ...) { screeplot(x, ...) }
screeplot <-
function(x, npcs = min(10, length(x$sdev)),
         type = c("barplot", "lines"),
         main = deparse(substitute(x)), ...) {
    eval(main)
    type <- match.arg(type)
    pcs <- x$sdev^2
    xp <- seq(length=npcs)
    if(type=="barplot")
        barplot(pcs[xp], names = names(pcs[xp]), main = main,
                ylab = "Variances", ...)
    else {
        plot(xp, pcs[xp], type = "b", axes = FALSE, main = main,
             xlab = "", ylab = "Variances", ...)
        axis(2)
        axis(1, at = xp, labels = names(pcs[xp]))
    }
    invisible()
}
loadings <- function(x) x$loadings
princomp <- function(x, cor = FALSE, scores = TRUE, covmat = NULL,
                     subset = rep(TRUE, nrow(as.matrix(x)))) {
    if (!missing(x)) z <- as.matrix(x)[subset, , drop = FALSE]
    else z <- NULL
    if (is.list(covmat)) {
        if(any(is.na(match(c("cov", "n.obs"), names(covmat)))))
            stop("covmat is not a valid covariance list")
        cv <- covmat$cov
        n.obs <- covmat$n.obs
        cen <- covmat$center
    } else if(is.matrix(covmat)) {
        cv <- covmat
        n.obs <- NA
        cen <- NULL
    } else if(is.null(covmat)){
        covmat <- cov.wt(z)
        n.obs <- covmat$n.obs
        cv <- covmat$cov * (1 - 1/n.obs) # for S-PLUS compatibility
        cen <- covmat$center
    } else stop("covmat is of unknown type")
    if (cor) {
        sds <- sqrt(diag(cv))
        cv <- cv/(sds %o% sds)
    }
    edc <- eigen(cv)
    if (any(edc$values < 0))
        stop("covariance matrix is not non-negative definite")
    cn <- paste("Comp.", 1:ncol(cv), sep = "")
    names(edc$values) <- cn
    dimnames(edc$vectors) <- list(dimnames(x)[[2]], cn)
    sdev <- sqrt(edc$values)
    if (cor) sc <- sds
    else sc <- rep(1, ncol(z))
    names(sc) <- colnames(cv)
    scr <- NULL
    if (scores && !missing(x))
        scr <- scale(z, center = TRUE, scale = FALSE) %*% edc$vectors
    if (is.null(cen)) cen <- rep(NA, nrow(cv))
    edc <-list(sdev = sdev, loadings = edc$vectors,
               center = cen, scale = sc, n.obs = n.obs,
               scores = scr, call = match.call())
    ## The Splus function also return list elements factor.sdev,
    ## correlations and coef, but these are not documented in the help.
    ## coef seems to equal load.  The Splus function also returns list
    ## element terms which is not supported here.
    class(edc) <- "princomp"
    edc
}
print.princomp <- function(x)
{
    cat("Call:\n"); dput(x$call)
    cat("\nStandard deviations:\n")
    print(x$sdev)
    cat("\n", length(x$scale), " variables and ", x$n.obs,
        "observations.\n")
    invisible(x)
}
.First.lib <- function(lib, pkg) {
    library.dynam("mva", pkg, lib)
    provide(mva)
}
