ifelse1 <- function (test, x, y) if (test) x else y dummy <- function(x,subset=rep(T,length(x)),reference=sort(unique(x[!is.na(x)])),includeAll=F) { if (length(reference)==1) reference <- unique(c(reference,sort(unique(x[!is.na(x)])))) X <- NULL if (includeAll) X <- cbind(subset*as.integer(x==reference[1])) else X <- NULL for (r in reference[-1]) X <- cbind(X,subset*as.integer(x==r)) if (includeAll) dimnames(X) <- list(NULL,reference) else if (length(reference) > 2) { dimnames(X) <- list(NULL,paste(reference[-1]," vs ",reference[1],sep="")) } else dimnames(X) <- list(NULL,reference[-1]) attr(X,"transformation") <- "dummy" attr(X,"reference") <- reference attr(X,"original") <- x X } descrip <- function (..., strata=NULL, subset=NULL, probs= c(.25,.50,.75), replaceZeroes=F, restriction=Inf, above=NULL, below=NULL, labove=NULL, rbelow=NULL, lbetween=NULL, rbetween=NULL, interval=NULL, linterval=NULL, rinterval=NULL, lrinterval=NULL, version=F) { vrsn <- "20110928" if (version) return(vrsn) # # subset defines a subsetting criterion applied to every vector # strata defines a matrix or data frame of vectors whose unique combinations define strata # probs defines the quantiles to be estimated # - 0 and 1 will be appended in order to get min and max # - if the variable is censored, a potentially censored min and max will be returned # restriction defines the bounds for a restricted mean when using censored time to event data # - if restriction==Inf, the largest observation time will be used as the restriction # above, below, labove, rbelow, lbetween, rbetween define thresholds for cdf or survivor functions # - for above, below, labove, lbelow, -Inf and Inf are appended, and the sorted unique values are then used as cutpoints # - the proportions are defined for # x > above[j] # x >= labove[j] # x < below[j] # x <= rbelow[j] # lbetween[j] <= x < lbetween[j+1], where -Inf and Inf are appended and sorted unique values used # rbetween[j] < x <= rbetween[j+1], where -Inf and Inf are appended and sorted unique values used # interval[j,1] < x < interval[j,2] # linterval[j,1] <= x < linterval[j,2] # rinterval[j,1] < x <= rinterval[j,2] # lrinterval[j,1] <= x <= lrinterval[j,2] # # The value returned will be a matrix having columns corresponding to # N the number of observations # Msng the number of missing values for observations # Mean the (restricted) mean # Std Dev the (restricted) standard deviation # Gm Mean the (restricted) geometric mean # Min the (possibly censored) minimum # quantiles quantiles corresponding to the values in probs # Max the (possibly censored) maximum # probabilities the probabilities (possibly derived from KM) for intervals defined by above, labove, # below, rbelow, lbetween, rbetween, interval, linterval, rinterval, lrinterval # restriction the limit used for restricted means (Inf for noncensored data) # firstEvent the lowest value at which an observation is uncensored # lastEvent the highest value at which an observation is uncensored # is.Date <- function (x) inherits(x,"Date") vDescrip <- function (x, probs, thresholds, geometricMean, replaceZeroes) { if (is.factor(x) | is.logical(x)) { x <- as.numeric(x) } ntholds <- if (is.null(thresholds)) 0 else dim(thresholds)[1] probs <- sort(unique(c(probs,0,1))) rslt <- length(x) if (rslt==0) { rslt <- c(rslt, rep(NaN,7+length(probs)+ntholds)) } else { u <- is.na(x) rslt <- c(rslt,sum(u)) x <- x[!u] if (length(x)==0 | is.character(x)) { rslt <- c(rslt, rep(NA,6+length(probs)+ntholds)) } else { rslt <- c(rslt, mean(x), sd(x), ifelse1 (geometricMean, exp(mean(log(ifelse(x==0,replaceZeroes,x)))), NA), quantile(x, probs)) if (ntholds>0) { for (j in 1:ntholds) { u <- ifelse1(thresholds[j,1]==0, x > thresholds[j,2], x >= thresholds[j,2]) & ifelse1(thresholds[j,3]==0, x < thresholds[j,4], x <= thresholds[j,4]) rslt <- c(rslt,mean(u)) } } rslt <- c(rslt,Inf,rslt[5+c(1,length(probs))]) } } rslt <- matrix(c(rslt,0),1) qnames <- paste(format(100*probs),"%",sep="") qnames[probs==0.5] <- " Mdn" qnames[probs==0] <- " Min" qnames[probs==1] <- " Max" tnames <- NULL if (ntholds>0) { tholds <- thresholds tholds[tholds==Inf | tholds==-Inf] <- 0 tnames <- paste(sep="","Pr", ifelse(thresholds[,2] == -Inf, paste(sep="",ifelse(thresholds[,3]==0,"<","<="),format(tholds[,4])), ifelse(thresholds[,4]==Inf, paste(sep="",ifelse(thresholds[,1]==0,">",">="),format(tholds[,2])), paste(sep="",ifelse(thresholds[,1]==0,"(","["),format(tholds[,2]),",",format(tholds[,4]), ifelse(thresholds[,3]==0,")","]"))))) } dimnames (rslt) <- list("",c("N", "Msng", "Mean", "Std Dev", "Geom Mn", qnames, tnames, "restriction", "firstEvent", "lastEvent", "isDate")) rslt } sDescrip <- function (x, probs, thresholds, geometricMean, replaceZeroes, restriction) { KM <- function (x) { if (!is.Surv(x)) stop("x must be a Surv object") x <- x[!is.na(x)] obs <- x[,1] ev <- x[,2] ce <- 1 - ev if (length(obs) == 0) stop("No data to estimate survival curve") N <- length (obs) if (!any(obs==0)) { obs <- c(0,obs) ev <- c(0,ev) ce <- c(0,ce) } i <- order (obs,1-ev) obs <- obs[i] ev <- ev[i] ce <- ce[i] ev <- rev (cumsum (rev (ev))) ce <- rev (cumsum (rev (ce))) n <- ce + ev i <- !duplicated (obs) obs <- obs[i] n <- n[i] ev <- ev[i] ev <- ev - c (ev[-1],0) ce <- ce[i] ce <- ce - c (ce[-1],0) v <- N * cumsum (ev / (n - ev) / n) S <- exp (cumsum (log (1 - ev / n))) if (is.na (S[length(S)])) S[length(S)] <- 0 rslt <- data.frame (t=obs, atrisk=n, events=ev, censored=ce, S=S, v=v) class(rslt) <- c("KM","data.frame") rslt } sKM <- function (x, times, rightCtsCDF=T) { if (!inherits(x,"KM")) stop("x must be a KM object") if (rightCtsCDF) { rslt <- as.vector(apply(matrix(rep(times,each=length(x$t)),length(x$t)) >= x$t, 2, sum)) + 1 } else rslt <- as.vector(apply(matrix(rep(times,each=length(x$t)),length(x$t)) > x$t, 2, sum)) + 1 if (x$S[length(x$S)] > 0) rslt[times > x$t[length(x$t)]] <- NA c(1,x$S)[rslt] } pKM <- function (x, times, rightCtsCDF=T) { 1 - sKM (x, times, rightCtsCDF) } qKM <- function (x, probs) { rslt <- length(probs) for (i in 1:length(probs)) { p <- 1 - probs[i] j <- abs(x$S - p) < 1e-15 & x$events > 0 if (any(j)) { if (abs(p - min(x$S)) < 1e-15) { rslt[i] <- (x$t[j] + max(x$t)) / 2 } else { rslt[i] <- (x$t[j] + min(x$t[x$t > x$t[j] & x$events > 0])) / 2 } } else { j <- sum(x$S > p) if (j == length(x$S) | p == 1) { rslt[i] <- NA } else rslt[i] <- x$t[j+1] } } rslt } meanKM <- function (x, restriction=Inf) { if (length(restriction)==1) restriction <- c(x$t[1]-1,restriction) if (restriction[2]==Inf) restriction[2] <- x$t[length(x$t)] tms <- c(restriction[1],x$t[x$t > restriction[1] & x$t < restriction[2]],restriction[2]) s <- sKM(x,restriction) s <- c(s[1],x$S[x$t > restriction[1] & x$t < restriction[2]],s[2]) ne <- tms <= 0 po <- tms >= 0 neS <- 1 - s[ne] neX <- abs(c(diff(tms[ne]),0)) neI <- neS != 0 & neX != 0 if (sum(neI) > 0) rslt <- - sum(neS[neI] * neX[neI]) else rslt <- 0 poS <- s[po] poX <- c(diff(tms[po]),0) poI <- poS != 0 & poX != 0 if (sum(poI) > 0) rslt <- rslt + sum(poS[poI] * poX[poI]) attr(rslt,"restriction") <- restriction rslt } if (!is.Surv(x)) stop("x must be a Surv object") ntholds <- if (is.null(thresholds)) 0 else dim(thresholds)[1] probs <- sort(unique(c(probs,0,1))) rslt <- dim(x)[1] if (rslt==0) { rslt <- c(rslt, rep(NaN,7+length(probs)+ntholds)) } else { u <- is.na(x) rslt <- c(rslt,sum(u)) x <- x[!u] if (dim(x)[1]==0) { rslt <- c(rslt, rep(NA,6+length(probs)+ntholds)) } else { z <- KM(x) tmp1 <- meanKM(z, restriction) x2 <- x x2[,1] <- x2[,1]^2 z2 <- KM(x2) tmp2 <- sqrt(meanKM(z2, restriction^2) - tmp1^2) if (geometricMean) { x2 <- x x2[,1] <- ifelse(x2[,1]==0,log(replaceZeroes),log(x2[,1])) z2 <- KM(x2) tmp3 <- exp(meanKM(z2, log(restriction))) } else tmp3 <- NA if (any(x[,2]==1)) { firstEvent <- min(x[x[,2]==1,1]) lastEvent <- max(x[x[,2]==1,1]) } else { firstEvent <- Inf lastEvent <- - Inf } rslt <- c(rslt, tmp1, tmp2, tmp3, min(x[,1]), qKM(z, probs[-c(1,length(probs))]), max(x[,1])) if (ntholds>0) { for (j in 1:ntholds) { rslt <- c(rslt, ifelse1(thresholds[j,1]==0, sKM(z,thresholds[j,2]), sKM(z,thresholds[j,2],F)) - ifelse1(thresholds[j,4]==Inf, 0, ifelse1(thresholds[j,3]==0, sKM(z,thresholds[j,4],F), sKM(z,thresholds[j,4])))) } } rslt <- c(rslt,attr(tmp1,"restriction")[2],firstEvent,lastEvent) } } rslt <- matrix(c(rslt,0),1) qnames <- paste(format(100*probs),"%",sep="") qnames[probs==0.5] <- " Mdn" qnames[probs==0] <- " Min" qnames[probs==1] <- " Max" tnames <- NULL if (ntholds>0) { tholds <- thresholds tholds[tholds==Inf | tholds==-Inf] <- 0 tnames <- paste(sep="","Pr", ifelse(thresholds[,2] == -Inf, paste(sep="",ifelse(thresholds[,3]==0,"<","<="),format(tholds[,4])), ifelse(thresholds[,4]==Inf, paste(sep="",ifelse(thresholds[,1]==0,">",">="),format(tholds[,2])), paste(sep="",ifelse(thresholds[,1]==0,"(","["),format(tholds[,2]),",",format(tholds[,4]), ifelse(thresholds[,3]==0,")","]"))))) } dimnames (rslt) <- list("",c("N", "Msng", "Mean", "Std Dev", "Geom Mn", qnames, tnames, "restriction", "firstEvent", "lastEvent", "isDate")) rslt } vStrdescr <- function (x, stratav, subsetv, probs, thresholds, replaceZeroes) { if (is.null(subsetv)) subsetv <- rep(T,length(x)) if (length(x) != length(subsetv)) stop("length of variables must match length of subset") if (is.null(stratav)) stratav <- rep(1,length(x)) if (length(x) != length(stratav)) stop("length of variables must match length of strata") x <- x[subsetv] if (is.factor(x) | all(x[!is.na(x)] %in% c(0,1))) geometricMean <- F else geometricMean <- !any(x[!is.na(x)]<0) if (is.logical(replaceZeroes)) { if (!replaceZeroes | is.factor(x) | all(x[!is.na(x)] %in% c(0,1))) { replaceZeroes <- NA } else { replaceZeroes <- min(x[!is.na(x) & x > 0]) / 2 } } stratav <- stratav[subsetv] s <- sort(unique(stratav)) rslt <- vDescrip(x, probs, thresholds, geometricMean, replaceZeroes) if (length(s) > 1) { for (i in s) rslt <- rbind(rslt, vDescrip(x[stratav==i & !is.na(stratav)], probs, thresholds, geometricMean, replaceZeroes)) if (any(is.na(stratav))) { rslt <- rbind(rslt, vDescrip(x[is.na(stratav)], probs, thresholds, geometricMean, replaceZeroes)) dimnames(rslt)[[1]]<- format(c("All",paste(" Str",format(c(format(s),"NA"))))) } else dimnames(rslt)[[1]]<- format(c("All",paste(" Str",format(s)))) } rslt } dStrdescr <- function (x, stratad, subsetd, probs, threshholds, replaceZeroes) { if (!is.Date(x)) stop("x must be a Date object") xi <- as.integer(x) rslt <- vStrdescr (xi, stratad, subsetd, probs, thresholds, replaceZeroes) rslt[,"isDate"] <- 1 rslt } sStrdescr <- function (x, stratas, subsets, probs, thresholds, replaceZeroes, restriction) { if (!is.Surv(x)) stop("x must be a Surv object") n <- dim(x)[1] if (is.null(subsets)) subsets <- rep(T,n) if (n != length(subsets)) stop("length of variables must match length of subset") if (is.null(stratas)) stratas <- rep(1,n) if (n != length(stratas)) stop("length of variables must match length of strata") x <- x[subsets] geometricMean <- !any(x[!is.na(x),1]<0) if (is.logical(replaceZeroes)) { if (replaceZeroes) replaceZeroes <- min(x[!is.na(x) & x[,1] > 0,1]) / 2 else replaceZeroes <- NA } stratas <- stratas[subsets] s <- sort(unique(stratas)) rslt <- sDescrip(x, probs, thresholds, geometricMean, replaceZeroes, restriction) if (length(s) > 1) { for (i in s) rslt <- rbind(rslt, sDescrip(x[stratas==i & !is.na(stratas)], probs, thresholds, geometricMean, replaceZeroes, restriction)) if (any(is.na(stratas))) { rslt <- rbind(rslt, sDescrip(x[is.na(stratas)], probs, thresholds, geometricMean, replaceZeroes, restriction)) dimnames(rslt)[[1]]<- format(c("All",paste(" Str",format(c(format(s),"NA"))))) } else dimnames(rslt)[[1]]<- format(c("All",paste(" Str",format(s)))) } rslt } mStrdescr <- function (x, stratam, subsetm, probs, thresholds, replaceZeroes) { if (!is.matrix(x)) stop("x must be a matrix") p <- dim(x)[2] nms <- dimnames(x)[[2]] if (is.null(nms)) nms <- paste("V",1:p,sep="") rslt <- NULL for (i in 1:p) rslt <- rbind(rslt, vStrdescr(x[,i], stratam, subsetm, probs, thresholds, replaceZeroes)) dimnames(rslt)[[1]] <- paste(format(rep(nms,each=(dim(rslt)[1])/p)),dimnames(rslt)[[1]]) rslt } lStrdescr <- function (x, stratal, subsetl, probs, thresholds, replaceZeroes, restriction) { if (!is.list(x)) stop("x must be a list") p <- length(x) nms <- names(x) if (is.null(nms)) nms <- paste("V",1:p,sep="") rslt <- NULL for (i in 1:p) { if(is.Surv(x[[i]])) { rslt <- rbind(rslt, sStrdescr(x[[i]], stratal, subsetl, probs, thresholds, replaceZeroes, restriction)) } else if (is.Date(x[[i]])) { rslt <- rbind(rslt, dStrdescr(x[[i]], stratal, subsetl, probs, thresholds, replaceZeroes)) } else { rslt <- rbind(rslt, vStrdescr(x[[i]], stratal, subsetl, probs, thresholds, replaceZeroes)) } } dimnames(rslt)[[1]] <- paste(format(rep(nms,each=(dim(rslt)[1])/p)),dimnames(rslt)[[1]]) rslt } L <- list(...) names(L) <- unlist(match.call(expand.dots=F)$...) p <- length(L) nms <- NULL for (i in 1:p) { x <- L[[i]] if (is.list(x)) { if (is.null(names(x))) { nms <- c(nms,paste(names(L)[i],".V",1:length(x),sep="")) } else nms <- c(nms,names(x)) } else if (is.matrix(x) & !is.Surv(x)) { if (is.null(dimnames(x)[[2]])) { nms <- c(nms,paste(names(L)[i],".V",1:(dim(x)[2]),sep="")) } else nms <- c(nms, dimnames(x)[[2]]) } else nms <- c(nms,names(L)[i]) } nms <- paste(format(nms, justify="right"),": ",sep="") if (!is.null(strata)) { if (is.list(strata)) { for (i in 1:length(strata)) { if (!is.vector(strata[[i]])) stop("strata can only be a vector, matrix, or list of vectors") } n <- length(strata[[1]]) if (length(strata) > 1) { for (i in 2:length(strata)) { if (length(strata[[i]]) != n) stop("all elements in strata must be same length") } } snms <- names(strata) if (is.null(snms)) snms <- rep("",length(strata)) tmp <- paste(snms[1],format(strata[[1]])) if (length(strata) > 1) { for (i in 2:length(strata)) tmp <- paste(tmp,snms[i],format(strata[[i]])) } } else { strata <- as.matrix(strata) snms <- dimnames(strata)[[2]] if (is.null(snms)) snms <- rep("",dim(strata)[2]) tmp <- paste(snms[1],format(strata[,1])) if (dim(strata)[2] > 1) { for (i in 2:(dim(strata)[2])) tmp <- paste(tmp,snms[i],format(strata[,i])) } } strata <- tmp } thresholds <- NULL if (length(above)>0) thresholds <- rbind(thresholds,cbind(0,above,0,Inf)) if (length(labove)>0) thresholds <- rbind(thresholds,cbind(1,labove,0,Inf)) if (length(below)>0) thresholds <- rbind(thresholds,cbind(0,-Inf,0,below)) if (length(rbelow)>0) thresholds <- rbind(thresholds,cbind(0,-Inf,1,rbelow)) if (length(lbetween)>0) { lbetween <- sort(unique(c(-Inf,lbetween,Inf))) thresholds <- rbind(thresholds,cbind(1,lbetween[-length(lbetween)],0,lbetween[-1])) } if (length(rbetween)>0) { rbetween <- sort(unique(c(-Inf,rbetween,Inf))) thresholds <- rbind(thresholds,cbind(0,rbetween[-length(rbetween)],1,rbetween[-1])) } if (!is.null(interval)) { if (length(interval)==2) interval <- matrix(interval,ncol=2) if (dim(interval)[2]!=2) stop("intervals must be specified in a 2 column matrix") thresholds <- rbind(thresholds,cbind(0,interval[,1],0,interval[,2])) } if (!is.null(linterval)) { if (length(linterval)==2) linterval <- matrix(linterval,ncol=2) if (dim(linterval)[2]!=2) stop("intervals must be specified in a 2 column matrix") thresholds <- rbind(thresholds,cbind(1,linterval[,1],0,linterval[,2])) } if (!is.null(rinterval)) { if (length(rinterval)==2) rinterval <- matrix(rinterval,ncol=2) if (dim(rinterval)[2]!=2) stop("intervals must be specified in a 2 column matrix") thresholds <- rbind(thresholds,cbind(0,rinterval[,1],1,rinterval[,2])) } if (!is.null(lrinterval)) { if (length(lrinterval)==2) lrinterval <- matrix(lrinterval,ncol=2) if (dim(lrinterval)[2]!=2) stop("intervals must be specified in a 2 column matrix") thresholds <- rbind(thresholds,cbind(1,lrinterval[,1],1,lrinterval[,2])) } rslt <- NULL nV <- 0 for (i in 1:p) { x <- L[[i]] if (is.list(x)) { names(x) <- nms[nV + (1:length(x))] nV <- nV + length(x) rslt <- rbind(rslt, lStrdescr(x, strata, subset, probs, thresholds, replaceZeroes, restriction)) } else if (is.matrix(x) & !is.Surv(x)) { dimnames(x) <- list(NULL,nms[nV + (1:(dim(x)[2]))]) nV <- nV + dim(x)[2] rslt <- rbind(rslt, mStrdescr(x, strata, subset, probs, thresholds, replaceZeroes)) } else if (is.Surv(x)) { nV <- nV + 1 rslt <- rbind(rslt, sStrdescr(x, strata, subset, probs, thresholds, replaceZeroes, restriction)) } else if (is.Date(x)) { nV <- nV + 1 rslt <- rbind(rslt, dStrdescr(x, strata, subset, probs, thresholds, replaceZeroes)) } else if (is.factor(x)) { x <- list(x) nV <- nV + 1 names(x) <- nms[nV] rslt <- rbind(rslt, lStrdescr(x, strata, subset, probs, thresholds, replaceZeroes)) } else { x <- matrix(x) nV <- nV + 1 dimnames(x) <- list(NULL,nms[nV]) rslt <- rbind(rslt, mStrdescr(x, strata, subset, probs, thresholds, replaceZeroes)) } } class(rslt) <- "uDescriptives" rslt } print.uDescriptives <- function (x,sigfigs=max(3,getOption("digits")-3),width=9,nonsci.limit=5, version=F) { vrsn <- "20121025" if (version) return(vrsn) # # prints CI or exp(CI) to specified sigfigs # rounds exp(Est) to same number of decimal figures as exp(CI) # prints Std Err or Robust SE to specified sigfigs # rounds Estimate to same number of decimal figures as StdErr or Robust SE # prints F stat to specified sigfigs # prints P value to number of decimal figures as nonsci.limit unless < 10^-nonsci.limit when "< .00001" used # centers all but df and P value, which is right justified cmptRoundDigits <- function (x, sf) { y <- max(abs(x),na.rm=T) if (y==0) { sf } else { y <- trunc(log(y) / log(10)) - (y < 1) max(0,sf - y - (y < sf)) } } frmtCol <- function (x, sf, nonsci.limit, colwidth=9, append="") { rslt <- NULL for (i in 1:length(x)) { if (is.na(x[i])) { tmp <- "NA" } else { rd <- cmptRoundDigits (x[i], sf) if (rd <= nonsci.limit & abs(x[i]) < 10^nonsci.limit) { tmp <- format(round(x[i],rd),nsmall=rd,width=1) } else { tmp <- format(round(x[i],rd), digits=sf, scientific=T, width=1) } } rslt <- c(rslt,ifelse(x[i]<0,tmp,paste(" ",tmp,sep=""))) } rslt <- paste(rslt,append,sep="") format(rslt, justify="centre", width=colwidth) } ncol <- dim(x)[2] meancol <- (1:ncol)[dimnames(x)[[2]]=="Mean"] mincol <- (1:ncol)[dimnames(x)[[2]]==" Min"] maxcol <- (1:ncol)[dimnames(x)[[2]]==" Max"] censMin <- x[,"firstEvent"] > x[,mincol] censMin[is.na(censMin)] <- F censMax <- x[,"lastEvent"] < x[,maxcol] censMax[is.na(censMax)] <- F if (!any(censMax)) { restriction <- NULL } else { restriction <- frmtCol(x[,"restriction"],sigfigs,nonsci.limit,1,")") restriction <- paste("(R",restriction,sep="") restriction[!censMax] <- "" restriction <- format(restriction, justify="left") } frmtCoefficients <- format(x[,1:(ncol-4),drop=F]) for (j in 1:2) frmtCoefficients[,j] <- format (x[,j],justify="right",width=5) frmtCoefficients[,mincol] <- frmtCol (x[,mincol],sigfigs,nonsci.limit,width,ifelse(censMin,"+","")) for (j in 3:5) frmtCoefficients[,j] <- frmtCol (x[,j],sigfigs,nonsci.limit,width,ifelse(censMax,"+","")) indx <- 7:(ncol-4) indx <- indx[indx != maxcol] for (j in indx) frmtCoefficients[,j] <- frmtCol (x[,j],sigfigs,nonsci.limit,width) frmtCoefficients[,maxcol] <- frmtCol (x[,maxcol],sigfigs,nonsci.limit,width,ifelse(censMax,"+","")) if (any(x[,"isDate"]==1)) { xformCol <- c(3,5:(dim(x)[2]-4)) for (j in 1:length(xformCol)) { if (substring(dimnames(x)[[2]][xformCol[j]],1,2)=="Pr") xformCol[j] <- NA } xformCol <- xformCol[!is.na(xformCol)] orgn <- "1970-01-01" frmtCoefficients[x[,"isDate"]==1,xformCol] <- format(as.Date(x[x[,"isDate"]==1,xformCol],orgn)) } if (!is.null(restriction)) frmtCoefficients <- cbind(frmtCoefficients[,1:2,drop=F], "Restrict"=restriction,frmtCoefficients[,-(1:2),drop=F]) print(frmtCoefficients,quote=F) invisible(frmtCoefficients) } tableStat <- function (variable, ..., stat="count", na.rm=T, subset=NULL, probs= c(.25,.50,.75), replaceZeroes=F, restriction=Inf, above=NULL, below=NULL, labove=NULL, rbelow=NULL, lbetween=NULL, rbetween=NULL, interval=NULL, linterval=NULL, rinterval=NULL, lrinterval=NULL, version=F) { vrsn <- "20121025" if (version) return(vrsn) # # Provides descriptive statistics (1 at a time) tabulated within strata # variable is a numeric vector (factor is coerced to numeric) or Surv object # ... are the stratifying variables each of which is a vector # stat is one or more (partial matches work) of # c("count", "missing", "mean", "geometric mean", "median", "sd", # "variance", "minimum", "maximum", "quantiles", "probabilities", # "mn(sd)", "range", "iqr","all") # or any formatting string allowed in print.tableStat() # na.rm indicates whether descriptive statistics should only be displayed on # nonmissing values of variable # # All other arguments are passed through to descrip() # # The object returned corresponds to stat="all", na.rm=T. Other # choices of those arguments just affect the print method. Hence, print.tableStat() can # use alternative choices of those arguments. # cl <- match.call() strata <- list(...) p <- length(strata) if (p==0) { if (is.null(variable)) stop("must specify variable or strata") marginOnly <- T if (!is.Surv(variable)) strata <- list(rep(1,length(variable))) else strata <- list(rep(1,dim(variable)[1])) namesL <- "" p <- 1 } else { marginOnly <- F namesL <- unlist(match.call(expand.dots=F)$...) hyperNames <- as.character(names(match.call(expand.dots=F)$...)) if (!is.null(hyperNames)) namesL[hyperNames!=""] <- hyperNames[hyperNames!=""] } n <- length(strata[[1]]) if (is.null(variable)) { nullVariable <- T variable <- rep(1,n) isDate <- F } else { nullVariable <- F isDate <- inherits(variable,"Date") if (isDate) variable <- as.integer(variable) } if (is.factor(variable)) variable <- as.numeric(variable) dimTable <- NULL dimLabels <- NULL indicators <- NULL for (j in 1:p) { if (is.factor(strata[[j]])) strata[[j]] <- as.numeric(strata[[j]]) if (!is.vector(strata[[j]])) stop("stratification arguments must be vectors") if (length(strata[[j]]) != n) stop("all stratification arguments must be of same length") tmp <- dummy(strata[[j]],includeAll=T) if (any(is.na(strata[[j]]))) { tmp <- cbind(tmp,"NA"=is.na(strata[[j]]),NotNA=!is.na(strata[[j]])) } tmp[is.na(tmp)] <- 0 tmp <- cbind(tmp,ALL=1) dimTable <- c(dimTable,dim(tmp)[2]) dimLabels <- c(dimLabels,list(paste(namesL[j],".",dimnames(tmp)[[2]],sep=""))) if (is.null(indicators)) indicators <- tmp else indicators <- rep(indicators,dim(tmp)[2]) * matrix(rep(t(tmp),each=dim(indicators)[2]),n,byrow=T) } indicators[indicators==0] <- NA if (!is.Surv(variable)) { rslt <- descrip (indicators*variable, probs=probs, replaceZeroes=replaceZeroes, restriction=restriction, above=above, below=below, labove=labove, rbelow=rbelow, lbetween=lbetween, rbetween=rbetween, interval=interval, linterval=linterval, rinterval=rinterval, lrinterval=lrinterval, subset=subset) } else { L <- NULL for (i in 1:(dim(indicators)[2])) { L <- c(L,list(variable)) L[[i]][,1] <- L[[i]][,1] * indicators[,i] } names(L) <- dimnames(indicators)[[2]] rslt <- descrip (L, probs=probs, replaceZeroes=replaceZeroes, restriction=restriction, above=above, below=below, labove=labove, rbelow=rbelow, lbetween=lbetween, rbetween=rbetween, interval=interval, linterval=linterval, rinterval=rinterval, lrinterval=lrinterval, subset=subset) } if (!is.null(subset)) { msng <- apply(indicators[subset,,drop=FALSE] * is.na(variable)[subset],2,sum,na.rm=T) cnt <- apply(indicators[subset,,drop=FALSE] * rep(1,n)[subset],2,sum,na.rm=T) } else { msng <- apply(indicators * is.na(variable),2,sum,na.rm=T) cnt <- apply(indicators * rep(1,n),2,sum,na.rm=T) } rslt[,1] <- as.vector(cnt) rslt[,2] <- as.vector(msng) Lrslt <- NULL for (i in 1:(dim(rslt)[2])) Lrslt <- c(Lrslt,list(array(rslt[,i],dim=dimTable,dimnames=dimLabels))) for (i in 3:length(Lrslt)) Lrslt[[i]][cnt==0] <- NaN if (nullVariable) for (i in 3:length(Lrslt)) Lrslt[[i]] <- Lrslt[[i]] * NA names(Lrslt) <- dimnames(rslt)[[2]] attr(Lrslt,"stat") <- stat attr(Lrslt,"call") <- cl attr(Lrslt,"na.rm") <- na.rm attr(Lrslt,"marginOnly") <- marginOnly attr(Lrslt,"isDate") <- isDate class(Lrslt) <- "tableStat" Lrslt } print.tableStat <- function (x, stat=attr(x,"stat"), na.rm=attr(x,"na.rm"), sigfigs=max(3,getOption("digits")-3),width=9,nonsci.limit=5, version=F) { vrsn <- "20121015" if (version) return(vrsn) cmptRoundDigits <- function (x, sf) { u <- !is.na(x) & x>-Inf & x < Inf y <- if (any(u)) max(abs(x[u]),na.rm=T) else 0 if (y==0) { 1 } else { y <- trunc(log(y) / log(10)) - (y < 1) max(0,sf - y - (y < sf)) } } frmtCol <- function (x, rd, sf, nonsci.limit, colwidth=9, append="") { rslt <- NULL for (i in 1:length(x)) { if (is.nan(x[i])) { tmp <- "NaN" } else if (is.na(x[i])) { tmp <- "NA" } else { if (rd <= nonsci.limit & abs(x[i]) < 10^nonsci.limit) { tmp <- format(round(x[i],rd),nsmall=rd,width=1) } else { tmp <- format(round(x[i],rd), digits=sf, scientific=T, width=1) } tmp <- ifelse(x[i]<0,tmp,paste(" ",tmp,sep="")) } rslt <- c(rslt,tmp) } rslt <- paste(rslt,append,sep="") format(rslt, justify="right", width=colwidth) } parseFormat <- function (x) { z <- strsplit(x,"@")[[1]] p <- length(z) stat <- z[seq(2,p,2)] specialFormat <- z[seq(1,p,2)] list(stat=stat,specialFormat=specialFormat) } removeLeading <- function (x) { allHaveBlank <- T while (allHaveBlank) { first <- substring(x,1,1) if (any(first!=" ")) allHaveBlank <- F if (allHaveBlank) x <- substring(x,2) } x } removeTrailing <- function (x) { allHaveBlank <- T while (allHaveBlank) { first <- substring(x,nchar(x),nchar(x)) if (any(first!=" ")) allHaveBlank <- F if (allHaveBlank) x <- substring(x,1,nchar(x)-1) } x } removeLeadingTrailing <- function (x) { x <- removeLeading (x) x <- removeTrailing (x) x } if (!inherits(x,"tableStat")) stop("x must be a tableStat object") isDate <- attr(x,"isDate") stat.chc <- c("count", "missing", "mean", "geometric mean", "median", "sd", "variance", "minimum", "maximum", "quantiles", "probabilities", "mn(sd)", "range", "iqr","all","row%", "col%", "tot%") Labels <- c("Counts of","Number of missing","Mean of","Geometric mean","Median of","Standard deviation of", "Variance of","Minima of","Maxima of","Quantiles of","Probabilities for specified ranges of", "Mean (SD) of","Min, Max of","25th, 75th %iles of","","Percentages by Row of","Percentages by Column of","Percentages (overall) of") xindx <- list(1,2,3,5,(1:length(x))[names(x)==" Mdn"],4,4,6,(1:length(x))[names(x)==" Max"], 7:((1:length(x))[names(x)==" Max"]-1),((1:length(x))[names(x)==" Max"]+1):(length(x)-4), 3:4,c(6,(1:length(x))[names(x)==" Max"]), c((1:length(x))[names(x)==" 25%"],(1:length(x))[names(x)==" 75%"])) if (length(xindx[[10]]) == 2 && xindx[[10]][1] > xindx[[10]][2]) xindx[[10]] <- rep(NA,2) if (length(xindx[[11]]) == 2 && xindx[[11]][1] > xindx[[11]][2]) xindx[[11]] <- rep(NA,2) if (length(xindx[[5]])==0) xindx[[5]] <- NA if (length(xindx[[14]])!=2) xindx[[14]] <- NA specialLabel <- list("Cnt","Msng","Mean","GeomMn","Mdn","SD", "Vrnc","Min","Max",names(x)[xindx[[10]]],names(x)[xindx[[11]]], "Mn(SD)","(Min, Max)","(25%, 75%)","","% of row","% of col","% of total") if (any(stat %in% c("a","al","all"))) stat <- stat.chc[-c(5,length(stat.chc))] if (!na.rm) { for (i in 3:length(x)) x[[i]][x[[2]]>0 & !is.nan(x[[2]])] <- NA } else { x[[1]] <- x[[1]] - x[[2]] } rowPct <- x[[1]] if (is.vector(rowPct)) { rowPct <- 100 * rowPct / rowPct[length(rowPct)] } else { dimTable <- dim(rowPct) tblSize <- dimTable[1] * dimTable[2] indx <- 1:length(rowPct) tblNbr <- (indx - 1) %/% tblSize tblRow <- rep(1:(dimTable[1]),length.out=length(indx)) denomIndx <- (tblNbr * tblSize) + (tblSize - dimTable[1]) + tblRow rowPct <- 100 * rowPct / rowPct[denomIndx] } colPct <- x[[1]] if (is.vector(colPct)) { colPct <- 100 * colPct / colPct } else { denomIndx <- rep(seq(dimTable[1],length(colPct),by=dimTable[1]),each=dimTable[1]) colPct <- 100 * colPct / colPct[denomIndx] } totPct <- 100 * x[[1]] / x[[1]][length(x[[1]])] censMin <- x[["firstEvent"]] > x[[6]] censMin[is.na(censMin)] <- F censMax <- x[["lastEvent"]] < x[[xindx[[9]]]] censMax[is.na(censMax)] <- F if (any(censMax)) { rAppend <- ifelse(censMax,"+"," ") maxAppend <- ifelse(censMax,"+"," ") } else { rAppend <- NULL maxAppend <- NULL } if (any(censMin)) { minAppend <- ifelse(censMin,"+"," ") } else { minAppend <- NULL } printRestriction <- F vrnc <- format(x[[4]]) rnd <- cmptRoundDigits(as.vector(x[[4]]^2),sigfigs) append <- rAppend vrnc[1:length(vrnc)] <- removeLeadingTrailing (frmtCol(as.vector(x[[4]]^2), rnd, sigfigs, nonsci.limit, 1, append)) if (isDate) { xformCol <- c(3,5:(length(x)-3)) for (j in 1:length(xformCol)) { if (substring(names(x)[xformCol[j]],1,2)=="Pr") xformCol[j] <- NA } xformCol <- xformCol[!is.na(xformCol)] orgn <- "1970-01-01" } for (i in 1:(length(x)-2)) { frmtTmp <- format(x[[i]]) rnd <- cmptRoundDigits(as.vector(x[[i]]),sigfigs) if (i %in% c(3:5)) { append <- rAppend } else if (i==6) { append <- minAppend } else if (names(x)[i]==" Max") { append <- maxAppend } else append <- NULL if (isDate) { if (i %in% xformCol) { frmtTmp[1:length(frmtTmp)] <- format(as.Date(x[[i]],orgn)) } else frmtTmp[1:length(frmtTmp)] <- removeLeadingTrailing (frmtCol(as.vector(x[[i]]), rnd, sigfigs, nonsci.limit, 1, append)) } else frmtTmp[1:length(frmtTmp)] <- removeLeadingTrailing (frmtCol(as.vector(x[[i]]), rnd, sigfigs, nonsci.limit, 1, append)) x[[i]] <- frmtTmp } append <- "%" frmtTmp <- format(rowPct) rnd <- cmptRoundDigits(as.vector(rowPct),sigfigs) frmtTmp[1:length(frmtTmp)] <- removeLeadingTrailing (frmtCol(as.vector(rowPct), rnd, sigfigs, nonsci.limit, 1, append)) rowPct <- frmtTmp frmtTmp <- format(colPct) rnd <- cmptRoundDigits(as.vector(colPct),sigfigs) frmtTmp[1:length(frmtTmp)] <- removeLeadingTrailing (frmtCol(as.vector(colPct), rnd, sigfigs, nonsci.limit, 1, append)) colPct <- frmtTmp frmtTmp <- format(totPct) rnd <- cmptRoundDigits(as.vector(totPct),sigfigs) frmtTmp[1:length(frmtTmp)] <- removeLeadingTrailing (frmtCol(as.vector(totPct), rnd, sigfigs, nonsci.limit, 1, append)) totPct <- frmtTmp cat("Tabled descriptive statistics by strata\nCall:\n ",format(attr(x,"call")),"\n") cat(" - NaN denotes strata with no observations\n") cat(" - NA arises from missing or censored data\n") statTable <- NULL specialFormat <- NULL template <- NULL term <- 0 pterm <- 0 qterm <- 0 censoredExtrema <- F if (length(stat)==1) { if (is.na(pmatch(stat,stat.chc))) { z <- parseFormat (stat) if (length(z$stat) > 0) { specialFormat <- T stat <- z$stat specialFormat <- c(z$specialFormat,"") } } } for (s in stat) { indx <- pmatch(s,stat.chc) if (is.na(indx)) { warning(paste(s,"is unrecognized stat")) } else if (indx < 16 && any(is.na(xindx[[indx]]))) { warning(paste("descriptives needed for",s,"are unavailable")) } else { if (is.null(specialFormat)) { cat("\n##### ",Labels[indx],ifelse1(na.rm," nonmissing "," "),"observations within strata\n") if (indx %in% c(3,4,6,12) & !is.null(rAppend)) { printRestriction <- T cat(" - (+ denotes a restricted mean-- see output for restriction on observation time)\n") } else if (indx %in% c(8,9,13) & !(is.null(minAppend) & is.null(maxAppend))) { cat(" - (+ denotes a censored estimate of extrema)\n") } cat("\n") if (indx %in% c(1:6,8,9)) { print(x[[xindx[[indx]][1]]],quote=F) statTable <- c(statTable,list(x[[xindx[[indx]][1]]])) } else if (indx==7) { print(vrnc,quote=F) statTable <- c(statTable,list(vrnc)) } else if (indx==16) { print(rowPct,quote=F) statTable <- c(statTable,list(rowPct)) } else if (indx==17) { print(colPct,quote=F) statTable <- c(statTable,list(colPct)) } else if (indx==18) { print(totPct,quote=F) statTable <- c(statTable,list(totPct)) } else if (indx %in% 10:11) { statTable <- c(statTable,x[xindx[[indx]]]) for (j in xindx[[indx]]) { cat("#",c("Quantile: ","Probability: ")[indx-9],names(x)[j],"\n") print(x[[j]],quote=F) } } else if (indx==12) { tmp <- x[[3]] tmp[1:length(tmp)] <- paste(as.vector(tmp)," (",as.vector(x[[4]]),")",sep="") print(tmp,quote=F) statTable <- c(statTable,list(tmp)) } else { tmp <- x[[xindx[[indx]][1]]] tmp[1:length(tmp)] <- paste("(",as.vector(tmp),",",as.vector(x[[xindx[[indx]][2]]]),")",sep="") print(tmp,quote=F) statTable <- c(statTable,list(tmp)) } } else { if (indx %in% c(3,4,6,12) & !is.null(rAppend)) { printRestriction <- T } else if (indx %in% c(8,9,13) & !(is.null(minAppend) & is.null(maxAppend))) { censoredExtrema <- T } if (indx %in% c(1:6,8,9)) { tmp <- x[[xindx[[indx]][1]]] sL <- specialLabel[[indx]][1] } else if (indx==7) { tmp <- vrnc sL <- specialLabel[[indx]][1] } else if (indx==16) { tmp <- rowPct sL <- specialLabel[[indx]][1] } else if (indx==17) { tmp <- colPct sL <- specialLabel[[indx]][1] } else if (indx==18) { tmp <- totPct sL <- specialLabel[[indx]][1] } else if (indx == 10) { if (qterm < length(xindx[[10]])) qterm <- qterm + 1 tmp <- x[[xindx[[10]][qterm]]] sL <- specialLabel[[indx]][qterm] } else if (indx == 11) { if (pterm < length(xindx[[11]])) pterm <- pterm + 1 tmp <- x[[xindx[[11]][pterm]]] sL <- specialLabel[[indx]][pterm] } else if (indx==12) { tmp <- x[[3]] tmp[1:length(tmp)] <- paste(as.vector(tmp)," (",as.vector(x[[4]]),")",sep="") sL <- specialLabel[[indx]][1] } else { tmp <- x[[xindx[[indx]][1]]] tmp[1:length(tmp)] <- paste("(",as.vector(tmp),",",as.vector(x[[xindx[[indx]][2]]]),")",sep="") sL <- specialLabel[[indx]][1] } term <- term + 1 if (term == 1) { tmp[1:length(tmp)] <- paste(specialFormat[term],as.vector(tmp),sep="") statTable <- tmp template <- paste(specialFormat[term],sL,sep="") } else { statTable[1:length(statTable)] <- paste(as.vector(statTable),specialFormat[term],as.vector(tmp),sep="") template <- paste(template,specialFormat[term],sL,sep="") } } } } if (!is.null(specialFormat)) { term <- term + 1 statTable[1:length(statTable)] <- paste(as.vector(statTable),specialFormat[term],sep="") template <- paste(template,specialFormat[term],sep="") if (printRestriction || censoredExtrema) cat(" - (+ denotes a censored estimate-- see output for restriction on observation time)\n") cat("\nFormat: ",template,"\n\n") if (attr(x,"marginOnly")) { print(statTable[-1], quote=F) if (printRestriction) { cat("\n##### Restriction on observations within strata for computing means\n") print(x[[length(x)-3]][-1],quote=F) } } else { print(statTable, quote=F) if (printRestriction) { cat("\n##### Restriction on observations within strata for computing means\n") print(x[[length(x)-3]],quote=F) } } } else { if (printRestriction) { cat("\n##### Restriction on observations within strata for computing means\n") cat(" - NaN denotes strata with no observations\n\n") print(x[[length(x)-3]],quote=F) statTable <- c(statTable,list(x[[length(x)-2]])) } names(statTable) <- c(stat,ifelse1(printRestriction,"Restriction",NULL)) } invisible(statTable) } scatter <- function (y, x, strata=rep(1,length(y)), subset= rep(T,length(y)), reference=sort(unique(strata)), plotPoints=T, plotLowess=T, plotLSfit=F, legend=0.05, colors=c("black", "blue", "orange", "pink", "green", "red", "cornflowerblue", "darkolivegreen", "magenta"), xJitter=T, yJitter=F, newplot=T, ..., version=F) { vrsn <- "20110928" if (version) return(vrsn) if (!is.null(strata)) { if (is.list(strata)) { for (i in 1:length(strata)) { if (!is.vector(strata[[i]])) stop("strata can only be a vector, matrix, or list of vectors") } n <- length(strata[[1]]) if (length(strata) > 1) { for (i in 2:length(strata)) { if (length(strata[[i]]) != n) stop("all elements in strata must be same length") } } snms <- names(strata) if (is.null(snms)) snms <- rep("",length(strata)) tmp <- paste(snms[1],format(strata[[1]])) if (length(strata) > 1) { for (i in 2:length(strata)) tmp <- paste(tmp,snms[i],format(strata[[i]])) } } else { strata <- as.matrix(strata) snms <- dimnames(strata)[[2]] if (is.null(snms)) snms <- rep("",dim(strata)[2]) tmp <- paste(snms[1],format(strata[,1])) if (dim(strata)[2] > 1) { for (i in 2:(dim(strata)[2])) tmp <- paste(tmp,snms[i],format(strata[,i])) } } strata <- tmp } y <- y[subset] x <- x[subset] strata <- strata[subset] hyperNames <- as.character(names(match.call(expand.dots=F)$...)) xlbl <- ylbl <- NULL if (!("xlab" %in% hyperNames)) { xlbl <- as.character(match.call(expand.dots=F)$x) } if (!("ylab" %in% hyperNames)) { ylbl <- as.character(match.call(expand.dots=F)$y) } u <- !is.na(x) & !is.na(y) & !is.na(strata) y <- y[u] x <- x[u] xj <- xJitter * min(diff(sort(unique(x)))) / 8 yj <- yJitter * min(diff(sort(unique(y)))) / 8 strata <- strata[u] xrng <- range(x) yrng <- range(y) if (length(reference) == 1) legend <- 0 if (newplot) { if (is.null(xlbl)) { if (is.null(ylbl)) { plot(xrng + c(0,(legend > 0)*0.25*diff(xrng)),yrng,type="n",...) } else { plot(xrng + c(0,(legend > 0)*0.25*diff(xrng)),yrng,type="n",ylab=ylbl,...) } } else if (is.null(ylbl)) { plot(xrng + c(0,(legend > 0)*0.25*diff(xrng)),yrng,type="n",xlab=xlbl,...) } else plot(xrng + c(0,(legend > 0)*0.25*diff(xrng)),yrng,type="n",xlab=xlbl,ylab=ylbl,...) } for (i in 1:length(reference)) { u <- strata==reference[i] if (plotPoints) points(x[u] + rnorm(sum(u))*xj,y[u] + rnorm(sum(u))*yj,col=colors[i],pch=i) if (plotLowess) lines(lowess(x[u],y[u]),lty=i,col=colors[i]) if (plotLSfit) { z <- lm.fit(cbind(1,x[u]),y[u])$coeff lines(xrng,z[1]+z[2]*xrng,lty=i,col=colors[i]) } if (legend > 0) { if (plotLowess | plotLSfit) lines(xrng[2] + 2*xj + c(0.02,.1)*diff(xrng), rep(yrng[2] - i*legend*diff(yrng),2),lty=i,col=colors[i]) points(xrng[2] + 2*xj + .06*diff(xrng), yrng[2] - i*legend*diff(yrng),pch=i,col=colors[i]) text(xrng[2] + 2*xj + .12*diff(xrng), yrng[2] - i*legend*diff(yrng), reference[i],adj=0) } } invisible(NULL) } correlate <- function (..., strata=NULL, subset=NULL, conf.level= 0.95, use="pairwise.complete.obs", method="pearson", stat="cor", byStratum=T, version=F) { vrsn <- "20121017" if (version) return(vrsn) # # subset defines a subsetting criterion applied to every vector # strata defines a matrix or data frame of vectors whose unique combinations define strata # corr <- function(z, na.rm=T, conf.level= 0.95, method="pearson") { if (!is.matrix(z)) stop ("corr must take a matrix z") p <- dim(z)[2] nms <- dimnames(z)[[2]] if(method=="spearman") { for(j in 1:p) z[,j] <- rank(z[,j],na.last="keep") } z1 <- matrix(rep(z, p), ncol = p * p) z2 <- z1[, as.vector(t(matrix(1:(p^2), p)))] one <- rep(1, dim(z)[1]) w <- z1 * z2 if (na.rm) { u <- is.na(w) z1[u] <- 0 z2[u] <- 0 w[u] <- 0 } else u <- matrix(F,dim(w)[1],dim(w)[2]) n <- as.vector(one %*% (!u)) nn <- n nn[nn <= 2] <- NA vx <- one %*% z1^2 - (one %*% z1)^2/nn vy <- one %*% z2^2 - (one %*% z2)^2/nn cov <- matrix((one %*% w - ((one %*% z1) * (one %*% z2))/n)/sqrt(vx * vy), nrow = p) tst <- cov/sqrt((1 - cov^2)/(nn - 2)) tst[cov == 1 | cov == -1] <- 0 z <- log ((1+cov)/(1-cov)) / 2 lo <- hi <- p <- rep(NA, length(n)) if (any(!is.na(nn))) { for(m in unique(n[!is.na(nn)])) p[n == m] <- 2 * pt( - abs(tst[n == m]), m - 2) } if (any(n > 3)) { for(m in unique(n[n > 3])) { lo[n == m] <- z[n == m] - qnorm(conf.level/2 + 0.5) / sqrt(m-3) hi[n == m] <- z[n == m] + qnorm(conf.level/2 + 0.5) / sqrt(m-3) } } lo <- (exp(2 * lo) - 1) / (exp(2 * lo) + 1) hi <- (exp(2 * hi) - 1) / (exp(2 * hi) + 1) n <- matrix(n, dim(cov)[1]) p <- matrix(p, dim(cov)[1]) lo <- matrix(lo, dim(cov)[1]) hi <- matrix(hi, dim(cov)[1]) diag(lo) <- 1 diag(hi) <- 1 lo[n ==0] <- NaN hi[n ==0] <- NaN dimnames(cov) <- dimnames(n) <- dimnames(tst) <- dimnames(p) <- dimnames(lo) <- dimnames(hi) <- list(nms, nms) rslt <- list(cormtx = cov, n = n, t.stat = tst, pval = p, lo=lo,hi=hi) names(rslt)[5:6] <- paste(c("lo","hi"),format(100*conf.level),"%CI",sep="") rslt } chc.use <- c("everything","complete.obs","pairwise.complete.obs") use <- pmatch(use, chc.use) if (is.na(use)) stop("unrecognized value for use") chc.method <- c("pearson","spearman") method <- pmatch(method,chc.method) if (is.na(method)) stop("unrecognized value for method") method <- chc.method[method] na.rm <- use==3 L <- list(...) names(L) <- unlist(match.call(expand.dots=F)$...) p <- length(L) nms <- NULL for (i in 1:p) { x <- L[[i]] if (is.list(x)) { if (is.null(names(x))) { nms <- c(nms,paste(names(L)[i],".V",1:length(x),sep="")) } else nms <- c(nms,names(x)) } else if (is.matrix(x) & !is.Surv(x)) { if (is.null(dimnames(x)[[2]])) { nms <- c(nms,paste(names(L)[i],".V",1:(dim(x)[2]),sep="")) } else nms <- c(nms, dimnames(x)[[2]]) } else nms <- c(nms,names(L)[i]) } nms <- paste(format(nms, justify="right"),": ",sep="") if (!is.null(strata)) { if (is.list(strata)) { for (i in 1:length(strata)) { if (!is.vector(strata[[i]])) stop("strata can only be a vector, matrix, or list of vectors") } n <- length(strata[[1]]) if (length(strata) > 1) { for (i in 2:length(strata)) { if (length(strata[[i]]) != n) stop("all elements in strata must be same length") } } snms <- names(strata) if (is.null(snms)) snms <- rep("",length(strata)) tmp <- paste(snms[1],format(strata[[1]])) if (length(strata) > 1) { for (i in 2:length(strata)) tmp <- paste(tmp,snms[i],format(strata[[i]])) } } else { strata <- as.matrix(strata) snms <- dimnames(strata)[[2]] if (is.null(snms)) snms <- rep("",dim(strata)[2]) tmp <- paste(snms[1],format(strata[,1])) if (dim(strata)[2] > 1) { for (i in 2:(dim(strata)[2])) tmp <- paste(tmp,snms[i],format(strata[,i])) } } strata <- tmp } rslt <- NULL X <- NULL nV <- 0 omitVariables <- NULL for (i in 1:p) { x <- L[[i]] if (is.list(x)) { names(x) <- nms[nV + (1:length(x))] p2 <- length(x) for (j in 1:p2) { x2 <- x[[j]] if (is.matrix(x2) & !is.Surv(x2)) { dimnames(x2) <- list(NULL,nms[nV + (1:(dim(x)[2]))]) nV <- nV + dim(x2)[2] if (!is.character(x2) & !is.factor(x2)) { X <- cbind(X,x2) } else { omitVariables <- c(omitVariables,dimnames(x2)[[2]]) } } else if (is.Surv(x2) | is.factor(x2)) { nV <- nV + 1 omitVariables <- c(omitVariables,nms[nV]) } else { x2 <- matrix(x2) nV <- nV + 1 dimnames(x2) <- list(NULL,nms[nV]) X <- cbind(X,x2) } } } else if (is.matrix(x) & !is.Surv(x)) { dimnames(x) <- list(NULL,nms[nV + (1:(dim(x)[2]))]) nV <- nV + dim(x)[2] if (!is.character(x) & !is.factor(x)) { X <- cbind(X,x) } else { omitVariables <- c(omitVariables,dimnames(x)[[2]]) } } else if (is.Surv(x) | is.factor(x)) { nV <- nV + 1 omitVariables <- c(omitVariables,nms[nV]) } else { x <- matrix(x) nV <- nV + 1 dimnames(x) <- list(NULL,nms[nV]) X <- cbind(X,x) } } if (is.null(X)) stop("no variables for correlations") if (dim(X)[1]==1) stop("must have more than 1 variable for correlations") if (use == 2) { deleteCase <- apply(is.na(X),1,any) } else deleteCase <- rep(F,dim(X)[1]) if (!is.null(subset)) { if (is.logical(subset)) keepCase <- (1:(dim(X)[1]))[subset & !deleteCase] else if (is.integer(subset)) { subset <- subset[!is.na(subset)] if (length(subset)==0) stop("improper format for subset") if (all(subset > 0)) { keepCase <- subset[!(subset %in% (1:(dim(X)[1]))[deleteCase])] } else if (all(subset < 0)) { keepCase <- (1:(dim(X)[1]))[unique(c(subset,-(1:(dim(X)[1]))[deleteCase]))] } else stop("improper format for subset") } else stop("improper format for subset") } else keepCase <- !deleteCase X <- X[keepCase,] strata <- strata[keepCase] if (!is.null(strata)) { for (s in sort(unique(strata))) { indx <- strata==s rslt <- c(rslt,list(corr(X[indx,],na.rm=na.rm, conf.level=conf.level, method=method))) } names(rslt) <- sort(unique(strata)) } rslt <- c(rslt,list(All=corr(X,na.rm=na.rm, conf.level=conf.level, method=method))) attr(rslt,"omitVariables") <- omitVariables attr(rslt,"use") <- chc.use[use] attr(rslt,"method") <- method attr(rslt,"stat") <- stat attr(rslt, "byStratum") <- byStratum attr(rslt, "conf.level") <- conf.level attr(rslt, "call") <- match.call() class(rslt) <- "uCorrelate" rslt } print.uCorrelate <- function (x, stat=attr(x,"stat"), byStratum=attr(x,"byStratum"), sigfigs=max(5,getOption("digits")-2),width=9,nonsci.limit=5, version=F) { vrsn <- "20110930" if (version) return(vrsn) cmptRoundDigits <- function (x, sf) { u <- !is.na(x) & x>-Inf & x < Inf y <- if (any(u)) max(abs(x[u]),na.rm=T) else 0 if (y==0) { 1 } else { y <- trunc(log(y) / log(10)) - (y < 1) max(0,sf - y - (y < sf)) } } frmtCol <- function (x, rd, sf, nonsci.limit, colwidth=9, append="") { rslt <- NULL for (i in 1:length(x)) { if (is.nan(x[i])) { tmp <- "NaN" } else if (is.na(x[i])) { tmp <- "NA" } else { if (rd <= nonsci.limit & abs(x[i]) < 10^nonsci.limit) { tmp <- format(round(x[i],rd),nsmall=rd,width=1) } else { tmp <- format(round(x[i],rd), digits=sf, scientific=T, width=1) } tmp <- ifelse(x[i]<0,tmp,paste(" ",tmp,sep="")) } rslt <- c(rslt,tmp) } rslt <- paste(rslt,append,sep="") format(rslt, justify="right", width=colwidth) } parseFormat <- function (x) { z <- strsplit(x,"@")[[1]] p <- length(z) stat <- z[seq(2,p,2)] specialFormat <- z[seq(1,p,2)] list(stat=stat,specialFormat=specialFormat) } removeLeading <- function (x) { allHaveBlank <- T while (allHaveBlank) { first <- substring(x,1,1) if (any(first!=" ")) allHaveBlank <- F if (allHaveBlank) x <- substring(x,2) } x } removeTrailing <- function (x) { allHaveBlank <- T while (allHaveBlank) { first <- substring(x,nchar(x),nchar(x)) if (any(first!=" ")) allHaveBlank <- F if (allHaveBlank) x <- substring(x,1,nchar(x)-1) } x } removeLeadingTrailing <- function (x) { x <- removeLeading (x) x <- removeTrailing (x) x } if (!inherits(x,"uCorrelate")) stop("x must be a uCorrelate object") conf.level <- attr(x,"conf.level") stat.chc <- c("cor", "n", "t.stat", "pval", "loCI", "hiCI","all") Labels <- c("Estimated Correlation Coefficients","Sample Size","t Statistics","Two-sided P values", paste(format(100*conf.level), "% Conf Intvl: Lower",sep=""), paste(format(100*conf.level), "% Conf Intvl: Upper",sep="")) xindx <- 1:6 specialLabel <- list("Corr","N","t Stat","Two-sided P","loCI","hiCI") if (!byStratum & length(x) > 1) { v <- NULL for (k in 1:(dim(x[[1]][[1]])[1])) { vrblTables <- NULL for (i in 1:6) { vt <- NULL for (j in 1:length(x)) { vt <- rbind(vt,x[[j]][[i]][k,]) } dimnames(vt)[[1]] <- names(x) vrblTables <- c(vrblTables,list(vt)) } names(vrblTables) <- names(x[[1]]) v <- c(v,list(vrblTables)) } names(v) <- dimnames(x[[1]][[1]])[[1]] } else v <- x for (i in 1:(length(v))) { for (k in 1:6) { frmtTmp <- format(v[[i]][[k]]) rnd <- cmptRoundDigits(as.vector(v[[i]][[k]]),sf=ifelse1(k==2,1,sigfigs)) frmtTmp[1:length(frmtTmp)] <- removeLeadingTrailing (frmtCol(as.vector(v[[i]][[k]]), rnd, sf=ifelse1(k==2,1,sigfigs), nonsci.limit, 1, "")) v[[i]][[k]] <- frmtTmp } } chc.use <- c("everything","complete.obs","pairwise.complete.obs") use <- pmatch(attr(x,"use"), chc.use) chc.method <- c("pearson","spearman") method <- pmatch(attr(x,"method"),chc.method) cat("Tabled correlation statistics by strata\nCall:\n ",format(attr(x,"call")),"\n") cat(" Method: ",c("Pearson","Spearman")[method],"\n") cat(" Data : ",c("All","Complete Cases","Pairwise Complete")[use],"\n") cat(" - NaN denotes strata with no observations\n") cat(" - NA arises from missing data\n") statTable <- NULL specialFormat <- NULL if (length(stat)==1) { if (is.na(pmatch(stat,stat.chc))) { z <- parseFormat (stat) if (length(z$stat) > 0) { stat <- z$stat specialFormat <- c(z$specialFormat,"") } } else if (stat=="all" | stat=="al" | stat=="a") stat <- stat.chc[1:6] } for (j in 1:length(v)) { template <- NULL term <- 0 stratumTables <- NULL if (byStratum | length(v)==1) { if (j < length(v)) cat("\n##### Stratum ",names(v)[j],"\n") else cat("\n##### ALL DATA\n") } else cat("\n##### Variable ",names(v)[j],"\n") for (s in stat) { indx <- pmatch(s,stat.chc) if (is.na(indx)) { warning(paste(s,"is unrecognized stat")) } else { if (is.null(specialFormat)) { cat(" ## ",Labels[indx],"\n") print(v[[j]][[xindx[indx]]],quote=F) stratumTables <- c(stratumTables,list(v[[j]][[xindx[indx]]])) } else { tmp <- v[[j]][[xindx[indx]]] sL <- specialLabel[[indx]] term <- term + 1 if (term == 1) { tmp[1:length(tmp)] <- paste(specialFormat[term],as.vector(tmp),sep="") stratumTables <- tmp template <- paste(specialFormat[term],sL,sep="") } else { stratumTables[1:length(stratumTables)] <- paste(as.vector(stratumTables),specialFormat[term],as.vector(tmp),sep="") template <- paste(template,specialFormat[term],sL,sep="") } } } } if (!is.null(specialFormat)) { term <- term + 1 stratumTables[1:length(stratumTables)] <- paste(as.vector(stratumTables),specialFormat[term],sep="") template <- paste(template,specialFormat[term],sep="") cat("\nFormat: ",template,"\n\n") print(stratumTables, quote=F) } statTable <- c(statTable,list(stratumTables)) } names(statTable) <- names(x) invisible(statTable) } extract.tableStat <- function (x, stat=attr(x,"stat"), na.rm=attr(x,"na.rm"), version=F) { vrsn <- "20121024" if (version) return(vrsn) # This function is used by clusterStats(). It presumes a "tableStat" object created with # - stratified statistics on a single variable that had no missing data (this is # satisfied if any missing strata values formed their own strata), # - only a single quantile or a single probability will have been generated # (actually it returns the first quantile or first probability) # # Only the first element of stat is used, and it cannot be a special format # # It returns # - numeric vectors for all probabilities # - for uncensored numeric data, it returns a numeric vector for all statistics # - for censored data, the minimum, maximum, and quantiles are returned as "Surv" objects # - for censored data, means, geometric means, sd, variances are returned with restrictions # as an attribute # - dates, it returns dates for all means, minimum, maximum, quantiles and numeric vectors # for sd and variances if (!inherits(x,"tableStat")) stop("x must be a tableStat object") ns <- dim(x[[1]]) if (length(ns) > 1) stop("only a single stratification variable can be used") isDate <- attr(x,"isDate") orgn <- "1970-01-01" stat.chc <- c("count", "missing", "mean", "geometric mean", "median", "sd", "variance", "minimum", "maximum", "quantiles", "probabilities") stat <- stat[1] indx <- pmatch(stat,stat.chc) if (is.na(indx)) stop("statistic not recognized") xindx <- list(1,2,3,5,(1:length(x))[names(x)==" Mdn"],4,4,6,(1:length(x))[names(x)==" Max"], 7:((1:length(x))[names(x)==" Max"]-1),((1:length(x))[names(x)==" Max"]+1):(length(x)-4)) if (length(xindx[[10]]) == 2 && xindx[[10]][1] > xindx[[10]][2]) xindx[[10]] <- NA if (length(xindx[[11]]) == 2 && xindx[[11]][1] > xindx[[11]][2]) xindx[[11]] <- NA if (length(xindx[[5]])==0) xindx[[5]] <- NA specialLabel <- list("Cnt","Msng","Mean","GeomMn","Mdn","SD", "Vrnc","Min","Max",paste(names(x)[xindx[[10]]],"ile",sep=""),names(x)[xindx[[11]]]) if (!na.rm) { for (i in 3:length(x)) x[[i]][x[[2]]>0 & !is.nan(x[[2]])] <- NA } else { x[[1]] <- x[[1]] - x[[2]] } censMin <- x[["firstEvent"]] > x[[6]] censMin[is.na(censMin)] <- F censMax <- x[["lastEvent"]] < x[[xindx[[9]]]] censMax[is.na(censMax)] <- F if (any(is.na(xindx[[indx]][1]))) stop(paste("descriptives needed for",stat,"are unavailable")) rslt <- x[[xindx[[indx]][1]]][-ns] if (indx==7) rslt <- rslt^2 if (indx==8 && any(censMin)) rslt <- Surv(rslt,1-censMin[-ns]) else if (indx==9 && any(censMax)) rslt <- Surv(rslt,1-censMax[-ns]) else if (indx %in% c(3,4,6,7) && any(x[["restriction"]]!=Inf)) { rslt <- Surv(rslt,1-censMax[-ns]) attr(rslt,"restriction") <- x[[length(x)-3]][-ns] } if (isDate && indx %in% c(3:5,8:10)) rslt <- as.Date(rslt,orgn) attr(rslt,"statistic") <- specialLabel[[indx]][1] rslt } clusterStatsOld <- function(y, cluster=NULL, stat="count", subset=NULL, x=NULL, ..., version=F) { vrsn <- "20121015" if (version) return(vrsn) if (!is.null(cluster)) { if (is.list(cluster)) { for (i in 1:length(cluster)) { if (!is.vector(cluster[[i]])) stop("cluster can only be a vector, matrix, or list of vectors") } n <- length(cluster[[1]]) if (length(cluster) > 1) { for (i in 2:length(cluster)) { if (length(cluster[[i]]) != n) stop("all elements in cluster must be same length") } } snms <- names(cluster) if (is.null(cluster)) snms <- rep("",length(cluster)) tmp <- paste(snms[1],format(cluster[[1]])) if (length(cluster) > 1) { for (i in 2:length(cluster)) tmp <- paste(tmp,snms[i],format(cluster[[i]])) } } else { cluster <- as.matrix(cluster) snms <- dimnames(cluster)[[2]] if (is.null(snms)) snms <- rep("",dim(cluster)[2]) tmp <- paste(snms[1],format(cluster[,1])) if (dim(cluster)[2] > 1) { for (i in 2:(dim(cluster)[2])) tmp <- paste(tmp,snms[i],format(cluster[,i])) } } snms <- cluster cluster <- as.integer(factor(tmp)) } if (stat!="slope") { zy <- extract.tableStat(tableStat(y, cluster, stat=stat, subset=subset, ...)) rslt <- zy[cluster] if(!is.null(attr(zy,"restriction"))) attr(rslt,"restriction") <- attr(zy,"restriction")[cluster] } else { if (is.null(x) || length(x) != length(y) || !is.numeric(x)) stop("need numeric x to compute slopes") if (inherits(y,"Date") || is.Surv(y)) stop("need numeric y to compute slopes") zxy <- extract.tableStat(tableStat(x*y, cluster, stat="mean", subset=subset, ...)) zx2 <- extract.tableStat(tableStat(x*x, cluster, stat="mean", subset=subset, ...)) zx <- extract.tableStat(tableStat(x, cluster, stat="mean", subset=subset, ...)) zy <- extract.tableStat(tableStat(y, cluster, stat="mean", subset=subset, ...)) rslt <- (zx2 - zx*zx) rslt <- ifelse(is.na(rslt) | rslt==0, NA, rslt) rslt <- (zxy - zx * zy) / rslt rslt <- rslt[cluster] } names(rslt) <- snms attr(rslt,"stat") <- stat rslt } clusterStats <- function(y, cluster=NULL, stat="count", subset=NULL, x=NULL, ..., version=F) { vrsn <- "20121024" if (version) return(vrsn) if (!is.null(cluster)) { if (is.list(cluster)) { for (i in 1:length(cluster)) { if (!is.vector(cluster[[i]])) stop("cluster can only be a vector, matrix, or list of vectors") } n <- length(cluster[[1]]) if (length(cluster) > 1) { for (i in 2:length(cluster)) { if (length(cluster[[i]]) != n) stop("all elements in cluster must be same length") } } snms <- names(cluster) if (is.null(cluster)) snms <- rep("",length(cluster)) tmp <- paste(snms[1],format(cluster[[1]])) if (length(cluster) > 1) { for (i in 2:length(cluster)) tmp <- paste(tmp,snms[i],format(cluster[[i]])) } } else { cluster <- as.matrix(cluster) snms <- dimnames(cluster)[[2]] if (is.null(snms)) snms <- rep("",dim(cluster)[2]) tmp <- paste(snms[1],format(cluster[,1])) if (dim(cluster)[2] > 1) { for (i in 2:(dim(cluster)[2])) tmp <- paste(tmp,snms[i],format(cluster[,i])) } } snms <- cluster cluster <- as.integer(factor(tmp)) } uniqueClusters <- unique(cluster) nCluster <- length(uniqueClusters) rslt <- rep(0,length(cluster)) restr <- rep(Inf,length(cluster)) if (stat!="slope") { for (cl in uniqueClusters) { u <- cluster==cl zy <- extract.tableStat(tableStat(y[u], stat=stat, subset=subset[u], ...)) if(is.Surv(zy)) { if(!is.Surv(rslt)) rslt <- Surv(rslt,rep(1,length(rslt))) rslt[u,1] <- zy[,1] rslt[u,2] <- zy[,2] } else rslt[u] <- zy if(!is.null(attr(zy,"restriction"))) restr[u] <- attr(zy,"restriction") } if(any(restr!=Inf)) attr(rslt,"restriction") <- restr if(inherits(y,"Date")) rslt <- as.Date(rslt,origin="1970-01-01") } else { if (is.null(x) || length(x) != length(y) || !is.numeric(x)) stop("need numeric x to compute slopes") if (inherits(y,"Date") || is.Surv(y)) stop("need numeric y to compute slopes") for (cl in uniqueClusters) { u <- cluster==cl zxy <- extract.tableStat(tableStat(x[u]*y[u], stat="mean", subset=subset[u], ...)) zx2 <- extract.tableStat(tableStat(x[u]^2, stat="mean", subset=subset[u], ...)) zx <- extract.tableStat(tableStat(x[u], stat="mean", subset=subset[u], ...)) zy <- extract.tableStat(tableStat(y[u], stat="mean", subset=subset[u], ...)) tmp <- (zx2 - zx*zx) tmp <- ifelse(is.na(tmp) | tmp==0, NA, tmp) tmp <- (zxy - zx * zy) / tmp rslt[u] <- tmp } } names(rslt) <- snms attr(rslt,"stat") <- stat rslt }