regress <- function (fnctl, # the distribution functional to be compared y, # the response variable in the regression analysis model, # the terms in the regression analysis intercept=fnctl!="hazard", strata=rep(1,n),weights=rep(1,n),id=1:n,subset=rep(T,n), robustSE = T, conf.level=0.95, exponentiate=fnctl!="mean", replaceZeroes, useFdstn=T, ..., version=F) { vrsn <- "20110928" if (version) return(vrsn) cl <- match.call() # checking functional fnctl findx <- pmatch(fnctl,c("mean", "geometric mean", "odds", "rate", "hazard")) if (is.na(findx)) stop("unsupported functional") fnctl <- c("mean", "geometric mean", "odds", "rate", "hazard")[findx] if (intercept & fnctl=="hazard") stop("hazard regression cannot include an intercept") # checking response variable y if (fnctl=="geometric mean") { if (missing(replaceZeroes)) replaceZeroes <- min(y[y>0]/2) y <- log(ifelse(y<=0,replaceZeroes,y)) } else replaceZeroes <- NA if (fnctl=="hazard") { if (!is.Surv(y)) stop("y must be a Surv object for hazard regression") n <- dim(y)[1] } else n <- length(y) # checking the predictors, weights, subset, and cluster if (!inherits(model,"uModel")) { if (!is.list(model) & !is.matrix(model)) { model <- cbind(model) dimnames(model)[[2]] <- as.character(cl[[4]]) } model <- uModel(model) } if (dim(model$X)[1] != n) stop("response variable and predictors must be of same length") if (!missing(strata) & fnctl != "hazard") warning("strata ignored unless hazard regression") if (length(strata) != n) stop("response variable and strata must be of same length") if (length(weights) != n) stop("response variable and weights must be of same length") if (length(subset) != n) stop("response variable and subsetting variable must be of same length") if (length(id) != n) stop("response variable and clustering variable must be of same length") model <- c(model,list(y=y,strata=strata,weights=weights,id=id,subset=subset)) z <- c(model,list(fnctl=fnctl, intercept=intercept, exponentiate=exponentiate, replaceZeroes=replaceZeroes, conf.level=conf.level, useFdstn=useFdstn, original=model)) # applying subsetting variable if (fnctl=="hazard") z$y <- z$y[subset,] else z$y <- z$y[subset] z$X <- z$X[subset,,drop=F] z$strata <- z$strata[subset] z$weights <- z$weights[subset] z$id <- z$id[subset] # setting up the augmented coefficients matrix nms <- c(z$terms[z$firstPred!=z$lastPred],z$preds) fst <- c(z$firstPred[z$firstPred!=z$lastPred],1:length(z$preds)) lst <- c(z$lastPred[z$firstPred!=z$lastPred],1:length(z$preds)) u <- order(fst,-lst) nms <- c(ifelse1(intercept,"Intercept",NULL),nms[u]) fst <- c(ifelse1(intercept,0,NULL),fst[u]) + intercept lst <- c(ifelse1(intercept,0,NULL),lst[u]) + intercept cinames <- paste(format(100*conf.level),"%",c("L","H"),sep="") if (exponentiate) cinames <- paste("e(",c("Est",cinames),")",sep="") cinames <- c("Estimate",ifelse1(robustSE,c("Naive SE","Robust SE"),"Std Err"),cinames, ifelse1(useFdstn,c("F stat","Pr(>F)"),c("Chi2 stat","Pr(>Chi2)"))) augCoefficients <- matrix(0,sum(z$firstPred!=z$lastPred)+length(z$pred)+intercept,length(cinames)) dimnames(augCoefficients) <- list(nms,cinames) # checking for singular design matrices yy <- rnorm(dim(z$X)[1]) yy[is.na(z$y)] <- NA zz <- if (intercept) lm(yy ~ z$X,weights=z$weights) else lm(yy ~ 0 + z$X,weights=z$weights) droppedPred <- is.na(zz$coefficients) if (intercept) droppedPred <- droppedPred[-1] names(droppedPred) <- dimnames(z$X)[[2]] z <- c(z,list(droppedPred=droppedPred)) if (intercept) droppedPred <- c(Intercept=F,droppedPred) anyRepeated <- any(table(id[!is.na(id)]) > 1) if (anyRepeated) { stop("clustered observations not yet implemented") } else { secol <- 2 + robustSE if (fnctl == "hazard") { zz <- coxph(z$y~z$X[,!z$droppedPred,drop=F],weights=z$weights,robust=robustSE,...) zzs <- summary(zz) converge <- NA n <- zzs$n zzs$coefficients <- zzs$coefficients[,-2,drop=F] if (robustSE) { zzs$robustCov <- zz$var zzs$naiveCov <- zz$naive.var scoreStat <- zz$rscore } else { zzs$naiveCov <- zz$var scoreStat <- zz$score } waldStat <- zz$wald.test LRStat <- 2 * diff(zz$loglik) } else { if (fnctl %in% c("mean","geometric mean")) { if (intercept) zz <- if (intercept) lm (z$y ~ z$X[,!z$droppedPred,drop=F],weights=z$weights,...) else lm (z$y ~ 0 + z$X[,!z$droppedPred,drop=F],weights=z$weights,...) zzs <- summary(zz) converge <- T zzs$naiveCov <-zzs$sigma^2 * zzs$cov.unscaled n <- sum(zzs$df[1:2]) } else { if (fnctl == "odds") { zz <- if (intercept) glm (z$y ~ z$X[,!z$droppedPred,drop=F],weights=z$weights,family="binomial",...) else glm (z$y ~ 0 + z$X[,!droppedPred,drop=F],weights=z$weights,family="binomial",...) } else { zz <- if (intercept) glm (z$y ~ z$X[,!z$droppedPred,drop=F],weights=z$weights,family="poisson",...) else glm (z$y ~ 0 + z$X[,!droppedPred,drop=F],weights=z$weights,family="poisson",...) } converge <- zz$converged zzs <- summary(zz) zzs$naiveCov <- zzs$cov.scaled n <- zzs$df.null + intercept } if (robustSE) { m <- sandwich(zz,adjust=T) zzs$coefficients <- cbind(zzs$coefficients[,1:2,drop=F],sqrt(diag(m)),zzs$coefficients[,-(1:2),drop=F]) dimnames(zzs$coefficients)[[2]][2:3] <- c("Naive SE","Robust SE") zzs$robustCov <- m } p <- dim(zzs$coefficients)[1] if (robustSE) m <- zzs$robustCov else m <- zzs$naiveCov zzs$coefficients[,secol+1] <- zzs$coefficients[,1] / zzs$coefficients[,secol] u <- (1+intercept):p waldStat <- t(zzs$coefficients[u,1]) %*% (solve(m[u,u]) %*% zzs$coefficients[u,1]) / (p - intercept) LRStat <- if (fnctl %in% c("mean","geometric mean")) waldStat else (zzs$null.deviance - zzs$deviance) / (p - intercept) / zzs$deviance * (n-p) scoreStat <- NULL } p <- dim(zzs$coefficients)[1] if (useFdstn) { waldStat <- c(waldStat,1-pf(waldStat,p-intercept,n-p),p-intercept,n-p) LRStat <- c(LRStat,1-pf(LRStat,p-intercept,n-p),p-intercept,n-p) if (!is.null(scoreStat)) scoreStat <- c(scoreStat,1-pf(scoreStat,p-intercept,n-p),p-intercept,n-p) zzs$coefficients[,secol+2] <- 2 * pt(- abs(zzs$coefficients[,secol+1]),df=n-p) } else { waldStat <- c(waldStat,1-pchisq(waldStat,p-intercept),p-intercept) LRStat <- c(LRStat,1-pchisq(LRStat,p-intercept),p-intercept) if (!is.null(scoreStat)) scoreStat <- c(scoreStat,1-pchisq(waldStat,p-intercept),p-intercept) zzs$coefficients[,secol+2] <- 2 * pnorm(- abs(zzs$coefficients[,secol+1])) } linearPredictor <- cbind(if (intercept) 1 else NULL,z$X[,!z$droppedPred]) %*% zzs$coefficients[,1] zzs$call <- cl tmp <- class(zzs) zzs <- c(zzs,list(augCoefficients=augCoefficients, waldStat=waldStat, LRStat=LRStat, scoreStat=scoreStat, linearPredictor=linearPredictor, fitobj=zz, gModel=z, converge=converge)) class(zzs) <- c("uRegress",tmp) if (useFdstn) ci <- qt((1-conf.level)/2,df=n-p) * zzs$coefficients[,secol] else ci <- qnorm((1-conf.level)/2) * zzs$coefficients[,secol] ci <- zzs$coefficients[,1] + cbind(ci,-ci) dimnames(ci)[[2]] <- paste(format(100*conf.level),"%",c("L","H"),sep="") if (exponentiate) { ci <- cbind(Est=zzs$coefficients[,1],ci) dimnames(ci)[[2]] <- paste("e(",dimnames(ci)[[2]],")",sep="") ci <- exp(ci) } zzs$coefficients <- cbind(zzs$coefficients[,1:secol,drop=F],ci,zzs$coefficients[,-(1:secol),drop=F]) u <- fst==lst u[u] <- !droppedPred zzs$augCoefficients[u,] <- zzs$coefficients ncol <- dim(zzs$augCoefficients)[2] u <- fst==lst u[u] <- droppedPred zzs$augCoefficients[u,-1] <- NA zzs$augCoefficients[,ncol-1] <- zzs$augCoefficients[,ncol-1,drop=F]^2 zzs$augCoefficients[fst!=lst,] <- NA for (j in 1:length(nms)) { if (fst[j]!=lst[j]) { r <- lst[j] - fst[j] + 1 cntrst <- matrix(0,r,dim(z$X)[2]+intercept) cntrst[1:r,fst[j]:lst[j]] <- diag(r) cntrst <- cntrst[,!droppedPred,drop=F] cntrst <- cntrst[apply(cntrst!=0,1,any),,drop=F] zzs$augCoefficients[j,ncol - (1:0)] <- uWaldtest (zzs, cntrst)[1:2] } } j <- dim(zzs$augCoefficients)[2] zzs$augCoefficients <- cbind(zzs$augCoefficients[,-j,drop=F],df=lst-fst+1,zzs$augCoefficients[,j,drop=F]) class(zzs$augCoefficients) <- "augCoefficients" } zzs } fitted.uRegress <- function (obj,X, version=F) { vrsn <- "20110928" if (version) return(vrsn) if (!missing(X)) { if (dim(X)[2] != dim(obj$processTerms$X)[2]) stop("new data not appropriate dimensions") if (dim(obj$coefficients)[1] == dim(X)[2] + 1) X <- cbind(1,X) fits <- X %*% obj$coefficients[,1] } else fits <- obj$glmobj$fitted.values if (obj$functional == "geometric mean") fits <- exp(fits) fits } print.augCoefficients <- function (x,sigfigs=max(3,getOption("digits")-3),width=9,nonsci.limit=5,Psci=F, version=F) { vrsn <- "20110928" if (version) return(vrsn) # # prints CI or exp(CI) to specified sigfigs # rounds exp(Est) to same number of decimal figures as exp(CI) # prints Std Err or Robust SE to specified sigfigs # rounds Estimate to same number of decimal figures as StdErr or Robust SE # prints F stat to specified sigfigs # prints P value to number of decimal figures as nonsci.limit unless < 10^-nonsci.limit when "< .00001" used # centers all but df and P value, which is right justified cmptRoundDigits <- function (x, sf) { y <- max(abs(x),na.rm=T) if (y==0) { sf } else { y <- trunc(log(y) / log(10)) - (y < 1) max(0,sf - y - (y < sf)) } } frmtCol <- function (x, sf, nonsci.limit, colwidth=9) { rslt <- NULL for (i in 1:length(x)) { if (is.na(x[i])) { tmp <- "NA" } else { rd <- cmptRoundDigits (x[i], sf) if (rd <= nonsci.limit & abs(x[i]) < 10^nonsci.limit) { tmp <- format(round(x[i],rd),nsmall=rd,width=1) } else { tmp <- format(round(x[i],rd), digits=sf, scientific=T, width=1) } } rslt <- c(rslt,ifelse(x[i]<0,tmp,paste(" ",tmp,sep=""))) } format(rslt, justify="centre", width=colwidth) } frmtCoefficients <- format(x) Pvalcol <- dim(x)[2] for (j in 1:(Pvalcol-3)) frmtCoefficients[,j] <- frmtCol (x[,j],sigfigs,nonsci.limit,width) SEcol <- ifelse(dimnames(x)[[2]][3]=="RobustSE",3,2) frmtCoefficients[,SEcol+1] <- paste(frmtCoefficients[,SEcol+1]," ") frmtCoefficients[,Pvalcol-2] <- paste(" ",format(round(x[,Pvalcol-2],2),width=width,justify="right")) dimnames(frmtCoefficients)[[2]][Pvalcol-2] <- paste(" ",dimnames(frmtCoefficients)[[2]][Pvalcol-2]) frmtCoefficients[,Pvalcol-1] <- format(x[,Pvalcol-1]) if (Psci) { frmtCoefficients[,Pvalcol] <- format(x[,Pvalcol],digits=sigfigs,scientific=T,width=width,justify="centre") } else{ z <- paste(format(round(x[,Pvalcol],4),width=width-1,justify="right")," ",sep="") z[x[,Pvalcol] < 5e-5] <- format("< 0.00005",width=width,justify="right") frmtCoefficients[,Pvalcol] <- format(z,justify="right",width=width) } u <- is.na(x[,1]) if (any(u)) frmtCoefficients[u,1:(Pvalcol-3)] <- "" dimnames(frmtCoefficients)[[1]] <- paste(dimnames(frmtCoefficients)[[1]]," ") print(frmtCoefficients,quote=F) invisible(frmtCoefficients) } print.uRegress <- function (x,augmented=T,digits=max(3,getOption("digits")-3),signif.stars=F, version=F) { vrsn <- "20110928" if (version) return(vrsn) f.PH <- function (x, digits = max(getOption("digits") - 3, 3), signif.stars = getOption("show.signif.stars"), ...) { if (!is.null(x$call)) { cat("Call:\n") dput(x$call) cat("\n") } if (!is.null(x$fail)) { cat(" Coxreg failed.", x$fail, "\n") return() } savedig <- options(digits = digits) on.exit(options(savedig)) omit <- x$na.action cat(" n=", x$n) if (!is.null(x$nevent)) cat(", number of events=", x$nevent, "\n") else cat("\n") if (length(omit)) cat(" (", naprint(omit), ")\n", sep = "") if (nrow(x$coef) == 0) { cat(" Null model\n") return() } if (!is.null(x$coefficients)) { cat("\n") print (x$coefficients) } cat("\n") if (!is.null(x$concordance)) { cat("Concordance=", format(round(x$concordance[1], 3)), " (se =", format(round(x$concordance[2], 3)), ")\n") } cat("Rsquare=", format(round(x$rsq["rsq"], 3)), " (max possible=", format(round(x$rsq["maxrsq"], 3)), ")\n") cat("Likelihood ratio test= ", format(round(x$logtest["test"], 2)), " on ", x$logtest["df"], " df,", " p=", format(x$logtest["pvalue"]), "\n", sep = "") cat("Wald test = ", format(round(x$waldtest["test"], 2)), " on ", x$waldtest["df"], " df,", " p=", format(x$waldtest["pvalue"]), "\n", sep = "") cat("Score (logrank) test = ", format(round(x$sctest["test"], 2)), " on ", x$sctest["df"], " df,", " p=", format(x$sctest["pvalue"]), sep = "") if (is.null(x$robscore)) cat("\n\n") else cat(", Robust = ", format(round(x$robscore["test"], 2)), " p=", format(x$robscore["pvalue"]), "\n\n", sep = "") if (x$used.robust) cat(" (Note: the likelihood ratio and score tests", "assume independence of\n observations within a cluster,", "the Wald and robust score tests do not).\n") invisible() } print(x$call) # cat("n =",x$n) if(!is.null(x$na.action)) cat("(",length(x$na.action)," cases deleted due to missing values)") cat("\n\n") if (x$gModel$fnctl %in% c("mean","geometric mean")) { f <- getAnywhere(print.summary.lm)$objs[[1]] } else if (x$gModel$fnctl == "hazard") { f <- f.PH } else f <- getAnywhere(print.summary.glm)$objs[[1]] if (augmented) { x$coefficients <- x$augCoefficients x$conf.int <- NULL } f(x,digits=digits,signif.stars=signif.stars) } uLRtest <- function (full,reduced, version=F) { vrsn <- "20110928" if (version) return(vrsn) if (dimnames(full$coefficients)[[2]][2]=="Robust SE") stop("uLRtest inappropriate with robust SE") if (dimnames(reduced$coefficients)[[2]][2]=="Robust SE") stop("uLRtest inappropriate with robust SE") if (!all(dimnames(reduced$coefficients)[[1]] %in% dimnames(reduced$coefficients)[[1]])) stop("uLRtest only appropriate with hierarchical models") if (dim(reduced$lmobj$model)[1] != dim(full$lmobj$model)[1]) stop("full and reduced models must be based on same data") if (any(dimnames(reduced$lmobj$model)[[1]] != dimnames(full$lmobj$model)[[1]])) stop("full and reduced models must be based on same data") df.full <- full$df[2] df.redu <- reduced$df[2] RSSH <- reduced$sigma^2 * df.redu RSS <- full$sigma^2 * df.full Fstat <- (RSSH - RSS) / (df.redu - df.full) / (full$sigma^2) pval <- 1-pf(Fstat,df.redu-df.full,df.full) rslt <- c(Fstat, pval,df.redu-df.full,df.full) names(rslt) <- c("F stat","p value","num df","den df") rslt } uWaldtest <- function (full, contrasts=c(0,rep(1,p-1)), hypothesis=matrix(0,r,1), version=F) { vrsn <- "20110928" if (version) return(vrsn) if (!inherits(full,"uRegress")) stop("not a uRegress object") coefficients <- full$coefficients[,1] p <- length(coefficients) if (!is.matrix(contrasts)) contrasts <- matrix(contrasts,1) if (dim(contrasts)[2] != p) stop ("contrasts, coefficient vector must be conformable") r <- dim(contrasts)[1] if (length(hypothesis) != r) stop ("contrasts, hypothesis must be conformable") covmtx <- full$robustCov if (is.null(covmtx)) covmtx <- full$naiveCov covmtx <- contrasts %*% covmtx %*% t(contrasts) Fstat <- t(contrasts %*% coefficients - hypothesis) %*% solve(covmtx) %*% (contrasts %*% coefficients - hypothesis) / r if (full$gModel$useFdstn) { df.den <- full$waldStat[4] pval <- 1-pf(Fstat,r,df.den) rslt <- c(Fstat, pval,r,df.den) names(rslt) <- c("F stat","p value","num df","den df") } else { pval <- 1-pchisq(Fstat,r,) rslt <- c(Fstat, pval,r) names(rslt) <- c("Chi2 stat","p value","df") } rslt } uModel <- function (..., version=F) { vrsn <- "20110928" if (version) return(vrsn) processTerm <- function (z, Term, TermName) { z$terms <- c(z$terms,TermName) nTerm <- length(z$terms) nPred <- length(z$preds) z$firstPred <- c(z$firstPred,nPred+1) z$lastPred <- c(z$lastPred,nPred+1) if (is.list(Term)) { p <- length(Term) if (is.null(names(Term))) { names(Term) <- paste("T",1:length(Term),sep="") } for (i in 1:p) { z <- processTerm (z, Term[[i]], paste(ifelse(p==1,""," "),TermName,".",names(Term)[i],sep="")) } } else if (is.matrix(Term)) { if (is.null(dimnames(Term)[[2]])) { predNms <- paste("V",1:(dim(Term)[2]),sep="") } else predNms <- dimnames(Term)[[2]] if (!is.null(z$X)) { if (dim(Term)[1]!=dim(z$X)[1]) stop("all terms must have same number of cases") } mode(Term) <- "numeric" z$X <- cbind(z$X,Term) z$preds <- c(z$preds,paste(ifelse(dim(Term)[2]==1,""," "),TermName,".",predNms,sep="")) } else { if (!is.null(z$X)) { if (length(Term)!=dim(z$X)[1]) stop("all terms must have same number of cases") } mode(Term) <- "numeric" z$X <- cbind(z$X,Term) z$preds <- c(z$preds,TermName) } z$lastPred[nTerm] <- dim(z$X)[2] z } cl <- match.call() L <- list(...) namesL <- as.character(unlist(match.call(expand.dots=F)$...)) hyperNames <- as.character(names(match.call(expand.dots=F)$...)) if (length(L)==1 & is.matrix(L[[1]])) { if (dim(L[[1]])[2]==1) { if(!is.null(dimnames(L[[1]]))) { nms <- dimnames(L[[1]])[[2]] if (!is.null(nms)) { namesL[1] <- nms L[[1]] <- as.vector(L[[1]]) } } } } if (!is.null(hyperNames)) namesL[hyperNames!=""] <- hyperNames[hyperNames!=""] names(L) <- namesL p <- length(L) z <- list(call=cl, terms=NULL,firstPred=NULL,lastPred=NULL,preds=NULL,X=NULL) for (i in 1:p) { z <- processTerm (z, L[[i]], namesL[i]) } z$preds <- paste(z$preds, ifelse(duplicated(z$preds),paste(".dup",cumsum(duplicated(z$preds)),sep=""),rep("",length(z$preds))),sep="") z$terms <- paste(z$terms, ifelse(duplicated(z$terms),paste(".dup",cumsum(duplicated(z$terms)),sep=""),rep("",length(z$terms))),sep="") dimnames(z$X)[[2]] <- z$preds class (z) <- "uModel" z } dummy <- function(x,subset=rep(T,length(x)),reference=sort(unique(x[!is.na(x)])),includeAll=F, version=F) { vrsn <- "20110928" if (version) return(vrsn) if (length(reference)==1) reference <- unique(c(reference,sort(unique(x[!is.na(x)])))) X <- NULL if (includeAll) X <- cbind(subset*as.integer(x==reference[1])) else X <- NULL for (r in reference[-1]) X <- cbind(X,subset*as.integer(x==r)) if (includeAll) dimnames(X) <- list(NULL,reference) else if (length(reference) > 2) { dimnames(X) <- list(NULL,paste(reference[-1]," vs ",reference[1],sep="")) } else dimnames(X) <- list(NULL,reference[-1]) attr(X,"transformation") <- "dummy" attr(X,"reference") <- reference attr(X,"original") <- x X } polynomial <- function(x,degree=2,center=mean(x,na.rm=T), version=F) { vrsn <- "20110928" if (version) return(vrsn) x <- x - center if (length(degree)==1) { degree <- round(degree) if (degree < 2) stop("inappropriate degree of polynomial") nms <- paste(c("Linear","Square","Cube"),ifelse1(center!=0,"(ctr)",""),sep="")[1:min(degree,3)] if (degree > 3) nms <- c(nms,paste(4:degree,"thPwr",ifelse1(center!=0,"(ctr)",""),sep="")) degree <- 1:degree } else nms <- paste("^",format(degree),ifelse1(center!=0,"(ctr)",""),sep="") rslt <- NULL for (d in degree) rslt <- cbind(rslt,x^d) dimnames(rslt) <- list(NULL,nms) attr(rslt,"transformation") <- "polynomial" attr(rslt,"degree") <- degree attr(rslt,"center") <- center attr(rslt,"original") <- x rslt } lspline <- function (x, knots, lbl=NULL, version=F) { vrsn <- "20140208" if (version) return(vrsn) n <- length(x) p <- length(knots) + 1 if (is.null(lbl)) { cl <- match.call() lbl <- as.character(cl[[2]]) } if(length(lbl) != p) { lbl <- lbl[1] lbl <- paste(lbl,":",c("min",format(knots)),sep="") } m <- matrix(x,n,p) - cbind(0,matrix(rep(knots,each=n),nrow=n)) mx <- cbind(matrix(rep(c(knots[1],diff(knots)),each=n),n),Inf) m[!is.na(m) & m > mx] <- mx[!is.na(m) & m > mx] u <- m < 0 u[1:n] <- F m[u] <- 0 dimnames(m) <- list(rep("",n),lbl) attr(m,"transformation") <- "lspline" attr(m,"knots") <- knots attr(m,"original") <- x m }