############
#
# This file contains the code demonstrated in class.
#
# 1) It assumes you have a function descrip() that works appropriately on vectors,
#    including properly handling different storage modes (consider storage.mode() or
#    is.numeric(), is.character(), is.logical(), is.factor()):
#      a) vectors of characters (should it return NULL or count everything as missing?)
#      b) vectors of booleans (recall F=0, T=1 when coerced to numeric)
#      c) vectors of factors (these are coded as integers with labels applied when printing,
#         but what should you return?)
#      d) vectors of all missing data
#
# 2) I named the intermediate functions strdescr2(), strdescr3(), etc. You might want to use
#    more appropriate mnemonics if you take this general approach. Note that I did not
#    appropriately handle the error in which, for instance, strdescr3() was called with
#    x as a non-matrix
#
# 3) The handling of the default value for strata is far from optimal. You might consider
#    making the default value NULL in strdescr(...,strata) and using is.null() to generate the
#    appropriate defaults in each of strdescr2(), strdescr3(), and strdescr3a()
#
# 4) I put no error checks for the length of strata. You should handle it appropriately.
#
# 5) Rather than using the for() loops in strdescr3() and strdescr3a(), you could consider
#    using apply() and lapply(), respectively.
#
# 6) In strdescr(), you need to think about how you put variable names on your table when
#      a) the user supplies combinations of vectors and matrices or lists (data frames)
#      b) the user supplies only a single vector
#      c) the user supplies a matrix with no column names or a data frame with no names
#
# 7) When calling strdescr() you will always have to supply the argument for strata in
#    order to distinguish it from one of the variables you want descriptives for (so
#    you might call strdescr(psadta[-9],M,strata=psadta$inrem))
#
# 8) Just to reiterate two points;
#      a) you need to carefully consider how your descrip() function acts with erroneous input
#      b) getting the row names in your output right will take some work
#
############

############
# Helper: replace missing or blank variable names with <prefix>1, <prefix>2, ...
#   nms     existing names (may be NULL)
#   n       number of names required
#   prefix  text placed in front of the position number
############
.strdescr_names <- function (nms, n, prefix = "V") {
  if (is.null(nms)) nms <- rep("", n)
  blank <- is.na(nms) | nms == ""
  nms[blank] <- paste0(prefix, which(blank))
  nms
}

############
# Stratified statistics for a numeric vector: alternative versions for names
#
#   x       vector passed to descrip()
#   strata  vector of stratum labels, one per element of x (default: one stratum)
#
# Both versions return a matrix with a first row "All" and, when there is more
# than one stratum, one further row per sorted unique stratum value.
# The second definition replaces the first when the file is sourced.
############

# Version 1: name each stratum row as it is appended, using format(i) on the
# individual stratum value.
strdescr2 <- function (x, strata = rep(1, length(x))) {
  if (length(strata) != length(x)) {
    stop("strata must have the same length as x", call. = FALSE)
  }
  s <- sort(unique(strata))
  rslt <- rbind(All = descrip(x))
  if (length(s) > 1) {
    for (i in s) {
      # An argument name cannot be a computed expression (format(i) = ... does
      # not parse), so build the one-row matrix and name its row explicitly.
      # which() drops observations whose stratum is NA instead of turning
      # them into NA elements of x.
      stratum_row <- rbind(descrip(x[which(strata == i)]))
      rownames(stratum_row) <- format(i)
      rslt <- rbind(rslt, stratum_row)
    }
  }
  rslt
}

# Version 2: stack all the rows first, then name them in one step using
# format(s) on the whole vector of stratum values (common width).
strdescr2 <- function (x, strata = rep(1, length(x))) {
  if (length(strata) != length(x)) {
    stop("strata must have the same length as x", call. = FALSE)
  }
  s <- sort(unique(strata))
  # rbind() on a single vector guarantees a matrix, so row names can be set
  # even when there is only one stratum.
  rslt <- rbind(descrip(x))
  labels <- "All"
  if (length(s) > 1) {
    for (i in s) rslt <- rbind(rslt, descrip(x[which(strata == i)]))
    labels <- c(labels, format(s))
  }
  rownames(rslt) <- labels
  rslt
}

############
# Stratified statistics for a numeric matrix
#
#   x       matrix; each column is summarized with strdescr2()
#   strata  stratum labels, one per row of x (default: one stratum)
#
# Row names are "<column name> <stratum label>"; columns without a name are
# labelled V1, V2, ...
############
strdescr3 <- function (x, strata) {
  if (!is.matrix(x)) stop("x must be a matrix", call. = FALSE)
  # The default must be in place before strata is used anywhere.
  if (missing(strata)) strata <- rep(1, nrow(x))
  vars <- .strdescr_names(colnames(x), ncol(x))
  blocks <- lapply(seq_len(ncol(x)), function (j) {
    blk <- strdescr2(x[, j], strata)
    # Prefix per block so the labels stay right however many rows it has.
    rownames(blk) <- paste(vars[j], rownames(blk))
    blk
  })
  do.call(rbind, blocks)
}

############
# Stratified statistics for a list of numeric vectors
#
#   x       list (or data frame); each element is summarized with strdescr2()
#   strata  stratum labels, one per observation (default: one stratum)
#
# Row names are "<element name> <stratum label>"; unnamed elements are
# labelled V1, V2, ...  An empty list gives NULL.
############
strdescr3a <- function (x, strata) {
  if (!is.list(x)) stop("x must be a list or data frame", call. = FALSE)
  if (length(x) == 0) return(NULL)
  # The default must be in place before strata is used anywhere.
  if (missing(strata)) strata <- rep(1, length(x[[1]]))
  vars <- .strdescr_names(names(x), length(x))
  blocks <- lapply(seq_along(x), function (j) {
    blk <- strdescr2(x[[j]], strata)
    rownames(blk) <- paste(vars[j], rownames(blk))
    blk
  })
  do.call(rbind, blocks)
}

############
# Stratified statistics for an arbitrary combination of vectors, matrices, lists
#
#   ...     vectors, matrices and/or lists (data frames) to describe
#   strata  stratum labels; must be passed by name (default: one stratum,
#           sized from the first argument in ...)
#
# Each argument is labelled by the name the caller gave it or, failing that,
# by the expression the caller typed. Matrix columns and list elements keep
# their own names; unnamed ones become <label>.1, <label>.2, ...
############
strdescr <- function (..., strata) {
  L <- list(...)
  if (length(L) == 0) stop("no variables supplied", call. = FALSE)

  exprs <- match.call(expand.dots = FALSE)$...
  labels <- vapply(exprs, function (e) paste(deparse(e), collapse = ""),
                   character(1), USE.NAMES = FALSE)
  given <- names(exprs)
  if (!is.null(given)) labels[given != ""] <- given[given != ""]
  names(L) <- labels

  if (missing(strata)) {
    first <- L[[1]]
    # Test for list before falling through to the vector case:
    # is.vector() is also TRUE for a plain list.
    if (is.matrix(first)) {
      n <- nrow(first)
    } else if (is.list(first)) {
      n <- if (length(first) > 0) length(first[[1]]) else 0
    } else {
      n <- length(first)
    }
    strata <- rep(1, n)
  }

  # And now process L by making calls to strdescr2(), strdescr3(), strdescr3a()
  pieces <- lapply(seq_along(L), function (k) {
    v <- L[[k]]
    if (is.matrix(v)) {
      colnames(v) <- .strdescr_names(colnames(v), ncol(v),
                                     paste0(labels[k], "."))
      strdescr3(v, strata)
    } else if (is.list(v)) {
      names(v) <- .strdescr_names(names(v), length(v),
                                  paste0(labels[k], "."))
      strdescr3a(v, strata)
    } else {
      blk <- strdescr2(v, strata)
      rownames(blk) <- paste(labels[k], rownames(blk))
      blk
    }
  })
  do.call(rbind, pieces)
}