# Eric Minikel # 2013-09-08 # Simulation of power in BLI studies library(survival) options(stringsAsFactors=FALSE) # create wrapper function for simple log-rank test where no observations are censored # http://www.cureffi.org/2012/12/03/log-rank-test-power-depends-on-variance/ simplesurvtest = function(vec1,vec2) { # vec1, vec2: lists of survival times of mice in control and treatment groups days = c(vec1,vec2) # make a single vector with times for both mice group = c(rep(0,length(vec1)),rep(1,length(vec2))) # make a matching vector indicating control/treatment status status = rep(1,length(vec1)+length(vec2)) # "status" = 1 = "had an event" for all mice mice = data.frame(days,status,group) # convert to data frame return (survdiff(Surv(days,status==1)~group,data = mice)) # return log-rank test results } # rounds a value up to the next time it would be observed, e.g. if monitoring every 7 days, gliosis at day 50 is not noticed until day 56 roundup = function(rawvalue, interval) { return ( (rawvalue %/% interval + 1)*interval ) # %/% is integer division, which rounds down; +1 rounds up instead } # Wrapper to print a double space-delimited list of variable names and values (used in plotting to show conditions used in simulation) print_vars = function(namedvector) { nam = names(namedvector) val = namedvector return (paste(paste(nam,val,sep=': '),collapse=' ')) } # Wrap p values for various statistical tests in functions for ease of calling p_t = function(control_onset,treated_onset) { t_test_result = t.test(control_onset,treated_onset,alternative="two.sided",paired=FALSE) return (t_test_result$p.value) } p_lrt = function(control_onset,treated_onset) { lrt_test_result = simplesurvtest(control_onset,treated_onset) # call wrapper function for simple survival test lrt_p_value = 1 - pchisq(lrt_test_result$chisq, df=1) # obtain p value from chisq statistic by taking 1 - CDF(chi at 1 degree of freedom) return (lrt_p_value) } p_ks = function(control_onset,treated_onset) { ks_test_result = ks.test(control_onset, treated_onset) return(ks_test_result$p.value) } # function to simulate and calculate power empirically empirical_power = function (n_iter=1000, alpha=.05, mean_onset_control, sd_onset_control, effect_size, n_per_group, obs_freq, statistical_test) { empirical_power_result = 0.0 # choose a test if (statistical_test == "t") { p_func = p_t } else if (statistical_test == "lrt") { p_func = p_lrt } else if (statistical_test == "ks") { p_func = p_ks } # conduct simulation for (iter in 1:n_iter) { this_iter_result = 0 control_onset = rnorm(n=n_per_group, m=mean_onset_control, s=sd_onset_control) treated_onset = rnorm(n=n_per_group, m=mean_onset_control*(1+effect_size), s=sd_onset_control*(1+effect_size)) # detect cases where rounding makes data constant, which will otherwise throw errors in statistical test functions if ( length(unique(roundup(control_onset,obs_freq))) == 1 & length(unique(roundup(treated_onset,obs_freq))) == 1 ) { # if control == treated then the correct answer is NO difference if (roundup(control_onset,obs_freq)[1] == roundup(treated_onset,obs_freq)[1]) { this_iter_result = 0 } else { # if control <> treated then the correct answer is YES there is a difference this_iter_result = 1 } } else { # normal cases where data are not constant this_iter_result = (1 * (p_func(roundup(control_onset,obs_freq),roundup(treated_onset,obs_freq)) < alpha)) } empirical_power_result = empirical_power_result + this_iter_result * 1/n_iter } # return power return (empirical_power_result) } # optional: set seed for reproducibility set.seed(12345) # get power curves for study of FFI mice at MRI # fixed vars obs_freq = 7 statistical_test = "t" alpha = .05 mean_onset_control = 365 n_per_group = 15 # iterable vars effect_sizes = c(.1,.3,.44,.97) sd_onset_control_vals = seq(10,150,10) results = data.frame(sd_onset_control = numeric(), effect_size=numeric(), pwr=numeric()) for (effect_size in effect_sizes) { for (sd_onset_control in sd_onset_control_vals) { pwr = empirical_power(n_iter=1000, alpha=alpha, mean_onset_control=mean_onset_control, sd_onset_control=sd_onset_control, effect_size=effect_size, n_per_group=n_per_group, obs_freq=obs_freq, statistical_test=statistical_test) results = rbind(results, c(sd_onset_control, effect_size, pwr)) } } colnames(results) = c("sd_onset_control","effect_size","pwr") results # use the cbind()[1,] trick to create a named vector of all the fixed simulation parameters conditions = cbind(alpha,mean_onset_control,n_per_group,obs_freq,statistical_test)[1,] # plot power curves plot(NA,NA,xlim=range(sd_onset_control_vals),ylim=c(0,1), xlab='Standard deviation (days) of onset in control group', ylab='Power', main='Power curves for FFI mice', sub=print_vars(conditions), cex.sub = .8 ) for (effect_size_val in effect_sizes) { r.sub = subset(results, effect_size==effect_size_val) points(r.sub$sd_onset_control, r.sub$pwr, type='l') } legend('bottomleft',legend=rev(c("10%","30%","44%","97%")),pch='-',title='Delay in onset')