R commands for Chapter 3 1. Randomization test for equality of means (with one sided alternative hypothesis as in the Chickens & Phytate example). Option A : It produces a dotplot ## RANDOMIZATION TEST to compare groups A and B ## when the alternative is Ha:u(A)>u(B) A<-c(6.07,7.20,6.61,9.69, 9.45,8.95,8.72,6.07) B<-c(2.57,2.39,2.51,2.57,1.80,2.37,6.28,2.91) nrep<-10000 # number of regroupings ## NO MORE INPUT IS NECESSARY n1<-length(A) ## number of observations in A n2<-length(B) ## number of observations in B n<-n1+n2 ## total number of observations meanA<-mean(A) ; meanB<-mean(B) truedif<-meanA-meanB ## difference of the two means ## performs the randomization test alldata<-append(A,B) ## merges the two groups chips<-1:n ## creates n chips difmeans<-double(nrep) ## creates storage space for (i in 1:nrep){ ## commands will be executed 10000 times chipsgroup1<-sample(chips,n1,replace=FALSE) ## selects group 1 chipsgroup2<-chips[-chipsgroup1] ## rest goes to group 2 group1<-(alldata[chipsgroup1]) ## values in group 1 group2<-(alldata[chipsgroup2]) ## values in group 2 difmeans[i]<-mean(group1)-mean(group2) ## difference in means } ## assigns value 1 to the reg-roupings in which the ## difference of means is equal to or greater than the difference ## the A and B groups numgreater<-difmeans>= truedif ## for two-sided alternative hypothesis ## use abs(difmeans)>= abs(truedif) in previous line ## calculates the proportion or approximated p-value pvalue<-mean(numgreater) ; pvalue stripchart(difmeans, method='stack',pch=1,at=0,main=" ",ylim=c(-0.3,2)) arrows(truedif,-0.2,truedif,0,col="red",lwd=2) text(truedif,-0.25,c(truedif)) 2. Randomization test for equality of means (with one sided alternative hypothesis as in the Chickens & Phytate example). Option B : It produces a HISTOGRAM ## RANDOMIZATION TEST to compare groups A and B ## when the alternative is Ha:u(A)>u(B) A<-c(6.07,7.20,6.61,9.69, 9.45,8.95,8.72,6.07) B<-c(2.57,2.39,2.51,2.57,1.80,2.37,6.28,2.91) nrep<-10000 # number of regroupings ## NO MORE INPUT IS NECESSARY n1<-length(A) ## number of observations in A n2<-length(B) ## number of observations in B n<-n1+n2 ## total number of observations meanA<-mean(A) ; meanB<-mean(B) truedif<-meanA-meanB ## difference of the two means ## performs the randomization test alldata<-append(A,B) ## merges the two groups chips<-1:n ## creates n chips difmeans<-double(nrep) ## creates storage space for (i in 1:nrep){ ## commands will be executed 10000 times chipsgroup1<-sample(chips,n1,replace=FALSE) ## selects group 1 chipsgroup2<-chips[-chipsgroup1] ## rest goes to group 2 group1<-(alldata[chipsgroup1]) ## values in group 1 group2<-(alldata[chipsgroup2]) ## values in group 2 difmeans[i]<-mean(group1)-mean(group2) ## difference in means } ## assigns value 1 to the reg-roupings in which the ## difference of means is equal to or greater than the difference ## the A and B groups numgreater<-difmeans>= truedif ## for two-sided alternative hypothesis ## use abs(difmeans)>= abs(truedif) in previous line ## calculates the proportion or approximated p-value pvalue<-mean(numgreater) ; pvalue hist(difmeans, main=" ",xlab="differences of means") arrows(truedif,1000,truedif,10,col="red") text(truedif,1100,c(truedif)) 3. Randomization test for equality of means, case of a two sided alternative hypothesis. ## RANDOMIZATION TEST to compare groups A and B ## when the alternative is Ha:u(A) not equal u(B) A<-c(6.07,7.20,6.61,9.69, 9.45,8.95,8.72,6.07) B<-c(2.57,2.39,2.51,2.57,1.80,2.37,6.28,2.91) nrep<-10000 # number of regroupings ## NO MORE INPUT IS NECESSARY n1<-length(A) ## number of observations in A n2<-length(B) ## number of observations in B n<-n1+n2 ## total number of observations meanA<-mean(A) ; meanB<-mean(B) truedif<-meanA-meanB ## difference of the two means ## performs the randomization test alldata<-append(A,B) ## merges the two groups chips<-1:n ## creates n chips difmeans<-double(nrep) ## creates storage space for (i in 1:nrep){ ## commands will be executed 10000 times chipsgroup1<-sample(chips,n1,replace=FALSE) ## selects group 1 chipsgroup2<-chips[-chipsgroup1] ## rest goes to group 2 group1<-(alldata[chipsgroup1]) ## values in group 1 group2<-(alldata[chipsgroup2]) ## values in group 2 difmeans[i]<-mean(group1)-mean(group2) ## difference in means } ## assigns value 1 to the re-roupings in which the ## difference of means is equal to or greater than the difference ## the A and B groups numgreater<-abs(difmeans)>= abs(truedif) ## for two-sided alternative hypothesis ## calculates the proportion or approximated p-value pvalue<-mean(numgreater) ; pvalue stripchart(difmeans, method='stack',pch=1,at=0,main=" ",ylim=c(-0.3,2)) arrows(truedif,-0.2,truedif,0, col="red",lwd=2) arrows(-truedif,-0.2,-truedif,0, col="red",lwd=2) text(truedif,-0.25,c(truedif)) 4. Randomization test for paired data as in Darwin’s example, with a one sided alternative hypothesis ## RANDOMIZATION for paired data, to test ud=0 vs. ud>0, for d=A-B A<-c(23.5,12,21,22,19.125,21.5,22.125,20.375,18.25,21.625,23.25, 21,22.125,23,12) B<-c(17.375,20.375,20,20,18.375,18.625,18.625,15.25,16.5,18,16.25, 18,12.75,15.5,18) nrand<-10000 ## number of randomizations d<-A-B ## calculates differences n<-length(d) ## how many observations truemdif<-mean(d) ## mean of the true differences randmeansd<-double(10000) ## creates storage space for (i in 1:10000){ ## commands to be executed 10000 times flips<-rbinom(n,1,0.5) ## randomly decides where flips happen nflips<-2*flips-1 ## 1:original sign -1:flip newdif<-nflips*d ## differences after flipping newmeand<-mean(newdif) ## mean of the new differences randmeansd[i]<-newmeand } ## stores the mean differences numgreater<-randmeansd>= truemdif ## comparing with true dif pvalue<-mean(numgreater) ## calculates pvalue as proportion pvalue ## shows pvalue on the screen hist(randmeansd,main=" ",xlab="mean differences exchanging labels") arrows(truemdif,800,truemdif,10,col="red") ## puts arrow in plot rtruemdif=round(truemdif,4) ## rounding true mean difference text(truemdif,900,c(rtruemdif)) ## writes difference in plot text(max(randmeansd),400,'pvalue~') ## writes label for p-value text(max(randmeansd),300,c(pvalue)) ## writes p-value in plot 5. Randomization test for paired data as in Darwin’s example, with a two-sided alternative hypothesis ## RANDOMIZATION for paired data, to test ud=0 vs. ud not equal 0, for d=A-B A<-c(23.5,12,21,22,19.125,21.5,22.125,20.375,18.25,21.625,23.25, 21,22.125,23,12) B<-c(17.375,20.375,20,20,18.375,18.625,18.625,15.25,16.5,18,16.25, 18,12.75,15.5,18) nrand<-10000 ## number of randomizations d<-A-B ## calculates differences n<-length(d) ## how many observations truemdif<-mean(d) ## mean of the true differences randmeansd<-double(10000) ## creates storage space for (i in 1:10000){ ## commands to be executed 10000 times flips<-rbinom(n,1,0.5) ## randomly decides where flips happen nflips<-2*flips-1 ## 1:original sign -1:flip newdif<-nflips*d ## differences after flipping newmeand<-mean(newdif) ## mean of the new differences randmeansd[i]<-newmeand } ## stores the mean differences numgreater<-abs(randmeansd)>= abs(truemdif) ## comparing with true dif pvalue<-mean(numgreater) ## calculates pvalue as proportion pvalue ## shows pvalue on the screen hist(randmeansd,main=" ",xlab="mean differences exchanging labels") arrows(truemdif,800,truemdif,10,col="red") ## puts arrow in plot arrows(-truemdif,800,-truemdif,10,col="red") ## puts arrow in plot rtruemdif=round(truemdif,4) ## rounding true mean difference text(truemdif,900,c(rtruemdif)) ## writes difference in plot text(max(randmeansd),400,'pvalue~') ## writes label for p-value text(max(randmeansd),300,c(pvalue)) ## writes p-value in plot 6. Bootstrap percentile confidence intervals as in the rhododendron example Option A: output is a dotplot ## PERCENTILE BOOTSTRAP CONFIDENCE INTERVAL FOR THE MEAN ## INPUT : x<-c(9.70,10.40,8.90,10.70,9.85,10.30,9.00 ,10.00,10.45,8.65) nboot<-2000 ## number of bootstrap replicates confidence<-0.95 ## NO MORE INPUT IS NEEDED n<-length(x) sampm<-double(nboot) ## prepares storage space for (i in 1:nboot){ ## repeat 2000 times subsam<-sample(x,n,replace=TRUE) ## bootstrap sample sampm[i]<-mean(subsam) ## stores the sample mean } ## calculates the endpoints of the confidence interval low<-quantile(sampm,(1-confidence)/2) hi<-quantile(sampm,confidence+(1-confidence)/2) stripchart(sampm,method="stack", at=0.1,cex=1, pch=1,main=" ") ## marks the endpoints of the interval arrows(low, -0.5,low,0.1,col="red") arrows(hi, -0.5,hi,0.1,col="red") ## prints the interval low; hi ## prints mean and sd for sample and bootstrap replicates mean(x); sd(x) mean(sampm); sd(sampm) 7. Bootstrap percentile confidence intervals as in the rhododendron example Option B: output is a histogram ## PERCENTILE BOOTSTRAP CONFIDENCE INTERVAL FOR THE MEAN ## INPUT : x<-c(9.70,10.40,8.90,10.70,9.85,10.30,9.00 ,10.00,10.45,8.65) nboot<-2000 ## number of bootstrap replicates confidence<-0.95 ## NO MORE INPUT IS NEEDED n<-length(x) sampm<-double(nboot) ## prepares storage space for (i in 1:nboot){ ## repeat 2000 times subsam<-sample(x,n,replace=TRUE) ## bootstrap sample sampm[i]<-mean(subsam) ## stores the sample mean } ## calculates the endpoints of the confidence interval low<-quantile(sampm,(1-confidence)/2) hi<-quantile(sampm,confidence+(1-confidence)/2) hist(sampm, xlab= "bootstrap replicates ", main=" ") ## marks the endpoints of the interval arrows(low, 200,low,0,col="red") arrows(hi, 200,hi,0,col="red") ## prints the interval low; hi ## prints mean and sd for sample and bootstrap replicates mean(x); sd(x) mean(sampm); sd(sampm) 8. Reading onion data from example 3.4 in Figure 3.11 x<-c(0.032, 0.043, 0.023, 0.056, 0.040, 0.076, 0.026, 0.034, 0.033, 0.045, 0.046, 0.021, 0.056, 0.060, 0.037, 0.028, 0.045, 0.074, 0.044, 0.055, 0.035, 0.044, 0.027, 0.058, 0.033, 0.047, 0.035, 0.054, 0.075, 0.045, 0.052, 0.062 0.026, 0.039, 0.067, 0.071, 0.020, 0.042, 0.022, 0.046) 9. Bootstrapping the correlation coefficient as in the altitude and RBC example ## BOOTSTRAPPING THE CORRELATION COEFFICIENT ## INPUT x<-c(0,1840,2200,2200,5000,5200,5750,7400,8650,10740,12000, 12200,12300,14200,14800,14900,17500) y<-c(4.93,4.75,5.40,4.65,5.42,6.55,5.99,5.39,5.44,5.82,7.50,5.67, 6.31,7.05,6.46,6.66,7.37) nboot<-2000 ## number of bootstrap replicates confidence<-0.95 ## NO MORE INPUT IS NEEDED n<-length(x) # counts number of observations who<-seq(1,n,by=1) #creates a list labels 1:n rboot<-double(nboot) # creates storage space ## now we select random samples from the labels ## and identify the corresponding values of x and y for (i in 1:nboot){ subsam<-sample(who,n,replace=TRUE) xwho<-(x[subsam]) ywho<-(y[subsam]) rboot[i]<-cor(xwho,ywho) } low<-quantile(rboot,(1-confidence)/2) ## lower end of interval high<-quantile(rboot,confidence+(1-confidence)/2) ## upper end par(mfcol=c(1,2)) ## 2 plots in figure plot(x,y,pch=19) ## scatterplot hist(rboot, main= ' ',xlab= 'bootstrap replicates of r',xlim=c(0,1)) arrows(low,-100,low ,0,col="red") arrows(high,-100, high ,0,col="red") low;high ## prints the interval 10. Fisher’s exact test (Lady tasting tea story) teacups<-matrix(c(4,0,0,4),nr=2) fisher.test(teacups,alternative='greater')