get.CI = function(rhat, n) { logterm = 0.5*log(1+rhat)-0.5*log(1-rhat) CL = logterm-2/sqrt(n-3) CU = logterm+2/sqrt(n-3) LB = (exp(2*CL)-1)/(exp(2*CL)+1) UB = (exp(2*CU)-1)/(exp(2*CU)+1) return(c(LB,UB)) } T = read.table(file="college2000.txt", head=TRUE, row.name=1) ## Calculate the mean acceptance ratio for type=Private mean(T$Accept.Ratio[T$Type==1]) ## Calculate the mean acceptance ratio for type=Public mean(T$Accept.Ratio[T$Type==2]) ## Calculate the sample correlation coef. between Grad.rate ## and S.F.Ratio plot(T$Grad.rate, T$S.F.Ratio, type="p", pch=1, xlab="Graduation Rate", ylab="Students/Faculty") rhat=cor(T$Grad.rate, T$S.F.Ratio) n=length(T$Grad.rate) CI=get.CI(rhat,n) ## Approximate 95% CI for r print(CI) ## Do this separately for type=Public, type=Private ## Make Scatterplots par(mfrow=c(1,2)) plot(T$Grad.rate[T$Type==1], T$S.F.Ratio[T$Type==1], type="p", pch=1, xlab="Graduation Rate", ylab="Students/Faculty", main="Private") plot(T$Grad.rate[T$Type==2], T$S.F.Ratio[T$Type==2], type="p", pch=1, xlab="Graduation Rate", ylab="Students/Faculty", main="Public") ## Private rhat=cor(T$Grad.rate[T$Type==1], T$S.F.Ratio[T$Type==1]) n=length(T$Grad.rate[T$Type==1]) CI=get.CI(rhat,n) ## Private Approximate 95% CI for r print(CI) ## Public rhat=cor(T$Grad.rate[T$Type==2], T$S.F.Ratio[T$Type==2]) n=length(T$Grad.rate[T$Type==2]) CI=get.CI(rhat,n) ## Public Approximate 95% CI for r print(CI) G=read.table("genes.txt") ## Get rows for tumer/non-tumor classes c1=which(G[,2001] == 1) c2=which(G[,2001] == 2) ## Get Sample Sizes n1=length(c1) n2=length(c2) ## Make a table with the class variable removed GG=G[,-2001] ## Get Sample Variances var1=apply(GG[c1,],2,var) var2=apply(GG[c2,],2,var) ## Get Sample Means mean1=apply(GG[c1,],2,mean) mean2=apply(GG[c2,],2,mean) ## Compute the t-statistics t.den=sqrt(var1/n1+var2/n2) t.num=mean1-mean2 t.stats=t.num/t.den ## Get the variable ordering based on t-stat magnitude v.order=order(abs(t.stats),decreasing=TRUE) Gsmall=G[,c(2001,v.order[1:3])] ## Label the columns colnames(Gsmall) = c("Class", "G1", "G2", "G3") ## Get Scatterplots pairs(Gsmall) ## 2-most-significant genes scatterplot colors=Gsmall$Class points=Gsmall$Class plot(Gsmall$G1, Gsmall$G2, type="p", col=colors, pch=points, xlab="Gene 1", ylab="Gene 2") legend(x=10,y=500,legend=c("Tumor", "non-Tumor"), col=c(1,2), pch=c(1,2)) ## 2-most-significant genes scatterplot -log transform colors=Gsmall$Class points=Gsmall$Class plot(log(Gsmall$G1), log(Gsmall$G2), type="p", col=colors, pch=points, xlab="Gene 1", ylab="Gene 2") legend(x=2.5,y=6,legend=c("Tumor", "non-Tumor"), col=c(1,2), pch=c(1,2)) ## Weather Plotting Example ## Read in data Z=read.table("AnnArbor.txt",sep=",", row.names=NULL, header=T) ## Get min, mean, max temps for first 365 days min.temp = Z[1:365,4] mean.temp = Z[1:365,3] max.temp = Z[1:365,2] ## Compute the y-min and y-max y.min = min(min.temp) y.max = max(max.temp) plot(1:365, min.temp, type="l",lwd=2, col="blue", ylim=c(y.min,y.max), xlab="Day", ylab="Temperature F", main="Ann Arbor Temperature 2001", axes=FALSE) lines(1:365, mean.temp, type="l", col="black") lines(1:365, max.temp, type="l",lwd=2, col="red") axis(at=seq(-20, 100, by=10), side=2, lwd=2, pos=1) axis(at=c(1,seq(50,300,by=50),365), side=1,lwd=2) legend(x=150,y=20,legend=c("Max", "Mean", "Min"), col=c("red","black", "blue"), lty=c(1,1,1), lwd=c(2,1,2), bty="n") ## Get min, mean, max temps for the third year inds = 731:1095 min.temp = Z[inds,4] mean.temp = Z[inds,3] max.temp = Z[inds,2] ## Compute the y-min and y-max y.min = min(min.temp) y.max = max(max.temp) plot(1:365, min.temp, type="l",lwd=2, col="blue", ylim=c(y.min,y.max), xlab="Day", ylab="Temperature F", main="Ann Arbor Temperature Year 3", axes=FALSE) lines(1:365, mean.temp, type="l", col="black") lines(1:365, max.temp, type="l",lwd=2, col="red") axis(at=seq(-20, 100, by=10), side=2,lwd=2, pos=1) axis(at=c(1,seq(50,300,by=50),365), side=1,lwd=2) legend(x=150,y=20,legend=c("Max", "Mean", "Min"), col=c("red","black", "blue"), lty=c(1,1,1), lwd=c(2,1,2), bty="n") ## Save the plot as a pdf: ## make sure any open plots are closed before running this: pdf(file="aatemp.pdf", width=10, height=5) plot(1:365, min.temp, type="l",lwd=2, col="blue", ylim=c(y.min,y.max), xlab="Day", ylab="Temperature F", main="Ann Arbor Temperature Year 3", axes=FALSE) lines(1:365, mean.temp, type="l", col="black") lines(1:365, max.temp, type="l",lwd=2, col="red") axis(at=seq(-20, 100, by=10), side=2,lwd=2, pos=1) axis(at=c(1,seq(50,300,by=50),365), side=1,lwd=2) legend(x=150,y=20,legend=c("Max", "Mean", "Min"), col=c("red","black", "blue"), lty=c(1,1,1), lwd=c(2,1,2), bty="n") dev.off()