## Generate a Scheffe confidence band for the mean response in a ## polynomial regression problem. ## Sample size. n = 100 ## Highest order polynomial term to include. p = 3 ## The X values for each case. x = seq(-1, 1, length.out=n) ## The design matrix. X = array(0, c(n, p+1)) for (j in 1:(p+1)) { X[,j] = x^(j-1) } ## The population regression coefficients. beta = 1 + runif(p+1) ## The outcome data. Y = X %*% beta + rnorm(n) ## The F-quantile in the Scheffe formula. QF = qf(0.95, p+1, n-p-1) ## Factorize the design matrix and get some other needed things. qq = qr(X) Q = qr.Q(qq) R = qr.R(qq) XtX = t(R) %*% R XtXi = solve(XtX) ## Fit the linear model. beta_hat = solve(R, t(Q) %*% Y) ## Fitted values, residuals, and estimated error variance. Yhat = X %*% beta_hat resid = Y - Q %*% (t(Q) %*% Y) sigma2_hat = sum(resid^2) / (n-p-1) K = sqrt(sigma2_hat)*sqrt((p+1)*QF) ## The Scheffe confidence bands (lower band is in column 1, upper band ## is in column 2). S = array(0, c(n,2)) ## Get the band coordinates for each case. for (i in 1:n) { V = X[i,] %*% XtXi %*% X[i,] S[i,1] = Yhat[i] - K*sqrt(V) S[i,2] = Yhat[i] + K*sqrt(V) } ## Print the number of x points in the design matrix for which the ## Scheffe band does not cover EY. Note that the Scheffe band covers ## all x points simultaneously with probability alpha. Therefore the ## proportion of time that this sum is greater than zero will ## generally be less than 0.05, since it doesn't consider when the ## band fails to cover at a non-design point. EY = X %*% beta print(sum((EY > S[,2]) | (EY < S[,1]))) ## Make a plot showing the data, the population mean function, and the ## Scheffe band. plot(x, Y, t='p') lines(x, S[,1], t='l', col='red') lines(x, S[,2], t='l', col='red') lines(x, EY, t='l', col='blue')