## Calculate and graph CI's and PI's for a univariate regression data set. ## The sample size. n = 50 ## The design matrix. X1 = rnorm(n) X = cbind(array(1,n), X1) ## Simulate data from the model. beta = rnorm(2) Y = X %*% beta + rnorm(n) Ynew = X %*% beta + rnorm(n) ## Fit the model. m = lm(Y ~ X+0) ## The parameter estimates. sigma2_hat = sum(m$residuals^2)/(n-2) beta_hat = m$coefficients Yhat = m$fitted.values ## The (X'*X)^(-1) matrix. qrd = qr(X) R = qr.R(qrd) H = solve(t(R) %*% R) q = qt(0.975, n-3) ## Plot CI and CI bands at -3 < X < 3. G = seq(-3, 3, 0.1) CI = array(0, c(length(G),5)) PI = array(0, c(length(G),5)) for (k in 1:length(G)) { g = G[k] x = c(1, g) V = x %*% H %*% x ## The point estimate. yhat = sum(x * beta_hat) ## The true response value. ytrue = sum(x * beta) w = q*sqrt(sigma2_hat*V) CI[k,] = c(g, yhat, ytrue, yhat-w, yhat+w) w = q*sqrt(sigma2_hat*(1+V)) PI[k,] = c(g, yhat, ytrue, yhat-w, yhat+w) } ymin = min(PI[,4]) ymax = max(PI[,5]) plot(CI[,1], CI[,2], ylim=c(ymin,ymax), t='l', ylab="Y", xlab="X") lines(CI[,1], CI[,3], ylim=c(ymin,ymax), t='l', ylab="Y", xlab="X", col='green') points(X[,2], Ynew, t='p') lines(CI[,1], CI[,4], t='l', col='blue') lines(CI[,1], CI[,5], t='l', col='blue') lines(PI[,1], PI[,4], t='l', col='red') lines(PI[,1], PI[,5], t='l', col='red') legend('topright', c("Fitted", "Population", "Data", "CI", "PI"), col=c('black', 'green', 'black', 'blue', 'red'), lty=c(1,1,NA,1,1), pch=c(NA,NA,1,NA,NA))