## Use simulation to assess the coverage probabilities of prediction ## intervals for independent data in a bivariate regression. ## Sample size. n = 20 ## Number of simulation replications. nrep = 1e4 ## Correlation between X1 and X2 r = 0.3 ## Keep track of the proportion of points covered by their PI. Q = 0 ## Q1 and Q2 are coverage probabilities for intervals that ignore ## the estimation error (Q1) or the observation error (Q2). Q1 = 0 Q2 = 0 for (k in 1:nrep) { ## The correlation between the two covariates is r. X1 = rnorm(n) X2 = r*X1 + sqrt(1-r^2)*rnorm(n) ## The design matrix. X = cbind(array(1,n), X1, X2) ## The (X'*X)^(-1) matrix. qrd = qr(X) R = qr.R(qrd) H = solve(t(R) %*% R) ## Simulate data from the model. beta = rnorm(3) Y = X %*% beta + rnorm(n) ## Fit the model. m = lm(Y ~ X+0) ## The parameter estimates. sigma2_hat = sum(m$residuals^2)/(n-3) beta_hat = m$coefficients ## Generate 100 design points to predict at. XP = rnorm(300) XP = array(XP, c(100,3)) XP[,1] = 1 ## Generate the responses to predict. YP = XP %*% beta + rnorm(100) ## The predicted values. P = XP %*% beta_hat ## Get the V_x* values (see notes for definition). V = NULL for (i in 1:100) { V[i] = XP[i,] %*% H %*% XP[i,] } ## The prediction standard deviation. W = sigma2_hat*(1 + V) W = sqrt(W) ## The PI. q = qt(0.975, n-3) LB = P - q*W UB = P + q*W Q = Q + mean( (YP > LB) * (YP < UB) ) ## What if we ignore estimation error? W = sigma2_hat W = sqrt(W) LB = P - q*W UB = P + q*W Q1 = Q1 + mean( (YP > LB) * (YP < UB) ) ## What if we ignore observation error? W = sigma2_hat*V W = sqrt(W) LB = P - q*W UB = P + q*W Q2 = Q2 + mean( (YP > LB) * (YP < UB) ) } Q = Q/nrep Q1 = Q1/nrep Q2 = Q2/nrep