## Use simulation to check the coverage probabilities of the usual ## confidence interval for a regression coefficient. ## Number of cases n = 20 ## Number of covariates p = 2 ## Generate a design matrix X = rnorm(n*(p+1)) X = array(X, c(n,p+1)) X[,1] = 1 ## Some things we'll need below. H = solve(t(X) %*% X) V = H[2,2] ## Error standard deviation. sigma = 1 ## Number of simulation replications. nrep = 10000 ## Loop over population values for beta_1. for (b in seq(0, 1, 0.1)) { ## Generate the population regression vector. beta = rep(0, p+1) beta[2] = b ## Storage for all the estimates of beta_1. B = array(0, nrep) ## The mean response. EY = X %*% beta ## Simulation replications. for (k in 1:nrep) { ## Generate response data from Y|X. Y = EY + sigma*rnorm(n) ## Fit the model. m = lm(Y ~ 0+X) s = summary(m) beta_hat = m$coeff mse = s$sigma^2 rmse = sqrt(mse) ## The standard error for beta1_hat. SE = rmse * sqrt(V) ## The multiplier for the 95% CI. f = qt(0.975, n-p-1) ## The lower and upper limit of the CI. LB = beta_hat[2] - f*SE UB = beta_hat[2] + f*SE ## Get the indicator of coverage for this sample. B[k] = 1*((beta[2] > LB) & (beta[2] < UB)) } ## Print the true beta value and coverage probability. print(c(b, mean(B))) }