## Use simulation to study the sampling properties of the estimated ## regression coefficients, and of the MSE. ## Number of independent cases. n = 30 ## Number of covariates. p = 3 ## Standard deviation of the errors. sig = 2 B = array(0, c(1000,p+1)) MSE = array(0,1000) ## Simulate a design matrix. X = rnorm(n*(p+1)) X = array(X, c(n,p+1)) X[,1] = 1 X[,3] = X[,2] + 1.0*rnorm(n) ## Population covariate slope vector. beta = c(0, -1, 2, 0) ## True outcome vector. EY = X %*% beta ## Simulation replications. for (i in 1:1000) { ## Observed outcome vector. #Y = EY + sig*rnorm(n) ## Gaussian errors Y = EY + sig*rt(n, 3)/1.74 ## t errors with unit variance m = lm(Y ~ 0+X) s = summary(m) B[i,] = m$coeff MSE[i] = s$sigma^2 } ## Now look at the mean and variance print(c(mean(B), sd(B))) print(c(mean(MSE), sd(MSE))) ## Make a histogram and a quantile plot. hist(B) qqnorm(B)