## ## Some calculations with simple linear regression. ## ## Sample size. n = 100 ## Simulate some data. X = rnorm(n) coeff = c(1, -0.5) E = 0.5*rnorm(n) Y = coeff[1] + coeff[2]*X + E ## Make a simple scatterplot. plot(X, Y, pch='+') ## Calculate the least squares coefficients directly without linear algebra. beta_hat = cov(Y, X)/var(X) alpha_hat = mean(Y) - beta_hat*mean(X) coeff_est_1 = c(alpha_hat, beta_hat) ## Calculate the least square coefficients using linear algebra. D = cbind(array(1, n), X) qrd = qr(D) Q = qr.Q(qrd) R = qr.R(qrd) coeff_est_2 = solve(R, t(Q) %*% Y) ## Calculate the least squares coefficients using the built-in lm ## function. m = lm(Y ~ X) coeff_est_3 = m$coeff ## Check that the fitted regression function passes through the center ## of the data. ybar = mean(Y) xbar = mean(X) dc = ybar - coeff_est_1[1] - coeff_est_1[2]*xbar ## ## Some analysis of the residuals and fitted values. ## ## Three ways to get the fitted values. F1 = coeff_est_1[1] + coeff_est_1[2]*X F2 = m$fitted.values F3 = Q %*% (t(Q) %*% Y) ## Two ways to get the residuals. R1 = Y - F1 R2 = m$residuals ## Check some properties of the residuals. print(sprintf("The residuals sum to: %f", sum(R1))) print(sprintf(" = %f", sum(F1*R2)))