## Demonstrate the relationship between the partial R^2 for a covariate
## and the test statistic (coefficient / standard error) of that covariate.
##
## For each sample size n, a bivariate regression Y ~ X1 + X2 is simulated
## repeatedly.  In each replication we record
##   * the t-statistic of the X1 coefficient in the full model, and
##   * the partial R^2 of X1 given X2.
## The averages over replications are appended as one row of M:
##   column 1 = n, column 2 = mean t-statistic, column 3 = mean partial R^2.

M = NULL

## Use bivariate regression with cor(X1,X2) = r.
r = 0.4

## Number of replications per sample size.
n_rep = 100

for (n in c(20, 50, 100, 200, 500)) {

    ## Preallocate the per-replication results instead of growing them.
    ## NOTE: despite its name, sig_level stores the t-statistic of X1,
    ## not a p-value.
    sig_level = numeric(n_rep)
    p_r2 = numeric(n_rep)

    ## Do replications to get a more stable result.
    for (k in seq_len(n_rep)) {

        ## Generate the design matrix.  X2 is built so that its
        ## population correlation with X1 is r and its variance is 1.
        X1 = rnorm(n)
        U = rnorm(n)
        X2 = r*X1 + sqrt(1 - r^2)*U
        X = cbind(X1, X2)

        ## Simulate the response.
        Y = X1 + X2 + 4*rnorm(n)

        ## Calculate the full model R^2 as the squared correlation
        ## between the response and the fitted values.
        m_full = lm(Y ~ X)
        R2_full = cor(Y, fitted(m_full))^2

        ## Get the t-statistic of X1 by hand.
        ## XX is the design matrix including the intercept column, so
        ## X1 is column 2.  With XX = QR we have XX'XX = R'R, hence
        ## chol2inv(R) is (XX'XX)^{-1}.
        XX = cbind(1, X)
        R = qr.R(qr(XX))
        H = chol2inv(R)

        ## Unbiased residual variance: RSS / (n - number of coefficients).
        df_resid = n - ncol(XX)
        sigma2 = sum(residuals(m_full)^2) / df_resid
        se = sqrt(sigma2 * H[2, 2])
        sig_level[k] = coef(m_full)[[2]] / se

        ## Calculate the submodel R^2 for X2 only.
        m_2 = lm(Y ~ X2)
        R2_2 = cor(Y, fitted(m_2))^2

        ## Partial R^2 of X1 given X2: the share of the variance left
        ## unexplained by X2 that is explained once X1 is added.
        p_r2[k] = (R2_full - R2_2) / (1 - R2_2)
    }

    M = rbind(M, c(n, mean(sig_level), mean(p_r2)))
}