library(splines) ## Plot the CP statistic as a function of the number of covariates ## included in a working model. ## Calculate the CP statistic for reponse data Y, working design ## matrix X, and "large" design matrix XL. get_cp = function(Y, XW, XL) { n = dim(XW)[1] p = 1+dim(XW)[2] M = lm(Y ~ XW) S = summary(M) sig2 = S$sigma^2 M1 = lm(Y ~ XL) S1 = summary(M1) sig2star = S1$sigma^2 return((n-p-1)*(sig2-sig2star)/sig2star + p + 1) } ## Sample size. n = 100 ## Grid on which functions are defined. G = seq(0, 1, length.out=n) ## Design matrix containing EY. XT = bs(G, df=8, intercept=TRUE) ## Large design matrix thought to contain EY. XL = bs(G, df=15, intercept=TRUE) ## True coefficients (w.r.t. X2). bhat = rnorm(dim(XT)[2]) ## True mean vector. EY = XT %*% bhat ## Replications. plot.new() mx = 0 for (k in 1:10) { ## Observed response data. Y = EY + rnorm(n) ## Consider a sequence of working design matrices. CP = NULL for (df in seq(4,20)) { ## A working design matrix. XW = bs(G, df=df, intercept=TRUE) cp = get_cp(Y, XW, XL) CP = rbind(CP, c(df,cp)) } mx = max(mx, CP[,2]) if (k==1) { plot(CP[,1], CP[,2], 'l', xlim=c(4,20), ylim=c(0,mx)) } else { lines(CP[,1], CP[,2], 'l', xlim=c(4,20), ylim=c(0,mx)) } }