## Use shrinkage to improve the estimation of cluster means. ## ## The mean of the i^th cluster is estimated as: ## ## Xbar_i - f*(Xbar_i - Xbar) ## ## Where f = sigma_w^2 / (sigma_w^2 + n*sigma_u^2) ## ## sigma_w^2 is the average within-cluster variance ## sigma_u^2 is the variance across the cluster means ## The number of clusters. nclust = 100 ## Number of observations within each cluster. n = 10 nrep = 500 ## The standard deviation for the individual variance within each ## cluster. The variance of the cluster means is always one. sigma_individual = 1 + 3*rexp(nclust) ## MSE's for the two estimators. MSE1 = 0 MSE0 = 0 ## The bias for estimating the value of the lowest cluster, using the ## two estimators. B1 = 0 B0 = 0 ## The probability that the lowest cluster based on the data is truly ## one of the lowest five clusters. N1 = 0 N0 = 0 for (k in 1:nrep) { ## Cluster centers. Z1 = rnorm(nclust) Z = array(Z1, c(nclust,n)) ## The individual variance. V = array(sigma_individual, c(nclust,n)) X = rnorm(nclust*n) X = V*array(X, c(nclust,n)) ## The observed data. X = X + Z ## The cluster sample means and sample variances. Xbar = apply(X, 1, mean) Xvar = apply(X, 1, var) ## The grand mean and variance. Tmean = mean(array(X)) Tvar = var(array(X)) ## The shrinkage factors. vb = mean(Xvar) f = vb / (vb + n*(Tvar-vb)) ## The estimated scores. S = Xbar - f*(Xbar - Tmean) ## Update the MSE for the simple estimate. MSE1 = MSE1 + sum( (S-Z1)^2 ) ## Update the MSE for the shrunken estimate. MSE0 = MSE0 + sum( (Xbar-Z1)^2 ) ## The actual worst cluster. ii = order(Z1)[1] ## The worst cluster by the simple estimate. i0 = order(Xbar)[1] ## The worst cluster by the shrunken estimate. i1 = order(S)[1] ## Update the bias for the simple estimate. B0 = B0 + Xbar[i0] - Z1[ii] ## Update the bias for the shrunken estimate. B1 = B1 + S[i1] - Z1[ii] ## Check whether the worst estimated mean corresponds to one of ## the worst 5 clusters. R = rank(Z1) if (R[i0] > n-5) { N0 = N0 + 1 } if (R[i1] > n-5) { N1 = N1 + 1 } } MSE1 = MSE1/nrep MSE0 = MSE0/nrep B0 = B0/nrep B1 = B1/nrep N0 = N0/nrep N1 = N1/nrep