fid = gzfile("NHANES-600.csv.gz") M = read.csv(fid) ## ## Overall age distribution. ## hist(M$RIDAGEYR, xlab="Age") scan() ## ## Ovrall gender distribution. ## boxplot(M$RIDAGEYR ~ M$RIAGENDR, ylab="Age", names=c("Males", "Females")) scan() ## ## Gender distribution within age (1 yr resolution). ## Q = table(M$RIAGENDR, M$RIDAGEYR) barplot(Q, beside=T, space=c(0,2), col=c('orange', 'cyan'), xlab='Age', legend=c("Male", "Female")) scan() ## ## Gender distribution within age (5 yr resolution). ## A = floor(M$RIDAGEYR/5)*5 Q = table(M$RIAGENDR, A) barplot(Q, beside=T, space=c(0,2), col=c('orange', 'cyan'), xlab='Age', legend=c("Male", "Female")) scan() ## ## Number of positive MHQ items as a function of age. ## D = apply(M[,29:40]==1, 1, sum, na.rm=T) ii = which(!is.na(D)) D = D[ii] A = M$RIDAGEYR[ii] boxplot(D ~ A, col='orange', ylab='Number of positive items', xlab='Age') scan() ## ## Number of positive individuals for each MHQ item. ## D = apply(M[,29:40]==1, 2, mean, na.rm=T) barplot(D, cex.names=0.7, ylab='Proportion of positive individuals') scan() ## ## Plot each quantitative measure against age. ## for (k in 9:28) { plot(M$RIDAGE, M[,k], t='p', xlab='Age', ylab=names(M)[k]) m = loess(M[,k] ~ M$RIDAGE) V = cbind(m$x, m$fitted) ii = order(V[,1]) V = V[ii,] lines(V, col='red') scan() } ## ## Plot each quantitative measure against log BMI. ## LBMI = log(M$BMXBMI) for (k in 9:28) { plot(LBMI, M[,k], t='p', xlab='log BMI', ylab=names(M)[k]) m = loess(M[,k] ~ LBMI) V = cbind(m$x, m$fitted) ii = order(V[,1]) V = V[ii,] lines(V, col='red') scan() } ## ## Correlation matrix between every pair of quantitative measures. ## C = cor(M[,9:28], use="pairwise.complete.obs") heatmap(C, Rowv=NA, Colv=NA, symm=T, scale='none') scan() ## ## Z-scores of each quantitative measure against each questionaire item. ## D = array(0, c(20,12)) for (i in 9:28) { for (j in 29:40) { ii = which(!(is.na(M[,i]) | is.na(M[,j]))) A = M[ii,i] B = M[ii,j] s = sqrt(var(A)/length(A) + var(B)/length(B)) D[i-8,j-28] = (mean(A)-mean(B)) / s } } N = names(M) heatmap(abs(D), Rowv=NA, Colv=NA, labRow=N[9:28], labCol=N[29:40], scale='none') scan() ## ## Scatterplot matrix. ## I = c(6, 26, 27, 28) pairs(M[,I], labels=names(M)[I]) scan() ## ## Coplot ## coplot(M$LBXSIR ~ M$RIDAGE | log(M$BMXBMI), rows=1) scan() ii = which((M$MCQ010 != 9) & !is.nan(M$MCQ010)) coplot(M$BMXBMI[ii] ~ M$RIDAGE[ii] | factor(M$RIAGENDR[ii]) * factor(M$MCQ010[ii]), col = "red", bg = "pink", pch = 21, bar.bg = c(fac = "light blue"))