1; ## Use the EM algorithm to calculate the MLE for the row and column ## probabilities of a 2x3 contingency table. The row and column assignments ## are assumed independent, and only four of the six cells are observed: ## ## Z(1) = X(1,1) + X(1,2) ## Z(2) = X(2,1) ## Z(3) = X(2,2) + X(1,3) ## Z(4) = X(2,3) ## EM calculation of the MLE. function [p, q] = EM(Z) ## The full table. X = zeros(2,3); X(2,1) = Z(2); X(2,3) = Z(4); ## Starting values. p = [1/2, 1/2]; q = [1/3, 1/3, 1/3]; while (1) ## E step. X(1,1) = p(1)*q(1)*Z(1)/(p(1)*q(1) + p(1)*q(2)); X(1,2) = p(1)*q(2)*Z(1)/(p(1)*q(1) + p(1)*q(2)); X(2,2) = p(2)*q(2)*Z(3)/(p(2)*q(2) + p(1)*q(3)); X(1,3) = p(1)*q(3)*Z(3)/(p(2)*q(2) + p(1)*q(3)); ## M step. p_new = sum(X')/sum(Z); q_new = sum(X)/sum(Z); ## Test for copnvergence. if (max(max(abs(p_new-p))) + max(max(abs(q_new-q))) < 1e-6) break; endif p = p_new; q = q_new; ## Observed data log llikelihood. L = Z(1)*log(p(1)*q(1) + p(1)*q(2)) + Z(2)*log(p(2)*q(1)); L = L + Z(3)*log(p(2)*q(2) + p(1)*q(3)) + Z(4)*log(p(2)*q(3)); disp(L); endwhile endfunction ## Population structure. p_true = [0.8, 0.2]; q_true = [0.3, 0.4, 0.3]; n = 10000; ## Simulate the full cell counts. X = zeros(2,3); for i=1:n ## Generate the row assignment. if (rand(1,1) < p_true(1)) ri = 1; else ri = 2; endif ## Generate the column assignment. u = rand(1,1); if (u < q_true(1)) ci = 1; elseif (u < q_true(1)+q_true(2)) ci = 2; else ci = 3; endif X(ri,ci) = X(ri,ci) + 1; endfor ## Observed data. Z = zeros(4,1); Z(1) = X(1,1) + X(1,2); Z(2) = X(2,1); Z(3) = X(2,2) + X(1,3); Z(4) = X(2,3); [p_est, q_est] = EM(Z);