BNPgraph package: demo_experiments
This Matlab script performs posterior inference on the yeast protein interaction network.
To download the package or for installation instructions, visit the BNPgraph webpage.
Reference: F. Caron and E.B. Fox. Sparse graphs using exchangeable random measures. arXiv:1401.1137, 2014.
Author: François Caron, University of Oxford
Tested on Matlab R2014a.
Contents
General settings
% Housekeeping: start from a clean workspace
clear all

% Make the GGP sampler and utility routines visible on the path
addpath('./GGP/');
addpath('./utils/');

% Default font size for all figure axes
set(0, 'DefaultAxesFontSize', 14)

% Fix the random seed for reproducibility
rng('default')
Load data
% Download the yeast protein-interaction network (Pajek collection) and
% build a symmetric, self-loop-free sparse adjacency matrix G.
namefile = 'yeast.mat';
if ~exist(namefile, 'file')
    % Only download when the file is not already present locally
    % (the original re-downloaded on every run).
    % NOTE(review): the UF sparse matrix collection has moved to
    % sparse.tamu.edu; update the URL if this download fails.
    urlwrite('http://www.cise.ufl.edu/research/sparse/mat/Pajek/yeast.mat', namefile);
end

% Load the data; the mat file provides a struct `Problem` with field A
load(namefile);
G = Problem.A;

% Remove self-edges and nodes with no connection
G = G - diag(diag(G));
ind = (sum(G, 1) + sum(G, 2)') > 0;
G = sparse(logical(G(ind, ind)));

% Plot adjacency matrix
figure
spy(G)
xlabel('Node id');

% Plot empirical degree distribution
figure
h2 = plot_degree(G);
set(h2, 'markersize', 10, 'marker', 'o',...
    'markeredgecolor', 'none', 'markerfacecolor', [1, .75, .75])
box off
Posterior inference
% Hyperparameters of the priors on (alpha, sigma, tau); [0,0] selects the
% default vague setting of graphmodel
hyper_alpha = [0, 0];
hyper_tau = [0, 0];
hyper_sigma = [0, 0];
objprior = graphmodel('GGP', hyper_alpha, hyper_sigma, hyper_tau, 'simple');

% MCMC settings
niter = 40000;
nburn = 0;
nadapt = niter/4;  % length of the adaptation phase
thin = 20;
nchains = 3;
store_w = false;   % do not store the node weights (saves memory)
verbose = false;

% Build the MCMC object, run the sampler, and time it
tic
objmcmc = graphmcmc(objprior, niter, nburn, thin, nadapt, nchains, store_w); % Create a MCMC object
objmcmc = graphmcmcsamples(objmcmc, G, verbose); % Run MCMC
time = toc;
----------------------------------- MCMC chain 1/3 ----------------------------------- Start MCMC for GGP graphs Nb of nodes: 2284 - Nb of edges: 6646 Number of iterations: 40000 Estimated computation time: 0 hour(s) 5 minute(s) Estimated end of computation: 01-May-2015 13:52:10 ----------------------------------- ----------------------------------- End MCMC for GGP graphs Computation time: 0 hour(s) 2 minute(s) End of computation: 01-May-2015 13:49:10 ----------------------------------- ----------------------------------- MCMC chain 2/3 ----------------------------------- Start MCMC for GGP graphs Nb of nodes: 2284 - Nb of edges: 6646 Number of iterations: 40000 Estimated computation time: 0 hour(s) 2 minute(s) Estimated end of computation: 01-May-2015 13:50:52 ----------------------------------- ----------------------------------- End MCMC for GGP graphs Computation time: 0 hour(s) 2 minute(s) End of computation: 01-May-2015 13:50:51 ----------------------------------- ----------------------------------- MCMC chain 3/3 ----------------------------------- Start MCMC for GGP graphs Nb of nodes: 2284 - Nb of edges: 6646 Number of iterations: 40000 Estimated computation time: 0 hour(s) 2 minute(s) Estimated end of computation: 01-May-2015 13:52:50 ----------------------------------- ----------------------------------- End MCMC for GGP graphs Computation time: 0 hour(s) 2 minute(s) End of computation: 01-May-2015 13:52:30 -----------------------------------
Trace plots and posterior histograms
% Trace plots of the hyperparameters, one figure per variable, one color
% per chain
nsamples = length(objmcmc.samples(1).w_rem);
% Concatenate the chains (discarding the first half as burn-in) and
% return point estimates
[samples_all, estimates] = graphest(objmcmc, nsamples/2);

col = {'k', 'r', 'b'};  % chain colors
variables = {'alpha', 'sigma', 'tau', 'w_rem'};
names = {'\alpha', '\sigma', '\tau', 'w_*'};
for j = 1:length(variables)
    figure
    hold on
    for k = 1:nchains
        plot(thin:thin:niter, objmcmc.samples(k).(variables{j}), 'color', col{k});
    end
    legend boxoff
    xlabel('MCMC iterations', 'fontsize', 16);
    ylabel(names{j}, 'fontsize', 16);
    box off
end
% Posterior histograms of the hyperparameters (all chains pooled)
variables = {'alpha', 'sigma', 'tau', 'w_rem'};
names = {'\alpha', '\sigma', '\tau', 'w_*'};
for j = 1:numel(variables)
    figure
    hist(samples_all.(variables{j}), 30);
    legend boxoff
    ylabel('Number of samples', 'fontsize', 16);
    xlabel(names{j}, 'fontsize', 16);
    box off
end
Assessing the sparsity of the network
% Posterior probability that sigma >= 0, reported here as the probability
% that the underlying graph model is sparse, plus a 99% credible interval
% for sigma
proba_sparse = mean(samples_all.sigma >= 0);
fprintf('Probability of sparse graph = %.3f \n', proba_sparse);
sigma_interval = quantile(samples_all.sigma, [.005, .995]);
fprintf('99 %% posterior interval for sigma = [%.3f,%.3f] \n',...
    sigma_interval);
Probability of sparse graph = 0.280 99 % posterior interval for sigma = [-0.093,0.054]
Posterior predictive degree distribution
% Posterior predictive degree distribution: draw graphs from the
% posterior predictive and compare their degree distribution (95% band)
% against the observed network.
nsamples_all = length(samples_all.alpha);
ndraws = 2000;
% Indices of (roughly) evenly spaced retained posterior samples;
% renamed from `ind` to avoid shadowing the node-selection index above
idraw = floor(linspace(1, nsamples_all, ndraws));
freq = [];  % per-draw bin frequencies; width set from the first draw
htemp = figure('Visible', 'off');  % hidden figure absorbs plot_degree's plotting
for ii = 1:ndraws % Samples from the predictive
    if rem(ii, 200) == 0
        fprintf('Sample %d/%d from the posterior predictive\n', ii, ndraws);
    end
    % Sample a graph from the GGP model at the ii-th retained draw
    Gsamp = graphrnd(graphmodel('GGP', samples_all.alpha(idraw(ii)), ...
        samples_all.sigma(idraw(ii)), samples_all.tau(idraw(ii))), 1e-6);
    [h2, centerbins, f] = plot_degree(Gsamp);
    if isempty(freq)
        % Allocate once the number of degree bins is known
        % (was hard-coded to 13, which breaks if plot_degree's binning changes)
        freq = zeros(ndraws, numel(f));
    end
    freq(ii, :) = f;
end
close(htemp);

% Shaded region between the lower and upper quantile curves
plot_variance = @(x,lower,upper,color) fill([x,x(end:-1:1)],[upper,lower(end:-1:1)],color, 'EdgeColor', color);
quantile_freq = quantile(freq, [.025, .975]);

figure
plot(centerbins, quantile_freq, 'color', [.8, .8, 1], 'linewidth', 2.5);
hold on
% Fill only bins whose lower quantile is positive (log-log axes)
ind2 = quantile_freq(1,:) > 0;
ha = plot_variance(centerbins(ind2), quantile_freq(1,ind2), quantile_freq(2,ind2), [.8, .8, 1]);
set(gca, 'XScale', 'log')
set(gca, 'YScale', 'log')
hold on
hb = plot_degree(G);
set(hb, 'markersize', 10, 'marker', 'o',...
    'markeredgecolor', 'none', 'markerfacecolor', [1, .75, .75])
legend([ha, hb], {'95% posterior predictive', 'Data'})
legend boxoff
xlim([.8, 1e3])
box off
Sample 200/2000 from the posterior predictive Sample 400/2000 from the posterior predictive Sample 600/2000 from the posterior predictive Sample 800/2000 from the posterior predictive Sample 1000/2000 from the posterior predictive Sample 1200/2000 from the posterior predictive Sample 1400/2000 from the posterior predictive Sample 1600/2000 from the posterior predictive Sample 1800/2000 from the posterior predictive Sample 2000/2000 from the posterior predictive