<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8" /> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="pandoc" /> <title>MASH v FLASH introduction</title> <script src="site_libs/jquery-1.11.3/jquery.min.js"></script> <meta name="viewport" content="width=device-width, initial-scale=1" /> <link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" /> <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script> <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script> <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" /> <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script> <script src="site_libs/navigation-1.1/tabsets.js"></script> <script src="site_libs/navigation-1.1/codefolding.js"></script> <link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" /> <script src="site_libs/highlightjs-9.12.0/highlight.js"></script> <link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" /> <style type="text/css">code{white-space: pre;}</style> <style type="text/css"> pre:not([class]) { background-color: white; } </style> <script type="text/javascript"> if (window.hljs) { hljs.configure({languages: []}); hljs.initHighlightingOnLoad(); if (document.readyState && document.readyState === "complete") { window.setTimeout(function() { hljs.initHighlighting(); }, 0); } } </script> <style type="text/css"> h1 { font-size: 34px; } h1.title { font-size: 38px; } h2 { font-size: 30px; } h3 { font-size: 24px; } h4 { font-size: 18px; } h5 { font-size: 16px; } h6 { font-size: 12px; } .table th:not([align]) { text-align: left; } </style> </head> <body> <style type = "text/css"> .main-container { max-width: 940px; margin-left: auto; margin-right: auto; } code { color: inherit; background-color: rgba(0, 0, 0, 0.04); } img { max-width:100%; height: auto; } .tabbed-pane { padding-top: 12px; } button.code-folding-btn:focus { outline: none; } </style> <style type="text/css"> /* padding for bootstrap navbar */ body { padding-top: 51px; padding-bottom: 40px; } /* offset scroll position for anchor links (for fixed navbar) */ .section h1 { padding-top: 56px; margin-top: -56px; } .section h2 { padding-top: 56px; margin-top: -56px; } .section h3 { padding-top: 56px; margin-top: -56px; } .section h4 { padding-top: 56px; margin-top: -56px; } .section h5 { padding-top: 56px; margin-top: -56px; } .section h6 { padding-top: 56px; margin-top: -56px; } </style> <script> // manage active state of menu based on current page $(document).ready(function () { // active menu anchor href = window.location.pathname href = href.substr(href.lastIndexOf('/') + 1) if (href === "") href = "index.html"; var menuAnchor = $('a[href="' + href + '"]'); // mark it active menuAnchor.parent().addClass('active'); // if it's got a parent navbar menu mark it active as well menuAnchor.closest('li.dropdown').addClass('active'); }); </script> <div class="container-fluid main-container"> <!-- tabsets --> <script> $(document).ready(function () { window.buildTabsets("TOC"); }); </script> <!-- code folding --> <style type="text/css"> .code-folding-btn { margin-bottom: 4px; } </style> <script> $(document).ready(function () { window.initializeCodeFolding("show" === "show"); }); </script> <script> $(document).ready(function () { // move toc-ignore selectors from section div to header $('div.section.toc-ignore') .removeClass('toc-ignore') .children('h1,h2,h3,h4,h5').addClass('toc-ignore'); // establish options var options = { selectors: "h1,h2,h3", theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 }; options.showAndHide = true; options.smoothScroll = true; // tocify var toc = $("#TOC").tocify(options).data("toc-tocify"); }); </script> <style type="text/css"> #TOC { margin: 25px 0px 20px 0px; } @media (max-width: 768px) { #TOC { position: relative; width: 100%; } } .toc-content { padding-left: 30px; padding-right: 40px; } div.main-container { max-width: 1200px; } div.tocify { width: 20%; max-width: 260px; max-height: 85%; } @media (min-width: 768px) and (max-width: 991px) { div.tocify { width: 25%; } } @media (max-width: 767px) { div.tocify { width: 100%; max-width: none; } } .tocify ul, .tocify li { line-height: 20px; } .tocify-subheader .tocify-item { font-size: 0.90em; padding-left: 25px; text-indent: 0; } .tocify .list-group-item { border-radius: 0px; } </style> <!-- setup 3col/9col grid for toc_float and main content --> <div class="row-fluid"> <div class="col-xs-12 col-sm-4 col-md-3"> <div id="TOC" class="tocify"> </div> </div> <div class="toc-content col-xs-12 col-sm-8 col-md-9"> <div class="navbar navbar-default navbar-fixed-top" role="navigation"> <div class="container"> <div class="navbar-header"> <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button> <a class="navbar-brand" href="index.html">MASHvFLASH</a> </div> <div id="navbar" class="navbar-collapse collapse"> <ul class="nav navbar-nav"> <li> <a href="index.html">Home</a> </li> <li> <a href="about.html">About</a> </li> <li> <a href="license.html">License</a> </li> </ul> <ul class="nav navbar-nav navbar-right"> <li> <a href="https://github.com/jdblischak/workflowr"> <span class="fa fa-github"></span> </a> </li> </ul> </div><!--/.nav-collapse --> </div><!--/.container --> </div><!--/.navbar --> <!-- Add a small amount of space between sections. --> <style type="text/css"> div.section { padding-top: 12px; } </style> <div class="fluid-row" id="header"> <div class="btn-group pull-right"> <button type="button" class="btn btn-default btn-xs dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"><span>Code</span> <span class="caret"></span></button> <ul class="dropdown-menu" style="min-width: 50px;"> <li><a id="rmd-show-all-code" href="#">Show All Code</a></li> <li><a id="rmd-hide-all-code" href="#">Hide All Code</a></li> </ul> </div> <h1 class="title toc-ignore">MASH v FLASH introduction</h1> </div> <p><strong>Last updated:</strong> 2018-06-21</p> <strong>workflowr checks:</strong> <small>(Click a bullet for more information)</small> <ul> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>R Markdown file:</strong> up-to-date </summary></p> <p>Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Environment:</strong> empty </summary></p> <p>Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Seed:</strong> <code>set.seed(20180609)</code> </summary></p> <p>The command <code>set.seed(20180609)</code> was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Session information:</strong> recorded </summary></p> <p>Great job! Recording the operating system, R version, and package versions is critical for reproducibility.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Repository version:</strong> <a href="https://github.com/willwerscheid/MASHvFLASH/tree/56b00c1ca80dd7655697dff44146bf397a829e9f" target="_blank">56b00c1</a> </summary></p> Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated. <br><br> Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use <code>wflow_publish</code> or <code>wflow_git_commit</code>). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated: <pre><code> Ignored files: Ignored: .DS_Store Ignored: .Rhistory Ignored: .Rproj.user/ Ignored: docs/.DS_Store Ignored: docs/images/.DS_Store Ignored: output/.DS_Store </code></pre> Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes. </details> </li> </ul> <details> <summary> <small><strong>Expand here to see past versions:</strong></small> </summary> <ul> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> File </th> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> <th style="text-align:left;"> Message </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/willwerscheid/MASHvFLASH/blob/56b00c1ca80dd7655697dff44146bf397a829e9f/analysis/intro.Rmd" target="_blank">56b00c1</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-21 </td> <td style="text-align:left;"> wflow_publish(“analysis/intro.Rmd”) </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/willwerscheid/MASHvFLASH/736c5705449728276c3561bf495b5b2d026c898a/docs/intro.html" target="_blank">736c570</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-21 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/willwerscheid/MASHvFLASH/blob/ffb36e7e9871e229e4805b07de45fe6222f8f8ae/analysis/intro.Rmd" target="_blank">ffb36e7</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-21 </td> <td style="text-align:left;"> wflow_publish(“analysis/intro.Rmd”) </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/willwerscheid/MASHvFLASH/69cecbc60b8616a3ceaf513a6f169f249bcca6d4/docs/intro.html" target="_blank">69cecbc</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-20 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/willwerscheid/MASHvFLASH/blob/abf0f70d7c77ab8cba395605aab7ea8116103c02/analysis/intro.Rmd" target="_blank">abf0f70</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-20 </td> <td style="text-align:left;"> workflowr::wflow_publish(c(“analysis/intro.Rmd”, </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/willwerscheid/MASHvFLASH/558656aaff8347e0c0b5e985b60ebb0f94e677d9/docs/intro.html" target="_blank">558656a</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-20 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/willwerscheid/MASHvFLASH/blob/9a419c23df646e2497cbe151089706d461929d15/analysis/intro.Rmd" target="_blank">9a419c2</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-20 </td> <td style="text-align:left;"> workflowr::wflow_publish(“analysis/intro.Rmd”) </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/willwerscheid/MASHvFLASH/bdf2579326ce383f77f1d9ae89a21ab689bdd48c/docs/intro.html" target="_blank">bdf2579</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-19 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/willwerscheid/MASHvFLASH/blob/a5fd6f414a45c23583a20455ed1930ff325f8d00/analysis/intro.Rmd" target="_blank">a5fd6f4</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-19 </td> <td style="text-align:left;"> wflow_publish(“analysis/intro.Rmd”) </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/willwerscheid/MASHvFLASH/e092b8715080145e9c36a27970c56b735fe57c0d/docs/intro.html" target="_blank">e092b87</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-18 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/willwerscheid/MASHvFLASH/blob/8d6ce20d2e3f99805ed55f22295714634f3bb0e1/analysis/intro.Rmd" target="_blank">8d6ce20</a> </td> <td style="text-align:left;"> Jason Willwerscheid </td> <td style="text-align:left;"> 2018-06-18 </td> <td style="text-align:left;"> wflow_publish(“analysis/intro.Rmd”) </td> </tr> </tbody> </table> </ul> <p></details></p> <hr /> <div id="introduction" class="section level2"> <h2>Introduction</h2> <p>This vignette shows how <code>flashr</code> can be used to learn something about the covariance structure of a dataset.</p> </div> <div id="motivation-part-i-flash-and-covariance" class="section level2"> <h2>Motivation, part I: flash and covariance</h2> <p>Recall that <code>flashr</code> fits the model <span class="math display">\[\begin{aligned} Y &= X + E \\ X &= LF' \end{aligned}\]</span> where <span class="math inline">\(Y\)</span> (the “observed” data) and <span class="math inline">\(X\)</span> (the “true” effects) are <span class="math inline">\(n \times p\)</span> matrices. <span class="math inline">\(L\)</span> is an <span class="math inline">\(n \times k\)</span> matrix of loadings, <span class="math inline">\(F\)</span> is a <span class="math inline">\(p \times k\)</span> matrix of factors, and <span class="math inline">\(E\)</span> is an <span class="math inline">\(n \times p\)</span> matrix of residuals. Denote the columns of <span class="math inline">\(L\)</span> as <span class="math inline">\(\ell_1, \dots, \ell_k\)</span> and the columns of <span class="math inline">\(F\)</span> as <span class="math inline">\(f_1, \ldots, f_k\)</span>.</p> <p>Notice that the fitted model does not only tell us about how the elements of <span class="math inline">\(L\)</span> and <span class="math inline">\(F\)</span> are distributed; it also tells us something about how the elements of <span class="math inline">\(X\)</span> covary.</p> <p>In particular, if <span class="math inline">\(L\)</span> is regarded as fixed (by, for example, fixing each <span class="math inline">\(L_{ij}\)</span> at its posterior mean), then one may write <span class="math display">\[ X_{:, j} = F_{j 1} \ell_1 + \ldots + F_{j k} \ell_k \]</span> so that, if the loadings <span class="math inline">\(\ell_1, \ldots, \ell_k\)</span> are mutually orthogonal (in general, they are not), <span class="math display">\[ \begin{aligned} \text{Cov} (X_{:, j}) &= (\mathbb{E} F_{j1}^2) \ell_1 \ell_1' + \ldots + (\mathbb{E} F_{jk}^2) \ell_k \ell_k' \end{aligned}\]</span></p> <p>In other words, the covariance of the columns of <span class="math inline">\(X\)</span> will be a linear combination (or, more precisely, a conical combination) of the rank-one covariance matrices <span class="math inline">\(\ell_i \ell_i'\)</span>.</p> </div> <div id="motivation-part-ii-covariance-mixtures" class="section level2"> <h2>Motivation, part II: covariance mixtures</h2> <p>One can take this idea a bit further by recalling that the priors on factors <span class="math inline">\(f_1, \ldots, f_k\)</span> are mixture distributions. For simplicity, assume that the priors are point-normal distributions <span class="math display">\[f_m \sim (1 - \pi_m) \delta_0 + \pi_m N(0, \tau_m^2)\]</span> with <span class="math inline">\(0 \le \pi_m \le 1\)</span>.</p> <p>By augmenting the data with hidden variables <span class="math inline">\(I_{jm}\)</span> that indicate the mixture components from which the elements <span class="math inline">\(F_{jm}\)</span> are drawn, one may equivalently write <span class="math display">\[\begin{aligned} F_{jm} \mid I_{jm} = 0 &\sim \delta_0 \\ F_{jm} \mid I_{jm} = 1 &\sim N(0, \tau_m^2) \\ I_{jm} &\sim \text{Bernoulli}(\pi_m) \end{aligned}\]</span> so that <span class="math inline">\(I_{jm} = 1\)</span> if column <span class="math inline">\(j\)</span> is “active” for factor <span class="math inline">\(m\)</span> and <span class="math inline">\(I_{jm} = 0\)</span> otherwise.</p> <p>Now, if one regards the variables <span class="math inline">\(I_{jm}\)</span> (as well as <span class="math inline">\(L\)</span>) as fixed, then one obtains <span class="math display">\[ \begin{aligned} \text{Cov} (X_{:, j}) &= I_{j 1} \tau_1^2 \ell_1 \ell_1' + \ldots + I_{j_m} \tau_k^2 \ell_k \ell_k' \end{aligned}\]</span></p> <p>In other words, depending on the values of <span class="math inline">\(I_{j1}, \ldots, I_{jm}\)</span>, the elements of column <span class="math inline">\(X_{:, j}\)</span> will covary in one of <span class="math inline">\(2^m\)</span> possible ways.</p> </div> <div id="relation-to-mash" class="section level2"> <h2>Relation to mash</h2> <p>In fact, if the priors on factors <span class="math inline">\(f_1, \ldots, f_k\)</span> are arbitrary mixtures of normals (including the point-normal priors discussed in the previous section), then fixing <span class="math inline">\(L\)</span> (and again assuming that the columns of <span class="math inline">\(L\)</span> are mutually orthogonal) results in the columns of <span class="math inline">\(X\)</span> being distributed (exchangeably) as a mixture of multivariate normals. In the point-normal case, <span class="math display">\[ X_{:, j} \sim \sum_{r = 1}^{2^m} N_n(0, \Sigma_r), \]</span> where each <span class="math inline">\(\Sigma_r\)</span> is a conical combination of the rank-one covariance matrices <span class="math inline">\(\ell_1 \ell_1', \ldots, \ell_m \ell_m'\)</span>.</p> <p><code>mashr</code> (see <a href="https://stephenslab.github.io/mashr/index.html">here</a>) similarly models <span class="math inline">\(X\)</span> as a mixture of multivariate normals, so it makes sense to attempt to use <code>flashr</code> (which is, on its face, much simpler than <code>mashr</code>) to similar ends.</p> </div> <div id="canonical-covariance-structures" class="section level2"> <h2>Canonical covariance structures</h2> <p>In addition to a set of covariance matrices that are fit to the data, <code>mashr</code> includes a set of “canonical” covariance matrices. For example, it is reasonable to expect that some effects will be unique to a single condition <span class="math inline">\(i\)</span>. For the corresponding columns of <span class="math inline">\(X\)</span>, the covariance matrix will have a single non-zero entry (namely, the <span class="math inline">\(i\)</span>th diagonal entry, <span class="math inline">\(\Sigma_{ii}\)</span>).</p> <p>One can accommodate such effects in <code>flashr</code> by adding <span class="math inline">\(n\)</span> fixed one-hot vectors as loadings (one for each condition). In other words, we can add “canonical” covariance structures corresponding to effects that are unique in a single condition by fitting the model <span class="math display">\[ X = \begin{pmatrix} L & I_n \end{pmatrix} F'. \]</span></p> <p>By the same logic as above, this model should be able to accommodate conical combinations of the matrices <span class="math inline">\(e_1 e_1', \ldots, e_n e_n'\)</span> (where <span class="math inline">\(e_i\)</span> is the <span class="math inline">\(i\)</span>th canonical basis vector). In particular, it should be able to model effects that are independent across conditions, or unique to a few conditions, as well as effects that are unique to a single condition.</p> </div> <div id="fixing-standard-errors" class="section level2"> <h2>Fixing standard errors</h2> <p>Now the full model is <span class="math display">\[\begin{aligned} Y &= \begin{pmatrix} L & I_n \end{pmatrix} F' + E. \\ \end{aligned}\]</span></p> <p>Notice that this approach only makes sense if the standard errors for <span class="math inline">\(E\)</span> are considered fixed. Writing <span class="math display">\[ F = \begin{pmatrix} F_1' \\ F_2' \end{pmatrix}\]</span> gives <span class="math display">\[ Y = LF_1' + F_2' + E, \]</span> so that, for example, putting independent <span class="math inline">\(N(0, 1)\)</span> priors on the elements of <span class="math inline">\(F_2\)</span> and <span class="math inline">\(E\)</span> is equivalent to putting a <span class="math inline">\(\delta_0\)</span> prior on the elements of <span class="math inline">\(F_2\)</span> and independent <span class="math inline">\(N(0, 2)\)</span> priors on the residuals.</p> <p>To fix standard errors in <code>flashr</code>, it is necessary to pass the data into <code>flash_set_data</code> using parameter <code>S</code> to specify standard errors. Further, calls to <code>flash</code> (and <code>flash_add_greedy</code> and <code>flash_backfit</code>) must specify parameter option <code>var_type = "zero"</code>, which indicates that standard errors should not be estimated, but should be considered fixed.</p> </div> <div id="fitting-the-flash-object" class="section level2"> <h2>Fitting the flash object</h2> <p>Finally, the recommended procedure is as follows:</p> <ol style="list-style-type: decimal"> <li><p>Create a flash data object, using parameter <code>S</code> to specify standard errors.</p></li> <li><p>Add the “data-driven” loadings to the flash fit object greedily, using parameter option <code>var_type = "zero"</code> to indicate that the standard errors should be considered fixed.</p></li> <li><p>Add <span class="math inline">\(n\)</span> fixed one-hot loadings vectors to the flash fit object.</p></li> <li><p>Refine the flash fit object via backfitting, again using parameter option <code>var_type = "zero"</code>. The parameter option <code>nullcheck = FALSE</code> should also be used so that the canonical covariance structures are retained.</p></li> </ol> </div> <div id="example-with-simulated-data" class="section level2"> <h2>Example with simulated data</h2> <p>The first code chunk simulates a <span class="math inline">\(10 \times 400\)</span> data matrix where a quarter of columns <span class="math inline">\(X_{:, j}\)</span> are null across all conditions, a quarter are nonnull in the first condition only, a quarter are nonnull in all conditions with effect sizes that are independent across conditions, and a quarter are nonnull in all conditions with an effect size that is identical across conditions. The effect sizes are all drawn from a normal distribution with standard deviation equal to 5.</p> <pre class="r"><code>set.seed(1) n <- 5 p <- 400 ncond <- n # n nsamp <- as.integer(p / 4) # Null effects: X.zero = matrix(0, nrow=ncond, ncol=nsamp) # Effects that occur only in condition 1: X.one = X.zero b2 = 5 * rnorm(nsamp) X.one[1,] = b2 # Independent effects: X.ind = matrix(5 * rnorm(ncond*nsamp), nrow=ncond, ncol=nsamp) b = 5 * rnorm(nsamp) # Effects that are identical across conditions: X.ident = matrix(rep(b, ncond), nrow=ncond, ncol=nsamp, byrow=T) X = cbind(X.zero, X.one, X.ind, X.ident) E = matrix(rnorm(4*ncond*nsamp), nrow=ncond, ncol=4*nsamp) Y = X + E</code></pre> <p>The next code chunk fits a flash object using the procedure described in the previous section.</p> <pre class="r"><code>#library(flashr) devtools::load_all("/Users/willwerscheid/GitHub/flashr2/") data <- flash_set_data(Y, S = 1) fl_greedy <- flash_add_greedy(data, Kmax = 10, var_type = "zero") I_n <- diag(rep(1, n)) fl_fixedl <- flash_add_fixed_l(data, I_n, fl_greedy) fl_backfit <- flash_backfit(data, fl_fixedl, var_type = "zero", nullcheck = F)</code></pre> <p>Finally, the following code chunk calculates the mean squared error obtained using the flash fit object posterior means, relative to the mean squared error that would be obtained by simply estimating <span class="math inline">\(X\)</span> using the data matrix <span class="math inline">\(Y\)</span>.</p> <pre class="r"><code>baseline_mse <- sum((Y - X)^2)/(n * p) fl_preds <- flash_get_lf(fl_backfit) flash_mse <- sum((fl_preds - X)^2)/(n * p) flash_mse / baseline_mse</code></pre> <pre><code>[1] 0.5224411</code></pre> <p>To verify that <code>flashr</code> has in fact learned something about how the data covaries, one can collapse the data into a vector and use <code>ashr</code> to fit a prior that is a univariate mixture of normals.</p> <pre class="r"><code>ash_fit <- ashr::ash(betahat = as.vector(Y), sebetahat = 1) ash_pm <- ashr::get_pm(ash_fit) ash_preds <- matrix(ash_pm, nrow=n, ncol=p) ash_mse <- sum((ash_preds - X)^2)/(n * p) ash_mse / baseline_mse</code></pre> <pre><code>[1] 0.7572497</code></pre> <p>So, the <code>flashr</code> estimates are much better, even though <code>flashr</code> uses point-normal priors rather than the more flexible class of normal mixtures used by <code>ashr</code>.</p> </div> <div id="flash-v-mash" class="section level2"> <h2>FLASH v MASH</h2> <p>For this particular simulation, <code>mashr</code> outperforms <code>flashr</code> in terms of MSE:</p> <pre class="r"><code>library(mashr) data <- mash_set_data(t(Y)) U.c = cov_canonical(data) m.1by1 <- mash_1by1(data) strong <- get_significant_results(m.1by1, 0.05) U.pca <- cov_pca(data, 5, strong) U.ed <- cov_ed(data, U.pca, strong) U <- c(U.c, U.ed) m <- mash(data, U)</code></pre> <pre class="r"><code>mash_mse <- sum((t(get_pm(m)) - X)^2)/(n * p) mash_mse / baseline_mse</code></pre> <pre><code>[1] 0.4161114</code></pre> <p>This is as expected, since the data were after all generated from the mash model. More generally, however, the two methods often perform quite similarly. See the simulation study <a href="MASHvFLASHsims.html">here</a> and a comparison using GTEx data <a href="MASHvFLASHgtex.html">here</a>.</p> </div> <div id="session-information" class="section level2"> <h2>Session information</h2> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.4.3 (2017-11-30) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.6 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] mashr_0.2-7 ashr_2.2-7 flashr_0.5-8 loaded via a namespace (and not attached): [1] Rcpp_0.12.17 pillar_1.2.1 [3] plyr_1.8.4 compiler_3.4.3 [5] git2r_0.21.0 workflowr_1.0.1 [7] R.methodsS3_1.7.1 R.utils_2.6.0 [9] iterators_1.0.9 tools_3.4.3 [11] testthat_2.0.0 digest_0.6.15 [13] tibble_1.4.2 evaluate_0.10.1 [15] memoise_1.1.0 gtable_0.2.0 [17] lattice_0.20-35 rlang_0.2.0 [19] Matrix_1.2-12 foreach_1.4.4 [21] commonmark_1.4 yaml_2.1.17 [23] parallel_3.4.3 mvtnorm_1.0-7 [25] ebnm_0.1-11 withr_2.1.1.9000 [27] stringr_1.3.0 roxygen2_6.0.1.9000 [29] xml2_1.2.0 knitr_1.20 [31] REBayes_1.2 devtools_1.13.4 [33] rprojroot_1.3-2 grid_3.4.3 [35] R6_2.2.2 rmarkdown_1.8 [37] rmeta_3.0 ggplot2_2.2.1 [39] magrittr_1.5 whisker_0.3-2 [41] backports_1.1.2 scales_0.5.0 [43] codetools_0.2-15 htmltools_0.3.6 [45] MASS_7.3-48 assertthat_0.2.0 [47] softImpute_1.4 colorspace_1.3-2 [49] stringi_1.1.6 Rmosek_7.1.3 [51] lazyeval_0.2.1 munsell_0.4.3 [53] doParallel_1.0.11 pscl_1.5.2 [55] truncnorm_1.0-8 SQUAREM_2017.10-1 [57] ExtremeDeconvolution_1.3 R.oo_1.21.0 </code></pre> </div> <!-- Adjust MathJax settings so that all math formulae are shown using TeX fonts only; see http://docs.mathjax.org/en/latest/configuration.html. This will make the presentation more consistent at the cost of the webpage sometimes taking slightly longer to load. Note that this only works because the footer is added to webpages before the MathJax javascript. --> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <hr> <p> This reproducible <a href="http://rmarkdown.rstudio.com">R Markdown</a> analysis was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> 1.0.1 </p> <hr> </div> </div> </div> <script> // add bootstrap table styles to pandoc tables function bootstrapStylePandocTables() { $('tr.header').parent('thead').parent('table').addClass('table table-condensed'); } $(document).ready(function () { bootstrapStylePandocTables(); }); </script> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>