<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8" /> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="pandoc" /> <meta name="author" content="Lei Sun" /> <meta name="date" content="2017-05-09" /> <title>Improvement on Implementation with Rmosek: Regularization</title> <script src="site_libs/jquery-1.11.3/jquery.min.js"></script> <meta name="viewport" content="width=device-width, initial-scale=1" /> <link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" /> <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script> <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script> <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" /> <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script> <script src="site_libs/navigation-1.1/tabsets.js"></script> <link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" /> <script src="site_libs/highlightjs-9.12.0/highlight.js"></script> <link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" /> <style type="text/css">code{white-space: pre;}</style> <style type="text/css"> pre:not([class]) { background-color: white; } </style> <script type="text/javascript"> if (window.hljs) { hljs.configure({languages: []}); hljs.initHighlightingOnLoad(); if (document.readyState && document.readyState === "complete") { window.setTimeout(function() { hljs.initHighlighting(); }, 0); } } </script> <style type="text/css"> h1 { font-size: 34px; } h1.title { font-size: 38px; } h2 { font-size: 30px; } h3 { font-size: 24px; } h4 { font-size: 18px; } h5 { font-size: 16px; } h6 { font-size: 12px; } .table th:not([align]) { text-align: left; } </style> </head> <body> <style type = "text/css"> .main-container { max-width: 940px; margin-left: auto; margin-right: auto; } code { color: inherit; background-color: rgba(0, 0, 0, 0.04); } img { max-width:100%; height: auto; } .tabbed-pane { padding-top: 12px; } button.code-folding-btn:focus { outline: none; } </style> <style type="text/css"> /* padding for bootstrap navbar */ body { padding-top: 51px; padding-bottom: 40px; } /* offset scroll position for anchor links (for fixed navbar) */ .section h1 { padding-top: 56px; margin-top: -56px; } .section h2 { padding-top: 56px; margin-top: -56px; } .section h3 { padding-top: 56px; margin-top: -56px; } .section h4 { padding-top: 56px; margin-top: -56px; } .section h5 { padding-top: 56px; margin-top: -56px; } .section h6 { padding-top: 56px; margin-top: -56px; } </style> <script> // manage active state of menu based on current page $(document).ready(function () { // active menu anchor href = window.location.pathname href = href.substr(href.lastIndexOf('/') + 1) if (href === "") href = "index.html"; var menuAnchor = $('a[href="' + href + '"]'); // mark it active menuAnchor.parent().addClass('active'); // if it's got a parent navbar menu mark it active as well menuAnchor.closest('li.dropdown').addClass('active'); }); </script> <div class="container-fluid main-container"> <!-- tabsets --> <script> $(document).ready(function () { window.buildTabsets("TOC"); }); </script> <!-- code folding --> <script> $(document).ready(function () { // move toc-ignore selectors from section div to header $('div.section.toc-ignore') .removeClass('toc-ignore') .children('h1,h2,h3,h4,h5').addClass('toc-ignore'); // establish options var options = { selectors: "h1,h2,h3", theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 }; options.showAndHide = true; options.smoothScroll = true; // tocify var toc = $("#TOC").tocify(options).data("toc-tocify"); }); </script> <style type="text/css"> #TOC { margin: 25px 0px 20px 0px; } @media (max-width: 768px) { #TOC { position: relative; width: 100%; } } .toc-content { padding-left: 30px; padding-right: 40px; } div.main-container { max-width: 1200px; } div.tocify { width: 20%; max-width: 260px; max-height: 85%; } @media (min-width: 768px) and (max-width: 991px) { div.tocify { width: 25%; } } @media (max-width: 767px) { div.tocify { width: 100%; max-width: none; } } .tocify ul, .tocify li { line-height: 20px; } .tocify-subheader .tocify-item { font-size: 0.90em; padding-left: 25px; text-indent: 0; } .tocify .list-group-item { border-radius: 0px; } </style> <!-- setup 3col/9col grid for toc_float and main content --> <div class="row-fluid"> <div class="col-xs-12 col-sm-4 col-md-3"> <div id="TOC" class="tocify"> </div> </div> <div class="toc-content col-xs-12 col-sm-8 col-md-9"> <div class="navbar navbar-default navbar-fixed-top" role="navigation"> <div class="container"> <div class="navbar-header"> <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button> <a class="navbar-brand" href="index.html">truncash</a> </div> <div id="navbar" class="navbar-collapse collapse"> <ul class="nav navbar-nav"> <li> <a href="index.html">Home</a> </li> <li> <a href="about.html">About</a> </li> <li> <a href="license.html">License</a> </li> </ul> <ul class="nav navbar-nav navbar-right"> <li> <a href="https://github.com/LSun/truncash"> <span class="fa fa-github"></span> </a> </li> </ul> </div><!--/.nav-collapse --> </div><!--/.container --> </div><!--/.navbar --> <div class="fluid-row" id="header"> <h1 class="title toc-ignore">Improvement on Implementation with <code>Rmosek</code>: Regularization</h1> <h4 class="author"><em>Lei Sun</em></h4> <h4 class="date"><em>2017-05-09</em></h4> </div> <!-- The file analysis/chunks.R contains chunks that define default settings shared across the workflowr files. --> <!-- Update knitr chunk options --> <!-- Insert the date the file was last updated --> <p><strong>Last updated:</strong> 2017-12-21</p> <!-- Insert the code version (Git commit SHA1) if Git repository exists and R package git2r is installed --> <p><strong>Code version:</strong> 6e42447</p> <!-- Add your analysis here --> <section id="introduction" class="level2"> <h2>Introduction</h2> <p>The <span class="math inline">\(w\)</span> optimization problem we hope to solve has two constraints. If we are not imposing these two constraints, chances are the optimization will be <a href="gaussian_derivatives_3.html">unstable</a>, especially when we don’t know <span class="math inline">\(L\)</span> for sure beforehand. However, these two constraints are hard to be converted directly and strictly to convex or even tractable forms.</p> <p><span class="math display">\[ \begin{array}{rl} \min\limits_{w} & -\sum\limits_{j = 1}^n\log\left(\sum\limits_{l=1}^Lw_l\left(\sum\limits_{k = 1}^K\hat\pi_k f_{jkl}\right) + \sum\limits_{k = 1}^K\hat\pi_kf_{jk0}\right) + \sum\limits_{l = 1}^L\lambda_l^w \phi \left(\left|w_l\right|\right) \\ \text{subject to} & \sum\limits_{l=1}^L w_l \varphi^{(l)}\left(z\right) + \varphi\left(z\right) \geq 0, \forall z\in \mathbb{R}\\ & w_l \text{ decay reasonably fast.} \end{array} \]</span></p> <p>One observation is that without the constraints, the optimization goes unstable as <span class="math inline">\(\hat w\)</span> get uncontrollably large. Therefore, a natural idea to replace these two constraints would be to bound, penalize, or regularize <span class="math inline">\(w\)</span>, to prevent them from being too large.</p> <p>On the other hand, as indicated the <a href="gaussian_derivatives.html">theory</a> and <a href="mosek_reg.html">examples</a> of good fitting by Gaussian derivatives, <span class="math inline">\(w_l\)</span> is getting smaller as <span class="math inline">\(l\)</span> increases. Moreover, oftentimes, really small <span class="math inline">\(w_l\)</span> could still make a difference and thus indispensable. Therefore, although we need to stop <span class="math inline">\(\hat w\)</span> getting too large, we cerntainly don’t want to shrink them unnecessarily and unwarrantedly to zero.</p> <p>Ideally, the goal of <span class="math inline">\(\sum\limits_{l = 1}^L\lambda_l^w \phi \left(\left|w_l\right|\right)\)</span> regularizing <span class="math inline">\(w\)</span> should be to</p> </section> <section id="ideas-on-lambda_lw-and-phi" class="level2"> <h2>Ideas on <span class="math inline">\(\lambda_l^w\)</span> and <span class="math inline">\(\phi\)</span></h2> <p>Remember that in <a href="gaussian_derivatives.html">theory</a>, if we are writing the random empirical density of the correlated marginally <span class="math inline">\(N\left(0, 1\right)\)</span> random samples as</p> <p><span class="math display">\[ f_0\left(z\right) = \varphi\left(z\right) + \sum\limits_{l = 1}^\infty W_k\varphi^{(l)}\left(z\right) \ , \]</span> then</p> <p><span class="math display">\[ \text{var}\left(W_l\right) = \frac{\alpha_l}{l!} \ , \]</span> where</p> <p><span class="math display">\[ \alpha_l = \frac{1}{\frac{n\left(n - 1\right)}{2}}\sum_\limits{i < j}\rho_{ij}^l := \bar{\rho_{ij}^l} \ . \]</span></p> <p>Therefore, naturally <span class="math inline">\(w_l\)</span> should decay somehow in the order of</p> <p><span class="math display">\[ \left|w_l\right| \leadsto \sqrt{\text{var}\left(W_l\right)} = \frac{\sqrt{\bar{\rho_{ij}^l}}}{\sqrt{l!}} \ . \]</span></p> <section id="normalization" class="level3"> <h3>Normalization</h3> <p>The order of decay suggests that we should work with <a href="mosek_reg_2.html">normalized coefficients</a>, so that</p> <p><span class="math display">\[ \left|w_l^n\right| = \sqrt{l!}\left|w_l\right| \leadsto \sqrt{\bar{\rho_{ij}^l}} \ . \]</span> This piece of information on the order of magnitude of the normalized <span class="math inline">\(w_l^n\)</span> provides a hint to determine <span class="math inline">\(\lambda_l^w\)</span>, for example,</p> </section> <section id="regularizing-even-orders-only" class="level3"> <h3>Regularizing even orders only</h3> <p>Odd orders of Gaussian derivatives are generally associated with mean shift and skewness of <span class="math inline">\(f_0\)</span>, so they are generally very small, but if they are indeed not zero, they are important however they are small.</p> <p>Meanwhile, when <span class="math inline">\(l\)</span> is odd, <span class="math inline">\(\bar{\rho_{ij}^l}\)</span> could be very small, and not in order of <span class="math inline">\(\bar{\rho^l}\)</span> for some <span class="math inline">\(\rho \in \left(0, 1\right)\)</span>, so it’s very difficult to come up with a good <span class="math inline">\(\lambda_l^w\)</span> when <span class="math inline">\(l\)</span> is odd.</p> <p>Therefore, it might be better to leave the odd orders alone and regularize the even orders only.</p> </section> <section id="penalty-l_1-and-l_2" class="level3"> <h3>Penalty: <span class="math inline">\(l_1\)</span> and <span class="math inline">\(l_2\)</span></h3> <p>Generally speaking, <span class="math inline">\(l_1\)</span> imposes sparsity by shrinking small estimates exactly to zero, which is both good and bad for our case, whereas <span class="math inline">\(l_2\)</span> penalizes large estimates severely but doesn’t force exact sparsity, which could also be good or bad.</p> <p>We’ll implement both and see what happens.</p> </section> <section id="lambda_lw" class="level3"> <h3><span class="math inline">\(\lambda_l^w\)</span></h3> <p><span class="math inline">\(\lambda_l^w\)</span> is determined to help ensure</p> <p><span class="math display">\[ \left|w_l^n\right| = \sqrt{l!}\left|w_l\right| \leadsto \sqrt{\bar{\rho_{ij}^l}} \ , \]</span></p> <p>Given <span class="math inline">\(\rho \in \left(0, 1\right)\)</span>, for <span class="math inline">\(l_1\)</span> regularization, <span class="math inline">\(\lambda_l^n \sim \lambda / \sqrt{\rho^l}\)</span>; for <span class="math inline">\(l_2\)</span> regularization, <span class="math inline">\(\lambda_l^n \sim \lambda / \rho^l\)</span>.</p> </section> <section id="l" class="level3"> <h3><span class="math inline">\(L\)</span></h3> <p>So far, <span class="math inline">\(L = 9\)</span> has been able to handle all the example we’ve tried. Without constraints or regularization, <span class="math inline">\(L = 9\)</span> is usually too large for cases where, say, <span class="math inline">\(L = 4\)</span> is enough, and a larger than necessary <span class="math inline">\(L\)</span> could lead to numerical instability.</p> <p>Hence, we’ll experiment with <span class="math inline">\(L = 12\)</span>, assuming it’s enough for all the correlation induced distortions in practice, and assuming the regularization could prevent the optimization from going crazy.</p> </section> </section> <section id="summary" class="level2"> <h2>Summary</h2> <p>The <span class="math inline">\(w\)</span> optimization problem becomes</p> <p><span class="math display">\[ \begin{array}{rl} \min\limits_{w, g} & -\sum\limits_{j = 1}^n \log\left(g_j\right) + \text{Regularization} \\ \text{subject to} & Aw + a = g \\ & g \geq 0\ , \end{array} \]</span> where <span class="math inline">\(A_{n \times L} = \left[a_1,\ldots, a_L\right]\)</span> and <span class="math inline">\(a\)</span> are computed with normalized Gaussian derivatives and Hermite polynomials.</p> <p>Let <span class="math inline">\(\lambda > 0\)</span>, <span class="math inline">\(\rho \in \left(0, 1\right)\)</span>, the regularization term has two forms as follows.</p> <section id="l_1-regularization" class="level3"> <h3><span class="math inline">\(l_1\)</span> regularization</h3> <p><span class="math display">\[ \begin{array}{l} \sum\limits_{l = 1}^L\lambda_l^{w^s}\left|w_l^s\right| \ ,\\ \lambda_l^{w^s} = \begin{cases} 0, & l \text{ is odd;} \\ \lambda / \rho^{l/2}, & l \text{ is even.} \end{cases} \end{array} \]</span></p> </section> <section id="l_2-regularization" class="level3"> <h3><span class="math inline">\(l_2\)</span> regularization</h3> <p><span class="math display">\[ \begin{array}{l} \sum\limits_{l = 1}^L \lambda_l^{w^s}{w_l^s}^2 \ ,\\ \lambda_l^{w^s} = \begin{cases} 0, & l \text{ is odd;} \\ \lambda / \rho^{l}, & l \text{ is even.} \end{cases} \end{array} \]</span></p> </section> </section> <section id="dual-problem-for-l_1-regularization." class="level2"> <h2>Dual problem for <span class="math inline">\(l_1\)</span> regularization.</h2> <p>The primal form is</p> <p><span class="math display">\[ \begin{array}{rl} \min\limits_{w, g} & -\sum\limits_{j = 1}^n \log\left(g_j\right) + \sum\limits_{l = 1}^L\lambda_l^{w^s}\left|w_l^s\right| \\ \text{subject to} & Aw + a = g \\ & g \geq 0\ , \end{array} \]</span> The dual form is</p> <p><span class="math display">\[ \begin{array}{rl} \min\limits_{v} & -\sum\limits_{j = 1}^n \log\left(v_j\right) + a^Tv - n \\ \text{subject to} & -\lambda \leq A^Tv \leq \lambda \\ & v \geq 0\ . \end{array} \]</span></p> </section> <section id="dual-problem-for-l_2-regularization." class="level2"> <h2>Dual problem for <span class="math inline">\(l_2\)</span> regularization.</h2> <p>The primal form is</p> <p><span class="math display">\[ \begin{array}{rl} \min\limits_{w, g} & -\sum\limits_{j = 1}^n \log\left(g_j\right) + \sum\limits_{l = 1}^L\lambda_l^{w^s}{w_l^s}^2 \\ \text{subject to} & Aw + a = g \\ & g \geq 0\ , \end{array} \]</span> The dual form is</p> <p><span class="math display">\[ \begin{array}{rl} \min\limits_{v} & -\sum\limits_{j = 1}^n \log\left(v_j\right) + a^Tv - n + \frac14 v^T \left(A \Lambda^{-1} A^T\right)v \\ \text{subject to} & a_j^Tv = 0 \ \text{ if }\lambda_j = 0 \ ,\\ & v \geq 0\ , \end{array} \]</span> where <span class="math inline">\(\Lambda = \begin{bmatrix}\lambda_1 & & \\ & \ddots & \\ & & \lambda_L \end{bmatrix}\)</span>, and <span class="math inline">\({\Lambda^{-1}}_{jj} = 0\)</span> when <span class="math inline">\(\lambda_j = 0\)</span>.</p> </section> <section id="choosing-lambda-and-rho" class="level2"> <h2>Choosing <span class="math inline">\(\lambda\)</span> and <span class="math inline">\(\rho\)</span></h2> <p>Let’s start with <span class="math inline">\(\rho = 0.9\)</span> and <span class="math inline">\(\lambda = 0.1\)</span>.</p> </section> <section id="session-information" class="level2"> <h2>Session information</h2> <!-- Insert the session information into the document --> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.4.3 (2017-11-30) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS High Sierra 10.13.2 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base loaded via a namespace (and not attached): [1] compiler_3.4.3 backports_1.1.2 magrittr_1.5 rprojroot_1.3-1 [5] tools_3.4.3 htmltools_0.3.6 yaml_2.1.16 Rcpp_0.12.14 [9] stringi_1.1.6 rmarkdown_1.8 knitr_1.17 git2r_0.20.0 [13] stringr_1.2.0 digest_0.6.13 evaluate_0.10.1</code></pre> </section> <hr> <p> This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> </p> <hr> <!-- To enable disqus, uncomment the section below and provide your disqus_shortname --> <!-- disqus <div id="disqus_thread"></div> <script type="text/javascript"> /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname /* * * DON'T EDIT BELOW THIS LINE * * */ (function() { var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); })(); </script> <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> --> </div> </div> </div> <script> // add bootstrap table styles to pandoc tables function bootstrapStylePandocTables() { $('tr.header').parent('thead').parent('table').addClass('table table-condensed'); } $(document).ready(function () { bootstrapStylePandocTables(); }); </script> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>