topic-modeling.html

<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>

  <meta charset="utf-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
  <title>Chapter 8 Topic modeling | Natural Language Processing with R</title>
  <meta name="description" content="This is a tutorial of various techniques used in natural language processing and text mining." />
  <meta name="generator" content="bookdown 0.18 and GitBook 2.6.7" />

  <meta property="og:title" content="Chapter 8 Topic modeling | Natural Language Processing with R" />
  <meta property="og:type" content="book" />
  
  
  <meta property="og:description" content="This is a tutorial of various techniques used in natural language processing and text mining." />
  

  <meta name="twitter:card" content="summary" />
  <meta name="twitter:title" content="Chapter 8 Topic modeling | Natural Language Processing with R" />
  
  <meta name="twitter:description" content="This is a tutorial of various techniques used in natural language processing and text mining." />
  

<meta name="author" content="Saif SHabou" />


<meta name="date" content="2020-05-06" />

  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <meta name="apple-mobile-web-app-capable" content="yes" />
  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
  
  
<link rel="prev" href="word-and-document-frequency-tf-idf.html"/>
<link rel="next" href="words-relationships-analysis.html"/>
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />


<style type="text/css">
/* Styles for syntax-highlighted code listings. In this page every listing line
   is an a.sourceLine anchor inside code.sourceCode / pre.sourceCode. */
a.sourceLine { display: inline-block; line-height: 1.25; }
/* Line anchors ignore pointer events and inherit colour and decoration. */
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
/* Blank listing lines still get a height. */
a.sourceLine:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
/* Keep whitespace in listings exactly as written. */
code.sourceCode { white-space: pre; position: relative; }
pre.sourceCode { margin: 0; }
/* On screen, the wrapping div scrolls when a listing overflows. */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, listing lines may wrap; wrapped lines get a hanging indent. */
@media print {
code.sourceCode { white-space: pre-wrap; }
a.sourceLine { text-indent: -1em; padding-left: 1em; }
}
/* Numbered listings (pre.numberSource): lines are shifted left and a
   ::before box shows the line number. */
pre.numberSource a.sourceLine
  { position: relative; left: -4em; }
/* The number text comes from the anchor's title attribute; text selection
   is switched off for it via the user-select properties. */
pre.numberSource a.sourceLine::before
  { content: attr(title);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; pointer-events: all; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
/* Left border and margin for numbered listings. */
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
/* Empty rule: no declarations. */
div.sourceCode
  {  }
@media screen {
a.sourceLine::before { text-decoration: underline; }
}
/* Token colours: one two-letter span class per token kind; the trailing
   comment on each rule names the kind. Rules with an empty body add no styling. */
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>

<link rel="stylesheet" href="style.css" type="text/css" />
</head>

<body>


  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li><a href="./">NLP with R</a></li>

<li class="divider"></li>
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Introduction</a></li>
<li class="chapter" data-level="2" data-path="text-processing.html"><a href="text-processing.html"><i class="fa fa-check"></i><b>2</b> Text processing</a><ul>
<li class="chapter" data-level="2.1" data-path="text-processing.html"><a href="text-processing.html#text-data"><i class="fa fa-check"></i><b>2.1</b> Text data</a></li>
<li class="chapter" data-level="2.2" data-path="text-processing.html"><a href="text-processing.html#nlp-applications"><i class="fa fa-check"></i><b>2.2</b> NLP applications</a></li>
<li class="chapter" data-level="2.3" data-path="text-processing.html"><a href="text-processing.html#tokenization"><i class="fa fa-check"></i><b>2.3</b> Tokenization</a></li>
<li class="chapter" data-level="2.4" data-path="text-processing.html"><a href="text-processing.html#stop-words-handeling"><i class="fa fa-check"></i><b>2.4</b> Stop words handling</a></li>
<li class="chapter" data-level="2.5" data-path="text-processing.html"><a href="text-processing.html#words-frequencies"><i class="fa fa-check"></i><b>2.5</b> Words frequencies</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="Word-embeddings.html"><a href="Word-embeddings.html"><i class="fa fa-check"></i><b>3</b> Word embeddings</a><ul>
<li class="chapter" data-level="3.1" data-path="Word-embeddings.html"><a href="Word-embeddings.html#vectorizing-text"><i class="fa fa-check"></i><b>3.1</b> Vectorizing text</a></li>
<li class="chapter" data-level="3.2" data-path="Word-embeddings.html"><a href="Word-embeddings.html#one-hot-encoding"><i class="fa fa-check"></i><b>3.2</b> One-hot encoding</a></li>
<li class="chapter" data-level="3.3" data-path="Word-embeddings.html"><a href="Word-embeddings.html#word-embeddings-methods"><i class="fa fa-check"></i><b>3.3</b> Word embeddings methods</a><ul>
<li class="chapter" data-level="3.3.1" data-path="Word-embeddings.html"><a href="Word-embeddings.html#learn-world-embeddings"><i class="fa fa-check"></i><b>3.3.1</b> Learn word embeddings</a></li>
<li class="chapter" data-level="3.3.2" data-path="Word-embeddings.html"><a href="Word-embeddings.html#pre-trained-word-embeddings"><i class="fa fa-check"></i><b>3.3.2</b> Pre-trained word embeddings</a></li>
</ul></li>
<li class="chapter" data-level="3.4" data-path="Word-embeddings.html"><a href="Word-embeddings.html#applications"><i class="fa fa-check"></i><b>3.4</b> Applications</a><ul>
<li class="chapter" data-level="3.4.1" data-path="Word-embeddings.html"><a href="Word-embeddings.html#using-skip-gram"><i class="fa fa-check"></i><b>3.4.1</b> Using Skip-Gram</a></li>
<li class="chapter" data-level="3.4.2" data-path="Word-embeddings.html"><a href="Word-embeddings.html#using-glove"><i class="fa fa-check"></i><b>3.4.2</b> Using GloVe</a></li>
</ul></li>
<li class="chapter" data-level="3.5" data-path="Word-embeddings.html"><a href="Word-embeddings.html#references"><i class="fa fa-check"></i><b>3.5</b> references</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="text-classification.html"><a href="text-classification.html"><i class="fa fa-check"></i><b>4</b> Text classification</a><ul>
<li class="chapter" data-level="4.1" data-path="text-classification.html"><a href="text-classification.html#load-the-data"><i class="fa fa-check"></i><b>4.1</b> Load the data</a></li>
<li class="chapter" data-level="4.2" data-path="text-classification.html"><a href="text-classification.html#prepare-the-data-for-neural-network"><i class="fa fa-check"></i><b>4.2</b> Prepare the data for neural network</a></li>
<li class="chapter" data-level="4.3" data-path="text-classification.html"><a href="text-classification.html#building-the-model"><i class="fa fa-check"></i><b>4.3</b> Building the model</a></li>
<li class="chapter" data-level="4.4" data-path="text-classification.html"><a href="text-classification.html#testing-the-model"><i class="fa fa-check"></i><b>4.4</b> Testing the model</a></li>
<li class="chapter" data-level="4.5" data-path="text-classification.html"><a href="text-classification.html#reference"><i class="fa fa-check"></i><b>4.5</b> Reference</a></li>
</ul></li>
<li class="chapter" data-level="5" data-path="RNN.html"><a href="RNN.html"><i class="fa fa-check"></i><b>5</b> Recurrent Neural Networks (RNN)</a><ul>
<li class="chapter" data-level="5.1" data-path="RNN.html"><a href="RNN.html#understanding-recurrent-neural-network"><i class="fa fa-check"></i><b>5.1</b> Understanding Recurrent Neural Network</a></li>
<li class="chapter" data-level="5.2" data-path="RNN.html"><a href="RNN.html#rnn-with-keras"><i class="fa fa-check"></i><b>5.2</b> RNN with Keras</a></li>
<li class="chapter" data-level="5.3" data-path="RNN.html"><a href="RNN.html#lstm-with-keras"><i class="fa fa-check"></i><b>5.3</b> LSTM with Keras</a></li>
</ul></li>
<li class="chapter" data-level="6" data-path="sentiment-analysis.html"><a href="sentiment-analysis.html"><i class="fa fa-check"></i><b>6</b> Sentiment Analysis</a><ul>
<li class="chapter" data-level="6.1" data-path="sentiment-analysis.html"><a href="sentiment-analysis.html#the-sentiments-dataset"><i class="fa fa-check"></i><b>6.1</b> The “Sentiments” dataset</a></li>
<li class="chapter" data-level="6.2" data-path="sentiment-analysis.html"><a href="sentiment-analysis.html#application"><i class="fa fa-check"></i><b>6.2</b> Application</a></li>
<li class="chapter" data-level="6.3" data-path="sentiment-analysis.html"><a href="sentiment-analysis.html#references-1"><i class="fa fa-check"></i><b>6.3</b> References:</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="word-and-document-frequency-tf-idf.html"><a href="word-and-document-frequency-tf-idf.html"><i class="fa fa-check"></i><b>7</b> Word and document frequency (TF-IDF)</a><ul>
<li class="chapter" data-level="7.1" data-path="word-and-document-frequency-tf-idf.html"><a href="word-and-document-frequency-tf-idf.html#term-frequency-application"><i class="fa fa-check"></i><b>7.1</b> Term frequency application</a></li>
<li class="chapter" data-level="7.2" data-path="word-and-document-frequency-tf-idf.html"><a href="word-and-document-frequency-tf-idf.html#zipfs-law"><i class="fa fa-check"></i><b>7.2</b> Zipf’s law</a></li>
<li class="chapter" data-level="7.3" data-path="word-and-document-frequency-tf-idf.html"><a href="word-and-document-frequency-tf-idf.html#tf_idf-metric"><i class="fa fa-check"></i><b>7.3</b> TF_IDF metric</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="topic-modeling.html"><a href="topic-modeling.html"><i class="fa fa-check"></i><b>8</b> Topic modeling</a><ul>
<li class="chapter" data-level="8.1" data-path="topic-modeling.html"><a href="topic-modeling.html#latent-dirichlet-allocation"><i class="fa fa-check"></i><b>8.1</b> Latent Dirichlet allocation</a></li>
<li class="chapter" data-level="8.2" data-path="topic-modeling.html"><a href="topic-modeling.html#document-topic-probabilities"><i class="fa fa-check"></i><b>8.2</b> Document-topic probabilities</a></li>
</ul></li>
<li class="chapter" data-level="9" data-path="words-relationships-analysis.html"><a href="words-relationships-analysis.html"><i class="fa fa-check"></i><b>9</b> Words’ relationships analysis</a><ul>
<li class="chapter" data-level="9.1" data-path="words-relationships-analysis.html"><a href="words-relationships-analysis.html#extracting-bi-grams"><i class="fa fa-check"></i><b>9.1</b> Extracting bi-grams</a></li>
<li class="chapter" data-level="9.2" data-path="words-relationships-analysis.html"><a href="words-relationships-analysis.html#analyzing-bi-grams"><i class="fa fa-check"></i><b>9.2</b> Analyzing bi-grams</a></li>
<li class="chapter" data-level="9.3" data-path="words-relationships-analysis.html"><a href="words-relationships-analysis.html#visualizing-a-network-of-bigrams"><i class="fa fa-check"></i><b>9.3</b> Visualizing a network of bigrams</a></li>
</ul></li>
<li class="chapter" data-level="10" data-path="document-term-matrix.html"><a href="document-term-matrix.html"><i class="fa fa-check"></i><b>10</b> Document-term matrix</a><ul>
<li class="chapter" data-level="10.1" data-path="document-term-matrix.html"><a href="document-term-matrix.html#converting-dtm-into-dataframe"><i class="fa fa-check"></i><b>10.1</b> Converting DTM into dataframe</a></li>
<li class="chapter" data-level="10.2" data-path="document-term-matrix.html"><a href="document-term-matrix.html#generating-document-term-matrix"><i class="fa fa-check"></i><b>10.2</b> Generating Document-term matrix</a></li>
</ul></li>
<li class="divider"></li>
<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>

</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Natural Language Processing with R</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="topic-modeling" class="section level1">
<h1><span class="header-section-number">Chapter 8</span> Topic modeling</h1>
<p>Topic modeling is a type of statistical modeling for discovering the abstract “topics” that occur in a collection of documents.</p>
<div id="latent-dirichlet-allocation" class="section level2">
<h2><span class="header-section-number">8.1</span> Latent Dirichlet allocation</h2>
<p>Latent Dirichlet allocation (LDA) is an example of a topic model and is used to classify text in a document to a particular topic. It treats each document as a mixture of topics, and each topic as a mixture of words. LDA is a mathematical method for finding the mixture of words associated with each topic and the mixture of topics that describes each document.</p>
<p>Here is an example of fitting an LDA model with the number of topics set to 2:</p>
<div class="sourceCode" id="cb143"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb143-1" title="1"><span class="kw">library</span>(topicmodels)</a>
<a class="sourceLine" id="cb143-2" title="2"></a>
<a class="sourceLine" id="cb143-3" title="3"><span class="co"># load data</span></a>
<a class="sourceLine" id="cb143-4" title="4"><span class="kw">data</span>(<span class="st">&quot;AssociatedPress&quot;</span>)</a>
<a class="sourceLine" id="cb143-5" title="5">AssociatedPress</a></code></pre></div>
<pre><code>## &lt;&lt;DocumentTermMatrix (documents: 2246, terms: 10473)&gt;&gt;
## Non-/sparse entries: 302031/23220327
## Sparsity           : 99%
## Maximal term length: 18
## Weighting          : term frequency (tf)</code></pre>
<div class="sourceCode" id="cb145"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb145-1" title="1"><span class="co"># fitting LDA model with 2 topics</span></a>
<a class="sourceLine" id="cb145-2" title="2">ap_lda =<span class="st"> </span><span class="kw">LDA</span>(AssociatedPress, <span class="dt">k=</span><span class="dv">2</span>, <span class="dt">control =</span> <span class="kw">list</span>(<span class="dt">seed =</span> <span class="dv">1234</span>))</a>
<a class="sourceLine" id="cb145-3" title="3">ap_lda</a></code></pre></div>
<pre><code>## A LDA_VEM topic model with 2 topics.</code></pre>
<p>Now we can extract the per-topic-per-word probabilities, called “beta”, from the model:</p>
<div class="sourceCode" id="cb147"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb147-1" title="1"><span class="kw">library</span>(tidytext)</a>
<a class="sourceLine" id="cb147-2" title="2"></a>
<a class="sourceLine" id="cb147-3" title="3">ap_topics =<span class="st"> </span><span class="kw">tidy</span>(ap_lda, <span class="dt">matrix =</span> <span class="st">&quot;beta&quot;</span>)</a>
<a class="sourceLine" id="cb147-4" title="4">ap_topics</a></code></pre></div>
<pre><code>## # A tibble: 20,946 x 3
##    topic term           beta
##    &lt;int&gt; &lt;chr&gt;         &lt;dbl&gt;
##  1     1 aaron      1.69e-12
##  2     2 aaron      3.90e- 5
##  3     1 abandon    2.65e- 5
##  4     2 abandon    3.99e- 5
##  5     1 abandoned  1.39e- 4
##  6     2 abandoned  5.88e- 5
##  7     1 abandoning 2.45e-33
##  8     2 abandoning 2.34e- 5
##  9     1 abbott     2.13e- 6
## 10     2 abbott     2.97e- 5
## # ... with 20,936 more rows</code></pre>
<p>The resulting data frame presents the probability of each term being generated from each topic. For example, the term “abandoned” has a probability of <span class="math inline">\(1.39 \times 10^{-4}\)</span> of being generated from topic 1 and a probability of <span class="math inline">\(5.88 \times 10^{-5}\)</span> of being generated from topic 2.</p>
<p>Let’s find the 10 terms that are most common within each topic.</p>
<div class="sourceCode" id="cb149"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb149-1" title="1"><span class="kw">library</span>(ggplot2)</a>
<a class="sourceLine" id="cb149-2" title="2"><span class="kw">library</span>(dplyr)</a>
<a class="sourceLine" id="cb149-3" title="3"></a>
<a class="sourceLine" id="cb149-4" title="4">ap_top_terms &lt;-<span class="st"> </span>ap_topics <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb149-5" title="5"><span class="st">  </span><span class="kw">group_by</span>(topic) <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb149-6" title="6"><span class="st">  </span><span class="kw">top_n</span>(<span class="dv">10</span>, beta) <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb149-7" title="7"><span class="st">  </span><span class="kw">ungroup</span>() <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb149-8" title="8"><span class="st">  </span><span class="kw">arrange</span>(topic, <span class="op">-</span>beta)</a>
<a class="sourceLine" id="cb149-9" title="9"></a>
<a class="sourceLine" id="cb149-10" title="10">ap_top_terms <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb149-11" title="11"><span class="st">  </span><span class="kw">mutate</span>(<span class="dt">term =</span> <span class="kw">reorder_within</span>(term, beta, topic)) <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb149-12" title="12"><span class="st">  </span><span class="kw">ggplot</span>(<span class="kw">aes</span>(term, beta, <span class="dt">fill =</span> <span class="kw">factor</span>(topic))) <span class="op">+</span></a>
<a class="sourceLine" id="cb149-13" title="13"><span class="st">  </span><span class="kw">geom_col</span>(<span class="dt">show.legend =</span> <span class="ot">FALSE</span>) <span class="op">+</span></a>
<a class="sourceLine" id="cb149-14" title="14"><span class="st">  </span><span class="kw">facet_wrap</span>(<span class="op">~</span><span class="st"> </span>topic, <span class="dt">scales =</span> <span class="st">&quot;free&quot;</span>) <span class="op">+</span></a>
<a class="sourceLine" id="cb149-15" title="15"><span class="st">  </span><span class="kw">coord_flip</span>() <span class="op">+</span></a>
<a class="sourceLine" id="cb149-16" title="16"><span class="st">  </span><span class="kw">scale_x_reordered</span>()</a></code></pre></div>
<p><img src="NLP-book_files/figure-html/unnamed-chunk-20-1.png" alt="Bar charts of the 10 terms with the highest beta in each of the two topics" width="672" /></p>
<p>We can interpret the result as a first topic related to financial news (“percent”, “million”, “company”) and a second topic related to political news (“president”, “government”, “states”).</p>
</div>
<div id="document-topic-probabilities" class="section level2">
<h2><span class="header-section-number">8.2</span> Document-topic probabilities</h2>
<p>Besides estimating each topic as a mixture of words, LDA also models each document as a mixture of topics. To examine the per-document-per-topic probabilities, we use the “gamma” matrix.</p>
<div class="sourceCode" id="cb150"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb150-1" title="1">ap_documents &lt;-<span class="st"> </span><span class="kw">tidy</span>(ap_lda, <span class="dt">matrix =</span> <span class="st">&quot;gamma&quot;</span>)</a>
<a class="sourceLine" id="cb150-2" title="2">ap_documents</a></code></pre></div>
<pre><code>## # A tibble: 4,492 x 3
##    document topic    gamma
##       &lt;int&gt; &lt;int&gt;    &lt;dbl&gt;
##  1        1     1 0.248   
##  2        2     1 0.362   
##  3        3     1 0.527   
##  4        4     1 0.357   
##  5        5     1 0.181   
##  6        6     1 0.000588
##  7        7     1 0.773   
##  8        8     1 0.00445 
##  9        9     1 0.967   
## 10       10     1 0.147   
## # ... with 4,482 more rows</code></pre>
<p>Each of these values is an estimated proportion of words from that document that are generated from that topic. For example, the model estimates that only about 25% of the words in document 1 were generated from topic 1.</p>

</div>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="word-and-document-frequency-tf-idf.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="words-relationships-analysis.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Start the GitBook UI for this page once the "gitbook" module has been resolved.
gitbook.require(["gitbook"], function (book) {
  // Options handed unchanged to start(). Keys and values are kept exactly as
  // generated; presumably they are read by the plugin scripts loaded earlier
  // in this page (sharing, fontsettings, ...) -- confirm against those plugins.
  var settings = {
    "sharing": {
      "github": false,
      "facebook": true,
      "twitter": true,
      "linkedin": false,
      "weibo": false,
      "instapaper": false,
      "vk": false,
      "all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
    },
    "fontsettings": {
      "theme": "white",
      "family": "sans",
      "size": 2
    },
    "edit": {
      "link": null,
      "text": null
    },
    "history": {
      "link": null,
      "text": null
    },
    "view": {
      "link": null,
      "text": null
    },
    "download": ["NLP-book.pdf", "NLP-book.epub"],
    "toc": {
      "collapse": "subsection"
    }
  };

  book.start(settings);
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Inject the MathJax loader script into <head> at runtime.
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    // An empty value or the literal "true" selects the default CDN build below;
    // any other value is used as the script URL as-is.
    var src = "true";
    if (src === "" || src === "true") src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";
    // Keep the URL absolute. Stripping the scheme (protocol-relative "//host/...")
    // would fetch MathJax over plain HTTP whenever the page itself is served
    // over http:. Only an explicit http: URL is upgraded, and only on an https:
    // page, where it would otherwise be blocked as mixed content.
    if (location.protocol === "https:") src = src.replace(/^http:/, "https:");
    script.src = src;
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>
</body>

</html>