From 11e1789d83ed099cf247eda872659a22db2b0db8 Mon Sep 17 00:00:00 2001
From: Inga Kirschnick <inga.kirschnick@uni-bielefeld.de>
Date: Mon, 19 Jun 2023 13:41:56 +0200
Subject: [PATCH] visualization testing

---
 app/static/js/CorpusAnalysis/CQiClient.js     | 328 ++++++++++--------
 .../js/CorpusAnalysis/CorpusAnalysisApp.js    | 103 ++++--
 .../js/ResourceLists/CorpusTextInfoList.js    |  34 +-
 app/templates/corpora/analysis.html.j2        |  88 +++--
 4 files changed, 330 insertions(+), 223 deletions(-)

diff --git a/app/static/js/CorpusAnalysis/CQiClient.js b/app/static/js/CorpusAnalysis/CQiClient.js
index fcc0c87d..92cc6422 100644
--- a/app/static/js/CorpusAnalysis/CQiClient.js
+++ b/app/static/js/CorpusAnalysis/CQiClient.js
@@ -101,164 +101,188 @@ class CQiCorpus {
   getCorpusData() {
     return new Promise((resolve, reject) => {
       const dummyData = {
-          "num_tokens": 2000,    // number of tokens in the corpus
-          "num_unique_words": 500,    // number of unique words in the corpus
-          "num_unique_lemmas": 200,    // number of unique lemmas in the corpus
-          "num_sentences": 90,    // number of sentences in the corpus
-          "average_sentence_length": 11,   // average number of tokens per sentence in the corpus
-          "num_ent_types": 30,    // number of entities in the corpus
-          "num_unique_ent_types":10,
-          "ent_type_freqs": {
-            "str": 10,    // number of ent_types with ent_type "str"
-            // ...
-        },
-          "texts": [
-              {
-                  "num_tokens": 11,    // number of tokens in the text
-                  "num_unique_words": 12,    // number of unique words in the text
-                  "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
-                    "str": "int",    // number of tokens with word "str"
-                    // ...
+          "corpus": {
+              "bounds": [1, 689],
+              "counts": {
+                  "token": 743,
+                  "ent": 321,
+                  "s": 234
+              },
+              "freqs": {
+                  "word": {
+                      "1": 876,
+                      "2": 234,
+                      "3": 657
                   },
-                  "num_unique_lemmas": 15,    // number of unique lemmas in the text
-                  "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
-                    "str": "int",    // number of tokens with lemma "str"
-                    // ...
+                  "lemma": {
+                      "1": 543,
+                      "2": 876,
+                      "3": 321
                   },
-                  "num_sentences": 4,    // number of sentences in the text
-                  "average_sentence_length": 3,   // average number of tokens per sentence in the text
-                  "num_ent_types": 12,    // number of ent_types in the text
-                  "num_unique_ent_types": 28,    // number of unique ent_types in the text
-                  "num_entities_by_id": {
-                      "1": "int",    // number of entities with id 1
-                      // ...
-                  },            
-                  "author": "Author Name",
-                  "title": "Titel",
-                  "publishing_year": 1950
-              },
-              {
-                "num_tokens": 15,    // number of tokens in the text
-                "num_unique_words": 4,    // number of unique words in the text
-                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with word "str"
-                  // ...
-                },
-                "num_unique_lemmas": 90,    // number of unique lemmas in the text
-                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with lemma "str"
-                  // ...
-                },
-                "num_sentences": 11,    // number of sentences in the text
-                "average_sentence_length": 3,   // average number of tokens per sentence in the text
-                "num_ent_types": 4,    // number of ent_types in the text
-                "num_unique_ent_types": 300,    // number of unique ent_types in the text
-                "num_entities_by_id": {
-                    "1": "int",    // number of entities with id 1
-                    // ...
-                },            
-                "author": "Author Name",
-                "title": "Titel 1",
-                  "publishing_year": 1962
+                  "pos": {
+                      "1": 456,
+                      "2": 789,
+                      "3": 234
+                  },
+                  "simple_pos": {
+                      "1": 987,
+                      "2": 876,
+                      "3": 543
+                  },
+                  "ent": {
+                      "1": 654,
+                      "2": 321,
+                      "3": 987
+                  }
+              }
+          },
+          "text": {
+              "1": {
+                  "bounds": [0, 435],
+                  "counts": {
+                      "token": 345,
+                      "ent_type": 123,
+                      "s": 89
+                  },
+                  "freqs": {
+                      "word": {
+                          "1": 25,
+                          "2": 90,
+                          "3": 200
+                      },
+                      "lemma": {
+                          "1": 654,
+                          "2": 321,
+                          "3": 987
+                      },
+                      "pos": {
+                          "1": 543,
+                          "2": 876,
+                          "3": 234
+                      },
+                      "simple_pos": {
+                          "1": 987,
+                          "2": 654,
+                          "3": 321
+                      },
+                      "ent_type": {
+                          "1": 234,
+                          "2": 789,
+                          "3": 543
+                      }
+                  },
+                  "values": {
+                      "author": 1,
+                      "publishing_year":1950,
+                      "title": 1
+                  }
               },
-              {
-                "num_tokens": 11,    // number of tokens in the text
-                "num_unique_words": 12,    // number of unique words in the text
-                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with word "str"
-                  // ...
-                },
-                "num_unique_lemmas": 64,    // number of unique lemmas in the text
-                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with lemma "str"
-                  // ...
-                },
-                "num_sentences": 52,    // number of sentences in the text
-                "average_sentence_length": 3,   // average number of tokens per sentence in the text
-                "num_ent_types": 45,    // number of ent_types in the text
-                "num_unique_ent_types": 68,    // number of unique ent_types in the text
-                "num_entities_by_id": {
-                    "1": "int",    // number of entities with id 1
-                    // ...
-                },            
-                "author": "Author Name",
-                "title": "Titel 2",
-                "publishing_year": 1850
+              "2": {
+                  "bounds": [435, 689],
+                  "counts": {
+                      "token": 389,
+                      "ent_type": 198,
+                      "s": 145
+                  },
+                  "freqs": {
+                      "word": {
+                          "1": 60,
+                          "2": 70,
+                          "3": 100
+                      },
+                      "lemma": {
+                          "1": 654,
+                          "2": 321,
+                          "3": 987
+                      },
+                      "pos": {
+                          "1": 543,
+                          "2": 876,
+                          "3": 234
+                      },
+                      "simple_pos": {
+                          "1": 987,
+                          "2": 654,
+                          "3": 321
+                      },
+                      "ent_type": {
+                          "1": 234,
+                          "2": 789,
+                          "3": 543
+                      }
+                  },
+                  "values": {
+                      "author": 2,
+                      "publishing_year":1951,
+                      "title": 2
+                  }
+              }
+          },
+          "s": {
+              "1": {
+                  "bounds": [345, 678]
+              }
+          },
+          "ent": {
+              "1": {
+                  "bounds": [567, 890],
+                  "values": {
+                      "type": 789
+                  }
+              }
+          },
+          "token": {
+              "310": {
+                  "values": {
+                      "word": 1,
+                      "lemma": 2,
+                      "pos": 1,
+                      "simple_pos": 1
+                  }
+              }
+          },
+          "value_lookups": {
+              "text": {
+                  "author": {
+                      "1": "John Doe",
+                      "2": "Jane Smith"
+                  },
+                  "title": {
+                      "1": "Test Title 1",
+                      "2": "Test Title 2"
+                  }
               },
-              {
-                "num_tokens": 56,    // number of tokens in the text
-                "num_unique_words": 13,    // number of unique words in the text
-                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with word "str"
-                  // ...
-                },
-                "num_unique_lemmas": 43,    // number of unique lemmas in the text
-                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with lemma "str"
-                  // ...
-                },
-                "num_sentences": 45,    // number of sentences in the text
-                "average_sentence_length": 56,   // average number of tokens per sentence in the text
-                "num_ent_types": 8792,    // number of ent_types in the text
-                "num_unique_ent_types": 56758,    // number of unique ent_types in the text
-                "num_entities_by_id": {
-                    "1": "int",    // number of entities with id 1
-                    // ...
-                },            
-                "author": "Author Name",
-                "title": "Titel 3",
-                "publishing_year": 1504
+              "ent": {
+                  "type": {
+                      "1": "Person",
+                      "2": "Organization"
+                  }
               },
-              {
-                "num_tokens": 54345,    // number of tokens in the text
-                "num_unique_words": 561,    // number of unique words in the text
-                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with word "str"
-                  // ...
-                },
-                "num_unique_lemmas": 546,    // number of unique lemmas in the text
-                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
-                  "str": "int",    // number of tokens with lemma "str"
-                  // ...
-                },
-                "num_sentences": 5427,    // number of sentences in the text
-                "average_sentence_length": 657,   // average number of tokens per sentence in the text
-                "num_ent_types": 3465,    // number of ent_types in the text
-                "num_unique_ent_types": 45,    // number of unique ent_types in the text
-                "num_entities_by_id": {
-                    "1": "int",    // number of entities with id 1
-                    // ...
-                },            
-                "author": "Author Name",
-                "title": "Titel 4",
-                "publishing_year": 1712
-              },                            
-              {
-                "num_tokens": 4354,    // number of tokens in the text
-                "num_unique_words": 45234,    // number of unique words in the text
-                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
-                  "testwort": 50,    // number of tokens with word "str"
-                  "testwort2": 1
-                },
-                "num_unique_lemmas": 15,    // number of unique lemmas in the text
-                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
-                  "testlemma": 11,    // number of tokens with lemma "str"
-                  "testlemma2": 1
-                },
-                "num_sentences": 90,    // number of sentences in the text
-                "average_sentence_length": 7,   // average number of tokens per sentence in the text
-                "num_ent_types": 19,
-                "num_unique_ent_types": 5,    // number of unique ent_types in the text
-                "num_entities_by_id": {
-                    "1": "int",    // number of entities with id 1
-                    // ...
-                },            
-                "author": "Author Name 2",
-                "title": "Titel 5",
-                "publishing_year": 1951
+              "token": {
+                  "word": {
+                      "1": "apple",
+                      "2": "banana",
+                      "3": "orange"
+                  },
+                  "lemma": {
+                      "1": "run",
+                      "2": "walk",
+                      "3": "jump"
+                  },
+                  "pos": {
+                      "1": "noun",
+                      "2": "verb",
+                      "3": "adjective"
+                  },
+                  "simple_pos": {
+                      "1": "subject",
+                      "2": "object",
+                      "3": "predicate"
+                  }
               }
-          ]
-      };
+          }
+      };
+      
 
       resolve(dummyData);
       /*
diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
index cb012730..fbb91b4c 100644
--- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
+++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
@@ -39,6 +39,8 @@ class CorpusAnalysisApp {
               this.renderGeneralCorpusInfo(corpusData);
               this.renderTextInfoList(corpusData);
               this.renderTextProportionsGraphic(corpusData);
+              this.renderWordFrequenciesGraphic(corpusData);
+              this.renderWordDistributionsGraphic(corpusData);
             });
           // TODO: Don't do this hgere
           cQiCorpus.updateDb();
@@ -103,38 +105,85 @@ class CorpusAnalysisApp {
   }
 
   renderGeneralCorpusInfo(corpusData) {
-    let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
-    corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
-    corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
-    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
-    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
-    // corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
-    corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
-    corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
-    corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
-    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
+    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
+    document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
+    // corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${Object.entries(corpusData.text).length}`;
+    document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
+    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
+    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
+    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
   }
 
   renderTextInfoList(corpusData) {
-    let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
-    let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
-    corpusTextInfoList.add(corpusData.texts);
-    
+    // let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
+    // let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
+    // for (let text of Object.values(corpusData.text)) {
+    //   text.values.title = corpusData.value_lookups.text.title[text.values.title];
+    // }
+    // corpusTextInfoList.add(Object.values(corpusData.text));
+
+    // let textCountChipElement = document.querySelector('.text-count-chip');
+    // textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text).length}`;
   }
 
   renderTextProportionsGraphic(corpusData) {
-    let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
-    let graphData = [
-      {
-        values: corpusData.texts.map(text => text.num_tokens),
-        labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
-        type: 'pie'
-      }
-    ];
-    let graphLayout = {
-      height: 400,
-      width: 500
-    };
-    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
+    // let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
+    // let texts = Object.values(corpusData.text);
+    // let graphData = [
+    //   {
+    //     values: texts.map(text => text.counts.token),
+    //     labels: texts.map(text => `${text.values.title} (${text.values.publishing_year})`),
+    //     type: 'pie'
+    //   }
+    // ];
+    // let graphLayout = {
+    //   height: 400,
+    //   width: 500
+    // };
+    // Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
+  }
+
+  renderWordFrequenciesGraphic(corpusData) {
+    // let wordFrequenciesGraphicElement = document.querySelector('#word-frequencies-graphic');
+    // let words = Object.entries(corpusData.value_lookups.token.word);
+    // let texts = Object.values(corpusData.text);
+    // let graphData = [];
+    // for (let word of words) {
+    //   let data = {
+    //     x: texts.map(text => `${text.values.title} (${text.values.publishing_year})`),
+    //     y: texts.map(text => text.freqs.word[word[0]]),
+    //     name: word[1],
+    //     type: 'bar'
+    //   };
+    //   graphData.push(data);
+    // }
+
+    // let graphLayout = {
+    //   height: 400,
+    //   width: 500,
+    //   barmode: 'stack',
+    //   type: 'bar'
+    // };
+    // Plotly.newPlot(wordFrequenciesGraphicElement, graphData, graphLayout);
+  }
+
+  renderWordDistributionsGraphic(corpusData) {
+    // let wordDistributionGraphicElement = document.querySelector('#word-distributions-graphic');
+    // var trace1 = {
+    //   x: [1, 2, 3, 4],
+    //   y: [10, 11, 12, 13],
+    //   mode: 'markers',
+    //   marker: {
+    //     size: [40, 60, 80, 100]
+    //   }
+    // };
+    // var data = [trace1];
+    // var layout = {
+    //   title: 'Marker Size',
+    //   showlegend: false,
+    //   height: 600,
+    //   width: 600
+    // };
+    // Plotly.newPlot(wordDistributionGraphicElement, data, layout);
   }
 }
diff --git a/app/static/js/ResourceLists/CorpusTextInfoList.js b/app/static/js/ResourceLists/CorpusTextInfoList.js
index 3e697d2d..6e8e8310 100644
--- a/app/static/js/ResourceLists/CorpusTextInfoList.js
+++ b/app/static/js/ResourceLists/CorpusTextInfoList.js
@@ -29,11 +29,11 @@ class CorpusTextInfoList extends ResourceList {
         <tr class="list-item clickable hoverable">
           <td><span class="title"></span> (<span class="publishing_year"></span>)</td>
           <td><span class="num_tokens"></span></td>
+          <td><span class="num_sentences"></span></td>
           <td><span class="num_unique_words"></span></td>
           <td><span class="num_unique_lemmas"></span></td>
-          <td><span class="num_sentences"></span></td>
-          <td><span class="average_sentence_length"></span></td>
-          <td><span class="num_unique_ent_types"></span></td>
+          <td><span class="num_unique_pos"></span></td>
+          <td><span class="num_unique_simple_pos"></span></td>
         </tr>
       `.trim();
     }
@@ -44,11 +44,11 @@ class CorpusTextInfoList extends ResourceList {
       'title',
       'publishing_year',
       'num_tokens',
+      'num_sentences',
       'num_unique_words',
       'num_unique_lemmas',
-      'num_sentences',
-      'average_sentence_length',
-      'num_unique_ent_types'
+      'num_unique_pos',
+      'num_unique_simple_pos'
     ];
   }
 
@@ -68,11 +68,11 @@ class CorpusTextInfoList extends ResourceList {
           <tr>
             <th>Text<span class="sort right material-icons" data-sort="title" style="cursor:pointer; color:#aa9cc9">arrow_drop_down</span></th>
             <th>Number of tokens<span class="sort right material-icons" data-sort="num_tokens" style="cursor:pointer">arrow_drop_down</span></th>
+            <th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
             <th>Number of unique words<span class="sort right material-icons" data-sort="num_unique_words" style="cursor:pointer">arrow_drop_down</span></th>
             <th>Number of unique lemmas<span class="sort right material-icons" data-sort="num_unique_lemmas" style="cursor:pointer">arrow_drop_down</span></th>
-            <th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
-            <th>Average sentence length<span class="sort right material-icons" data-sort="average_sentence_length" style="cursor:pointer">arrow_drop_down</span></th>
-            <th>Number of unique entity types<span class="sort right material-icons" data-sort="num_unique_ent_types" style="cursor:pointer">arrow_drop_down</span></th>
+            <th>Number of unique pos<span class="sort right material-icons" data-sort="num_unique_pos" style="cursor:pointer">arrow_drop_down</span></th>
+            <th>Number of unique simple pos<span class="sort right material-icons" data-sort="num_unique_simple_pos" style="cursor:pointer">arrow_drop_down</span></th>
           </tr>
         </thead>
         <tbody class="list"></tbody>
@@ -83,14 +83,14 @@ class CorpusTextInfoList extends ResourceList {
 
   mapResourceToValue(corpusTextData) {
     return {
-      title: corpusTextData.title,
-      publishing_year: corpusTextData.publishing_year,
-      num_tokens: corpusTextData.num_tokens,
-      num_unique_words: corpusTextData.num_unique_words,
-      num_unique_lemmas: corpusTextData.num_unique_lemmas,
-      num_sentences: corpusTextData.num_sentences,
-      average_sentence_length: corpusTextData.average_sentence_length,
-      num_unique_ent_types: corpusTextData.num_unique_ent_types
+      title: corpusTextData.values.title,
+      publishing_year: corpusTextData.values.publishing_year,
+      num_tokens: corpusTextData.counts.token,
+      num_sentences: corpusTextData.counts.s,
+      num_unique_words: Object.entries(corpusTextData.freqs.word).length,
+      num_unique_lemmas: Object.entries(corpusTextData.freqs.lemma).length,
+      num_unique_pos: Object.entries(corpusTextData.freqs.pos).length,
+      num_unique_simple_pos: Object.entries(corpusTextData.freqs.simple_pos).length
     };
   }
 
diff --git a/app/templates/corpora/analysis.html.j2 b/app/templates/corpora/analysis.html.j2
index d10126cd..1452d2d0 100644
--- a/app/templates/corpora/analysis.html.j2
+++ b/app/templates/corpora/analysis.html.j2
@@ -35,44 +35,70 @@
       <div class="col s12">
         <h4><i class="material-icons left">query_stats</i>Visualizations</h4>
       </div>
-      <div class="col s4" >
-        <div class="card hoverable">
-          <div class="card-content">
-            <span class="card-title">General information about the Corpus</span>
-            <p></p>
-            <br>
-            <ul class="corpus-general-info-list">
-              <li class="corpus-num-tokens"></li>
-              <br>
-              <li class="corpus-text-count"></li>
-              <br>
-              <li class="corpus-num-unique-words"></li>
-              <br>
-              <li class="corpus-num-unique-lemmas"></li>
-              <br>
-              <li class="corpus-num-sentences"></li>
-              <br>
-              <li class="corpus-average-sentence-length"></li>
-              <br>
-              <li class="corpus-num-ent-types"></li>
-              <br>
-              <li class="corpus-num-unique-ent-types"></li>
-              <br>
-            </ul>
+    </div>
+    <div class="row">
+      <div class="col s2">
+        <div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
+          <div class="card-content" style="padding:10px !important; text-align:center;">
+            <p>Number of tokens</p>
+            <span class="card-title corpus-num-tokens"></span>
+          </div>
+        </div>
+      </div>
+      <div class="col s2">
+        <div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
+          <div class="card-content" style="padding:10px !important; text-align:center">
+            <p>Number of sentences</p>
+            <span class="card-title corpus-num-s"></span>
           </div>
         </div>
       </div>
-      <div class="col s8">
+      <div class="col s2">
+        <div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
+          <div class="card-content" style="padding:10px !important; text-align:center">
+            <p>Number of unique words</p>
+            <span class="card-title corpus-num-unique-words"></span>
+          </div>
+        </div>
+      </div>
+      <div class="col s2">
+        <div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
+          <div class="card-content" style="padding:10px !important; text-align:center">
+            <p>Number of unique lemmas</p>
+            <span class="card-title corpus-num-unique-lemmas"></span>
+          </div>
+        </div>
+      </div>
+      <div class="col s2">
+        <div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
+          <div class="card-content" style="padding:10px !important; text-align:center">
+            <p>Number of unique pos</p>
+            <span class="card-title corpus-num-unique-pos"></span>
+          </div>
+        </div>
+      </div>
+      <div class="col s2">
+        <div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
+          <div class="card-content" style="padding:10px !important; text-align:center">
+            <p>Number of unique simple pos</p>
+            <span class="card-title corpus-num-unique-simple-pos"></span>
+          </div>
+        </div>
+      </div>
+    </div>
+    <div class="row">
+      <div class="col s12">
         <div class="card hoverable">
           <div class="card-content">
             <span class="card-title">Text information</span>
+            <div class="chip text-count-chip" style="background-color:#6b3f89; color:white"></div>
             <div class="corpus-text-info-list no-autoinit"></div>
           </div>
         </div>
       </div>
     </div>
     <div class="row">
-      <div class="col s6">
+      <div class="col s3">
         <div class="card hoverable">
           <div class="card-content">
             <span class="card-title">Text proportions within the corpus</span>
@@ -80,7 +106,7 @@
           </div>
         </div>
       </div>
-      <div class="col s6">
+      <div class="col s3">
         <div class="card hoverable">
           <div class="card-content">
             <span class="card-title">Word frequencies</span>
@@ -88,6 +114,14 @@
           </div>
         </div>
       </div>
+      <div class="col s6">
+        <div class="card hoverable">
+          <div class="card-content">
+            <span class="card-title">Word distributions</span>
+            <div id="word-distributions-graphic"></div>
+          </div>
+        </div>
+      </div>
     </div>
   </div>
 
-- 
GitLab