diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js b/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js index 09686387b186ad286a99c4b937ae7acf19017bc1..01c8f08f64f0bad1511e81a505f265afcd7b6fa1 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js @@ -7,20 +7,21 @@ class CorpusAnalysisStaticVisualization { stopwords: undefined, originalStopwords: {}, stopwordCache: {}, - promises: {getStopwords: undefined} + promises: {getStopwords: undefined}, + tokenSet: new Set() }; this.app.registerExtension(this); } - async init() { + init() { // Init data this.data.corpus = this.app.data.corpus; this.renderGeneralCorpusInfo(); this.renderTextInfoList(); this.renderTextProportionsGraphic(); this.renderTokenList(); - this.renderFrequenciesGraphic(); + // this.renderFrequenciesGraphic(); // Add event listeners let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal'); @@ -46,7 +47,7 @@ class CorpusAnalysisStaticVisualization { let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; - this.renderFrequenciesGraphic(); + this.renderTokenList(); }); let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button'); @@ -56,7 +57,7 @@ class CorpusAnalysisStaticVisualization { btn.classList.remove('disabled'); }); event.target.closest('.frequencies-graph-mode-button').classList.add('disabled'); - this.renderFrequenciesGraphic(); + this.renderFrequenciesGraphic(this.data.tokenSet); }); }); @@ -64,7 +65,8 @@ class CorpusAnalysisStaticVisualization { actionButton.addEventListener('click', (event) => { let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action; if (action === 'submit') { - this.renderFrequenciesGraphic(); + console.log('Stopwords changed'); + this.renderTokenList(); } else if (action === 'cancel') { this.data.stopwords = structuredClone(this.data.stopwordCache); } @@ -208,34 +210,60 @@ class CorpusAnalysisStaticVisualization { } async renderTokenList() { - let corpusData = this.data.corpus.o.staticData; let corpusTokenListElement = document.querySelector('.corpus-token-list'); let corpusTokenList = new CorpusTokenList(corpusTokenListElement); + let filteredData = this.filterData(); let stopwords = this.data.stopwords; if (this.data.stopwords === undefined) { stopwords = await this.getStopwords(); } stopwords = Object.values(stopwords).flat(); - let mostFrequent = Object.entries(corpusData.corpus.freqs.word) - .sort((a, b) => b[1] - a[1]) - .filter(item => !stopwords.includes(corpusData.values.p_attrs.word[item[0]].toLowerCase())) + let mostFrequent = Object.entries(filteredData) + .sort((a, b) => b[1].count - a[1].count) + .filter(item => !stopwords.includes(item[0].toLowerCase())) .slice(0, 4) - .map(item => parseInt(item[0])); + .map(item => item[0]) + let tokenData = []; - for (let i = 0; i < Object.values(corpusData.corpus.freqs.word).length; i++) { + Object.entries(filteredData).forEach(item => { let resource = { - term: corpusData.values.p_attrs.word[i].toLowerCase(), - count: corpusData.corpus.freqs.word[i], - mostFrequent: mostFrequent.includes(i) + term: item[0], + count: item[1].count, + mostFrequent: mostFrequent.includes(item[0]) }; if (!Object.values(stopwords).includes(resource.term)) { tokenData.push(resource); } - } + }); corpusTokenList.add(tokenData); } - async renderFrequenciesGraphic() { + filterData() { + let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); + let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); + let corpusData = this.data.corpus.o.staticData; + let filteredData = {}; + + for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) { + let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase(); + let count = corpusData.corpus.freqs[tokenCategory][i]; + + if (filteredData[term]) { + filteredData[term].count += count; + filteredData[term].originalIds.push(i); + } else { + filteredData[term] = { + count: count, + originalIds: [i] + }; + } + } + return filteredData; + } + + + renderFrequenciesGraphic(tokenSet) { + this.data.tokenSet = tokenSet; let corpusData = this.data.corpus.o.staticData; let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); @@ -243,12 +271,16 @@ class CorpusAnalysisStaticVisualization { let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType; let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); - let graphData = await this.createFrequenciesGraphData(tokenCategory, texts, graphtype); + let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet); let graphLayout = { barmode: graphtype === 'bar' ? 'stack' : '', yaxis: { showticklabels: graphtype === 'markers' ? false : true }, + height: 627, + margin: { + l: 17 + } }; let config = { responsive: true, @@ -258,31 +290,28 @@ class CorpusAnalysisStaticVisualization { Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); } - async createFrequenciesGraphData(tokenCategory, texts, graphtype) { + createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) { let corpusData = this.data.corpus.o.staticData; - let stopwords = this.data.stopwords; - if (this.data.stopwords === undefined) { - stopwords = await this.getStopwords(); - } - let stopwordList = Object.values(stopwords).flat(); let graphData = []; - let filteredData = Object.entries(corpusData.corpus.freqs[tokenCategory]) - .sort((a, b) => b[1] - a[1]) - .filter(item => !stopwordList.includes(corpusData.values.p_attrs[tokenCategory][item[0]].toLowerCase())) - .slice(0, 5); - + let filteredData = this.filterData(); switch (graphtype) { case 'markers': - for (let item of filteredData) { - let size = texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0); + for (let item of tokenSet) { + let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`); + let tokenCountPerText = []; + for (let originalId of filteredData[item].originalIds) { + for (let i = 0; i < texts.length; i++) { + tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0); + } + } let data = { - x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), - y: texts.map(text => corpusData.values.p_attrs[tokenCategory][item[0]]), - name: corpusData.values.p_attrs[tokenCategory][item[0]], - text: texts.map(text => `${corpusData.values.p_attrs[tokenCategory][item[0]]}<br>${text[1].freqs[tokenCategory][item[0]] || 0}`), + x: textTitles, + y: texts.map(text => item), + name: item, + text: texts.map(text => `${item}<br>${tokenCountPerText || 0}`), mode: 'markers', marker: { - size: size, + size: tokenCountPerText, sizeref: 0.4 } }; @@ -290,11 +319,18 @@ class CorpusAnalysisStaticVisualization { } break; default: - for (let item of filteredData) { + for (let item of tokenSet) { + let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`); + let tokenCountPerText = []; + for (let originalId of filteredData[item].originalIds) { + for (let i = 0; i < texts.length; i++) { + tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0); + } + } let data = { - x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), - y: texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0), - name: corpusData.values.p_attrs[tokenCategory][item[0]], + x: textTitles, + y: tokenCountPerText, + name: item, type: graphtype }; graphData.push(data); diff --git a/app/static/js/CorpusAnalysis/QueryBuilder.js b/app/static/js/CorpusAnalysis/QueryBuilder.js index ee52fb4577eac67e2969c33151c6721f29437ee6..f3a2bf6039f0dff5773902535d8084f75068d5a7 100644 --- a/app/static/js/CorpusAnalysis/QueryBuilder.js +++ b/app/static/js/CorpusAnalysis/QueryBuilder.js @@ -296,7 +296,7 @@ class ConcordanceQueryBuilder { this.elements.entity.innerHTML = 'Entity'; } this.elements.counter -= 1; - if (this.elements.counter <= 0) { + if (this.elements.counter === 0) { this.elements.queryContainer.classList.add('hide'); } this.queryPreviewBuilder(); diff --git a/app/static/js/ResourceLists/CorpusTokenList.js b/app/static/js/ResourceLists/CorpusTokenList.js index 1b99203878686b06fa63db1a88722ad486e4cfb4..48129217e37233dd9a96cc224ec50eab44480395 100644 --- a/app/static/js/ResourceLists/CorpusTokenList.js +++ b/app/static/js/ResourceLists/CorpusTokenList.js @@ -6,7 +6,7 @@ class CorpusTokenList extends ResourceList { } static defaultOptions = { - page: 100 + page: 7 }; constructor(listContainerElement, options = {}) { @@ -16,8 +16,35 @@ class CorpusTokenList extends ResourceList { ); super(listContainerElement, _options); this.listjs.list.addEventListener('click', (event) => {this.onClick(event)}); - this.selectedItemIds = new Set(); - + this.selectedItemTerms = new Set(); + this.listjs.on('sortComplete', () => { + let listItems = Array.from(this.listjs.items).filter(item => item.elm); + for (let item of listItems) { + let termElement = item.elm.querySelector('.term'); + let mostFrequent = item.elm.dataset.mostfrequent === 'true'; + if (mostFrequent) { + this.selectedItemTerms.add(termElement.textContent); + } + } + corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms); + }); + + let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button'); + tokenListResetButtonElement.addEventListener('click', () => { + this.selectedItemTerms.clear(); + let listItems = Array.from(this.listjs.items).filter(item => item.elm); + for (let item of listItems) { + let termElement = item.elm.querySelector('.term'); + let mostFrequent = item.elm.dataset.mostfrequent === 'true'; + if (mostFrequent) { + item.elm.querySelector('.select-checkbox').checked = true; + this.selectedItemTerms.add(termElement.textContent); + } else { + item.elm.querySelector('.select-checkbox').checked = false; + } + } + corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms); + }); } get item() { @@ -42,7 +69,7 @@ class CorpusTokenList extends ResourceList { return [ 'term', 'count', - 'mostFrequent', + {data: ['mostFrequent']}, 'frequency' ]; } @@ -58,24 +85,21 @@ class CorpusTokenList extends ResourceList { <input id="${listSearchElementId}" class="search" type="text"></input> <label for="${listSearchElementId}">Search token</label> </div> - <div class="scrollable-list-container-wrapper" style="height:276px; overflow:scroll;"> - <div class="scrollable-list-container"> - <table> - <thead> - <tr> - <th></th> - <th>Term</th> - <th>Count</th> - <th>Frequency</th> - </tr> - </thead> - <tbody class="list"></tbody> - </table> - </div> - </div> + <table> + <thead> + <tr> + <th style="width:15%;"> + <span class="material-icons" style="cursor:pointer" id="token-list-reset-button">refresh</span> + </th> + <th>Term</th> + <th>Count</th> + <th>Frequency</th> + </tr> + </thead> + <tbody class="list"></tbody> + </table> <ul class="pagination"></ul> `.trim(); - this.listContainerElement.style.padding = '30px'; } mapResourceToValue(corpusTokenData) { @@ -92,19 +116,20 @@ class CorpusTokenList extends ResourceList { } onClick(event) { - let listItemElement = event.target.closest('.list-item[data-id]'); + if (event.target.closest('.disable-on-click') !== null) {return;} + let listItemElement = event.target.closest('.list-item'); if (listItemElement === null) {return;} - let itemId = listItemElement.dataset.id; + let item = listItemElement.querySelector('.term').textContent; let listActionElement = event.target.closest('.list-action-trigger[data-list-action]'); let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction; switch (listAction) { case 'select': { if (event.target.checked) { - this.selectedItemIds.add(itemId); + this.selectedItemTerms.add(item); } else { - this.selectedItemIds.delete(itemId); + this.selectedItemTerms.delete(item); } - this.renderingItemSelection(); + corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms); break; } default: { @@ -113,9 +138,4 @@ class CorpusTokenList extends ResourceList { } } - renderingItemSelection() { - - - } - } diff --git a/app/templates/corpora/_analysis/static_visualization.html.j2 b/app/templates/corpora/_analysis/static_visualization.html.j2 index 70a0b605234e8ca476d474cdc82855b373bfd794..8ff561788578b8d230c54db77e888da39ebf0a74 100644 --- a/app/templates/corpora/_analysis/static_visualization.html.j2 +++ b/app/templates/corpora/_analysis/static_visualization.html.j2 @@ -90,9 +90,8 @@ <div class="card-content"> <span class="card-title">Frequencies</span> <div class="row"> - {# <div class="col s1"></div> #} - <div class="col s5"> - <div class="corpus-token-list no-autoinit"></div> + <div class="col s4"> + <div class="corpus-token-list no-autoinit" style="transform: scale(0.91);"></div> <a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> <a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal"> <i class="material-icons grey-text text-darken-2">settings</i> @@ -104,12 +103,13 @@ <li><a data-token-category="simple_pos">Simple_pos</a></li> </ul> </div> - {# <div class="col s1"></div> #} - <div class="col s7"> - <div id="frequencies-graphic"></div> - <a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a> - <a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a> - <a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a> + <div class="col s8"> + <div id="frequencies-graphic"></div> + <div> + <a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a> + <a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a> + <a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a> + </div> </div> </div> </div> @@ -130,8 +130,7 @@ like "the" or "and," that carry little meaning and are often removed in text analysis to improve efficiency and accuracy.</p> <div id="user-stopword-list-container"></div> - <div class="chips col s8 no-autoinit input-field" id="stopword-input-field"> - </div> + <div class="chips col s8 no-autoinit input-field" id="stopword-input-field"></div> </div> <div class="row"> <p>Below you can find a list of all stopwords that are always filtered out. @@ -159,3 +158,4 @@ const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp); </script> {% endset %} + \ No newline at end of file