Skip to content
Snippets Groups Projects
Commit d08f95e9 authored by Inga Kirschnick
Browse files

dynamic token visualization

parent e4f435c5
No related branches found
No related tags found
No related merge requests found
......@@ -7,20 +7,21 @@ class CorpusAnalysisStaticVisualization {
stopwords: undefined,
originalStopwords: {},
stopwordCache: {},
promises: {getStopwords: undefined}
promises: {getStopwords: undefined},
tokenSet: new Set()
};
this.app.registerExtension(this);
}
async init() {
init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.renderGeneralCorpusInfo();
this.renderTextInfoList();
this.renderTextProportionsGraphic();
this.renderTokenList();
this.renderFrequenciesGraphic();
// this.renderFrequenciesGraphic();
// Add event listeners
let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
......@@ -46,7 +47,7 @@ class CorpusAnalysisStaticVisualization {
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic();
this.renderTokenList();
});
let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
......@@ -56,7 +57,7 @@ class CorpusAnalysisStaticVisualization {
btn.classList.remove('disabled');
});
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
this.renderFrequenciesGraphic();
this.renderFrequenciesGraphic(this.data.tokenSet);
});
});
......@@ -64,7 +65,8 @@ class CorpusAnalysisStaticVisualization {
actionButton.addEventListener('click', (event) => {
let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
if (action === 'submit') {
this.renderFrequenciesGraphic();
console.log('Stopwords changed');
this.renderTokenList();
} else if (action === 'cancel') {
this.data.stopwords = structuredClone(this.data.stopwordCache);
}
......@@ -208,34 +210,60 @@ class CorpusAnalysisStaticVisualization {
}
/**
 * Fill the token table rendered inside the `.corpus-token-list` element.
 *
 * Steps visible here: construct a CorpusTokenList on that element, load the
 * stopwords through getStopwords() when this.data.stopwords is still
 * undefined, flatten the stopword lists into one array, pick the four
 * highest-count terms that are not stopwords, and pass one
 * {term, count, mostFrequent} row per non-stopword term to corpusTokenList.add().
 *
 * NOTE(review): this span looks like a diff rendering without +/- markers, so
 * superseded and replacement lines appear back-to-back (two `let mostFrequent`
 * chains, two loop headers, two sets of resource fields). The variant reading
 * `filteredData` appears to be the current one - confirm against the repository.
 * NOTE(review): a new CorpusTokenList is constructed on every call - confirm
 * that repeated construction on the same element is intended.
 */
async renderTokenList() {
let corpusData = this.data.corpus.o.staticData;
let corpusTokenListElement = document.querySelector('.corpus-token-list');
let corpusTokenList = new CorpusTokenList(corpusTokenListElement);
let filteredData = this.filterData();
let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords();
}
stopwords = Object.values(stopwords).flat();
let mostFrequent = Object.entries(corpusData.corpus.freqs.word)
.sort((a, b) => b[1] - a[1])
.filter(item => !stopwords.includes(corpusData.values.p_attrs.word[item[0]].toLowerCase()))
let mostFrequent = Object.entries(filteredData)
.sort((a, b) => b[1].count - a[1].count)
.filter(item => !stopwords.includes(item[0].toLowerCase()))
.slice(0, 4)
.map(item => parseInt(item[0]));
.map(item => item[0])
let tokenData = [];
for (let i = 0; i < Object.values(corpusData.corpus.freqs.word).length; i++) {
Object.entries(filteredData).forEach(item => {
let resource = {
term: corpusData.values.p_attrs.word[i].toLowerCase(),
count: corpusData.corpus.freqs.word[i],
mostFrequent: mostFrequent.includes(i)
term: item[0],
count: item[1].count,
mostFrequent: mostFrequent.includes(item[0])
};
if (!Object.values(stopwords).includes(resource.term)) {
tokenData.push(resource);
}
}
});
corpusTokenList.add(tokenData);
}
async renderFrequenciesGraphic() {
filterData() {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let corpusData = this.data.corpus.o.staticData;
let filteredData = {};
for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) {
let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase();
let count = corpusData.corpus.freqs[tokenCategory][i];
if (filteredData[term]) {
filteredData[term].count += count;
filteredData[term].originalIds.push(i);
} else {
filteredData[term] = {
count: count,
originalIds: [i]
};
}
}
return filteredData;
}
renderFrequenciesGraphic(tokenSet) {
this.data.tokenSet = tokenSet;
let corpusData = this.data.corpus.o.staticData;
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
......@@ -243,12 +271,16 @@ class CorpusAnalysisStaticVisualization {
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = await this.createFrequenciesGraphData(tokenCategory, texts, graphtype);
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
let graphLayout = {
barmode: graphtype === 'bar' ? 'stack' : '',
yaxis: {
showticklabels: graphtype === 'markers' ? false : true
},
height: 627,
margin: {
l: 17
}
};
let config = {
responsive: true,
......@@ -258,31 +290,28 @@ class CorpusAnalysisStaticVisualization {
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
}
async createFrequenciesGraphData(tokenCategory, texts, graphtype) {
createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
let corpusData = this.data.corpus.o.staticData;
let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords();
}
let stopwordList = Object.values(stopwords).flat();
let graphData = [];
let filteredData = Object.entries(corpusData.corpus.freqs[tokenCategory])
.sort((a, b) => b[1] - a[1])
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[tokenCategory][item[0]].toLowerCase()))
.slice(0, 5);
let filteredData = this.filterData();
switch (graphtype) {
case 'markers':
for (let item of filteredData) {
let size = texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0);
for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => corpusData.values.p_attrs[tokenCategory][item[0]]),
name: corpusData.values.p_attrs[tokenCategory][item[0]],
text: texts.map(text => `${corpusData.values.p_attrs[tokenCategory][item[0]]}<br>${text[1].freqs[tokenCategory][item[0]] || 0}`),
x: textTitles,
y: texts.map(text => item),
name: item,
text: texts.map(text => `${item}<br>${tokenCountPerText || 0}`),
mode: 'markers',
marker: {
size: size,
size: tokenCountPerText,
sizeref: 0.4
}
};
......@@ -290,11 +319,18 @@ class CorpusAnalysisStaticVisualization {
}
break;
default:
for (let item of filteredData) {
for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0),
name: corpusData.values.p_attrs[tokenCategory][item[0]],
x: textTitles,
y: tokenCountPerText,
name: item,
type: graphtype
};
graphData.push(data);
......
......@@ -296,7 +296,7 @@ class ConcordanceQueryBuilder {
this.elements.entity.innerHTML = 'Entity';
}
this.elements.counter -= 1;
if (this.elements.counter <= 0) {
if (this.elements.counter === 0) {
this.elements.queryContainer.classList.add('hide');
}
this.queryPreviewBuilder();
......
......@@ -6,7 +6,7 @@ class CorpusTokenList extends ResourceList {
}
static defaultOptions = {
page: 100
page: 7
};
constructor(listContainerElement, options = {}) {
......@@ -16,8 +16,35 @@ class CorpusTokenList extends ResourceList {
);
super(listContainerElement, _options);
this.listjs.list.addEventListener('click', (event) => {this.onClick(event)});
this.selectedItemIds = new Set();
this.selectedItemTerms = new Set();
this.listjs.on('sortComplete', () => {
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
for (let item of listItems) {
let termElement = item.elm.querySelector('.term');
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
if (mostFrequent) {
this.selectedItemTerms.add(termElement.textContent);
}
}
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
});
let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button');
tokenListResetButtonElement.addEventListener('click', () => {
this.selectedItemTerms.clear();
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
for (let item of listItems) {
let termElement = item.elm.querySelector('.term');
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
if (mostFrequent) {
item.elm.querySelector('.select-checkbox').checked = true;
this.selectedItemTerms.add(termElement.textContent);
} else {
item.elm.querySelector('.select-checkbox').checked = false;
}
}
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
});
}
get item() {
......@@ -42,7 +69,7 @@ class CorpusTokenList extends ResourceList {
return [
'term',
'count',
'mostFrequent',
{data: ['mostFrequent']},
'frequency'
];
}
......@@ -58,24 +85,21 @@ class CorpusTokenList extends ResourceList {
<input id="${listSearchElementId}" class="search" type="text"></input>
<label for="${listSearchElementId}">Search token</label>
</div>
<div class="scrollable-list-container-wrapper" style="height:276px; overflow:scroll;">
<div class="scrollable-list-container">
<table>
<thead>
<tr>
<th></th>
<th>Term</th>
<th>Count</th>
<th>Frequency</th>
</tr>
</thead>
<tbody class="list"></tbody>
</table>
</div>
</div>
<table>
<thead>
<tr>
<th style="width:15%;">
<span class="material-icons" style="cursor:pointer" id="token-list-reset-button">refresh</span>
</th>
<th>Term</th>
<th>Count</th>
<th>Frequency</th>
</tr>
</thead>
<tbody class="list"></tbody>
</table>
<ul class="pagination"></ul>
`.trim();
this.listContainerElement.style.padding = '30px';
}
mapResourceToValue(corpusTokenData) {
......@@ -92,19 +116,20 @@ class CorpusTokenList extends ResourceList {
}
onClick(event) {
let listItemElement = event.target.closest('.list-item[data-id]');
if (event.target.closest('.disable-on-click') !== null) {return;}
let listItemElement = event.target.closest('.list-item');
if (listItemElement === null) {return;}
let itemId = listItemElement.dataset.id;
let item = listItemElement.querySelector('.term').textContent;
let listActionElement = event.target.closest('.list-action-trigger[data-list-action]');
let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction;
switch (listAction) {
case 'select': {
if (event.target.checked) {
this.selectedItemIds.add(itemId);
this.selectedItemTerms.add(item);
} else {
this.selectedItemIds.delete(itemId);
this.selectedItemTerms.delete(item);
}
this.renderingItemSelection();
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
break;
}
default: {
......@@ -113,9 +138,4 @@ class CorpusTokenList extends ResourceList {
}
}
renderingItemSelection() {
}
}
......@@ -90,9 +90,8 @@
<div class="card-content">
<span class="card-title">Frequencies</span>
<div class="row">
{# <div class="col s1"></div> #}
<div class="col s5">
<div class="corpus-token-list no-autoinit"></div>
<div class="col s4">
<div class="corpus-token-list no-autoinit" style="transform: scale(0.91);"></div>
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
<a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal">
<i class="material-icons grey-text text-darken-2">settings</i>
......@@ -104,12 +103,13 @@
<li><a data-token-category="simple_pos">Simple_pos</a></li>
</ul>
</div>
{# <div class="col s1"></div> #}
<div class="col s7">
<div id="frequencies-graphic"></div>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
<div class="col s8">
<div id="frequencies-graphic"></div>
<div>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
</div>
</div>
</div>
</div>
......@@ -130,8 +130,7 @@
like "the" or "and," that carry little meaning and are often removed in text analysis
to improve efficiency and accuracy.</p>
<div id="user-stopword-list-container"></div>
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field">
</div>
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field"></div>
</div>
<div class="row">
<p>Below you can find a list of all stopwords that are always filtered out.
......@@ -159,3 +158,4 @@
const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp);
</script>
{% endset %}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment