diff --git a/app/corpora/json_routes.py b/app/corpora/json_routes.py index 6005fc48b778cc7241b64832c9296a619213e349..b6ef0110f41afcfe9558c3122fc50b79240e7454 100644 --- a/app/corpora/json_routes.py +++ b/app/corpora/json_routes.py @@ -7,6 +7,8 @@ from app.decorators import content_negotiation from app.models import Corpus, CorpusFollowerRole from . import bp from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required +import nltk +from string import punctuation @bp.route('/<hashid:corpus_id>', methods=['DELETE']) @@ -56,6 +58,27 @@ def build_corpus(corpus_id): } return response_data, 202 +@bp.route('/stopwords') +@content_negotiation(produces='application/json') +def get_stopwords(): + # data = request.json + # if not isinstance(data, dict): + # abort(400) + # language = data.get('language') + # if not isinstance(language, str): + # abort(400) + nltk.download('stopwords') + languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"] + stopwords = {} + for language in languages: + stopwords[language] = nltk.corpus.stopwords.words(language) + stopwords['punctuation'] = list(punctuation) + ['—', '|'] + stopwords['user_stopwords'] = [] + print(stopwords) + response_data = { + 'stopwords': stopwords + } + return response_data, 202 # @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST']) # @corpus_follower_permission_required('MANAGE_FOLLOWERS') diff --git a/app/jobs/json_routes.py b/app/jobs/json_routes.py index 7bedc7269fc236e49da5779020b8b2d4277101d4..9f1e1b2f237c8fe570bd1db0b6a19a510752f416 100644 --- a/app/jobs/json_routes.py +++ b/app/jobs/json_routes.py @@ -42,7 +42,6 @@ def job_log(job_id): with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: log = log_file.read() response_data = { - 'message': '', 'jobLog': log } return response_data, 200 diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js index 3b0213d029ecf21d45231d07be57d56935e8d548..c3adcc7f657d530221e8494d8cfd2231f848f238 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js @@ -1,6 +1,8 @@ class CorpusAnalysisApp { constructor(corpusId) { - this.data = {}; + this.data = { + promises: {getStopwords: []} + }; // HTML elements this.elements = { @@ -22,6 +24,49 @@ class CorpusAnalysisApp { }; } + // getStopwords(language) { + // if (language in this.data.promises.getStopwords) { + // console.log('Stopwords already loaded'); + // return this.data.promises.getStopwords[language]; + // } + // this.data.promises.getStopwords[language] = new Promise((resolve, reject) => { + // Requests.corpora.entity.getStopwords(language) + // .then((response) => { + // response.json() + // .then((json) => { + // let stopwords = json.stopwords; + // resolve(stopwords); + // }) + // .catch((error) => { + // reject(error); + // }); + // }); + // }); + // return this.data.promises.getStopwords[language]; + // } + + getStopwords() { + if (this.data.promises.getStopwords.length !== 0) { + console.log('Stopwords already loaded'); + return this.data.promises.getStopwords; + } + this.data.promises.getStopwords = new Promise((resolve, reject) => { + Requests.corpora.entity.getStopwords() + .then((response) => { + response.json() + .then((json) => { + let stopwords = json.stopwords; + resolve(stopwords); + }) + .catch((error) => { + reject(error); + }); + }); + }); + return this.data.promises.getStopwords; + } + + init() { this.disableActionElements(); this.elements.m.initModal.open(); @@ -155,9 +200,28 @@ class CorpusAnalysisApp { type: 'pie' } ]; - let config = {responsive: true}; + let graphLayout = { + showlegend: true, + height: 486, + margin: { + l: 10, + r: 10, + b: 10, + t: 10 + }, + legend: { + "orientation": "h", + font: { + size: 10 + } + } + }; + let config = { + responsive: true, + displaylogo: false + }; - Plotly.newPlot(textProportionsGraphicElement, graphData, config); + Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config); } renderFrequenciesGraphic(corpusData) { @@ -165,41 +229,105 @@ class CorpusAnalysisApp { let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); let texts = Object.entries(corpusData.s_attrs.text.lexicon); - - + let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType; + let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button'); + frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; this.renderFrequenciesGraphic(corpusData); }); + + graphModeButtons.forEach(graphModeButton => { + graphModeButton.addEventListener('click', (event) => { + graphModeButtons.forEach(btn => { + btn.classList.remove('disabled'); + }); + event.target.closest('.frequencies-graph-mode-button').classList.add('disabled'); + this.renderFrequenciesGraphic(corpusData); + }); + }); let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); - let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData); - let graphLayout = { - barmode: 'stack', - type: 'bar' - }; - let config = {responsive: true}; - - Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); + this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype) + .then(graphData => { + let graphLayout = { + barmode: graphtype === 'bar' ? 'stack' : '', + margin: { + t: 20, + l: 50 + }, + yaxis: { + showticklabels: graphtype === 'markers' ? false : true + }, + }; + let config = { + responsive: true, + modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'], + displaylogo: false + }; + Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); + }); } - createFrequenciesGraphData(category, texts, corpusData) { - let graphData = []; - let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5); + createFrequenciesGraphData(category, texts, corpusData, graphtype) { + return new Promise((resolve, reject) => { + this.getStopwords() + .then(stopwords => { + this.renderStopwordSettingsModal(stopwords); + let stopwordList = []; + Object.values(stopwords).forEach(stopwordItems => { + stopwordItems.forEach(stopword => { + stopwordList.push(stopword); + }); + }); + let graphData = []; + let filteredData = Object.entries(corpusData.corpus.freqs[category]) + .sort((a, b) => b[1] - a[1]) + .filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase())) + .slice(0, 5); + if (graphtype !== 'markers') { + for (let item of filteredData) { + let data = { + x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), + y: texts.map(text => text[1].freqs[category][item[0]] || 0), + name: corpusData.values.p_attrs[category][item[0]], + type: graphtype + }; + graphData.push(data); + } + } else { + for (let item of filteredData) { + let size = texts.map(text => text[1].freqs[category][item[0]] || 0); + let data = { + x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), + y: texts.map(text => corpusData.values.p_attrs[category][item[0]]), + name: corpusData.values.p_attrs[category][item[0]], + text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}<br>${text[1].freqs[category][item[0]] || 0}`), + mode: 'markers', + marker: { + size: size, + // sizeref: 2.0 * Math.max(...size) / (80**2), + // sizemode: 'area', + sizeref: 0.2 + } + }; + graphData.push(data); + } + } + resolve(graphData); + }) + .catch(error => { + reject(error); + }); + }); + } + + renderStopwordSettingsModal(stopwords) { + let stopwordInputField = document.querySelector('.stopword-input-field'); + } - for (let item of sortedData) { - let data = { - x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), - y: texts.map(text => text[1].freqs[category][item[0]]), - name: corpusData.values.p_attrs[category][item[0]], - type: 'bar' - }; - graphData.push(data); - } - return graphData; - } renderBoundsGraphic(corpusData) { let boundsGraphicElement = document.querySelector('#bounds-graphic'); @@ -232,7 +360,11 @@ class CorpusAnalysisApp { } }; - let config = {responsive: true}; + let config = { + responsive: true, + modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'], + displaylogo: false + }; Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config); } diff --git a/app/static/js/Requests/Requests.js b/app/static/js/Requests/Requests.js index 0504d8a0d3cdd00a5760da5382008d681f3ce2e8..7282f0f7130b87fd4d6170007cc52cd5905c94b8 100644 --- a/app/static/js/Requests/Requests.js +++ b/app/static/js/Requests/Requests.js @@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => { response.json() .then( (json) => { - let message = json.message || json; + let message = json.message; let category = json.category || 'message'; - app.flash(message, category); + if (message) { + app.flash(message, category); + } }, (error) => { app.flash(`[${response.status}]: ${response.statusText}`, 'error'); diff --git a/app/static/js/Requests/corpora/corpora.js b/app/static/js/Requests/corpora/corpora.js index 55f6b899730e62f96fd2564df9ab22ca0ea4f6b7..3118a1538879342c82fd0e9e5b5a9fb4ec9e37ea 100644 --- a/app/static/js/Requests/corpora/corpora.js +++ b/app/static/js/Requests/corpora/corpora.js @@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => { return Requests.JSONfetch(input, init); }; +Requests.corpora.entity.getStopwords = () => { + let input = `/corpora/stopwords`; + let init = { + method: 'GET' + }; + return Requests.JSONfetch(input, init); +}; + Requests.corpora.entity.isPublic = {}; Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { @@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { }; - diff --git a/app/templates/corpora/analysis.html.j2 b/app/templates/corpora/analysis.html.j2 index fbcd0be6a62c7e1eeee374ad131971b8cf9a2cee..b9a80c974ab16e3d821ae2d73cbb560c65e47da4 100644 --- a/app/templates/corpora/analysis.html.j2 +++ b/app/templates/corpora/analysis.html.j2 @@ -98,19 +98,19 @@ </div> </div> <div class="row"> - <div class="col s6"> + <div class="col s4"> <div class="card hoverable"> <div class="card-content"> <span class="card-title">Proportions</span> <p>of texts within the corpus</p> - <div id="text-proportions-graphic"></div> + <div id="text-proportions-graphic" style="width:100"></div> </div> </div> </div> - <div class="col s6"> + <div class="col s8"> <div class="card hoverable"> <div class="card-content"> - <span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span> + <span class="card-title">Frequencies</span> <ul id="frequencies-token-category-dropdown" class="dropdown-content"> <li><a data-token-category="word">Word</a></li> <li><a data-token-category="lemma">Lemma</a></li> @@ -119,6 +119,11 @@ </ul> <p>within the texts of the 5 most frequent words in the corpus</p> <div id="frequencies-graphic"></div> + <a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> + <a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">equalizer</i></a> + <a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a> + <a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a> + <a class="btn-flat modal-trigger" href="#frequencies-stopwords-setting-modal"><i class="material-icons grey-text text-darken-2">settings</i></a> </div> </div> </div> @@ -161,6 +166,21 @@ </div> </div> +<div class="modal" id="frequencies-stopwords-setting-modal"> + <div class="modal-content"> + <h4>Settings</h4> + <p>Here you can change the stopword-lists. Add your own stopwords or change the already existing below.</p> + <div class="chips chips-placeholder stopword-input-field"></div> + <div class="row"> + <div class="input-field col s3"> + <select class="stopword-language-selection"></select> + <label>Stopword language select</label> + </div> + </div> + </div> +</div> + + {% for extension in extensions %} {{ extension.modals }} {% endfor %} diff --git a/requirements.txt b/requirements.txt index 404b9e871c31202ab5b54ef92c83109ba753a586..18593b1cca193d92aa72142ef54cf03318c3eb05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ Flask-WTF hiredis MarkupSafe==2.0.1 marshmallow-sqlalchemy==0.29.0 +nltk psycopg2 PyJWT pyScss