From e6b27b0970b7f165e726ff74499a592c4460d48b Mon Sep 17 00:00:00 2001 From: Stephan Porada <sporada@uni-bielefeld.de> Date: Tue, 19 Nov 2019 15:21:42 +0100 Subject: [PATCH] Add first setup for compression --- app/corpora/CQiWrapper/CQiWrapper.py | 8 +++++--- app/corpora/events.py | 17 ++++++++++++++--- app/corpora/forms.py | 7 ++++++- app/templates/corpora/analyse_corpus.html.j2 | 2 ++ 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py index 96d1f442..ff5773f1 100644 --- a/app/corpora/CQiWrapper/CQiWrapper.py +++ b/app/corpora/CQiWrapper/CQiWrapper.py @@ -230,9 +230,11 @@ class CQiWrapper(CQiClient): for struct_attr_key in self.attr_strings[key].keys(): struct_entry = self.cl_cpos2struc(self.attr_strings[key][struct_attr_key], all_cpos) - logger.warning("{}: {}".format(self.attr_strings[key][struct_attr_key], struct_entry)) - match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry) - logger.warning('{}:{}'.format(self.attr_strings[key][struct_attr_key], match_strs)) + has_value = self.corpus_structural_attribute_has_values(self.attr_strings[key][struct_attr_key]) + if has_value: + match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry) + else: + match_strs = [None for i in struct_entry] cpos_infos[struct_attr_key] = zip(struct_entry, match_strs) tmp_list = [] attr_key_list = [] diff --git a/app/corpora/events.py b/app/corpora/events.py index af04b469..799765f8 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -4,7 +4,9 @@ from app.models import Corpus from flask import current_app, request from flask_login import current_user, login_required from .CQiWrapper.CQiWrapper import CQiWrapper - +import sys +import gzip +import zlib ''' ' A dictionary containing lists of, with corpus ids associated, Socket.IO @@ -55,12 +57,21 @@ def recv_query(message): room=request.sid) return """ Prepare and execute a query """ + logger.warning('Payload: {}'.format(message)) corpus_name = 'CORPUS' query = message['query'] analysis_client.select_corpus(corpus_name) analysis_client.query_subcorpus(query) - results = analysis_client.show_query_results(result_len=2) - socketio.emit('query', results, room=request.sid) + results = analysis_client.show_query_results(result_len=int(message['hits_per_page']), + context_len=int(message['context'])) + size_internal_dict = sys.getsizeof(results) / 1000000 + size_dict_to_str = sys.getsizeof(str(results)) / 1000000 + compressed_str = gzip.compress(str(results).encode()) + size_dict_to_str_compressed = sys.getsizeof(compressed_str) / 1000000 + logger.warning('Size of dict for {} hits per page: {} MB'.format(message['hits_per_page'], size_internal_dict)) + logger.warning('Size of dict to string for {} hits per page: {} MB'.format(message['hits_per_page'], size_dict_to_str)) + logger.warning('Size of compressed dict to string for {} hits per page: {} MB'.format(message['hits_per_page'], size_dict_to_str_compressed)) + socketio.emit('query', compressed_str, room=request.sid) def observe_corpus_analysis_connection(app, corpus_id, session_id): diff --git a/app/corpora/forms.py b/app/corpora/forms.py index 1afe1c78..313b9d51 100644 --- a/app/corpora/forms.py +++ b/app/corpora/forms.py @@ -42,7 +42,12 @@ class QueryForm(FlaskForm): ('20', '20'), ('30', '30'), ('40', '40'), - ('50', '50')], + ('50', '50'), + ('60', '60'), + ('70', '70'), + ('80', '80'), + ('90', '90'), + ('100', '100')], validators=[DataRequired()]) context = SelectField('Context', choices=[('', 'Words of context around hit'), diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index 003458c4..a5a303cb 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -118,6 +118,8 @@ socket.on('query', function(results) { queryResultsElement.innerHTML = ''; console.log(results) + var decoder = new TextDecoder().decode(results) + console.log(results) for (let key in results) { console.log(results[key]); queryResultsElement.innerHTML += '<p>' + results[key]['match_cpos_list'] + '</p>'; -- GitLab