diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py index 96d1f4425f615e3107baff8ad4d9427e90eda5f8..ff5773f1faa8f50f79a1df1f2568691dc4dd47dd 100644 --- a/app/corpora/CQiWrapper/CQiWrapper.py +++ b/app/corpora/CQiWrapper/CQiWrapper.py @@ -230,9 +230,11 @@ class CQiWrapper(CQiClient): for struct_attr_key in self.attr_strings[key].keys(): struct_entry = self.cl_cpos2struc(self.attr_strings[key][struct_attr_key], all_cpos) - logger.warning("{}: {}".format(self.attr_strings[key][struct_attr_key], struct_entry)) - match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry) - logger.warning('{}:{}'.format(self.attr_strings[key][struct_attr_key], match_strs)) + has_value = self.corpus_structural_attribute_has_values(self.attr_strings[key][struct_attr_key]) + if has_value: + match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry) + else: + match_strs = [None for i in struct_entry] cpos_infos[struct_attr_key] = zip(struct_entry, match_strs) tmp_list = [] attr_key_list = [] diff --git a/app/corpora/events.py b/app/corpora/events.py index af04b4699d275f277c8b433b912efad13eb10fe5..799765f805dd9b267a5b4e3ef00a62d8371e14f0 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -4,7 +4,9 @@ from app.models import Corpus from flask import current_app, request from flask_login import current_user, login_required from .CQiWrapper.CQiWrapper import CQiWrapper - +import sys +import gzip +import zlib ''' ' A dictionary containing lists of, with corpus ids associated, Socket.IO @@ -55,12 +57,21 @@ def recv_query(message): room=request.sid) return """ Prepare and execute a query """ + logger.warning('Payload: {}'.format(message)) corpus_name = 'CORPUS' query = message['query'] analysis_client.select_corpus(corpus_name) analysis_client.query_subcorpus(query) - results = analysis_client.show_query_results(result_len=2) - socketio.emit('query', results, room=request.sid) + results = analysis_client.show_query_results(result_len=int(message['hits_per_page']), + context_len=int(message['context'])) + size_internal_dict = sys.getsizeof(results) / 1000000 + size_dict_to_str = sys.getsizeof(str(results)) / 1000000 + compressed_str = gzip.compress(str(results).encode()) + size_dict_to_str_compressed = sys.getsizeof(compressed_str) / 1000000 + logger.warning('Size of dict for {} hits per page: {} MB'.format(message['hits_per_page'], size_internal_dict)) + logger.warning('Size of dict to string for {} hits per page: {} MB'.format(message['hits_per_page'], size_dict_to_str)) + logger.warning('Size of compressed dict to string for {} hits per page: {} MB'.format(message['hits_per_page'], size_dict_to_str_compressed)) + socketio.emit('query', compressed_str, room=request.sid) def observe_corpus_analysis_connection(app, corpus_id, session_id): diff --git a/app/corpora/forms.py b/app/corpora/forms.py index 1afe1c781bdf6fbd919e692c08a1311287ff64ff..313b9d5100d6f5ea1dd78f12b03ce48c99b079a1 100644 --- a/app/corpora/forms.py +++ b/app/corpora/forms.py @@ -42,7 +42,12 @@ class QueryForm(FlaskForm): ('20', '20'), ('30', '30'), ('40', '40'), - ('50', '50')], + ('50', '50'), + ('60', '60'), + ('70', '70'), + ('80', '80'), + ('90', '90'), + ('100', '100')], validators=[DataRequired()]) context = SelectField('Context', choices=[('', 'Words of context around hit'), diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index 003458c4a8f1f6f7eb4d586ae0b1254937535d95..a5a303cb0794f6c89888b50d30050f056493e8ad 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -118,6 +118,8 @@ socket.on('query', function(results) { queryResultsElement.innerHTML = ''; console.log(results) + var decoder = new TextDecoder().decode(results) + console.log(results) for (let key in results) { console.log(results[key]); queryResultsElement.innerHTML += '<p>' + results[key]['match_cpos_list'] + '</p>';