From e6b27b0970b7f165e726ff74499a592c4460d48b Mon Sep 17 00:00:00 2001
From: Stephan Porada <sporada@uni-bielefeld.de>
Date: Tue, 19 Nov 2019 15:21:42 +0100
Subject: [PATCH] Add first setup for compression

---
 app/corpora/CQiWrapper/CQiWrapper.py         |  8 +++++---
 app/corpora/events.py                        | 17 ++++++++++++++---
 app/corpora/forms.py                         |  7 ++++++-
 app/templates/corpora/analyse_corpus.html.j2 |  2 ++
 4 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py
index 96d1f442..ff5773f1 100644
--- a/app/corpora/CQiWrapper/CQiWrapper.py
+++ b/app/corpora/CQiWrapper/CQiWrapper.py
@@ -230,9 +230,11 @@ class CQiWrapper(CQiClient):
                 for struct_attr_key in self.attr_strings[key].keys():
                     struct_entry = self.cl_cpos2struc(self.attr_strings[key][struct_attr_key],
                                                       all_cpos)
-                    logger.warning("{}: {}".format(self.attr_strings[key][struct_attr_key], struct_entry))
-                    match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry)
-                    logger.warning('{}:{}'.format(self.attr_strings[key][struct_attr_key], match_strs))
+                    has_value = self.corpus_structural_attribute_has_values(self.attr_strings[key][struct_attr_key])
+                    if has_value:
+                        match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry)
+                    else:
+                        match_strs = [None for i in struct_entry]
                     cpos_infos[struct_attr_key] = zip(struct_entry, match_strs)
         tmp_list = []
         attr_key_list = []
diff --git a/app/corpora/events.py b/app/corpora/events.py
index af04b469..799765f8 100644
--- a/app/corpora/events.py
+++ b/app/corpora/events.py
@@ -4,7 +4,9 @@ from app.models import Corpus
 from flask import current_app, request
 from flask_login import current_user, login_required
 from .CQiWrapper.CQiWrapper import CQiWrapper
-
+import sys
+import gzip
+import zlib
 
 '''
 ' A dictionary containing lists of, with corpus ids associated, Socket.IO
@@ -55,12 +57,21 @@ def recv_query(message):
                       room=request.sid)
         return
     """ Prepare and execute a query """
+    logger.warning('Payload: {}'.format(message))
     corpus_name = 'CORPUS'
     query = message['query']
     analysis_client.select_corpus(corpus_name)
     analysis_client.query_subcorpus(query)
-    results = analysis_client.show_query_results(result_len=2)
-    socketio.emit('query', results, room=request.sid)
+    results = analysis_client.show_query_results(result_len=int(message['hits_per_page']),
+                                                 context_len=int(message['context']))
+    size_internal_dict = sys.getsizeof(results) / 1000000
+    size_dict_to_str = sys.getsizeof(str(results)) / 1000000
+    compressed_str = gzip.compress(str(results).encode())
+    size_dict_to_str_compressed = sys.getsizeof(compressed_str) / 1000000
+    logger.warning('Size of dict for {} hits per page: {} MB'.format(message['hits_per_page'], size_internal_dict))
+    logger.warning('Size of dict to string for {} hits per page: {} MB'.format(message['hits_per_page'], size_dict_to_str))
+    logger.warning('Size of compressed dict to string for {} hits per page: {} MB'.format(message['hits_per_page'], size_dict_to_str_compressed))
+    socketio.emit('query', compressed_str, room=request.sid)
 
 
 def observe_corpus_analysis_connection(app, corpus_id, session_id):
diff --git a/app/corpora/forms.py b/app/corpora/forms.py
index 1afe1c78..313b9d51 100644
--- a/app/corpora/forms.py
+++ b/app/corpora/forms.py
@@ -42,7 +42,12 @@ class QueryForm(FlaskForm):
                                          ('20', '20'),
                                          ('30', '30'),
                                          ('40', '40'),
-                                         ('50', '50')],
+                                         ('50', '50'),
+                                         ('60', '60'),
+                                         ('70', '70'),
+                                         ('80', '80'),
+                                         ('90', '90'),
+                                         ('100', '100')],
                                 validators=[DataRequired()])
     context = SelectField('Context',
                           choices=[('', 'Words of context around hit'),
diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2
index 003458c4..a5a303cb 100644
--- a/app/templates/corpora/analyse_corpus.html.j2
+++ b/app/templates/corpora/analyse_corpus.html.j2
@@ -118,6 +118,8 @@
   socket.on('query', function(results) {
     queryResultsElement.innerHTML = '';
     console.log(results)
+    var decoder = new TextDecoder().decode(results)
+    console.log(results)
     for (let key in results) {
       console.log(results[key]);
       queryResultsElement.innerHTML += '<p>' + results[key]['match_cpos_list'] + '</p>';
-- 
GitLab