From 1c9e715980f59acc1bcbb8e659f2ca870562a5aa Mon Sep 17 00:00:00 2001
From: Stephan Porada <sporada@uni-bielefeld.de>
Date: Thu, 28 Nov 2019 15:19:52 +0100
Subject: [PATCH] Fix some things for query results

---
 app/corpora/CQiWrapper/CQiWrapper.py         | 9 +++++----
 app/corpora/events.py                        | 9 +++++++--
 app/templates/corpora/analyse_corpus.html.j2 | 9 +++++----
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py
index ea0acff9..c313bf05 100644
--- a/app/corpora/CQiWrapper/CQiWrapper.py
+++ b/app/corpora/CQiWrapper/CQiWrapper.py
@@ -133,8 +133,8 @@ class CQiWrapper(CQiClient):
             # match_boundries shows the start and end cpos of one match as a
             # pair of cpositions
             # [(1355, 1357), (1477, 1479)] Example for two boundry pairs
-            offset_start = 0 + (result_offset + 1) if result_offset != 0 else result_offset
-            offset_end = self.nr_matches + result_offset
+            offset_start = 0 if result_offset == 0 else result_offset
+            offset_end = self.nr_matches + result_offset - 1
             match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
                                                            CONST_FIELD_MATCH,
                                                            offset_start,
@@ -152,9 +152,10 @@ class CQiWrapper(CQiClient):
         all_matches = []
         all_cpos = []
         for start, end in match_boundaries:
+            end += 1
             lc_cpos = list(range(max([0, start - self.context_len]), start))
             lc = {'lc': lc_cpos}
-            match_cpos = list(range(start, end + 1))
+            match_cpos = list(range(start, end))
             match = {'hit': match_cpos}
             rc_cpos = list(range(end, min([self.corpus_max_len, end + self.context_len])))
             rc = {'rc': rc_cpos}
@@ -226,7 +227,7 @@ class CQiWrapper(CQiClient):
         text_lookup = {}
         for d in structs_to_check:
             s_key, s_value = zip(*d.items())
-            s_value = s_value[0].split('_')[1]
+            s_value = s_value[0].split('_', 1)[1]
             struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
             zipped = dict(zip(text_lookup_ids, struct_values))
             for zip_key, zip_value in zipped.items():
diff --git a/app/corpora/events.py b/app/corpora/events.py
index 8d1580c3..954e1594 100644
--- a/app/corpora/events.py
+++ b/app/corpora/events.py
@@ -44,10 +44,15 @@ def corpus_analysis(message):
         return
     """ Prepare and execute a query """
     corpus_name = 'CORPUS'
-    query = (message['query'])
+    query = str(message['query'])
+    result_len = int(message['hits_per_page'])
+    context_len = int(message['context'])
+    result_offset = int(0)
     client.select_corpus(corpus_name)
     client.query_subcorpus(query)
-    results = client.show_query_results(result_len=int(message['hits_per_page']), context_len=int(message['context']))
+    results = client.show_query_results(result_len=result_len,
+                                        context_len=context_len,
+                                        result_offset=result_offset)
 
     socketio.emit('corpus_analysis', results, room=request.sid)
 
diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2
index 5db09786..a488c1e6 100644
--- a/app/templates/corpora/analyse_corpus.html.j2
+++ b/app/templates/corpora/analyse_corpus.html.j2
@@ -269,10 +269,11 @@
                                </tr>
                                <tr>
                                  <td class="left-align">
-                                   word: ${token["word"]}<br>
-                                   lemma: ${token["lemma"]}<br>
-                                   pos: ${token["pos"]}<br>
-                                   simple_pos: ${token["simple_pos"]}
+                                   Word: ${token["word"]}<br>
+                                   Lemma: ${token["lemma"]}<br>
+                                   POS: ${token["pos"]}<br>
+                                   Simple POS: ${token["simple_pos"]}<br>
+                                   NER: ${token["ner"]}
                                  </td>
                                  <td class="left-align">
                                    Title: ${result["text_lookup"][token["text"]]["title"]}<br>
-- 
GitLab