Skip to content
Snippets Groups Projects
Commit 1c98c507 authored by Inga Kirschnick's avatar Inga Kirschnick
Browse files

Merge branch 'visualizations-update' of...

Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
parents 142c82cc 1e333668
No related branches found
No related tags found
No related merge requests found
from collections import Counter
from flask import session
import cqi
import json
import math
import random
import os
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
......@@ -49,6 +50,12 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id'])
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
if os.path.exists(visualization_data_file_path):
with open(visualization_data_file_path, 'r') as f:
payload = json.load(f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
cqi_corpus = cqi_client.corpora.get(corpus_name)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
......@@ -108,7 +115,8 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
if s_attr.has_values:
continue
payload['corpus']['counts'][s_attr.name] = s_attr.size
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []}
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
payload['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
......@@ -125,6 +133,26 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
payload['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
payload['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
with open(visualization_data_file_path, 'w') as f:
json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment