Skip to content
Snippets Groups Projects
Commit 413b6111 authored by Patrick Jentsch
Browse files

Implement fast boundary computation for ent and s s_attrs

parent a9f05fff
No related branches found
No related tags found
No related merge requests found
...@@ -99,12 +99,29 @@ def ext_corpus_static_data(corpus: str) -> Dict: ...@@ -99,12 +99,29 @@ def ext_corpus_static_data(corpus: str) -> Dict:
static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None} static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
static_corpus_data['values']['s_attrs'][s_attr.name] = {} static_corpus_data['values']['s_attrs'][s_attr.name] = {}
if s_attr.name in ['s', 'ent']:
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
first_match = 0
last_match = cqi_subcorpus.size - 1
match_boundaries = zip(
range(first_match, last_match + 1),
cqi_subcorpus.dump(cqi_subcorpus.fields['match'], first_match, last_match),
cqi_subcorpus.dump(cqi_subcorpus.fields['matchend'], first_match, last_match)
)
for id, lbound, rbound in match_boundaries:
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
cqi_subcorpus.drop()
for id in range(0, s_attr.size): for id in range(0, s_attr.size):
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {} if s_attr.name not in ['s', 'ent']:
lbound, rbound = s_attr.cpos_by_id(id) static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] lbound, rbound = s_attr.cpos_by_id(id)
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {} static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1 static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
if s_attr.name not in ['text', 's']: if s_attr.name not in ['text', 's']:
continue continue
cpos_range = range(lbound, rbound + 1) cpos_range = range(lbound, rbound + 1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment