Skip to content
Snippets Groups Projects
Commit edc0b340 authored by Patrick Jentsch's avatar Patrick Jentsch
Browse files

Process corpus files in task, not in database model

parent e882af88
No related branches found
No related tags found
No related merge requests found
from .. import db
from ..decorators import background
from ..models import Corpus, CorpusFile
import xml.etree.ElementTree as ET
import os
import shutil
@background
def build_corpus(app, corpus_id):
    """Write file metadata into each corpus file and merge them into one VRT.

    For every file of the corpus the ``<text>`` element is annotated with the
    metadata stored on the ``CorpusFile`` row, the annotated file is written
    back in place, and the ``<text>`` element is added to a single
    ``<corpus>`` document saved as ``merged/corpus.vrt`` inside the corpus
    directory.

    The corpus status is set to ``'File processing'`` while the task runs and
    to ``'submitted'`` once the merged file has been written. Nothing happens
    if no corpus with ``corpus_id`` exists.

    :param app: application object providing ``app_context()`` and ``config``
    :param corpus_id: primary key of the corpus to build
    :raises ValueError: if a corpus file contains no ``<text>`` element
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return
        corpus.status = 'File processing'
        db.session.commit()
        corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
                                  str(corpus.user_id), 'corpora',
                                  str(corpus.id))
        output_dir = os.path.join(corpus_dir, 'merged')
        # Always start from an empty output directory so that no stale
        # result of a previous build survives.
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
        master_element_tree = ET.ElementTree(
            ET.fromstring('<corpus>\n</corpus>'))
        master_root = master_element_tree.getroot()
        for corpus_file in corpus.files:
            file_path = os.path.join(corpus_dir, corpus_file.filename)
            element_tree = ET.parse(file_path)
            text_node = element_tree.find('text')
            if text_node is None:
                # Fail with a message that names the offending file instead
                # of an AttributeError on None further down.
                raise ValueError(
                    'Corpus file {} contains no <text> element'.format(
                        file_path))
            # Optional fields are stored as the literal string "NULL" when
            # empty. The order of the pairs is the order in which the
            # attributes are set on the element.
            metadata = (
                ('address', corpus_file.address or "NULL"),
                ('author', corpus_file.author),
                ('booktitle', corpus_file.booktitle or "NULL"),
                ('chapter', corpus_file.chapter or "NULL"),
                ('editor', corpus_file.editor or "NULL"),
                ('institution', corpus_file.institution or "NULL"),
                ('journal', corpus_file.journal or "NULL"),
                ('pages', corpus_file.pages or "NULL"),
                ('publisher', corpus_file.publisher or "NULL"),
                ('publishing_year', str(corpus_file.publishing_year)),
                ('school', corpus_file.school or "NULL"),
                ('title', corpus_file.title),
            )
            for key, value in metadata:
                text_node.set(key, value)
            element_tree.write(file_path)
            # append() keeps the texts in iteration order. The previous
            # insert(1, ...) placed every text after the first one directly
            # behind it, which reversed the order of all later files.
            master_root.append(text_node)
        output_file = os.path.join(output_dir, 'corpus.vrt')
        master_element_tree.write(output_file, xml_declaration=True,
                                  encoding='utf-8')
        corpus.status = 'submitted'
        db.session.commit()
@background
def delete_corpus(app, corpus_id):
with app.app_context():
......@@ -30,12 +73,3 @@ def delete_corpus_file(app, corpus_file_id):
pass
else:
corpus_file.delete()
@background
def edit_corpus_file(app, corpus_file_id):
    """Write the stored metadata of one corpus file into the file itself.

    Looks the corpus file up by its primary key inside an application
    context and delegates to ``CorpusFile.insert_metadata``.

    :param app: application object providing ``app_context()``
    :param corpus_file_id: primary key of the corpus file to process
    :raises Exception: if no corpus file with that id exists
    """
    with app.app_context():
        target = CorpusFile.query.get(corpus_file_id)
        if target is not None:
            target.insert_metadata()
            return
        raise Exception('Corpus file {} not found!'.format(corpus_file_id))
......@@ -60,14 +60,16 @@ def analyse_corpus(corpus_id):
query_form = QueryForm(prefix='query-form',
query=request.args.get('query'))
query_download_form = QueryDownloadForm(prefix='query-download-form')
inspect_display_options_form = InspectDisplayOptionsForm(prefix='inspect-display-options-form')
return render_template('corpora/analyse_corpus.html.j2',
corpus_id=corpus_id,
display_options_form=display_options_form,
query_form=query_form,
query_download_form=query_download_form,
inspect_display_options_form=inspect_display_options_form,
title='Corpus analysis')
inspect_display_options_form = InspectDisplayOptionsForm(
prefix='inspect-display-options-form')
return render_template(
'corpora/analyse_corpus.html.j2',
corpus_id=corpus_id,
display_options_form=display_options_form,
query_form=query_form,
query_download_form=query_download_form,
inspect_display_options_form=inspect_display_options_form,
title='Corpus analysis')
@corpora.route('/<int:corpus_id>/delete')
......@@ -114,8 +116,8 @@ def add_corpus_file(corpus_id):
school=add_corpus_file_form.school.data,
title=add_corpus_file_form.title.data)
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
tasks.edit_corpus_file(corpus_file.id)
flash('Corpus file added!')
return make_response(
{'redirect_url': url_for('corpora.corpus', corpus_id=corpus.id)},
......@@ -181,8 +183,8 @@ def edit_corpus_file(corpus_id, corpus_file_id):
edit_corpus_file_form.publishing_year.data
corpus_file.school = edit_corpus_file_form.school.data
corpus_file.title = edit_corpus_file_form.title.data
corpus.status = 'unprepared'
db.session.commit()
tasks.edit_corpus_file(corpus_file_id)
flash('Corpus file edited!')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
# If no form is submitted or valid, fill out fields with current values
......@@ -211,9 +213,8 @@ def prepare_corpus(corpus_id):
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
if corpus.files.all():
corpus.status = 'submitted'
db.session.commit()
flash('Corpus marked for preparation!')
tasks.build_corpus(corpus_id)
flash('Corpus gets build now.')
else:
flash('Can not prepare corpus, please add corpus file(s).')
flash('Can not build corpus, please add corpus file(s).')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
......@@ -7,7 +7,6 @@ from werkzeug.utils import secure_filename
from . import db, logger, login_manager
import os
import shutil
import xml.etree.ElementTree as ET
class Permission:
......@@ -380,28 +379,6 @@ class CorpusFile(db.Model):
db.session.delete(self)
db.session.commit()
def insert_metadata(self):
    """Write this row's metadata onto the ``<text>`` element of its file.

    The file is parsed, the attributes of its ``<text>`` element are set
    from the fields of this object, and the file is written back in place.
    Empty optional fields are stored as the literal string ``"NULL"``.
    Afterwards the owning corpus is marked ``'unprepared'`` and the session
    is committed.
    """
    file = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                        self.dir, self.filename)
    element_tree = ET.parse(file)
    text_node = element_tree.find('text')
    # The order of the pairs is the order in which the attributes are set.
    metadata = (
        ('address', self.address or "NULL"),
        ('author', self.author),
        ('booktitle', self.booktitle or "NULL"),
        ('chapter', self.chapter or "NULL"),
        ('editor', self.editor or "NULL"),
        ('institution', self.institution or "NULL"),
        ('journal', self.journal or "NULL"),
        ('pages', self.pages or "NULL"),
        ('publisher', self.publisher or "NULL"),
        ('publishing_year', str(self.publishing_year)),
        ('school', self.school or "NULL"),
        ('title', self.title),
    )
    for key, value in metadata:
        text_node.set(key, value)
    element_tree.write(file)
    self.corpus.status = 'unprepared'
    db.session.commit()
def to_dict(self):
return {'id': self.id,
'address': self.address,
......@@ -447,9 +424,6 @@ class Corpus(db.Model):
'title': self.title,
'user_id': self.user_id}
def build(self):
    """Placeholder without any behaviour; always returns ``None``."""
    return None
def delete(self):
for corpus_file in self.files:
db.session.delete(corpus_file)
......
......@@ -5,6 +5,9 @@ networks:
external:
name: reverse-proxy
volumes:
redis-trash1:
services:
web:
depends_on:
......@@ -52,3 +55,5 @@ services:
- "/srv/nopaque/db:/var/lib/postgresql/data"
redis:
image: redis:5
volumes:
- "redis-trash1:/data"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment