Skip to content
Snippets Groups Projects
Commit 7f8797d2 authored by Patrick Jentsch's avatar Patrick Jentsch
Browse files

Delete files in db model methods.

parent f86f3f4f
No related branches found
No related tags found
No related merge requests found
from datetime import datetime
from .. import db from .. import db
from ..decorators import background from ..decorators import background
from ..models import Corpus, CorpusFile from ..models import Corpus, CorpusFile
import xml.etree.ElementTree as ET
import os
import shutil
@background @background
...@@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs): ...@@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs):
with app.app_context(): with app.app_context():
corpus = Corpus.query.get(corpus_id) corpus = Corpus.query.get(corpus_id)
if corpus is None: if corpus is None:
return raise Exception('Corpus {} not found'.format(corpus_id))
corpus.status = 'File processing' corpus.build()
db.session.commit()
corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
str(corpus.user_id), 'corpora',
str(corpus.id))
output_dir = os.path.join(corpus_dir, 'merged')
shutil.rmtree(output_dir, ignore_errors=True)
os.mkdir(output_dir)
master_element_tree = ET.ElementTree(
ET.fromstring('<corpus>\n</corpus>'))
for corpus_file in corpus.files:
file = os.path.join(corpus_dir, corpus_file.filename)
element_tree = ET.parse(file)
text_node = element_tree.find('text')
text_node.set('address', corpus_file.address or "NULL")
text_node.set('author', corpus_file.author)
text_node.set('booktitle', corpus_file.booktitle or "NULL")
text_node.set('chapter', corpus_file.chapter or "NULL")
text_node.set('editor', corpus_file.editor or "NULL")
text_node.set('institution', corpus_file.institution or "NULL")
text_node.set('journal', corpus_file.journal or "NULL")
text_node.set('pages', corpus_file.pages or "NULL")
text_node.set('publisher', corpus_file.publisher or "NULL")
text_node.set('publishing_year', str(corpus_file.publishing_year))
text_node.set('school', corpus_file.school or "NULL")
text_node.set('title', corpus_file.title)
element_tree.write(file)
master_element_tree.getroot().insert(1, text_node)
output_file = os.path.join(output_dir, 'corpus.vrt')
master_element_tree.write(output_file, xml_declaration=True,
encoding='utf-8')
corpus.status = 'submitted'
corpus.last_edited_date = datetime.utcnow()
db.session.commit() db.session.commit()
@background @background
def delete_corpus(corpus_id, *args, **kwargs): def delete_corpus(corpus_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
corpus = Corpus.query.get(corpus_id) corpus = Corpus.query.get(corpus_id)
if corpus is None: if corpus is None:
return raise Exception('Corpus {} not found'.format(corpus_id))
path = os.path.join(app.config['NOPAQUE_STORAGE'], str(corpus.user_id),
'corpora', str(corpus.id))
shutil.rmtree(path, ignore_errors=True)
corpus.delete() corpus.delete()
db.session.commit()
@background @background
def delete_corpus_file(corpus_file_id, *args, **kwargs): def delete_corpus_file(corpus_file_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
corpus_file = CorpusFile.query.get(corpus_file_id) corpus_file = CorpusFile.query.get(corpus_file_id)
if corpus_file is None: if corpus_file is None:
return raise Exception('Corpus file {} not found'.format(corpus_file_id))
path = os.path.join(app.config['NOPAQUE_STORAGE'], corpus_file.dir, corpus_file.delete()
corpus_file.filename) db.session.commit()
try:
os.remove(path)
except Exception:
pass
else:
corpus_file.delete()
...@@ -2,7 +2,6 @@ from . import socketio ...@@ -2,7 +2,6 @@ from . import socketio
from flask import abort, current_app, request from flask import abort, current_app, request
from flask_login import current_user from flask_login import current_user
from functools import wraps from functools import wraps
from threading import Thread
def admin_required(f): def admin_required(f):
...@@ -27,8 +26,7 @@ def background(f): ...@@ -27,8 +26,7 @@ def background(f):
@wraps(f) @wraps(f)
def wrapped(*args, **kwargs): def wrapped(*args, **kwargs):
kwargs['app'] = current_app._get_current_object() kwargs['app'] = current_app._get_current_object()
thread = Thread(target=f, args=args, kwargs=kwargs) thread = socketio.start_background_task(f, *args, **kwargs)
thread.start()
return thread return thread
return wrapped return wrapped
......
from .. import db
from ..decorators import background from ..decorators import background
from ..models import Job from ..models import Job
@background @background
def delete_job(job_id, *args, **kwargs): def delete_job(job_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
job = Job.query.get(job_id) job = Job.query.get(job_id)
if job is None: if job is None:
raise Exception('Could not find job with id {}'.format(job_id)) raise Exception('Job {} not found'.format(job_id))
job.delete() job.delete()
db.session.commit()
@background @background
def restart_job(job_id, *args, **kwargs): def restart_job(job_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
job = Job.query.get(job_id) job = Job.query.get(job_id)
if job is None: if job is None:
raise Exception('Could not find job with id {}'.format(job_id)) raise Exception('Job {} not found'.format(job_id))
job.restart() job.restart()
db.session.commit()
...@@ -60,10 +60,6 @@ def restart(job_id): ...@@ -60,10 +60,6 @@ def restart(job_id):
else: else:
tasks.restart_job(job_id) tasks.restart_job(job_id)
flash('Job has been restarted!', 'job') flash('Job has been restarted!', 'job')
job_inputs = [dict(filename=input.filename,
id=input.id,
job_id=job.id)
for input in job.inputs]
return redirect(url_for('jobs.job', job_id=job_id)) return redirect(url_for('jobs.job', job_id=job_id))
......
...@@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer ...@@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
from time import sleep from time import sleep
from werkzeug.security import generate_password_hash, check_password_hash from werkzeug.security import generate_password_hash, check_password_hash
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
import xml.etree.ElementTree as ET
from . import db, login_manager from . import db, login_manager
import os import os
import shutil import shutil
...@@ -246,9 +247,10 @@ class User(UserMixin, db.Model): ...@@ -246,9 +247,10 @@ class User(UserMixin, db.Model):
''' '''
Delete the user and its corpora and jobs from database and filesystem. Delete the user and its corpora and jobs from database and filesystem.
''' '''
user_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.id))
shutil.rmtree(user_dir, ignore_errors=True)
db.session.delete(self) db.session.delete(self)
db.session.commit()
class AnonymousUser(AnonymousUserMixin): class AnonymousUser(AnonymousUserMixin):
...@@ -383,7 +385,6 @@ class Job(db.Model): ...@@ -383,7 +385,6 @@ class Job(db.Model):
str(self.id)) str(self.id))
shutil.rmtree(job_dir, ignore_errors=True) shutil.rmtree(job_dir, ignore_errors=True)
db.session.delete(self) db.session.delete(self)
db.session.commit()
def restart(self): def restart(self):
''' '''
...@@ -400,7 +401,6 @@ class Job(db.Model): ...@@ -400,7 +401,6 @@ class Job(db.Model):
shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True) shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
self.end_date = None self.end_date = None
self.status = 'submitted' self.status = 'submitted'
db.session.commit()
def to_dict(self): def to_dict(self):
return {'id': self.id, return {'id': self.id,
...@@ -504,9 +504,17 @@ class CorpusFile(db.Model): ...@@ -504,9 +504,17 @@ class CorpusFile(db.Model):
title = db.Column(db.String(255)) title = db.Column(db.String(255))
def delete(self): def delete(self):
self.corpus.status = 'unprepared' corpus_file = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.corpus.user_id),
'corpora',
str(self.corpus_id),
self.filename)
try:
os.remove(corpus_file)
except OSError:
pass
db.session.delete(self) db.session.delete(self)
db.session.commit() self.corpus.status = 'unprepared'
def to_dict(self): def to_dict(self):
return {'id': self.id, return {'id': self.id,
...@@ -557,9 +565,49 @@ class Corpus(db.Model): ...@@ -557,9 +565,49 @@ class Corpus(db.Model):
'title': self.title, 'title': self.title,
'files': {file.id: file.to_dict() for file in self.files}} 'files': {file.id: file.to_dict() for file in self.files}}
def build(self):
    '''
    Merge all files of this corpus into one VRT file.

    The corpus directory is
    <NOPAQUE_STORAGE>/<user_id>/corpora/<corpus id>. The 'merged'
    subdirectory is recreated from scratch on every call. For each corpus
    file the bibliographic metadata stored in the database is written as
    attributes onto the file's 'text' element, the file itself is rewritten
    with these attributes and the element is added to a common 'corpus'
    root, which is finally written to merged/corpus.vrt.

    Afterwards last_edited_date and status ('submitted') are updated on the
    model. The database session is not committed here, this is left to the
    caller.
    '''
    corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                              str(self.user_id),
                              'corpora',
                              str(self.id))
    output_dir = os.path.join(corpus_dir, 'merged')
    shutil.rmtree(output_dir, ignore_errors=True)
    # rmtree errors are ignored above, so the directory may still exist;
    # makedirs with exist_ok does not fail in that case.
    os.makedirs(output_dir, exist_ok=True)
    master_element_tree = ET.ElementTree(
        ET.fromstring('<corpus>\n</corpus>')
    )
    corpus_root = master_element_tree.getroot()
    for corpus_file in self.files:
        corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
        element_tree = ET.parse(corpus_file_path)
        text_node = element_tree.find('text')
        # Attribute values must be strings, ElementTree can not serialize
        # None. Unset author/title values are therefore mapped to "NULL"
        # like the other text attributes.
        author = "NULL" if corpus_file.author is None else corpus_file.author
        title = "NULL" if corpus_file.title is None else corpus_file.title
        attributes = (
            ('address', corpus_file.address or "NULL"),
            ('author', author),
            ('booktitle', corpus_file.booktitle or "NULL"),
            ('chapter', corpus_file.chapter or "NULL"),
            ('editor', corpus_file.editor or "NULL"),
            ('institution', corpus_file.institution or "NULL"),
            ('journal', corpus_file.journal or "NULL"),
            ('pages', corpus_file.pages or "NULL"),
            ('publisher', corpus_file.publisher or "NULL"),
            ('publishing_year', str(corpus_file.publishing_year)),
            ('school', corpus_file.school or "NULL"),
            ('title', title),
        )
        for attribute_name, attribute_value in attributes:
            text_node.set(attribute_name, attribute_value)
        element_tree.write(corpus_file_path)
        # append keeps the texts in the order of self.files; insert(1, ...)
        # placed every text after the first one in reversed order.
        corpus_root.append(text_node)
    output_file = os.path.join(output_dir, 'corpus.vrt')
    master_element_tree.write(output_file,
                              xml_declaration=True,
                              encoding='utf-8')
    self.last_edited_date = datetime.utcnow()
    self.status = 'submitted'
def delete(self): def delete(self):
corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.user_id),
'corpora',
str(self.id))
shutil.rmtree(corpus_dir, ignore_errors=True)
db.session.delete(self) db.session.delete(self)
db.session.commit()
def __repr__(self): def __repr__(self):
''' '''
...@@ -582,8 +630,10 @@ class Result(db.Model): ...@@ -582,8 +630,10 @@ class Result(db.Model):
cascade='save-update, merge, delete') cascade='save-update, merge, delete')
def delete(self): def delete(self):
result_file_path = os.path.join(current_app.config['NOPAQUE_STORAGE'],
self.file[0].dir)
shutil.rmtree(result_file_path)
db.session.delete(self) db.session.delete(self)
db.session.commit()
def __repr__(self): def __repr__(self):
''' '''
......
from .. import db
from ..decorators import background from ..decorators import background
from ..models import User from ..models import User
import os
import shutil
@background @background
def delete_user(user_id, *args, **kwargs): def delete_user(user_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
user = User.query.get(user_id) user = User.query.get(user_id)
if user is None: if user is None:
raise Exception('User {} not found!'.format(user_id)) raise Exception('User {} not found'.format(user_id))
path = os.path.join(app.config['NOPAQUE_STORAGE'], str(user.id))
shutil.rmtree(path, ignore_errors=True)
user.delete() user.delete()
db.session.commit()
from .. import db
from ..decorators import background from ..decorators import background
from ..models import Result from ..models import Result
import os
import shutil
@background @background
def delete_result(result_id, *args, **kwargs): def delete_result(result_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
result = Result.query.get(result_id) result = Result.query.get(result_id)
if result is None: if result is None:
return raise Exception('Result {} not found'.format(result_id))
result_file_path = os.path.join(app.config['NOPAQUE_STORAGE'],
result.file[0].dir)
shutil.rmtree(result_file_path)
result.delete() # cascades down and also deletes ResultFile result.delete() # cascades down and also deletes ResultFile
db.session.commit()
...@@ -33,8 +33,7 @@ class Config: ...@@ -33,8 +33,7 @@ class Config:
os.makedirs('logs', exist_ok=True) os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/nopaque.log', logging.basicConfig(filename='logs/nopaque.log',
format='[%(asctime)s] %(levelname)s in ' format='[%(asctime)s] %(levelname)s in '
'%(name)s/%(filename)s:%(lineno)d - ' '%(pathname)s:%(lineno)d - %(message)s',
'%(message)s',
datefmt='%Y-%m-%d %H:%M:%S', filemode='w') datefmt='%Y-%m-%d %H:%M:%S', filemode='w')
''' ### Security enhancements ### ''' ''' ### Security enhancements ### '''
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment