Newer
Older
from datetime import datetime
from flask import current_app
from flask_login import UserMixin, AnonymousUserMixin
from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
from time import sleep
from werkzeug.security import generate_password_hash, check_password_hash
from werkzeug.utils import secure_filename
from . import db, login_manager
import os
import shutil
import xml.etree.ElementTree as ET
class Permission:
    '''
    Defines User permissions as integers by the power of 2. Because every
    permission occupies its own bit, a set of permissions can be stored in a
    single integer and evaluated using the bitwise operator &. For example, 3
    equals MANAGE_CORPORA and MANAGE_JOBS combined.
    '''
    MANAGE_CORPORA = 1
    MANAGE_JOBS = 2
    # The values 4 and 8 are reserved for future permissions:
    # PERMISSION_NAME = 4
    # PERMISSION_NAME = 8
    ADMIN = 16
class Role(db.Model):
    '''
    Model for the different roles Users can have. Is a one-to-many
    relationship. A Role can be associated with many User rows.
    '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    default = db.Column(db.Boolean, default=False, index=True)
    # NOTE(review): to_dict(), __repr__() and insert_roles() all depend on a
    # `name` column. The String(64) length and the unique constraint are
    # assumptions - confirm against the database schema/migrations.
    name = db.Column(db.String(64), unique=True)
    permissions = db.Column(db.BigInteger)
    # Relationships
    users = db.relationship('User', backref='role', lazy='dynamic')

    def to_dict(self):
        '''
        Returns the columns of the Role as a dictionary.
        '''
        return {'id': self.id,
                'default': self.default,
                'name': self.name,
                'permissions': self.permissions}

    def __init__(self, **kwargs):
        super(Role, self).__init__(**kwargs)
        # Column defaults are not applied on construction, so make sure the
        # bit mask is an integer before any permission arithmetic happens.
        if self.permissions is None:
            self.permissions = 0

    def __repr__(self):
        '''
        String representation of the Role. For human readability.
        '''
        return '<Role {role_name}>'.format(role_name=self.name)

    def add_permission(self, perm):
        '''
        Add new permission to Role. Input is a Permission.
        '''
        # A bitwise OR is idempotent and stays correct when perm consists of
        # several bits of which only some are already set (adding the integer
        # would carry into unrelated bits in that case).
        self.permissions |= perm

    def remove_permission(self, perm):
        '''
        Removes permission from a Role. Input is a Permission. Nothing happens
        if the Role does not hold the complete Permission.
        '''
        if self.has_permission(perm):
            self.permissions &= ~perm

    def reset_permissions(self):
        '''
        Resets permissions to zero. Zero equals no permissions at all.
        '''
        self.permissions = 0

    def has_permission(self, perm):
        '''
        Checks if a Role has a specific Permission. Does this with the bitwise
        operator &: every bit of perm has to be set in the permissions.
        '''
        return (self.permissions & perm) == perm

    @staticmethod
    def insert_roles():
        '''
        Inserts roles into the database. This has to be executed before Users
        are added to the database. Otherwise Users will not have a Role
        assigned to them. Order of the roles dictionary determines the ID of
        each role. Users have the ID 1 and Administrators have the ID 2.
        '''
        roles = {'User': [Permission.MANAGE_CORPORA, Permission.MANAGE_JOBS],
                 'Administrator': [Permission.MANAGE_CORPORA,
                                   Permission.MANAGE_JOBS, Permission.ADMIN]}
        default_role = 'User'
        for role_name, role_permissions in roles.items():
            role = Role.query.filter_by(name=role_name).first()
            if role is None:
                role = Role(name=role_name)
            # Existing roles are rebuilt from scratch so that permissions
            # removed from the dictionary above are dropped as well.
            role.reset_permissions()
            for perm in role_permissions:
                role.add_permission(perm)
            role.default = (role.name == default_role)
            db.session.add(role)
        db.session.commit()
class User(UserMixin, db.Model):
    '''
    Model for Users that are registered to Opaque.
    '''
    __tablename__ = 'users'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    role_id = db.Column(db.Integer, db.ForeignKey('roles.id'))
    # Fields
    # NOTE(review): to_dict() and confirm() depend on a `confirmed` column.
    # Boolean with default False is an assumption - confirm against the
    # database schema/migrations.
    confirmed = db.Column(db.Boolean, default=False)
    email = db.Column(db.String(254), unique=True, index=True)
    last_seen = db.Column(db.DateTime(), default=datetime.utcnow)
    member_since = db.Column(db.DateTime(), default=datetime.utcnow)
    password_hash = db.Column(db.String(128))
    setting_dark_mode = db.Column(db.Boolean, default=False)
    setting_job_status_mail_notifications = db.Column(db.String(16),
                                                      default='end')
    setting_job_status_site_notifications = db.Column(db.String(16),
                                                      default='all')
    username = db.Column(db.String(64), unique=True, index=True)
    # Relationships
    corpora = db.relationship('Corpus', backref='creator', lazy='dynamic',
                              cascade='save-update, merge, delete')
    jobs = db.relationship('Job', backref='creator', lazy='dynamic',
                           cascade='save-update, merge, delete')
    query_results = db.relationship('QueryResult',
                                    backref='creator',
                                    cascade='save-update, merge, delete',
                                    lazy='dynamic')

    def to_dict(self):
        '''
        Returns the User, its settings, its role and all of its corpora, jobs
        and query results as a nested dictionary. Dates are converted to
        timestamps.
        '''
        return {'id': self.id,
                'confirmed': self.confirmed,
                'email': self.email,
                'last_seen': self.last_seen.timestamp(),
                'member_since': self.member_since.timestamp(),
                'settings': {'dark_mode': self.setting_dark_mode,
                             'job_status_mail_notifications':
                                 self.setting_job_status_mail_notifications,
                             'job_status_site_notifications':
                                 self.setting_job_status_site_notifications},
                'corpora': {corpus.id: corpus.to_dict()
                            for corpus in self.corpora},
                'jobs': {job.id: job.to_dict() for job in self.jobs},
                'query_results': {query_result.id: query_result.to_dict()
                                  for query_result in self.query_results},
                'role': self.role.to_dict()}

    def __repr__(self):
        '''
        String representation of the User. For human readability.
        '''
        return '<User {username}>'.format(username=self.username)

    def __init__(self, **kwargs):
        super(User, self).__init__(**kwargs)
        if self.role is None:
            # The configured admin address gets the Administrator role, every
            # other User (or the admin, if that role does not exist yet)
            # falls back to the role flagged as default.
            if self.email == current_app.config['ADMIN_EMAIL_ADRESS']:
                self.role = Role.query.filter_by(name='Administrator').first()
            if self.role is None:
                self.role = Role.query.filter_by(default=True).first()

    def generate_confirmation_token(self, expiration=3600):
        '''
        Generates a confirmation token for user confirmation via email.
        The expiration is passed on to the serializer.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'],
                                            expiration)
        return s.dumps({'confirm': self.id}).decode('utf-8')

    def generate_reset_token(self, expiration=3600):
        '''
        Generates a reset token for password reset via email.
        The expiration is passed on to the serializer.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'],
                                            expiration)
        return s.dumps({'reset': self.id}).decode('utf-8')

    def confirm(self, token):
        '''
        Confirms User if the given token is valid and not expired. Returns
        True on success and False otherwise. The session is not committed
        here.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        try:
            data = s.loads(token.encode('utf-8'))
        except BadSignature:
            return False
        # A valid token that was issued for a different User is rejected.
        if data.get('confirm') != self.id:
            return False
        self.confirmed = True
        db.session.add(self)
        return True

    @staticmethod
    def reset_password(token, new_password):
        '''
        Resets password for User if the given token is valid and not expired.
        Returns True on success and False otherwise. The session is not
        committed here.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        try:
            data = s.loads(token.encode('utf-8'))
        except BadSignature:
            return False
        user = User.query.get(data.get('reset'))
        if user is None:
            return False
        user.password = new_password
        db.session.add(user)
        return True

    @property
    def password(self):
        # Only the hash is stored, the clear text password cannot be read.
        raise AttributeError('password is not a readable attribute')

    @password.setter
    def password(self, password):
        self.password_hash = generate_password_hash(password)

    def verify_password(self, password):
        '''
        Checks the given clear text password against the stored hash.
        '''
        return check_password_hash(self.password_hash, password)

    def can(self, perm):
        '''
        Checks if a User with its current role can do something. Checks if the
        associated role actually has the needed Permission.
        '''
        return self.role is not None and self.role.has_permission(perm)

    def is_administrator(self):
        '''
        Checks if the User holds the ADMIN Permission.
        '''
        return self.can(Permission.ADMIN)

    def ping(self):
        '''
        Sets last_seen to the current UTC time. The session is not committed
        here.
        '''
        self.last_seen = datetime.utcnow()
        db.session.add(self)

    def delete(self):
        '''
        Delete the user and its corpora and jobs from database and filesystem.
        '''
        user_dir = os.path.join(current_app.config['DATA_DIR'],
                                str(self.id))
        shutil.rmtree(user_dir, ignore_errors=True)
        # NOTE(review): removing the row mirrors the other delete() methods of
        # this module; the delete cascades declared on the relationships take
        # care of the dependent corpora, jobs and query results.
        db.session.delete(self)
class AnonymousUser(AnonymousUserMixin):
    '''
    Model replaces the default AnonymousUser. It offers the same permission
    checks as User, so callers do not have to distinguish between logged in
    and anonymous users. An anonymous user never has any permission.
    '''

    def can(self, permissions):
        return False

    def is_administrator(self):
        return False
__tablename__ = 'job_inputs'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
dir = db.Column(db.String(255))
filename = db.Column(db.String(255))
String representation of the JobInput. For human readability.
return '<JobInput {filename}>'.format(filename=self.filename)
'job_id': self.job_id,
'filename': self.filename}
__tablename__ = 'job_results'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
dir = db.Column(db.String(255))
filename = db.Column(db.String(255))
String representation of the JobResult. For human readability.
return '<JobResult {filename}>'.format(filename=self.filename)
'job_id': self.job_id,
'filename': self.filename}
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
end_date = db.Column(db.DateTime())
mem_mb = db.Column(db.Integer)
n_cores = db.Column(db.Integer)
# This is used for zip creation
secure_filename = db.Column(db.String(32))
service_version = db.Column(db.String(16))
status = db.Column(db.String(16))
inputs = db.relationship('JobInput', backref='job', lazy='dynamic',
results = db.relationship('JobResult', backref='job', lazy='dynamic',
notification_data = db.relationship('NotificationData',
cascade='save-update, merge, delete',
uselist=False,
back_populates='job') # One-to-One relationship
notification_email_data = db.relationship('NotificationEmailData',
cascade='save-update, merge, delete',
back_populates='job')
String representation of the Job. For human readability.
return '<Job {job_title}>'.format(job_title=self.title)
def create_secure_filename(self):
    '''
    Takes the job.title string and creates a secure filename from it. The
    result is stored in self.secure_filename.
    '''
    self.secure_filename = secure_filename(self.title)
Delete the job and its inputs and results from the database.
if self.status not in ['complete', 'failed']:
self.status = 'canceling'
db.session.commit()
while self.status != 'canceled':
# In case the daemon handled a job in any way
if self.status != 'canceling':
self.status = 'canceling'
db.session.commit()
sleep(1)
db.session.refresh(self)
job_dir = os.path.join(current_app.config['DATA_DIR'],
str(self.user_id),
'jobs',
str(self.id))
shutil.rmtree(job_dir, ignore_errors=True)
def restart(self):
    '''
    Restart a job - only if the status is failed.

    Removes the 'output' and 'pyflow.data' directories of the job, clears the
    end date and sets the status back to 'submitted'. Raises an Exception if
    the status is anything but 'failed'. The session is not committed here.
    '''
    if self.status != 'failed':
        raise Exception('Could not restart job: status is not "failed"')
    job_dir = os.path.join(current_app.config['DATA_DIR'],
                           str(self.user_id),
                           'jobs',
                           str(self.id))
    # Leftovers of the failed run are removed on a best-effort basis; missing
    # directories are not an error.
    for leftover in ('output', 'pyflow.data'):
        shutil.rmtree(os.path.join(job_dir, leftover), ignore_errors=True)
    self.end_date = None
    self.status = 'submitted'

Patrick Jentsch
committed
return {'id': self.id,
'user_id': self.user_id,

Patrick Jentsch
committed
'creation_date': self.creation_date.timestamp(),
'description': self.description,
'end_date': (self.end_date.timestamp() if self.end_date else
None),
'service': {'args': self.service_args,
'name': self.service,
'version': self.service_version},
'status': self.status,
'title': self.title,
'inputs': {input.id: input.to_dict() for input in self.inputs},
'results': {result.id: result.to_dict()

Patrick Jentsch
committed
Class to define notification data used for sending a notification mail with
nopaque_notify.
'''
__tablename__ = 'notification_data'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign Key
job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
# relationships
job = db.relationship('Job', back_populates='notification_data')
notified_on = db.Column(db.String(16), default=None)
def __repr__(self):
    '''
    String representation of the NotificationData. For human readability.
    '''
    return '<NotificationData {id}>'.format(id=self.id)
def to_dict(self):
    '''
    Returns the NotificationData as a dictionary. The 'job' entry holds the
    related Job object itself, not a dictionary.
    '''
    return {'id': self.id,
            'job_id': self.job_id,
            'job': self.job,
            # The column is called notified_on; reading self.notified raised
            # an AttributeError. The dictionary key is kept unchanged.
            'notified': self.notified_on}
class NotificationEmailData(db.Model):
    '''
    Class to define data that will be used to send a corresponding Notification
    via email.
    '''
    __tablename__ = 'notification_email_data'
    # Primary Key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign Key
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # relationships
    job = db.relationship('Job', back_populates='notification_email_data')
    # fields
    notify_status = db.Column(db.String(16), default=None)
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)

    def __repr__(self):
        '''
        String representation of the NotificationEmailData. For human
        readability.
        '''
        # Uses its own class name; the former text '<NotificationData ...>'
        # made both notification models indistinguishable in logs.
        return '<NotificationEmailData {id}>'.format(id=self.id)

    def to_dict(self):
        '''
        Returns the NotificationEmailData as a dictionary. The 'job' entry
        holds the related Job object and 'creation_date' the datetime object
        itself, so the result is not directly JSON serializable.
        '''
        return {'id': self.id,
                'job_id': self.job_id,
                'job': self.job,
                'notify_status': self.notify_status,
                'creation_date': self.creation_date}
__tablename__ = 'corpus_files'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
address = db.Column(db.String(255))
author = db.Column(db.String(255))
booktitle = db.Column(db.String(255))
chapter = db.Column(db.String(255))
dir = db.Column(db.String(255))
filename = db.Column(db.String(255))
institution = db.Column(db.String(255))
journal = db.Column(db.String(255))
pages = db.Column(db.String(255))
publisher = db.Column(db.String(255))
publishing_year = db.Column(db.Integer)
school = db.Column(db.String(255))
title = db.Column(db.String(255))
corpus_file_path = os.path.join(current_app.config['DATA_DIR'],
str(self.corpus.user_id),
'corpora',
str(self.corpus_id),
self.filename)
os.remove(corpus_file_path)

Patrick Jentsch
committed
def to_dict(self):
    '''
    Returns the corpus file and its bibliographic metadata as a flat
    dictionary.
    '''
    # NOTE(review): `editor` is read here; confirm that the model declares a
    # matching column.
    return {'id': self.id,
            'corpus_id': self.corpus_id,
            'address': self.address,
            'author': self.author,
            'booktitle': self.booktitle,
            'chapter': self.chapter,
            'editor': self.editor,
            'filename': self.filename,
            'institution': self.institution,
            'journal': self.journal,
            'pages': self.pages,
            'publisher': self.publisher,
            'publishing_year': self.publishing_year,
            'school': self.school,
            'title': self.title}

Patrick Jentsch
committed
__tablename__ = 'corpora'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
current_nr_of_tokens = db.Column(db.BigInteger, default=0)
description = db.Column(db.String(255))

Patrick Jentsch
committed
last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow)
max_nr_of_tokens = db.Column(db.BigInteger, default=2147483647)
archive_file = db.Column(db.String(255))
files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic',

Patrick Jentsch
committed
return {'id': self.id,
'user_id': self.user_id,
'creation_date': self.creation_date.timestamp(),

Patrick Jentsch
committed
'description': self.description,

Patrick Jentsch
committed
'last_edited_date': self.last_edited_date.timestamp(),

Patrick Jentsch
committed
'title': self.title,
'files': {file.id: file.to_dict() for file in self.files}}
corpus_dir = os.path.join(current_app.config['DATA_DIR'],
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
str(self.user_id),
'corpora',
str(self.id))
output_dir = os.path.join(corpus_dir, 'merged')
shutil.rmtree(output_dir, ignore_errors=True)
os.mkdir(output_dir)
master_element_tree = ET.ElementTree(
ET.fromstring('<corpus>\n</corpus>')
)
for corpus_file in self.files:
corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
element_tree = ET.parse(corpus_file_path)
text_node = element_tree.find('text')
text_node.set('address', corpus_file.address or "NULL")
text_node.set('author', corpus_file.author)
text_node.set('booktitle', corpus_file.booktitle or "NULL")
text_node.set('chapter', corpus_file.chapter or "NULL")
text_node.set('editor', corpus_file.editor or "NULL")
text_node.set('institution', corpus_file.institution or "NULL")
text_node.set('journal', corpus_file.journal or "NULL")
text_node.set('pages', corpus_file.pages or "NULL")
text_node.set('publisher', corpus_file.publisher or "NULL")
text_node.set('publishing_year', str(corpus_file.publishing_year))
text_node.set('school', corpus_file.school or "NULL")
text_node.set('title', corpus_file.title)
element_tree.write(corpus_file_path)
master_element_tree.getroot().insert(1, text_node)
output_file = os.path.join(output_dir, 'corpus.vrt')
master_element_tree.write(output_file,
xml_declaration=True,
encoding='utf-8')
self.last_edited_date = datetime.utcnow()
self.status = 'submitted'
corpus_dir = os.path.join(current_app.config['DATA_DIR'],
str(self.user_id),
'corpora',
str(self.id))
shutil.rmtree(corpus_dir, ignore_errors=True)

Stephan Porada
committed
db.session.delete(self)
def __repr__(self):
    '''
    String representation of the corpus. For human readability.
    '''
    return '<Corpus {corpus_title}>'.format(corpus_title=self.title)
class QueryResult(db.Model):
    '''
    Class to define a corpus analysis result.
    '''
    __tablename__ = 'query_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    description = db.Column(db.String(255))
    filename = db.Column(db.String(255))
    query_metadata = db.Column(db.JSON())
    title = db.Column(db.String(32))

    def delete(self):
        '''
        Delete the query result from the filesystem and mark its row for
        deletion. The session is not committed here.
        '''
        query_result_dir = os.path.join(current_app.config['DATA_DIR'],
                                        str(self.user_id),
                                        'query_results',
                                        str(self.id))
        # Best effort: a missing directory must not prevent the row from
        # being removed.
        shutil.rmtree(query_result_dir, ignore_errors=True)
        db.session.delete(self)

    def to_dict(self):
        '''
        Returns the columns of the QueryResult as a dictionary.
        '''
        return {'id': self.id,
                'user_id': self.user_id,
                'description': self.description,
                'filename': self.filename,
                'query_metadata': self.query_metadata,
                'title': self.title}

    def __repr__(self):
        '''
        String representation of the QueryResult. For human readability.
        '''
        return '<QueryResult {}>'.format(self.title)
# Flask-Login is told to use the application's custom anonymous user by
# setting its class in the login_manager.anonymous_user attribute.
login_manager.anonymous_user = AnonymousUser
@login_manager.user_loader
def load_user(user_id):
    '''
    Callback used by Flask-Login to reload a User from the ID stored in the
    session. Returns None if the ID is malformed or no such User exists, so
    that a broken session is treated as "not logged in" instead of raising.
    '''
    try:
        primary_key = int(user_id)
    except (TypeError, ValueError):
        return None
    return User.query.get(primary_key)