Skip to content
Snippets Groups Projects
Commit a82b7292 authored by Patrick Jentsch
Browse files

Change job model.

parent 50273ea4
No related branches found
No related tags found
No related merge requests found
......@@ -221,24 +221,17 @@ class Job(db.Model):
# Primary key
id = db.Column(db.Integer, primary_key=True)
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
description = db.Column(db.String(64))
'''
' Requested ressources.
' Example: {"n_cores": 2,
' "mem_mb": 4096
' }
'''
ressources = db.Column(db.String(255))
description = db.Column(db.String(255))
mem_mb = db.Column(db.Integer)
n_cores = db.Column(db.Integer)
service = db.Column(db.String(64))
'''
' Service specific arguments in JSON format.
' Example: {"args": ["--keep-intermediates", "skip-binarization"],
' "lang": "eng",
' "version": "latest"
' }
' Service specific arguments as string list.
' Example: ["-l eng", "--keep-intermediates", "--skip-binarization"]
'''
service_args = db.Column(db.String(255))
status = db.Column(db.String(8))
service_version = db.Column(db.String(16))
status = db.Column(db.String(16))
title = db.Column(db.String(32))
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
......
......@@ -3,24 +3,21 @@ from wtforms import MultipleFileField, SelectField, StringField, SubmitField, Va
from wtforms.validators import DataRequired, Length
class NewOCRJobForm(FlaskForm):
class NewNLPJobForm(FlaskForm):
description = StringField(
'Description',
validators=[DataRequired(), Length(1, 64)]
validators=[DataRequired(), Length(1, 255)]
)
files = MultipleFileField('Files', validators=[DataRequired()])
language = SelectField(
'Language',
choices=[('', 'Choose your option'),
('eng', 'English'),
('enm', 'English, Middle (1100-1500)'),
('fra', 'French'),
('frm', 'French, Middle (ca. 1400-1600)'),
('deu', 'German'),
('frk', 'German Fraktur'),
('ita', 'Italian'),
('por', 'Portuguese'),
('spa', 'Spanish; Castilian')
('en', 'English'),
('fr', 'French'),
('de', 'German'),
('it', 'Italian'),
('pt', 'Portuguese'),
('es', 'Spanish')
],
validators=[DataRequired()]
)
......@@ -39,28 +36,31 @@ class NewOCRJobForm(FlaskForm):
def validate_files(form, field):
for file in field.data:
if not file.filename.lower().endswith(('.pdf', '.tif', '.tiff')):
if not file.filename.lower().endswith('.txt'):
raise ValidationError(
'File does not have an approved extension: '
'.pdf | .tif | .tiff'
'.txt'
)
class NewNLPJobForm(FlaskForm):
class NewOCRJobForm(FlaskForm):
description = StringField(
'Description',
validators=[DataRequired(), Length(1, 64)]
validators=[DataRequired(), Length(1, 255)]
)
files = MultipleFileField('Files', validators=[DataRequired()])
language = SelectField(
'Language',
choices=[('', 'Choose your option'),
('en', 'English'),
('fr', 'French'),
('de', 'German'),
('it', 'Italian'),
('pt', 'Portuguese'),
('es', 'Spanish')
('eng', 'English'),
('enm', 'English, Middle (1100-1500)'),
('fra', 'French'),
('frm', 'French, Middle (ca. 1400-1600)'),
('deu', 'German'),
('frk', 'German Fraktur'),
('ita', 'Italian'),
('por', 'Portuguese'),
('spa', 'Spanish; Castilian')
],
validators=[DataRequired()]
)
......@@ -79,8 +79,8 @@ class NewNLPJobForm(FlaskForm):
def validate_files(form, field):
for file in field.data:
if not file.filename.lower().endswith('.txt'):
if not file.filename.lower().endswith(('.pdf', '.tif', '.tiff')):
raise ValidationError(
'File does not have an approved extension: '
'.txt'
'.pdf | .tif | .tiff'
)
......@@ -6,43 +6,41 @@ from ..models import Job
from ..import swarm
from .. import db
from threading import Thread
import os
import json
import os
@services.route('/ocr', methods=['GET', 'POST'])
@services.route('/nlp', methods=['GET', 'POST'])
@login_required
def ocr():
new_ocr_job_form = NewOCRJobForm()
if new_ocr_job_form.validate_on_submit():
ocr_job = Job(creator=current_user._get_current_object(),
description=new_ocr_job_form.description.data,
service="ocr",
ressources=json.dumps({"n_cores": 4,
"mem_mb": 8192}),
service_args=json.dumps({"args": ["--keep-intermediates",
"--skip-binarisation"],
"lang": new_ocr_job_form.language.data,
"version": new_ocr_job_form.version.data}),
status="pending",
title=new_ocr_job_form.title.data)
def nlp():
new_nlp_job_form = NewNLPJobForm()
if new_nlp_job_form.validate_on_submit():
nlp_job = Job(creator=current_user,
description=new_nlp_job_form.description.data,
mem_mb=4096,
n_cores=2,
service='nlp',
service_args=json.dumps(['-l {}'.format(new_nlp_job_form.language.data)]),
service_version=new_nlp_job_form.version.data,
status='submitted',
title=new_nlp_job_form.title.data)
db.session.add(ocr_job)
db.session.add(nlp_job)
db.session.commit()
dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
str(ocr_job.user_id),
str(nlp_job.user_id),
'jobs',
str(ocr_job.id))
str(nlp_job.id))
try:
os.makedirs(dir)
except OSError:
flash('OSError!')
db.session.remove(ocr_job)
db.session.remove(nlp_job)
db.session.commit()
else:
for file in new_ocr_job_form.files.data:
for file in new_nlp_job_form.files.data:
file.save(os.path.join(dir, file.filename))
'''
' TODO: Let the scheduler run this job in the background.
......@@ -50,51 +48,53 @@ def ocr():
' NOTE: Using self created threads is just for testing purpose as
' there is no scheduler available.
'''
db.session.expunge(ocr_job)
thread = Thread(target=swarm.run, args=(ocr_job,))
db.session.expunge(nlp_job)
thread = Thread(target=swarm.run, args=(nlp_job,))
thread.start()
flash('Job created!')
return redirect(url_for('services.ocr'))
return redirect(url_for('services.nlp'))
return render_template(
'services/ocr.html.j2',
title='Optical Character Recognition',
new_ocr_job_form=new_ocr_job_form
'services/nlp.html.j2',
title='Natrual Language Processing',
new_nlp_job_form=new_nlp_job_form
)
@services.route('/nlp', methods=['GET', 'POST'])
@services.route('/ocr', methods=['GET', 'POST'])
@login_required
def nlp():
new_nlp_job_form = NewNLPJobForm()
if new_nlp_job_form.validate_on_submit():
nlp_job = Job(creator=current_user._get_current_object(),
description=new_nlp_job_form.description.data,
service="nlp",
ressources=json.dumps({"n_cores": 2,
"mem_mb": 4096}),
service_args=json.dumps({"args": [],
"lang": new_nlp_job_form.language.data,
"version": new_nlp_job_form.version.data}),
status="pending",
title=new_nlp_job_form.title.data)
def ocr():
new_ocr_job_form = NewOCRJobForm()
if new_ocr_job_form.validate_on_submit():
ocr_job = Job(creator=current_user,
description=new_ocr_job_form.description.data,
mem_mb=8192,
n_cores=4,
service='ocr',
service_args=json.dumps([
'-l {}'.format(new_ocr_job_form.language.data),
'--keep-intermediates',
'--skip-binarisation']),
service_version=new_ocr_job_form.version.data,
status='submitted',
title=new_ocr_job_form.title.data)
db.session.add(nlp_job)
db.session.add(ocr_job)
db.session.commit()
dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
str(nlp_job.user_id),
str(ocr_job.user_id),
'jobs',
str(nlp_job.id))
str(ocr_job.id))
try:
os.makedirs(dir)
except OSError:
flash('OSError!')
db.session.remove(nlp_job)
db.session.remove(ocr_job)
db.session.commit()
else:
for file in new_nlp_job_form.files.data:
for file in new_ocr_job_form.files.data:
file.save(os.path.join(dir, file.filename))
'''
' TODO: Let the scheduler run this job in the background.
......@@ -102,14 +102,14 @@ def nlp():
' NOTE: Using self created threads is just for testing purpose as
' there is no scheduler available.
'''
db.session.expunge(nlp_job)
thread = Thread(target=swarm.run, args=(nlp_job,))
db.session.expunge(ocr_job)
thread = Thread(target=swarm.run, args=(ocr_job,))
thread.start()
flash('Job created!')
return redirect(url_for('services.nlp'))
return redirect(url_for('services.ocr'))
return render_template(
'services/nlp.html.j2',
title='Natrual Language Processing',
new_nlp_job_form=new_nlp_job_form
'services/ocr.html.j2',
title='Optical Character Recognition',
new_ocr_job_form=new_ocr_job_form
)
......@@ -35,16 +35,14 @@ class Swarm:
'''
# Prepare argument values needed for the service creation.
service_args = json.loads(job.service_args)
ressources = json.loads(job.ressources)
_command = (job.service
+ ' -i /files'
+ ' -l {}'.format(service_args['lang'])
+ ' -o /files/output'
+ ' ' + ' '.join(service_args['args']))
+ ' ' + ' '.join(service_args))
_constraints = ['node.role==worker']
_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(
job.service,
service_args['version']
job.service_version
)
_labels = {'service': job.service}
_mounts = [os.path.join('/home/compute/mnt/opaque',
......@@ -64,8 +62,8 @@ class Swarm:
' in megabytes, it is also necessary to convert the value.
'''
_resources = docker.types.Resources(
cpu_reservation=ressources['n_cores'] * (10 ** 9),
mem_reservation=ressources['mem_mb'] * (10 ** 6)
cpu_reservation=job.n_cores * (10 ** 9),
mem_reservation=job.mem_mb * (10 ** 6)
)
_restart_policy = docker.types.RestartPolicy(condition='none')
'''
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment