diff --git a/app/models.py b/app/models.py index e7db71748432c0dc1ee61a75e41264288e3dd5bf..ba0859e8ff1c3b1c2571fc9c90ee905ac04e1fdf 100644 --- a/app/models.py +++ b/app/models.py @@ -221,24 +221,17 @@ class Job(db.Model): # Primary key id = db.Column(db.Integer, primary_key=True) creation_date = db.Column(db.DateTime(), default=datetime.utcnow) - description = db.Column(db.String(64)) - ''' - ' Requested ressources. - ' Example: {"n_cores": 2, - ' "mem_mb": 4096 - ' } - ''' - ressources = db.Column(db.String(255)) + description = db.Column(db.String(255)) + mem_mb = db.Column(db.Integer) + n_cores = db.Column(db.Integer) service = db.Column(db.String(64)) ''' - ' Service specific arguments in JSON format. - ' Example: {"args": ["--keep-intermediates", "skip-binarization"], - ' "lang": "eng", - ' "version": "latest" - ' } + ' Service specific arguments as string list. + ' Example: ["-l eng", "--keep-intermediates", "--skip-binarization"] ''' service_args = db.Column(db.String(255)) - status = db.Column(db.String(8)) + service_version = db.Column(db.String(16)) + status = db.Column(db.String(16)) title = db.Column(db.String(32)) user_id = db.Column(db.Integer, db.ForeignKey('users.id')) diff --git a/app/services/forms.py b/app/services/forms.py index 1c1aac02456afbd9ac399477532a9a4e7ceb8bd4..5a79f8bb174cf116afc85c4d00a0e2df45b4e5a3 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -3,24 +3,21 @@ from wtforms import MultipleFileField, SelectField, StringField, SubmitField, Va from wtforms.validators import DataRequired, Length -class NewOCRJobForm(FlaskForm): +class NewNLPJobForm(FlaskForm): description = StringField( 'Description', - validators=[DataRequired(), Length(1, 64)] + validators=[DataRequired(), Length(1, 255)] ) files = MultipleFileField('Files', validators=[DataRequired()]) language = SelectField( 'Language', choices=[('', 'Choose your option'), - ('eng', 'English'), - ('enm', 'English, Middle (1100-1500)'), - ('fra', 'French'), - ('frm', 'French, Middle (ca. 1400-1600)'), - ('deu', 'German'), - ('frk', 'German Fraktur'), - ('ita', 'Italian'), - ('por', 'Portuguese'), - ('spa', 'Spanish; Castilian') + ('en', 'English'), + ('fr', 'French'), + ('de', 'German'), + ('it', 'Italian'), + ('pt', 'Portuguese'), + ('es', 'Spanish') ], validators=[DataRequired()] ) @@ -39,28 +36,31 @@ class NewOCRJobForm(FlaskForm): def validate_files(form, field): for file in field.data: - if not file.filename.lower().endswith(('.pdf', '.tif', '.tiff')): + if not file.filename.lower().endswith('.txt'): raise ValidationError( 'File does not have an approved extension: ' - '.pdf | .tif | .tiff' + '.txt' ) -class NewNLPJobForm(FlaskForm): +class NewOCRJobForm(FlaskForm): description = StringField( 'Description', - validators=[DataRequired(), Length(1, 64)] + validators=[DataRequired(), Length(1, 255)] ) files = MultipleFileField('Files', validators=[DataRequired()]) language = SelectField( 'Language', choices=[('', 'Choose your option'), - ('en', 'English'), - ('fr', 'French'), - ('de', 'German'), - ('it', 'Italian'), - ('pt', 'Portuguese'), - ('es', 'Spanish') + ('eng', 'English'), + ('enm', 'English, Middle (1100-1500)'), + ('fra', 'French'), + ('frm', 'French, Middle (ca. 1400-1600)'), + ('deu', 'German'), + ('frk', 'German Fraktur'), + ('ita', 'Italian'), + ('por', 'Portuguese'), + ('spa', 'Spanish; Castilian') ], validators=[DataRequired()] ) @@ -79,8 +79,8 @@ class NewNLPJobForm(FlaskForm): def validate_files(form, field): for file in field.data: - if not file.filename.lower().endswith('.txt'): + if not file.filename.lower().endswith(('.pdf', '.tif', '.tiff')): raise ValidationError( 'File does not have an approved extension: ' - '.txt' + '.pdf | .tif | .tiff' ) diff --git a/app/services/views.py b/app/services/views.py index 20af79fac59fd663e19c08a04d424e6838863c09..6d493008b5ef44110f00092c516e49d7f5c901d9 100644 --- a/app/services/views.py +++ b/app/services/views.py @@ -6,43 +6,41 @@ from ..models import Job from ..import swarm from .. import db from threading import Thread -import os import json +import os -@services.route('/ocr', methods=['GET', 'POST']) +@services.route('/nlp', methods=['GET', 'POST']) @login_required -def ocr(): - new_ocr_job_form = NewOCRJobForm() - if new_ocr_job_form.validate_on_submit(): - ocr_job = Job(creator=current_user._get_current_object(), - description=new_ocr_job_form.description.data, - service="ocr", - ressources=json.dumps({"n_cores": 4, - "mem_mb": 8192}), - service_args=json.dumps({"args": ["--keep-intermediates", - "--skip-binarisation"], - "lang": new_ocr_job_form.language.data, - "version": new_ocr_job_form.version.data}), - status="pending", - title=new_ocr_job_form.title.data) +def nlp(): + new_nlp_job_form = NewNLPJobForm() + if new_nlp_job_form.validate_on_submit(): + nlp_job = Job(creator=current_user, + description=new_nlp_job_form.description.data, + mem_mb=4096, + n_cores=2, + service='nlp', + service_args=json.dumps(['-l {}'.format(new_nlp_job_form.language.data)]), + service_version=new_nlp_job_form.version.data, + status='submitted', + title=new_nlp_job_form.title.data) - db.session.add(ocr_job) + db.session.add(nlp_job) db.session.commit() dir = os.path.join(current_app.config['OPAQUE_STORAGE'], - str(ocr_job.user_id), + str(nlp_job.user_id), 'jobs', - str(ocr_job.id)) + str(nlp_job.id)) try: os.makedirs(dir) except OSError: flash('OSError!') - db.session.remove(ocr_job) + db.session.remove(nlp_job) db.session.commit() else: - for file in new_ocr_job_form.files.data: + for file in new_nlp_job_form.files.data: file.save(os.path.join(dir, file.filename)) ''' ' TODO: Let the scheduler run this job in the background. @@ -50,51 +48,53 @@ def ocr(): ' NOTE: Using self created threads is just for testing purpose as ' there is no scheduler available. ''' - db.session.expunge(ocr_job) - thread = Thread(target=swarm.run, args=(ocr_job,)) + db.session.expunge(nlp_job) + thread = Thread(target=swarm.run, args=(nlp_job,)) thread.start() flash('Job created!') - return redirect(url_for('services.ocr')) + return redirect(url_for('services.nlp')) return render_template( - 'services/ocr.html.j2', - title='Optical Character Recognition', - new_ocr_job_form=new_ocr_job_form + 'services/nlp.html.j2', + title='Natrual Language Processing', + new_nlp_job_form=new_nlp_job_form ) -@services.route('/nlp', methods=['GET', 'POST']) +@services.route('/ocr', methods=['GET', 'POST']) @login_required -def nlp(): - new_nlp_job_form = NewNLPJobForm() - if new_nlp_job_form.validate_on_submit(): - nlp_job = Job(creator=current_user._get_current_object(), - description=new_nlp_job_form.description.data, - service="nlp", - ressources=json.dumps({"n_cores": 2, - "mem_mb": 4096}), - service_args=json.dumps({"args": [], - "lang": new_nlp_job_form.language.data, - "version": new_nlp_job_form.version.data}), - status="pending", - title=new_nlp_job_form.title.data) +def ocr(): + new_ocr_job_form = NewOCRJobForm() + if new_ocr_job_form.validate_on_submit(): + ocr_job = Job(creator=current_user, + description=new_ocr_job_form.description.data, + mem_mb=8192, + n_cores=4, + service='ocr', + service_args=json.dumps([ + '-l {}'.format(new_ocr_job_form.language.data), + '--keep-intermediates', + '--skip-binarisation']), + service_version=new_ocr_job_form.version.data, + status='submitted', + title=new_ocr_job_form.title.data) - db.session.add(nlp_job) + db.session.add(ocr_job) db.session.commit() dir = os.path.join(current_app.config['OPAQUE_STORAGE'], - str(nlp_job.user_id), + str(ocr_job.user_id), 'jobs', - str(nlp_job.id)) + str(ocr_job.id)) try: os.makedirs(dir) except OSError: flash('OSError!') - db.session.remove(nlp_job) + db.session.remove(ocr_job) db.session.commit() else: - for file in new_nlp_job_form.files.data: + for file in new_ocr_job_form.files.data: file.save(os.path.join(dir, file.filename)) ''' ' TODO: Let the scheduler run this job in the background. @@ -102,14 +102,14 @@ def nlp(): ' NOTE: Using self created threads is just for testing purpose as ' there is no scheduler available. ''' - db.session.expunge(nlp_job) - thread = Thread(target=swarm.run, args=(nlp_job,)) + db.session.expunge(ocr_job) + thread = Thread(target=swarm.run, args=(ocr_job,)) thread.start() flash('Job created!') - return redirect(url_for('services.nlp')) + return redirect(url_for('services.ocr')) return render_template( - 'services/nlp.html.j2', - title='Natrual Language Processing', - new_nlp_job_form=new_nlp_job_form + 'services/ocr.html.j2', + title='Optical Character Recognition', + new_ocr_job_form=new_ocr_job_form ) diff --git a/app/swarm.py b/app/swarm.py index 748e48228bc4ed2666285b093ffeabbf62cb01a8..3ee6596de9c7d8ddb2ae9c8defdab46f2e5eecd8 100644 --- a/app/swarm.py +++ b/app/swarm.py @@ -35,16 +35,14 @@ class Swarm: ''' # Prepare argument values needed for the service creation. service_args = json.loads(job.service_args) - ressources = json.loads(job.ressources) _command = (job.service + ' -i /files' - + ' -l {}'.format(service_args['lang']) + ' -o /files/output' - + ' ' + ' '.join(service_args['args'])) + + ' ' + ' '.join(service_args)) _constraints = ['node.role==worker'] _image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format( job.service, - service_args['version'] + job.service_version ) _labels = {'service': job.service} _mounts = [os.path.join('/home/compute/mnt/opaque', @@ -64,8 +62,8 @@ class Swarm: ' in megabytes, it is also necessary to convert the value. ''' _resources = docker.types.Resources( - cpu_reservation=ressources['n_cores'] * (10 ** 9), - mem_reservation=ressources['mem_mb'] * (10 ** 6) + cpu_reservation=job.n_cores * (10 ** 9), + mem_reservation=job.mem_mb * (10 ** 6) ) _restart_policy = docker.types.RestartPolicy(condition='none') '''