From a82b7292ed66412dcb4d4103eae76ee7fd8d7c88 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
Date: Fri, 9 Aug 2019 11:48:43 +0200
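Subject: [PATCH] Change job model.

Replace the JSON-encoded `ressources` column with integer `mem_mb` and
`n_cores` columns, move the service version into its own `service_version`
column and store `service_args` as a JSON list of command-line arguments.
Widen `description` (255) and `status` (16), create new jobs with status
'submitted' instead of 'pending', and order the NLP/OCR forms and views
alphabetically.
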
---
 app/models.py         |  21 +++------
 app/services/forms.py |  46 +++++++++----------
 app/services/views.py | 104 +++++++++++++++++++++---------------------
 app/swarm.py          |  10 ++--
 4 files changed, 86 insertions(+), 95 deletions(-)

diff --git a/app/models.py b/app/models.py
index e7db7174..ba0859e8 100644
--- a/app/models.py
+++ b/app/models.py
@@ -221,24 +221,17 @@ class Job(db.Model):
     # Primary key
     id = db.Column(db.Integer, primary_key=True)
     creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
-    description = db.Column(db.String(64))
-    '''
-    ' Requested ressources.
-    ' Example: {"n_cores": 2,
-    '           "mem_mb": 4096
-    '           }
-    '''
-    ressources = db.Column(db.String(255))
+    description = db.Column(db.String(255))
+    mem_mb = db.Column(db.Integer)
+    n_cores = db.Column(db.Integer)
     service = db.Column(db.String(64))
     '''
-    ' Service specific arguments in JSON format.
-    ' Example: {"args": ["--keep-intermediates", "skip-binarization"],
-    '           "lang": "eng",
-    '           "version": "latest"
-    '           }
+    ' Service specific arguments as string list.
+    ' Example: ["-l eng", "--keep-intermediates", "--skip-binarisation"]
     '''
     service_args = db.Column(db.String(255))
-    status = db.Column(db.String(8))
+    service_version = db.Column(db.String(16))
+    status = db.Column(db.String(16))
     title = db.Column(db.String(32))
     user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
 
diff --git a/app/services/forms.py b/app/services/forms.py
index 1c1aac02..5a79f8bb 100644
--- a/app/services/forms.py
+++ b/app/services/forms.py
@@ -3,24 +3,21 @@ from wtforms import MultipleFileField, SelectField, StringField, SubmitField, Va
 from wtforms.validators import DataRequired, Length
 
 
-class NewOCRJobForm(FlaskForm):
+class NewNLPJobForm(FlaskForm):
     description = StringField(
         'Description',
-        validators=[DataRequired(), Length(1, 64)]
+        validators=[DataRequired(), Length(1, 255)]
     )
     files = MultipleFileField('Files', validators=[DataRequired()])
     language = SelectField(
         'Language',
         choices=[('', 'Choose your option'),
-                 ('eng', 'English'),
-                 ('enm', 'English, Middle (1100-1500)'),
-                 ('fra', 'French'),
-                 ('frm', 'French, Middle (ca. 1400-1600)'),
-                 ('deu', 'German'),
-                 ('frk', 'German Fraktur'),
-                 ('ita', 'Italian'),
-                 ('por', 'Portuguese'),
-                 ('spa', 'Spanish; Castilian')
+                 ('en', 'English'),
+                 ('fr', 'French'),
+                 ('de', 'German'),
+                 ('it', 'Italian'),
+                 ('pt', 'Portuguese'),
+                 ('es', 'Spanish')
                  ],
         validators=[DataRequired()]
     )
@@ -39,28 +36,31 @@ class NewOCRJobForm(FlaskForm):
 
     def validate_files(form, field):
         for file in field.data:
-            if not file.filename.lower().endswith(('.pdf', '.tif', '.tiff')):
+            if not file.filename.lower().endswith('.txt'):
                 raise ValidationError(
                     'File does not have an approved extension: '
-                    '.pdf | .tif | .tiff'
+                    '.txt'
                 )
 
 
-class NewNLPJobForm(FlaskForm):
+class NewOCRJobForm(FlaskForm):
     description = StringField(
         'Description',
-        validators=[DataRequired(), Length(1, 64)]
+        validators=[DataRequired(), Length(1, 255)]
     )
     files = MultipleFileField('Files', validators=[DataRequired()])
     language = SelectField(
         'Language',
         choices=[('', 'Choose your option'),
-                 ('en', 'English'),
-                 ('fr', 'French'),
-                 ('de', 'German'),
-                 ('it', 'Italian'),
-                 ('pt', 'Portuguese'),
-                 ('es', 'Spanish')
+                 ('eng', 'English'),
+                 ('enm', 'English, Middle (1100-1500)'),
+                 ('fra', 'French'),
+                 ('frm', 'French, Middle (ca. 1400-1600)'),
+                 ('deu', 'German'),
+                 ('frk', 'German Fraktur'),
+                 ('ita', 'Italian'),
+                 ('por', 'Portuguese'),
+                 ('spa', 'Spanish; Castilian')
                  ],
         validators=[DataRequired()]
     )
@@ -79,8 +79,8 @@ class NewNLPJobForm(FlaskForm):
 
     def validate_files(form, field):
         for file in field.data:
-            if not file.filename.lower().endswith('.txt'):
+            if not file.filename.lower().endswith(('.pdf', '.tif', '.tiff')):
                 raise ValidationError(
                     'File does not have an approved extension: '
-                    '.txt'
+                    '.pdf | .tif | .tiff'
                 )
diff --git a/app/services/views.py b/app/services/views.py
index 20af79fa..6d493008 100644
--- a/app/services/views.py
+++ b/app/services/views.py
@@ -6,43 +6,41 @@ from ..models import Job
 from ..import swarm
 from .. import db
 from threading import Thread
-import os
 import json
+import os
 
 
-@services.route('/ocr', methods=['GET', 'POST'])
+@services.route('/nlp', methods=['GET', 'POST'])
 @login_required
-def ocr():
-    new_ocr_job_form = NewOCRJobForm()
-    if new_ocr_job_form.validate_on_submit():
-        ocr_job = Job(creator=current_user._get_current_object(),
-                      description=new_ocr_job_form.description.data,
-                      service="ocr",
-                      ressources=json.dumps({"n_cores": 4,
-                                             "mem_mb": 8192}),
-                      service_args=json.dumps({"args": ["--keep-intermediates",
-                                                        "--skip-binarisation"],
-                                               "lang": new_ocr_job_form.language.data,
-                                               "version": new_ocr_job_form.version.data}),
-                      status="pending",
-                      title=new_ocr_job_form.title.data)
+def nlp():
+    new_nlp_job_form = NewNLPJobForm()
+    if new_nlp_job_form.validate_on_submit():
+        nlp_job = Job(creator=current_user,
+                      description=new_nlp_job_form.description.data,
+                      mem_mb=4096,
+                      n_cores=2,
+                      service='nlp',
+                      service_args=json.dumps(['-l {}'.format(new_nlp_job_form.language.data)]),
+                      service_version=new_nlp_job_form.version.data,
+                      status='submitted',
+                      title=new_nlp_job_form.title.data)
 
-        db.session.add(ocr_job)
+        db.session.add(nlp_job)
         db.session.commit()
 
         dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
-                           str(ocr_job.user_id),
+                           str(nlp_job.user_id),
                            'jobs',
-                           str(ocr_job.id))
+                           str(nlp_job.id))
 
         try:
             os.makedirs(dir)
         except OSError:
             flash('OSError!')
-            db.session.remove(ocr_job)
+            db.session.delete(nlp_job)
             db.session.commit()
         else:
-            for file in new_ocr_job_form.files.data:
+            for file in new_nlp_job_form.files.data:
                 file.save(os.path.join(dir, file.filename))
             '''
             ' TODO: Let the scheduler run this job in the background.
@@ -50,51 +48,53 @@ def ocr():
             ' NOTE: Using self created threads is just for testing purpose as
             '       there is no scheduler available.
             '''
-            db.session.expunge(ocr_job)
-            thread = Thread(target=swarm.run, args=(ocr_job,))
+            db.session.expunge(nlp_job)
+            thread = Thread(target=swarm.run, args=(nlp_job,))
             thread.start()
             flash('Job created!')
-        return redirect(url_for('services.ocr'))
+        return redirect(url_for('services.nlp'))
 
     return render_template(
-        'services/ocr.html.j2',
-        title='Optical Character Recognition',
-        new_ocr_job_form=new_ocr_job_form
+        'services/nlp.html.j2',
+        title='Natural Language Processing',
+        new_nlp_job_form=new_nlp_job_form
     )
 
 
-@services.route('/nlp', methods=['GET', 'POST'])
+@services.route('/ocr', methods=['GET', 'POST'])
 @login_required
-def nlp():
-    new_nlp_job_form = NewNLPJobForm()
-    if new_nlp_job_form.validate_on_submit():
-        nlp_job = Job(creator=current_user._get_current_object(),
-                      description=new_nlp_job_form.description.data,
-                      service="nlp",
-                      ressources=json.dumps({"n_cores": 2,
-                                             "mem_mb": 4096}),
-                      service_args=json.dumps({"args": [],
-                                               "lang": new_nlp_job_form.language.data,
-                                               "version": new_nlp_job_form.version.data}),
-                      status="pending",
-                      title=new_nlp_job_form.title.data)
+def ocr():
+    new_ocr_job_form = NewOCRJobForm()
+    if new_ocr_job_form.validate_on_submit():
+        ocr_job = Job(creator=current_user,
+                      description=new_ocr_job_form.description.data,
+                      mem_mb=8192,
+                      n_cores=4,
+                      service='ocr',
+                      service_args=json.dumps([
+                        '-l {}'.format(new_ocr_job_form.language.data),
+                        '--keep-intermediates',
+                        '--skip-binarisation']),
+                      service_version=new_ocr_job_form.version.data,
+                      status='submitted',
+                      title=new_ocr_job_form.title.data)
 
-        db.session.add(nlp_job)
+        db.session.add(ocr_job)
         db.session.commit()
 
         dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
-                           str(nlp_job.user_id),
+                           str(ocr_job.user_id),
                            'jobs',
-                           str(nlp_job.id))
+                           str(ocr_job.id))
 
         try:
             os.makedirs(dir)
         except OSError:
             flash('OSError!')
-            db.session.remove(nlp_job)
+            db.session.delete(ocr_job)
             db.session.commit()
         else:
-            for file in new_nlp_job_form.files.data:
+            for file in new_ocr_job_form.files.data:
                 file.save(os.path.join(dir, file.filename))
             '''
             ' TODO: Let the scheduler run this job in the background.
@@ -102,14 +102,14 @@ def nlp():
             ' NOTE: Using self created threads is just for testing purpose as
             '       there is no scheduler available.
             '''
-            db.session.expunge(nlp_job)
-            thread = Thread(target=swarm.run, args=(nlp_job,))
+            db.session.expunge(ocr_job)
+            thread = Thread(target=swarm.run, args=(ocr_job,))
             thread.start()
             flash('Job created!')
-        return redirect(url_for('services.nlp'))
+        return redirect(url_for('services.ocr'))
 
     return render_template(
-        'services/nlp.html.j2',
-        title='Natrual Language Processing',
-        new_nlp_job_form=new_nlp_job_form
+        'services/ocr.html.j2',
+        title='Optical Character Recognition',
+        new_ocr_job_form=new_ocr_job_form
     )
diff --git a/app/swarm.py b/app/swarm.py
index 748e4822..3ee6596d 100644
--- a/app/swarm.py
+++ b/app/swarm.py
@@ -35,16 +35,14 @@ class Swarm:
         '''
         # Prepare argument values needed for the service creation.
         service_args = json.loads(job.service_args)
-        ressources = json.loads(job.ressources)
         _command = (job.service
                     + ' -i /files'
-                    + ' -l {}'.format(service_args['lang'])
                     + ' -o /files/output'
-                    + ' ' + ' '.join(service_args['args']))
+                    + ' ' + ' '.join(service_args))
         _constraints = ['node.role==worker']
         _image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(
             job.service,
-            service_args['version']
+            job.service_version
         )
         _labels = {'service': job.service}
         _mounts = [os.path.join('/home/compute/mnt/opaque',
@@ -64,8 +62,8 @@ class Swarm:
         ' in megabytes, it is also necessary to convert the value.
         '''
         _resources = docker.types.Resources(
-            cpu_reservation=ressources['n_cores'] * (10 ** 9),
-            mem_reservation=ressources['mem_mb'] * (10 ** 6)
+            cpu_reservation=job.n_cores * (10 ** 9),
+            mem_reservation=job.mem_mb * (10 ** 6)
         )
         _restart_policy = docker.types.RestartPolicy(condition='none')
         '''
-- 
GitLab