From 351004b7959226e2adceaf98a00f07cde87a5026 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
Date: Fri, 19 Feb 2021 13:00:52 +0100
Subject: [PATCH] First work on fixed versioning

---
 web/app/jobs/forms.py                  | 80 --------------------------
 web/app/services/__init__.py           | 63 ++++++++++++++++++++
 web/app/services/forms.py              | 68 ++++++++++++++++++++++
 web/app/services/views.py              | 31 ++++------
 web/app/templates/services/ocr.html.j2 |  2 +-
 5 files changed, 144 insertions(+), 100 deletions(-)
 delete mode 100644 web/app/jobs/forms.py
 create mode 100644 web/app/services/forms.py

diff --git a/web/app/jobs/forms.py b/web/app/jobs/forms.py
deleted file mode 100644
index 127a981a..00000000
--- a/web/app/jobs/forms.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from flask_wtf import FlaskForm
-from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
-                     SubmitField, ValidationError)
-from wtforms.validators import DataRequired, Length
-
-
-class AddNLPJobForm(FlaskForm):
-    description = StringField('Description',
-                              validators=[DataRequired(), Length(1, 255)])
-    files = MultipleFileField('Files', validators=[DataRequired()])
-    language = SelectField('Language',
-                           choices=[('', 'Choose your option'),
-                                    ('nl', 'Dutch'),
-                                    ('en', 'English'),
-                                    ('fr', 'French'),
-                                    ('de', 'German'),
-                                    ('el', 'Greek'),
-                                    ('it', 'Italian'),
-                                    ('pt', 'Portuguese'),
-                                    ('es', 'Spanish')],
-                           validators=[DataRequired()])
-    submit = SubmitField()
-    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
-    version = SelectField('Version', choices=[('latest', 'Latest')],
-                          validators=[DataRequired()])
-    check_encoding = BooleanField('Check encoding')
-
-    def validate_files(form, field):
-        for file in field.data:
-            if not file.filename.lower().endswith('.txt'):
-                raise ValidationError('File does not have an approved '
-                                      'extension: .txt')
-
-
-class AddOCRJobForm(FlaskForm):
-    binarization = BooleanField('Binarazation')
-    description = StringField('Description',
-                              validators=[DataRequired(), Length(1, 255)])
-    files = MultipleFileField('Files', validators=[DataRequired()])
-    language = SelectField('Language',
-                           choices=[('', 'Choose your option'),
-                                    ('eng', 'English'),
-                                    ('enm', 'English, Middle (1100-1500)'),
-                                    ('fra', 'French'),
-                                    ('frm', 'French, Middle (ca. 1400-1600)'),
-                                    ('deu', 'German'),
-                                    ('frk', 'German Fraktur'),
-                                    ('ita', 'Italian'),
-                                    ('por', 'Portuguese'),
-                                    ('spa', 'Spanish; Castilian')],
-                           validators=[DataRequired()])
-    split = BooleanField('Split')
-    submit = SubmitField()
-    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
-    version = SelectField('Version', choices=[('latest', 'Latest')],
-                          validators=[DataRequired()])
-
-    def validate_files(form, field):
-        for file in field.data:
-            if not file.filename.lower().endswith('.pdf'):
-                raise ValidationError('File does not have an approved '
-                                      'extension: .pdf')
-
-
-class AddFileSetupJobForm(FlaskForm):
-    description = StringField('Description',
-                              validators=[DataRequired(), Length(1, 255)])
-    submit = SubmitField()
-    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
-    files = MultipleFileField('Files', validators=[DataRequired()])
-    version = SelectField('Version', choices=[('latest', 'Latest')],
-                          validators=[DataRequired()])
-
-    def validate_files(form, field):
-        for file in field.data:
-            if not file.filename.lower().endswith(('.jpeg', '.jpg', '.png',
-                                                   '.tiff', '.tif')):
-                raise ValidationError('File does not have an approved '
-                                      'extension: .jpeg | .jpg | .png | .tiff '
-                                      '| .tif')
diff --git a/web/app/services/__init__.py b/web/app/services/__init__.py
index ea9a403f..03836993 100644
--- a/web/app/services/__init__.py
+++ b/web/app/services/__init__.py
@@ -1,5 +1,68 @@
 from flask import Blueprint
 
 
+SERVICES = {  # service id (the <service> URL segment) -> service metadata
+    'corpus_analysis': {
+        'name': 'Corpus analysis'
+    },
+    'file-setup': {
+        'name': 'File setup',
+        'versions': {
+            'latest': '1.0.0',  # alias: key of the default version below
+            '1.0.0': {
+                'publishing_data': {
+                    'date': None,
+                    'title': 'nopaque File setup service',
+                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0',  # noqa
+                    'version': '1.0.0'
+                }
+            }
+        }
+    },
+    'nlp': {
+        'name': 'Natural Language Processing',
+        'versions': {
+            'latest': '1.0.0',  # alias: key of the default version below
+            '1.0.0': {
+                'check_encoding': True,
+                'models': {},
+                'publishing_data': {
+                    'date': None,
+                    'title': 'nopaque NLP service',
+                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0',  # noqa
+                    'version': '1.0.0'
+                }
+            }
+        }
+    },
+    'ocr': {
+        'name': 'Optical Character Recognition',
+        'versions': {
+            'latest': '1.0.0',  # alias: key of the default version below
+            '1.0.0': {
+                'binarization': True,  # the OCR form checks key presence
+                'models': {  # model code -> display name
+                    'eng': 'English',
+                    'enm': 'English, Middle 1100-1500',
+                    'fra': 'French',
+                    'frm': 'French, Middle ca. 1400-1600',
+                    'deu': 'German',
+                    'frk': 'German Fraktur',
+                    'ita': 'Italian',
+                    'por': 'Portuguese',
+                    'spa': 'Spanish; Castilian',
+                },
+                'publishing_data': {
+                    'date': None,
+                    'title': 'nopaque OCR service',
+                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0',  # noqa
+                    'version': '1.0.0'
+                }
+            }
+        }
+    }
+}
+
+
 services = Blueprint('services', __name__)
 from . import views
diff --git a/web/app/services/forms.py b/web/app/services/forms.py
new file mode 100644
index 00000000..cea741de
--- /dev/null
+++ b/web/app/services/forms.py
@@ -0,0 +1,94 @@
+from flask_wtf import FlaskForm
+from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
+                     SubmitField, ValidationError)
+from wtforms.validators import DataRequired, Length
+from . import SERVICES
+
+
+class AddJobForm(FlaskForm):
+    """Fields shared by all job submission forms."""
+    description = StringField('Description',
+                              validators=[DataRequired(), Length(1, 255)])
+    submit = SubmitField()
+    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
+
+
+class AddNLPJobForm(AddJobForm):
+    """Job submission form for the NLP service (.txt uploads only)."""
+    files = MultipleFileField('Files', validators=[DataRequired()])
+    # NOTE(review): no choices are passed here; presumably they are set
+    # elsewhere before the form is rendered or validated - confirm.
+    model = SelectField('Model', validators=[DataRequired()])
+    # Every concrete version is selectable; 'latest' only names the default.
+    version = SelectField('Version',
+                          choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'],  # noqa
+                          default=SERVICES['nlp']['versions']['latest'],
+                          validators=[DataRequired()])
+    check_encoding = BooleanField('Check encoding')
+
+    def validate_files(form, field):
+        """Raise ValidationError if any uploaded file is not a .txt file."""
+        for file in field.data:
+            if not file.filename.lower().endswith('.txt'):
+                raise ValidationError('File does not have an approved '
+                                      'extension: .txt')
+
+
+class AddOCRJobForm(AddJobForm):
+    """Job submission form for the OCR service (.pdf uploads only)."""
+    binarization = BooleanField('Binarization')
+    files = MultipleFileField('Files', validators=[DataRequired()])
+    # NOTE(review): no choices are passed here; presumably they are set
+    # elsewhere before the form is rendered or validated - confirm.
+    model = SelectField('Model', validators=[DataRequired()])
+    # Every concrete version is selectable; 'latest' only names the default.
+    version = SelectField('Version',
+                          choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'],  # noqa
+                          default=SERVICES['ocr']['versions']['latest'],
+                          validators=[DataRequired()])
+
+    def validate_binarization(form, field):
+        """Reject binarization if the selected version does not offer it."""
+        # Version entries live under SERVICES['ocr']['versions'].
+        version = SERVICES['ocr']['versions'].get(form.version.data)
+        if not isinstance(version, dict):
+            # Unknown version or the 'latest' alias (a plain string): the
+            # version field reports that error itself.
+            return
+        if field.data and 'binarization' not in version:
+            raise ValidationError('Binarization is not available in this version')  # noqa
+
+    def validate_files(form, field):
+        """Raise ValidationError if any uploaded file is not a .pdf file."""
+        for file in field.data:
+            if not file.filename.lower().endswith('.pdf'):
+                raise ValidationError('File does not have an approved '
+                                      'extension: .pdf')
+
+    def validate_model(form, field):
+        """Reject a model that the selected version does not provide."""
+        version = SERVICES['ocr']['versions'].get(form.version.data)
+        if not isinstance(version, dict):
+            # Invalid version: left to the version field's own validation.
+            return
+        if field.data not in version.get('models', {}):
+            raise ValidationError('Model is not available in this version')
+
+
+class AddFileSetupJobForm(AddJobForm):
+    """Job submission form for the file setup service (image uploads)."""
+    files = MultipleFileField('Files', validators=[DataRequired()])
+    # Every concrete version is selectable; 'latest' only names the default.
+    version = SelectField('Version',
+                          choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'],  # noqa
+                          default=SERVICES['file-setup']['versions']['latest'],
+                          validators=[DataRequired()])
+
+    def validate_files(form, field):
+        """Raise ValidationError if any file lacks an approved extension."""
+        for file in field.data:
+            if not file.filename.lower().endswith(('.jpeg', '.jpg', '.png',
+                                                   '.tiff', '.tif')):
+                raise ValidationError('File does not have an approved '
+                                      'extension: .jpeg | .jpg | .png | .tiff '
+                                      '| .tif')
diff --git a/web/app/services/views.py b/web/app/services/views.py
index 1d81e6a8..4436d1ff 100644
--- a/web/app/services/views.py
+++ b/web/app/services/views.py
@@ -1,27 +1,17 @@
-from flask import abort, flash, make_response, render_template, url_for
+from flask import (abort, flash, make_response, render_template, request,
+                   url_for)
 from flask_login import current_user, login_required
 from werkzeug.utils import secure_filename
 from . import services
+from . import SERVICES
+from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
 from .. import db, socketio
-from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
 from ..models import Job, JobInput
 import json
 import logging
 import os
 
 
-SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'},
-            'file-setup': {'name': 'File setup',
-                           'resources': {'mem_mb': 4096, 'n_cores': 4},
-                           'form': AddFileSetupJobForm},
-            'nlp': {'name': 'Natural Language Processing',
-                    'resources': {'mem_mb': 4096, 'n_cores': 2},
-                    'form': AddNLPJobForm},
-            'ocr': {'name': 'Optical Character Recognition',
-                    'resources': {'mem_mb': 8192, 'n_cores': 4},
-                    'form': AddOCRJobForm}}
-
-
 @services.route('/<service>', methods=['GET', 'POST'])
 @login_required
 def service(service):
@@ -30,23 +20,26 @@ def service(service):
     if service == 'corpus_analysis':
         return render_template('services/{}.html.j2'.format(service),
                                title=SERVICES[service]['name'])
-    form = SERVICES[service]['form'](prefix='add-job-form')
+    elif service == 'file-setup':
+        form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
+    elif service == 'nlp':
+        form = AddNLPJobForm(prefix='add-nlp-job-form')
+    elif service == 'ocr':
+        form = AddOCRJobForm(prefix='add-ocr-job-form')
     if form.is_submitted():
         if not form.validate():
             return make_response(form.errors, 400)
         service_args = []
         if service == 'nlp':
-            service_args.append('-l {}'.format(form.language.data))
+            service_args.append('-l {}'.format(form.model.data))
             if form.check_encoding.data:
                 service_args.append('--check-encoding')
         if service == 'ocr':
-            service_args.append('-l {}'.format(form.language.data))
+            service_args.append('-l {}'.format(form.model.data))
             if form.binarization.data:
                 service_args.append('--binarize')
         job = Job(creator=current_user,
                   description=form.description.data,
-                  mem_mb=SERVICES[service]['resources']['mem_mb'],
-                  n_cores=SERVICES[service]['resources']['n_cores'],
                   service=service, service_args=json.dumps(service_args),
                   service_version=form.version.data,
                   status='preparing', title=form.title.data)
diff --git a/web/app/templates/services/ocr.html.j2 b/web/app/templates/services/ocr.html.j2
index 09759e0c..5cb520bf 100644
--- a/web/app/templates/services/ocr.html.j2
+++ b/web/app/templates/services/ocr.html.j2
@@ -60,7 +60,7 @@
                 {{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
               </div>
               <div class="col s12 l4">
-                {{ wtf.render_field(form.language, material_icon='language') }}
+                {{ wtf.render_field(form.model, material_icon='language') }}
               </div>
               <div class="col s12 l3">
                 {{ wtf.render_field(form.version, material_icon='apps') }}
-- 
GitLab