Skip to content
Snippets Groups Projects
Commit 351004b7 authored by Patrick Jentsch
Browse files

First work on fixed versioning

parent 76e3ffb9
No related branches found
No related tags found
No related merge requests found
from flask import Blueprint from flask import Blueprint
# Static catalogue of the services, keyed by service identifier.
# Every entry has a display 'name'. Entries with a 'versions' mapping hold
# one sub-entry per version string (its options and publishing metadata)
# plus a 'latest' key whose value names one of those version strings.
SERVICES = {
    'corpus_analysis': {
        'name': 'Corpus analysis',
    },
    'file-setup': {
        'name': 'File setup',
        'versions': {
            'latest': '1.0.0',
            '1.0.0': {
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque File setup service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'versions': {
            'latest': '1.0.0',
            '1.0.0': {
                'check_encoding': True,
                # No models are listed for this version.
                'models': {},
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque NLP service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'versions': {
            'latest': '1.0.0',
            '1.0.0': {
                'binarization': True,
                # Model code -> human readable label.
                'models': {
                    'eng': 'English',
                    'enm': 'English, Middle 1100-1500',
                    'fra': 'French',
                    'frm': 'French, Middle ca. 1400-1600',
                    'deu': 'German',
                    'frk': 'German Fraktur',
                    'ita': 'Italian',
                    'por': 'Portuguese',
                    'spa': 'Spanish; Castilian',
                },
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque OCR service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
}
services = Blueprint('services', __name__) services = Blueprint('services', __name__)
from . import views from . import views
...@@ -2,26 +2,22 @@ from flask_wtf import FlaskForm ...@@ -2,26 +2,22 @@ from flask_wtf import FlaskForm
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField, from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
SubmitField, ValidationError) SubmitField, ValidationError)
from wtforms.validators import DataRequired, Length from wtforms.validators import DataRequired, Length
from . import SERVICES
class AddNLPJobForm(FlaskForm): class AddJobForm(FlaskForm):
description = StringField('Description', description = StringField('Description',
validators=[DataRequired(), Length(1, 255)]) validators=[DataRequired(), Length(1, 255)])
files = MultipleFileField('Files', validators=[DataRequired()])
language = SelectField('Language',
choices=[('', 'Choose your option'),
('nl', 'Dutch'),
('en', 'English'),
('fr', 'French'),
('de', 'German'),
('el', 'Greek'),
('it', 'Italian'),
('pt', 'Portuguese'),
('es', 'Spanish')],
validators=[DataRequired()])
submit = SubmitField() submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
version = SelectField('Version', choices=[('latest', 'Latest')],
class AddNLPJobForm(AddJobForm):
files = MultipleFileField('Files', validators=[DataRequired()])
model = SelectField('Model', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa
default=SERVICES['nlp']['versions']['latest'],
validators=[DataRequired()]) validators=[DataRequired()])
check_encoding = BooleanField('Check encoding') check_encoding = BooleanField('Check encoding')
...@@ -32,43 +28,35 @@ class AddNLPJobForm(FlaskForm): ...@@ -32,43 +28,35 @@ class AddNLPJobForm(FlaskForm):
'extension: .txt') 'extension: .txt')
class AddOCRJobForm(FlaskForm): class AddOCRJobForm(AddJobForm):
binarization = BooleanField('Binarazation') binarization = BooleanField('Binarazation')
description = StringField('Description',
validators=[DataRequired(), Length(1, 255)])
files = MultipleFileField('Files', validators=[DataRequired()]) files = MultipleFileField('Files', validators=[DataRequired()])
language = SelectField('Language', model = SelectField('Model', validators=[DataRequired()])
choices=[('', 'Choose your option'), version = SelectField('Version',
('eng', 'English'), choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa
('enm', 'English, Middle (1100-1500)'), default=SERVICES['ocr']['versions']['latest'],
('fra', 'French'),
('frm', 'French, Middle (ca. 1400-1600)'),
('deu', 'German'),
('frk', 'German Fraktur'),
('ita', 'Italian'),
('por', 'Portuguese'),
('spa', 'Spanish; Castilian')],
validators=[DataRequired()])
split = BooleanField('Split')
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
version = SelectField('Version', choices=[('latest', 'Latest')],
validators=[DataRequired()]) validators=[DataRequired()])
def validate_binarization(form, field):
    """Reject binarization when the selected OCR version does not offer it.

    Inline validator for the ``binarization`` field.

    :param form: the form being validated; its ``version`` field is read.
    :param field: the ``binarization`` field.
    :raises ValidationError: if binarization was requested but the selected
        version has no ``'binarization'`` entry (or the version is unknown).
    """
    if not field.data:
        return
    # Version entries live under the 'versions' key of the service entry.
    # The 'latest' alias maps to a string rather than an options dict, so
    # anything that is not a dict is treated as offering no options.
    version_entry = SERVICES['ocr']['versions'].get(form.version.data)
    options = version_entry if isinstance(version_entry, dict) else {}
    if 'binarization' not in options:
        raise ValidationError('Binarization is not available in this version')  # noqa
def validate_files(form, field): def validate_files(form, field):
for file in field.data: for file in field.data:
if not file.filename.lower().endswith('.pdf'): if not file.filename.lower().endswith('.pdf'):
raise ValidationError('File does not have an approved ' raise ValidationError('File does not have an approved '
'extension: .pdf') 'extension: .pdf')
def validate_model(form, field):
    """Ensure the chosen model exists in the selected OCR version.

    Inline validator for the ``model`` field.

    :param form: the form being validated; its ``version`` field is read.
    :param field: the ``model`` field.
    :raises ValidationError: if the model is not listed under the selected
        version's ``'models'`` (or the version is unknown).
    """
    # Version entries live under the 'versions' key of the service entry.
    # The 'latest' alias maps to a string rather than an options dict, so
    # anything that is not a dict is treated as listing no models.
    version_entry = SERVICES['ocr']['versions'].get(form.version.data)
    if isinstance(version_entry, dict):
        models = version_entry.get('models', {})
    else:
        models = {}
    if field.data not in models:
        raise ValidationError('Model is not available in this version')
class AddFileSetupJobForm(FlaskForm):
description = StringField('Description', class AddFileSetupJobForm(AddJobForm):
validators=[DataRequired(), Length(1, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
files = MultipleFileField('Files', validators=[DataRequired()]) files = MultipleFileField('Files', validators=[DataRequired()])
version = SelectField('Version', choices=[('latest', 'Latest')], version = SelectField('Version',
choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'], # noqa
default=SERVICES['file-setup']['versions']['latest'],
validators=[DataRequired()]) validators=[DataRequired()])
def validate_files(form, field): def validate_files(form, field):
......
from flask import abort, flash, make_response, render_template, url_for from flask import (abort, flash, make_response, render_template, request,
url_for)
from flask_login import current_user, login_required from flask_login import current_user, login_required
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from . import services from . import services
from . import SERVICES
from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
from .. import db, socketio from .. import db, socketio
from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
from ..models import Job, JobInput from ..models import Job, JobInput
import json import json
import logging import logging
import os import os
# Per-service settings keyed by service identifier: a display 'name' and,
# where present, resource figures ('mem_mb', 'n_cores') and the form class
# stored under 'form'.
SERVICES = {
    'corpus_analysis': {
        'name': 'Corpus analysis',
    },
    'file-setup': {
        'name': 'File setup',
        'resources': {'mem_mb': 4096, 'n_cores': 4},
        'form': AddFileSetupJobForm,
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'resources': {'mem_mb': 4096, 'n_cores': 2},
        'form': AddNLPJobForm,
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'resources': {'mem_mb': 8192, 'n_cores': 4},
        'form': AddOCRJobForm,
    },
}
@services.route('/<service>', methods=['GET', 'POST']) @services.route('/<service>', methods=['GET', 'POST'])
@login_required @login_required
def service(service): def service(service):
...@@ -30,23 +20,26 @@ def service(service): ...@@ -30,23 +20,26 @@ def service(service):
if service == 'corpus_analysis': if service == 'corpus_analysis':
return render_template('services/{}.html.j2'.format(service), return render_template('services/{}.html.j2'.format(service),
title=SERVICES[service]['name']) title=SERVICES[service]['name'])
form = SERVICES[service]['form'](prefix='add-job-form') elif service == 'file-setup':
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
elif service == 'nlp':
form = AddNLPJobForm(prefix='add-nlp-job-form')
elif service == 'ocr':
form = AddOCRJobForm(prefix='add-ocr-job-form')
if form.is_submitted(): if form.is_submitted():
if not form.validate(): if not form.validate():
return make_response(form.errors, 400) return make_response(form.errors, 400)
service_args = [] service_args = []
if service == 'nlp': if service == 'nlp':
service_args.append('-l {}'.format(form.language.data)) service_args.append('-l {}'.format(form.model.data))
if form.check_encoding.data: if form.check_encoding.data:
service_args.append('--check-encoding') service_args.append('--check-encoding')
if service == 'ocr': if service == 'ocr':
service_args.append('-l {}'.format(form.language.data)) service_args.append('-l {}'.format(form.model.data))
if form.binarization.data: if form.binarization.data:
service_args.append('--binarize') service_args.append('--binarize')
job = Job(creator=current_user, job = Job(creator=current_user,
description=form.description.data, description=form.description.data,
mem_mb=SERVICES[service]['resources']['mem_mb'],
n_cores=SERVICES[service]['resources']['n_cores'],
service=service, service_args=json.dumps(service_args), service=service, service_args=json.dumps(service_args),
service_version=form.version.data, service_version=form.version.data,
status='preparing', title=form.title.data) status='preparing', title=form.title.data)
......
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }} {{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
</div> </div>
<div class="col s12 l4"> <div class="col s12 l4">
{{ wtf.render_field(form.language, material_icon='language') }} {{ wtf.render_field(form.model, material_icon='language') }}
</div> </div>
<div class="col s12 l3"> <div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }} {{ wtf.render_field(form.version, material_icon='apps') }}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment