diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index ab17e76027823d7154edbfddbd6d555812b8ced5..cfb362db8106523a2b1faeb23b599966b78dd3b3 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -65,6 +65,9 @@ def _create_job_service(job): command += f' -m {job.service_args["model"]}' if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' + if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']: + value = job.service_args['ocropus_nlbin_threshold'] + command += f' --ocropus-nlbin-threshold {value}' elif job.service == 'transkribus-htr-pipeline': transkribus_htr_pipeline_model_id = job.service_args['model'] command += f' -m {transkribus_htr_pipeline_model_id}' @@ -144,8 +147,6 @@ def _create_job_service(job): ) ''' ## Restart policy ## ''' restart_policy = docker.types.RestartPolicy() - print(command) - print(mounts) try: docker_client.services.create( image, diff --git a/app/services/forms.py b/app/services/forms.py index 58bab1645eeb544ed8571a63c54b4acb73072a29..96caecc4b6d77c84ef4bbee59da5740522f35d04 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -1,16 +1,12 @@ from flask_login import current_user from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired -from wtforms import ( - BooleanField, - MultipleFileField, - SelectField, - StringField, - SubmitField, - ValidationError -) +from wtforms import (BooleanField, DecimalRangeField, MultipleFileField, + SelectField, StringField, SubmitField, ValidationError) from wtforms.validators import InputRequired, Length -from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel + +from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel + from . import SERVICES @@ -49,13 +45,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): binarization = BooleanField('Binarization') pdf = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) + ocropus_nlbin_threshold = DecimalRangeField( + render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True} + ) def validate_binarization(self, field): service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data] if field.data: if not('methods' in service_info and 'binarization' in service_info['methods']): raise ValidationError('Binarization is not available') - + def validate_pdf(self, field): if field.data.mimetype != 'application/pdf': raise ValidationError('PDF files only!') @@ -68,10 +67,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if self.binarization.render_kw is None: self.binarization.render_kw = {} self.binarization.render_kw['disabled'] = True + if self.ocropus_nlbin_threshold.render_kw is None: + self.ocropus_nlbin_threshold.render_kw = {} + self.ocropus_nlbin_threshold.render_kw['disabled'] = True if 'methods' in service_info: if 'binarization' in service_info['methods']: if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] + if 'ocropus_nlbin_threshold' in service_info['methods']: + if 'disabled' in self.ocropus_nlbin_threshold.render_kw: + del self.ocropus_nlbin_threshold.render_kw['disabled'] models = [ x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) diff --git a/app/services/routes.py b/app/services/routes.py index 4bfca9bb20b2ee2ff1cd40efbc7dcbac715cd152..7748240c894761613ce4ea88a30b96c555087ab0 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -79,7 +79,8 @@ def tesseract_ocr_pipeline(): service=service_name, service_args={ 'binarization': form.binarization.data, - 'model': hashids.decode(form.model.data) + 'model': hashids.decode(form.model.data), + 'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data) }, service_version=form.version.data, user=current_user diff --git a/app/services/services.yml b/app/services/services.yml index c9d61e08dc9d8b1a645c8ac2d538ccfbe4a4cd35..8a8377d54f67ce1793540aa9da96e8f0dff728e9 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -20,6 +20,7 @@ tesseract-ocr-pipeline: 0.1.1: methods: - 'binarization' + - 'ocropus_nlbin_threshold' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' transkribus-htr-pipeline: diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index b66e968bd8cfcd258b5875db06f3134aa63b05b0..29ff60e8b1079c4b2576d4c13bb6bdcf91e0ba5e 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -84,6 +84,11 @@ </label> </div> </div> + <div class="col s12"><p> </p></div> + <div class="col s9"> + <p>Intensity (between 0 and 1)</p> + <p class="range-field">{{ form.ocropus_nlbin_threshold() }}</p> + </div> <div class="col s12"><p> </p></div> <div class="col s12 divider"></div> <div class="col s12"><p> </p></div>