Skip to content
Snippets Groups Projects
forms.py 6.93 KiB
from app.models import TesseractOCRModel, TranskribusHTRModel
from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
    BooleanField,
    MultipleFileField,
    SelectField,
    StringField,
    SubmitField,
    ValidationError
)
from wtforms.validators import DataRequired, InputRequired, Length
from . import SERVICES


class AddJobForm(FlaskForm):
    """Base form with the fields shared by every "add job" form.

    ``version`` is declared without choices here; the subclasses in this
    module assign its choices in their ``__init__``.
    """

    description = StringField(
        'Description',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=32)]
    )
    version = SelectField('Version', validators=[DataRequired()])
    submit = SubmitField()


class AddFileSetupPipelineJobForm(AddJobForm):
    """Job form for the ``file-setup-pipeline`` service (image uploads)."""

    images = MultipleFileField('File(s)', validators=[DataRequired()])

    def validate_images(form, field):
        """Check the mimetype of every uploaded file.

        Raises:
            ValidationError: If any file's mimetype is not ``image/jpeg``,
                ``image/png`` or ``image/tiff``.
        """
        accepted = ('image/jpeg', 'image/png', 'image/tiff')
        if any(upload.mimetype not in accepted for upload in field.data):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the version selector.

        Keyword Args:
            version: Service version to select. It is popped from ``kwargs``
                before the rest is forwarded to the parent constructor and
                defaults to the manifest's ``latest_version``.
        """
        manifest = SERVICES['file-setup-pipeline']
        latest = manifest['latest_version']
        selected = kwargs.pop('version', latest)
        super().__init__(*args, **kwargs)
        version_field = self.version
        version_field.choices = [(name, name) for name in manifest['versions']]
        # Assigned after the parent constructor ran, so this replaces whatever
        # value the field was given during form processing.
        version_field.data = selected
        version_field.default = latest


class AddTesseractOCRPipelineJobForm(AddJobForm):
    """Job form for the ``tesseract-ocr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Reject a checked box when the selected version lacks binarization.

        Raises:
            ValidationError: If the box is checked but the manifest entry of
                the selected version does not list ``binarization`` under
                ``methods``.
        """
        service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
        supported = (
            'methods' in service_info
            and 'binarization' in service_info['methods']
        )
        if field.data and not supported:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: For any other mimetype.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Keyword Args:
            version: Service version to select. It is popped from ``kwargs``
                before the rest is forwarded to the parent constructor and
                defaults to the manifest's ``latest_version``.

        Raises:
            KeyError: If ``version`` is not a key of the manifest's
                ``versions`` mapping.
        """
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]

        # Render the checkbox as disabled unless this version offers
        # binarization.
        if self.binarization.render_kw is None:
            self.binarization.render_kw = {}
        if 'methods' in service_info and 'binarization' in service_info['methods']:
            self.binarization.render_kw.pop('disabled', None)
        else:
            self.binarization.render_kw['disabled'] = True

        # Offer only models compatible with this version that are either
        # shared or belong to the current user.
        models = [
            x for x in TesseractOCRModel.query.all()
            if version in x.compatible_service_versions
            and (x.shared or x.user == current_user)
        ]
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [(x.hashid, x.title) for x in models]
        self.model.default = ''

        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # NOTE(review): assigned after the parent constructor ran, so this
        # replaces any value the field received during form processing;
        # presumably intended, confirm against the calling view.
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class AddTranskribusHTRPipelineJobForm(AddJobForm):
    """Job form for the ``transkribus-htr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Reject a checked box when the selected version lacks binarization.

        Raises:
            ValidationError: If the box is checked but the manifest entry of
                the selected version does not list ``binarization`` under
                ``methods``.
        """
        service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
        supported = (
            'methods' in service_info
            and 'binarization' in service_info['methods']
        )
        if field.data and not supported:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: For any other mimetype.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Keyword Args:
            version: Service version to select. It is popped from ``kwargs``
                before the rest is forwarded to the parent constructor and
                defaults to the manifest's ``latest_version``.

        Raises:
            KeyError: If ``version`` is not a key of the manifest's
                ``versions`` mapping.
        """
        service_manifest = SERVICES['transkribus-htr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]

        # Render the checkbox as disabled unless this version offers
        # binarization.
        if self.binarization.render_kw is None:
            self.binarization.render_kw = {}
        if 'methods' in service_info and 'binarization' in service_info['methods']:
            self.binarization.render_kw.pop('disabled', None)
        else:
            self.binarization.render_kw['disabled'] = True

        # Offer models that are either shared or belong to the current user.
        models = [
            x for x in TranskribusHTRModel.query.all()
            if x.shared or x.user == current_user
        ]
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [(x.hashid, x.transkribus_name) for x in models]
        self.model.default = ''

        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # NOTE(review): assigned after the parent constructor ran, so this
        # replaces any value the field received during form processing;
        # presumably intended, confirm against the calling view.
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class AddSpacyNLPPipelineJobForm(AddJobForm):
    """Job form for the ``spacy-nlp-pipeline`` service (plain text upload)."""

    encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_encoding_detection(self, field):
        """Reject a checked box when the version lacks encoding detection.

        Raises:
            ValidationError: If the box is checked but the manifest entry of
                the selected version does not list ``encoding_detection``
                under ``methods``.
        """
        service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
        supported = (
            'methods' in service_info
            and 'encoding_detection' in service_info['methods']
        )
        if field.data and not supported:
            raise ValidationError('Encoding detection is not available')

    def validate_txt(self, field):
        """Accept only uploads whose mimetype is ``text/plain``.

        Raises:
            ValidationError: For any other mimetype.
        """
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Keyword Args:
            version: Service version to select. It is popped from ``kwargs``
                before the rest is forwarded to the parent constructor and
                defaults to the manifest's ``latest_version``.

        Raises:
            KeyError: If ``version`` is not a key of the manifest's
                ``versions`` mapping, or its entry has no ``models`` key.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]

        # The ``render_kw`` dict literal in the field declaration is the same
        # object for every instance of this form. Toggling ``disabled`` on it
        # in place would leak between instances, so work on a private copy.
        render_kw = dict(self.encoding_detection.render_kw or {})
        if 'methods' in service_info and 'encoding_detection' in service_info['methods']:
            render_kw.pop('disabled', None)
        else:
            render_kw['disabled'] = True
        self.encoding_detection.render_kw = render_kw

        # Model choices come straight from the manifest entry of this version.
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += list(service_info['models'].items())
        self.model.default = ''

        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # NOTE(review): assigned after the parent constructor ran, so this
        # replaces any value the field received during form processing;
        # presumably intended, confirm against the calling view.
        self.version.data = version
        # NOTE(review): the sibling forms in this module use the manifest's
        # ``latest_version`` as default here; kept as-is, confirm intent.
        self.version.default = version