diff --git a/app/TesseractOCRModel.defaults.yml b/app/TesseractOCRPipelineModel.defaults.yml similarity index 100% rename from app/TesseractOCRModel.defaults.yml rename to app/TesseractOCRPipelineModel.defaults.yml diff --git a/app/api/auth.py b/app/api/auth.py index afda3a30699ebb21f46b3f2b7a448c63c5fcef93..398052f5d92a1e7a2a2c63ce42dc7d0209625fb6 100644 --- a/app/api/auth.py +++ b/app/api/auth.py @@ -1,4 +1,3 @@ -from flask import current_app from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth from werkzeug.exceptions import Forbidden, Unauthorized from app.models import User diff --git a/app/api/jobs.py b/app/api/jobs.py index e730f2e60b3c85cacb151cd673a59b6448201de3..2eaecd3f0a5a875be9e34033d8cca40f21d32918 100644 --- a/app/api/jobs.py +++ b/app/api/jobs.py @@ -4,8 +4,8 @@ from apifairy.decorators import body, other_responses from flask import abort, Blueprint from werkzeug.exceptions import InternalServerError from app import db, hashids -from app.models import Job, JobInput, JobStatus, TesseractOCRModel -from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRModelSchema +from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel +from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema from .auth import auth_error_responses, token_auth @@ -14,8 +14,8 @@ job_schema = JobSchema() jobs_schema = JobSchema(many=True) spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema() tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema() -tesseract_ocr_model_schema = TesseractOCRModelSchema() -tesseract_ocr_models_schema = TesseractOCRModelSchema(many=True) +tesseract_ocr_pipeline_model_schema = TesseractOCRPipelineModelSchema() +tesseract_ocr_pipeline_models_schema = TesseractOCRPipelineModelSchema(many=True) @bp.route('', methods=['GET']) @@ -60,11 +60,11 @@ def create_tesseract_ocr_pipeline_job(args): 
@bp.route('/tesseract-ocr-pipeline/models', methods=['GET']) @authenticate(token_auth) -@response(tesseract_ocr_models_schema) +@response(tesseract_ocr_pipeline_models_schema) @other_responses(auth_error_responses) def get_tesseract_ocr_models(): """Get all Tesseract OCR Models""" - return TesseractOCRModel.query.all() + return TesseractOCRPipelineModel.query.all() @bp.route('/<hashid:job_id>', methods=['DELETE']) diff --git a/app/api/schemas.py b/app/api/schemas.py index 394b1ebb6e2f29468477bd3fdc9a54ad36165f90..9474bd1a3cec00eba9697edbc9da984e99e17e96 100644 --- a/app/api/schemas.py +++ b/app/api/schemas.py @@ -3,7 +3,14 @@ from marshmallow import validate, validates, ValidationError from marshmallow.decorators import post_dump from app import ma from app.auth import USERNAME_REGEX -from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel +from app.models import ( + Job, + JobStatus, + TesseractOCRPipelineModel, + Token, + User, + UserSettingJobStatusMailNotificationLevel +) from app.services import SERVICES @@ -21,9 +28,9 @@ class TokenSchema(ma.SQLAlchemySchema): refresh_token = ma.String() -class TesseractOCRModelSchema(ma.SQLAlchemySchema): +class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema): class Meta: - model = TesseractOCRModel + model = TesseractOCRPipelineModel ordered = True hashid = ma.String(data_key='id', dump_only=True) diff --git a/app/api/users.py b/app/api/users.py index fc180df0b62419dd623f23ce2251fcbde7822942..c9ea5d39223b17a3447de2c612e1ea844274d568 100644 --- a/app/api/users.py +++ b/app/api/users.py @@ -1,7 +1,7 @@ from apifairy import authenticate, body, response from apifairy.decorators import other_responses -from flask import abort, Blueprint, current_app +from flask import abort, Blueprint from werkzeug.exceptions import InternalServerError from app import db from app.email import create_message, send diff --git a/app/cli.py b/app/cli.py index 
d9b4fdf0637ad2aec9615eac1b248745d0aa775f..bfaf8754feb6948267b5c3413b25d8a4e13ce777 100644 --- a/app/cli.py +++ b/app/cli.py @@ -2,7 +2,12 @@ from flask import current_app from flask_migrate import upgrade import click import os -from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel +from app.models import ( + Role, + User, + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel +) def _make_default_dirs(): @@ -35,10 +40,10 @@ def register(app): Role.insert_defaults() current_app.logger.info('Insert/Update default users') User.insert_defaults() - current_app.logger.info('Insert/Update default TesseractOCRModels') - TesseractOCRModel.insert_defaults() - current_app.logger.info('Insert/Update default TranskribusHTRModels') - TranskribusHTRModel.insert_defaults() + current_app.logger.info('Insert/Update default TesseractOCRPipelineModels') + TesseractOCRPipelineModel.insert_defaults() + current_app.logger.info('Insert/Update default TranskribusHTRPipelineModels') + TranskribusHTRPipelineModel.insert_defaults() @app.cli.group() def converter(): diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 38d6c48b05c418f3ac9d91e2eca6f8146cef14d3..0f74f3a05dedd2628676f2fa51123f300a273f88 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -3,8 +3,8 @@ from app.models import ( Job, JobResult, JobStatus, - TesseractOCRModel, - TranskribusHTRModel + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel ) from datetime import datetime from flask import current_app @@ -61,7 +61,7 @@ def _create_job_service(job): if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' elif job.service == 'transkribus-htr-pipeline': - transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model']) + transkribus_htr_model = TranskribusHTRPipelineModel.query.get(job.service_args['model']) command += f' -m {transkribus_htr_model.transkribus_model_id}' readcoop_username = 
current_app.config.get('NOPAQUE_READCOOP_USERNAME') command += f' --readcoop-username "{readcoop_username}"' @@ -96,7 +96,7 @@ def _create_job_service(job): else: job.status = JobStatus.FAILED return - model = TesseractOCRModel.query.get(model_id) + model = TesseractOCRPipelineModel.query.get(model_id) if model is None: job.status = JobStatus.FAILED return diff --git a/app/models.py b/app/models.py index 21a64d356f3114100525751982d0e7cd1a58065c..beab7d6e5f62b48ac4f3b35c16e1932f7ac211a4 100644 --- a/app/models.py +++ b/app/models.py @@ -23,7 +23,6 @@ from app.email import create_message TRANSKRIBUS_HTR_MODELS = \ json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa - ############################################################################## # enums # ############################################################################## @@ -274,14 +273,14 @@ class User(HashidMixin, UserMixin, db.Model): last_seen = db.Column(db.DateTime()) # Backrefs: role: Role # Relationships - tesseract_ocr_models = db.relationship( - 'TesseractOCRModel', + tesseract_ocr_pipeline_models = db.relationship( + 'TesseractOCRPipelineModel', backref='user', cascade='all, delete-orphan', lazy='dynamic' ) - transkribus_htr_models = db.relationship( - 'TranskribusHTRModel', + transkribus_htr_pipeline_models = db.relationship( + 'TranskribusHTRPipelineModel', backref='user', cascade='all, delete-orphan', lazy='dynamic' @@ -342,7 +341,7 @@ class User(HashidMixin, UserMixin, db.Model): db.session.refresh(user) try: os.mkdir(user.path) - os.mkdir(os.path.join(user.path, 'tesseract_ocr_models')) + os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models')) os.mkdir(os.path.join(user.path, 'corpora')) os.mkdir(os.path.join(user.path, 'jobs')) except OSError as e: @@ -518,14 +517,14 @@ class User(HashidMixin, UserMixin, db.Model): x.hashid: x.to_json(relationships=True) for x in self.jobs } - 
_json['tesseract_ocr_models'] = { + _json['tesseract_ocr_pipeline_models'] = { x.hashid: x.to_json(relationships=True) - for x in self.tesseract_ocr_models + for x in self.tesseract_ocr_pipeline_models } return _json -class TesseractOCRModel(FileMixin, HashidMixin, db.Model): - __tablename__ = 'tesseract_ocr_models' +class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): + __tablename__ = 'tesseract_ocr_pipeline_models' # Primary key id = db.Column(db.Integer, primary_key=True) # Foreign keys @@ -546,7 +545,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): def path(self): return os.path.join( self.user.path, - 'tesseract_ocr_models', + 'tesseract_ocr_pipeline_models', str(self.id) ) @@ -555,12 +554,12 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): nopaque_user = User.query.filter_by(username='nopaque').first() defaults_file = os.path.join( os.path.dirname(os.path.abspath(__file__)), - 'TesseractOCRModel.defaults.yml' + 'TesseractOCRPipelineModel.defaults.yml' ) with open(defaults_file, 'r') as f: defaults = yaml.safe_load(f) for m in defaults: - model = TesseractOCRModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa + model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa if model is not None: model.compatible_service_versions = m['compatible_service_versions'] model.description = m['description'] @@ -572,7 +571,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): model.title = m['title'] model.version = m['version'] continue - model = TesseractOCRModel( + model = TesseractOCRPipelineModel( compatible_service_versions=m['compatible_service_versions'], description=m['description'], publisher=m['publisher'], @@ -623,8 +622,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): return _json -class TranskribusHTRModel(HashidMixin, db.Model): - __tablename__ = 'transkribus_htr_models' +class TranskribusHTRPipelineModel(HashidMixin, 
db.Model): + __tablename__ = 'transkribus_htr_pipeline_models' # Primary key id = db.Column(db.Integer, primary_key=True) # Foreign keys @@ -643,12 +642,12 @@ class TranskribusHTRModel(HashidMixin, db.Model): # and 'docType' in m and m['docType'] == 'handwritten' # ] for m in TRANSKRIBUS_HTR_MODELS: - model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa + model = TranskribusHTRPipelineModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa if model is not None: model.shared = True model.transkribus_model_id = m['modelId'] continue - model = TranskribusHTRModel( + model = TranskribusHTRPipelineModel( transkribus_model_id=m['modelId'], shared=True, user=nopaque_user, diff --git a/app/services/forms.py b/app/services/forms.py index 008e0d0a6d9e4757746e76f6c0db6eec19702e9f..ea1c1e7052e7321f8532c85c33e73c46fa7ff7c1 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -12,8 +12,8 @@ from wtforms import ( from wtforms.validators import InputRequired, Length from app.models import ( TRANSKRIBUS_HTR_MODELS, - TesseractOCRModel, - TranskribusHTRModel + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel ) from . 
import SERVICES @@ -77,7 +77,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] models = [ - x for x in TesseractOCRModel.query.filter().all() + x for x in TesseractOCRPipelineModel.query.filter().all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) ] self.model.choices = [('', 'Choose your option')] @@ -119,7 +119,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm): if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] models = [ - x for x in TranskribusHTRModel.query.filter().all() + x for x in TranskribusHTRPipelineModel.query.filter().all() if x.shared == True or x.user == current_user ] self.model.choices = [('', 'Choose your option')] diff --git a/app/services/routes.py b/app/services/routes.py index 913acbb75deaaf85d69f0a62410d90061c2f6444..4f019525c9b246af3a53789b5224b59db2ac61fe 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -5,9 +5,9 @@ from app.models import ( Job, JobInput, JobStatus, - TesseractOCRModel, + TesseractOCRPipelineModel, TRANSKRIBUS_HTR_MODELS, - TranskribusHTRModel + TranskribusHTRPipelineModel ) from . 
import bp, SERVICES from .forms import ( @@ -95,14 +95,14 @@ def tesseract_ocr_pipeline(): message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created') flash(message, 'job') return {}, 201, {'Location': job.url} - tesseract_ocr_models = [ - x for x in TesseractOCRModel.query.all() + tesseract_ocr_pipeline_models = [ + x for x in TesseractOCRPipelineModel.query.all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) ] return render_template( 'services/tesseract_ocr_pipeline.html.j2', form=form, - tesseract_ocr_models=tesseract_ocr_models, + tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models, title=service_manifest['name'] ) @@ -145,8 +145,8 @@ def transkribus_htr_pipeline(): message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created') flash(message, 'job') return {}, 201, {'Location': job.url} - transkribus_htr_models = [ - x for x in TranskribusHTRModel.query.all() + transkribus_htr_pipeline_models = [ + x for x in TranskribusHTRPipelineModel.query.all() if x.shared == True or x.user == current_user ] return render_template( @@ -154,7 +154,7 @@ def transkribus_htr_pipeline(): form=form, title=service_manifest['name'], TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS, - transkribus_htr_models=transkribus_htr_models + transkribus_htr_pipeline_models=transkribus_htr_pipeline_models ) diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index c38c396593ecdd8267cccfe9717290f1263c9c72..982265bc93aa91b56262d03e07347352aef06f9f 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -160,8 +160,8 @@ </tr> </thead> <tbody> - {% for m in tesseract_ocr_models %} - <tr id="tesseract-ocr-model-{{ m.hashid }}"> + {% for m in tesseract_ocr_pipeline_models %} + <tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}"> <td>{{ m.title }}</td> {% if m.description == '' %} 
<td>Description is not available.</td> diff --git a/app/templates/services/transkribus_htr_pipeline.html.j2 b/app/templates/services/transkribus_htr_pipeline.html.j2 index 7aedbd4f9ceff47f790ea0343b7e3665eb213c35..79519f3a9986cc0f6a3200918d5e1d9bd6523208 100644 --- a/app/templates/services/transkribus_htr_pipeline.html.j2 +++ b/app/templates/services/transkribus_htr_pipeline.html.j2 @@ -157,8 +157,8 @@ <div class="modal-content"> <h4>Transkribus HTR Pipeline models</h4> <ul class="collapsible popout" id="transkribus-htr-models"> - {% for m in transkribus_htr_models %} - <li id="transkribus-htr-model-{{ m.hashid }}"> + {% for m in transkribus_htr_pipeline_models %} + <li id="transkribus-htr-pipeline-model-{{ m.hashid }}"> {% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %} <div class="collapsible-header"><i class="material-icons">widgets</i>{{ m_info.name }}</div> <div class="collapsible-body">
diff --git a/migrations/versions/63b2cc26a01f_.py b/migrations/versions/63b2cc26a01f_.py
new file mode 100644
index 0000000000000000000000000000000000000000..5876ed3cc52f7587b3fc6b2fc9158277caa438a8
--- /dev/null
+++ b/migrations/versions/63b2cc26a01f_.py
@@ -0,0 +1,57 @@
+"""Rename pipeline model tables
+
+Revision ID: 63b2cc26a01f
+Revises: 260b57d5f4e7
+Create Date: 2022-10-11 14:32:13.227364
+
+"""
+from alembic import op
+from flask import current_app
+import os
+from app.models import User
+
+# revision identifiers, used by Alembic.
+revision = '63b2cc26a01f'
+down_revision = '260b57d5f4e7'
+branch_labels = None
+depends_on = None
+
+# Name of the per-user model directory before and after this revision.
+_OLD_DIR_NAME = 'tesseract_ocr_models'
+_NEW_DIR_NAME = 'tesseract_ocr_pipeline_models'
+
+
+def _rename_user_model_dirs(src_name, dst_name):
+    """Rename ``src_name`` to ``dst_name`` inside every user's directory.
+
+    A user whose source directory does not exist is skipped (and logged)
+    instead of aborting the whole migration with ``FileNotFoundError``.
+    This also makes the step safe to re-run after a partial failure, since
+    directories that were already renamed are simply left alone.
+
+    NOTE(review): ``User`` is the live application model, so this relies on
+    the ``users`` table still matching it when the migration runs.
+    """
+    for user in User.query.all():
+        src_path = os.path.join(user.path, src_name)
+        dst_path = os.path.join(user.path, dst_name)
+        if not os.path.isdir(src_path):
+            current_app.logger.warning(
+                f'Skipping rename, directory not found: {src_path}'
+            )
+            continue
+        os.rename(src_path, dst_path)
+
+
+def upgrade():
+    """Rename user model directories and tables to *_pipeline_models."""
+    _rename_user_model_dirs(_OLD_DIR_NAME, _NEW_DIR_NAME)
+    op.rename_table('tesseract_ocr_models', 'tesseract_ocr_pipeline_models')
+    op.rename_table('transkribus_htr_models', 'transkribus_htr_pipeline_models')
+
+
+def downgrade():
+    """Restore the directory and table names used before this revision."""
+    _rename_user_model_dirs(_NEW_DIR_NAME, _OLD_DIR_NAME)
+    op.rename_table('tesseract_ocr_pipeline_models', 'tesseract_ocr_models')
+    op.rename_table('transkribus_htr_pipeline_models', 'transkribus_htr_models')
diff --git a/nopaque.py b/nopaque.py index 96746d9750a00648055820c87b5b0321bb4c231f..457add2cacaa5c6abfa8a8d0e6ac2ce43d541ca2 100644 --- a/nopaque.py +++ b/nopaque.py @@ -12,8 +12,8 @@ from app.models import ( JobResult, Permission, Role, - TesseractOCRModel, - TranskribusHTRModel, + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel, User ) # noqa from flask import Flask # noqa @@ -42,8 +42,8 @@ def make_shell_context() -> Dict[str, Any]: 'JobResult': JobResult, 'Permission': Permission, 'Role': Role, - 'TesseractOCRModel': TesseractOCRModel, - 'TranskribusHTRModel': TranskribusHTRModel, + 'TesseractOCRPipelineModel': TesseractOCRPipelineModel, + 'TranskribusHTRPipelineModel': TranskribusHTRPipelineModel, 'User': User }