From 60a59383c7301665e20887a5751c1dc6271af754 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
Date: Mon, 15 May 2023 12:00:13 +0200
Subject: [PATCH] A better application structure

---
 app/__init__.py               |  16 ++-
 app/cli/__init__.py           |  10 --
 app/cli/converter.py          |  21 ----
 app/cli/corpus.py             |  23 -----
 app/cli/main.py               |  45 ---------
 app/contributions/__init__.py |  10 +-
 app/converters/cli.py         |  22 ++++
 app/converters/sandpaper.py   | 185 +++++++++++++++++-----------------
 app/corpora/__init__.py       |   5 +-
 app/corpora/cli.py            |  21 ++++
 app/errors/__init__.py        |   7 +-
 app/errors/handlers.py        |  17 ++--
 app/main/__init__.py          |   2 +-
 app/main/cli.py               |  45 +++++++++
 app/tests/__init__.py         |   5 -
 app/users/__init__.py         |   3 +-
 16 files changed, 212 insertions(+), 225 deletions(-)
 delete mode 100644 app/cli/__init__.py
 delete mode 100644 app/cli/converter.py
 delete mode 100644 app/cli/corpus.py
 delete mode 100644 app/cli/main.py
 create mode 100644 app/converters/cli.py
 create mode 100644 app/corpora/cli.py
 create mode 100644 app/main/cli.py

diff --git a/app/__init__.py b/app/__init__.py
index 3afa99af..3a03e00b 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -13,6 +13,7 @@ from flask_paranoid import Paranoid
 from flask_socketio import SocketIO
 from flask_sqlalchemy import SQLAlchemy
 from flask_hashids import Hashids
+from werkzeug.exceptions import HTTPException
 
 
 apifairy = APIFairy()
@@ -35,7 +36,7 @@ socketio = SocketIO()
 
 def create_app(config: Config = Config) -> Flask:
     ''' Creates an initialized Flask (WSGI Application) object. '''
-    app: Flask = Flask(__name__)
+    app = Flask(__name__)
     app.config.from_object(config)
     config.init_app(app)
     docker_client.login(
@@ -57,12 +58,6 @@ def create_app(config: Config = Config) -> Flask:
     scheduler.init_app(app)
     socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])  # noqa
 
-    from .errors import init_app as init_error_handlers
-    init_error_handlers(app)
-
-    from .cli import init_app as init_cli
-    init_cli(app)
-
     from .admin import bp as admin_blueprint
     default_breadcrumb_root(admin_blueprint, '.admin')
     app.register_blueprint(admin_blueprint, url_prefix='/admin')
@@ -80,7 +75,10 @@ def create_app(config: Config = Config) -> Flask:
 
     from .corpora import bp as corpora_blueprint
     default_breadcrumb_root(corpora_blueprint, '.corpora')
-    app.register_blueprint(corpora_blueprint, url_prefix='/corpora')
+    app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
+
+    from .errors import bp as errors_bp
+    app.register_blueprint(errors_bp)
 
     from .jobs import bp as jobs_blueprint
     default_breadcrumb_root(jobs_blueprint, '.jobs')
@@ -88,7 +86,7 @@ def create_app(config: Config = Config) -> Flask:
 
     from .main import bp as main_blueprint
     default_breadcrumb_root(main_blueprint, '.')
-    app.register_blueprint(main_blueprint)
+    app.register_blueprint(main_blueprint, cli_group=None)
 
     from .services import bp as services_blueprint
     default_breadcrumb_root(services_blueprint, '.services')
diff --git a/app/cli/__init__.py b/app/cli/__init__.py
deleted file mode 100644
index 1803deea..00000000
--- a/app/cli/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .converter import init_app as converter_init_app
-from .corpus import init_app as corpus_init_app
-from .main import init_app as main_init_app
-
-
-
-def init_app(app):
-    converter_init_app(app)
-    corpus_init_app(app)
-    main_init_app(app)
diff --git a/app/cli/converter.py b/app/cli/converter.py
deleted file mode 100644
index 4d07bc30..00000000
--- a/app/cli/converter.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import click
-
-
-def init_app(app):
-    @app.cli.group('converter')
-    def converter():
-        ''' Converter commands. '''
-        pass
-
-    @converter.group('sandpaper')
-    def sandpaper_converter():
-        ''' Sandpaper converter commands. '''
-        pass
-
-    @sandpaper_converter.command('run')
-    @click.argument('json_db')
-    @click.argument('data_dir')
-    def run_sandpaper_converter(json_db, data_dir):
-        ''' Run the sandpaper converter. '''
-        from app.converters.sandpaper import convert
-        convert(json_db, data_dir)
diff --git a/app/cli/corpus.py b/app/cli/corpus.py
deleted file mode 100644
index e79269f0..00000000
--- a/app/cli/corpus.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from app.models import Corpus, CorpusStatus
-
-
-def init_app(app):
-    @app.cli.group('corpus')
-    def corpus():
-        ''' Corpus commands. '''
-        pass
-
-    @corpus.command('dismantle')
-    def dismantle():
-        ''' Dismantle built corpora. '''
-        status = [
-            CorpusStatus.QUEUED,
-            CorpusStatus.BUILDING,
-            CorpusStatus.BUILT,
-            CorpusStatus.STARTING_ANALYSIS_SESSION,
-            CorpusStatus.RUNNING_ANALYSIS_SESSION,
-            CorpusStatus.CANCELING_ANALYSIS_SESSION
-        ]
-        for corpus in [x for x in Corpus.query.all() if x.status in status]:
-            corpus.status = CorpusStatus.SUBMITTED
-            corpus.num_analysis_sessions = 0
diff --git a/app/cli/main.py b/app/cli/main.py
deleted file mode 100644
index 2022d609..00000000
--- a/app/cli/main.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from flask import current_app
-from flask_migrate import upgrade
-import os
-from app.models import (
-    CorpusFollowerRole,
-    Role,
-    SpaCyNLPPipelineModel,
-    TesseractOCRPipelineModel,
-    User
-)
-
-
-def init_app(app):
-    @app.cli.command('deploy')
-    def deploy():
-        ''' Run deployment tasks. '''
-        # Make default directories
-        print('Make default directories')
-        base_dir = current_app.config['NOPAQUE_DATA_DIR']
-        default_dirs = [
-            os.path.join(base_dir, 'tmp'),
-            os.path.join(base_dir, 'users')
-        ]
-        for dir in default_dirs:
-            if os.path.exists(dir):
-                if not os.path.isdir(dir):
-                    raise NotADirectoryError(f'{dir} is not a directory')
-            else:
-                os.mkdir(dir)
-
-        # migrate database to latest revision
-        print('Migrate database to latest revision')
-        upgrade()
-
-        # Insert/Update default database values
-        print('Insert/Update default Roles')
-        Role.insert_defaults()
-        print('Insert/Update default Users')
-        User.insert_defaults()
-        print('Insert/Update default CorpusFollowerRoles')
-        CorpusFollowerRole.insert_defaults()
-        print('Insert/Update default SpaCyNLPPipelineModels')
-        SpaCyNLPPipelineModel.insert_defaults()
-        print('Insert/Update default TesseractOCRPipelineModels')
-        TesseractOCRPipelineModel.insert_defaults()
diff --git a/app/contributions/__init__.py b/app/contributions/__init__.py
index 5a7ddf1b..3805e489 100644
--- a/app/contributions/__init__.py
+++ b/app/contributions/__init__.py
@@ -15,7 +15,9 @@ def before_request():
     pass
 
 
-from . import routes
-from . import spacy_nlp_pipeline_models
-from . import tesseract_ocr_pipeline_models
-from . import transkribus_htr_pipeline_models
+from . import (
+    routes,
+    spacy_nlp_pipeline_models,
+    tesseract_ocr_pipeline_models,
+    transkribus_htr_pipeline_models
+)
diff --git a/app/converters/cli.py b/app/converters/cli.py
new file mode 100644
index 00000000..a7baf465
--- /dev/null
+++ b/app/converters/cli.py
@@ -0,0 +1,22 @@
+import click
+from . import bp
+from .sandpaper import SandpaperConverter
+
+
+@bp.cli.group('converter')
+def converter():
+    ''' Converter commands. '''
+    pass
+
+@converter.group('sandpaper')
+def sandpaper_converter():
+    ''' Sandpaper converter commands. '''
+    pass
+
+@sandpaper_converter.command('run')
+@click.argument('json_db_file')
+@click.argument('data_dir')
+def run_sandpaper_converter(json_db_file, data_dir):
+    ''' Run the sandpaper converter. '''
+    converter_instance = SandpaperConverter(json_db_file, data_dir)
+    converter_instance.run()
diff --git a/app/converters/sandpaper.py b/app/converters/sandpaper.py
index 2ea61d98..27f2bcc6 100644
--- a/app/converters/sandpaper.py
+++ b/app/converters/sandpaper.py
@@ -7,101 +7,106 @@ import os
 import shutil
 
 
-def convert(json_db_file, data_dir):
-    with open(json_db_file, 'r') as f:
-        json_db = json.loads(f.read())
+class SandpaperConverter:
+    def __init__(self, json_db_file, data_dir):
+        self.json_db_file = json_db_file
+        self.data_dir = data_dir
 
-    for json_user in json_db:
-        if not json_user['confirmed']:
-            current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
-            continue
-        user_dir = os.path.join(data_dir, str(json_user['id']))
-        convert_user(json_user, user_dir)
-        db.session.commit()
+    def run(self):
+        with open(self.json_db_file, 'r') as f:
+            json_db = json.loads(f.read())
 
+        for json_user in json_db:
+            if not json_user['confirmed']:
+                current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
+                continue
+            user_dir = os.path.join(self.data_dir, str(json_user['id']))
+            self.convert_user(json_user, user_dir)
+            db.session.commit()
 
-def convert_user(json_user, user_dir):
-    current_app.logger.info(f'Create User {json_user["username"]}...')
-    user = User(
-        confirmed=json_user['confirmed'],
-        email=json_user['email'],
-        last_seen=datetime.fromtimestamp(json_user['last_seen']),
-        member_since=datetime.fromtimestamp(json_user['member_since']),
-        password_hash=json_user['password_hash'],  # TODO: Needs to be added manually
-        username=json_user['username']
-    )
-    db.session.add(user)
-    db.session.flush(objects=[user])
-    db.session.refresh(user)
-    try:
-        user.makedirs()
-    except OSError as e:
-        current_app.logger.error(e)
-        db.session.rollback()
-        raise Exception('Internal Server Error')
-    for json_corpus in json_user['corpora'].values():
-        if not json_corpus['files'].values():
-            current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
-            continue
-        corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
-        convert_corpus(json_corpus, user, corpus_dir)
-    current_app.logger.info('Done')
 
-
-def convert_corpus(json_corpus, user, corpus_dir):
-    current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
-    corpus = Corpus(
-        user=user,
-        creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
-        description=json_corpus['description'],
-        title=json_corpus['title']
-    )
-    db.session.add(corpus)
-    db.session.flush(objects=[corpus])
-    db.session.refresh(corpus)
-    try:
-        corpus.makedirs()
-    except OSError as e:
-        current_app.logger.error(e)
-        db.session.rollback()
-        raise Exception('Internal Server Error')
-    for json_corpus_file in json_corpus['files'].values():
-        convert_corpus_file(json_corpus_file, corpus, corpus_dir)
-    current_app.logger.info('Done')
+    def convert_user(self, json_user, user_dir):
+        current_app.logger.info(f'Create User {json_user["username"]}...')
+        user = User(
+            confirmed=json_user['confirmed'],
+            email=json_user['email'],
+            last_seen=datetime.fromtimestamp(json_user['last_seen']),
+            member_since=datetime.fromtimestamp(json_user['member_since']),
+            password_hash=json_user['password_hash'],  # TODO: Needs to be added manually
+            username=json_user['username']
+        )
+        db.session.add(user)
+        db.session.flush(objects=[user])
+        db.session.refresh(user)
+        try:
+            user.makedirs()
+        except OSError as e:
+            current_app.logger.error(e)
+            db.session.rollback()
+            raise Exception('Internal Server Error')
+        for json_corpus in json_user['corpora'].values():
+            if not json_corpus['files'].values():
+                current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
+                continue
+            corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
+            self.convert_corpus(json_corpus, user, corpus_dir)
+        current_app.logger.info('Done')
 
 
-def convert_corpus_file(json_corpus_file, corpus, corpus_dir):
-    current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
-    corpus_file = CorpusFile(
-        corpus=corpus,
-        address=json_corpus_file['address'],
-        author=json_corpus_file['author'],
-        booktitle=json_corpus_file['booktitle'],
-        chapter=json_corpus_file['chapter'],
-        editor=json_corpus_file['editor'],
-        filename=json_corpus_file['filename'],
-        institution=json_corpus_file['institution'],
-        journal=json_corpus_file['journal'],
-        mimetype='application/vrt+xml',
-        pages=json_corpus_file['pages'],
-        publisher=json_corpus_file['publisher'],
-        publishing_year=json_corpus_file['publishing_year'],
-        school=json_corpus_file['school'],
-        title=json_corpus_file['title']
-    )
-    db.session.add(corpus_file)
-    db.session.flush(objects=[corpus_file])
-    db.session.refresh(corpus_file)
-    try:
-        shutil.copy2(
-            os.path.join(corpus_dir, json_corpus_file['filename']),
-            corpus_file.path
+    def convert_corpus(self, json_corpus, user, corpus_dir):
+        current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
+        corpus = Corpus(
+            user=user,
+            creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
+            description=json_corpus['description'],
+            title=json_corpus['title']
         )
-    except:
-        current_app.logger.warning(
-            'Can not convert corpus file: '
-            f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
-            ' -> '
-            f'{corpus_file.path}'
+        db.session.add(corpus)
+        db.session.flush(objects=[corpus])
+        db.session.refresh(corpus)
+        try:
+            corpus.makedirs()
+        except OSError as e:
+            current_app.logger.error(e)
+            db.session.rollback()
+            raise Exception('Internal Server Error')
+        for json_corpus_file in json_corpus['files'].values():
+            self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
+        current_app.logger.info('Done')
+
+
+    def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
+        current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
+        corpus_file = CorpusFile(
+            corpus=corpus,
+            address=json_corpus_file['address'],
+            author=json_corpus_file['author'],
+            booktitle=json_corpus_file['booktitle'],
+            chapter=json_corpus_file['chapter'],
+            editor=json_corpus_file['editor'],
+            filename=json_corpus_file['filename'],
+            institution=json_corpus_file['institution'],
+            journal=json_corpus_file['journal'],
+            mimetype='application/vrt+xml',
+            pages=json_corpus_file['pages'],
+            publisher=json_corpus_file['publisher'],
+            publishing_year=json_corpus_file['publishing_year'],
+            school=json_corpus_file['school'],
+            title=json_corpus_file['title']
         )
-    current_app.logger.info('Done')
+        db.session.add(corpus_file)
+        db.session.flush(objects=[corpus_file])
+        db.session.refresh(corpus_file)
+        try:
+            shutil.copy2(
+                os.path.join(corpus_dir, json_corpus_file['filename']),
+                corpus_file.path
+            )
+        except Exception:
+            current_app.logger.warning(
+                'Can not convert corpus file: '
+                f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
+                ' -> '
+                f'{corpus_file.path}'
+            )
+        current_app.logger.info('Done')
diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py
index 3766f2a6..34663b69 100644
--- a/app/corpora/__init__.py
+++ b/app/corpora/__init__.py
@@ -3,6 +3,7 @@ from flask_login import login_required
 
 
 bp = Blueprint('corpora', __name__)
+bp.cli.short_help = 'Corpus commands.'
 
 
 @bp.before_request
@@ -15,6 +16,4 @@ def before_request():
     pass
 
 
-from . import cqi_over_socketio, routes, json_routes
-from . import files
-from . import followers
+from . import cli, cqi_over_socketio, files, followers, routes, json_routes
diff --git a/app/corpora/cli.py b/app/corpora/cli.py
new file mode 100644
index 00000000..d21e8289
--- /dev/null
+++ b/app/corpora/cli.py
@@ -0,0 +1,27 @@
+from app import db
+from app.models import Corpus, CorpusStatus
+import os
+import shutil
+from . import bp
+
+
+@bp.cli.command('reset')
+def reset():
+    ''' Reset built corpora. '''
+    # Every corpus currently in one of these states is reset.
+    status = [
+        CorpusStatus.QUEUED,
+        CorpusStatus.BUILDING,
+        CorpusStatus.BUILT,
+        CorpusStatus.STARTING_ANALYSIS_SESSION,
+        CorpusStatus.RUNNING_ANALYSIS_SESSION,
+        CorpusStatus.CANCELING_ANALYSIS_SESSION
+    ]
+    for corpus in [x for x in Corpus.query.all() if x.status in status]:
+        # Remove the corpus' 'cwb' directory; removal errors are ignored.
+        shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
+        corpus.status = CorpusStatus.SUBMITTED
+        corpus.num_analysis_sessions = 0
+    # Persist the changes; without an explicit commit they are discarded
+    # when the CLI's application context is torn down.
+    db.session.commit()
diff --git a/app/errors/__init__.py b/app/errors/__init__.py
index 847658fb..0d79af48 100644
--- a/app/errors/__init__.py
+++ b/app/errors/__init__.py
@@ -1,6 +1,5 @@
-from werkzeug.exceptions import HTTPException
-from .handlers import generic
+from flask import Blueprint
 
 
-def init_app(app):
-    app.register_error_handler(HTTPException, generic)
+bp = Blueprint('errors', __name__)
+from . import handlers
diff --git a/app/errors/handlers.py b/app/errors/handlers.py
index fe7aaf4f..a18979ab 100644
--- a/app/errors/handlers.py
+++ b/app/errors/handlers.py
@@ -1,13 +1,14 @@
-from flask import jsonify, render_template, request, Response
+from flask import jsonify, render_template, request
 from werkzeug.exceptions import HTTPException
-from typing import Tuple, Union
+from . import bp
 
 
-def generic(error: HTTPException) -> Tuple[Union[str, Response], int]:
-    ''' Generic error handler '''
-    accent_json: bool = request.accept_mimetypes.accept_json
-    accept_html: bool = request.accept_mimetypes.accept_html
-    if accent_json and not accept_html:
-        response: Response = jsonify(str(error))
+@bp.app_errorhandler(HTTPException)
+def handle_http_exception(error):
+    ''' Generic HTTP exception handler '''
+    accept_json = request.accept_mimetypes.accept_json
+    accept_html = request.accept_mimetypes.accept_html
+    if accept_json and not accept_html:
+        response = jsonify(str(error))
         return response, error.code
     return render_template('errors/error.html.j2', error=error), error.code
diff --git a/app/main/__init__.py b/app/main/__init__.py
index f32fed5f..c9586fca 100644
--- a/app/main/__init__.py
+++ b/app/main/__init__.py
@@ -2,4 +2,4 @@ from flask import Blueprint
 
 
 bp = Blueprint('main', __name__, cli_group=None)
-from . import routes
+from . import cli, routes
diff --git a/app/main/cli.py b/app/main/cli.py
new file mode 100644
index 00000000..0284bb88
--- /dev/null
+++ b/app/main/cli.py
@@ -0,0 +1,45 @@
+from flask import current_app
+from flask_migrate import upgrade
+import os
+from app.models import (
+    CorpusFollowerRole,
+    Role,
+    SpaCyNLPPipelineModel,
+    TesseractOCRPipelineModel,
+    User
+)
+from . import bp
+
+
+@bp.cli.command('deploy')
+def deploy():
+    ''' Run deployment tasks. '''
+    # Make default directories below the configured data directory
+    print('Make default directories')
+    base_dir = current_app.config['NOPAQUE_DATA_DIR']
+    default_dirs = [
+        os.path.join(base_dir, 'tmp'),
+        os.path.join(base_dir, 'users')
+    ]
+    for default_dir in default_dirs:
+        if os.path.exists(default_dir):
+            if not os.path.isdir(default_dir):
+                raise NotADirectoryError(f'{default_dir} is not a directory')
+        else:
+            os.mkdir(default_dir)
+
+    # Migrate database to latest revision
+    print('Migrate database to latest revision')
+    upgrade()
+
+    # Insert/Update default database values
+    print('Insert/Update default Roles')
+    Role.insert_defaults()
+    print('Insert/Update default Users')
+    User.insert_defaults()
+    print('Insert/Update default CorpusFollowerRoles')
+    CorpusFollowerRole.insert_defaults()
+    print('Insert/Update default SpaCyNLPPipelineModels')
+    SpaCyNLPPipelineModel.insert_defaults()
+    print('Insert/Update default TesseractOCRPipelineModels')
+    TesseractOCRPipelineModel.insert_defaults()
diff --git a/app/tests/__init__.py b/app/tests/__init__.py
index 4665c05d..e69de29b 100644
--- a/app/tests/__init__.py
+++ b/app/tests/__init__.py
@@ -1,5 +0,0 @@
-from flask import Blueprint
-
-
-bp = Blueprint('tests', __name__)
-from . import cli
diff --git a/app/users/__init__.py b/app/users/__init__.py
index 8584a56f..b3492a37 100644
--- a/app/users/__init__.py
+++ b/app/users/__init__.py
@@ -15,5 +15,4 @@ def before_request():
     pass
 
 
-from . import events, json_routes, routes
-from . import settings
+from . import events, json_routes, routes, settings
-- 
GitLab