From 7db27020c751a1a648072d3ac478cca220dc1d0e Mon Sep 17 00:00:00 2001 From: Patrick Jentsch <p.jentsch@uni-bielefeld.de> Date: Wed, 17 Jul 2019 13:34:20 +0200 Subject: [PATCH] Add Docker Swarm interface. --- app/__init__.py | 4 ++ app/main/forms.py | 6 ++ app/main/views.py | 38 +++++++++++-- app/swarm.py | 98 ++++++++++++++++++++++++++++++++ app/templates/main/admin.html.j2 | 12 ++++ requirements.txt | 1 + 6 files changed, 155 insertions(+), 4 deletions(-) create mode 100644 app/swarm.py diff --git a/app/__init__.py b/app/__init__.py index 7b4f60ad..e8bcfa39 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -3,6 +3,7 @@ from flask import Flask from flask_login import LoginManager from flask_mail import Mail from flask_sqlalchemy import SQLAlchemy +from .swarm import Swarm db = SQLAlchemy() @@ -11,6 +12,7 @@ login_manager = LoginManager() login_manager.login_view = 'auth.login' mail = Mail() +swarm = Swarm() def create_app(config_name): @@ -21,6 +23,8 @@ def create_app(config_name): db.init_app(app) login_manager.init_app(app) mail.init_app(app) + if not hasattr(app, 'extensions'): + app.extensions = {} from .auth import auth as auth_blueprint app.register_blueprint(auth_blueprint, url_prefix='/auth') diff --git a/app/main/forms.py b/app/main/forms.py index e69de29b..e850cfe1 100644 --- a/app/main/forms.py +++ b/app/main/forms.py @@ -0,0 +1,6 @@ +from flask_wtf import FlaskForm +from wtforms import SubmitField + + +class SwarmForm(FlaskForm): + submit = SubmitField('Submit') diff --git a/app/main/views.py b/app/main/views.py index 2f7fce7a..e7c9ab86 100644 --- a/app/main/views.py +++ b/app/main/views.py @@ -1,9 +1,12 @@ -from flask import render_template +from flask import redirect, render_template, url_for from ..models import User from ..tables import AdminUserTable, AdminUserItem from . 
import main from ..decorators import admin_required -from flask_login import login_required +from flask_login import current_user, login_required +from .forms import SwarmForm +from ..import swarm +from threading import Thread @main.route('/') @@ -16,7 +19,7 @@ def about(): return render_template('main/about.html.j2', title='About') -@main.route('/admin') +@main.route('/admin', methods=['GET', 'POST']) @login_required @admin_required def for_admins_only(): @@ -26,5 +29,32 @@ def for_admins_only(): users = User.query.order_by(User.username).all() items = [AdminUserItem(u.username, u.email, u.role_id, u.confirmed) for u in users] table = AdminUserTable(items) + + swarm_form = SwarmForm() + if swarm_form.validate_on_submit(): + ''' + ' TODO: Implement a Job class. For now a dictionary representation is + ' enough. + ''' + job = { + 'creator': current_user.id, + 'id': '5fd40cb0cadef3ab5676c4968fc3d748', + 'requested_cpus': 2, + 'requested_memory': 2048, + 'service': 'ocr', + 'service_args': { + 'lang': 'eng' + }, + 'status': 'queued' + } + ''' + ' TODO: Let the scheduler run this job in the background. Using self + ' created threads is just for testing purpose as there is no + ' scheduler available. 
+ ''' + thread = Thread(target=swarm.run, args=(job,)) + thread.start() + return redirect(url_for('main.for_admins_only')) + return render_template('main/admin.html.j2', title='Administration tools', - table=table.__html__()) + swarm_form=swarm_form, table=table.__html__()) diff --git a/app/swarm.py b/app/swarm.py new file mode 100644 index 00000000..928a79cb --- /dev/null +++ b/app/swarm.py @@ -0,0 +1,98 @@ +import docker +import subprocess + + +class Swarm: + def __init__(self): + self.docker = docker.from_env() + self.checkout() + + def checkout(self): + cpus = 0 + memory = 0 + for node in self.docker.nodes.list(filters={'role': 'worker'}): + if node.attrs.get('Status').get('State') == 'ready': + cpus += 0 or node.attrs \ + .get('Description') \ + .get('Resources') \ + .get('NanoCPUs') + memory += 0 or node.attrs \ + .get('Description') \ + .get('Resources') \ + .get('MemoryBytes') + ''' + ' For whatever reason the Python Docker SDK provides a CPU count in + ' nano (10^-9); as this is not that handy, it gets converted. + ''' + cpus *= 10 ** -9 + ''' + ' For a more natural handling the memory information + ' gets converted from bytes to megabytes. + ''' + memory *= 10 ** -6 + self.cpus = int(cpus) + self.memory = int(memory) + self.available_cpus = self.cpus + self.available_memory = self.memory + + def run(self, job): + if self.available_cpus < job['requested_cpus'] or \ + self.available_memory < job['requested_memory']: + print('Not enough ressources available.') + ''' + ' TODO: At this point the scheduler thinks that the job gets + ' processed, which apparently is not the case. So the job + ' needs to get rescheduled and gain a new chance to get + ' processed (next). + ' + ' Note: Maybe it is a good idea to create a method that checks if + ' enough resources are available before the run method gets + ' executed. This would replace the need of the TODO mentioned + ' above. 
+ ''' + return + + job['status'] = 'running' + # TODO: Push job changes to the database + self.available_cpus -= job['requested_cpus'] + self.available_memory -= job['requested_memory'] + + container_command = 'ocr' \ + + ' -i /input/{}'.format(job['id']) \ + + ' -l {}'.format(job['service_args']['lang']) \ + + ' -o /output' \ + + ' --keep-intermediates' \ + + ' --nCores {}'.format(job['requested_cpus']) + container_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr' + container_mount = '/media/sf_files/=/input/' + ''' + ' Swarm mode is intended to run containers which are meant to serve a + ' non-terminating service like a webserver. In order to process the + ' occurring jobs it is necessary to use one-shot (terminating) + ' containers. These one-shot containers are spawned with a program + ' called JaaS¹ (Jobs as a Service), which is described in Alex Ellis' + ' short article "One-shot containers on Docker Swarm"². + ' + ' ¹ https://github.com/alexellis/jaas + ' ² https://blog.alexellis.io/containers-on-swarm/ + ''' + cmd = ['jaas', 'run'] \ + + ['--command', container_command] \ + + ['--image', container_image] \ + + ['--mount', container_mount] \ + + ['--timeout', '86400s'] + completed_process = subprocess.run( + cmd, + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL + ) + + self.available_cpus += job['requested_cpus'] + self.available_memory += job['requested_memory'] + if (completed_process.returncode == 0): + job['status'] = 'finished' + else: + job['status'] = 'failed' + # TODO: Push job changes to the database + + return diff --git a/app/templates/main/admin.html.j2 b/app/templates/main/admin.html.j2 index 1420ca4c..56f550c5 100644 --- a/app/templates/main/admin.html.j2 +++ b/app/templates/main/admin.html.j2 @@ -9,4 +9,16 @@ </div> </div> </div> + +<div class="col s12"> + <div class="card large"> + <div class="card-content"> + <span class="card-title">Swarm</span> + <form method="POST"> + {{ swarm_form.hidden_tag() }} + {{ 
swarm_form.submit(class='btn') }} + </form> + </div> + </div> +</div> {% endblock %} diff --git a/requirements.txt b/requirements.txt index 5eede513..55c5be5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +docker==4.0.2 Flask==1.0.3 Flask-APScheduler==1.11.0 Flask-Login==0.4.1 -- GitLab