From 838beafd839f63411dfed7a1860bf4ca499eeebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B6bel?= <dgoebel@techfak.uni-bielefeld.de> Date: Wed, 27 Sep 2023 14:29:11 +0200 Subject: [PATCH] Support public GitLab repositories hosted on private GitLab instances #52 --- .gitlab-ci.yml | 4 +- .pre-commit-config.yaml | 4 +- app/api/endpoints/workflow.py | 4 +- app/api/endpoints/workflow_credentials.py | 19 +++++-- app/api/endpoints/workflow_execution.py | 4 +- app/scm/scm.py | 24 ++++----- app/tests/api/test_workflow.py | 22 ++++++++ app/tests/api/test_workflow_credentials.py | 57 ++++++++++++++++++-- app/tests/api/test_workflow_execution.py | 62 ++++++++++++++++++---- app/tests/unit/test_scm.py | 3 +- requirements.txt | 2 +- 11 files changed, 163 insertions(+), 42 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a195b18..87dbb16 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -136,7 +136,7 @@ lint-test-job: # Runs linters checks on code publish-dev-docker-container-job: stage: deploy image: - name: gcr.io/kaniko-project/executor:v1.9.1-debug + name: gcr.io/kaniko-project/executor:v1.16.0-debug entrypoint: [""] dependencies: [] only: @@ -159,7 +159,7 @@ publish-dev-docker-container-job: publish-docker-container-job: stage: deploy image: - name: gcr.io/kaniko-project/executor:v1.9.1-debug + name: gcr.io/kaniko-project/executor:v1.16.0-debug entrypoint: [""] dependencies: [] only: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f894d6..5bc2182 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: files: app args: [--check] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.289' + rev: 'v0.0.291' hooks: - id: ruff - repo: https://github.com/PyCQA/isort @@ -39,5 +39,5 @@ repos: additional_dependencies: - boto3-stubs-lite[s3] - sqlalchemy>=2.0.0.<2.1.0 - - pydantic<2.4.0 + - pydantic<2.5.0 - types-requests diff --git a/app/api/endpoints/workflow.py b/app/api/endpoints/workflow.py index 58768df..791c8ed 100644 --- a/app/api/endpoints/workflow.py +++ b/app/api/endpoints/workflow.py @@ -9,7 +9,7 @@ from app.api.utils import check_repo, upload_scm_file from app.core.config import settings from app.crud import CRUDWorkflow, CRUDWorkflowVersion from app.crud.crud_workflow_mode import CRUDWorkflowMode -from app.git_repository import build_repository +from app.git_repository import GitHubRepository, build_repository from app.schemas.workflow import WorkflowIn, WorkflowOut, WorkflowStatistic, WorkflowUpdate from app.schemas.workflow_version import WorkflowVersion as WorkflowVersionSchema from app.scm import SCM, Provider @@ -148,7 +148,7 @@ async def create_workflow( # If it is a private repository, create an SCM file and upload it to the params bucket scm_provider = Provider.from_repo(repo, name=f"repo{workflow_db.workflow_id.hex}") - if scm_provider is not None: + if repo.token is not None or not isinstance(repo, GitHubRepository): background_tasks.add_task( upload_scm_file, s3=s3, scm=SCM([scm_provider]), scm_file_id=workflow_db.workflow_id.hex ) diff --git a/app/api/endpoints/workflow_credentials.py b/app/api/endpoints/workflow_credentials.py index bca48e4..55828af 100644 --- a/app/api/endpoints/workflow_credentials.py +++ b/app/api/endpoints/workflow_credentials.py @@ -7,7 +7,7 @@ from app.api.utils import check_repo, upload_scm_file from app.core.config import settings from app.crud.crud_workflow import CRUDWorkflow from app.crud.crud_workflow_version import CRUDWorkflowVersion -from app.git_repository import build_repository +from app.git_repository import GitHubRepository, build_repository from app.schemas.workflow import WorkflowCredentialsIn, WorkflowCredentialsOut from app.scm import SCM, Provider @@ -102,7 +102,7 @@ async def update_workflow_credentials( background_tasks.add_task( upload_scm_file, s3=s3, - scm=SCM(providers=[scm_provider]), # type: ignore[list-item] + scm=SCM(providers=[scm_provider]), scm_file_id=workflow.workflow_id.hex, ) await CRUDWorkflow.update_credentials(db, workflow.workflow_id, token=credentials.token) @@ -110,6 +110,7 @@ async def update_workflow_credentials( @router.delete("", status_code=status.HTTP_204_NO_CONTENT, summary="Delete the credentials of a workflow") async def delete_workflow_credentials( + background_tasks: BackgroundTasks, workflow: CurrentWorkflow, db: DBSession, current_user: CurrentUser, @@ -132,6 +133,8 @@ async def delete_workflow_credentials( Async function to ask the auth service for authorization. Dependency Injection. s3 : boto3_type_annotations.s3.ServiceResource S3 Service to perform operations on buckets in Ceph. Dependency Injection. + background_tasks : fastapi.BackgroundTasks + Entrypoint for new BackgroundTasks. Provided by FastAPI. Returns ------- @@ -140,5 +143,15 @@ async def delete_workflow_credentials( """ rbac_operation = "delete" if workflow.developer_id == current_user.uid else "delete_any" await authorization(rbac_operation) - s3.Bucket(settings.PARAMS_BUCKET).Object(f"{workflow.workflow_id.hex}.scm").delete() + repo = build_repository(workflow.repository_url, workflow.versions[0].git_commit_hash) + if isinstance(repo, GitHubRepository): + s3.Bucket(settings.PARAMS_BUCKET).Object(f"{workflow.workflow_id.hex}.scm").delete() + else: + scm_provider = Provider.from_repo(repo=repo, name=f"repo{workflow.workflow_id.hex}") + background_tasks.add_task( + upload_scm_file, + s3=s3, + scm=SCM(providers=[scm_provider]), + scm_file_id=workflow.workflow_id.hex, + ) await CRUDWorkflow.update_credentials(db, workflow.workflow_id, token=None) diff --git a/app/api/endpoints/workflow_execution.py b/app/api/endpoints/workflow_execution.py index 4af4831..3d780c5 100644 --- a/app/api/endpoints/workflow_execution.py +++ b/app/api/endpoints/workflow_execution.py @@ -24,7 +24,7 @@ from app.api.utils import ( ) from app.core.config import settings from app.crud import CRUDWorkflowExecution, CRUDWorkflowVersion -from app.git_repository import build_repository +from app.git_repository import GitHubRepository, build_repository from app.schemas.workflow_execution import DevWorkflowExecutionIn, WorkflowExecutionIn, WorkflowExecutionOut from app.scm import SCM, Provider from app.slurm.slurm_rest_client import SlurmClient @@ -259,7 +259,7 @@ async def start_arbitrary_workflow( ) scm_provider = Provider.from_repo(repo=repo, name=f"repo{execution.execution_id.hex}") - if scm_provider is not None: + if repo.token is not None or not isinstance(repo, GitHubRepository): background_tasks.add_task( upload_scm_file, s3=s3, scm=SCM([scm_provider]), scm_file_id=execution.execution_id.hex ) diff --git a/app/scm/scm.py b/app/scm/scm.py index fc1c93e..1783a02 100644 --- a/app/scm/scm.py +++ b/app/scm/scm.py @@ -13,8 +13,8 @@ class Provider: def __init__( self, name: str, - user: str, - password: str, + user: Optional[str] = None, + password: Optional[str] = None, platform: Optional[str] = None, server: Optional[str] = None, token: Optional[str] = None, @@ -85,7 +85,7 @@ class Provider: return Provider(**params) def __str__(self) -> str: - return f"Provider(name={self.name} user={self.user} password={len(self.password)*'*'})" + return f"Provider(name={self.name} user={self.user}{'password protected' if self.password else ''})" def __repr__(self) -> str: return str(self) @@ -96,7 +96,7 @@ class Provider: return other.name == self.name and other.password == self.password and other.user == self.user @staticmethod - def from_repo(repo: GitRepository, name: str) -> Optional["Provider"]: + def from_repo(repo: GitRepository, name: str) -> "Provider": """ Build a provider from a git repository. @@ -112,15 +112,13 @@ class Provider: provider : Provider | None Provider with data from the git repository. If the repository is public, return None """ - if repo.token is None: - return None - elif isinstance(repo, GitHubRepository): + if isinstance(repo, GitHubRepository): return GitHubProvider(user=repo.account, password=repo.token) elif isinstance(repo, GitlabRepository): return GitlabProvider( name=name, password=repo.token, server=str(AnyHttpUrl.build(scheme="https", host=repo.domain))[:-1] ) - return None # pragma: no cover + return Provider(name=name, password=repo.token, server=repo.repo_url) # pragma: no cover class GitHubProvider(Provider): @@ -128,11 +126,11 @@ class GitHubProvider(Provider): Convenience class for the GitHub provider """ - def __init__(self, user: str, password: str): + def __init__(self, user: str, password: Optional[str] = None): super().__init__(name="github", user=user, password=password) def __str__(self) -> str: - return f"GitHubProvider(user={self.user} password={len(self.password)*'*'})" + return f"GitHubProvider(user={self.user}{'password protected' if self.password else ''})" class GitlabProvider(Provider): @@ -140,13 +138,11 @@ class GitlabProvider(Provider): Convenience class for the GitLab provider """ - def __init__(self, name: str, password: str, server: str): + def __init__(self, name: str, server: str, password: Optional[str]): super().__init__(name=name, user="nonempty", password=password, platform="gitlab", server=server) def __str__(self) -> str: - return ( - f"GitlabProvider(name={self.name} server={self.server} user={self.user} password={len(self.password)*'*'})" - ) + return f"GitlabProvider(name={self.name} server={self.server} user={self.user}{'password protected' if self.password else ''})" # noqa: E501 class SCM: diff --git a/app/tests/api/test_workflow.py b/app/tests/api/test_workflow.py index 1812155..f1a73a1 100644 --- a/app/tests/api/test_workflow.py +++ b/app/tests/api/test_workflow.py @@ -73,6 +73,7 @@ class TestWorkflowRoutesCreate(_TestWorkflowRoutes): client: AsyncClient, db: AsyncSession, random_user: UserWithAuthHeader, + mock_s3_service: MockS3ServiceResource, ) -> None: """ Test for successfully creating a workflow with a Gitlab repository. @@ -85,6 +86,8 @@ class TestWorkflowRoutesCreate(_TestWorkflowRoutes): HTTP Client to perform the request on. random_user : app.tests.utils.user.UserWithAuthHeader Random user for testing. + mock_s3_service : app.tests.mocks.mock_s3_resource.MockS3ServiceResource + Mock S3 Service to manipulate objects. """ workflow = WorkflowIn( git_commit_hash=random_hex_string(), @@ -100,10 +103,29 @@ class TestWorkflowRoutesCreate(_TestWorkflowRoutes): stmt = select(Workflow).where(Workflow._workflow_id == UUID(hex=created_workflow["workflow_id"]).bytes) db_workflow = await db.scalar(stmt) assert db_workflow is not None + + # Download SCM file in PARAMS_BUCKET that should be created + scm_file_name = f"{db_workflow.workflow_id.hex}.scm" + assert scm_file_name in mock_s3_service.Bucket(settings.PARAMS_BUCKET).objects.all_keys() + obj = mock_s3_service.Bucket(settings.PARAMS_BUCKET).Object(scm_file_name) + with BytesIO() as f: + obj.download_fileobj(f) + f.seek(0) + scm = SCM.deserialize(f) + + # Check content of SCM file + assert len(scm.providers) == 1 + provider = scm.providers[0] + assert provider.password is None + assert provider.name == f"repo{db_workflow.workflow_id.hex}" + assert provider.platform == "gitlab" + assert provider.server == "https://gitlab.de" + await db.execute( delete(Workflow).where(Workflow._workflow_id == UUID(hex=created_workflow["workflow_id"]).bytes) ) await db.commit() + obj.delete() @pytest.mark.asyncio async def test_create_workflow_with_private_gitlab( diff --git a/app/tests/api/test_workflow_credentials.py b/app/tests/api/test_workflow_credentials.py index 83bcf36..0ab806d 100644 --- a/app/tests/api/test_workflow_credentials.py +++ b/app/tests/api/test_workflow_credentials.py @@ -5,7 +5,7 @@ from botocore.client import ClientError from clowmdb.models import Workflow from fastapi import status from httpx import AsyncClient -from sqlalchemy import select +from sqlalchemy import select, update from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import settings @@ -213,7 +213,7 @@ class TestWorkflowCredentialsRoutesDelete(_TestWorkflowCredentialRoutes): mock_s3_service: MockS3ServiceResource, ) -> None: """ - Test for updating the credentials on a workflow formerly hosted in a public git repository. + Test for deleting the credentials on a workflow hosted in a public GitHub repository. Parameters ---------- @@ -246,7 +246,7 @@ class TestWorkflowCredentialsRoutesDelete(_TestWorkflowCredentialRoutes): scm_file.download_fileobj(f) @pytest.mark.asyncio - async def test_delete_workflow_credentials_on_private_workflow( + async def test_delete_workflow_credentials_on_private_github_workflow( self, db: AsyncSession, client: AsyncClient, @@ -255,7 +255,7 @@ class TestWorkflowCredentialsRoutesDelete(_TestWorkflowCredentialRoutes): mock_s3_service: MockS3ServiceResource, ) -> None: """ - Test for updating the credentials on a workflow formerly hosted in a public git repository. + Test for deleting the credentials on a workflow formerly hosted in a private GitHub repository. Parameters ---------- @@ -287,6 +287,55 @@ class TestWorkflowCredentialsRoutesDelete(_TestWorkflowCredentialRoutes): with BytesIO() as f: scm_file.download_fileobj(f) + @pytest.mark.asyncio + async def test_delete_workflow_credentials_on_gitlab_workflow( + self, + db: AsyncSession, + client: AsyncClient, + random_user: UserWithAuthHeader, + random_workflow: Workflow, + mock_s3_service: MockS3ServiceResource, + ) -> None: + """ + Test for deleting the credentials on a workflow hosted in a public GitLab repository. + + Parameters + ---------- + db : sqlalchemy.ext.asyncio.AsyncSession. + Async database session to perform query on. + client : httpx.AsyncClient + HTTP Client to perform the request on. + random_user : app.tests.utils.user.UserWithAuthHeader + Random user for testing. + mock_s3_service : app.tests.mocks.mock_s3_resource.MockS3ServiceResource + Mock S3 Service to manipulate objects. + random_workflow : app.schemas.workflow.WorkflowOut + Random workflow for testing. + """ + + stmt = ( + update(Workflow) + .where(Workflow._workflow_id == random_workflow.workflow_id.bytes) + .values(repository_url="https://gitlab.com/example/example") + ) + await db.execute(stmt) + await db.commit() + + response = await client.delete( + "/".join([self.base_path, str(random_workflow.workflow_id), "credentials"]), + headers=random_user.auth_headers, + ) + assert response.status_code == status.HTTP_204_NO_CONTENT + + scm_file = mock_s3_service.Bucket(settings.PARAMS_BUCKET).Object(f"{random_workflow.workflow_id.hex}.scm") + with BytesIO() as f: + scm_file.download_fileobj(f) + f.seek(0) + scm = SCM.deserialize(f) + + assert len(scm.providers) == 1 + assert scm.providers[0].password is None + class TestWorkflowCredentialsRoutesGet(_TestWorkflowCredentialRoutes): @pytest.mark.asyncio diff --git a/app/tests/api/test_workflow_execution.py b/app/tests/api/test_workflow_execution.py index 3f36022..73cdc60 100644 --- a/app/tests/api/test_workflow_execution.py +++ b/app/tests/api/test_workflow_execution.py @@ -9,10 +9,11 @@ from sqlalchemy import update from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import settings +from app.git_repository import build_repository from app.schemas.workflow import WorkflowOut from app.schemas.workflow_execution import DevWorkflowExecutionIn, WorkflowExecutionIn from app.schemas.workflow_mode import WorkflowModeIn -from app.scm import SCM +from app.scm import SCM, Provider from app.tests.mocks import MockS3ServiceResource, MockSlurmCluster from app.tests.utils.bucket import add_permission_for_bucket from app.tests.utils.user import UserWithAuthHeader @@ -30,7 +31,7 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): client: AsyncClient, random_user: UserWithAuthHeader, random_workflow_version: WorkflowVersion, - random_workflow: Workflow, + random_workflow: WorkflowOut, mock_s3_service: MockS3ServiceResource, mock_slurm_cluster: MockSlurmCluster, ) -> None: @@ -43,6 +44,8 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): HTTP Client to perform the request on. random_user : app.tests.utils.user.UserWithAuthHeader Random user for testing. + random_workflow : app.schemas.workflow.WorkflowOut + Random workflow for testing. random_workflow_version : clowmdb.models.WorkflowVersion Random workflow version for testing. mock_s3_service : app.tests.mocks.mock_s3_resource.MockS3ServiceResource @@ -144,6 +147,7 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): db: AsyncSession, client: AsyncClient, random_user: UserWithAuthHeader, + random_workflow: WorkflowOut, random_workflow_version: WorkflowVersion, mock_s3_service: MockS3ServiceResource, mock_slurm_cluster: MockSlurmCluster, @@ -159,6 +163,8 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): HTTP Client to perform the request on. random_user : app.tests.utils.user.UserWithAuthHeader Random user for testing. + random_workflow : app.schemas.workflow.WorkflowOut + Random workflow for testing. random_workflow_version : clowmdb.models.WorkflowVersion Random workflow version for testing. mock_s3_service : app.tests.mocks.mock_s3_resource.MockS3ServiceResource @@ -166,6 +172,7 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): mock_slurm_cluster : app.tests.mocks.mock_slurm_cluster.MockSlurmCluster Mock Slurm cluster to inspect submitted jobs. """ + stmt = ( update(Workflow) .where(Workflow._workflow_id == random_workflow_version.workflow_id.bytes) @@ -173,6 +180,19 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): ) await db.execute(stmt) await db.commit() + + # Upload SCM file to PARAMS_BUCKET + scm_provider = Provider.from_repo( + build_repository(random_workflow.repository_url, random_workflow_version.git_commit_hash), + name=f"repo{random_workflow.workflow_id.hex}", + ) + assert scm_provider is not None + with BytesIO() as f: + SCM([scm_provider]).serialize(f) + f.seek(0) + scm_obj = mock_s3_service.Bucket(settings.PARAMS_BUCKET).Object(f"{random_workflow.workflow_id.hex}.scm") + scm_obj.upload_fileobj(f) + execution_in = WorkflowExecutionIn(workflow_version_id=random_workflow_version.git_commit_hash, parameters={}) response = await client.post(self.base_path, headers=random_user.auth_headers, json=execution_in.model_dump()) assert response.status_code == status.HTTP_201_CREATED @@ -191,13 +211,15 @@ class TestWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): job = mock_slurm_cluster.get_job_by_name(str(execution_id)) assert job is not None assert job["job"]["environment"]["TOWER_WORKSPACE_ID"] == execution_id.hex[:16] - assert "NXF_SCM_FILE" not in job["job"]["environment"].keys() + assert "NXF_SCM_FILE" in job["job"]["environment"].keys() nextflow_script = job["script"] - assert "-hub gitlab" in nextflow_script + assert f"-hub repo{random_workflow.workflow_id.hex}" in nextflow_script assert "-entry" not in nextflow_script assert f"-revision {random_workflow_version.git_commit_hash}" in nextflow_script + scm_obj.delete() + @pytest.mark.asyncio async def test_start_too_many_workflow_executions( self, @@ -726,24 +748,42 @@ class TestDevWorkflowExecutionRoutesCreate(_TestWorkflowExecutionRoutes): assert execution_response["user_id"] == random_user.user.uid assert execution_response["status"] == WorkflowExecution.WorkflowExecutionStatus.PENDING - assert ( - f"params-{UUID(hex=execution_response['execution_id']).hex }.json" - in mock_s3_service.Bucket(settings.PARAMS_BUCKET).objects.all_keys() - ) - execution_id = UUID(execution_response["execution_id"]) + # Check if params file is created + params_file_name = f"params-{execution_id.hex}.json" + assert params_file_name in mock_s3_service.Bucket(settings.PARAMS_BUCKET).objects.all_keys() + + scm_file_name = f"{execution_id.hex}.scm" + scm_obj = mock_s3_service.Bucket(settings.PARAMS_BUCKET).Object(scm_file_name) + with BytesIO() as f: + scm_obj.download_fileobj(f) + f.seek(0) + scm = SCM.deserialize(f) + + # Check content of SCM file + assert len(scm.providers) == 1 + provider = scm.providers[0] + assert provider.password is None + assert provider.name == f"repo{execution_id.hex}" + assert provider.platform == "gitlab" + assert provider.server == "https://gitlab.com" + job = mock_slurm_cluster.get_job_by_name(str(execution_id)) assert job is not None assert job["job"]["environment"]["TOWER_WORKSPACE_ID"] == execution_id.hex[:16] - assert "NXF_SCM_FILE" not in job["job"]["environment"].keys() + assert "NXF_SCM_FILE" in job["job"]["environment"].keys() nextflow_script = job["script"] - assert "-hub gitlab" in nextflow_script + assert f"-hub repo{execution_id.hex}" in nextflow_script assert "-entry" not in nextflow_script assert f"-revision {execution_in.git_commit_hash}" in nextflow_script assert f"run {execution_in.repository_url}" in nextflow_script + # Clean up after test + mock_s3_service.Bucket(settings.PARAMS_BUCKET).Object(params_file_name).delete() + scm_obj.delete() + @pytest.mark.asyncio async def test_start_dev_workflow_execution_from_private_gitlab( self, diff --git a/app/tests/unit/test_scm.py b/app/tests/unit/test_scm.py index d438d82..cb11055 100644 --- a/app/tests/unit/test_scm.py +++ b/app/tests/unit/test_scm.py @@ -145,4 +145,5 @@ class TestSCM: url=AnyHttpUrl("https://github.com/bilbobaggins/example"), git_commit_hash=random_hex_string() ) provider = Provider.from_repo(repo, name="empty") - assert provider is None + assert provider is not None + assert provider.password is None diff --git a/requirements.txt b/requirements.txt index 2dfa1b7..f9480f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ clowmdb>=2.2.0,<2.3.0 # Webserver packages anyio>=3.7.0,<4.0.0 fastapi>=0.103.0,<0.104.0 -pydantic>=2.3.0,<2.4.0 +pydantic>=2.4.0,<2.5.0 pydantic-settings uvicorn>=0.23.0,<0.24.0 python-multipart -- GitLab