diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 29fab3d4ca0cd9923fb41f0ca8732e745274c37f..fda4ac07aaa3b861e0ef8e91f2f2b35fbae4f0d4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -46,7 +46,7 @@ integration-test-job: # Runs integration tests with the database MYSQL_DATABASE: "$DB_DATABASE" MYSQL_USER: "$DB_USER" MYSQL_PASSWORD: "$DB_PASSWORD" - - name: $CI_REGISTRY/cmg/clowm/clowm-database:v2.3 + - name: $CI_REGISTRY/cmg/clowm/clowm-database:v3.0 alias: upgrade-db script: - python app/check_database_connection.py @@ -74,7 +74,7 @@ e2e-test-job: # Runs e2e tests on the API endpoints MYSQL_DATABASE: "$DB_DATABASE" MYSQL_USER: "$DB_USER" MYSQL_PASSWORD: "$DB_PASSWORD" - - name: $CI_REGISTRY/cmg/clowm/clowm-database:v2.3 + - name: $CI_REGISTRY/cmg/clowm/clowm-database:v3.0 alias: upgrade-db script: - python app/check_database_connection.py @@ -132,7 +132,7 @@ lint-test-job: # Runs linters checks on code publish-dev-docker-container-job: stage: deploy image: - name: gcr.io/kaniko-project/executor:v1.17.0-debug + name: gcr.io/kaniko-project/executor:v1.20.0-debug entrypoint: [""] dependencies: [] only: @@ -155,7 +155,7 @@ publish-dev-docker-container-job: publish-docker-container-job: stage: deploy image: - name: gcr.io/kaniko-project/executor:v1.17.0-debug + name: gcr.io/kaniko-project/executor:v1.20.0-debug entrypoint: [""] dependencies: [] only: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2e8793959ae1ebe78b898fe22fef6651b3bafabe..6f006262969542f620716e57c1bc0904bc38a10c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,13 +15,13 @@ repos: - id: check-merge-conflict - id: check-ast - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.1.1 hooks: - id: black files: app args: [--check] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.1.11' + rev: 'v0.1.15' hooks: - id: ruff - repo: https://github.com/PyCQA/isort @@ -39,5 +39,5 @@ repos: additional_dependencies: - types-aiobotocore-lite[s3]>=2.8.0,<2.9.0 - sqlalchemy>=2.0.0.<2.1.0 - - pydantic<2.6.0 + - pydantic<2.7.0 - types-requests diff --git a/Dockerfile b/Dockerfile index d364a9b2efbb0abeb5800317b3544c8c059f56f7..43b2eac2816d02938b589bc488f0a46851099038 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,15 @@ FROM python:3.11-slim -EXPOSE 8000 +ENV PORT=8000 +EXPOSE $PORT + # dumb-init forwards the kill signal to the python process -RUN apt-get update && apt-get -y install dumb-init curl +RUN apt-get update && apt-get -y install dumb-init && apt-get clean ENTRYPOINT ["/usr/bin/dumb-init", "--"] +STOPSIGNAL SIGINT +RUN pip install --no-cache-dir httpx[cli] "uvicorn<0.28.0" -HEALTHCHECK --interval=30s --timeout=4s CMD curl -f http://localhost:8000/health || exit 1 +HEALTHCHECK --interval=30s --timeout=2s CMD httpx http://localhost:$PORT/health || exit 1 RUN useradd -m worker USER worker @@ -13,10 +17,13 @@ WORKDIR /home/worker/code ENV PYTHONPATH=/home/worker/code ENV PATH="/home/worker/.local/bin:${PATH}" +COPY ./start_service_uvicorn.sh /home/worker/code/start.sh +COPY ./scripts/prestart.sh /home/worker/code/prestart.sh + COPY --chown=worker:worker requirements.txt ./requirements.txt RUN pip install --user --no-cache-dir --upgrade -r requirements.txt -COPY --chown=worker:worker . . +COPY --chown=worker:worker ./app /home/worker/code/app -CMD ["./start_service.sh"] +CMD ["./start.sh"] diff --git a/Dockerfile-Gunicorn b/Dockerfile-Gunicorn index 79f82be9b4fa45d10d66ed9256a5d9f97337a34a..0dfcbd561faa7300edd624c0904065bac01c9050 100644 --- a/Dockerfile-Gunicorn +++ b/Dockerfile-Gunicorn @@ -1,14 +1,20 @@ -FROM tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim -EXPOSE 8000 +FROM python:3.11-slim ENV PORT=8000 +EXPOSE $PORT +WORKDIR /app/ +ENV PYTHONPATH=/app -RUN pip install --no-cache-dir httpx[cli] +RUN pip install --no-cache-dir httpx[cli] "gunicorn<21.3.0" "uvicorn<0.28.0" +COPY ./gunicorn_conf.py ./gunicorn_conf.py +COPY ./start_service_gunicorn.sh ./start.sh -HEALTHCHECK --interval=30s --timeout=4s CMD httpx http://localhost:$PORT/health || exit 1 +HEALTHCHECK --interval=30s --timeout=2s CMD httpx http://localhost:$PORT/health || exit 1 -COPY ./scripts/prestart.sh /app/prestart.sh -COPY ./requirements.txt /app/requirements.txt +COPY ./scripts/prestart.sh ./prestart.sh +COPY ./requirements.txt ./requirements.txt RUN pip install --no-cache-dir --upgrade -r requirements.txt -COPY ./app /app/app +COPY ./app ./app + +CMD ["./start.sh"] diff --git a/README.md b/README.md index 7561e561193c85bb19cf9a2ee89c2d404e3fe3b0..a64b5491ced7a1d8f791ed415078668c8ad20451 100644 --- a/README.md +++ b/README.md @@ -17,24 +17,27 @@ This is the Resource service of the CloWM service. | `DB_PASSWORD` | unset | \<db password> | Password of the database user | | `DB_DATABASE` | unset | \<db name> | Name of the database | | `OBJECT_GATEWAY_URI` | unset | HTTP URL | HTTP URL of the Ceph Object Gateway | -| `BUCKET_CEPH_ACCESS_KEY` | unset | \<access key> | Ceph access key with admin privileges | -| `BUCKET_CEPH_SECRET_KEY` | unset | \<secret key> | Ceph secret key with admin privileges | +| `BUCKET_CEPH_ACCESS_KEY` | unset | \<access key> | S3 access key with admin privileges | +| `BUCKET_CEPH_SECRET_KEY` | unset | \<secret key> | S3 secret key with admin privileges | | `OPA_URI` | unset | HTTP URL | HTTP URL of the OPA service | | `SLURM_ENDPOINT` | unset | HTTP URL | HTTP URL to communicate with the Slurm cluster | | `SLURM_TOKEN` | unset | \<JWT> | JWT for communication with the Slurm REST API. Should belong to the user `SLURM_USER` | ### Optional Variables -| Variable | Default | Value | Description | -|-----------------------------------|-------------------------|--------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------| -| `API_PREFIX` | `/api/workflow-service` | URL path | Prefix before every URL path | -| `SQLALCHEMY_VERBOSE_LOGGER` | `False` | `<"True"|"False">` | Enables verbose SQL output.<br>Should be `false` in production | -| `RESOURCE_BUCKET` | `clowm-resources` | Bucket name | Bucket where to save workflow icons. Should be publicly available. | -| `SLURM_USER` | `slurm` | string | User on the slurm cluster who should run the job. Should be the user of the `SLURM_TOKEN` | -| `SLURM_WORKING_DIRECTORY` | `/tmp` | Path on slurm cluster | Working directory for the slurm job with the nextflow command | -| `OPA_POLICY_PATH` | `/clowm/authz/allow` | URL path | Path to the OPA Policy for Authorization | -| `OTLP_GRPC_ENDPOINT` | unset | <hostname / IP> | OTLP compatible endpoint to send traces via gRPC, e.g. Jaeger | +| Variable | Default | Value | Description | +|-----------------------------|-------------------------|-------------------------|-------------------------------------------------------------------------------------------| +| `API_PREFIX` | `/api/workflow-service` | URL path | Prefix before every URL path | +| `SQLALCHEMY_VERBOSE_LOGGER` | `False` | `<"True"|"False">` | Enables verbose SQL output.<br>Should be `false` in production | +| `RESOURCE_BUCKET` | `clowm-resources` | Bucket name | Bucket where to save the resources. | +| `SLURM_USER` | `slurm` | string | User on the slurm cluster who should run the job. Should be the user of the `SLURM_TOKEN` | +| `SLURM_WORKING_DIRECTORY` | `/tmp` | Path on slurm cluster | Working directory for the slurm job with the nextflow command | +| `RESOURCE_CLUSTER_PATH` | `/vol/data/databases` | Path on slurm cluster | Directory on the cluster where the resources should be downloaded to | +| `RESOURCE_CONTAINER_PATH` | `/vol/resources` | Path on slurm cluster | Directory in the container where al resources are readonly available | +| `OPA_POLICY_PATH` | `/clowm/authz/allow` | URL path | Path to the OPA Policy for Authorization | +| `OTLP_GRPC_ENDPOINT` | unset | <hostname / IP> | OTLP compatible endpoint to send traces via gRPC, e.g. Jaeger | ## License -The API is licensed under the [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) license. See the [License](LICENSE) file for more information +The API is licensed under the [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) license. See +the [License](LICENSE) file for more information. diff --git a/app/api/dependencies.py b/app/api/dependencies.py index c177dd83393eb15df007edd0063f8c7ff1f9666c..3b40357d8dd8a074d2d91f146e678b489e3ae1f7 100644 --- a/app/api/dependencies.py +++ b/app/api/dependencies.py @@ -159,6 +159,37 @@ async def decode_bearer_token( raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Malformed JWT") +async def get_current_user(token: Annotated[JWT, Depends(decode_bearer_token)], db: DBSession) -> User: + """ + Get the current user from the database based on the JWT. + + FastAPI Dependency Injection Function. + + Parameters + ---------- + token : app.schemas.security.JWT + The verified and decoded JWT. + db : sqlalchemy.ext.asyncio.AsyncSession. + Async database session to perform query on. Dependency Injection. + + Returns + ------- + user : clowmdb.models.User + User associated with the JWT sent with the HTTP request. + """ + try: + uid = UUID(token.sub) + except ValueError: # pragma: no cover + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Malformed JWT") + user = await CRUDUser.get(db, uid) + if user: + return user + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found") + + +CurrentUser = Annotated[User, Depends(get_current_user)] + + class AuthorizationDependency: """ Class to parameterize the authorization request with the resource to perform an operation on. @@ -175,7 +206,7 @@ class AuthorizationDependency: def __call__( self, - token: Annotated[JWT, Depends(decode_bearer_token)], + user: CurrentUser, client: HTTPClient, ) -> Callable[[str], Awaitable[AuthzResponse]]: """ @@ -183,8 +214,8 @@ class AuthorizationDependency: Parameters ---------- - token : app.schemas.security.JWT - The verified and decoded JWT. Dependency Injection. + user : clowmdb.models.User + The current user based on the JWT. Dependency Injection. client : httpx.AsyncClient HTTP Client with an open connection. Dependency Injection. @@ -195,39 +226,12 @@ class AuthorizationDependency: """ async def authorization_wrapper(operation: str) -> AuthzResponse: - params = AuthzRequest(operation=operation, resource=self.resource, uid=token.sub) + params = AuthzRequest(operation=operation, resource=self.resource, uid=user.lifescience_id) return await request_authorization(request_params=params, client=client) return authorization_wrapper -async def get_current_user(token: Annotated[JWT, Depends(decode_bearer_token)], db: DBSession) -> User: - """ - Get the current user from the database based on the JWT. - - FastAPI Dependency Injection Function. - - Parameters - ---------- - token : app.schemas.security.JWT - The verified and decoded JWT. - db : sqlalchemy.ext.asyncio.AsyncSession. - Async database session to perform query on. Dependency Injection. - - Returns - ------- - user : clowmdb.models.User - User associated with the JWT sent with the HTTP request. - """ - user = await CRUDUser.get(db, token.sub) - if user: - return user - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found") - - -CurrentUser = Annotated[User, Depends(get_current_user)] - - async def get_current_resource(rid: Annotated[UUID, Path()], db: DBSession) -> Resource: """ Get the current resource from the database based on the ID in the path. diff --git a/app/api/endpoints/resource_version.py b/app/api/endpoints/resource_version.py index 2da37ad792458bcd60e403aaf78389f70696325b..1df5349aa0a680e7def04d7986107430675285e5 100644 --- a/app/api/endpoints/resource_version.py +++ b/app/api/endpoints/resource_version.py @@ -12,7 +12,6 @@ from app.api.dependencies import ( CurrentUser, DBSession, S3Resource, - SlurmClient, ) from app.api.resource_cluster_utils import ( delete_cluster_resource_version, @@ -46,7 +45,7 @@ async def list_resource_versions( version_status: Annotated[ Union[List[ResourceVersion.Status], SkipJsonSchema[None]], Query( - description=f"Which versions to include in the response. Permission `resource:read_any` required, current user is the maintainer, then only permission `resource:read` required. Default `{ResourceVersion.Status.LATEST.name}` and `{ResourceVersion.Status.SYNCHRONIZED.name}`.", + description=f"Which versions to include in the response. Permission `resource:read_any` required, current user is the maintainer, then only permission `resource:read` required. Default `{ResourceVersion.Status.LATEST.name}`, `{ResourceVersion.Status.SYNCHRONIZED.name}` and `{ResourceVersion.Status.SETTING_LATEST.name}`.", # noqa: E501 ), ] = None, @@ -76,7 +75,9 @@ async def list_resource_versions( ) await authorization(rbac_operation) requested_versions = ( - version_status if version_status else [ResourceVersion.Status.LATEST, ResourceVersion.Status.SYNCHRONIZED] + version_status + if version_status + else [ResourceVersion.Status.LATEST, ResourceVersion.Status.SYNCHRONIZED, ResourceVersion.Status.SETTING_LATEST] ) return [ ResourceVersionOut.from_db_resource_version(version) @@ -114,8 +115,6 @@ async def request_resource_version( Async database session to perform query on. Dependency Injection. background_tasks : fastapi.BackgroundTasks Entrypoint for new BackgroundTasks. Provided by FastAPI. - s3 : types_aiobotocore_s3.service_resource import S3ServiceResource - S3 Service to perform operations on buckets. Dependency Injection. """ current_span = trace.get_current_span() current_span.set_attributes( @@ -256,7 +255,6 @@ async def resource_version_sync( db: DBSession, s3: S3Resource, background_tasks: BackgroundTasks, - slurm_client: SlurmClient, ) -> ResourceVersionOut: """ Synchronize the resource version to the cluster. @@ -273,8 +271,6 @@ async def resource_version_sync( Resource Version associated with the ID in the path. Dependency Injection. db : sqlalchemy.ext.asyncio.AsyncSession. Async database session to perform query on. Dependency Injection. - slurm_client : app.slurm.rest_client.SlurmClient - Slurm client with an open connection. Dependency Injection background_tasks : fastapi.BackgroundTasks Entrypoint for new BackgroundTasks. Provided by FastAPI. s3 : types_aiobotocore_s3.service_resource import S3ServiceResource @@ -283,8 +279,9 @@ async def resource_version_sync( trace.get_current_span().set_attributes( {"resource_id": str(resource.resource_id), "resource_version_id": str(resource_version.resource_version_id)} ) - await authorization("sync") + await authorization("sync_denied" if resource_version.status == ResourceVersion.Status.DENIED else "sync") if resource_version.status not in [ + ResourceVersion.Status.DENIED, ResourceVersion.Status.SYNC_REQUESTED, ResourceVersion.Status.CLUSTER_DELETED, ]: @@ -315,7 +312,6 @@ async def resource_version_latest( resource: CurrentResource, resource_version: CurrentResourceVersion, background_tasks: BackgroundTasks, - slurm_client: SlurmClient, ) -> ResourceVersionOut: """ Set the resource version as the latest version. @@ -332,8 +328,6 @@ async def resource_version_latest( Resource associated with the ID in the path. Dependency Injection. resource_version : clowmdb.models.ResourceVersion Resource Version associated with the ID in the path. Dependency Injection. - slurm_client : app.slurm.rest_client.SlurmClient - Slurm client with an open connection. Dependency Injection background_tasks : fastapi.BackgroundTasks Entrypoint for new BackgroundTasks. Provided by FastAPI. """ @@ -346,7 +340,11 @@ async def resource_version_latest( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Can't set resource version to {ResourceVersion.Status.LATEST.name} with status {resource_version.status.name}", ) + await CRUDResourceVersion.update_status( + db, resource_version_id=resource_version.resource_version_id, status=ResourceVersion.Status.SETTING_LATEST + ) resource_version_out = ResourceVersionOut.from_db_resource_version(resource_version) + resource_version_out.status = ResourceVersion.Status.SETTING_LATEST background_tasks.add_task(set_cluster_resource_version_latest, resource_version=resource_version_out) return resource_version_out @@ -359,7 +357,6 @@ async def delete_resource_version_cluster( resource_version: CurrentResourceVersion, db: DBSession, background_tasks: BackgroundTasks, - slurm_client: SlurmClient, ) -> ResourceVersionOut: """ Delete the resource version on the cluster. @@ -385,10 +382,19 @@ async def delete_resource_version_cluster( {"resource_id": str(resource.resource_id), "resource_version_id": str(resource_version.resource_version_id)} ) await authorization("delete_cluster") + if resource_version.status not in [ + ResourceVersion.Status.SYNCHRONIZED, + ResourceVersion.Status.LATEST, + ResourceVersion.Status.CLUSTER_DELETE_ERROR, + ]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Can't delete resource version on cluster with status {resource_version.status.name}", + ) await CRUDResourceVersion.update_status( - db, resource_version_id=resource_version.resource_version_id, status=ResourceVersion.Status.CLUSTER_DELETED + db, resource_version_id=resource_version.resource_version_id, status=ResourceVersion.Status.CLUSTER_DELETING ) - resource_version.status = ResourceVersion.Status.CLUSTER_DELETED + resource_version.status = ResourceVersion.Status.CLUSTER_DELETING resource_version_out = ResourceVersionOut.from_db_resource_version(resource_version) background_tasks.add_task(delete_cluster_resource_version, resource_version=resource_version_out) return resource_version_out @@ -401,7 +407,6 @@ async def delete_resource_version_s3( resource: CurrentResource, resource_version: CurrentResourceVersion, db: DBSession, - s3: S3Resource, background_tasks: BackgroundTasks, ) -> ResourceVersionOut: """ @@ -427,11 +432,20 @@ async def delete_resource_version_s3( trace.get_current_span().set_attributes( {"resource_id": str(resource.resource_id), "resource_version_id": str(resource_version.resource_version_id)} ) + await authorization("delete_s3") + if resource_version.status not in [ + ResourceVersion.Status.DENIED, + ResourceVersion.Status.CLUSTER_DELETED, + ResourceVersion.Status.S3_DELETE_ERROR, + ]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Can't delete S3 resource version with status {resource_version.status.name}", + ) await CRUDResourceVersion.update_status( - db, resource_version_id=resource_version.resource_version_id, status=ResourceVersion.Status.S3_DELETED + db, resource_version_id=resource_version.resource_version_id, status=ResourceVersion.Status.S3_DELETING ) - await authorization("delete_s3") - resource_version.status = ResourceVersion.Status.S3_DELETED + resource_version.status = ResourceVersion.Status.S3_DELETING background_tasks.add_task( delete_s3_resource_version, resource_id=resource.resource_id, diff --git a/app/api/endpoints/resources.py b/app/api/endpoints/resources.py index 02f9f14a38a869b97f4d6d0a938e0ecebb9a338b..dbbc5cdb2823887eed0712c01dd427204621b720 100644 --- a/app/api/endpoints/resources.py +++ b/app/api/endpoints/resources.py @@ -1,4 +1,5 @@ from typing import Annotated, Any, Awaitable, Callable, List, Union +from uuid import UUID from clowmdb.models import ResourceVersion from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, status @@ -31,9 +32,8 @@ async def list_resources( current_user: CurrentUser, db: DBSession, maintainer_id: Annotated[ - Union[str, SkipJsonSchema[None]], + Union[UUID, SkipJsonSchema[None]], Query( - max_length=64, description="Filter for resource by maintainer. If current user is the same as maintainer ID, permission `resource:list` required, otherwise `resource:list_filter`.", # noqa: E501 ), @@ -41,7 +41,7 @@ async def list_resources( version_status: Annotated[ Union[List[ResourceVersion.Status], SkipJsonSchema[None]], Query( - description=f"Which versions of the resource to include in the response. Permission `resource:list_filter` required, unless `maintainer_id` is provided and current user is maintainer, then only permission `resource:list` required. Default `{ResourceVersion.Status.LATEST.name}` and `{ResourceVersion.Status.SYNCHRONIZED.name}`.", + description=f"Which versions of the resource to include in the response. Permission `resource:list_filter` required, unless `maintainer_id` is provided and current user is maintainer, then only permission `resource:list` required. Default `{ResourceVersion.Status.LATEST.name}`, `{ResourceVersion.Status.SYNCHRONIZED.name}` and `{ResourceVersion.Status.SETTING_LATEST.name}`.", # noqa: E501 ), ] = None, @@ -60,7 +60,7 @@ async def list_resources( Current user. Dependency injection. db : sqlalchemy.ext.asyncio.AsyncSession. Async database session to perform query on. Dependency Injection. - maintainer_id : str | None, default None + maintainer_id : uuid.UUID | None, default None Filter resources by a maintainer. Query Parameter. version_status : List[clowmdb.models.ResourceVersion.Status] | None, default None Filter resource version by their status. Query Parameter. @@ -69,7 +69,7 @@ async def list_resources( """ current_span = trace.get_current_span() if maintainer_id: # pragma: no cover - current_span.set_attribute("maintainer_id", maintainer_id) + current_span.set_attribute("maintainer_id", str(maintainer_id)) if version_status: # pragma: no cover current_span.set_attribute("version_status", [state.name for state in version_status]) if name_substring: # pragma: no cover @@ -85,9 +85,15 @@ async def list_resources( db, name_substring=name_substring, maintainer_id=maintainer_id, - version_status=version_status - if version_status - else [ResourceVersion.Status.LATEST, ResourceVersion.Status.SYNCHRONIZED], + version_status=( + version_status + if version_status + else [ + ResourceVersion.Status.LATEST, + ResourceVersion.Status.SYNCHRONIZED, + ResourceVersion.Status.SETTING_LATEST, + ] + ), ) return [ResourceOut.from_db_resource(resource) for resource in resources] diff --git a/app/api/middleware/etagmiddleware.py b/app/api/middleware/etagmiddleware.py index 14decedeb63c259445795879c4f068768c4d6afb..44cfd515f85e4a377f2afa151acb60c28403756f 100644 --- a/app/api/middleware/etagmiddleware.py +++ b/app/api/middleware/etagmiddleware.py @@ -1,8 +1,9 @@ from hashlib import md5 -from typing import Mapping, Optional +from typing import Awaitable, Callable, Mapping, Optional -from fastapi import status +from fastapi import Request, Response, status from fastapi.responses import JSONResponse +from starlette.middleware.base import BaseHTTPMiddleware class HashJSONResponse(JSONResponse): @@ -11,3 +12,18 @@ class HashJSONResponse(JSONResponse): # Add the ETag header (MD5 hash of content) to the response if self.status_code == status.HTTP_200_OK: self.headers["ETag"] = md5(self.body).hexdigest() + + +class ETagMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response: + response = await call_next(request) + if request.method == "GET": + # Client can ask if the cached data is stale or not + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match + # This saves network bandwidth but the database is still queried + if ( + response.headers.get("ETag") is not None + and request.headers.get("If-None-Match") == response.headers["ETag"] + ): + return Response(status_code=status.HTTP_304_NOT_MODIFIED) + return response diff --git a/app/api/resource_cluster_utils.py b/app/api/resource_cluster_utils.py index 8f3af3a7ef7a4e0f2f2db25c631018d32d59360d..0f27414f1688ec7f33773614881b3f60302ca6e6 100644 --- a/app/api/resource_cluster_utils.py +++ b/app/api/resource_cluster_utils.py @@ -51,7 +51,7 @@ async def synchronize_cluster_resource(resource_version: ResourceVersionOut) -> failed_job = AsyncJob( update_db_resource_wrapper, resource_version_id=resource_version.resource_version_id, - status=ResourceVersion.Status.SYNC_REQUESTED, + status=ResourceVersion.Status.SYNC_ERROR, resource_id=None, ) try: @@ -185,6 +185,12 @@ async def set_cluster_resource_version_latest(resource_version: ResourceVersionO resource_version_id=resource_version.resource_version_id, resource_id=resource_version.resource_id, ), + failed_job=AsyncJob( + update_db_resource_wrapper, + status=ResourceVersion.Status.SYNCHRONIZED, + resource_version_id=resource_version.resource_version_id, + resource_id=resource_version.resource_id, + ), ) except (HTTPError, KeyError): # pragma: no cover pass @@ -237,7 +243,22 @@ async def delete_cluster_resource_version( slurm_client = get_slurm_client(client) # Try to start the job on the slurm cluster slurm_job_id = await slurm_client.submit_job(job_submission=job_submission) - await _monitor_proper_job_execution(slurm_client=slurm_client, slurm_job_id=slurm_job_id) + await _monitor_proper_job_execution( + slurm_client=slurm_client, + slurm_job_id=slurm_job_id, + success_job=AsyncJob( + update_db_resource_wrapper, + status=ResourceVersion.Status.CLUSTER_DELETED, + resource_version_id=resource_version.resource_version_id, + resource_id=resource_version.resource_id, + ), + failed_job=AsyncJob( + update_db_resource_wrapper, + status=ResourceVersion.Status.CLUSTER_DELETE_ERROR, + resource_version_id=resource_version.resource_version_id, + resource_id=resource_version.resource_id, + ), + ) except (HTTPError, KeyError): # pragma: no cover pass diff --git a/app/api/resource_s3_utils.py b/app/api/resource_s3_utils.py index 5d9590c6074ae08989460edd771a80fd583ba775..cc56c5c64e441c38203d8bc1aac587bb25db01ef 100644 --- a/app/api/resource_s3_utils.py +++ b/app/api/resource_s3_utils.py @@ -2,9 +2,13 @@ from io import BytesIO from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Optional from uuid import UUID +from botocore.exceptions import ClientError +from clowmdb.models import ResourceVersion from opentelemetry import trace +from app.api.dependencies import get_db from app.core.config import settings +from app.crud import CRUDResourceVersion from app.s3.s3_resource import ( add_s3_bucket_policy_stmt, boto_session, @@ -52,7 +56,7 @@ async def give_permission_to_s3_resource(resource: ResourceOut) -> None: policy: Dict[str, Any] = { "Sid": str(resource.resource_id), "Effect": "Allow", - "Principal": {"AWS": f"arn:aws:iam:::user/{resource.maintainer_id}"}, + "Principal": {"AWS": f"arn:aws:iam:::user/{str(resource.maintainer_id)}"}, "Resource": f"arn:aws:s3:::{settings.RESOURCE_BUCKET}", "Action": ["s3:ListBucket"], "Condition": {"StringLike": {"s3:prefix": resource_dir_name(resource.resource_id) + "/*"}}, @@ -61,7 +65,7 @@ async def give_permission_to_s3_resource(resource: ResourceOut) -> None: await add_s3_bucket_policy_stmt(policy, s3=s3, bucket_name=settings.RESOURCE_BUCKET) -async def give_permission_to_s3_resource_version(resource_version: ResourceVersionOut, maintainer_id: str) -> None: +async def give_permission_to_s3_resource_version(resource_version: ResourceVersionOut, maintainer_id: UUID) -> None: """ Give the maintainer permissions to upload the resource to the appropriate S3 key. @@ -75,7 +79,7 @@ async def give_permission_to_s3_resource_version(resource_version: ResourceVersi policy: Dict[str, Any] = { "Sid": str(resource_version.resource_version_id), "Effect": "Allow", - "Principal": {"AWS": f"arn:aws:iam:::user/{maintainer_id}"}, + "Principal": {"AWS": f"arn:aws:iam:::user/{str(maintainer_id)}"}, "Resource": f"arn:aws:s3:::{resource_version.s3_path[5:]}", "Action": ["s3:DeleteObject", "s3:PutObject", "s3:GetObject"], } @@ -144,10 +148,22 @@ async def delete_s3_resource_version(resource_id: UUID, resource_version_id: UUI resource_version_id : uuid.UUID ID of the resource version """ - async for s3 in get_s3_resource(): - await delete_s3_objects( - s3, bucket_name=settings.RESOURCE_BUCKET, prefix=resource_version_dir_name(resource_id, resource_version_id) - ) + try: + async for s3 in get_s3_resource(): + await delete_s3_objects( + s3, + bucket_name=settings.RESOURCE_BUCKET, + prefix=resource_version_dir_name(resource_id, resource_version_id), + ) + async for db in get_db(): + await CRUDResourceVersion.update_status( + db, status=ResourceVersion.Status.S3_DELETED, resource_version_id=resource_version_id + ) + except ClientError: + async for db in get_db(): + await CRUDResourceVersion.update_status( + db, status=ResourceVersion.Status.S3_DELETE_ERROR, resource_version_id=resource_version_id + ) async def get_s3_resource_version_obj( diff --git a/app/crud/crud_resource.py b/app/crud/crud_resource.py index ece5f60ec9975d9ceffa51d1a761cedf8d104164..e371be7aef362e65e0a09c8d4badcb788f335eb8 100644 --- a/app/crud/crud_resource.py +++ b/app/crud/crud_resource.py @@ -64,7 +64,7 @@ class CRUDResource: async def list_resources( db: AsyncSession, name_substring: Optional[str] = None, - maintainer_id: Optional[str] = None, + maintainer_id: Optional[UUID] = None, version_status: Optional[List[ResourceVersion.Status]] = None, ) -> List[Resource]: """ @@ -76,7 +76,7 @@ class CRUDResource: Async database session to perform query on. name_substring : str | None, default None Substring to filter for in the name of a resource. - maintainer_id : str | None, default None + maintainer_id : uuid.UUID | None, default None Filter resources by maintainer. version_status : List[clowmdb.models.ResourceVersion.Status] | None, default None Filter versions of a resource based on the status. Removes resources that have no version after this filter. @@ -92,8 +92,8 @@ class CRUDResource: span.set_attribute("name_substring", name_substring) stmt = stmt.where(Resource.name.contains(name_substring)) if maintainer_id is not None: - span.set_attribute("maintainer_id", maintainer_id) - stmt = stmt.where(Resource.maintainer_id == maintainer_id) + span.set_attribute("maintainer_id", str(maintainer_id)) + stmt = stmt.where(Resource._maintainer_id == maintainer_id.bytes) if version_status is not None and len(version_status) > 0: span.set_attribute("status", [status.name for status in version_status]) stmt = stmt.options( @@ -124,7 +124,7 @@ class CRUDResource: await db.commit() @staticmethod - async def create(db: AsyncSession, resource_in: ResourceIn, maintainer_id: str) -> Resource: + async def create(db: AsyncSession, resource_in: ResourceIn, maintainer_id: UUID) -> Resource: """ Create a new resource. @@ -144,13 +144,13 @@ class CRUDResource: """ with tracer.start_as_current_span( "db_create_resource", - attributes={"maintainer_id": maintainer_id, "resource_in": resource_in.model_dump_json(indent=2)}, + attributes={"maintainer_id": str(maintainer_id), "resource_in": resource_in.model_dump_json(indent=2)}, ) as span: resource_db = Resource( name=resource_in.name, short_description=resource_in.description, source=resource_in.source, - maintainer_id=maintainer_id, + _maintainer_id=maintainer_id.bytes, ) db.add(resource_db) await db.commit() diff --git a/app/crud/crud_user.py b/app/crud/crud_user.py index 945af06c3071e9c92cb383e0a8fd3c0143b07017..9c075203a2b4791ffa6cd1691497f3fbcbc0ff6a 100644 --- a/app/crud/crud_user.py +++ b/app/crud/crud_user.py @@ -1,4 +1,5 @@ from typing import Optional +from uuid import UUID from clowmdb.models import User from opentelemetry import trace @@ -10,7 +11,7 @@ tracer = trace.get_tracer_provider().get_tracer(__name__) class CRUDUser: @staticmethod - async def get(db: AsyncSession, uid: str) -> Optional[User]: + async def get(db: AsyncSession, uid: UUID) -> Optional[User]: """ Get a user by its UID. @@ -18,7 +19,7 @@ class CRUDUser: ---------- db : sqlalchemy.ext.asyncio.AsyncSession. Async database session to perform query on. - uid : str + uid : uuid.UUID UID of a user. Returns @@ -26,7 +27,7 @@ class CRUDUser: user : clowmdb.models.User | None The user for the given UID if he exists, None otherwise """ - with tracer.start_as_current_span("db_get_user", attributes={"uid": uid}) as span: - stmt = select(User).where(User.uid == uid) + with tracer.start_as_current_span("db_get_user", attributes={"uid": str(uid)}) as span: + stmt = select(User).where(User._uid == uid.bytes) span.set_attribute("sql_query", str(stmt)) return await db.scalar(stmt) diff --git a/app/main.py b/app/main.py index 538783032d5e4f1dc032be59d1d2963b6d78d1c1..1db5b48d2443cef511a086e021676a314a32562b 100644 --- a/app/main.py +++ b/app/main.py @@ -18,7 +18,7 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.trace import Status, StatusCode from app.api.api import api_router -from app.api.middleware.etagmiddleware import HashJSONResponse +from app.api.middleware.etagmiddleware import ETagMiddleware, HashJSONResponse from app.api.miscellaneous_endpoints import miscellaneous_router from app.core.config import settings from app.s3.s3_resource import boto_session @@ -55,7 +55,7 @@ app = FastAPI( "email": "dgoebel@techfak.uni-bielefeld.de", }, generate_unique_id_function=custom_generate_unique_id, - # license_info={"name": "MIT", "url": "https://mit-license.org/"}, + license_info={"name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0"}, root_path=settings.API_PREFIX, openapi_url=None, # create it manually to enable caching on client side default_response_class=HashJSONResponse, # Add ETag header based on MD5 hash of content @@ -93,7 +93,7 @@ FastAPIInstrumentor.instrument_app( # Enable caching based on ETag -# app.add_middleware(ETagMiddleware) +app.add_middleware(ETagMiddleware) # Enable br compression for large responses, fallback gzip app.add_middleware(BrotliMiddleware) diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0c55669cbe106300bd53c860ab1286bf3d9b131b 100644 --- a/app/schemas/__init__.py +++ b/app/schemas/__init__.py @@ -0,0 +1,6 @@ +from typing import Annotated +from uuid import UUID as NativeUUID + +from pydantic import PlainSerializer + +UUID = Annotated[NativeUUID, PlainSerializer(lambda uuid: str(uuid), return_type=str, when_used="unless-none")] diff --git a/app/schemas/resource.py b/app/schemas/resource.py index 48f10d66e57ad36dd734f789f9239ab52aed21f1..e86bf599fd12df5e055a1346ca14fd631d2aa06b 100644 --- a/app/schemas/resource.py +++ b/app/schemas/resource.py @@ -1,9 +1,9 @@ from typing import List, Optional, Sequence -from uuid import UUID from clowmdb.models import Resource, ResourceVersion, User from pydantic import BaseModel, Field +from app.schemas import UUID from app.schemas.resource_version import ResourceVersionIn, ResourceVersionOut, resource_version_dir_name @@ -31,7 +31,7 @@ class ResourceIn(BaseResource, ResourceVersionIn): class ResourceOut(BaseResource): resource_id: UUID = Field(..., description="ID of the resource", examples=["4c072e39-2bd9-4fa3-b564-4d890e240ccd"]) - maintainer_id: str = Field(..., description="ID of the maintainer", examples=["28c5353b8bb34984a8bd4169ba94c606"]) + maintainer_id: UUID = Field(..., description="ID of the maintainer", examples=["28c5353b8bb34984a8bd4169ba94c606"]) versions: List[ResourceVersionOut] = Field(..., description="Versions of the resource") @staticmethod @@ -75,7 +75,7 @@ class S3ResourceVersionInfo(BaseResource): resource_version_id: UUID = Field( ..., description="ID of the resource version", examples=["fb4cee12-1e91-49f3-905f-808845c7c1f4"] ) - maintainer_id: str = Field(..., description="ID of the maintainer", examples=["28c5353b8bb34984a8bd4169ba94c606"]) + maintainer_id: UUID = Field(..., description="ID of the maintainer", examples=["28c5353b8bb34984a8bd4169ba94c606"]) maintainer: str = Field(..., description="Name of the maintainer", examples=["Bilbo Baggins"]) def s3_path(self) -> str: diff --git a/app/schemas/resource_version.py b/app/schemas/resource_version.py index faca74360d1007da6f7a186a5e17269a3bb865ab..898c632a57856c10de835e576e6bb9b7540f8194 100644 --- a/app/schemas/resource_version.py +++ b/app/schemas/resource_version.py @@ -1,10 +1,10 @@ from typing import Optional -from uuid import UUID from clowmdb.models import ResourceVersion from pydantic import BaseModel, Field, computed_field from app.core.config import settings +from app.schemas import UUID class BaseResourceVersion(BaseModel): diff --git a/app/schemas/security.py b/app/schemas/security.py index c66952a0eb0806436a134c0c7af96f75a478364c..76d3b2a5b800c6eff2cc897372e797d3280719fc 100644 --- a/app/schemas/security.py +++ b/app/schemas/security.py @@ -1,8 +1,9 @@ from datetime import datetime -from uuid import UUID from pydantic import BaseModel, Field +from app.schemas import UUID + class AuthzResponse(BaseModel): """Schema for a response from OPA""" diff --git a/app/tests/api/test_miscellaneous_enpoints.py b/app/tests/api/test_miscellaneous_enpoints.py index 04eb528ce8a674fb4aa8be82dcf98c0a0df483b4..b9f6a56becfdb9efb7b5dc5eb7a8792a4cc96c49 100644 --- a/app/tests/api/test_miscellaneous_enpoints.py +++ b/app/tests/api/test_miscellaneous_enpoints.py @@ -35,8 +35,8 @@ class TestOpenAPIRoute: assert response1.status_code == status.HTTP_200_OK assert response1.headers.get("ETag") is not None - # response2 = await client.get("/openapi.json", headers={"If-None-Match": response1.headers.get("ETag")}) - # assert response2.status_code == status.HTTP_304_NOT_MODIFIED + response2 = await client.get("/openapi.json", headers={"If-None-Match": response1.headers.get("ETag")}) + assert response2.status_code == status.HTTP_304_NOT_MODIFIED @pytest.mark.asyncio async def test_swagger_ui_route(self, client: AsyncClient) -> None: diff --git a/app/tests/api/test_resource_version.py b/app/tests/api/test_resource_version.py index b6303e3defacb65175030220cb639b45b2197e6b..0a9b2eaf902946aa804b2924bb99f81421a325d6 100644 --- a/app/tests/api/test_resource_version.py +++ b/app/tests/api/test_resource_version.py @@ -1,4 +1,5 @@ import json +from copy import copy from io import BytesIO from uuid import uuid4 @@ -183,6 +184,7 @@ class TestResourceVersionRouteDelete(_TestResourceVersionRoutes): random_user : app.tests.utils.user.UserWithAuthHeader Random user for testing. """ + previous_state = copy(random_resource_version_states.status) response = await client.delete( "/".join( [ @@ -195,7 +197,14 @@ class TestResourceVersionRouteDelete(_TestResourceVersionRoutes): ), headers=random_user.auth_headers, ) - assert response.status_code == status.HTTP_200_OK + if previous_state in [ + ResourceVersion.Status.SYNCHRONIZED, + ResourceVersion.Status.LATEST, + ResourceVersion.Status.CLUSTER_DELETE_ERROR, + ]: + assert response.status_code == status.HTTP_200_OK + else: + assert response.status_code == status.HTTP_400_BAD_REQUEST @pytest.mark.asyncio async def test_delete_resource_version_s3_route( @@ -222,6 +231,7 @@ class TestResourceVersionRouteDelete(_TestResourceVersionRoutes): mock_s3_service : app.tests.mocks.mock_s3_resource.MockS3ServiceResource Mock S3 Service to manipulate objects. """ + previous_state = copy(random_resource_version_states.status) response = await client.delete( "/".join( [ @@ -234,7 +244,15 @@ class TestResourceVersionRouteDelete(_TestResourceVersionRoutes): ), headers=random_user.auth_headers, ) - assert response.status_code == status.HTTP_200_OK + if previous_state in [ + ResourceVersion.Status.CLUSTER_DELETED, + ResourceVersion.Status.DENIED, + ResourceVersion.Status.S3_DELETE_ERROR, + ]: + assert response.status_code == status.HTTP_200_OK + else: + assert response.status_code == status.HTTP_400_BAD_REQUEST + return # test that the resource is deleted in S3 with pytest.raises(ClientError): @@ -439,7 +457,7 @@ class TestResourceVersionRoutePut(_TestResourceVersionRoutes): random_user : app.tests.utils.user.UserWithAuthHeader Random user for testing. """ - previous_status = random_resource_version_states.status + previous_status = copy(random_resource_version_states.status) response = await client.put( "/".join( [ @@ -455,6 +473,7 @@ class TestResourceVersionRoutePut(_TestResourceVersionRoutes): if previous_status in [ ResourceVersion.Status.SYNC_REQUESTED, ResourceVersion.Status.CLUSTER_DELETED, + ResourceVersion.Status.DENIED, ]: assert response.status_code == status.HTTP_200_OK else: @@ -484,7 +503,7 @@ class TestResourceVersionRoutePut(_TestResourceVersionRoutes): random_user : app.tests.utils.user.UserWithAuthHeader Random user for testing. """ - previous_status = random_resource_version_states.status + previous_status = copy(random_resource_version_states.status) mock_slurm_cluster.fail_jobs = True def reset_slurm_config() -> None: @@ -506,6 +525,7 @@ class TestResourceVersionRoutePut(_TestResourceVersionRoutes): if previous_status in [ ResourceVersion.Status.SYNC_REQUESTED, ResourceVersion.Status.CLUSTER_DELETED, + ResourceVersion.Status.DENIED, ]: assert response.status_code == status.HTTP_200_OK else: diff --git a/app/tests/api/test_security.py b/app/tests/api/test_security.py index b7ed53e5eb885a8454273a19435d41361af646f2..e731a69545caa580eb96bd9d98168dd118cf30ed 100644 --- a/app/tests/api/test_security.py +++ b/app/tests/api/test_security.py @@ -54,8 +54,9 @@ class TestJWTProtectedRoutes: Random user for testing. """ response = await client.get( - self.protected_route, params={"user": random_user.user.uid}, headers=random_user.auth_headers + self.protected_route, params={"maintainer_id": str(random_user.user.uid)}, headers=random_user.auth_headers ) + print(response.json()) assert response.status_code == status.HTTP_200_OK @pytest.mark.asyncio diff --git a/app/tests/conftest.py b/app/tests/conftest.py index 9aa6dc593cd6b0a1826f5d9be660aa91d2547e7a..f4cd3bd92b0cb75b3933f05de47b68edfeedbabb 100644 --- a/app/tests/conftest.py +++ b/app/tests/conftest.py @@ -84,6 +84,13 @@ async def mock_client( yield http_client +@pytest.fixture(autouse=True) +def monkeypatch_environment( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("OTLP_GRPC_ENDPOINT", "") + + @pytest.fixture(autouse=True) def monkeypatch_background_tasks( monkeypatch: pytest.MonkeyPatch, @@ -107,7 +114,6 @@ def monkeypatch_background_tasks( monkeypatch.setattr(dependencies, "get_db", get_patch_db) monkeypatch.setattr(resource_s3_utils, "get_s3_resource", get_s3) monkeypatch.setattr(resource_cluster_utils, "get_async_http_client", get_http_client) - monkeypatch.setenv("OTLP_GRPC_ENDPOINT", "") @pytest_asyncio.fixture(scope="module") @@ -184,9 +190,9 @@ async def random_user(db: AsyncSession, mock_opa_service: MockOpaService) -> Asy Create a random user and deletes him afterward. """ user = await create_random_user(db) - mock_opa_service.add_user(user.uid, privileged=True) + mock_opa_service.add_user(user.lifescience_id, privileged=True) yield UserWithAuthHeader(user=user, auth_headers=get_authorization_headers(uid=user.uid, secret=jwt_secret)) - mock_opa_service.delete_user(user.uid) + mock_opa_service.delete_user(user.lifescience_id) await db.delete(user) await db.commit() @@ -197,9 +203,9 @@ async def random_second_user(db: AsyncSession, mock_opa_service: MockOpaService) Create a random second user and deletes him afterward. """ user = await create_random_user(db) - mock_opa_service.add_user(user.uid) + mock_opa_service.add_user(user.lifescience_id) yield UserWithAuthHeader(user=user, auth_headers=get_authorization_headers(uid=user.uid, secret=jwt_secret)) - mock_opa_service.delete_user(user.uid) + mock_opa_service.delete_user(user.lifescience_id) await db.delete(user) await db.commit() @@ -210,9 +216,9 @@ async def random_third_user(db: AsyncSession, mock_opa_service: MockOpaService) Create a random third user and deletes him afterward. """ user = await create_random_user(db) - mock_opa_service.add_user(user.uid) + mock_opa_service.add_user(user.lifescience_id) yield UserWithAuthHeader(user=user, auth_headers=get_authorization_headers(uid=user.uid, secret=jwt_secret)) - mock_opa_service.delete_user(user.uid) + mock_opa_service.delete_user(user.lifescience_id) await db.delete(user) await db.commit() @@ -231,7 +237,7 @@ async def random_resource( name=random_lower_string(8), short_description=random_lower_string(32), source=random_lower_string(32), - maintainer_id=random_user.user.uid, + _maintainer_id=random_user.user.uid.bytes, ) db.add(resource_db) await db.commit() diff --git a/app/tests/crud/test_user.py b/app/tests/crud/test_user.py index 7db6677c3e6f660439694f0b03e9b3d02b7f185a..d6c0a3b6c888c6a3b86755fc4df7fd940f0aed54 100644 --- a/app/tests/crud/test_user.py +++ b/app/tests/crud/test_user.py @@ -1,9 +1,10 @@ +from uuid import uuid4 + import pytest from sqlalchemy.ext.asyncio import AsyncSession from app.crud import CRUDUser from app.tests.utils.user import UserWithAuthHeader -from app.tests.utils.utils import random_hex_string class TestUserCRUD: @@ -33,5 +34,5 @@ class TestUserCRUD: db : sqlalchemy.ext.asyncio.AsyncSession. Async database session to perform query on. """ - user = await CRUDUser.get(db, random_hex_string()) + user = await CRUDUser.get(db, uuid4()) assert user is None diff --git a/app/tests/mocks/__init__.py b/app/tests/mocks/__init__.py index 56f370d8a08ba9a3bb5696e22b43c5f1caf4d182..14ae1a513cd066d8f36e7b873fcdc1edaf737a06 100644 --- a/app/tests/mocks/__init__.py +++ b/app/tests/mocks/__init__.py @@ -6,8 +6,7 @@ from httpx import Request, Response class MockHTTPService(ABC): @abstractmethod - def handle_request(self, request: Request, **kwargs: bool) -> Response: - ... + def handle_request(self, request: Request, **kwargs: bool) -> Response: ... class DefaultMockHTTPService(MockHTTPService): diff --git a/app/tests/utils/bucket.py b/app/tests/utils/bucket.py index 2f04f31e47ce111fc00e9ee78b9df304a94e25ac..e9d7970151046a88aa069a723d602b19851e1191 100644 --- a/app/tests/utils/bucket.py +++ b/app/tests/utils/bucket.py @@ -1,14 +1,13 @@ from datetime import datetime from typing import Optional +from uuid import UUID -import pytest from clowmdb.models import Bucket, BucketPermission, User from sqlalchemy.ext.asyncio import AsyncSession from .utils import random_lower_string -@pytest.mark.asyncio async def create_random_bucket(db: AsyncSession, user: User) -> Bucket: """ Creates a random bucket in the database. @@ -28,18 +27,17 @@ async def create_random_bucket(db: AsyncSession, user: User) -> Bucket: bucket = Bucket( name=random_lower_string(), description=random_lower_string(length=127), - owner_id=user.uid, + _owner_id=user.uid.bytes, ) db.add(bucket) await db.commit() return bucket -@pytest.mark.asyncio async def add_permission_for_bucket( db: AsyncSession, bucket_name: str, - uid: str, + uid: UUID, from_: Optional[datetime] = None, to: Optional[datetime] = None, permission: BucketPermission.Permission = BucketPermission.Permission.READWRITE, @@ -66,7 +64,7 @@ async def add_permission_for_bucket( The file prefix for the permission. """ perm = BucketPermission( - user_id=uid, + _uid=uid.bytes, bucket_name=bucket_name, from_=round(from_.timestamp()) if from_ is not None else None, to=round(to.timestamp()) if to is not None else None, diff --git a/app/tests/utils/user.py b/app/tests/utils/user.py index ff514b4ca098533bce88bf17f3e6988a32d83343..198470c85a05ed302315a57eea8a5ec0fa6217d4 100644 --- a/app/tests/utils/user.py +++ b/app/tests/utils/user.py @@ -1,8 +1,8 @@ from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from typing import Dict +from uuid import UUID -import pytest from authlib.jose import JsonWebToken from clowmdb.models import User from sqlalchemy.ext.asyncio import AsyncSession @@ -18,7 +18,7 @@ class UserWithAuthHeader: user: User -def get_authorization_headers(uid: str, secret: str = "SuperSecret") -> Dict[str, str]: +def get_authorization_headers(uid: UUID, secret: str = "SuperSecret") -> Dict[str, str]: """ Create a valid JWT and return the correct headers for subsequent requests. @@ -33,7 +33,7 @@ def get_authorization_headers(uid: str, secret: str = "SuperSecret") -> Dict[str headers : Dict[str,str] HTTP Headers to authorize each request. """ - to_encode = {"sub": uid, "exp": datetime.utcnow() + timedelta(hours=1)} + to_encode = {"sub": str(uid), "exp": datetime.now(UTC) + timedelta(hours=1)} encoded_jwt = _jwt.encode(header={"alg": "HS256"}, payload=to_encode, key=secret) headers = {"Authorization": f"Bearer {encoded_jwt.decode('utf-8')}"} @@ -68,7 +68,6 @@ def decode_mock_token(token: str, secret: str = "SuperSecret") -> Dict[str, str] return claims -@pytest.mark.asyncio async def create_random_user(db: AsyncSession) -> User: """ Creates a random user in the database. @@ -84,7 +83,7 @@ async def create_random_user(db: AsyncSession) -> User: Newly created user. """ user = User( - uid=random_hex_string(), + lifescience_id=random_hex_string(), display_name=random_lower_string(), ) db.add(user) diff --git a/gunicorn_conf.py b/gunicorn_conf.py new file mode 100644 index 0000000000000000000000000000000000000000..7dd141dfc55f98de00b07daffea9a898677e9df4 --- /dev/null +++ b/gunicorn_conf.py @@ -0,0 +1,67 @@ +import json +import multiprocessing +import os + +workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1") +max_workers_str = os.getenv("MAX_WORKERS") +use_max_workers = None +if max_workers_str: + use_max_workers = int(max_workers_str) +web_concurrency_str = os.getenv("WEB_CONCURRENCY", None) + +host = os.getenv("HOST", "0.0.0.0") +port = os.getenv("PORT", "80") +bind_env = os.getenv("BIND", None) +use_loglevel = os.getenv("LOG_LEVEL", "info") +if bind_env: + use_bind = bind_env +else: + use_bind = f"{host}:{port}" + +cores = multiprocessing.cpu_count() +workers_per_core = float(workers_per_core_str) +default_web_concurrency = workers_per_core * cores +if web_concurrency_str: + web_concurrency = int(web_concurrency_str) + assert web_concurrency > 0 +else: + web_concurrency = max(int(default_web_concurrency), 2) + if use_max_workers: + web_concurrency = min(web_concurrency, use_max_workers) +accesslog_var = os.getenv("ACCESS_LOG", "-") +use_accesslog = accesslog_var or None +errorlog_var = os.getenv("ERROR_LOG", "-") +use_errorlog = errorlog_var or None +graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120") +timeout_str = os.getenv("TIMEOUT", "120") +keepalive_str = os.getenv("KEEP_ALIVE", "5") + +# Gunicorn config variables +loglevel = use_loglevel +workers = web_concurrency +bind = use_bind +errorlog = use_errorlog +worker_tmp_dir = "/dev/shm" +accesslog = use_accesslog +graceful_timeout = int(graceful_timeout_str) +timeout = int(timeout_str) +keepalive = int(keepalive_str) + + +# For debugging and testing +log_data = { + "loglevel": loglevel, + "workers": workers, + "bind": bind, + "graceful_timeout": graceful_timeout, + "timeout": timeout, + "keepalive": keepalive, + "errorlog": errorlog, + "accesslog": accesslog, + # Additional, non-gunicorn variables + "workers_per_core": workers_per_core, + "use_max_workers": use_max_workers, + "host": host, + "port": port, +} +print(json.dumps(log_data)) diff --git a/pyproject.toml b/pyproject.toml index c1825ab66eb937ecd4857c45c036dadee5c9f29c..e48e191ed42e52535b20dea692f0263c77c27b55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ line-length = 120 [tool.ruff] line-length = 120 -target-version = "py312" +target-version = "py311" [tool.mypy] plugins = ["pydantic.mypy", "sqlalchemy.ext.mypy.plugin"] diff --git a/requirements-dev.txt b/requirements-dev.txt index bb9b40c398cf581c4283c86c02f1b1ec65e57f08..0f686491db49a7b17af328d1a91e303b331b4116 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,11 +1,11 @@ # test packages -pytest>=7.4.0,<7.5.0 +pytest>=8.0.0,<8.1.0 pytest-asyncio>=0.21.0,<0.22.0 pytest-cov>=4.1.0,<4.2.0 coverage[toml]>=7.4.0,<7.5.0 # Linters ruff>=0.1.0,<0.2.0 -black>=23.12.0,<24.0.0 +black>=24.1.0,<24.2.0 isort>=5.13.0,<5.14.0 mypy>=1.8.0,<1.9.0 # stubs for mypy @@ -13,3 +13,4 @@ types-aiobotocore-lite[s3]>=2.9.0,<2.10.0 types-requests # Miscellaneous pre-commit>=3.6.0,<3.7.0 +uvicorn>=0.27.0,<0.28.0 diff --git a/requirements.txt b/requirements.txt index 3207bd527c4873e6fbcb3c0f957b741aede7838e..c56d80dc264b9aa8ed65f89fea64fe7096856a48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,10 @@ --extra-index-url https://gitlab.ub.uni-bielefeld.de/api/v4/projects/5493/packages/pypi/simple -clowmdb>=2.3.0,<2.4.0 +clowmdb>=3.0.0,<3.1.0 # Webserver packages fastapi>=0.109.0,<0.110.0 -pydantic>=2.5.0,<2.6.0 +pydantic>=2.6.0,<2.7.0 pydantic-settings>=2.1.0,<2.2.0 -uvicorn>=0.25.0,<0.26.0 # Database packages PyMySQL>=1.1.0,<1.2.0 SQLAlchemy>=2.0.0,<2.1.0 diff --git a/start_service_gunicorn.sh b/start_service_gunicorn.sh new file mode 100755 index 0000000000000000000000000000000000000000..1b65c1164b89af0c3f9da44d8135959f614d0c9d --- /dev/null +++ b/start_service_gunicorn.sh @@ -0,0 +1,7 @@ +#! /usr/bin/env sh +set -e + +./prestart.sh + +# Start Gunicorn +exec gunicorn -k uvicorn.workers.UvicornWorker -c /app/gunicorn_conf.py app.main:app diff --git a/start_service.sh b/start_service_uvicorn.sh similarity index 83% rename from start_service.sh rename to start_service_uvicorn.sh index 2fa8c34e1fec829ac3c9dc245c8ab9836287dd65..5361f1738b21097b2ae71a09a63ac69b472452d4 100755 --- a/start_service.sh +++ b/start_service_uvicorn.sh @@ -1,6 +1,7 @@ #! /usr/bin/env bash +set -e -./scripts/prestart.sh +./prestart.sh # Start webserver uvicorn app.main:app --host 0.0.0.0 --port 8000 --no-server-header