diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ff91b0d74cffb33ef2ced1ef0de704b7ffe3171f..501e4a649cea7a9bcc8b48a04b33edc1213f0a62 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,22 +14,17 @@ repos: - id: debug-statements - id: check-merge-conflict - id: check-ast -- repo: https://github.com/psf/black - rev: 24.4.0 - hooks: - - id: black - files: app - args: [--check] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.3.7' + rev: 'v0.4.1' hooks: - - id: ruff + - id: ruff + args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - id: isort files: app - args: [-c] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.9.0 hooks: diff --git a/README.md b/README.md index f50b8e3d561e1fd4c4ba36e47e3570538678ed56..5977a3ca283bab7db1448637dc070164f82cc96e 100644 --- a/README.md +++ b/README.md @@ -44,14 +44,16 @@ user-friendly manner. 👠### S3 -| Env variable | Config file key | Default | Value | Example | Description | -|--------------------------------|-----------------------|---------|----------|--------------------------|----------------------------------------------------------------------------------| -| * `CLOWM_S3__URI` | `s3.uri` | unset | HTTP URL | `http://localhost` | URI of the S3 Object Storage | -| * `CLOWM_S3__ACCESS_KEY` | `s3.acess_key` | unset | String | `ZR7U56KMK20VW` | Access key for the S3 that owns the buckets | -| * `CLOWM_S3__SECRET_KEY` | `s3.secret_key` | unset | String | `9KRUU41EGSCB3H9ODECNHW` | Secret key for the S3 that owns the buckets | -| * `CLOWM_S3__USERNAME` | `s3.username` | unset | String | `clowm-bucket-manager` | ID of the user in ceph who owns all the buckets. Owner of `CLOWM_S3__ACCESS_KEY` | -| * `CLOWM_S3__ADMIN_ACCESS_KEY` | `s3.admin_acess_key` | unset | String | `ZR7U56KMK20VW` | Access key for the Ceph Object Gateway user with `user=*,bucket=*` capabilities. 
| -| * `CLOWM_S3__ADMIN_SECRET_KEY` | `s3.admin_secret_key` | unset | String | `9KRUU41EGSCB3H9ODECNHW` | Secret key for the Ceph Object Gateway user with `user=*,bucket=*` capabilities. | +| Env variable | Config file key | Default | Value | Example | Description | +|-----------------------------------------|----------------------------------|-----------|----------|----------------------------|----------------------------------------------------------------------------------| +| * `CLOWM_S3__URI` | `s3.uri` | unset | HTTP URL | `http://localhost` | URI of the S3 Object Storage | +| * `CLOWM_S3__ACCESS_KEY` | `s3.access_key` | unset | String | `ZR7U56KMK20VW` | Access key for the S3 that owns the buckets | +| * `CLOWM_S3__SECRET_KEY` | `s3.secret_key` | unset | String | `9KRUU41EGSCB3H9ODECNHW` | Secret key for the S3 that owns the buckets | +| * `CLOWM_S3__USERNAME` | `s3.username` | unset | String | `clowm-bucket-manager` | ID of the user in ceph who owns all the buckets. Owner of `CLOWM_S3__ACCESS_KEY` | +| * `CLOWM_S3__ADMIN_ACCESS_KEY` | `s3.admin_access_key` | unset | String | `ZR7U56KMK20VW` | Access key for the Ceph Object Gateway user with `user=*,bucket=*` capabilities. | +| * `CLOWM_S3__ADMIN_SECRET_KEY` | `s3.admin_secret_key` | unset | String | `9KRUU41EGSCB3H9ODECNHW` | Secret key for the Ceph Object Gateway user with `user=*,bucket=*` capabilities. | +| `CLOWM_S3__DEFAULT_BUCKET_SIZE_LIMIT` | `s3.default_bucket_size_limit` | `400 GiB` | ByteSize | `10 KB`, `10 KiB`, `10 MB` | Size limit of a new Bucket. Between `1 KiB` and `4.3 TB` | +| `CLOWM_S3__DEFAULT_BUCKET_OBJECT_LIMIT` | `s3.default_bucket_object_limit` | `40000` | Integer | `10000` | Maximum number of objects in a new bucket. 
Must be $<2^{32}$ | ### Security diff --git a/app/api/__init__.py b/app/api/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..994dbb4486665c2bdcde9fd46ca30390ee7e0264 100644 --- a/app/api/__init__.py +++ b/app/api/__init__.py @@ -0,0 +1,51 @@ +from typing import Any + +from fastapi import APIRouter, Depends, status + +from app.schemas.security import ErrorDetail + +from .dependencies import decode_bearer_token +from .endpoints import bucket_permission_router, bucket_router, miscellaneous_router, s3key_router + +__all__ = ["api_router"] + +alternative_responses: dict[int | str, dict[str, Any]] = { + status.HTTP_400_BAD_REQUEST: { + "model": ErrorDetail, + "description": "Error decoding JWT Token", + "content": {"application/json": {"example": {"detail": "Malformed JWT Token"}}}, + }, + status.HTTP_401_UNAUTHORIZED: { + "model": ErrorDetail, + "description": "Not authenticated", + "content": {"application/json": {"example": {"detail": "Not authenticated"}}}, + }, + status.HTTP_403_FORBIDDEN: { + "model": ErrorDetail, + "description": "Not authorized", + "content": {"application/json": {"example": {"detail": "Not authorized"}}}, + }, + status.HTTP_404_NOT_FOUND: { + "model": ErrorDetail, + "description": "Entity not Found", + "content": {"application/json": {"example": {"detail": "Entity not found."}}}, + }, +} + +api_router = APIRouter() +api_router.include_router( + bucket_router, + dependencies=[Depends(decode_bearer_token)], + responses=alternative_responses, +) +api_router.include_router( + s3key_router, + dependencies=[Depends(decode_bearer_token)], + responses=alternative_responses, +) +api_router.include_router( + bucket_permission_router, + dependencies=[Depends(decode_bearer_token)], + responses=alternative_responses, +) +api_router.include_router(miscellaneous_router) diff --git a/app/api/api.py b/app/api/api.py deleted file mode 100644 index a92574c713fd45edc8ec0a9b861878eb592f9cd6..0000000000000000000000000000000000000000 --- a/app/api/api.py 
+++ /dev/null @@ -1,49 +0,0 @@ -from typing import Any - -from fastapi import APIRouter, Depends, status - -from app.api.dependencies import decode_bearer_token -from app.api.endpoints import bucket_permissions, buckets, s3key -from app.api.endpoints.miscellaneous_endpoints import router as miscellaneous_router -from app.schemas.security import ErrorDetail - -alternative_responses: dict[int | str, dict[str, Any]] = { - status.HTTP_400_BAD_REQUEST: { - "model": ErrorDetail, - "description": "Error decoding JWT Token", - "content": {"application/json": {"example": {"detail": "Malformed JWT Token"}}}, - }, - status.HTTP_401_UNAUTHORIZED: { - "model": ErrorDetail, - "description": "Not authenticated", - "content": {"application/json": {"example": {"detail": "Not authenticated"}}}, - }, - status.HTTP_403_FORBIDDEN: { - "model": ErrorDetail, - "description": "Not authorized", - "content": {"application/json": {"example": {"detail": "Not authorized"}}}, - }, - status.HTTP_404_NOT_FOUND: { - "model": ErrorDetail, - "description": "Entity not Found", - "content": {"application/json": {"example": {"detail": "Entity not found."}}}, - }, -} - -api_router = APIRouter() -api_router.include_router( - buckets.router, - dependencies=[Depends(decode_bearer_token)], - responses=alternative_responses, -) -api_router.include_router( - s3key.router, - dependencies=[Depends(decode_bearer_token)], - responses=alternative_responses, -) -api_router.include_router( - bucket_permissions.router, - dependencies=[Depends(decode_bearer_token)], - responses=alternative_responses, -) -api_router.include_router(miscellaneous_router) diff --git a/app/api/endpoints/__init__.py b/app/api/endpoints/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4bf7642e8a013ddd26a94f360ed8effa12a76b44 100644 --- a/app/api/endpoints/__init__.py +++ b/app/api/endpoints/__init__.py @@ -0,0 +1,6 @@ +from .bucket_permissions import router as bucket_permission_router +from .buckets import router as bucket_router 
+from .miscellaneous_endpoints import router as miscellaneous_router +from .s3key import router as s3key_router + +__all__ = ["bucket_router", "bucket_permission_router", "s3key_router", "miscellaneous_router"] diff --git a/app/api/endpoints/buckets.py b/app/api/endpoints/buckets.py index 299eed082767ea20c6753574059ea36534bd836c..b592dcd3dcc0945694b82f3e4c18d2ceef974bd9 100644 --- a/app/api/endpoints/buckets.py +++ b/app/api/endpoints/buckets.py @@ -17,6 +17,7 @@ from app.api.dependencies import ( RGWAdminResource, S3Resource, ) +from app.ceph.rgw import update_bucket_limits as rgw_update_bucket_limits from app.ceph.s3 import get_s3_bucket_objects, get_s3_bucket_policy, put_s3_bucket_policy from app.core.config import settings from app.crud import CRUDBucket, CRUDBucketPermission, DuplicateError @@ -108,8 +109,6 @@ async def list_buckets( Async database session to perform query on. Dependency Injection. current_user : clowmdb.models.User Current user who will be the owner of the newly created bucket. Dependency Injection. - s3 : boto3_type_annotations.s3.ServiceResource - S3 Service to perform operations on buckets in Ceph. Dependency Injection. authorization : Callable[[str], Awaitable[Any]] Async function to ask the auth service for authorization. Dependency Injection. Returns @@ -143,6 +142,7 @@ async def create_bucket( db: DBSession, s3: S3Resource, authorization: Authorization, + rgw: RGWAdminResource, ) -> Bucket: """ Create a bucket for the current user.\n @@ -163,6 +163,8 @@ async def create_bucket( S3 Service to perform operations on buckets in Ceph. Dependency Injection. authorization : Callable[[str], Awaitable[Any]] Async function to ask the auth service for authorization. Dependency Injection. + rgw : rgwadmin.RGWAdmin + RGW admin interface to manage Ceph's object store. Dependency Injection. 
Returns ------- @@ -173,7 +175,13 @@ async def create_bucket( current_span.set_attribute("bucket_name", bucket.name) await authorization("create") try: - db_bucket = await CRUDBucket.create(bucket, current_user.uid, db=db) + db_bucket = await CRUDBucket.create( + bucket, + current_user.uid, + db=db, + size_limit=int(settings.s3.default_bucket_size_limit.to("KiB")), + object_limit=settings.s3.default_bucket_object_limit, + ) except DuplicateError as e: current_span.record_exception(e) raise HTTPException( @@ -200,7 +208,15 @@ async def create_bucket( "Sid": "PseudoOwnerPerm", "Effect": "Allow", "Principal": {"AWS": [f"arn:aws:iam:::user/{current_user.uid}"]}, - "Action": ["s3:GetObject", "s3:DeleteObject", "s3:PutObject", "s3:ListBucket"], + "Action": [ + "s3:GetObject", + "s3:DeleteObject", + "s3:PutObject", + "s3:ListBucket", + "s3:AbortMultipartUpload", + "s3:ListBucketMultipartUploads", + "s3:ListMultipartUploadParts", + ], "Resource": [f"arn:aws:s3:::{db_bucket.name}/*", f"arn:aws:s3:::{db_bucket.name}"], }, ], @@ -210,6 +226,7 @@ async def create_bucket( with tracer.start_as_current_span("s3_put_bucket_cors_rules") as span: span.set_attribute("bucket_name", db_bucket.name) s3_bucket.Cors().put(CORSConfiguration=cors_rule) # type: ignore[arg-type] + rgw_update_bucket_limits(rgw=rgw, bucket=db_bucket) return db_bucket @@ -342,27 +359,10 @@ async def update_bucket_limits( if limits.object_limit is not None: # pragma: no cover current_span.set_attribute("object_limit", limits.object_limit) await authorization("update_any") - with tracer.start_as_current_span( - "rgw_set_bucket_limits", - attributes={ - "bucket_name": bucket.name, - "enabled": limits.object_limit is not None or limits.size_limit is not None, - }, - ) as span: - if limits.size_limit is not None: # pragma: no cover - span.set_attribute("size_limit", ByteSize(limits.size_limit * 1024).human_readable()) - if limits.object_limit is not None: # pragma: no cover - span.set_attribute("object_limit", 
limits.object_limit) - rgw.set_bucket_quota( - uid=str(bucket.owner_id), - bucket=bucket.name, - max_size_kb=-1 if limits.size_limit is None else limits.size_limit, - max_objects=-1 if limits.object_limit is None else limits.object_limit, - enabled=limits.object_limit is not None or limits.size_limit is not None, - ) await CRUDBucket.update_bucket_limits( db=db, bucket_name=bucket.name, object_limit=limits.object_limit, size_limit=limits.size_limit ) + rgw_update_bucket_limits(rgw=rgw, bucket=bucket) return bucket diff --git a/app/ceph/rgw.py b/app/ceph/rgw.py index 3b538f06ad5cc22ba40b868f520dadfdb0f48c05..f0dc1cd8767c2d3ab9416a42c9db724c47c752c7 100644 --- a/app/ceph/rgw.py +++ b/app/ceph/rgw.py @@ -1,6 +1,8 @@ from uuid import UUID +from clowmdb.models import Bucket from opentelemetry import trace +from pydantic import ByteSize from rgwadmin import RGWAdmin from app.core.config import settings @@ -8,6 +10,8 @@ from app.schemas.s3key import S3Key tracer = trace.get_tracer_provider().get_tracer(__name__) +__all__ = ["rgw", "get_s3_keys", "update_bucket_limits"] + rgw = RGWAdmin( access_key=settings.s3.admin_access_key, secret_key=settings.s3.admin_secret_key.get_secret_value(), @@ -19,3 +23,24 @@ rgw = RGWAdmin( def get_s3_keys(rgw: RGWAdmin, uid: UUID) -> list[S3Key]: with tracer.start_as_current_span("s3_get_user_keys", attributes={"uid": str(uid)}): return [S3Key(uid=uid, **key) for key in rgw.get_user(uid=str(uid), stats=False)["keys"]] + + +def update_bucket_limits(rgw: RGWAdmin, bucket: Bucket) -> None: + with tracer.start_as_current_span( + "rgw_set_bucket_limits", + attributes={ + "bucket_name": bucket.name, + "enabled": bucket.object_limit is not None or bucket.size_limit is not None, + }, + ) as span: + if bucket.size_limit is not None: # pragma: no cover + span.set_attribute("size_limit", ByteSize(bucket.size_limit * 1024).human_readable()) + if bucket.object_limit is not None: # pragma: no cover + span.set_attribute("object_limit", bucket.object_limit) 
+ rgw.set_bucket_quota( + uid=settings.s3.username, + bucket=bucket.name, + max_size_kb=-1 if bucket.size_limit is None else bucket.size_limit, + max_objects=-1 if bucket.object_limit is None else bucket.object_limit, + enabled=bucket.object_limit is not None or bucket.size_limit is not None, + ) diff --git a/app/core/config.py b/app/core/config.py index 28b234b657c6f41eccf966db2cb3046c06e017c2..0c1e6674c8775869cb31d565f1a9af090d8f9e5e 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -2,7 +2,7 @@ import os from functools import cached_property from typing import Literal, Type -from pydantic import AnyHttpUrl, BaseModel, Field, FilePath, MySQLDsn, NameEmail, SecretStr +from pydantic import AnyHttpUrl, BaseModel, ByteSize, Field, FilePath, MySQLDsn, NameEmail, SecretStr, field_validator from pydantic_settings import ( BaseSettings, JsonConfigSettingsSource, @@ -57,6 +57,20 @@ class S3Settings(BaseModel): admin_secret_key: SecretStr = Field( ..., description="Secret key for the Ceph Object Gateway user with `user=*,bucket=*` capabilities." 
) + # 25 * 2**34 bytes = 400 GiB + default_bucket_size_limit: ByteSize = Field(ByteSize(25 * 2**34), description="Size limit of a new Bucket") + default_bucket_object_limit: int = Field( + 40000, gt=0, lt=2**32, description="Maximum number of objects in a new bucket" + ) + + @field_validator("default_bucket_size_limit") + @classmethod + def default_bucket_size_limit_validator(cls, size: ByteSize) -> ByteSize: + if size.to("KiB") >= 2**32: + raise ValueError("size can be maximal 4.3TB") + elif size.to("KiB") < 1: + raise ValueError("size must be at least 1 KiB") + return size class OPASettings(BaseModel): diff --git a/app/crud/crud_bucket.py b/app/crud/crud_bucket.py index 033df16a53fa2f302f54b713e5a77558dd334f5e..4a2970664475e7700daa59b745ccc81fb1c7c831 100644 --- a/app/crud/crud_bucket.py +++ b/app/crud/crud_bucket.py @@ -157,7 +157,14 @@ class CRUDBucket: return (await db.scalars(stmt)).all() @staticmethod - async def create(bucket_in: BucketInSchema, uid: UUID, *, db: AsyncSession) -> Bucket: + async def create( + bucket_in: BucketInSchema, + uid: UUID, + size_limit: int | None = None, + object_limit: int | None = None, + *, + db: AsyncSession, + ) -> Bucket: """ Create a bucket for a given user. @@ -175,7 +182,9 @@ class CRUDBucket: bucket : clowmdb.models.Bucket Returns the created bucket. 
""" - bucket = Bucket(**bucket_in.model_dump(), owner_id_bytes=uid.bytes) + bucket = Bucket( + **bucket_in.model_dump(), owner_id_bytes=uid.bytes, size_limit=size_limit, object_limit=object_limit + ) with tracer.start_as_current_span( "db_create_bucket", attributes={"uid": str(uid), "bucket_name": bucket.name}, diff --git a/app/main.py b/app/main.py index a3212bc5327a108469f8c613ccbd980b6a4fa30a..9a2be876e0bf8ca761b1bdc5efb04a6fc4e4b340 100644 --- a/app/main.py +++ b/app/main.py @@ -18,7 +18,7 @@ from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.trace import Status, StatusCode -from app.api.api import api_router +from app.api import api_router from app.core.config import settings description = """ diff --git a/app/tests/api/test_buckets.py b/app/tests/api/test_buckets.py index d740c8ab6219fbe6d3f1cdf925e93a9070d94e36..eb2971b51a84ad0a44e5a9e03834ec5ece93c203 100644 --- a/app/tests/api/test_buckets.py +++ b/app/tests/api/test_buckets.py @@ -6,6 +6,7 @@ from pydantic import TypeAdapter from sqlalchemy.ext.asyncio import AsyncSession from app.api.endpoints.buckets import ANONYMOUS_ACCESS_SID +from app.core.config import settings from app.crud import CRUDBucket from app.schemas.bucket import BucketIn, BucketOut, BucketSizeLimits from app.tests.mocks.mock_s3_resource import MockS3ServiceResource @@ -211,9 +212,10 @@ class TestBucketRoutesCreate(_TestBucketRoutes): assert response.status_code == status.HTTP_201_CREATED bucket = BucketOut.model_validate_json(response.content) - assert bucket assert bucket.name == bucket_info.name assert bucket.owner_id == random_user.user.uid + assert bucket.size_limit == settings.s3.default_bucket_size_limit.to("KiB") + assert bucket.object_limit == settings.s3.default_bucket_object_limit db_bucket = await CRUDBucket.get(bucket_info.name, db=db) assert db_bucket diff --git a/pyproject.toml b/pyproject.toml index 
cc3c81d43a9679ee28ba9f36b46ea5befe903f8f..402c0aa4f086183c2c4528f6db6ff7f3829c3d54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,9 +3,6 @@ profile = "black" line_length = 120 balanced_wrapping = true -[tool.black] -line-length = 120 - [tool.ruff] line-length = 120 target-version = "py312" diff --git a/requirements-dev.txt b/requirements-dev.txt index 9efc29a1be52a79a2300481986955bbb7b4c380f..f666b8a3edcd5f1112672c16d28805bcf91caf69 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,10 +2,9 @@ pytest>=8.0.0,<8.2.0 pytest-asyncio>=0.21.0,<0.22.0 pytest-cov>=5.0.0,<5.1.0 -coverage[toml]>=7.4.0,<7.5.0 +coverage[toml]>=7.4.0,<7.6.0 # Linters -ruff<0.4.0 -black>=24.2.0,<24.5.0 +ruff>=0.4.0,<0.5.0 isort>=5.13.0,<5.14.0 mypy>=1.8.0,<1.10.0 # stubs for mypy diff --git a/scripts/format.sh b/scripts/format.sh index 1c5f45d44046e7f093cf64e34bdfdb713182731e..97ce08c64f5490abaf2f94c6ca590409756bdb9e 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -1,7 +1,6 @@ #!/bin/sh -e set -x -isort --force-single-line-imports app +ruff format app ruff check --fix --show-fixes app -black app isort app diff --git a/scripts/lint.sh b/scripts/lint.sh index 3ae0ba81d6719f3be5502679b18e0a149209d6f6..909febe2f22ca730e80c190fa687ed70257295a5 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -4,12 +4,11 @@ set -x ruff --version ruff check app +ruff format --diff app isort --version isort -c app -black --version -black app --check mypy --version mypy app