Skip to content
Snippets Groups Projects
abstract_repository.py 3.96 KiB
import asyncio
from abc import ABC, abstractmethod
from tempfile import SpooledTemporaryFile
from typing import TYPE_CHECKING

from fastapi import HTTPException, status
from httpx import AsyncClient

if TYPE_CHECKING:
    from mypy_boto3_s3.service_resource import Object
else:
    Object = object


class GitRepository(ABC):
    """
    Abstract base class for Git repositories reachable over HTTP.

    Subclasses supply the constructor, the rule for building a download URL
    (`downloadFileURL`) and a `__repr__`. The concrete helpers defined here
    use that URL to check whether files exist and to copy a file into an
    S3 object.
    """

    @abstractmethod
    def __init__(self, url: str, git_commit_hash: str):
        """
        Initialize Git repository object.

        Parameters
        ----------
        url : str
            URL of the git repository
        git_commit_hash : str
            Git commit hash the repository is pinned down to
        """
        ...

    @abstractmethod
    def downloadFileURL(self, filepath: str) -> str:
        """
        Construct a URL where a file can be downloaded from.

        Parameters
        ----------
        filepath : str
            Path of a file

        Returns
        -------
        url : str
            URL where to download the specified file from.
        """
        ...

    @abstractmethod
    def __repr__(self) -> str:
        ...

    def __str__(self) -> str:
        # The string form is deliberately the same as the repr; it is used in error messages.
        return repr(self)

    async def check_file_exists(self, filepath: str, client: AsyncClient, raise_error: bool = True) -> bool:
        """
        Check that a file exists in the Git Repository.

        A HEAD request is sent to the download URL of the file (following
        redirects); the file counts as existing only if the final response
        has status code 200.

        Parameters
        ----------
        filepath : str
            Path to the file
        client : httpx.AsyncClient
            Async HTTP Client with an open connection
        raise_error : bool, default True
            Raise an HTTPException if the file doesn't exist.

        Returns
        -------
        exist : bool
            Flag if the file exists.

        Raises
        ------
        fastapi.HTTPException
            With status 400 if the file doesn't exist and `raise_error` is True.
        """
        response = await client.head(self.downloadFileURL(filepath), follow_redirects=True)
        exist = response.status_code == status.HTTP_200_OK
        if raise_error and not exist:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"The file {filepath} is missing in the repo {self!s}",
            )
        return exist

    async def check_files_exist(self, files: list[str], client: AsyncClient, raise_error: bool = True) -> list[bool]:
        """
        Check that multiple files exist in the Git Repository.

        All files are checked concurrently. With `raise_error` enabled, a
        single exception naming every missing file is raised after all checks
        have finished.

        Parameters
        ----------
        files : list[str]
            Paths to the file to check
        client : httpx.AsyncClient
            Async HTTP Client with an open connection
        raise_error : bool, default True
            Raise an HTTPException if any of the files doesn't exist.

        Returns
        -------
        exist : list[bool]
            Flags if the files exist, in the same order as `files`.

        Raises
        ------
        fastapi.HTTPException
            With status 400 if at least one file doesn't exist and `raise_error` is True.
        """
        # The individual checks never raise for a missing file, so every file
        # is examined and all missing ones can be reported together below.
        result = await asyncio.gather(
            *(self.check_file_exists(file, client=client, raise_error=False) for file in files)
        )
        if raise_error:
            missing_files = [file for file, exist in zip(files, result) if not exist]
            if missing_files:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"The files {', '.join(missing_files)} are missing in the repo {self!s}",
                )
        return result

    async def copy_file_to_bucket(self, filepath: str, obj: Object, client: AsyncClient) -> None:
        """
        Copy a file from a git repository to a bucket.

        The file is streamed chunk by chunk into a temporary file and then
        uploaded to the given S3 object.

        Parameters
        ----------
        filepath : str
            Path of the file to copy
        obj : mypy_boto3_s3.service_resource.Object
            S3 object to upload file to
        client : httpx.AsyncClient
            Async HTTP Client with an open connection
        """
        # Temporary file that keeps up to 126 KiB in memory before rolling over to disk.
        with SpooledTemporaryFile(max_size=129024) as f:
            # Follow redirects like check_file_exists does, so a file that was
            # reported as existing is fetched from the same final location.
            async with client.stream("GET", self.downloadFileURL(filepath), follow_redirects=True) as response:
                async for chunk in response.aiter_bytes():
                    f.write(chunk)
            # Rewind before uploading: after the writes the position is at the
            # end of the file, and reading from there would upload no data.
            f.seek(0)
            # NOTE(review): this call is not awaited, so it runs synchronously
            # inside the coroutine - confirm blocking here is acceptable.
            obj.upload_fileobj(f)