-
Daniel Göbel authored
#3
Daniel Göbel authored#3
abstract_repository.py 3.96 KiB
import asyncio
from abc import ABC, abstractmethod
from tempfile import SpooledTemporaryFile
from typing import TYPE_CHECKING
from fastapi import HTTPException, status
from httpx import AsyncClient
# Import the boto3 stub type only while static type checking; at runtime alias
# ``Object`` to ``object`` so annotations that mention it still resolve.
if TYPE_CHECKING:
    from mypy_boto3_s3.service_resource import Object
else:
    Object = object
class GitRepository(ABC):
    """
    Abstract base class for Git repositories.

    Subclasses provide the constructor, the download-URL construction and
    ``__repr__``. On top of ``downloadFileURL`` this class implements checks
    that files exist in the repository and a helper that copies a file from
    the repository into an S3 object.
    """

    @abstractmethod
    def __init__(self, url: str, git_commit_hash: str):
        """
        Initialize Git repository object.

        Parameters
        ----------
        url : str
            URL of the git repository
        git_commit_hash : str
            Git commit hash the repository is pinned to
        """
        ...

    @abstractmethod
    def downloadFileURL(self, filepath: str) -> str:
        """
        Construct a URL where to download a file from.

        Parameters
        ----------
        filepath : str
            Path of a file

        Returns
        -------
        url : str
            URL where to download the specified file from.
        """
        ...

    @abstractmethod
    def __repr__(self) -> str:
        ...

    def __str__(self) -> str:
        # The string form is deliberately the same as the repr.
        return repr(self)

    async def check_file_exists(self, filepath: str, client: AsyncClient, raise_error: bool = True) -> bool:
        """
        Check that a file exists in the Git Repository.

        The check is a HEAD request (following redirects) against the download
        URL of the file; only a final 200 response counts as "exists".

        Parameters
        ----------
        filepath : str
            Path to the file
        client : httpx.AsyncClient
            Async HTTP Client with an open connection
        raise_error : bool, default True
            Raise an HTTPException if the file doesn't exist.

        Returns
        -------
        exist : bool
            Flag if the file exists.

        Raises
        ------
        fastapi.HTTPException
            With status 400 if the file is missing and ``raise_error`` is True.
        """
        response = await client.head(self.downloadFileURL(filepath), follow_redirects=True)
        exist = response.status_code == status.HTTP_200_OK
        if raise_error and not exist:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"The file {filepath} is missing in the repo {str(self)}",
            )
        return exist

    async def check_files_exist(self, files: list[str], client: AsyncClient, raise_error: bool = True) -> list[bool]:
        """
        Check that multiple files exist in the Git Repository.

        All files are checked concurrently. The individual checks never raise
        for a missing file; missing files are collected and reported together.

        Parameters
        ----------
        files : list[str]
            Paths to the files to check
        client : httpx.AsyncClient
            Async HTTP Client with an open connection
        raise_error : bool, default True
            Raise an HTTPException if any of the files doesn't exist.

        Returns
        -------
        exist : list[bool]
            Flags if the files exist, in the same order as ``files``.

        Raises
        ------
        fastapi.HTTPException
            With status 400 if at least one file is missing and ``raise_error`` is True.
        """
        # gather() schedules the coroutines itself and keeps the input order.
        result = await asyncio.gather(
            *(self.check_file_exists(file, client=client, raise_error=False) for file in files)
        )
        if raise_error:
            missing_files = [f for f, exist in zip(files, result) if not exist]
            if missing_files:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"The files {', '.join(missing_files)} are missing in the repo {str(self)}",
                )
        return result

    async def copy_file_to_bucket(self, filepath: str, obj: Object, client: AsyncClient) -> None:
        """
        Copy a file from a git repository to a bucket.

        The file is streamed chunk by chunk into a temporary file and then
        uploaded from there.

        Parameters
        ----------
        filepath : str
            Path of the file to copy
        obj : mypy_boto3_s3.service_resource.Object
            S3 object to upload file to
        client : httpx.AsyncClient
            Async HTTP Client with an open connection
        """
        with SpooledTemporaryFile(max_size=129024) as f:  # temporary file with 126 KiB data spooled in memory
            # Follow redirects like check_file_exists does, so a file that passed
            # the existence check is also downloaded from its final location.
            async with client.stream("GET", self.downloadFileURL(filepath), follow_redirects=True) as response:
                async for chunk in response.aiter_bytes():
                    f.write(chunk)
            # Rewind before uploading: after writing, the file position is at the
            # end, and the upload would otherwise read no data.
            f.seek(0)
            # NOTE(review): this call is not awaited, so it runs synchronously
            # inside the coroutine - confirm that is acceptable for large files.
            obj.upload_fileobj(f)