Source code for intelligence_layer.evaluation.infrastructure.hugging_face_repository

from pathlib import Path

import huggingface_hub  # type: ignore

from intelligence_layer.evaluation.infrastructure.file_system_based_repository import (
    FileSystemBasedRepository,
)


class HuggingFaceRepository(FileSystemBasedRepository):
    """HuggingFace base repository.

    Stores its data in a HuggingFace repository of type ``"dataset"`` and
    accesses it through a ``huggingface_hub.HfFileSystem``.
    """

    _REPO_TYPE = "dataset"
    _ROOT_DIRECTORY_PREFIX_ = "datasets"  # HuggingFace API root directory

    @staticmethod
    def path_to_str(path: Path) -> str:
        """Return ``path`` as a string using forward slashes.

        Args:
            path: The path to convert.

        Returns:
            The POSIX-style string representation of ``path``.
        """
        return path.as_posix()

    def __init__(self, repository_id: str, token: str, private: bool) -> None:
        """Create a HuggingFace repository.

        Creates a corresponding repository and initializes the file system.

        Args:
            repository_id: The HuggingFace namespace and repository name, separated by a "/".
            token: The HuggingFace authentication token.
            private: Whether the dataset repository should be private.

        Raises:
            ValueError: If ``repository_id`` is empty or ends with a "/".
        """
        # Validate explicitly instead of with `assert`: assertions are removed
        # under `python -O`, and indexing an empty string would raise an
        # unrelated IndexError.
        if not repository_id or repository_id.endswith("/"):
            raise ValueError(
                f"repository_id must be non-empty and must not end with '/': {repository_id!r}"
            )

        self.create_repository(repository_id, token, private)

        file_system = huggingface_hub.HfFileSystem(token=token)
        root_directory = Path(f"{self._ROOT_DIRECTORY_PREFIX_}/{repository_id}")

        super().__init__(file_system, root_directory)
        self._repository_id = repository_id

        # the file system is assigned in super init but this fixes the typing
        self._file_system: huggingface_hub.HfFileSystem

    def create_repository(self, repository_id: str, token: str, private: bool) -> None:
        """Create the HuggingFace repository; an existing one is not an error.

        Args:
            repository_id: The HuggingFace namespace and repository name, separated by a "/".
            token: The HuggingFace authentication token.
            private: Whether the dataset repository should be private.
        """
        huggingface_hub.create_repo(
            repo_id=repository_id,
            token=token,
            repo_type=self._REPO_TYPE,
            private=private,
            exist_ok=True,  # do not fail when the repository already exists
        )

    def delete_repository(self) -> None:
        """Delete the HuggingFace repository backing this instance.

        Uses the token held by the file system; a repository that is already
        missing is not treated as an error.
        """
        huggingface_hub.delete_repo(
            repo_id=self._repository_id,
            token=self._file_system.token,
            repo_type=self._REPO_TYPE,
            missing_ok=True,  # do not fail when the repository is already gone
        )