Source code for intelligence_layer.evaluation.dataset.studio_dataset_repository

import warnings
from collections.abc import Iterable
from typing import Optional

from intelligence_layer.connectors import (
    SerializableDict,
    StudioClient,
)
from intelligence_layer.connectors.studio.studio import (
    StudioDataset,
    StudioExample,
)
from intelligence_layer.core import Input
from intelligence_layer.evaluation import (
    Dataset,
    DatasetRepository,
    Example,
    ExpectedOutput,
)


class StudioDatasetRepository(DatasetRepository):
    """Dataset repository that submits datasets through a :class:`StudioClient`.

    Only :meth:`create_dataset` is implemented at the moment; every other
    repository operation raises :class:`NotImplementedError`.
    """

    def __init__(self, studio_client: StudioClient) -> None:
        """Initializes the StudioDatasetRepository.

        Emits a warning on construction to signal the beta status of this repository.

        Args:
            studio_client: Client to interact with the Studio API.
        """
        self.studio_client = studio_client
        # stacklevel=2 attributes the warning to the code instantiating the
        # repository instead of to this line inside the library.
        warnings.warn(
            "The StudioDatasetRepository is currently in beta and only supports create_dataset.",
            stacklevel=2,
        )

    def create_dataset(
        self,
        examples: Iterable[Example[Input, ExpectedOutput]],
        dataset_name: str,
        id: str | None = None,
        labels: set[str] | None = None,
        metadata: SerializableDict | None = None,
    ) -> Dataset:
        """Creates a dataset from given :class:`Example`s and returns that dataset.

        The dataset and its examples are submitted via the Studio client, and the
        ID returned by the client is stored on the returned :class:`Dataset`.

        Args:
            examples: An :class:`Iterable` of :class:`Example`s to be saved in the same dataset.
            dataset_name: A name for the dataset.
            id: Must be `None`. Custom IDs are not supported because the ID is
                provided by Studio. Defaults to None.
            labels: A set of labels for filtering. `None` is treated as an empty set.
                Defaults to None.
            metadata: A dict for additional information about the dataset.
                `None` is treated as an empty dict. Defaults to None.

        Returns:
            The created :class:`Dataset`, carrying the ID returned by Studio.

        Raises:
            NotImplementedError: If `id` is not `None`.
        """
        if id is not None:
            raise NotImplementedError(
                "Custom dataset IDs are not supported by the StudioDatasetRepository"
            )

        created_dataset = Dataset(
            name=dataset_name,
            labels=labels or set(),
            metadata=metadata or dict(),
        )
        studio_dataset = self.map_to_studio_dataset(created_dataset)
        # Lazy generator: the examples are only converted while being consumed
        # by the client.
        studio_examples = self.map_to_many_studio_example(examples)

        studio_dataset_id = self.studio_client.submit_dataset(
            dataset=studio_dataset, examples=studio_examples
        )

        # Adopt the ID returned by Studio so callers can reference the dataset.
        created_dataset.id = studio_dataset_id
        return created_dataset

    def delete_dataset(self, dataset_id: str) -> None:
        """Deletes a dataset identified by the given dataset ID.

        Args:
            dataset_id: Dataset ID of the dataset to delete.

        Raises:
            NotImplementedError: Always; this operation is not supported yet.
        """
        raise NotImplementedError()

    def dataset(self, dataset_id: str) -> Optional[Dataset]:
        """Returns a dataset identified by the given dataset ID.

        Args:
            dataset_id: Dataset ID of the dataset to retrieve.

        Returns:
            :class:`Dataset` if it was found, `None` otherwise.

        Raises:
            NotImplementedError: Always; this operation is not supported yet.
        """
        raise NotImplementedError()

    def datasets(self) -> Iterable[Dataset]:
        """Returns all :class:`Dataset`s. Sorting is not guaranteed.

        Returns:
            :class:`Iterable` of :class:`Dataset`s.

        Raises:
            NotImplementedError: Always; this operation is not supported yet.
        """
        raise NotImplementedError()

    def dataset_ids(self) -> Iterable[str]:
        """Returns all sorted dataset IDs.

        Returns:
            :class:`Iterable` of dataset IDs.

        Raises:
            NotImplementedError: Always; this operation is not supported yet.
        """
        raise NotImplementedError()

    def example(
        self,
        dataset_id: str,
        example_id: str,
        input_type: type[Input],
        expected_output_type: type[ExpectedOutput],
    ) -> Optional[Example[Input, ExpectedOutput]]:
        """Returns an :class:`Example` for the given dataset ID and example ID.

        Args:
            dataset_id: Dataset ID of the linked dataset.
            example_id: ID of the example to retrieve.
            input_type: Input type of the example.
            expected_output_type: Expected output type of the example.

        Returns:
            :class:`Example` if it was found, `None` otherwise.

        Raises:
            NotImplementedError: Always; this operation is not supported yet.
        """
        raise NotImplementedError()

    def examples(
        self,
        dataset_id: str,
        input_type: type[Input],
        expected_output_type: type[ExpectedOutput],
        examples_to_skip: Optional[frozenset[str]] = None,
    ) -> Iterable[Example[Input, ExpectedOutput]]:
        """Returns all :class:`Example`s for the given dataset ID sorted by their ID.

        Args:
            dataset_id: Dataset ID whose examples should be retrieved.
            input_type: Input type of the example.
            expected_output_type: Expected output type of the example.
            examples_to_skip: Optional set of example IDs. Those examples will be
                excluded from the output. Defaults to None.

        Returns:
            :class:`Iterable` of :class:`Example`s.

        Raises:
            NotImplementedError: Always; this operation is not supported yet.
        """
        raise NotImplementedError()

    def map_to_studio_example(
        self, example_to_map: Example[Input, ExpectedOutput]
    ) -> StudioExample[Input, ExpectedOutput]:
        """Converts an :class:`Example` into a :class:`StudioExample`.

        Args:
            example_to_map: The example to convert.

        Returns:
            A :class:`StudioExample` built from the dumped fields of the example.
        """
        return StudioExample(**example_to_map.model_dump())

    def map_to_many_studio_example(
        self, examples_to_map: Iterable[Example[Input, ExpectedOutput]]
    ) -> Iterable[StudioExample[Input, ExpectedOutput]]:
        """Lazily converts :class:`Example`s into :class:`StudioExample`s.

        Args:
            examples_to_map: The examples to convert.

        Returns:
            A generator yielding one :class:`StudioExample` per input example.
            Conversion happens on iteration, and the result can be consumed only once.
        """
        return (self.map_to_studio_example(example) for example in examples_to_map)

    def map_to_studio_dataset(self, dataset_to_map: Dataset) -> StudioDataset:
        """Converts a :class:`Dataset` into a :class:`StudioDataset`.

        Args:
            dataset_to_map: The dataset to convert.

        Returns:
            A :class:`StudioDataset` built from the dumped fields of the dataset.
        """
        return StudioDataset(**dataset_to_map.model_dump())