Source code for intelligence_layer.connectors.data.models

import io
from datetime import datetime
from enum import Enum
from typing import Any, Optional

from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel, to_snake


class BaseDataModel(BaseModel):
    """Shared pydantic base class for the data connector models.

    The model configuration uses ``to_camel`` as alias generator, so field
    aliases are the camelCase form of the snake_case attribute names, and
    enables ``arbitrary_types_allowed`` for field types.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        alias_generator=to_camel,
    )


class MediaTypes(str, Enum):
    """Media types that can be declared for repository data.

    Members are ``str`` instances as well, so each one compares equal to its
    raw media-type string.
    """

    # Two distinct media-type strings, both named after JSON Lines.
    jsonlines = "application/x-ndjson"
    jsonlines_ = "application/jsonlines"


class Modality(str, Enum):
    """Modalities that can be declared for repository data.

    Members are ``str`` instances as well, so each one compares equal to its
    raw string value.
    """

    text = "text"


class DataRepository(BaseDataModel):
    """Data Repository model.

    Attributes:
        repository_id: Repository ID that identifies the repository
            (a group of datasets).
        name: Name of the repository.
        mutable: Indicates whether the datasets in the repository are
            mutable.
        media_type: Media type of the data; must be a ``MediaTypes`` value,
            e.g. ``application/x-ndjson``.
        modality: Modality of the data; must be a ``Modality`` value,
            e.g. ``text``.
        created_at: Datetime when the repository was created.
        updated_at: Datetime when the repository was updated.
    """

    repository_id: str
    name: str
    mutable: bool
    media_type: MediaTypes
    modality: Modality
    created_at: datetime
    updated_at: datetime
class DataRepositoryCreate(BaseDataModel):
    """Data Repository creation model.

    Attributes:
        name: Name of the repository.
        media_type: Media type of the data; must be a ``MediaTypes`` value,
            e.g. ``application/x-ndjson``.
        modality: Modality of the data; must be a ``Modality`` value,
            e.g. ``text``.
    """

    name: str
    media_type: MediaTypes
    modality: Modality
class DataDataset(BaseDataModel):
    """Dataset model.

    Attributes:
        repository_id: Repository ID that identifies the repository
            (a group of datasets).
        dataset_id: Dataset ID that identifies the dataset.
        name: Name of the dataset; optional, defaults to ``None``.
        labels: List of labels of the dataset; optional, defaults to
            ``None``.
        total_datapoints: Total number of units in the dataset.
        metadata: Metadata of the dataset; optional, defaults to ``None``.
        created_at: Datetime when the dataset was created.
        updated_at: Datetime when the dataset was updated.
    """

    repository_id: str
    dataset_id: str
    name: Optional[str] = None
    labels: Optional[list[str]] = None
    total_datapoints: int
    metadata: Optional[dict[str, Any]] = None
    created_at: datetime
    updated_at: datetime
class DatasetCreate(BaseDataModel):
    """Dataset creation model.

    Attributes:
        source_data: Source data of the dataset, either as raw ``bytes`` or
            as a file-like ``io.BufferedReader``.
        name: Name of the dataset; optional, defaults to ``None``.
        labels: List of labels of the dataset (required).
        total_datapoints: Total number of units in the dataset (required).
        metadata: Metadata of the dataset; optional, defaults to ``None``.
    """

    # Unlike the base model's ``to_camel`` generator, this model generates
    # snake_case aliases. ``arbitrary_types_allowed`` is presumably what lets
    # ``io.BufferedReader`` be used as a field type -- confirm against the
    # pydantic configuration docs.
    model_config = ConfigDict(alias_generator=to_snake, arbitrary_types_allowed=True)

    source_data: io.BufferedReader | bytes
    name: Optional[str] = None
    labels: list[str]
    total_datapoints: int
    metadata: Optional[dict[str, Any]] = None