# Source code for intelligence_layer.connectors.data.models

import io
from datetime import datetime
from enum import Enum
from typing import Annotated, Any, Optional

from pydantic import AfterValidator, BaseModel, ConfigDict
from pydantic.alias_generators import to_camel


class BaseDataModel(BaseModel):
    """Common base class for the data models defined in this module.

    The shared configuration generates camelCase aliases for every field,
    still allows population by the original field name, and permits
    arbitrary (non-pydantic) types as field annotations.
    """

    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        alias_generator=to_camel,
    )


# Media types accepted by ``media_type_validator``.
allowed_media_types = ["application/x-ndjson", "application/jsonlines", "jsonlines"]


def media_type_validator(v: str) -> str:
    """Check that ``v`` is one of the supported media types.

    Args:
        v: Media type string to check.

    Returns:
        ``v`` unchanged when it is listed in ``allowed_media_types``.

    Raises:
        ValueError: If ``v`` is not listed in ``allowed_media_types``.
    """
    # Raise explicitly instead of using ``assert``: assert statements are
    # removed when Python runs with ``-O``, which would silently turn this
    # validator into a no-op.
    if v not in allowed_media_types:
        raise ValueError(
            f"Unsupported media type {v!r}; expected one of {allowed_media_types}"
        )
    return v


# ``str`` annotation with ``media_type_validator`` attached as an after-validator,
# so only the entries of ``allowed_media_types`` are accepted for fields of this type.
custom_media_type = Annotated[str, AfterValidator(media_type_validator)]


class Modality(str, Enum):
    """Modality of the data; members are also plain ``str`` values."""

    # Only text data is defined as a modality here.
    text = "text"


class DataRepository(BaseDataModel):
    """Data Repository model.

    Attributes:
        repository_id: Repository ID that identifies the repository (group of datasets)
        name: Name of the repository
        mutable: Indicates if the datasets in the repository are mutable or not
        media_type: Media type of the data; must be one of ``allowed_media_types``
        modality: Modality of the data, given as a ``Modality`` member
        created_at: Datetime when the repository was created
        updated_at: Datetime when the repository was updated
    """

    repository_id: str
    name: str
    mutable: bool
    media_type: custom_media_type
    modality: Modality
    created_at: datetime
    updated_at: datetime
class DataRepositoryCreate(BaseDataModel):
    """Data Repository creation model.

    Attributes:
        name: Name of the repository
        media_type: Media type of the data; must be one of ``allowed_media_types``
        modality: Modality of the data, given as a ``Modality`` member
    """

    name: str
    media_type: custom_media_type
    modality: Modality
class DataDataset(BaseDataModel):
    """Dataset model.

    Attributes:
        repository_id: Repository ID that identifies the repository (group of datasets)
        dataset_id: Dataset ID that identifies the dataset
        name: Name of the dataset (optional, defaults to ``None``)
        labels: List of labels of the dataset (optional, defaults to ``None``)
        total_datapoints: Total number of units in the dataset
        metadata: Metadata of the dataset (optional, defaults to ``None``)
        created_at: Datetime when the dataset was created
        updated_at: Datetime when the dataset was updated
    """

    repository_id: str
    dataset_id: str
    name: Optional[str] = None
    labels: Optional[list[str]] = None
    total_datapoints: int
    metadata: Optional[dict[str, Any]] = None
    created_at: datetime
    updated_at: datetime
class DatasetCreate(BaseDataModel):
    """Dataset creation model.

    Attributes:
        source_data: Source data of the dataset, as a buffered reader
            (file-like object) or raw bytes
        name: Name of the dataset (optional, defaults to ``None``)
        labels: List of labels of the dataset (required)
        total_datapoints: Total number of units in the dataset
        metadata: Metadata of the dataset (optional, defaults to ``None``)
    """

    source_data: io.BufferedReader | bytes
    name: Optional[str] = None
    labels: list[str]
    total_datapoints: int
    metadata: Optional[dict[str, Any]] = None
class DataStageCreate(BaseDataModel):
    """Stage creation model.

    Attributes:
        name: Name of the stage
    """

    name: str
class DataStage(BaseDataModel):
    """Stage model.

    Attributes:
        stage_id: Stage ID that identifies the stage
        name: Name of the stage
        created_at: Datetime when the stage was created
        updated_at: Datetime when the stage was updated
    """

    stage_id: str
    name: str
    created_at: datetime
    updated_at: datetime
class DataFile(BaseDataModel):
    """File model.

    Attributes:
        file_id: File ID that identifies the file
        stage_id: ID of the stage associated with the file
        name: Name of the file
        created_at: Datetime when the file was created
        updated_at: Datetime when the file was updated
        media_type: Media type of the file; a plain string that is not
            restricted to ``allowed_media_types``
        size: Size of the file (unit not stated here; presumably bytes, to
            be confirmed)
    """

    file_id: str
    stage_id: str
    name: str
    created_at: datetime
    updated_at: datetime
    media_type: str
    size: int
class DataFileCreate(BaseDataModel):
    """File creation model.

    Attributes:
        source_data: Source data of the file, as a buffered reader
            (file-like object) or raw bytes
        name: Name of the file
    """

    source_data: io.BufferedReader | bytes
    name: str