Source code for intelligence_layer.connectors.retrievers.base_retriever

from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import Any, Generic, Optional, TypeVar

from pydantic import BaseModel


class Document(BaseModel):
    """A piece of text together with optional, free-form metadata.

    Attributes:
        text: The document's text.
        metadata: Any metadata added to the document. Defaults to ``None``
            when nothing is supplied.
    """

    text: str
    metadata: Any = None
class DocumentChunk(BaseModel):
    """A section of a :class:`Document`, intended for retrieval use cases.

    Attributes:
        text: Chunk of the document that matched the search query.
        start: Start index of the chunk within the document.
        end: End index of the chunk within the document.
        metadata: Any metadata added to the document. Defaults to ``None``
            when nothing is supplied.
    """

    text: str
    start: int
    end: int
    metadata: Any = None
# Type variable for the document identifier type; it parameterizes both
# `SearchResult` (its `id` field) and `BaseRetriever` (its lookup argument).
ID = TypeVar("ID")
class SearchResult(BaseModel, Generic[ID]):
    """A retrieved document chunk paired with its search score.

    Attributes:
        id: Unique identifier of the document.
        score: The similarity score between the text and the query that was
            searched with. Expected to be between 0 and 1, where 0 means no
            similarity and 1 means perfect similarity. This range is a
            convention; the model declares no validation for it.
        document_chunk: The document chunk found by the search.
    """

    id: ID
    score: float
    document_chunk: DocumentChunk
class BaseRetriever(ABC, Generic[ID]):
    """Common interface that every retriever implements.

    A retriever finds texts for a given user query. Each implementation
    owns its own retrieval logic. So that results from different retrievers
    can be compared, the scores in the returned `SearchResult` instances are
    assumed to be between 0 and 1.
    """

    @abstractmethod
    def get_relevant_documents_with_scores(
        self, query: str
    ) -> Sequence[SearchResult[ID]]:
        """Search for texts matching ``query``.

        Args:
            query: The user query to search with.

        Returns:
            The search results, each holding a document chunk together with
            its score and the identifier of the document.
        """

    @abstractmethod
    def get_full_document(self, id: ID) -> Optional[Document]:
        """Look up the complete document for an identifier.

        Args:
            id: Identifier of the document, of the same type as
                ``SearchResult.id``.

        Returns:
            The document, or ``None``.
            NOTE(review): presumably ``None`` signals that no document
            exists for ``id`` -- confirm against the implementations.
        """