Source code for intelligence_layer.connectors.retrievers.qdrant_in_memory_retriever

from collections.abc import Sequence
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from typing import Optional

from aleph_alpha_client import Prompt, SemanticEmbeddingRequest, SemanticRepresentation
from qdrant_client import QdrantClient
from qdrant_client.conversions.common_types import ScoredPoint
from qdrant_client.http.models import Distance, PointStruct, VectorParams, models

from intelligence_layer.connectors.limited_concurrency_client import (
    AlephAlphaClientProtocol,
    LimitedConcurrencyClient,
)
from intelligence_layer.connectors.retrievers.base_retriever import (
    BaseRetriever,
    Document,
    DocumentChunk,
    SearchResult,
)


class RetrieverType(Enum):
    """Specify the type of retriever to instantiate.

    Attributes:
        ASYMMETRIC: Query is embedded as `Query` and each document as `Document`.
        SYMMETRIC: Both query and documents will be embedded as `Symmetric`.
    """

    ASYMMETRIC = (SemanticRepresentation.Query, SemanticRepresentation.Document)
    SYMMETRIC = (SemanticRepresentation.Symmetric, SemanticRepresentation.Symmetric)
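
# A minimal usage sketch (not part of the original module): ASYMMETRIC suits
# query-to-document search, while SYMMETRIC targets document-to-document
# similarity. The variable names below are illustrative only.
#
#     retriever = QdrantInMemoryRetriever(
#         documents,
#         k=3,
#         retriever_type=RetrieverType.SYMMETRIC,  # document-to-document similarity
#     )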
class QdrantInMemoryRetriever(BaseRetriever[int]):
    """Search through documents stored in memory using semantic search.

    This retriever uses an in-memory [Qdrant](https://github.com/qdrant/qdrant)
    vector store instance to store documents and their embeddings. When run, the
    given query is embedded and scored against the document embeddings to retrieve
    the k most similar matches by cosine similarity.

    Args:
        client: Aleph Alpha client instance for running model-related API calls.
        documents: The sequence of documents to be made searchable.
        k: The (top) number of documents to be returned by search.
        threshold: The minimum cosine similarity between the query vector and a
            document vector for the document to be returned.
        retriever_type: The type of retriever to be instantiated. Should be
            `ASYMMETRIC` for most query-document retrieval use cases; `SYMMETRIC`
            is optimized for similar-document retrieval.
        distance_metric: The distance metric to be used for vector comparison.

    Example:
        >>> from intelligence_layer.connectors import LimitedConcurrencyClient, Document, QdrantInMemoryRetriever
        >>> client = LimitedConcurrencyClient.from_env()
        >>> documents = [Document(text=t) for t in ["I do not like rain.", "Summer is warm.", "We are so back."]]
        >>> retriever = QdrantInMemoryRetriever(documents, 5, client=client)
        >>> query = "Do you like summer?"
        >>> documents = retriever.get_relevant_documents_with_scores(query)
    """

    MAX_WORKERS = 10

    def __init__(
        self,
        documents: Sequence[Document],
        k: int,
        client: AlephAlphaClientProtocol | None = None,
        threshold: float = 0.5,
        retriever_type: RetrieverType = RetrieverType.ASYMMETRIC,
        distance_metric: Distance = Distance.COSINE,
    ) -> None:
        self._client = client or LimitedConcurrencyClient.from_env()
        self._search_client = QdrantClient(":memory:")
        self._collection_name = "in_memory_collection"
        self._k = k
        self._threshold = threshold
        self._query_representation, self._document_representation = retriever_type.value
        self._distance_metric = distance_metric

        if self._search_client.collection_exists(collection_name=self._collection_name):
            self._search_client.delete_collection(
                collection_name=self._collection_name,
            )

        self._search_client.create_collection(
            collection_name=self._collection_name,
            vectors_config=VectorParams(size=128, distance=self._distance_metric),
        )
        self._add_texts_to_memory(documents)

    def get_relevant_documents_with_scores(
        self, query: str
    ) -> Sequence[SearchResult[int]]:
        query_embedding = self._embed(query, self._query_representation)
        search_result = self._search_client.search(
            collection_name=self._collection_name,
            query_vector=query_embedding,
            score_threshold=self._threshold,
            limit=self._k,
        )
        return [self._point_to_search_result(point) for point in search_result]

    def _embed(self, text: str, representation: SemanticRepresentation) -> list[float]:
        embedding_request = SemanticEmbeddingRequest(
            prompt=Prompt.from_text(text),
            representation=representation,
            compress_to_size=128,
            normalize=True,
        )
        return self._client.semantic_embed(
            request=embedding_request, model="luminous-base"
        ).embedding

    @staticmethod
    def _point_to_search_result(point: ScoredPoint) -> SearchResult[int]:
        assert point.payload
        assert isinstance(point.id, int)
        return SearchResult(
            id=point.id,
            score=point.score,
            document_chunk=DocumentChunk(**point.payload),
        )

    def _add_texts_to_memory(self, documents: Sequence[Document]) -> None:
        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            embeddings = list(
                executor.map(
                    lambda c: self._embed(c.text, self._document_representation),
                    documents,
                )
            )
        self._search_client.upsert(
            collection_name=self._collection_name,
            wait=True,
            points=[
                PointStruct(
                    id=idx,
                    vector=text_embedding,
                    payload=DocumentChunk(
                        text=document.text,
                        start=0,
                        end=len(document.text) - 1,
                        metadata=document.metadata,
                    ).model_dump(),
                )
                for idx, (text_embedding, document) in enumerate(
                    zip(embeddings, documents, strict=True)
                )
            ],
        )
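
    # A hypothetical usage sketch (not part of the module): each stored point's
    # payload is a serialized DocumentChunk, which _point_to_search_result
    # reconstructs, so a search result exposes both the score and the original text.
    #
    #     results = retriever.get_relevant_documents_with_scores("Do you like summer?")
    #     for result in results:
    #         print(result.score, result.document_chunk.text)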
    def get_filtered_documents_with_scores(
        self, query: str, filter: models.Filter
    ) -> Sequence[SearchResult[int]]:
        """Specific method for `QdrantInMemoryRetriever` to support filtering search results.

        Args:
            query: The text to be searched with.
            filter: Conditions to filter by.

        Returns:
            The `k` most similar matches that pass the filter, with scores.
        """
        query_embedding = self._embed(query, self._query_representation)
        search_result = self._search_client.search(
            collection_name=self._collection_name,
            query_vector=query_embedding,
            limit=self._k,
            query_filter=filter,
        )
        return [self._point_to_search_result(point) for point in search_result]
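
    # A hypothetical sketch (identifiers below are illustrative): since each
    # payload is a DocumentChunk dump, document metadata can be matched via
    # Qdrant's payload filters using dotted keys under "metadata".
    #
    #     from qdrant_client.http import models
    #
    #     author_filter = models.Filter(
    #         must=[
    #             models.FieldCondition(
    #                 key="metadata.author",
    #                 match=models.MatchValue(value="unknown"),
    #             )
    #         ]
    #     )
    #     results = retriever.get_filtered_documents_with_scores("rain", filter=author_filter)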
    def get_full_document(self, id: int) -> Optional[Document]:
        records = self._search_client.retrieve(
            collection_name=self._collection_name,
            ids=[id],
            with_payload=True,
            with_vectors=False,
        )
        if not records:
            return None
        record = records.pop()
        assert record.payload
        return Document(**record.payload)
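
    # A hypothetical round-trip sketch (not part of the module): point ids are
    # the enumeration indices assigned at ingestion, so the full document behind
    # a search result can be fetched back by its id.
    #
    #     results = retriever.get_relevant_documents_with_scores("Do you like summer?")
    #     if results:
    #         full_document = retriever.get_full_document(results[0].id)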