Source code for intelligence_layer.examples.classify.keyword_extract

from collections.abc import Mapping

from pydantic import BaseModel

from intelligence_layer.core import (
    CompleteInput,
    ControlModel,
    Language,
    LuminousControlModel,
    Task,
    TaskSpan,
    TextChunk,
)

INSTRUCT_CONFIGS = {
    Language(
        "de"
    ): "Worum geht es in dem Text? Extrahiere ein paar Stichwörter in Form einer Komma-separierten Liste.",
    Language(
        "en"
    ): "What is the text about? Extract a few keywords in form of a comma-separated list.",
    Language(
        "es"
    ): "¿De qué trata el texto? Extrae algunas palabras clave en forma de una lista separada por comas.",
    Language(
        "fr"
    ): "De quoi parle le texte? Extraire quelques mots-clés sous forme d'une liste séparée par des virgules.",
    Language(
        "it"
    ): "Di cosa tratta il testo? Estrai alcune parole chiave sotto forma di una lista separata da virgole.",
}


[docs] class KeywordExtractInput(BaseModel): chunk: TextChunk language: Language
[docs] class KeywordExtractOutput(BaseModel): keywords: frozenset[str]
[docs] class KeywordExtract(Task[KeywordExtractInput, KeywordExtractOutput]): def __init__( self, model: ControlModel | None = None, instruct_configs: Mapping[Language, str] = INSTRUCT_CONFIGS, maximum_tokens: int = 32, ) -> None: self._instruct_configs = instruct_configs self._model = model or LuminousControlModel("luminous-base-control") self._maximum_tokens = maximum_tokens
[docs] def do_run( self, input: KeywordExtractInput, task_span: TaskSpan ) -> KeywordExtractOutput: instruction = input.language.language_config(self._instruct_configs) result = self._model.complete( CompleteInput( prompt=self._model.to_instruct_prompt( instruction=instruction, input=str(input.chunk) ), maximum_tokens=self._maximum_tokens, ), task_span, ) return KeywordExtractOutput( keywords=frozenset(s.strip() for s in result.completion.split(",")) )