Source code for intelligence_layer.examples.qa.single_chunk_qa

from collections.abc import Mapping, Sequence
from typing import Optional

from liquid import Template
from pydantic import BaseModel

from intelligence_layer.core import (
    CompleteInput,
    CompleteOutput,
    ControlModel,
    Language,
    LuminousControlModel,
    RichPrompt,
    Task,
    TaskSpan,
    TextChunk,
    TextHighlight,
    TextHighlightInput,
    TextHighlightOutput,
)
from intelligence_layer.core.prompt_template import TextCursor
from intelligence_layer.core.text_highlight import ScoredTextHighlight


class QaSetup(BaseModel):
    unformatted_instruction: str
    no_answer_str: str
    no_answer_logit_bias: Optional[float] = None


QA_INSTRUCTIONS = {
    Language("en"): QaSetup(
        unformatted_instruction='Question: {{question}}\nAnswer the question on the basis of the text. If there is no answer within the text, respond "{{no_answer_text}}".',
        no_answer_str="no answer in text",
        no_answer_logit_bias=1.0,
    ),
    Language("de"): QaSetup(
        unformatted_instruction='Beantworte die Frage anhand des Textes. Wenn sich die Frage nicht mit dem Text beantworten lässt, antworte "{{no_answer_text}}".\nFrage: {{question}}',
        no_answer_str="Unbeantwortbar",
        no_answer_logit_bias=0.5,
    ),
    Language("fr"): QaSetup(
        unformatted_instruction="{{question}}\nS'il n'y a pas de réponse, dites \"{{no_answer_text}}\". Ne répondez à la question qu'en vous basant sur le texte.",
        no_answer_str="pas de réponse dans le texte",
    ),
    Language("es"): QaSetup(
        unformatted_instruction='{{question}}\nSi no hay respuesta, di "{{no_answer_text}}". Responde sólo a la pregunta basándote en el texto.',
        no_answer_str="no hay respuesta en el texto",
    ),
    Language("it"): QaSetup(
        unformatted_instruction='{{question}}\nSe non c\'è risposta, dire "{{no_answer_text}}". Rispondere alla domanda solo in base al testo.',
        no_answer_str="nessuna risposta nel testo",
    ),
}
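# Editor's note (illustrative sketch, not part of the original module): each
# `unformatted_instruction` above is a liquid template. At run time it is rendered
# with the user's question and the language's `no_answer_str`, exactly as done in
# `SingleChunkQa.do_run` below. For English, for example:
#
#     Template(QA_INSTRUCTIONS[Language("en")].unformatted_instruction).render(
#         question="Who likes pizza?",
#         no_answer_text=QA_INSTRUCTIONS[Language("en")].no_answer_str,
#     )
#     # -> 'Question: Who likes pizza?\nAnswer the question on the basis of the
#     #    text. If there is no answer within the text, respond "no answer in text".'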


class SingleChunkQaInput(BaseModel):
    """The input for a `SingleChunkQa` task.

    Attributes:
        chunk: The (short) text to be asked about. Usually measures one or a few
            paragraph(s). Can't be longer than the context length of the model used
            minus the size of the system prompt.
        question: The question to be asked about the chunk.
        language: The desired language of the answer. ISO 639-1 code, e.g. "en" or "fr".
        generate_highlights: Whether to generate highlights (using the explainability
            feature) for the answer. Defaults to `True`.
    """

    chunk: TextChunk
    question: str
    language: Language = Language("en")
    generate_highlights: bool = True
class SingleChunkQaOutput(BaseModel):
    """The output of a `SingleChunkQa` task.

    Attributes:
        answer: The answer generated by the task. Can be a string or None (if no
            answer was found).
        highlights: Highlights indicating which parts of the chunk contributed to the
            answer. Each highlight is a quote from the text.
    """

    answer: Optional[str]
    highlights: Sequence[ScoredTextHighlight]
class SingleChunkQa(Task[SingleChunkQaInput, SingleChunkQaOutput]):
    """Answer a question on the basis of one chunk.

    Uses Aleph Alpha models to generate a natural language answer for a text chunk
    given a question. Will answer `None` if the language model determines that the
    question cannot be answered on the basis of the text.

    Args:
        model: The model used throughout the task for model-related API calls.
            Defaults to the "luminous-supreme-control" :class:`LuminousControlModel`.
        text_highlight: The task that is used for highlighting the parts of the input
            that are relevant for the answer. Defaults to :class:`TextHighlight`.
        instruction_config: Defines the instructions for different languages, including
            the string the model should generate when no answer can be found (see
            `QaSetup.no_answer_str`). Defaults to `QA_INSTRUCTIONS`.
        maximum_tokens: The maximum number of tokens to be generated for an answer.
            Defaults to 256.

    Example:
        >>> import os
        >>> from intelligence_layer.core import Language, InMemoryTracer
        >>> from intelligence_layer.core import TextChunk
        >>> from intelligence_layer.examples import SingleChunkQa, SingleChunkQaInput
        >>>
        >>> task = SingleChunkQa()
        >>> input = SingleChunkQaInput(
        ...     chunk=TextChunk("Tina does not like pizza. However, Mike does."),
        ...     question="Who likes pizza?",
        ...     language=Language("en"),
        ... )
        >>> tracer = InMemoryTracer()
        >>> output = task.run(input, tracer)
    """

    def __init__(
        self,
        model: ControlModel | None = None,
        text_highlight: Task[TextHighlightInput, TextHighlightOutput] | None = None,
        instruction_config: Mapping[Language, QaSetup] = QA_INSTRUCTIONS,
        maximum_tokens: int = 256,
    ):
        super().__init__()
        self._model = model or LuminousControlModel("luminous-supreme-control")
        self._text_highlight = text_highlight or TextHighlight(self._model, clamp=True)
        self._instruction_config = instruction_config
        self._maximum_tokens = maximum_tokens
    def do_run(
        self, input: SingleChunkQaInput, task_span: TaskSpan
    ) -> SingleChunkQaOutput:
        qa_setup = input.language.language_config(self._instruction_config)

        instruction = Template(qa_setup.unformatted_instruction).render(
            question=input.question, no_answer_text=qa_setup.no_answer_str
        )
        no_answer_logit_bias = (
            self._get_no_answer_logit_bias(
                qa_setup.no_answer_str, qa_setup.no_answer_logit_bias
            )
            if qa_setup.no_answer_logit_bias
            else None
        )
        output, prompt = self._generate_answer(
            instruction,
            input.chunk,
            no_answer_logit_bias,
            task_span,
        )

        answer = self._no_answer_to_none(
            output.completion.strip(), qa_setup.no_answer_str
        )

        if input.generate_highlights:
            raw_highlights = (
                self._get_highlights(
                    prompt,
                    output.completion,
                    task_span,
                )
                if answer
                else []
            )
            highlights = self._shift_highlight_ranges_to_input(prompt, raw_highlights)
        else:
            highlights = []
        return SingleChunkQaOutput(
            answer=answer,
            highlights=highlights,
        )
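    # Editor's note (illustrative, not part of the original module): the optional
    # logit bias computed in `do_run` nudges generation towards the language's
    # no-answer string by biasing only the *first* token of that string (see
    # `_get_no_answer_logit_bias` below). The concrete token id depends on the
    # model's tokenizer; for German, for example:
    #
    #     self._get_no_answer_logit_bias("Unbeantwortbar", 0.5)
    #     # -> {<id of the first token of "Unbeantwortbar">: 0.5}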
    def _shift_highlight_ranges_to_input(
        self, prompt: RichPrompt, raw_highlights: Sequence[ScoredTextHighlight]
    ) -> Sequence[ScoredTextHighlight]:
        # This only works with models that have an 'input' range, e.g. control models.
        if "input" not in prompt.ranges or len(prompt.ranges["input"]) == 0:
            return raw_highlights

        input_cursor = prompt.ranges["input"][0].start
        assert isinstance(input_cursor, TextCursor)
        input_offset = input_cursor.position
        return [
            ScoredTextHighlight(
                start=raw.start - input_offset,
                end=raw.end - input_offset,
                score=raw.score,
            )
            for raw in raw_highlights
        ]

    def _get_no_answer_logit_bias(
        self, no_answer_str: str, no_answer_logit_bias: float
    ) -> dict[int, float]:
        return {self._model.tokenize(no_answer_str).ids[0]: no_answer_logit_bias}

    def _generate_answer(
        self,
        instruction: str,
        input: str,
        no_answer_logit_bias: Optional[dict[int, float]],
        task_span: TaskSpan,
    ) -> tuple[CompleteOutput, RichPrompt]:
        prompt = self._model.to_instruct_prompt(instruction, input)

        return (
            self._model.complete(
                CompleteInput(
                    prompt=prompt,
                    maximum_tokens=self._maximum_tokens,
                    logit_bias=no_answer_logit_bias,
                ),
                task_span,
            ),
            prompt,
        )

    def _get_highlights(
        self,
        rich_prompt: RichPrompt,
        completion: str,
        task_span: TaskSpan,
    ) -> Sequence[ScoredTextHighlight]:
        highlight_input = TextHighlightInput(
            rich_prompt=rich_prompt,
            target=completion,
            focus_ranges=frozenset({"input"}),
        )
        highlight_output = self._text_highlight.run(highlight_input, task_span)
        return [h for h in highlight_output.highlights if h.score > 0]

    def _no_answer_to_none(self, completion: str, no_answer_str: str) -> Optional[str]:
        return completion if no_answer_str not in completion else None
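
# Editor's sketch (not part of the original module): end-to-end usage, assuming a
# configured Aleph Alpha client (e.g. an AA_TOKEN environment variable). Highlight
# ranges in the output have already been shifted to be relative to the input chunk
# (see `_shift_highlight_ranges_to_input` above), so they can slice the chunk directly.
if __name__ == "__main__":
    from intelligence_layer.core import InMemoryTracer

    task = SingleChunkQa()
    qa_input = SingleChunkQaInput(
        chunk=TextChunk("Tina does not like pizza. However, Mike does."),
        question="Who likes pizza?",
        language=Language("en"),
    )
    output = task.run(qa_input, InMemoryTracer())

    print("Answer:", output.answer)
    for highlight in output.highlights:
        # Each highlight quotes the part of the chunk that supported the answer.
        print(qa_input.chunk[highlight.start : highlight.end], highlight.score)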