Source code for intelligence_layer.examples.summarize.summarize

from collections.abc import Iterable, Sequence
from typing import Union

from pydantic import BaseModel

from intelligence_layer.core import Language, TextChunk
from intelligence_layer.evaluation import (
    AggregationLogic,
    BleuGrader,
    Example,
    MeanAccumulator,
    RougeGrader,
    SingleOutputEvaluationLogic,
)


[docs] class LongContextSummarizeInput(BaseModel): """The input for a summarize-task for a text of any length. Attributes: text: A text of any length. language: The desired language of the summary. ISO 619 str with language e.g. en, fr, etc. """ text: str language: Language = Language("en")
[docs] class PartialSummary(BaseModel): """The summary of a single chunk. Attributes: summary: The summary generated by the task. chunk: The source chunk. generated_tokens: The number of tokens generated for the summary """ summary: str chunk: TextChunk generated_tokens: int
[docs] class LongContextSummarizeOutput(BaseModel): """The output of a summarize-task for a text of any length. Attributes: partial_summaries: Chunk-wise summaries. """ partial_summaries: Sequence[PartialSummary]
[docs] class SingleChunkSummarizeInput(BaseModel): """The input for a summarize-task that only deals with a single chunk. Attributes: chunk: The text chunk to be summarized. language: The desired language of the summary. ISO 619 str with language e.g. en, fr, etc. """ chunk: TextChunk language: Language = Language("en")
[docs] class SummarizeOutput(BaseModel): """The output of a summarize-task. Attributes: summary: The summary generated by the task. generated_tokens: The number of tokens generated for the summary. """ summary: str generated_tokens: int
[docs] class SummarizeEvaluation(BaseModel): """The evaluation of a summarization run. Attributes: bleu: roughly corresponds to precision rouge: roughly corresponds to recall output: The actual output from the task run """ bleu: float rouge: float output: Union[SummarizeOutput, LongContextSummarizeOutput]
[docs] class AggregatedSummarizeEvaluation(BaseModel): """The aggregated evaluation of a summarization implementation against a dataset. Attributes: aggregate_bleu: average over BLEU-scores aggregate_rouge: average over ROUGE-scores """ aggregate_bleu: float aggregate_rouge: float
[docs] class SingleChunkSummarizeAggregationLogic( AggregationLogic[SummarizeEvaluation, AggregatedSummarizeEvaluation] ):
[docs] def aggregate( self, evaluations: Iterable[SummarizeEvaluation] ) -> AggregatedSummarizeEvaluation: return aggregate_summarize_evaluation(evaluations)
[docs] class SingleChunkSummarizeEvaluationLogic( SingleOutputEvaluationLogic[ SingleChunkSummarizeInput, SummarizeOutput, str, SummarizeEvaluation, ] ): def __init__(self) -> None: super().__init__() self.bleu_grader = BleuGrader() self.rouge_grader = RougeGrader() def do_evaluate_single_output( self, example: Example[SingleChunkSummarizeInput, str], output: SummarizeOutput, ) -> SummarizeEvaluation: bleu_score = self.bleu_grader.calculate_bleu( output.summary, example.expected_output ) rouge_score = self.rouge_grader.calculate_rouge( output.summary, example.expected_output ) return SummarizeEvaluation( bleu=bleu_score, rouge=rouge_score.recall, output=output )
[docs] class LongContextSummarizeAggregationLogic( AggregationLogic[SummarizeEvaluation, AggregatedSummarizeEvaluation] ):
[docs] def aggregate( self, evaluations: Iterable[SummarizeEvaluation] ) -> AggregatedSummarizeEvaluation: return aggregate_summarize_evaluation(evaluations)
[docs] class LongContextSummarizeEvaluationLogic( SingleOutputEvaluationLogic[ LongContextSummarizeInput, LongContextSummarizeOutput, str, SummarizeEvaluation, ] ): def __init__(self) -> None: super().__init__() self.bleu_grader = BleuGrader() self.rouge_grader = RougeGrader() def do_evaluate_single_output( self, example: Example[LongContextSummarizeInput, str], output: LongContextSummarizeOutput, ) -> SummarizeEvaluation: joint_summary = " ".join( partial_summary.summary for partial_summary in output.partial_summaries ) bleu_score = self.bleu_grader.calculate_bleu( joint_summary, example.expected_output ) rouge_score = self.rouge_grader.calculate_rouge( joint_summary, example.expected_output ) return SummarizeEvaluation( bleu=bleu_score, rouge=rouge_score.recall, output=output )
def aggregate_summarize_evaluation( evaluations: Iterable[SummarizeEvaluation], ) -> AggregatedSummarizeEvaluation: acc_bleu = MeanAccumulator() acc_rouge = MeanAccumulator() for evaluation in evaluations: acc_bleu.add(evaluation.bleu) acc_rouge.add(evaluation.rouge) return AggregatedSummarizeEvaluation( aggregate_bleu=acc_bleu.extract(), aggregate_rouge=acc_rouge.extract(), )