Skip to content

Scores

Class representing a Scorer object.

Attributes:

Name Type Description
func Callable[[str, str], float]

The scoring function to be wrapped.

Methods:

Name Description
measure

Signature: measure(completion: str, target: str) -> float. Calculates the score between the completion and target strings using the wrapped scoring function.

name

Returns the name of the wrapped scoring function with underscores replaced by spaces and title-cased.

Source code in parsbench/scores/base.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class Scorer:
    """
    Wrapper around a two-argument scoring function.

    Attributes:
        func (Callable[[str, str], float]): The scoring function to be wrapped.
        name (str): Read-only property. Human-readable name of the wrapped
            callable, with underscores replaced by spaces and title-cased.

    Methods:
        measure(completion: str, target: str) -> float:
            Calculates the score between the completion and target strings using the wrapped scoring function.
    """

    def __init__(self, func: Callable[[str, str], float]):
        self.func = func

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.func!r})"

    def measure(self, completion: str, target: str) -> float:
        """Return the result of calling the wrapped function on (completion, target)."""
        return self.func(completion, target)

    @property
    def name(self) -> str:
        """Display name derived from the wrapped callable's name."""
        # Not every callable carries __name__ (functools.partial objects and
        # instances that define __call__ do not); fall back to the callable's
        # class name instead of raising AttributeError.
        raw_name = getattr(self.func, "__name__", None) or type(self.func).__name__
        return raw_name.replace("_", " ").title()

Wraps a scorer function inside the Scorer class.

Source code in parsbench/scores/base.py
30
31
32
def wrap_scorer(func):
    """
    Build a Scorer around ``func`` and return it.

    Usable as a decorator: the decorated name is then bound to the Scorer
    instance rather than to the bare function.
    """
    wrapped = Scorer(func)
    return wrapped
Source code in parsbench/scores/common.py
4
5
6
@wrap_scorer
def exact_match(completion: str, target: str) -> int:
    """Return 1 when ``completion`` equals ``target``, otherwise 0."""
    is_equal = completion == target
    return int(is_equal)
Source code in parsbench/scores/machine_translation.py
 7
 8
 9
10
11
12
13
14
15
16
@wrap_scorer
def english_sentence_bleu(completion: str, target: str) -> float:
    """
    Score ``completion`` against ``target`` with ``nltk.translate.bleu``.

    Both strings are tokenized with ``nltk.word_tokenize``; ``target`` is the
    single reference and ``completion`` is the hypothesis. The call passes
    ``weights=(1,)``.
    """
    # Requested on every call, quietly, before tokenizing.
    nltk.download("punkt", quiet=True)

    references = [nltk.word_tokenize(target)]
    hypothesis = nltk.word_tokenize(completion)
    return nltk.translate.bleu(references, hypothesis, weights=(1,))
Source code in parsbench/scores/machine_translation.py
19
20
21
22
23
24
25
26
@wrap_scorer
def persian_sentence_bleu(completion: str, target: str) -> float:
    """
    Score ``completion`` against ``target`` with ``nltk.translate.bleu``.

    Both strings are tokenized with ``hazm.word_tokenize``; ``target`` is the
    single reference and ``completion`` is the hypothesis. The call passes
    ``weights=(1,)``.
    """
    references = [hazm.word_tokenize(target)]
    hypothesis = hazm.word_tokenize(completion)
    return nltk.translate.bleu(references, hypothesis, weights=(1,))
Source code in parsbench/scores/summarization.py
 8
 9
10
11
12
13
14
15
16
@wrap_scorer
def english_rouge(completion: str, target: str) -> float:
    """
    Return the "rouge1" F-measure of ``completion`` scored against ``target``.

    Uses ``rouge_scorer.RougeScorer`` with an ``NLTKWordTokenizer`` instance
    as its tokenizer.
    """
    # Requested on every call, quietly, before the scorer is built.
    nltk.download("punkt", quiet=True)

    rouge = rouge_scorer.RougeScorer(
        ["rouge1"], tokenizer=nltk.tokenize.NLTKWordTokenizer()
    )
    # score() receives the target first and the completion second.
    result = rouge.score(target, completion)
    rouge1 = result["rouge1"]
    return rouge1.fmeasure
Source code in parsbench/scores/summarization.py
19
20
21
22
23
24
25
@wrap_scorer
def persian_rouge(completion: str, target: str) -> float:
    """
    Return the "rouge1" F-measure of ``completion`` scored against ``target``.

    Uses ``rouge_scorer.RougeScorer`` with a ``hazm.WordTokenizer`` instance
    as its tokenizer.
    """
    rouge = rouge_scorer.RougeScorer(["rouge1"], tokenizer=hazm.WordTokenizer())
    # score() receives the target first and the completion second.
    result = rouge.score(target, completion)
    rouge1 = result["rouge1"]
    return rouge1.fmeasure