File size: 1,444 Bytes
9c888b7
 
 
 
 
cfae7a7
9c888b7
 
cf983b8
 
9c888b7
 
 
 
 
 
 
cfae7a7
 
9c888b7
 
f78cba2
cfae7a7
 
f78cba2
 
41a051f
 
 
9c888b7
f78cba2
cfae7a7
9c888b7
f78cba2
9c888b7
cfae7a7
f78cba2
 
dccaaac
f78cba2
41a051f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
grader.py  (Task 2 – Property Discovery)
-----------------------------------------
Deterministic scorer for natural-language property submissions.
One submission attempt per episode.
Grade range: 0.001 – 0.999  (half the matchscore plus a cost-decay term, clamped).
"""

from typing import Tuple
from utils import SemanticMatcher

class Task2Grader:
    """
    Grades a Task 2 property submission.

    Parameters
    ----------
    function_name : name of the target function
    property      : the 'property' field from the target function's data
    n             : integer that scales the free cost budget, which is the
                    average cost per step multiplied by (n + 2)
    """

    def __init__(self, function_name: str, property: str, n: int) -> None:
        self.function_name = function_name
        self.property      = property
        self.n             = n
        # Base of the exponential penalty applied to cost above the free budget.
        self._decay        = 0.75

    def _clamp(self, reward: float) -> float:
        """Bound *reward* to the closed interval [0.001, 0.999]."""
        return max(0.001, min(0.999, reward))

    def _free_budget(self, steps: int, cummulative_cost: int) -> float:
        """Return the amount of cost that incurs no decay penalty.

        The allowance is the average cost per step times ``n + 2``. With
        ``steps == 0`` there is no average to compute, so the allowance is
        0.0 rather than a ``ZeroDivisionError``.
        """
        if steps == 0:
            return 0.0
        return (cummulative_cost / steps) * (self.n + 2)

    def grade(self, submitted: str, steps: int, cummulative_cost: int) -> Tuple[float, str]:
        """Deterministic grade strictly in (0, 1).

        Parameters
        ----------
        submitted        : the submitted property text
        steps            : number of steps taken; used as the divisor for the
                           average cost per step (0 is tolerated)
        cummulative_cost : total cost accumulated over those steps

        Returns
        -------
        (score, label) where ``score`` is clamped to [0.001, 0.999]. For an
        empty or whitespace-only submission the result is
        ``(0.001, "no_match")``; otherwise ``label`` is whatever
        ``SemanticMatcher.confidence()`` returns after scoring.
        """
        # Nothing to compare: return the floor score without building a matcher.
        if not submitted or not submitted.strip():
            return 0.001, "no_match"

        matcher = SemanticMatcher()
        match_score = matcher.matchscore(self.property, submitted)

        # Only the cost beyond the free budget is penalised.
        free_budget = self._free_budget(steps, cummulative_cost)
        overage = max(0, cummulative_cost - free_budget)

        # NOTE(review): with no overage the decay term is 1.0, so the sum is
        # at least 1.0 for any non-negative match score and clamps to 0.999.
        # Confirm that adding (rather than multiplying/weighting) is intended.
        final_score = (match_score * 0.5) + (self._decay ** overage)

        return self._clamp(final_score), matcher.confidence()