"""
grader.py (Task 2 – Property Discovery)
-----------------------------------------
Deterministic scorer for natural-language property submissions.
One submission attempt per episode.
Grade range: 0.001 – 0.999 (half the matchscore output plus a cost-decay term, then clamped).
"""
from typing import Tuple
from utils import SemanticMatcher
class Task2Grader:
    """
    Grades a Task 2 property submission.

    The grade adds half of the semantic match score (submission vs. the
    reference property) to an exponential decay factor for cost spent beyond
    a free budget, then clamps the sum to [0.001, 0.999].

    Parameters
    ----------
    function_name : name of the target function
    property      : the 'property' field from the target function's data
    n             : scales the free cost budget, which is the average cost
                    per step multiplied by ``n + 2``
    """

    def __init__(self, function_name: str, property: str, n: int) -> None:
        self.function_name = function_name
        self.property = property
        self.n = n
        # Base of the exponential penalty applied per unit of cost overspend.
        self._decay = 0.75

    def _clamp(self, reward: float) -> float:
        """Restrict ``reward`` to the closed interval [0.001, 0.999]."""
        return max(0.001, min(0.999, reward))

    def _efficiency_factor(self, steps: int, cummulative_cost: int) -> float:
        """Return the cost-decay multiplier in (0, 1] for an episode.

        The free budget is the average cost per step times ``n + 2``; every
        unit of cost above that budget multiplies the factor by ``_decay``.

        When ``steps`` is zero or negative there is no per-step average to
        compute, so no overspend is charged and the factor is 1.0. This
        avoids the ZeroDivisionError the bare formula would raise.
        """
        if steps <= 0:
            return 1.0
        free_budget = (cummulative_cost / steps) * (self.n + 2)
        overspend = max(0, cummulative_cost - free_budget)
        return self._decay ** overspend

    def grade(self, submitted: str, steps: int, cummulative_cost: int) -> Tuple[float, str]:
        """Grade a submission; the numeric grade is strictly in (0, 1).

        Parameters
        ----------
        submitted        : the submitted natural-language property
        steps            : number of steps taken in the episode
        cummulative_cost : total cost accumulated over those steps

        Returns
        -------
        (grade, label) : the clamped grade and the matcher's ``confidence()``
                         value; an empty or whitespace-only submission yields
                         ``(0.001, "no_match")`` without invoking the matcher.
        """
        if not submitted or not submitted.strip():
            return 0.001, "no_match"

        matcher = SemanticMatcher()
        match_score = matcher.matchscore(self.property, submitted)
        # NOTE(review): the decay term alone can reach 1.0, so the sum may
        # exceed 1.0 and rely on _clamp — confirm this weighting is intended.
        final_score = (match_score * 0.5) + self._efficiency_factor(steps, cummulative_cost)
        return self._clamp(final_score), matcher.confidence()