Spaces:
Sleeping
Sleeping
ajaxwin
refactor: Update task configurations and grading logic for improved scoring and consistency
dccaaac | """ | |
| grader.py (Task 1 – Targeted Vulnerability Detection) | |
| ------------------------------------------------------- | |
| Deterministic grader. Grade range: (0, 1) | |
| """ | |
| from __future__ import annotations | |
| from typing import Dict | |
| from utils import SemanticMatcher | |
class Task1Grader:
    """Deterministic grader for Task 1 (targeted vulnerability detection).

    A submission is scored on two equally weighted accuracy components --
    naming the target function and describing the vulnerability -- and the
    result is scaled by an efficiency factor that decays once the agent has
    spent more than its free budget. The final grade is clamped into the
    open interval (0, 1).
    """

    def __init__(self, target_function: str, vulnerability_issue: str, n: int) -> None:
        """Store the expected answer and the contract-size heuristic.

        Args:
            target_function: Name of the vulnerable function; stored
                lower-cased so the comparison in ``grade`` is
                case-insensitive.
            vulnerability_issue: Reference description of the vulnerability.
            n: Size heuristic for the contract code (per the original note:
                the log of the number of functions). Larger values widen the
                free budget.
        """
        self.target_function = target_function.lower()
        self.vulnerability_issue = vulnerability_issue
        # Log of the number of functions (n) is a heuristic used to decide
        # the size of the contract code.
        self.n = n
        # Base of the exponential penalty applied per unit of overspend.
        self._decay = 0.75

    def _clamp(self, reward: float) -> float:
        """Clamp ``reward`` into [0.001, 0.999] so it stays strictly in (0, 1)."""
        return max(0.001, min(0.999, reward))

    def _score(self, func_match: bool, issue_match: float, steps: int, cummulative_cost: int) -> float:
        """Combine the accuracy components with the efficiency penalty.

        Args:
            func_match: Whether the submitted function name was correct.
            issue_match: Similarity score for the vulnerability description
                (presumably in [0, 1] -- confirm against ``SemanticMatcher``).
            steps: Number of steps the agent took.
            cummulative_cost: Total cost accumulated over those steps.

        Returns:
            The clamped grade.
        """
        if steps > 0:
            # Average cost per step, granted for (n + 2) steps free of charge.
            free_budget = (cummulative_cost / steps) * (self.n + 2)
        else:
            # No steps means there is no average cost to derive a budget
            # from; avoid dividing by zero and grant no free budget.
            free_budget = 0.0
        overspend = max(0.0, cummulative_cost - free_budget)
        efficiency = self._decay ** overspend

        accuracy = 0.5 * func_match + 0.5 * issue_match
        # The efficiency factor scales the accuracy rather than being added
        # to it: an additive term equals 1 whenever the run is within budget
        # and would saturate the clamp regardless of correctness.
        return self._clamp(accuracy * efficiency)

    def grade(self, submitted_function: str, submitted_vuln_type: str, steps: int, cummulative_cost: int) -> float:
        """Return the grade for a submission, strictly in (0, 1).

        Args:
            submitted_function: Function name proposed by the agent; compared
                to the target ignoring case and surrounding whitespace.
            submitted_vuln_type: Vulnerability description proposed by the
                agent; scored against the reference by ``SemanticMatcher``.
            steps: Number of steps the agent took.
            cummulative_cost: Total cost accumulated over those steps.

        Returns:
            The clamped grade.
        """
        # Normalise both sides the same way so stray whitespace in the
        # configured target cannot make a correct answer unmatchable.
        func_match = submitted_function.strip().lower() == self.target_function.strip()
        issue_match = SemanticMatcher().matchscore(self.vulnerability_issue, submitted_vuln_type)
        return self._score(func_match, issue_match, steps, cummulative_cost)

    def get_canonical_answer(self) -> Dict[str, str]:
        """Return the expected function name and vulnerability description."""
        return {"function": self.target_function, "vulnerability": self.vulnerability_issue}