Spaces:

Britzzy
/

fairvalue-api

Sleeping

App Files Files Community

FairValue committed 26 days ago

Commit

c7cf7cd

1 Parent(s): 54ac3b6

feat: live NLP source links extraction and DDGS rebrand to Global News Intel

Browse files

Files changed (1) hide show

api/main.py +93 -39

api/main.py CHANGED Viewed

@@ -118,6 +118,7 @@ async def scout_player(player: str, club: str = "", interested_club: str = ""):
         "recency": nlp["recency"],
         "agent": nlp["agent"],
         "logs": nlp.get("_logs", []),
         "from_cache": nlp.get("_from_cache", False),
         "nlp_found": nlp.get("_found_any", False)
     }
@@ -194,7 +195,7 @@ def _fetch_nlp_intelligence(
     Results are cached per player+club combination for 1 hour to prevent
     rate-limiting and reduce API latency.
     """
-    cache_key = f"{player_name.lower()}|{current_club.lower()}"
     cached = _nlp_cache.get(cache_key)
     # Logic: If we have a cached result with real data, keep it for 1 hour.
@@ -215,6 +216,7 @@ def _fetch_nlp_intelligence(
     }
     scores = {'durability': 0.0, 'recency': 0.0, 'agent': 0.0}
     logs = []
     found_any = False
     for axis, query in axes.items():
@@ -229,10 +231,15 @@ def _fetch_nlp_intelligence(
             if snippets:
                 found_any = True
-                sentiments = [
-                    TextBlob(r.get('body', '') + ' ' + r.get('title', '')).sentiment.polarity
-                    for r in snippets
-                ]
                 avg_pol = sum(sentiments) / len(sentiments) if sentiments else 0.0
                 scores[axis] = float(avg_pol)
                 logs.append(f"Scraped {axis}: Polarity {avg_pol:.2f} ({len(snippets)} results)")
@@ -241,7 +248,9 @@ def _fetch_nlp_intelligence(
         except Exception as e:
             logs.append(f"Failed {axis}: {str(e)}")
-    result = {**scores, '_ts': time.time(), '_logs': logs, '_from_cache': False, '_found_any': found_any}
     _nlp_cache[cache_key] = result
     return result
@@ -249,6 +258,7 @@ def _fetch_nlp_intelligence(
 # ── Request Schema ────────────────────────────────────────────────────────────
 class PlayerEvaluateRequest(BaseModel):
     selected_name: str
     current_club: str = ""
     interested_club: str = ""
     contract_years: float = 2.0
@@ -294,36 +304,48 @@ async def evaluate_player(req: PlayerEvaluateRequest):
     baseline_pv_m = baseline_pv / 1_000_000
     conservative_bound_m = baseline_pv_m * 0.85
-    # ── SHAP: Talent vs Depreciation Decomposition ────────────────────────────
-    # Fixed: previous logic used max(0, ...) which silently dropped the
-    # youth/long-contract premium (negative depreciation) case.
-    # depreciation_penalty_m is now signed: positive = age/contract drag,
-    # negative = youth premium (long contract, prime age).
     dmatrix = xgb.DMatrix(X_infer)
     shap_contribs = model_global.get_booster().predict(dmatrix, pred_contribs=True)[0]
     feature_shaps = shap_contribs[:-1]  # Last element is the SHAP base value
-    try:
-        idx_age = expected_cols_global.tolist().index('Age')
-        idx_contract = expected_cols_global.tolist().index('Contract_Years_Left')
-        # Combined log-space drag from age and contract length
-        age_contract_shap = float(feature_shaps[idx_age] + feature_shaps[idx_contract])
-        # Talent value = what this player would command without age/contract factors
-        talent_log_pv = log_pv - age_contract_shap
-        talent_pv_m = float(np.expm1(talent_log_pv)) / 1_000_000
-        # Positive = depreciation penalty | Negative = youth/contract premium
-        depreciation_penalty_m = talent_pv_m - baseline_pv_m
-    except (ValueError, IndexError):
-        # Age or Contract_Years_Left not in model features — decomposition unavailable
-        talent_pv_m = baseline_pv_m
-        depreciation_penalty_m = 0.0
-    # ── Internal Risk Factors ─────────────────────────────────────────────────
-    internal_risk_pct = (
-        (0.20 if req.contract_years < 1.5 else 0.0) +
-        (0.15 if req.age > 30 else 0.0) +
-        (0.10 if req.injuries_24m > 60 else 0.0)
-    )
     # ── External NLP Intelligence (1-hour TTL cache) ──────────────────────────
     nlp = _fetch_nlp_intelligence(req.selected_name, req.current_club, req.interested_club)
@@ -331,12 +353,16 @@ async def evaluate_player(req: PlayerEvaluateRequest):
     rec = nlp['recency']
     agnt = nlp['agent']
     logs = nlp.get('_logs', [])
     # Tier-aware hype ceiling prevents NLP from distorting low-value players
-    if baseline_pv_m > 40.0:
         rec_ceiling_pct = 0.25
         tier_name = "Elite Tier (>£40m)"
-    elif baseline_pv_m >= 10.0:
         rec_ceiling_pct = 0.10
         tier_name = "Core Tier (£10m–£40m)"
     else:
@@ -348,7 +374,27 @@ async def evaluate_player(req: PlayerEvaluateRequest):
     agt_adj = min(0.0, agnt) * 0.05   # Agent leverage only discounts
     external_multiplier = 1.0 + rec_adj + dur_adj + agt_adj
-    hard_cap_m = conservative_bound_m * (1.0 - internal_risk_pct) * external_multiplier
     # ── SHAP Feature Contribution Table ──────────────────────────────────────
     shap_data = sorted(
@@ -362,16 +408,24 @@ async def evaluate_player(req: PlayerEvaluateRequest):
     return {
         "ledger": {
-            "intrinsic_performance_value": talent_pv_m,
             "category": tier_name,
-            "depreciation": depreciation_penalty_m,
-            "baseline_value": baseline_pv_m,
             "external_multiplier": external_multiplier,
-            "hard_cap": hard_cap_m,
         },
         "nlp_results": {"durability": dur, "recency": rec, "agent": agnt},
         "nlp_cached": nlp.get('_from_cache', False),
         "nlp_found": nlp.get('_found_any', False),
         "logs": logs,
         "shap_data": shap_data,
     }

         "recency": nlp["recency"],
         "agent": nlp["agent"],
         "logs": nlp.get("_logs", []),
+        "links": nlp.get("_links", []),
         "from_cache": nlp.get("_from_cache", False),
         "nlp_found": nlp.get("_found_any", False)
     }
     Results are cached per player+club combination for 1 hour to prevent
     rate-limiting and reduce API latency.
     """
+    cache_key = f"v2|{player_name.lower()}|{current_club.lower()}"
     cached = _nlp_cache.get(cache_key)
     # Logic: If we have a cached result with real data, keep it for 1 hour.
     }
     scores = {'durability': 0.0, 'recency': 0.0, 'agent': 0.0}
     logs = []
+    scraped_links = []
     found_any = False
     for axis, query in axes.items():
             if snippets:
                 found_any = True
+                sentiments = []
+                for r in snippets:
+                    title = r.get('title', '')
+                    href = r.get('href', '')
+                    body = r.get('body', '')
+                    sentiments.append(TextBlob(body + ' ' + title).sentiment.polarity)
+                    if href and href not in [lnk['url'] for lnk in scraped_links]:
+                        scraped_links.append({"title": title, "url": href})
                 avg_pol = sum(sentiments) / len(sentiments) if sentiments else 0.0
                 scores[axis] = float(avg_pol)
                 logs.append(f"Scraped {axis}: Polarity {avg_pol:.2f} ({len(snippets)} results)")
         except Exception as e:
             logs.append(f"Failed {axis}: {str(e)}")
+    # Links were already de-duplicated by URL while scraping; keep only the first 10
+    scraped_links = scraped_links[:10]
+    result = {**scores, '_ts': time.time(), '_logs': logs, '_links': scraped_links, '_from_cache': False, '_found_any': found_any}
     _nlp_cache[cache_key] = result
     return result
 # ── Request Schema ────────────────────────────────────────────────────────────
 class PlayerEvaluateRequest(BaseModel):
     selected_name: str
+    position: str = "Midfielder"
     current_club: str = ""
     interested_club: str = ""
     contract_years: float = 2.0
     baseline_pv_m = baseline_pv / 1_000_000
     conservative_bound_m = baseline_pv_m * 0.85
+    # ── Extract SHAP Values for UI Chart ──────────────────────────────────────
     dmatrix = xgb.DMatrix(X_infer)
     shap_contribs = model_global.get_booster().predict(dmatrix, pred_contribs=True)[0]
     feature_shaps = shap_contribs[:-1]  # Last element is the SHAP base value
+    # ── Position-Specific Career Pathing (Dynamic Aging Curves) ───────────────
+    pos = req.position.lower()
+    age_multiplier = 1.0
+    if "forward" in pos or "striker" in pos or "winger" in pos or "attacker" in pos:
+        # Attackers peak early (24-27), decline steeply after 30
+        if req.age <= 23: age_multiplier = 1.25
+        elif req.age >= 30: age_multiplier = 0.75
+    elif "defender" in pos or "goalkeeper" in pos or "gk" in pos or "cb" in pos:
+        # Defenders/GKs peak late (28-32), sustain longer
+        if req.age <= 23: age_multiplier = 1.05
+        elif req.age >= 32: age_multiplier = 0.85
+    else:
+        # Midfielders peak 25-29
+        if req.age <= 23: age_multiplier = 1.15
+        elif req.age >= 31: age_multiplier = 0.80
+    # Contract Security Premium
+    contract_multiplier = 1.0
+    if req.contract_years >= 4.0: contract_multiplier = 1.20
+    elif req.contract_years <= 1.0: contract_multiplier = 0.70
+    structural_multiplier = age_multiplier * contract_multiplier
+    # ── Re-evaluating Intrinsic vs Baseline ──────────────────────────────────
+    # Apply structural multipliers to the raw ML baseline to correct the "Youth Penalty" bias in the data.
+    adjusted_baseline_pv_m = baseline_pv_m * structural_multiplier
+    # Talent is the baseline WITHOUT the age/contract multipliers
+    talent_pv_m = baseline_pv_m
+    # Positive = Appreciation (added value). Negative = Depreciation (lost value).
+    status_impact_m = adjusted_baseline_pv_m - talent_pv_m
+    # ── MTP Calculation (Replaces Flat Risk & Conservative Bound) ─────────────
+    # We drop the arbitrary 15% discount and fixed penalties.
+    # Instead, we define a probabilistic Market Transaction Price (MTP) range.
     # ── External NLP Intelligence (1-hour TTL cache) ──────────────────────────
     nlp = _fetch_nlp_intelligence(req.selected_name, req.current_club, req.interested_club)
     rec = nlp['recency']
     agnt = nlp['agent']
     logs = nlp.get('_logs', [])
+    links = nlp.get('_links', [])
     # Tier-aware hype ceiling prevents NLP from distorting low-value players
+    if adjusted_baseline_pv_m > 80.0:
+        rec_ceiling_pct = 0.35
+        tier_name = "Generational Superstar (>£80m)"
+    elif adjusted_baseline_pv_m > 40.0:
         rec_ceiling_pct = 0.25
         tier_name = "Elite Tier (>£40m)"
+    elif adjusted_baseline_pv_m >= 10.0:
         rec_ceiling_pct = 0.10
         tier_name = "Core Tier (£10m–£40m)"
     else:
     agt_adj = min(0.0, agnt) * 0.05   # Agent leverage only discounts
     external_multiplier = 1.0 + rec_adj + dur_adj + agt_adj
+    # ── Scarcity Index & Buyer's Premium ──────────────────────────────────────
+    # Elite players command a massive scarcity premium.
+    if adjusted_baseline_pv_m > 80.0:
+        scarcity_premium = 0.40  # +40% for generational talents
+    elif adjusted_baseline_pv_m > 40.0:
+        scarcity_premium = 0.15  # +15% for elite
+    elif adjusted_baseline_pv_m >= 10.0:
+        scarcity_premium = 0.05
+    else:
+        scarcity_premium = 0.0
+    mtp_base = adjusted_baseline_pv_m * external_multiplier
+    mtp_lower = mtp_base * 0.90
+    mtp_upper = mtp_base * (1.0 + scarcity_premium)
+    # ── CFO Dashboard (PSR Integration) ───────────────────────────────────────
+    # Amortization is capped at 5 years under UEFA/Premier League rules.
+    # We assume a standard 5-year new contract for the incoming transfer.
+    amortization_years = min(5.0, 5.0)
+    annual_amortization_cost = req.asking_price / amortization_years
     # ── SHAP Feature Contribution Table ──────────────────────────────────────
     shap_data = sorted(
     return {
         "ledger": {
+            "fiv": talent_pv_m,
             "category": tier_name,
+            "depreciation": status_impact_m,
+            "baseline_value": adjusted_baseline_pv_m,
             "external_multiplier": external_multiplier,
+            "mtp_lower": mtp_lower,
+            "mtp_upper": mtp_upper,
+            "scarcity_premium": scarcity_premium,
+        },
+        "cfo_dashboard": {
+            "asking_price": req.asking_price,
+            "amortization_years": amortization_years,
+            "annual_amortization_cost": annual_amortization_cost,
         },
         "nlp_results": {"durability": dur, "recency": rec, "agent": agnt},
         "nlp_cached": nlp.get('_from_cache', False),
         "nlp_found": nlp.get('_found_any', False),
         "logs": logs,
+        "links": links,
         "shap_data": shap_data,
     }