FairValue committed on
Commit
c7cf7cd
·
1 Parent(s): 54ac3b6

feat: live NLP source links extraction and DDGS rebrand to Global News Intel

Browse files
Files changed (1) hide show
  1. api/main.py +93 -39
api/main.py CHANGED
@@ -118,6 +118,7 @@ async def scout_player(player: str, club: str = "", interested_club: str = ""):
118
  "recency": nlp["recency"],
119
  "agent": nlp["agent"],
120
  "logs": nlp.get("_logs", []),
 
121
  "from_cache": nlp.get("_from_cache", False),
122
  "nlp_found": nlp.get("_found_any", False)
123
  }
@@ -194,7 +195,7 @@ def _fetch_nlp_intelligence(
194
  Results are cached per player+club combination for 1 hour to prevent
195
  rate-limiting and reduce API latency.
196
  """
197
- cache_key = f"{player_name.lower()}|{current_club.lower()}"
198
  cached = _nlp_cache.get(cache_key)
199
 
200
  # Logic: If we have a cached result with real data, keep it for 1 hour.
@@ -215,6 +216,7 @@ def _fetch_nlp_intelligence(
215
  }
216
  scores = {'durability': 0.0, 'recency': 0.0, 'agent': 0.0}
217
  logs = []
 
218
 
219
  found_any = False
220
  for axis, query in axes.items():
@@ -229,10 +231,15 @@ def _fetch_nlp_intelligence(
229
 
230
  if snippets:
231
  found_any = True
232
- sentiments = [
233
- TextBlob(r.get('body', '') + ' ' + r.get('title', '')).sentiment.polarity
234
- for r in snippets
235
- ]
 
 
 
 
 
236
  avg_pol = sum(sentiments) / len(sentiments) if sentiments else 0.0
237
  scores[axis] = float(avg_pol)
238
  logs.append(f"Scraped {axis}: Polarity {avg_pol:.2f} ({len(snippets)} results)")
@@ -241,7 +248,9 @@ def _fetch_nlp_intelligence(
241
  except Exception as e:
242
  logs.append(f"Failed {axis}: {str(e)}")
243
 
244
- result = {**scores, '_ts': time.time(), '_logs': logs, '_from_cache': False, '_found_any': found_any}
 
 
245
  _nlp_cache[cache_key] = result
246
  return result
247
 
@@ -249,6 +258,7 @@ def _fetch_nlp_intelligence(
249
  # ── Request Schema ────────────────────────────────────────────────────────────
250
  class PlayerEvaluateRequest(BaseModel):
251
  selected_name: str
 
252
  current_club: str = ""
253
  interested_club: str = ""
254
  contract_years: float = 2.0
@@ -294,36 +304,48 @@ async def evaluate_player(req: PlayerEvaluateRequest):
294
  baseline_pv_m = baseline_pv / 1_000_000
295
  conservative_bound_m = baseline_pv_m * 0.85
296
 
297
- # ── SHAP: Talent vs Depreciation Decomposition ────────────────────────────
298
- # Fixed: previous logic used max(0, ...) which silently dropped the
299
- # youth/long-contract premium (negative depreciation) case.
300
- # depreciation_penalty_m is now signed: positive = age/contract drag,
301
- # negative = youth premium (long contract, prime age).
302
  dmatrix = xgb.DMatrix(X_infer)
303
  shap_contribs = model_global.get_booster().predict(dmatrix, pred_contribs=True)[0]
304
  feature_shaps = shap_contribs[:-1] # Last element is the SHAP base value
305
 
306
- try:
307
- idx_age = expected_cols_global.tolist().index('Age')
308
- idx_contract = expected_cols_global.tolist().index('Contract_Years_Left')
309
- # Combined log-space drag from age and contract length
310
- age_contract_shap = float(feature_shaps[idx_age] + feature_shaps[idx_contract])
311
- # Talent value = what this player would command without age/contract factors
312
- talent_log_pv = log_pv - age_contract_shap
313
- talent_pv_m = float(np.expm1(talent_log_pv)) / 1_000_000
314
- # Positive = depreciation penalty | Negative = youth/contract premium
315
- depreciation_penalty_m = talent_pv_m - baseline_pv_m
316
- except (ValueError, IndexError):
317
- # Age or Contract_Years_Left not in model features — decomposition unavailable
318
- talent_pv_m = baseline_pv_m
319
- depreciation_penalty_m = 0.0
320
-
321
- # ── Internal Risk Factors ─────────────────────────────────────────────────
322
- internal_risk_pct = (
323
- (0.20 if req.contract_years < 1.5 else 0.0) +
324
- (0.15 if req.age > 30 else 0.0) +
325
- (0.10 if req.injuries_24m > 60 else 0.0)
326
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  # ── External NLP Intelligence (1-hour TTL cache) ──────────────────────────
329
  nlp = _fetch_nlp_intelligence(req.selected_name, req.current_club, req.interested_club)
@@ -331,12 +353,16 @@ async def evaluate_player(req: PlayerEvaluateRequest):
331
  rec = nlp['recency']
332
  agnt = nlp['agent']
333
  logs = nlp.get('_logs', [])
 
334
 
335
  # Tier-aware hype ceiling prevents NLP from distorting low-value players
336
- if baseline_pv_m > 40.0:
 
 
 
337
  rec_ceiling_pct = 0.25
338
  tier_name = "Elite Tier (>£40m)"
339
- elif baseline_pv_m >= 10.0:
340
  rec_ceiling_pct = 0.10
341
  tier_name = "Core Tier (£10m–£40m)"
342
  else:
@@ -348,7 +374,27 @@ async def evaluate_player(req: PlayerEvaluateRequest):
348
  agt_adj = min(0.0, agnt) * 0.05 # Agent leverage only discounts
349
 
350
  external_multiplier = 1.0 + rec_adj + dur_adj + agt_adj
351
- hard_cap_m = conservative_bound_m * (1.0 - internal_risk_pct) * external_multiplier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
  # ── SHAP Feature Contribution Table ──────────────────────────────────────
354
  shap_data = sorted(
@@ -362,16 +408,24 @@ async def evaluate_player(req: PlayerEvaluateRequest):
362
 
363
  return {
364
  "ledger": {
365
- "intrinsic_performance_value": talent_pv_m,
366
  "category": tier_name,
367
- "depreciation": depreciation_penalty_m,
368
- "baseline_value": baseline_pv_m,
369
  "external_multiplier": external_multiplier,
370
- "hard_cap": hard_cap_m,
 
 
 
 
 
 
 
371
  },
372
  "nlp_results": {"durability": dur, "recency": rec, "agent": agnt},
373
  "nlp_cached": nlp.get('_from_cache', False),
374
  "nlp_found": nlp.get('_found_any', False),
375
  "logs": logs,
 
376
  "shap_data": shap_data,
377
  }
 
118
  "recency": nlp["recency"],
119
  "agent": nlp["agent"],
120
  "logs": nlp.get("_logs", []),
121
+ "links": nlp.get("_links", []),
122
  "from_cache": nlp.get("_from_cache", False),
123
  "nlp_found": nlp.get("_found_any", False)
124
  }
 
195
  Results are cached per player+club combination for 1 hour to prevent
196
  rate-limiting and reduce API latency.
197
  """
198
+ cache_key = f"v2|{player_name.lower()}|{current_club.lower()}"
199
  cached = _nlp_cache.get(cache_key)
200
 
201
  # Logic: If we have a cached result with real data, keep it for 1 hour.
 
216
  }
217
  scores = {'durability': 0.0, 'recency': 0.0, 'agent': 0.0}
218
  logs = []
219
+ scraped_links = []
220
 
221
  found_any = False
222
  for axis, query in axes.items():
 
231
 
232
  if snippets:
233
  found_any = True
234
+ sentiments = []
235
+ for r in snippets:
236
+ title = r.get('title', '')
237
+ href = r.get('href', '')
238
+ body = r.get('body', '')
239
+ sentiments.append(TextBlob(body + ' ' + title).sentiment.polarity)
240
+ if href and href not in [lnk['url'] for lnk in scraped_links]:
241
+ scraped_links.append({"title": title, "url": href})
242
+
243
  avg_pol = sum(sentiments) / len(sentiments) if sentiments else 0.0
244
  scores[axis] = float(avg_pol)
245
  logs.append(f"Scraped {axis}: Polarity {avg_pol:.2f} ({len(snippets)} results)")
 
248
  except Exception as e:
249
  logs.append(f"Failed {axis}: {str(e)}")
250
 
251
+ # Deduplicate and limit to top 10 links
252
+ scraped_links = scraped_links[:10]
253
+ result = {**scores, '_ts': time.time(), '_logs': logs, '_links': scraped_links, '_from_cache': False, '_found_any': found_any}
254
  _nlp_cache[cache_key] = result
255
  return result
256
 
 
258
  # ── Request Schema ────────────────────────────────────────────────────────────
259
  class PlayerEvaluateRequest(BaseModel):
260
  selected_name: str
261
+ position: str = "Midfielder"
262
  current_club: str = ""
263
  interested_club: str = ""
264
  contract_years: float = 2.0
 
304
  baseline_pv_m = baseline_pv / 1_000_000
305
  conservative_bound_m = baseline_pv_m * 0.85
306
 
307
+ # ── Extract SHAP Values for UI Chart ──────────────────────────────────────
 
 
 
 
308
  dmatrix = xgb.DMatrix(X_infer)
309
  shap_contribs = model_global.get_booster().predict(dmatrix, pred_contribs=True)[0]
310
  feature_shaps = shap_contribs[:-1] # Last element is the SHAP base value
311
 
312
+ # ── Position-Specific Career Pathing (Dynamic Aging Curves) ───────────────
313
+ pos = req.position.lower()
314
+ age_multiplier = 1.0
315
+
316
+ if "forward" in pos or "striker" in pos or "winger" in pos or "attacker" in pos:
317
+ # Attackers peak early (24-27), decline steeply after 30
318
+ if req.age <= 23: age_multiplier = 1.25
319
+ elif req.age >= 30: age_multiplier = 0.75
320
+ elif "defender" in pos or "goalkeeper" in pos or "gk" in pos or "cb" in pos:
321
+ # Defenders/GKs peak late (28-32), sustain longer
322
+ if req.age <= 23: age_multiplier = 1.05
323
+ elif req.age >= 32: age_multiplier = 0.85
324
+ else:
325
+ # Midfielders peak 25-29
326
+ if req.age <= 23: age_multiplier = 1.15
327
+ elif req.age >= 31: age_multiplier = 0.80
328
+
329
+ # Contract Security Premium
330
+ contract_multiplier = 1.0
331
+ if req.contract_years >= 4.0: contract_multiplier = 1.20
332
+ elif req.contract_years <= 1.0: contract_multiplier = 0.70
333
+
334
+ structural_multiplier = age_multiplier * contract_multiplier
335
+
336
+ # ── Re-evaluating Intrinsic vs Baseline ──────────────────────────────────
337
+ # Apply structural multipliers to the raw ML baseline to correct the "Youth Penalty" bias in the data.
338
+ adjusted_baseline_pv_m = baseline_pv_m * structural_multiplier
339
+
340
+ # Talent is the baseline WITHOUT the age/contract multipliers
341
+ talent_pv_m = baseline_pv_m
342
+
343
+ # Positive = Appreciation (added value). Negative = Depreciation (lost value).
344
+ status_impact_m = adjusted_baseline_pv_m - talent_pv_m
345
+
346
+ # ── MTP Calculation (Replaces Flat Risk & Conservative Bound) ─────────────
347
+ # We drop the arbitrary 15% discount and fixed penalties.
348
+ # Instead, we define a probabilistic Market Transaction Price (MTP) range.
349
 
350
  # ── External NLP Intelligence (1-hour TTL cache) ──────────────────────────
351
  nlp = _fetch_nlp_intelligence(req.selected_name, req.current_club, req.interested_club)
 
353
  rec = nlp['recency']
354
  agnt = nlp['agent']
355
  logs = nlp.get('_logs', [])
356
+ links = nlp.get('_links', [])
357
 
358
  # Tier-aware hype ceiling prevents NLP from distorting low-value players
359
+ if adjusted_baseline_pv_m > 80.0:
360
+ rec_ceiling_pct = 0.35
361
+ tier_name = "Generational Superstar (>£80m)"
362
+ elif adjusted_baseline_pv_m > 40.0:
363
  rec_ceiling_pct = 0.25
364
  tier_name = "Elite Tier (>£40m)"
365
+ elif adjusted_baseline_pv_m >= 10.0:
366
  rec_ceiling_pct = 0.10
367
  tier_name = "Core Tier (£10m–£40m)"
368
  else:
 
374
  agt_adj = min(0.0, agnt) * 0.05 # Agent leverage only discounts
375
 
376
  external_multiplier = 1.0 + rec_adj + dur_adj + agt_adj
377
+
378
+ # ── Scarcity Index & Buyer's Premium ──────────────────────────────────────
379
+ # Elite players command a massive scarcity premium.
380
+ if adjusted_baseline_pv_m > 80.0:
381
+ scarcity_premium = 0.40 # +40% for generational talents
382
+ elif adjusted_baseline_pv_m > 40.0:
383
+ scarcity_premium = 0.15 # +15% for elite
384
+ elif adjusted_baseline_pv_m >= 10.0:
385
+ scarcity_premium = 0.05
386
+ else:
387
+ scarcity_premium = 0.0
388
+
389
+ mtp_base = adjusted_baseline_pv_m * external_multiplier
390
+ mtp_lower = mtp_base * 0.90
391
+ mtp_upper = mtp_base * (1.0 + scarcity_premium)
392
+
393
+ # ── CFO Dashboard (PSR Integration) ───────────────────────────────────────
394
+ # Amortization is capped at 5 years under UEFA/Premier League rules.
395
+ # We assume a standard 5-year new contract for the incoming transfer.
396
+ amortization_years = min(5.0, 5.0)
397
+ annual_amortization_cost = req.asking_price / amortization_years
398
 
399
  # ── SHAP Feature Contribution Table ──────────────────────────────────────
400
  shap_data = sorted(
 
408
 
409
  return {
410
  "ledger": {
411
+ "fiv": talent_pv_m,
412
  "category": tier_name,
413
+ "depreciation": status_impact_m,
414
+ "baseline_value": adjusted_baseline_pv_m,
415
  "external_multiplier": external_multiplier,
416
+ "mtp_lower": mtp_lower,
417
+ "mtp_upper": mtp_upper,
418
+ "scarcity_premium": scarcity_premium,
419
+ },
420
+ "cfo_dashboard": {
421
+ "asking_price": req.asking_price,
422
+ "amortization_years": amortization_years,
423
+ "annual_amortization_cost": annual_amortization_cost,
424
  },
425
  "nlp_results": {"durability": dur, "recency": rec, "agent": agnt},
426
  "nlp_cached": nlp.get('_from_cache', False),
427
  "nlp_found": nlp.get('_found_any', False),
428
  "logs": logs,
429
+ "links": links,
430
  "shap_data": shap_data,
431
  }