WebashalarForML committed on
Commit
27bebfe
·
verified ·
1 Parent(s): 8574192

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +13 -6
utility/utils.py CHANGED
@@ -169,15 +169,16 @@ def normalize_llm_schema(data):
169
  Accepts a dict that may have nulls, strings, or alternate key spellings.
170
  """
171
  data = data or {}
 
172
 
173
  # Common alternate keys seen in model outputs
174
  key_aliases = {
175
- "Name": ["Name", "name", "FullName", "full_name", "person_name"],
176
  "Designation": ["Designation", "designation", "Title", "title", "Role", "role"],
177
  "Company": ["Company", "company", "Organization", "organization", "Org", "org"],
178
- "Contact": ["Contact", "contact", "Phone", "phone", "Mobile", "mobile", "PhoneNumber", "phone_number"],
179
  "Address": ["Address", "address", "Location", "location"],
180
- "Email": ["Email", "email", "E-mail", "e_mail"],
181
  "Link": ["Link", "link", "URL", "url", "Website", "website", "Portfolio", "portfolio"]
182
  }
183
 
@@ -188,9 +189,11 @@ def normalize_llm_schema(data):
188
  for alias in aliases:
189
  if alias in data and data[alias] is not None:
190
  chosen = _coerce_list(data[alias])
191
- break
 
192
  normalized[canonical_key] = chosen
193
 
 
194
  return normalized
195
 
196
 
@@ -542,6 +545,9 @@ def process_resume_data(LLMdata, cont_data, extracted_text):
542
  Final merge step.
543
  Keeps the output structure exactly as you currently use in result.html.
544
  """
 
 
 
545
  LLMdata = normalize_llm_schema(LLMdata)
546
  cont_data = cont_data or {}
547
 
@@ -582,13 +588,14 @@ def process_resume_data(LLMdata, cont_data, extracted_text):
582
  "Link": LLMdata.get("Link", []),
583
  "Company": LLMdata.get("Company", []),
584
  "extracted_text": extracted_text,
585
- "status_message": f"Source: {LLMdata.get('meta', 'Primary+Backup')}"
586
  }
587
 
 
588
  for key in ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]:
589
  processed_data[key] = [
590
  v for v in processed_data[key]
591
- if str(v).strip().lower() not in {"not found", "none", "null", ""}
592
  ]
593
 
594
  return processed_data
 
169
  Accepts a dict that may have nulls, strings, or alternate key spellings.
170
  """
171
  data = data or {}
172
+ meta = data.get('meta', 'Primary Extraction')
173
 
174
  # Common alternate keys seen in model outputs
175
  key_aliases = {
176
+ "Name": ["Name", "name", "FullName", "full_name", "person_name", "Person"],
177
  "Designation": ["Designation", "designation", "Title", "title", "Role", "role"],
178
  "Company": ["Company", "company", "Organization", "organization", "Org", "org"],
179
+ "Contact": ["Contact", "contact", "Phone", "phone", "Mobile", "mobile", "PhoneNumber", "phone_number", "Number"],
180
  "Address": ["Address", "address", "Location", "location"],
181
+ "Email": ["Email", "email", "E-mail", "e_mail", "Mail"],
182
  "Link": ["Link", "link", "URL", "url", "Website", "website", "Portfolio", "portfolio"]
183
  }
184
 
 
189
  for alias in aliases:
190
  if alias in data and data[alias] is not None:
191
  chosen = _coerce_list(data[alias])
192
+ if chosen:
193
+ break
194
  normalized[canonical_key] = chosen
195
 
196
+ normalized['meta'] = meta
197
  return normalized
198
 
199
 
 
545
  Final merge step.
546
  Keeps the output structure exactly as you currently use in result.html.
547
  """
548
+ # Preserving meta if it exists before normalization
549
+ meta_info = LLMdata.get('meta') or "Primary+Backup Extraction"
550
+
551
  LLMdata = normalize_llm_schema(LLMdata)
552
  cont_data = cont_data or {}
553
 
 
588
  "Link": LLMdata.get("Link", []),
589
  "Company": LLMdata.get("Company", []),
590
  "extracted_text": extracted_text,
591
+ "status_message": f"Source: {meta_info}"
592
  }
593
 
594
+ # Final cleanup of empty or 'not found' values
595
  for key in ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]:
596
  processed_data[key] = [
597
  v for v in processed_data[key]
598
+ if str(v).strip().lower() not in {"not found", "none", "null", "", "[]", "unknown"}
599
  ]
600
 
601
  return processed_data