Spaces:
Sleeping
Update utility/utils.py
Browse files - utility/utils.py +13 -6
utility/utils.py
CHANGED
|
@@ -169,15 +169,16 @@ def normalize_llm_schema(data):
|
|
| 169 |
Accepts a dict that may have nulls, strings, or alternate key spellings.
|
| 170 |
"""
|
| 171 |
data = data or {}
|
|
|
|
| 172 |
|
| 173 |
# Common alternate keys seen in model outputs
|
| 174 |
key_aliases = {
|
| 175 |
-
"Name": ["Name", "name", "FullName", "full_name", "person_name"],
|
| 176 |
"Designation": ["Designation", "designation", "Title", "title", "Role", "role"],
|
| 177 |
"Company": ["Company", "company", "Organization", "organization", "Org", "org"],
|
| 178 |
-
"Contact": ["Contact", "contact", "Phone", "phone", "Mobile", "mobile", "PhoneNumber", "phone_number"],
|
| 179 |
"Address": ["Address", "address", "Location", "location"],
|
| 180 |
-
"Email": ["Email", "email", "E-mail", "e_mail"],
|
| 181 |
"Link": ["Link", "link", "URL", "url", "Website", "website", "Portfolio", "portfolio"]
|
| 182 |
}
|
| 183 |
|
|
@@ -188,9 +189,11 @@ def normalize_llm_schema(data):
|
|
| 188 |
for alias in aliases:
|
| 189 |
if alias in data and data[alias] is not None:
|
| 190 |
chosen = _coerce_list(data[alias])
|
| 191 |
-
|
|
|
|
| 192 |
normalized[canonical_key] = chosen
|
| 193 |
|
|
|
|
| 194 |
return normalized
|
| 195 |
|
| 196 |
|
|
@@ -542,6 +545,9 @@ def process_resume_data(LLMdata, cont_data, extracted_text):
|
|
| 542 |
Final merge step.
|
| 543 |
Keeps the output structure exactly as you currently use in result.html.
|
| 544 |
"""
|
|
|
|
|
|
|
|
|
|
| 545 |
LLMdata = normalize_llm_schema(LLMdata)
|
| 546 |
cont_data = cont_data or {}
|
| 547 |
|
|
@@ -582,13 +588,14 @@ def process_resume_data(LLMdata, cont_data, extracted_text):
|
|
| 582 |
"Link": LLMdata.get("Link", []),
|
| 583 |
"Company": LLMdata.get("Company", []),
|
| 584 |
"extracted_text": extracted_text,
|
| 585 |
-
"status_message": f"Source: {
|
| 586 |
}
|
| 587 |
|
|
|
|
| 588 |
for key in ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]:
|
| 589 |
processed_data[key] = [
|
| 590 |
v for v in processed_data[key]
|
| 591 |
-
if str(v).strip().lower() not in {"not found", "none", "null", ""}
|
| 592 |
]
|
| 593 |
|
| 594 |
return processed_data
|
|
|
|
| 169 |
Accepts a dict that may have nulls, strings, or alternate key spellings.
|
| 170 |
"""
|
| 171 |
data = data or {}
|
| 172 |
+
meta = data.get('meta', 'Primary Extraction')
|
| 173 |
|
| 174 |
# Common alternate keys seen in model outputs
|
| 175 |
key_aliases = {
|
| 176 |
+
"Name": ["Name", "name", "FullName", "full_name", "person_name", "Person"],
|
| 177 |
"Designation": ["Designation", "designation", "Title", "title", "Role", "role"],
|
| 178 |
"Company": ["Company", "company", "Organization", "organization", "Org", "org"],
|
| 179 |
+
"Contact": ["Contact", "contact", "Phone", "phone", "Mobile", "mobile", "PhoneNumber", "phone_number", "Number"],
|
| 180 |
"Address": ["Address", "address", "Location", "location"],
|
| 181 |
+
"Email": ["Email", "email", "E-mail", "e_mail", "Mail"],
|
| 182 |
"Link": ["Link", "link", "URL", "url", "Website", "website", "Portfolio", "portfolio"]
|
| 183 |
}
|
| 184 |
|
|
|
|
| 189 |
for alias in aliases:
|
| 190 |
if alias in data and data[alias] is not None:
|
| 191 |
chosen = _coerce_list(data[alias])
|
| 192 |
+
if chosen:
|
| 193 |
+
break
|
| 194 |
normalized[canonical_key] = chosen
|
| 195 |
|
| 196 |
+
normalized['meta'] = meta
|
| 197 |
return normalized
|
| 198 |
|
| 199 |
|
|
|
|
| 545 |
Final merge step.
|
| 546 |
Keeps the output structure exactly as you currently use in result.html.
|
| 547 |
"""
|
| 548 |
+
# Preserving meta if it exists before normalization
|
| 549 |
+
meta_info = LLMdata.get('meta') or "Primary+Backup Extraction"
|
| 550 |
+
|
| 551 |
LLMdata = normalize_llm_schema(LLMdata)
|
| 552 |
cont_data = cont_data or {}
|
| 553 |
|
|
|
|
| 588 |
"Link": LLMdata.get("Link", []),
|
| 589 |
"Company": LLMdata.get("Company", []),
|
| 590 |
"extracted_text": extracted_text,
|
| 591 |
+
"status_message": f"Source: {meta_info}"
|
| 592 |
}
|
| 593 |
|
| 594 |
+
# Final cleanup of empty or 'not found' values
|
| 595 |
for key in ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]:
|
| 596 |
processed_data[key] = [
|
| 597 |
v for v in processed_data[key]
|
| 598 |
+
if str(v).strip().lower() not in {"not found", "none", "null", "", "[]", "unknown"}
|
| 599 |
]
|
| 600 |
|
| 601 |
return processed_data
|