Spaces:

WebashalarForML
/

ImageDataExtractor2

Sleeping

App Files Community

WebashalarForML committed 13 days ago

Commit

27bebfe

verified ·

1 Parent(s): 8574192

Update utility/utils.py

Browse files

Files changed (1) hide show

utility/utils.py +13 -6

utility/utils.py CHANGED Viewed

@@ -169,15 +169,16 @@ def normalize_llm_schema(data):
     Accepts a dict that may have nulls, strings, or alternate key spellings.
     """
     data = data or {}
     # Common alternate keys seen in model outputs
     key_aliases = {
-        "Name": ["Name", "name", "FullName", "full_name", "person_name"],
         "Designation": ["Designation", "designation", "Title", "title", "Role", "role"],
         "Company": ["Company", "company", "Organization", "organization", "Org", "org"],
-        "Contact": ["Contact", "contact", "Phone", "phone", "Mobile", "mobile", "PhoneNumber", "phone_number"],
         "Address": ["Address", "address", "Location", "location"],
-        "Email": ["Email", "email", "E-mail", "e_mail"],
         "Link": ["Link", "link", "URL", "url", "Website", "website", "Portfolio", "portfolio"]
     }
@@ -188,9 +189,11 @@ def normalize_llm_schema(data):
         for alias in aliases:
             if alias in data and data[alias] is not None:
                 chosen = _coerce_list(data[alias])
-                break
         normalized[canonical_key] = chosen
     return normalized
@@ -542,6 +545,9 @@ def process_resume_data(LLMdata, cont_data, extracted_text):
     Final merge step.
     Keeps the output structure exactly as you currently use in result.html.
     """
     LLMdata = normalize_llm_schema(LLMdata)
     cont_data = cont_data or {}
@@ -582,13 +588,14 @@ def process_resume_data(LLMdata, cont_data, extracted_text):
         "Link": LLMdata.get("Link", []),
         "Company": LLMdata.get("Company", []),
         "extracted_text": extracted_text,
-        "status_message": f"Source: {LLMdata.get('meta', 'Primary+Backup')}"
     }
     for key in ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]:
         processed_data[key] = [
             v for v in processed_data[key]
-            if str(v).strip().lower() not in {"not found", "none", "null", ""}
         ]
     return processed_data

     Accepts a dict that may have nulls, strings, or alternate key spellings.
     """
     data = data or {}
+    meta = data.get('meta', 'Primary Extraction')
     # Common alternate keys seen in model outputs
     key_aliases = {
+        "Name": ["Name", "name", "FullName", "full_name", "person_name", "Person"],
         "Designation": ["Designation", "designation", "Title", "title", "Role", "role"],
         "Company": ["Company", "company", "Organization", "organization", "Org", "org"],
+        "Contact": ["Contact", "contact", "Phone", "phone", "Mobile", "mobile", "PhoneNumber", "phone_number", "Number"],
         "Address": ["Address", "address", "Location", "location"],
+        "Email": ["Email", "email", "E-mail", "e_mail", "Mail"],
         "Link": ["Link", "link", "URL", "url", "Website", "website", "Portfolio", "portfolio"]
     }
         for alias in aliases:
             if alias in data and data[alias] is not None:
                 chosen = _coerce_list(data[alias])
+                if chosen:
+                    break
         normalized[canonical_key] = chosen
+    normalized['meta'] = meta
     return normalized
     Final merge step.
     Keeps the output structure exactly as you currently use in result.html.
     """
+    # Preserving meta if it exists before normalization
+    meta_info = LLMdata.get('meta') or "Primary+Backup Extraction"
     LLMdata = normalize_llm_schema(LLMdata)
     cont_data = cont_data or {}
         "Link": LLMdata.get("Link", []),
         "Company": LLMdata.get("Company", []),
         "extracted_text": extracted_text,
+        "status_message": f"Source: {meta_info}"
     }
+    # Final cleanup of empty or 'not found' values
     for key in ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]:
         processed_data[key] = [
             v for v in processed_data[key]
+            if str(v).strip().lower() not in {"not found", "none", "null", "", "[]", "unknown"}
         ]
     return processed_data