text_amon_API / reportanalysis.py
Abdullahcoder54's picture
Update reportanalysis.py
6a9b7a4 verified
import io
import re
import os
import docx
from PyPDF2 import PdfReader
import numpy as np
from PIL import Image
from doctr.models import ocr_predictor
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from typing import List, Literal
from agents import (
Agent,
AsyncOpenAI,
OpenAIChatCompletionsModel,
AgentOutputSchema,
AgentOutputSchemaBase,
enable_verbose_stdout_logging,
set_tracing_disabled
)
# Module-level setup: everything below runs once, at import time.
# Switch on the agents SDK's verbose stdout logging and turn its tracing off.
enable_verbose_stdout_logging()
set_tracing_disabled(True)
# Load variables from a .env file; must happen before GEM_API_KEY is read below.
load_dotenv()
# OCR predictor created with pretrained weights; extract_text() calls it for
# uploads that are neither PDF nor DOCX.
model = ocr_predictor(pretrained=True)
# Presidio engines used by format_json() to detect and mask PII.
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()
# API key handed to the AsyncOpenAI client; None when GEM_API_KEY is not set.
API = os.getenv("GEM_API_KEY")
class AiInsights(BaseModel):
    # Narrative/advice section of the agent's structured output.
    # NOTE: documented with comments rather than a docstring on purpose --
    # pydantic copies a model docstring into the generated JSON schema.

    # Free-text fields.
    overallAssessment: str

    # Lists of string-to-string mappings; the keys are not fixed by this model.
    keyHighlights: list[dict[str, str]]

    # Plain lists of advice strings.
    dietaryRecommendations: list[str]
    lifestyleAdvice: list[str]
    precautions: list[str]
    risks: list[str]

    # Same shape as keyHighlights: string-to-string mappings.
    actions: list[dict[str, str]]

    tips: list[str]
class KeyFinding(BaseModel):
    # One test extracted from the report, with its value and traffic-light flag.
    # NOTE: documented with comments rather than a docstring on purpose --
    # pydantic copies a model docstring into the generated JSON schema.

    # Name of the test as it appears in the report.
    test: str

    # Measured value. Lab results are frequently fractional (e.g. 13.5 or 0.9);
    # an `int` field rejects such values or forces the model to round them, so
    # the field is a float. Whole numbers are still accepted.
    value: float

    unit: str

    # Normal reference range for the test, kept as free text.
    range: str

    # The spelling is part of the output schema's field name, so it is kept
    # exactly as-is for compatibility with existing consumers.
    shortExplaination: str

    # Red = critical/abnormal, Yellow = borderline, Green = normal
    # (as defined in the agent instructions).
    status: Literal["Red", "Yellow", "Green"]
class AnalysisResult(BaseModel):
    # Top-level structured output of the report analysis agent.
    # NOTE: documented with comments rather than a docstring on purpose --
    # pydantic copies a model docstring into the generated JSON schema.

    # Report identification and free-text summary.
    fileName: str
    reportType: str
    summary: str

    # One entry per test found in the report.
    keyFindings: list[KeyFinding]

    # Advice/narrative section.
    aiInsights: AiInsights
# Async OpenAI-style client pointed at the Generative Language API's
# OpenAI-compatible base URL and authenticated with the key read from
# GEM_API_KEY (API is None when that variable is unset).
client = AsyncOpenAI(
api_key = API,
base_url = "https://generativelanguage.googleapis.com/v1beta/openai/",
)
# Chat-completions model wrapper bound to the client above; Report_Agent uses
# it to call the "gemini-2.5-flash" model.
agent_model = OpenAIChatCompletionsModel(
model = "gemini-2.5-flash",
openai_client = client,
)
# Punctuation treated as OCR noise. A '.', ',' or '-' sitting between two
# digits is kept so decimals ("13.5"), ranges ("4.0-11.0") and grouped numbers
# ("1,000") reach the agent intact instead of being split into separate tokens.
_NOISE_PUNCT_RE = re.compile(r'[()\'"]|(?<!\d)[,.-]|[,.-](?!\d)')

# A <PERSON> placeholder plus at most two capitalised tokens that follow it on
# the SAME line (name parts the recogniser may have missed). The match is
# bounded on purpose: an unbounded run of word/space characters would swallow
# the report text that follows the name, including test values.
_PERSON_RE = re.compile(r'<PERSON>(?:[ \t]+[A-Z]\w*){0,2}')

# One to six capitalised words on the same line followed by a facility keyword.
# Only the keyword is case-insensitive; requiring capitalised lead words stops
# ordinary phrases such as "no significant medical history" being redacted.
_HOSPITAL_RE = re.compile(
    r'\b(?:[A-Z][A-Za-z]+(?:[ \t]*[&,][ \t]*|[ \t]+)){1,6}'
    r'(?i:hospital|lab|clinic|diagnostic|medical|centre|pathology)\b'
)


def format_json(result):
    """Anonymize OCR text and normalise its punctuation.

    The text is run through the module-level Presidio analyzer/anonymizer,
    noise punctuation is replaced with spaces, and the remaining person and
    facility mentions are collapsed to ``<NAME>`` and ``<HOSPITAL>``.

    Args:
        result: Raw text to clean (extract_text passes the OCR rendering).

    Returns:
        The anonymized text, or ``''`` when ``result`` is empty or ``None``.
        The cleaned text is also printed to stdout.
    """
    if not result:
        return ''

    findings = analyzer.analyze(text=result, language='en')
    anonymized = anonymizer.anonymize(text=result, analyzer_results=findings)

    cleaned = _NOISE_PUNCT_RE.sub(' ', anonymized.text).strip()
    cleaned = _PERSON_RE.sub('<NAME>', cleaned)
    cleaned = _HOSPITAL_RE.sub('<HOSPITAL>', cleaned)

    print(cleaned)
    return cleaned
def extract_text(content, pdf, doc) -> str:
    """Extract the text of an uploaded report.

    Args:
        content: Raw bytes of the uploaded file.
        pdf: Truthy when ``content`` is a PDF.
        doc: Truthy when ``content`` is a DOCX file (checked only if ``pdf``
            is falsy).

    Returns:
        The extracted text with surrounding whitespace removed. When neither
        flag is set the bytes are treated as an image, OCR'd with the
        module-level predictor and passed through format_json().
    """
    stream = io.BytesIO(content)

    if pdf:
        reader = PdfReader(stream)
        # `or ''` keeps the join safe if a page yields no text at all.
        text = '\n'.join(page.extract_text() or '' for page in reader.pages).strip()
        # Only the size is reported: the raw text may contain patient
        # identifiers and must not end up in stdout logs.
        print(f"Extracted {len(text)} characters from PDF")
        # NOTE(review): unlike the image branch, this text is returned without
        # going through format_json() -- confirm the caller anonymizes it.
        return text

    if doc:
        # Distinct local name so the `doc` flag parameter is not rebound.
        document = docx.Document(stream)
        # NOTE(review): only body paragraphs are read here; presumably text
        # inside tables is not included -- verify against python-docx.
        text = '\n'.join(para.text for para in document.paragraphs).strip()
        print(f"Extracted {len(text)} characters from DOCX")
        # NOTE(review): returned without format_json(), same as the PDF branch.
        return text

    # Anything else is treated as an image and OCR'd.
    image = Image.open(stream).convert("RGB")
    pixels = np.ascontiguousarray(np.array(image, dtype='uint8'))
    ocr_result = model([pixels])
    # format_json() already prints the anonymized text, so it is not printed
    # a second time here.
    return format_json(ocr_result.render())
# Response-format description embedded in the prompt. The schema is generated
# from AnalysisResult so the prompt always lists every field the output model
# requires (including KeyFinding.shortExplaination) and cannot drift from it.
_REPORT_RESPONSE_FORMAT = {
    'type': 'json_schema',
    'json_schema': {
        'name': 'final_output',
        'strict': False,
        'schema': AnalysisResult.model_json_schema(),
    },
}

# Task description followed by the dict repr of the response format above.
_REPORT_INSTRUCTIONS = """You are a Medical Report Analysis Agent.
Your role is to analyze uploaded medical test reports and generate clear, accurate health advice in structured JSON format.
Your Main Task:
1. Analyze the extracted medical text carefully.
2. Identify each test name, its result (user value), and the normal reference range.
3. Assign a flag to each test based on the result:
- Red: Critical or abnormal
- Yellow: Slightly out of range or borderline
- Green: Normal or safe
4. Provide a clear summary of the findings.
5. Offer relevant AI-driven health tips, highlight potential risks, and suggest dietary and lifestyle improvements.
6. Structure the output in the specified JSON format.
Response format: """ + str(_REPORT_RESPONSE_FORMAT) + "\n"

# Agent that turns extracted report text into an AnalysisResult. The output
# type is declared with strict_json_schema=False.
Report_Agent = Agent(
    name="Report_Analysis_Agent",
    instructions=_REPORT_INSTRUCTIONS,
    model=agent_model,
    output_type=AgentOutputSchema(AnalysisResult, strict_json_schema=False),
)