Spaces:

Abdullahcoder54
/

text_amon_API

Sleeping

App Files Files Community

text_amon_API / reportanalysis.py

Abdullahcoder54

Update reportanalysis.py

6a9b7a4 verified 23 days ago

raw

history blame contribute delete

5.95 kB


	import io
	import re
	import os
	import docx
	from PyPDF2 import PdfReader
	import numpy as np
	from PIL import Image
	from doctr.models import ocr_predictor
	from presidio_analyzer import AnalyzerEngine
	from presidio_anonymizer import AnonymizerEngine
	from dotenv import load_dotenv
	from pydantic import BaseModel, Field
	from typing import List, Literal
	from agents import (
	Agent,
	AsyncOpenAI,
	OpenAIChatCompletionsModel,
	AgentOutputSchema,
	AgentOutputSchemaBase,
	enable_verbose_stdout_logging,
	set_tracing_disabled
	)
	# Turn on the agents SDK's verbose stdout logging and switch its tracing off.
	enable_verbose_stdout_logging()
	set_tracing_disabled(True)

	# Load variables from a .env file (if present) so the os.getenv lookup below can see them.
	load_dotenv()
	# Module-level singletons, built once at import time and reused by the helpers below:
	#   model      - docTR OCR predictor with pretrained weights (used for image input)
	#   analyzer   - Presidio analyzer whose results are handed to the anonymizer
	#   anonymizer - Presidio anonymizer that rewrites the analysed text
	model = ocr_predictor(pretrained=True)
	analyzer = AnalyzerEngine()
	anonymizer = AnonymizerEngine()

	# API key passed to the OpenAI-compatible client below; None when GEM_API_KEY is unset.
	API = os.getenv("GEM_API_KEY")

	# Narrative part of an analysis: one free-text assessment plus several lists
	# of recommendations. Documented with comments rather than a class docstring
	# because pydantic copies a model's docstring into its JSON schema as
	# "description", which would change the schema generated for this model.
	class AiInsights(BaseModel):
	    overallAssessment: str
	    # Each highlight / action is a flat string-to-string mapping.
	    keyHighlights: list[dict[str, str]]
	    dietaryRecommendations: list[str]
	    lifestyleAdvice: list[str]
	    precautions: list[str]
	    risks: list[str]
	    actions: list[dict[str, str]]
	    tips: list[str]

	# One test result taken from the report, with its traffic-light status.
	# Documented with comments rather than a class docstring because pydantic
	# copies a model's docstring into its JSON schema as "description".
	class KeyFinding(BaseModel):
	    test: str
	    # float, not int: lab values are routinely fractional (e.g. 13.5) and an
	    # int field fails validation on them. Whole numbers still validate.
	    value: float
	    unit: str
	    range: str
	    # The spelling is part of the emitted JSON key, so it is kept as is.
	    shortExplaination: str
	    # Red = critical/abnormal, Yellow = borderline, Green = normal.
	    status: Literal["Red", "Yellow", "Green"]

	# Top-level payload the agent must produce: report metadata, a summary, the
	# per-test findings and the narrative insights. Documented with comments
	# rather than a class docstring because pydantic copies a model's docstring
	# into its JSON schema as "description".
	class AnalysisResult(BaseModel):
	    fileName: str
	    reportType: str
	    summary: str
	    keyFindings: list[KeyFinding]
	    aiInsights: AiInsights

	# OpenAI-compatible async client pointed at Google's generativelanguage
	# endpoint, authenticated with the key read from GEM_API_KEY.
	client = AsyncOpenAI(
	    api_key=API,
	    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
	)

	# Chat-completions model wrapper the agent uses to reach gemini-2.5-flash
	# through the client above.
	agent_model = OpenAIChatCompletionsModel(
	    model="gemini-2.5-flash",
	    openai_client=client,
	)

	def format_json(result):
	    """Anonymise *result* and mask person and facility names.

	    The text is run through the module-level Presidio ``analyzer`` and
	    ``anonymizer``. Commas, periods, parentheses, quotes and hyphens are then
	    replaced by spaces and the ends are stripped. Each ``<PERSON>``
	    placeholder, together with the word characters and whitespace following
	    it, is collapsed to ``<NAME>``. Runs of one to six words ending in a
	    facility keyword (hospital, lab, clinic, ...) become ``<HOSPITAL>``.

	    Args:
	        result: Plain text to scrub.

	    Returns:
	        The scrubbed text. It is also printed to stdout.
	    """
	    findings = analyzer.analyze(text=result, language='en')
	    anonymized = anonymizer.anonymize(text=result, analyzer_results=findings)

	    person_pattern = r'(<PERSON>\s+[\w\s\-]+)'
	    # Alternatives are separated by a bare `|`. A backslash-escaped pipe
	    # matches a literal "|" character, which would keep this rule from ever
	    # firing on real text.
	    # NOTE(review): the global (?i) also lets [A-Z] match lowercase letters,
	    # so up to six preceding words of any case are swallowed - confirm intended.
	    hospital_pattern = r'(?i)\b(?:[A-Z][a-zA-Z]+(?:\s+|,|&)?){1,6}(hospital|lab|clinic|diagnostic|medical|centre|pathology)\b'

	    # NOTE(review): this also blanks decimal points and range hyphens
	    # ("13.5" -> "13 5", "12-16" -> "12 16") - confirm that is acceptable.
	    result_text = re.sub(r'[,.()\'"-]', ' ', anonymized.text).strip()
	    result_text = re.sub(person_pattern, r'<NAME>', result_text)
	    result_text = re.sub(hospital_pattern, r'<HOSPITAL>', result_text)
	    print(result_text)
	    return result_text

	def extract_text(content, pdf, doc) -> str:
	    """Pull the text out of an uploaded report.

	    Args:
	        content: Bytes of the uploaded file.
	        pdf: Truthy when ``content`` should be read as a PDF.
	        doc: Truthy when ``content`` should be read with ``docx``; only
	            consulted when ``pdf`` is falsy.

	    Returns:
	        For PDF and docx input, the concatenated page / paragraph text with
	        surrounding whitespace stripped. Otherwise ``content`` is opened as
	        an image, run through the OCR ``model`` and the rendered result is
	        returned after passing through ``format_json``.

	    Every branch also prints the text it extracted.
	    """
	    stream = io.BytesIO(content)

	    if pdf:
	        pages = PdfReader(stream).pages
	        text = ''.join(page.extract_text() + '\n' for page in pages)
	        print(text)
	        return text.strip()

	    if doc:
	        paragraphs = docx.Document(stream).paragraphs
	        text = ''.join(para.text + '\n' for para in paragraphs)
	        print(text)
	        return text.strip()

	    # Neither flag set: treat the bytes as an image and OCR it.
	    picture = Image.open(stream).convert("RGB")
	    pixels = np.ascontiguousarray(np.array(picture, dtype='uint8'))
	    ocr_output = model([pixels])
	    cleaned = format_json(ocr_output.render())
	    print(cleaned)
	    return cleaned


	# Agent that turns extracted report text into an AnalysisResult.
	# The prompt embeds a hand-written copy of the AnalysisResult JSON schema. The
	# KeyFinding entry in that copy lists shortExplaination, because KeyFinding
	# declares it as a required field and the prompt must not tell the model
	# it may be omitted.
	Report_Agent = Agent(
	    name = "Report_Analysis_Agent",
	    instructions = """You are a Medical Report Analysis Agent.

	Your role is to analyze uploaded medical test reports and generate clear, accurate health advice in structured JSON format.

	Your Main Task:
	1. Analyze the extracted medical text carefully.
	2. Identify each test name, its result (user value), and the normal reference range.
	3. Assign a flag to each test based on the result:
	- Red: Critical or abnormal
	- Yellow: Slightly out of range or borderline
	- Green: Normal or safe
	4. Provide a clear summary of the findings.
	5. Offer relevant AI-driven health tips, highlight potential risks, and suggest dietary and lifestyle improvements.
	6. Structure the output in the specified JSON format.
	Response format: {'type': 'json_schema', 'json_schema': {'name': 'final_output', 'strict': False, 'schema': {'$defs': {'AiInsights': {'properties': {'overallAssessment': {'title': 'Overallassessment', 'type': 'string'}, 'keyHighlights': {'items': {'additionalProperties': {'type': 'string'}, 'type': 'object'}, 'title': 'Keyhighlights', 'type': 'array'}, 'dietaryRecommendations': {'items': {'type': 'string'}, 'title': 'Dietaryrecommendations', 'type': 'array'}, 'lifestyleAdvice': {'items': {'type': 'string'}, 'title': 'Lifestyleadvice', 'type': 'array'}, 'precautions': {'items': {'type': 'string'}, 'title': 'Precautions', 'type': 'array'}, 'risks': {'items': {'type': 'string'}, 'title': 'Risks', 'type': 'array'}, 'actions': {'items': {'additionalProperties': {'type': 'string'}, 'type': 'object'}, 'title': 'Actions', 'type': 'array'}, 'tips': {'items': {'type': 'string'}, 'title': 'Tips', 'type': 'array'}}, 'required': ['overallAssessment', 'keyHighlights', 'dietaryRecommendations', 'lifestyleAdvice', 'precautions', 'risks', 'actions', 'tips'], 'title': 'AiInsights', 'type': 'object'}, 'KeyFinding': {'properties': {'test': {'title': 'Test', 'type': 'string'}, 'value': {'title': 'Value', 'type': 'integer'}, 'unit': {'title': 'Unit', 'type': 'string'}, 'range': {'title': 'Range', 'type': 'string'}, 'shortExplaination': {'title': 'Shortexplaination', 'type': 'string'}, 'status': {'enum': ['Red', 'Yellow', 'Green'], 'title': 'Status', 'type': 'string'}}, 'required': ['test', 'value', 'unit', 'range', 'shortExplaination', 'status'], 'title': 'KeyFinding', 'type': 'object'}}, 'properties': {'fileName': {'title': 'Filename', 'type': 'string'}, 'reportType': {'title': 'Reporttype', 'type': 'string'}, 'summary': {'title': 'Summary', 'type': 'string'}, 'keyFindings': {'items': {'$ref': '#/$defs/KeyFinding'}, 'title': 'Keyfindings', 'type': 'array'}, 'aiInsights': {'$ref': '#/$defs/AiInsights'}},
	'required': ['fileName', 'reportType', 'summary', 'keyFindings', 'aiInsights'], 'title': 'AnalysisResult', 'type': 'object'}}}
	""",
	    model = agent_model,
	    # strict_json_schema=False: the models use open-ended dict[str, str]
	    # fields - presumably why strict mode is off; confirm.
	    output_type= AgentOutputSchema(AnalysisResult, strict_json_schema=False)
	)