Upload retinalOCT_RPD_segmentation version 0.0.1

b8597df verified 7 months ago

5.41 kB

	import distutils.util
	import glob
	import os
	import shutil

	import cv2
	import pandas as pd
	from PIL import Image
	from pydicom import dcmread
	from pydicom.fileset import FileSet
	from tqdm import tqdm

	from .volReader import VolFile

	script_dir = os.path.dirname(__file__)


	class Error(Exception):
	    """Root exception type for failures raised by this module."""


	def _parse_yes_no(answer):
	    """Translate a typed yes/no answer into a bool.

	    Parsed locally instead of through ``distutils.util.strtobool`` because
	    ``distutils`` was removed from the standard library in Python 3.12. The
	    accepted spellings are the same ones ``strtobool`` accepted; matching is
	    case-insensitive and ignores surrounding whitespace.

	    Args:
	        answer (str): Raw text typed by the user.

	    Returns:
	        bool: True for an affirmative answer, False for a negative one.

	    Raises:
	        ValueError: If the answer is not a recognised yes/no spelling.
	    """
	    normalized = answer.strip().lower()
	    if normalized in ("y", "yes", "t", "true", "on", "1"):
	        return True
	    if normalized in ("n", "no", "f", "false", "off", "0"):
	        return False
	    raise ValueError(f"invalid truth value {answer!r}")


	def _vol_scan_name(vol_path):
	    """Return the file name of *vol_path* without its directory or extension."""
	    base_name = vol_path.replace("\\", "/").rsplit("/", 1)[-1]
	    # splitext removes only the real extension; str.strip(".vol") would eat
	    # any leading/trailing '.', 'v', 'o' or 'l' characters of the name itself.
	    return os.path.splitext(base_name)[0]


	def extract_files(dirtoextract, extracted_path, input_format):
	    """Extracts individual image frames from .vol or DICOM files.

	    Searches ``dirtoextract`` recursively for source files of the requested
	    format and writes their frames below ``extracted_path``.

	    * ``"vol"``: every ``*.vol`` file is opened with ``VolFile``. The file
	      name (without extension) is split on ``_`` to form a nested output
	      directory, and ``render_oct_scans`` is called with the prefix
	      ``<output dir>/<file name>_oct``.
	    * ``"dicom"``: every ``DICOMDIR`` file is read as a pydicom ``FileSet``.
	      Each OPT-modality instance gets its own directory named after its
	      ``SOPInstanceUID`` (any existing directory of that name is removed
	      first), its frames are saved as ``<SOPInstanceUID>_oct_NNN.png``, and
	      a ``basic_meta.csv`` summary is written to ``extracted_path``.

	    If ``extracted_path`` already exists and is not empty, the user is asked
	    on stdin whether to proceed; a negative answer makes the function return
	    without extracting anything.

	    Args:
	        dirtoextract (str): The root directory to search for source files.
	        extracted_path (str): The destination directory where the extracted
	            PNG images will be saved.
	        input_format (str): The format of the input files. Must be either
	            "vol" or "dicom".

	    Raises:
	        AssertionError: If ``input_format`` is neither "vol" nor "dicom".
	        ValueError: If the answer to the overwrite prompt is not a
	            recognised yes/no value.
	    """
	    assert input_format in ["vol", "dicom"], 'Error: input_format must be "vol" or "dicom".'

	    if os.path.isdir(extracted_path) and os.listdir(extracted_path):
	        val = input(
	            f"{extracted_path} exists and is not empty. Files may be overwritten. Proceed with extraction? (Y/N)"
	        )
	        if not _parse_yes_no(val):
	            return

	    print(f"Extracting files from {dirtoextract} into {extracted_path}...")

	    if input_format == "vol":
	        # "**/" together with recursive=True matches .vol files at any depth,
	        # including directly inside dirtoextract.
	        files_to_extract = glob.glob(os.path.join(dirtoextract, "**/*.vol"), recursive=True)
	        for fpath in tqdm(files_to_extract):
	            vol = VolFile(fpath)
	            scan_str = _vol_scan_name(fpath)
	            # Underscores in the file name become directory levels.
	            extractpath = os.path.join(extracted_path, scan_str.replace("_", "/"))
	            os.makedirs(extractpath, exist_ok=True)
	            prefix = os.path.join(extractpath, scan_str + "_oct")
	            vol.render_oct_scans(prefix)
	    elif input_format == "dicom":
	        keywords = ["SOPInstanceUID", "PatientID", "ImageLaterality", "SeriesDate"]
	        list_of_dicts = []
	        # The CSV below is written even when no DICOMDIR is found, so the
	        # destination directory has to exist up front.
	        os.makedirs(extracted_path, exist_ok=True)
	        dirgen = glob.iglob(os.path.join(dirtoextract, "**/DICOMDIR"), recursive=True)

	        for dsstr in dirgen:
	            fs = FileSet(dcmread(dsstr))
	            for fi in tqdm(gen_opt_fs(fs)):
	                # Top-level metadata; fi.get yields None for missing keywords.
	                dd = {key: fi.get(key) for key in keywords}

	                volpath = os.path.join(extracted_path, f"{fi.SOPInstanceUID}")
	                # Start from an empty directory so stale frames never survive.
	                shutil.rmtree(volpath, ignore_errors=True)
	                os.makedirs(volpath)

	                # Fetch the pixel data once instead of once per frame.
	                # NOTE(review): indexing by frame assumes a multi-frame
	                # (frames, rows, cols) array - confirm for single-frame files.
	                frames = fi.pixel_array
	                for i in range(fi.NumberOfFrames):
	                    fname = os.path.join(volpath, f"{fi.SOPInstanceUID}_oct_{i:03d}.png")
	                    Image.fromarray(frames[i]).save(fname)
	                list_of_dicts.append(dd)

	        dfoct = pd.DataFrame(list_of_dicts, columns=keywords)
	        dfoct.to_csv(os.path.join(extracted_path, "basic_meta.csv"))


	def rpd_data(extracted_path):
	    """Generates a dataset list from a directory of extracted image files.

	    Scans ``extracted_path`` recursively for files with a ``.png`` extension
	    (any letter case) and builds one dictionary per readable image. The
	    format is designed to be compatible with Detectron2's `DatasetCatalog`
	    and can be adapted to hold ground truth instances for evaluation.

	    Files that OpenCV cannot read are reported and left out of the result
	    instead of aborting the whole scan.

	    Args:
	        extracted_path (str): The root directory containing the extracted
	            .png image files to be included in the dataset.

	    Returns:
	        list[dict]: One dictionary per image with the keys ``file_name``
	        (path using forward slashes), ``height``, ``width`` and ``image_id``
	        (the file name including its extension).
	    """
	    dataset = []
	    # "**/" together with recursive=True matches PNG files at any depth,
	    # including directly inside extracted_path.
	    extracted_files = glob.glob(os.path.join(extracted_path, "**/*.[Pp][Nn][Gg]"), recursive=True)
	    print("Generating dataset of images...")
	    for fn in tqdm(extracted_files):
	        fn_adjusted = fn.replace("\\", "/")
	        im = cv2.imread(fn)
	        if im is None:
	            # cv2.imread signals an unreadable file by returning None.
	            tqdm.write(f"Warning: could not read image {fn_adjusted}; skipping.")
	            continue
	        height, width = im.shape[:2]
	        dataset.append(
	            dict(
	                file_name=fn_adjusted,
	                height=height,
	                width=width,
	                image_id=fn_adjusted.split("/")[-1],
	            )
	        )
	    print(f"Found {len(dataset)} images")
	    return dataset


	def gen_opt_fs(fs):
	    """Lazily load every OPT-modality instance of a DICOM file-set.

	    The file-set is queried with ``fs.find(Modality="OPT")`` and each
	    matching instance is loaded only when the consumer asks for it.

	    Args:
	        fs (pydicom.fileset.FileSet): The file-set to search.

	    Yields:
	        The object returned by ``load()`` for each matching instance, in the
	        order ``fs.find`` reports them.
	    """
	    yield from (match.load() for match in fs.find(Modality="OPT"))