| import distutils.util
|
| import glob
|
| import os
|
| import shutil
|
|
|
| import cv2
|
| import pandas as pd
|
| from PIL import Image
|
| from pydicom import dcmread
|
| from pydicom.fileset import FileSet
|
| from tqdm import tqdm
|
|
|
| from .volReader import VolFile
|
|
|
# Directory portion of this module's ``__file__`` path (whatever form, relative
# or absolute, the interpreter recorded for it).
script_dir = os.path.dirname(__file__)
|
|
|
|
|
class Error(Exception):
    """Common base class for the custom exceptions defined in this module.

    It adds nothing to ``Exception``; it exists so callers can catch
    module-specific errors with a single ``except Error`` clause.
    """
|
|
|
|
|
def _parse_yes_no(answer):
    """Translate a yes/no style answer into a bool.

    Accepts the same vocabulary as ``distutils.util.strtobool`` (a module
    that was removed from the standard library in Python 3.12), ignoring
    case and surrounding whitespace.

    Args:
        answer (str): Raw text typed by the user.

    Returns:
        bool: True for an affirmative answer, False for a negative one.

    Raises:
        ValueError: If ``answer`` is not a recognised yes/no value.
    """
    normalized = answer.strip().lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {answer!r}")


def _extract_vol_files(dirtoextract, extracted_path):
    """Render every ``.vol`` file found under ``dirtoextract``."""
    files_to_extract = glob.glob(os.path.join(dirtoextract, "**/*.vol"), recursive=True)
    for fpath in tqdm(files_to_extract):
        vol = VolFile(fpath)
        # Remove only the file extension. ``str.strip(".vol")`` would instead
        # strip any run of the characters ".", "v", "o", "l" from BOTH ends of
        # the path, mangling names such as "scan_lvl.vol" or "./scan.vol".
        # Backslashes are normalised first so Windows paths split correctly.
        file_name = os.path.basename(fpath.replace("\\", "/"))
        scan_str = os.path.splitext(file_name)[0]
        # Each "_"-separated part of the scan name becomes a directory level.
        extractpath = os.path.join(extracted_path, scan_str.replace("_", "/"))
        os.makedirs(extractpath, exist_ok=True)
        preffix = os.path.join(extractpath, scan_str + "_oct")
        vol.render_oct_scans(preffix)


def _extract_dicom_files(dirtoextract, extracted_path):
    """Save frames of OPT instances listed in DICOMDIR files, plus a metadata CSV."""
    keywords = ["SOPInstanceUID", "PatientID", "ImageLaterality", "SeriesDate"]
    list_of_dicts = []
    # Create the destination up front: both the per-instance folders and the
    # CSV written at the end need it, and it may not exist on a first run.
    os.makedirs(extracted_path, exist_ok=True)

    dicomdir_paths = glob.iglob(os.path.join(dirtoextract, "**/DICOMDIR"), recursive=True)
    for dicomdir_path in dicomdir_paths:
        fs = FileSet(dcmread(dicomdir_path))
        for fi in tqdm(gen_opt_fs(fs)):
            volpath = os.path.join(extracted_path, f"{fi.SOPInstanceUID}")
            # Start from an empty folder so frames from an earlier run
            # cannot be mixed with the new ones.
            shutil.rmtree(volpath, ignore_errors=True)
            os.makedirs(volpath, exist_ok=True)

            # Fetch the decoded array once and index individual frames from it.
            frames = fi.pixel_array
            for i in range(fi.NumberOfFrames):
                fname = os.path.join(volpath, f"{fi.SOPInstanceUID}_oct_{i:03d}.png")
                Image.fromarray(frames[i]).save(fname)

            # One metadata row per instance; missing attributes become None.
            list_of_dicts.append({key: fi.get(key) for key in keywords})

    dfoct = pd.DataFrame(list_of_dicts, columns=keywords)
    dfoct.to_csv(os.path.join(extracted_path, "basic_meta.csv"))


def extract_files(dirtoextract, extracted_path, input_format):
    """Extract individual image frames from .vol or DICOM files.

    Scans ``dirtoextract`` recursively for source files of the requested
    format and writes their frames below ``extracted_path``.

    * ``"vol"``: every ``*.vol`` file is opened with ``VolFile`` and rendered
      through ``render_oct_scans`` into a folder derived from the file name
      (underscores in the name become directory levels).
    * ``"dicom"``: every ``DICOMDIR`` file is read, each instance with
      modality "OPT" is saved frame by frame as
      ``<SOPInstanceUID>/<SOPInstanceUID>_oct_NNN.png``, and a
      ``basic_meta.csv`` summary is written to ``extracted_path``.

    If ``extracted_path`` already exists and is not empty, the user is asked
    on stdin to confirm before anything is written; a negative answer makes
    the function return without doing any work.

    Args:
        dirtoextract (str): The root directory to search for source files.
        extracted_path (str): The destination directory where the extracted
            PNG images will be saved.
        input_format (str): The format of the input files. Must be either
            "vol" or "dicom".

    Raises:
        AssertionError: If ``input_format`` is neither "vol" nor "dicom".
        ValueError: If the answer to the overwrite prompt is not a
            recognised yes/no value.
    """
    if input_format not in ("vol", "dicom"):
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept so that
        # existing callers see the same error as before.
        raise AssertionError('Error: input_format must be "vol" or "dicom".')

    if os.path.isdir(extracted_path) and os.listdir(extracted_path):
        val = input(
            f"{extracted_path} exists and is not empty. Files may be overwritten. Proceed with extraction? (Y/N)"
        )
        if not _parse_yes_no(val):
            return

    print(f"Extracting files from {dirtoextract} into {extracted_path}...")
    if input_format == "vol":
        _extract_vol_files(dirtoextract, extracted_path)
    else:
        _extract_dicom_files(dirtoextract, extracted_path)
|
|
|
|
|
def rpd_data(extracted_path):
    """Build a list of per-image records from a directory of PNG files.

    Recursively collects every file below ``extracted_path`` whose extension
    is ``.png`` (any letter case), reads it with OpenCV to obtain its size,
    and describes it with one dictionary. The record layout is intended to
    be usable with Detectron2's ``DatasetCatalog``.

    Files that OpenCV cannot decode are skipped and listed on stdout after
    the scan instead of aborting the whole run.

    Args:
        extracted_path (str): The root directory containing the extracted
            .png image files to be included in the dataset.

    Returns:
        list[dict]: One dictionary per readable image with the keys
            ``file_name`` (path using forward slashes), ``height``,
            ``width`` and ``image_id`` (the file's base name).
    """
    pattern = os.path.join(extracted_path, "**/*.[Pp][Nn][Gg]")
    extracted_files = glob.glob(pattern, recursive=True)
    print("Generating dataset of images...")

    dataset = []
    unreadable = []
    for fn in tqdm(extracted_files):
        im = cv2.imread(fn)
        if im is None:
            # cv2.imread reports an unreadable or corrupt file by returning
            # None rather than raising, so guard before touching ``shape``.
            unreadable.append(fn)
            continue
        # Normalise Windows separators so records look the same everywhere.
        fn_adjusted = fn.replace("\\", "/")
        height, width = im.shape[:2]
        dataset.append(
            dict(
                file_name=fn_adjusted,
                height=height,
                width=width,
                image_id=fn_adjusted.rsplit("/", 1)[-1],
            )
        )

    # Reported after the loop so the messages do not interleave with the
    # progress bar output.
    for fn in unreadable:
        print(f"Skipped unreadable image: {fn}")
    print(f"Found {len(dataset)} images")
    return dataset
|
|
|
|
|
def gen_opt_fs(fs):
    """Lazily yield the loaded dataset of every "OPT" instance in a FileSet.

    The FileSet is queried with ``find(Modality="OPT")`` and each matching
    instance is loaded only when the consumer asks for it, so at most one
    dataset is loaded per iteration step.

    Args:
        fs (pydicom.fileset.FileSet): The pydicom FileSet to search through.

    Yields:
        The object returned by ``instance.load()`` for each matching
        instance, in the order ``fs.find`` reports them.
    """
    opt_instances = fs.find(Modality="OPT")
    yield from (entry.load() for entry in opt_instances)
|
|
|