| import json |
| import logging |
| import os |
| import pickle |
|
|
| import pandas as pd |
| import progressbar |
| from detectron2.checkpoint import DetectionCheckpointer |
| from detectron2.config import get_cfg |
| from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader |
| from detectron2.evaluation import COCOEvaluator, inference_on_dataset |
| from detectron2.modeling import build_model |
|
|
| from .analysis_lib import CreatePlotsRPD, EvaluateClass, OutputVis, grab_dataset |
| from .datasets import data |
| from .Ensembler import Ensembler |
| from .table_styles import styles |
|
|
| |
# Make this module's own directory the process working directory at import
# time. NOTE(review): presumably so relative paths used elsewhere in the
# package resolve against it — confirm before changing.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Root logger emits INFO and above.
logging.basicConfig(level=logging.INFO)

# Order CUDA devices by PCI bus id and expose only device "0" to this process.
# Any value already present in the environment is overwritten.
os.environ.update(CUDA_DEVICE_ORDER="PCI_BUS_ID", CUDA_VISIBLE_DEVICES="0")

# Module-level DPI constant; not referenced by the code visible in this file.
dpi = 120
|
|
|
|
class MyProgressBar:
    """Callable progress reporter backed by ``progressbar.ProgressBar``.

    Instances are called with ``(block_num, block_size, total_size)``, which
    matches the ``reporthook`` signature of ``urllib.request.urlretrieve``.
    The underlying bar is created lazily on the first call, because the total
    size is only known then.
    """

    def __init__(self):
        # No bar yet: it is built on first use, once the total size is known.
        self.pbar = None

    def __call__(self, block_num, block_size, total_size):
        """Report progress for one transferred block.

        Args:
            block_num (int): Number of blocks transferred so far.
            block_size (int): Size of one block, in bytes.
            total_size (int): Total size of the transfer. A non-positive
                value (for example ``-1`` when the size is unknown) disables
                the bar for that call.
        """
        if total_size is None or total_size <= 0:
            # A bar needs a positive maximum; with an unknown or empty size
            # there is nothing meaningful to draw, so stay silent instead of
            # handing an invalid maximum to the progress bar.
            return

        if self.pbar is None:
            self.pbar = progressbar.ProgressBar(maxval=total_size)
            self.pbar.start()

        downloaded = block_num * block_size
        if downloaded < total_size:
            self.pbar.update(downloaded)
        else:
            # The last block may overshoot the total, so finish rather than
            # update with an out-of-range value.
            self.pbar.finish()
            # Forget the finished bar so this instance can track another
            # transfer with a different total size.
            self.pbar = None
|
|
|
|
def create_dataset(dataset_name, extracted_path):
    """Build the dataset records for a directory and pickle them to disk.

    The records are whatever ``data.rpd_data(extracted_path)`` returns; they
    are written to ``<data.script_dir>/<dataset_name>.pk``.

    Args:
        dataset_name (str): Name of the dataset; used as the stem of the
            output ``.pk`` file.
        extracted_path (str): Directory passed to ``data.rpd_data``.
    """
    stored_data = data.rpd_data(extracted_path)
    pickle_path = os.path.join(data.script_dir, f"{dataset_name}.pk")
    # Context manager guarantees the file is flushed and closed, even if
    # pickling fails part-way through.
    with open(pickle_path, "wb") as pickle_file:
        pickle.dump(stored_data, pickle_file)
|
|
|
|
def configure_model():
    """Create a Detectron2 config and overlay the bundled ``working.yaml``.

    The YAML file is looked up next to this module (symlinks resolved), not
    relative to the current working directory.

    Returns:
        detectron2.config.CfgNode: Default config merged with ``working.yaml``.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    yaml_path = os.path.join(module_dir, "working.yaml")

    cfg = get_cfg()
    cfg.merge_from_file(yaml_path)
    return cfg
|
|
|
|
def register_dataset(dataset_name):
    """Register ``dataset_name`` with Detectron2 and tag its class list.

    Registration uses ``grab_dataset(dataset_name)`` as the dataset provider.
    An ``AssertionError`` raised while registering is reported on stdout as
    "already registered" and otherwise ignored, so calling this function more
    than once for the same name is tolerated. The metadata's
    ``thing_classes`` is set to ``["rpd"]`` in every case.

    Args:
        dataset_name (str): The name under which to register the dataset.
    """
    try:
        DatasetCatalog.register(dataset_name, grab_dataset(dataset_name))
    except AssertionError as e:
        print(f"Assertion failed: {e}. Already registered.")

    metadata = MetadataCatalog.get(dataset_name)
    metadata.thing_classes = ["rpd"]
|
|
|
|
def run_prediction(cfg, dataset_name, output_path):
    """Run inference once per cross-validation fold with a shared model.

    A single model, data loader and ``COCOEvaluator`` are built up front. For
    each of ``fold1`` .. ``fold5`` the weights
    ``<module dir>/../models/<fold>_model_final.pth`` are loaded into that
    model, the evaluator is reset and pointed at ``<output_path>/<fold>``,
    and inference is run over the dataset.

    Args:
        cfg (CfgNode): The model configuration object.
        dataset_name (str): Name of the registered dataset to run on.
        output_path (str): Base directory; each fold gets its own
            subdirectory beneath it.
    """
    fold_names = ("fold1", "fold2", "fold3", "fold4", "fold5")
    # Weights live in a sibling "models" directory of this module; the path
    # does not depend on the fold, so compute it once.
    models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../models")

    model = build_model(cfg)
    loader = build_detection_test_loader(cfg, dataset_name)
    evaluator = COCOEvaluator(dataset_name, tasks={"bbox", "segm"}, output_dir=output_path)

    for fold in fold_names:
        weights_path = os.path.join(models_dir, fold + "_model_final.pth")
        print(weights_path)
        DetectionCheckpointer(model).load(weights_path)
        model.eval()

        # Reuse the evaluator: clear the previous fold's state and redirect
        # its (private) output directory to this fold's subdirectory.
        evaluator.reset()
        evaluator._output_dir = os.path.join(output_path, fold)

        print("Running inference with model ", fold)
        # The returned metrics are not used here.
        inference_on_dataset(model, loader, evaluator)

    print("Done with predictions!")
|
|
|
|
def run_ensemble(dataset_name, output_path, iou_thresh=0.2):
    """Combine the five per-fold predictions with an ``Ensembler``.

    Builds an ``Ensembler`` over ``fold1`` .. ``fold5`` under ``output_path``,
    runs its ``mean_score_nms()`` step and then ``save_coco_instances()``.

    Args:
        dataset_name (str): The name of the dataset.
        output_path (str): Base directory holding the per-fold prediction
            subdirectories.
        iou_thresh (float, optional): IoU threshold forwarded to the
            ``Ensembler``. Defaults to 0.2.

    Returns:
        Ensembler: The ensembler instance after both steps have run.
    """
    fold_names = ["fold1", "fold2", "fold3", "fold4", "fold5"]
    ensembler = Ensembler(output_path, dataset_name, fold_names, iou_thresh=iou_thresh)
    ensembler.mean_score_nms()
    ensembler.save_coco_instances()
    return ensembler
|
|
|
|
def evaluate_dataset(dataset_name, output_path, iou_thresh=0.2, prob_thresh=0.5):
    """Run ``EvaluateClass`` on a dataset and dump its scalar summary to JSON.

    The evaluator is created with ``evalsuper=False``, evaluated, and the
    result of ``summarize_scalars()`` is written to
    ``<output_path>/scalar_dict.json``.

    Args:
        dataset_name (str): The name of the dataset.
        output_path (str): Directory passed to the evaluator; also where the
            JSON summary is written.
        iou_thresh (float, optional): IoU threshold for evaluation.
            Defaults to 0.2.
        prob_thresh (float, optional): Probability threshold for evaluation.
            Defaults to 0.5.

    Returns:
        EvaluateClass: The evaluator after ``evaluate()`` has run.
    """
    evaluator = EvaluateClass(
        dataset_name,
        output_path,
        iou_thresh=iou_thresh,
        prob_thresh=prob_thresh,
        evalsuper=False,
    )
    evaluator.evaluate()

    summary_path = os.path.join(output_path, "scalar_dict.json")
    with open(summary_path, "w") as outfile:
        json.dump(evaluator.summarize_scalars(), outfile)
    return evaluator
|
|
|
|
def create_table(myeval):
    """Build the statistics table object from an evaluation result.

    Args:
        myeval (EvaluateClass): Evaluation object; its ``mycoco`` and
            ``prob_thresh`` attributes are passed to
            ``CreatePlotsRPD.initfromcoco``.

    Returns:
        CreatePlotsRPD: The table object, with its ``dfimg`` DataFrame sorted
        by index.
    """
    table = CreatePlotsRPD.initfromcoco(myeval.mycoco, myeval.prob_thresh)
    # Sort in place so the DataFrame held by the table object is the one that
    # ends up ordered.
    table.dfimg.sort_index(inplace=True)
    return table
| |
|
|
|
|
def output_vol_predictions(dataset_table, vis, volid, output_path, output_mode="pred_overlay"):
    """Write one TIFF for a single volume using the requested output mode.

    The images belonging to ``volid`` are selected from
    ``dataset_table.dfimg`` (rows whose ``volID`` column equals ``volid``),
    ordered by index, and handed to the matching ``vis`` writer together with
    the target path ``<output_path>/<volid>.tiff``. An unrecognised mode only
    prints a message; nothing is written.

    Args:
        dataset_table (CreatePlotsRPD): Object exposing the ``dfimg`` table.
        vis (OutputVis): The visualization object.
        volid (str): The ID of the volume to visualize.
        output_path (str): Directory for the output TIFF file.
        output_mode (str, optional): One of "pred_overlay", "pred_only",
            "originals" or "all". Defaults to "pred_overlay".
    """
    dfimg = dataset_table.dfimg
    in_volume = dfimg["volID"] == volid
    imgids = dfimg[in_volume].sort_index().index.values
    outname = os.path.join(output_path, f"{volid}.tiff")

    # Mode -> deferred writer call. Lambdas keep attribute lookup on `vis`
    # lazy, so only the selected writer is ever touched.
    writers = (
        ("pred_overlay", lambda: vis.output_pred_to_tiff(imgids, outname, pred_only=False)),
        ("pred_only", lambda: vis.output_pred_to_tiff(imgids, outname, pred_only=True)),
        ("originals", lambda: vis.output_ori_to_tiff(imgids, outname)),
        ("all", lambda: vis.output_all_to_tiff(imgids, outname)),
    )
    for mode, write_tiff in writers:
        if output_mode == mode:
            write_tiff()
            break
    else:
        print(f"Invalid mode {output_mode} for function output_vol_predictions.")
|
|
|
|
def output_dataset_predictions(dataset_table, vis, output_path, output_mode="pred_overlay", draw_mode="default"):
    """Write one TIFF per volume listed in ``dataset_table.dfvol``.

    The visualizer's draw mode is set first, the output directory is created
    if needed, and ``output_vol_predictions`` is then called for every index
    value of ``dataset_table.dfvol``.

    Args:
        dataset_table (CreatePlotsRPD): Object exposing the ``dfimg`` and
            ``dfvol`` tables.
        vis (OutputVis): The visualization object.
        output_path (str): Directory that receives the TIFF files.
        output_mode (str, optional): Mode forwarded to
            ``output_vol_predictions``. Defaults to "pred_overlay".
        draw_mode (str, optional): Value passed to ``vis.set_draw_mode``.
            Defaults to "default".
    """
    vis.set_draw_mode(draw_mode)
    os.makedirs(output_path, exist_ok=True)

    volume_ids = dataset_table.dfvol.index
    for volume_id in volume_ids:
        output_vol_predictions(dataset_table, vis, volume_id, output_path, output_mode)
|
|
|
|
def create_dfvol(dataset_name, output_path, dataset_table):
    """Render the volume-level table to ``dfvol_<dataset_name>.html``.

    Rows are ordered by the ``dt_instances`` column, largest first, every
    value is formatted with ``"{:.0f}"``, and the shared ``styles`` table
    styles are applied.

    Args:
        dataset_name (str): The name of the dataset; part of the file name.
        output_path (str): Directory in which the HTML file is written.
        dataset_table (CreatePlotsRPD): Object exposing the ``dfvol`` table.
    """
    dfvol = dataset_table.dfvol.sort_values(by=["dt_instances"], ascending=False)
    # Lift the Styler cell limit just above the table size so large tables
    # are rendered in full rather than trimmed.
    with pd.option_context("styler.render.max_elements", int(dfvol.size) + 1):
        html_str = dfvol.style.format("{:.0f}").set_table_styles(styles).to_html()

    html_path = os.path.join(output_path, "dfvol_" + dataset_name + ".html")
    # Context manager closes the file even if the write fails.
    with open(html_path, "w") as html_file:
        html_file.write(html_str)
|
|
|
|
def create_dfimg(dataset_name, output_path, dataset_table):
    """Render the image-level table to ``dfimg_<dataset_name>.html``.

    Rows are ordered by index and the shared ``styles`` table styles are
    applied; no value formatting is set.

    Args:
        dataset_name (str): The name of the dataset; part of the file name.
        output_path (str): Directory in which the HTML file is written.
        dataset_table (CreatePlotsRPD): Object exposing the ``dfimg`` table.
    """
    dfimg = dataset_table.dfimg.sort_index()
    # Lift the Styler cell limit just above the table size so large tables
    # are rendered in full rather than trimmed.
    with pd.option_context("styler.render.max_elements", int(dfimg.size) + 1):
        html_str = dfimg.style.set_table_styles(styles).to_html()

    html_path = os.path.join(output_path, "dfimg_" + dataset_name + ".html")
    # Context manager closes the file even if the write fails.
    with open(html_path, "w") as html_file:
        html_file.write(html_str)
|
|
|
|
def main(args):
    """Run the pipeline stages selected by ``args``, in a fixed order.

    Stages, each gated by its own flag: file extraction, dataset creation,
    inference plus ensembling, then evaluation followed by HTML tables and/or
    TIFF visualizations. Evaluation runs whenever tables or any
    visualization is requested.

    Args:
        args (dict): Pipeline options. Keys read here:
            ``dataset_name``, ``input_dir``, ``extracted_dir``,
            ``input_format``, ``output_dir`` (no default);
            ``run_extract``, ``create_dataset``, ``run_inference``,
            ``create_tables`` (default True);
            ``prob_thresh`` (default 0.5), ``iou_thresh`` (default 0.2);
            ``binary_mask``, ``binary_mask_overlay``,
            ``instance_mask_overlay`` (default False).
    """
    print(f"Received arguments: {args}")

    # Names and locations.
    dataset_name = args.get("dataset_name")
    input_dir = args.get("input_dir")
    extracted_dir = args.get("extracted_dir")
    input_format = args.get("input_format")
    output_dir = args.get("output_dir")

    # Stage switches.
    run_extract = args.get("run_extract", True)
    make_dataset = args.get("create_dataset", True)
    run_inference = args.get("run_inference", True)
    create_tables = args.get("create_tables", True)

    # Thresholds shared by ensembling and evaluation.
    prob_thresh = args.get("prob_thresh", 0.5)
    iou_thresh = args.get("iou_thresh", 0.2)

    # Requested visualization products; any one of them enables the stage.
    want_binary_masks = args.get("binary_mask", False)
    want_binary_overlays = args.get("binary_mask_overlay", False)
    want_instance_overlays = args.get("instance_mask_overlay", False)
    make_visuals = want_binary_masks or want_binary_overlays or want_instance_overlays

    if run_extract:
        os.makedirs(extracted_dir, exist_ok=True)
        print("Starting file extraction...")
        data.extract_files(input_dir, extracted_dir, input_format)
        print("Image extraction complete!")

    if make_dataset:
        print("Creating dataset from extracted images...")
        create_dataset(dataset_name, extracted_dir)

    if run_inference:
        print("Configuring model...")
        cfg = configure_model()
        print("Registering dataset...")
        register_dataset(dataset_name)
        os.makedirs(output_dir, exist_ok=True)
        print("Running inference...")
        run_prediction(cfg, dataset_name, output_dir)
        print("Inference complete, running ensemble...")
        run_ensemble(dataset_name, output_dir, iou_thresh)
        print("Ensemble complete!")

    # Everything below needs the evaluation result; stop if nothing asks for it.
    if not (create_tables or make_visuals):
        return

    print("Registering dataset for evaluation...")
    register_dataset(dataset_name)
    print("Evaluating dataset...")
    eval_obj = evaluate_dataset(dataset_name, output_dir, iou_thresh, prob_thresh)
    print("Creating dataset table...")
    table = create_table(eval_obj)

    if create_tables:
        create_dfvol(dataset_name, output_dir, table)
        create_dfimg(dataset_name, output_dir, table)
        print("Dataset HTML tables complete!")

    if not make_visuals:
        return

    print("Initializing visualizer...")
    vis = OutputVis(
        dataset_name,
        prob_thresh=eval_obj.prob_thresh,
        pred_mode="file",
        pred_file=os.path.join(output_dir, "coco_instances_results.json"),
        has_annotations=False,
    )
    vis.scale = 1.0

    def render(subdir, output_mode, draw_mode):
        # Each product goes to its own subdirectory of the output directory.
        output_dataset_predictions(table, vis, os.path.join(output_dir, subdir), output_mode, draw_mode)

    if want_binary_masks:
        print("Creating binary masks TIFF (no overlay)...")
        # Set before rendering; the attribute stays set for later products.
        vis.annotation_color = "w"
        render("predicted_binary_masks", "pred_only", "bw")
    if want_binary_overlays:
        print("Creating binary masks TIFF (with overlay)...")
        render("predicted_binary_overlays", "pred_overlay", "bw")
    if want_instance_overlays:
        print("Creating instance masks TIFF (with overlay)...")
        render("predicted_instance_overlays", "pred_overlay", "default")
    print("Visualizations complete!")
|
|