import asyncio import os from typing import Optional, Tuple # Save original asyncio.run BEFORE any imports that might patch it (nest_asyncio) _ORIGINAL_ASYNCIO_RUN = asyncio.run # On ZeroGPU H200, TF32 matmul paths can occasionally trip cuBLAS errors in # some einsum-heavy models. Prefer full FP32 math for stability. os.environ.setdefault("NVIDIA_TF32_OVERRIDE", "0") # ZeroGPU H200-specific workarounds for cuBLAS strided-batch GEMM issues # H200 has 70GB VRAM, so memory isn't the issue - focus on CUDA context stability # - Force synchronous CUDA execution to avoid race conditions during dynamic GPU allocation # - Use deterministic cuBLAS workspace to ensure consistent behavior across GPU allocations os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1") os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":16:8") import gradio as gr import numpy as np from PIL import Image, ImageDraw, ImageFont # ZeroGPU decorator - only import on Hugging Face Spaces to avoid asyncio conflicts locally def _make_spaces_fallback(): class _SpacesFallback: @staticmethod def GPU(*args, **kwargs): def _decorator(fn): return fn return _decorator return _SpacesFallback() if os.environ.get("SPACE_ID"): # Running on Hugging Face Spaces try: import spaces # type: ignore except Exception: spaces = _make_spaces_fallback() # type: ignore else: # Local development - skip spaces import to avoid asyncio conflicts spaces = _make_spaces_fallback() # type: ignore def _ensure_cache_dirs() -> None: os.makedirs("outputs", exist_ok=True) os.makedirs(os.path.join("outputs", "cache"), exist_ok=True) os.environ.setdefault("EARTH2STUDIO_CACHE", os.path.join(os.getcwd(), "outputs", "cache")) def _normalize_to_uint8(x: np.ndarray) -> np.ndarray: x = np.asarray(x, dtype=np.float32) finite = np.isfinite(x) if not finite.any(): return np.zeros_like(x, dtype=np.uint8) vmin = float(np.nanpercentile(x[finite], 2.0)) vmax = float(np.nanpercentile(x[finite], 98.0)) if vmax <= vmin: return np.zeros_like(x, dtype=np.uint8) y = (x - vmin) / (vmax - vmin) y = np.clip(y, 0.0, 1.0) return (y * 255.0).astype(np.uint8) def _apply_simple_colormap(u8: np.ndarray) -> np.ndarray: """ Lightweight colormap without matplotlib: map grayscale -> RGB using a simple blue->cyan->yellow->red ramp. """ u = u8.astype(np.float32) / 255.0 r = np.clip(1.5 * u, 0.0, 1.0) g = np.clip(1.5 * (1.0 - np.abs(u - 0.5) * 2.0), 0.0, 1.0) b = np.clip(1.5 * (1.0 - u), 0.0, 1.0) rgb = np.stack([r, g, b], axis=-1) return (rgb * 255.0).astype(np.uint8) def _plot_latlon_field(lon: np.ndarray, lat: np.ndarray, field2d: np.ndarray, title: str) -> str: """ Save a quick image to outputs/ and return the file path. Avoids matplotlib/cartopy to keep system deps minimal on Spaces. """ _ensure_cache_dirs() out_path = os.path.join("outputs", "t2m.png") gray = _normalize_to_uint8(field2d) rgb = _apply_simple_colormap(gray) img = Image.fromarray(rgb, mode="RGB").resize((1024, 512), resample=Image.BILINEAR) draw = ImageDraw.Draw(img) text = title try: font = ImageFont.load_default() except Exception: font = None # simple text background for readability pad = 6 tw, th = draw.textbbox((0, 0), text, font=font)[2:] draw.rectangle((0, 0, tw + 2 * pad, th + 2 * pad), fill=(0, 0, 0)) draw.text((pad, pad), text, fill=(255, 255, 255), font=font) img.save(out_path) return out_path def _gpu_duration(nsteps: int) -> int: """ Calculate GPU duration for inference only. """ nsteps = max(1, int(nsteps)) # 30s base (model to GPU) + 15s per step return int(min(300, 30 + nsteps * 15)) @spaces.GPU(duration=lambda forecast_date, nsteps: _gpu_duration(int(nsteps))) def _run_inference(forecast_date: str, nsteps: int): """ GPU-only function: load model, run inference, return extracted data. ZeroGPU uses multiprocessing so we can't pass unpicklable objects (GFS, model). Everything must be created inside this function. """ import torch import earth2studio.run as run from earth2studio.data import GFS from earth2studio.io import ZarrBackend _ensure_cache_dirs() # Critical precision settings for ZeroGPU H200 cuBLAS stability torch.backends.cudnn.benchmark = False torch.set_float32_matmul_precision("highest") # Full FP32, no TF32 torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cudnn.allow_tf32 = False torch.cuda.empty_cache() # Force einsum operand contiguity to avoid cuBLAS strided-batch GEMM errors _orig_einsum = torch.einsum torch.einsum = lambda eq, *ops: _orig_einsum( eq, *[op.contiguous() if torch.is_tensor(op) else op for op in ops] ) # type: ignore[assignment] # Load model inside GPU function (ZeroGPU requirement) from earth2studio.models.px import FCN package = FCN.load_default_package() model = FCN.load_model(package) # Move to GPU with FP32 precision device = torch.device("cuda") model = model.float().to(device).eval() torch.cuda.empty_cache() # CRITICAL: Warmup CUDA/cuBLAS context on ZeroGPU's H200 before complex ops # This ensures cuBLAS is fully initialized and strided-batch GEMM handlers are ready try: with torch.no_grad(): # Create dummy tensors matching FCN's expected input shape # FCN expects (batch, channels, lat, lon) - use minimal batch/size for warmup dummy_input = torch.randn(1, 73, 8, 8, device=device, dtype=torch.float32) _ = model(dummy_input) torch.cuda.synchronize() torch.cuda.empty_cache() except Exception as warmup_err: # If warmup fails, log but continue - the actual inference might still work print(f"[Warning] CUDA warmup failed: {warmup_err}") data = GFS() io = ZarrBackend() try: with torch.no_grad(): io = run.deterministic([forecast_date], nsteps, model, data, io, device=device) # Extract ALL timesteps to numpy arrays (picklable) before returning lon = np.asarray(io["lon"][:]) lat = np.asarray(io["lat"][:]) # Return all timesteps: shape (1, nsteps+1, lat, lon) all_fields = np.asarray(io["t2m"][:]) return lon, lat, all_fields finally: # Cleanup: restore einsum and free GPU memory torch.einsum = _orig_einsum # type: ignore[assignment] del model, data, io torch.cuda.empty_cache() torch.cuda.synchronize() def run_forecast(forecast_date: str, nsteps: int): """ Run Earth2Studio deterministic inference and return cached results. Returns: (forecast_date, nsteps, lon, lat, all_fields, status_msg) """ _ensure_cache_dirs() # Validate inputs if not forecast_date: return None, None, None, None, None, "ERROR: forecast_date is required (YYYY-MM-DD)." nsteps = int(nsteps) if nsteps < 1: return None, None, None, None, None, "ERROR: nsteps must be >= 1" # Run inference on GPU (model loaded inside due to ZeroGPU pickling) try: lon, lat, all_fields = _run_inference(forecast_date, nsteps) except Exception as e: return None, None, None, None, None, f"ERROR during inference: {type(e).__name__}: {e}" # Return cached data for dynamic plot_step updates status = f"SUCCESS: Computed {nsteps} forecast steps ({(nsteps+1)*6} hours total). Use plot_step slider to explore." return forecast_date, nsteps, lon, lat, all_fields, status def update_plot_from_cache(forecast_date, nsteps, lon, lat, all_fields, plot_step): """ Update the displayed plot from cached inference results (no GPU needed). """ if lon is None or lat is None or all_fields is None: return None, "No cached results. Click 'Run Inference' first." plot_step = int(plot_step) nsteps = int(nsteps) # Validate plot_step if plot_step < 0 or plot_step > nsteps: return None, f"Invalid plot_step {plot_step} (must be 0-{nsteps})" # Extract the specific timestep field = all_fields[0, plot_step] # Plot img_path = _plot_latlon_field( lon, lat, field, title=f"{forecast_date} - t2m - lead={6 * plot_step}h", ) return img_path, f"Displaying step {plot_step} (lead time: {6 * plot_step} hours)" def build_ui() -> gr.Blocks: with gr.Blocks(title="Earth2Studio FCN (ZeroGPU)") as demo: gr.Markdown( """ # Introduction to Earth2Studio Earth2Studio is a Python package built to empower researchers, scientists, and enthusiasts in the fields of weather and climate science with the latest artificial intelligence models and capabilities. With an intuitive design and a comprehensive feature set, it serves as a robust toolkit for exploring modern AI workflows for weather and climate. #### Learning Outcomes - Earth2Studio key features - How to instantiate a built-in prognostic model - Creating a data source and IO object - Running a simple built-in workflow - Post-processing results --- ## Package Design The goal of Earth2Studio is to enable users to extrapolate and build beyond what is implemented in it. The design philosophy embodies a **modular architecture** where the inference workflow acts as a flexible adhesive, seamlessly binding together various specialized software components with well-defined interfaces.