# Source code for ufs_da_diagnostics.plots.utils_loaders

"""
Universal Loader Utilities for Observation Diagnostics
======================================================

This module provides robust, observation‑type‑agnostic loader functions
for reading OMB, OMA, QC, ObsValue, and hofx fields from FV3‑JEDI /
IODA‑formatted diagnostics files.

The loaders are designed to handle:

- ATMS radiances
- GNSSRO bending angles
- SATWND / SCATWND winds
- Conventional observations (e.g., PS, T, Q)
- Legacy IODA formats (ombg/oman, innov1, DerivedMetaData/Innovation)
- Masked arrays, object arrays, and missing groups

All returned arrays are converted to **float64** with invalid entries
replaced by **NaN**, ensuring downstream plotting code never crashes due
to dtype inconsistencies.

This module is the foundation for all histogram, stats, and QC‑filtered
diagnostics.
"""

import numpy as np


# ----------------------------------------------------------------------
# Numeric conversion helper
# ----------------------------------------------------------------------

def to_numeric_safe(arr):
    """
    Convert any ObsValue/hofx/OMB/OMA array to ``float64`` safely.

    This function handles:

    - masked arrays of any dtype (masked entries become ``NaN``)
    - object, string and other non-numeric arrays (element-wise conversion)
    - invalid entries (converted to ``NaN``)
    - arbitrary shapes, including 0-d

    Parameters
    ----------
    arr : array-like or None
        Input array from diagnostics file.

    Returns
    -------
    numpy.ndarray or None
        Newly allocated float64 array with masked or unconvertible entries
        replaced by NaN. Returns ``None`` if input is ``None``.

    Notes
    -----
    - Masked arrays are cast to float *before* the mask is applied.
      ``MaskedArray.filled(np.nan)`` raises ``TypeError`` for integer
      dtypes because NaN cannot be represented in them.
    - Any dtype that is not boolean/integer/float/complex is converted one
      element at a time with ``float()``, so a single bad entry yields a
      NaN instead of aborting the whole conversion.
    - Ensures downstream code can always call ``np.isfinite()`` safely.
    """
    if arr is None:
        return None

    mask = None
    if isinstance(arr, np.ma.MaskedArray):
        # Keep the mask aside and work on the raw data so that integer
        # masked arrays do not fail when NaN is used as the fill value.
        mask = np.ma.getmaskarray(arr)
        arr = np.ma.getdata(arr)
    elif hasattr(arr, "filled"):
        # Duck-typed masked container: defer to its own fill logic.
        arr = arr.filled(np.nan)

    arr = np.asarray(arr)

    if arr.dtype.kind in "biufc":
        # Plain numeric data: a vectorised cast is enough (always a copy).
        out = arr.astype("float64")
    else:
        # Object / string / other dtypes: convert element by element.
        out = np.empty(arr.shape, dtype="float64")
        for idx, value in np.ndenumerate(arr):
            try:
                out[idx] = float(value)
            except Exception:
                # Deliberate best effort: anything float() rejects is NaN.
                out[idx] = np.nan

    if mask is not None:
        out[mask] = np.nan
    return out
# ----------------------------------------------------------------------
# QC loader
# ----------------------------------------------------------------------

def load_qc_universal(f, varname):
    """
    Universal QC loader with multi-group fallback.

    Priority order:

    1. EffectiveQC2
    2. EffectiveQC1
    3. EffectiveQC0
    4. EffectiveQC
    5. ObsDiag
    6. QualityControl
    7. FortranQC

    For each name in that order the loader looks for ``<group>/<varname>``
    and then for a root-level ``<varname>@<group>`` variable. The second
    lookup is performed even when the file has no such group, so flat
    files without groups are handled.

    Behavior
    --------
    - If a QC group exists but the variable is missing → return all-valid QC.
    - If no QC groups exist → return all-valid QC.
    - QC arrays are returned as ``int32``.
    - Entries that are masked or not convertible to a number are returned
      as the most negative ``int32`` so they can never compare equal to
      the all-valid value ``0``.

    Parameters
    ----------
    f : netCDF4.Dataset
        Diagnostics file handle.
    varname : str
        Observation variable name.

    Returns
    -------
    numpy.ndarray
        ``int32`` QC array as stored in the file, or a 1-D array of zeros
        with one entry per location when no QC variable is found.

    Raises
    ------
    KeyError
        If the all-valid fallback is needed and the file has neither a
        ``Location`` nor an ``nlocs`` dimension.

    Notes
    -----
    - All-valid QC means ``QC == 0`` everywhere.
    """
    qc_groups = (
        "EffectiveQC2",
        "EffectiveQC1",
        "EffectiveQC0",
        "EffectiveQC",
        "ObsDiag",
        "QualityControl",
        "FortranQC",
    )

    for grp in qc_groups:
        group = f.groups[grp] if grp in f.groups else None

        # Grouped layout: <group>/<varname>
        if group is not None and varname in group.variables:
            return _qc_as_int32(group.variables[varname][:])

        # Flat layout: root variable named <varname>@<group>
        alt = f"{varname}@{grp}"
        if alt in f.variables:
            return _qc_as_int32(f.variables[alt][:])

        # Group exists but holds no QC for this variable → all-valid QC
        if group is not None:
            break

    return np.zeros(_qc_location_count(f), dtype="int32")


def _qc_location_count(f):
    """Return the number of locations from ``Location`` or legacy ``nlocs``."""
    for dim_name in ("Location", "nlocs"):
        if dim_name in f.dimensions:
            return f.dimensions[dim_name].size
    raise KeyError("Location")


def _qc_as_int32(raw):
    """Convert raw QC values to ``int32`` with an explicit missing flag."""
    values = to_numeric_safe(raw)
    invalid = ~np.isfinite(values)
    if invalid.any():
        # Casting NaN/inf to an integer is undefined and platform-dependent
        # (it may even produce 0, i.e. "valid"); pin it to a non-zero value.
        values = np.where(invalid, float(np.iinfo(np.int32).min), values)
    return values.astype("int32")
# ----------------------------------------------------------------------
# ObsValue loader
# ----------------------------------------------------------------------

def load_obsvalue(f, varname):
    """
    Read ``ObsValue/<varname>`` from a diagnostics file.

    Parameters
    ----------
    f : netCDF4.Dataset
        Diagnostics file.
    varname : str
        Variable name.

    Returns
    -------
    numpy.ndarray or None
        The variable converted with ``to_numeric_safe``, or ``None`` when
        either the ``ObsValue`` group or the variable is absent.
    """
    # Guard clauses: bail out as soon as a level of the lookup is missing.
    if "ObsValue" not in f.groups:
        return None

    obs_vars = f.groups["ObsValue"].variables
    if varname not in obs_vars:
        return None

    return to_numeric_safe(obs_vars[varname][:])
# ----------------------------------------------------------------------
# hofx loader
# ----------------------------------------------------------------------

def load_hofx(f, varname):
    """
    Read ``hofx0/<varname>`` from a diagnostics file.

    Parameters
    ----------
    f : netCDF4.Dataset
        Diagnostics file.
    varname : str
        Variable name.

    Returns
    -------
    numpy.ndarray or None
        The variable converted with ``to_numeric_safe``, or ``None`` when
        either the ``hofx0`` group or the variable is absent.
    """
    group_name = "hofx0"
    found = group_name in f.groups and varname in f.groups[group_name].variables
    if not found:
        return None

    raw = f.groups[group_name].variables[varname][:]
    return to_numeric_safe(raw)
# ----------------------------------------------------------------------
# OMB loader
# ----------------------------------------------------------------------

def load_omb(f, varname):
    """
    Universal OMB loader.

    Lookup order
    ------------
    1. ``ombg/<varname>``
    2. ``innov1/<varname>``
    3. ``DerivedMetaData/Innovation`` (this entry does not depend on
       ``varname``)

    The first entry whose group and variable both exist is returned.

    Parameters
    ----------
    f : netCDF4.Dataset
        Diagnostics file.
    varname : str
        Variable name.

    Returns
    -------
    numpy.ndarray or None
        The matching field converted with ``to_numeric_safe``, or ``None``
        if none of the locations above is present.
    """
    # (group, variable) pairs in priority order.
    candidates = (
        ("ombg", varname),
        ("innov1", varname),
        ("DerivedMetaData", "Innovation"),
    )

    for group_name, field in candidates:
        if group_name not in f.groups:
            continue
        fields = f.groups[group_name].variables
        if field in fields:
            return to_numeric_safe(fields[field][:])

    return None
# ----------------------------------------------------------------------
# OMA loader
# ----------------------------------------------------------------------

def load_oma_explicit(f, varname):
    """
    Universal OMA loader.

    Lookup order
    ------------
    1. ``oman/<varname>`` when both the group and the variable exist.
    2. Otherwise ``load_obsvalue(f, varname) - load_hofx(f, varname)``.

    Parameters
    ----------
    f : netCDF4.Dataset
        Diagnostics file.
    varname : str
        Variable name.

    Returns
    -------
    numpy.ndarray or None
        Float64 OMA array, or ``None`` when ``oman/<varname>`` is absent
        and either the observation or the hofx field cannot be loaded.

    Notes
    -----
    NOTE(review): the fallback relies on ``load_hofx``, which reads the
    ``hofx0`` group. Confirm that this field is the analysis-equivalent
    intended for an OMA fallback.
    """
    groups = f.groups
    if "oman" in groups:
        oman_vars = groups["oman"].variables
        if varname in oman_vars:
            return to_numeric_safe(oman_vars[varname][:])

    # No explicit OMA stored: derive it from observation minus simulation.
    observed = load_obsvalue(f, varname)
    simulated = load_hofx(f, varname)
    if observed is not None and simulated is not None:
        return to_numeric_safe(observed - simulated)

    return None
# ----------------------------------------------------------------------
# Backward compatibility aliases
# ----------------------------------------------------------------------
# Legacy ``*_any`` names kept for backward compatibility; each one is bound
# to the same function object as its current counterpart.
load_obs_any = load_obsvalue
load_omb_any = load_omb
load_oma_any = load_oma_explicit
load_qc_any = load_qc_universal