Source code for upxo.viz.vizDistr

"""
vizDistr.py — Distribution visualisation for UPXO grain structure analyses.

Provides the DistrViz class for plotting scalar grain property distributions
(area, perimeter, aspect ratio, …) and angular misorientation distributions
(MDF). Designed to complement ebsdviz.plot_mdf — use DistrViz.plot_mdf when
peaks are not yet computed; use ebsdviz.plot_mdf for fully annotated MDF with
peak labels and KDE from the peaks dict.

Typical usage
-------------
Grain size:
    dv = DistrViz(areas, label='Grain area', units='µm²')
    fig, ax = dv.plot_hist(bins=40, show_kde=True, step_size=rdr.step_size)
    plt.show()
    dv.print_stats()

MDF (lightweight, no peaks dict required):
    dv = DistrViz.from_mdf(mdf)
    fig, ax = dv.plot_mdf(mdf)
    plt.show()

Multiple properties:
    fig, axes = DistrViz.multi(
        {'Grain area': areas, 'Aspect ratio': ar, 'Perimeter': perim},
        units_dict={'Grain area': 'µm²', 'Aspect ratio': '', 'Perimeter': 'µm'},
        step_size=rdr.step_size,
    )
    plt.show()
"""

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats as sp_stats

# Physical units for common grain morphological properties.
# Maps a property name to the unit string shown in axis labels; an empty
# string marks a property that is plotted without a unit suffix.
# Imported by EBSDReader.see_distr and repgen2d.see_distr to avoid duplication.
PROP_UNITS = {
    'area':               'µm²',
    'perimeter':          'µm',
    'eq_diameter':        'µm',
    'major_axis_length':  'µm',
    'minor_axis_length':  'µm',
    'aspect_ratio':       '',
    'eccentricity':       '',
    'solidity':           '',
    'npixels':            'px',
}

# CSL reference angles for cubic symmetry (Σ label → disorientation angle °).
# Keys use an ASCII 'S' prefix in place of 'Σ'; the key text is what
# DistrViz.plot_mdf prints next to each dashed marker line.
_CSL_ANGLES = {
    'S3':   60.00,
    'S5':   36.87,
    'S7':   38.21,
    'S9':   38.94,
    'S11':  50.48,
    'S13a': 22.62,
    'S13b': 27.80,
}



[docs]
class DistrViz:
    """
    Distribution visualiser for scalar grain properties and MDF data.

    Parameters
    ----------
    data : array-like
        1-D array of values. NaN/Inf are stripped automatically.
    label : str
        Property name — used in axis labels and titles.
    units : str
        Unit string (e.g. 'µm²', '°'). Appended to x-label when non-empty.
    """

    def __init__(self, data, label='value', units=''):
        """Initialise the instance."""
        arr = np.asarray(data, dtype=float).ravel()
        self.data = arr[np.isfinite(arr)]
        self.label = label
        self.units = units

    # ── Alternate constructors ─────────────────────────────────────────────────


[docs]
    @classmethod
    def from_mdf(cls, mdf):
        """Build from an mdf dict (output of compute_mdf_from_quats)."""
        return cls(mdf['miso_deg'], label='Misorientation angle', units='°')


    # ── Statistics ─────────────────────────────────────────────────────────────

    @property
    def stats(self):
        """Dict of descriptive statistics computed from self.data."""
        d = self.data
        return {
            'n':      len(d),
            'min':    float(d.min()),
            'max':    float(d.max()),
            'mean':   float(d.mean()),
            'median': float(np.median(d)),
            'std':    float(d.std()),
            'skew':   float(sp_stats.skew(d)),
            'kurt':   float(sp_stats.kurtosis(d)),
            'p10':    float(np.percentile(d, 10)),
            'p90':    float(np.percentile(d, 90)),
        }


[docs]
    def print_stats(self):
        """Print a compact statistics summary to stdout."""
        s = self.stats
        u = f' ({self.units})' if self.units else ''
        print(f"{self.label}{u}  [n={s['n']}]")
        print(f"  min={s['min']:.3f}  max={s['max']:.3f}  "
              f"mean={s['mean']:.3f}  median={s['median']:.3f}")
        print(f"  std={s['std']:.3f}  skew={s['skew']:.3f}  "
              f"kurt={s['kurt']:.3f}")
        print(f"  P10={s['p10']:.3f}  P90={s['p90']:.3f}")


    # ── Internal helpers ───────────────────────────────────────────────────────

    def _xlabel(self, step_size=None):
        """ xlabel."""
        parts = [self.label]
        if self.units or step_size is not None:
            inner = self.units
            if step_size is not None:
                sep = ',  ' if inner else ''
                inner += f'{sep}step={step_size} µm'
            parts.append(f'({inner})')
        return '  '.join(parts)

    def _stat_title(self):
        """ stat title."""
        s = self.stats
        return (f'{self.label} distribution  '
                f'(n={s["n"]},  mean={s["mean"]:.2f},  std={s["std"]:.2f})')

    def _draw_stat_lines(self, ax):
        """
        Draw labelled vertical marker lines at the mean and the median.

        The mean is drawn first (black, dashed), then the median
        (dark orange, dotted).  The caller is responsible for the legend.
        """
        summary = self.stats
        markers = (
            ('mean', 'k', '--'),
            ('median', 'darkorange', ':'),
        )
        for key, line_colour, dash in markers:
            position = summary[key]
            ax.axvline(position, color=line_colour, ls=dash, lw=1.2,
                       label=f'{key} = {position:.2f}')

    # ── Unified dispatcher ─────────────────────────────────────────────────────


[docs]
    def plot(self, vis='hist', bins=40, show_kde=True, show_stats=True,
             color='steelblue', figsize=(7, 4), log_scale=False,
             step_size=None, bw_method='scott', fill=True, ax=None):
        """
        Unified plot dispatcher — routes to plot_hist, plot_kde, or
        plot_hist_kde based on *vis*.

        Parameters
        ----------
        vis : str
            ``'hist'``, ``'kde'``, or ``'hist_kde'``.
        bins : int
            Histogram bin count (used by ``'hist'`` and ``'hist_kde'``).
        show_kde : bool
            KDE overlay on histogram (``'hist'`` only).
        show_stats : bool
            Annotate mean / median lines.
        color : str
        figsize : tuple
        log_scale : bool
            Log x-axis (``'hist'`` only).
        step_size : float or None
            Appended to x-label when provided.
        bw_method : str or float
            KDE bandwidth selector (``'kde'`` only).
        fill : bool
            Fill KDE area (``'kde'`` only).
        ax : Axes or None

        Returns
        -------
        fig, ax
        """
        if vis == 'hist':
            return self.plot_hist(bins=bins, show_kde=show_kde,
                                  show_stats=show_stats, color=color,
                                  figsize=figsize, log_scale=log_scale,
                                  step_size=step_size, ax=ax)
        elif vis == 'kde':
            return self.plot_kde(bw_method=bw_method, fill=fill,
                                 color=color, show_stats=show_stats,
                                 figsize=figsize, step_size=step_size, ax=ax)
        elif vis == 'hist_kde':
            return self.plot_hist_kde(bins=bins, color=color,
                                      show_stats=show_stats, figsize=figsize,
                                      step_size=step_size, ax=ax)
        else:
            raise ValueError(
                f"vis must be 'hist', 'kde', or 'hist_kde'; got '{vis!r}'"
            )


    # ── Scalar distribution plots ──────────────────────────────────────────────


[docs]
    def plot_hist(self, bins=40, show_kde=True, show_stats=True,
                  color='steelblue', figsize=(7, 4), log_scale=False,
                  step_size=None, ax=None):
        """
        Histogram with optional KDE overlay and mean/median annotations.

        Parameters
        ----------
        bins : int
            Number of histogram bins.
        show_kde : bool
            KDE curve scaled to match histogram counts.  The overlay is
            skipped when the data hold fewer than two samples or have zero
            spread, because a Gaussian KDE cannot be fitted to such data.
        show_stats : bool
            Draw vertical mean and median lines.
        color : str
            Histogram bar colour.
        figsize : tuple
            Figure size in inches, used when *ax* is None.
        log_scale : bool
            Log x-axis.
        step_size : float or None
            EBSD step size — appended to x-label when provided.
        ax : Axes or None
            Existing axes to draw into; a new figure is created when None.

        Returns
        -------
        fig, ax
        """
        own_fig = ax is None
        if own_fig:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig = ax.figure

        _, edges, _ = ax.hist(self.data, bins=bins,
                              color=color, edgecolor='k',
                              alpha=0.75, label='histogram')

        # gaussian_kde raises on a single sample or on constant data; fall
        # back to a histogram-only panel so one degenerate property does not
        # abort a whole multi-panel figure.
        kde_drawn = False
        if (show_kde and self.data.size > 1
                and self.data.max() > self.data.min()):
            kde = sp_stats.gaussian_kde(self.data)
            x = np.linspace(self.data.min(), self.data.max(), 400)
            bin_width = edges[1] - edges[0]
            # density * n * bin width converts the KDE to expected counts.
            ax.plot(x, kde(x) * len(self.data) * bin_width,
                    color='crimson', lw=1.8, label='KDE')
            kde_drawn = True

        if show_stats:
            self._draw_stat_lines(ax)
        # Show the legend whenever there is something beyond the bare
        # histogram to identify (previously the KDE label was dropped when
        # show_stats was False).
        if show_stats or kde_drawn:
            ax.legend(fontsize=8, framealpha=0.7)

        ax.set_xlabel(self._xlabel(step_size))
        ax.set_ylabel('Count')
        ax.set_title(self._stat_title())
        if log_scale:
            ax.set_xscale('log')
        if own_fig:
            # Lay out the figure we created rather than whichever figure
            # pyplot currently considers active.
            fig.tight_layout()
        return fig, ax



[docs]
    def plot_kde(self, bw_method='scott', fill=True, color='steelblue',
                 show_stats=True, figsize=(7, 4), step_size=None, ax=None):
        """
        Pure KDE plot (probability density).

        The density is evaluated on 400 evenly spaced points spanning the
        data range.

        Parameters
        ----------
        bw_method : str or float
            Bandwidth selector passed to scipy.stats.gaussian_kde.
        fill : bool
            Fill area under the KDE curve.
        color : str
            Colour of the curve and of the filled area.
        show_stats : bool
            Draw mean / median lines and a legend for them.
        figsize : tuple
            Figure size in inches, used when *ax* is None.
        step_size : float or None
            Appended to the x-label when provided.
        ax : Axes or None
            Existing axes to draw into; a new figure is created when None.

        Returns
        -------
        fig, ax
        """
        created_here = ax is None
        if created_here:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig = ax.figure

        samples = self.data
        estimator = sp_stats.gaussian_kde(samples, bw_method=bw_method)
        grid = np.linspace(samples.min(), samples.max(), 400)
        density = estimator(grid)

        if fill:
            ax.fill_between(grid, density, alpha=0.3, color=color)
        ax.plot(grid, density, color=color, lw=2)

        if show_stats:
            self._draw_stat_lines(ax)
            ax.legend(fontsize=8, framealpha=0.7)

        x_text = self._xlabel(step_size)
        title_text = self._stat_title()
        ax.set_xlabel(x_text)
        ax.set_ylabel('Density')
        ax.set_title(title_text)

        if created_here:
            plt.tight_layout()
        return fig, ax



[docs]
    def plot_hist_kde(self, bins=40, color='steelblue', show_stats=True,
                     figsize=(7, 4), step_size=None, ax=None):
        """
        Density-normalised histogram with KDE overlay.

        Parameters
        ----------
        bins : int
            Number of histogram bins.
        color : str
            Histogram bar colour (the KDE curve is always crimson).
        show_stats : bool
            Draw mean / median lines.
        figsize : tuple
            Figure size in inches, used when *ax* is None.
        step_size : float or None
            Appended to the x-label when provided.
        ax : Axes or None
            Existing axes to draw into; a new figure is created when None.

        Returns
        -------
        fig, ax
        """
        created_here = ax is None
        if created_here:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig = ax.figure

        samples = self.data
        ax.hist(samples, bins=bins, density=True, color=color,
                edgecolor='k', alpha=0.5, label='histogram')

        estimator = sp_stats.gaussian_kde(samples)
        grid = np.linspace(samples.min(), samples.max(), 400)
        ax.plot(grid, estimator(grid), color='crimson', lw=2, label='KDE')

        if show_stats:
            self._draw_stat_lines(ax)

        # The legend is always shown: histogram and KDE carry labels even
        # when the stat lines are switched off.
        ax.legend(fontsize=8, framealpha=0.7)

        x_text = self._xlabel(step_size)
        title_text = self._stat_title()
        ax.set_xlabel(x_text)
        ax.set_ylabel('Density')
        ax.set_title(title_text)

        if created_here:
            plt.tight_layout()
        return fig, ax


    # ── MDF plot ───────────────────────────────────────────────────────────────


[docs]
    def plot_mdf(self, mdf, show_csl=True, show_stats=True,
                 angle_max=65.0, figsize=(8, 4), ax=None):
        """
        Bar-chart MDF from a pre-computed mdf dict with optional CSL markers.

        Lighter alternative to ebsdviz.plot_mdf — does not require the peaks
        dict. Use ebsdviz.plot_mdf when peak labels and KDE are needed.
        Everything drawn comes from *mdf*; ``self.data`` is not read.

        Parameters
        ----------
        mdf : dict
            Output of compute_mdf_from_quats. Required keys:
            'hist_bin_centers', 'hist_density', 'hist_bin_edges',
            'n_pairs', 'mean_angle', 'std_angle'.
        show_csl : bool
            Draw dashed vertical lines at common cubic CSL angles
            (only those not exceeding *angle_max*).
        show_stats : bool
            Annotate mean ± std in the legend.
        angle_max : float
            X-axis upper limit (degrees).
        figsize : tuple
            Figure size in inches, used when *ax* is None.
        ax : Axes or None
            Existing axes to draw into; a new figure is created when None.

        Returns
        -------
        fig, ax
        """
        created_here = ax is None
        if created_here:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig = ax.figure

        # Bar width is taken from the first bin.
        edges = mdf['hist_bin_edges']
        bar_width = float(edges[1] - edges[0])
        ax.bar(mdf['hist_bin_centers'], mdf['hist_density'],
               width=bar_width, color='steelblue', edgecolor='k',
               linewidth=0.4, alpha=0.85)

        if show_csl:
            peak_density = float(np.max(mdf['hist_density']))
            in_range = [(name, theta) for name, theta in _CSL_ANGLES.items()
                        if theta <= angle_max]
            for name, theta in in_range:
                ax.axvline(theta, color='firebrick',
                           lw=0.9, ls='--', alpha=0.75)
                # Label sits just right of its line, near the top of the bars.
                ax.text(theta + 0.3, peak_density * 0.93, name,
                        color='firebrick', fontsize=7,
                        va='top', rotation=90)

        if show_stats:
            centre = mdf['mean_angle']
            spread = mdf['std_angle']
            ax.axvline(centre, color='k', ls='--', lw=1.2,
                       label=f'mean = {centre:.1f}°  (σ = {spread:.1f}°)')
            ax.legend(fontsize=8, framealpha=0.7)

        pair_count = mdf["n_pairs"]
        ax.set_xlabel('Misorientation angle (°)')
        ax.set_ylabel('Probability density (°⁻¹)')
        ax.set_title(f'Grain-boundary MDF  '
                     f'(n={pair_count} pairs,  cubic symmetry)')
        ax.set_xlim(0, angle_max)

        if created_here:
            plt.tight_layout()
        return fig, ax


    # ── Multi-property grid ────────────────────────────────────────────────────


[docs]
    @classmethod
    def multi(cls, data_dict, units_dict=None, step_size=None,
              bins=40, show_kde=True, show_stats=True,
              ncolumns=2, figsize_per=(5, 3.5), color='steelblue',
              log_scale=False):
        """
        Plot distributions for multiple grain properties in a subplot grid.

        One histogram panel (via ``plot_hist``) is drawn per entry of
        *data_dict*, filling the grid row by row; unused trailing panels are
        hidden.

        Parameters
        ----------
        data_dict : dict
            {label: array-like} of grain properties to plot.
        units_dict : dict or None
            {label: units_str}. Missing keys default to no units.
        step_size : float or None
            Passed to each subplot for x-label annotation.
        bins : int
            Histogram bin count for every panel.
        show_kde : bool
            Overlay a KDE curve on every panel.
        show_stats : bool
            Draw mean / median lines on every panel.
        ncolumns : int
            Number of grid columns.
        figsize_per : tuple
            (width, height) per panel in inches.
        color : str
            Histogram bar colour.
        log_scale : bool
            Log x-axis on every panel.

        Returns
        -------
        fig, axes  (axes is a flat ndarray)
        """
        units_lookup = units_dict or {}
        names = list(data_dict)
        n_panels = len(names)

        # Ceiling division: enough rows to hold every panel.
        nrows = (n_panels + ncolumns - 1) // ncolumns
        total_size = (figsize_per[0] * ncolumns, figsize_per[1] * nrows)
        fig, grid = plt.subplots(nrows, ncolumns, figsize=total_size)
        # np.array() also covers the single-Axes case returned for a 1x1 grid.
        axes_flat = np.array(grid).flatten()

        for position, panel in enumerate(axes_flat):
            if position >= n_panels:
                panel.set_visible(False)
                continue
            name = names[position]
            viewer = cls(data_dict[name], label=name,
                         units=units_lookup.get(name, ''))
            viewer.plot_hist(bins=bins, show_kde=show_kde,
                             show_stats=show_stats, color=color,
                             log_scale=log_scale, step_size=step_size,
                             ax=panel)

        plt.tight_layout()
        return fig, axes_flat




# ── Multi-group overlaid distribution plot ─────────────────────────────────────


[docs]
def plot_grouped_distributions(
        data,
        prop_labels=None,
        group_colors=None,
        group_labels=None,
        bins=40,
        bw_method='scott',
        peak_prominence=0.01,
        figsize_per=(5, 4),
        dpi=110,
        suptitle='Property distributions by group',
        ncols=None,
        fontsize=9.0,
        show_hist=True,
        show_peaks=True,
        show_legend=True,
        x_margin=0.03,
        do_tight_layout=True,
):
    """
    Overlaid histogram + KDE + peak markers for multiple properties and groups.

    Generic plotting function — no knowledge of grain structures or UPXO data
    formats.  Data must be pre-extracted into plain arrays before calling.

    Parameters
    ----------
    data : dict
        ``{prop_name: {group_name: array-like}}`` — one entry per property,
        each containing one array per group.  Non-finite values are removed
        first; groups left with fewer than two values are silently skipped.
        A group whose remaining values are all identical keeps its histogram
        and legend entry but gets no KDE curve or peak markers.
    prop_labels : dict or None
        ``{prop_name: display_label}`` for axis / title text.  Missing keys
        fall back to the prop_name itself.
    group_colors : dict or None
        ``{group_name: colour_string}``.  Missing keys cycle through a default
        palette.  The mapping passed in is not modified.
    group_labels : dict or None
        ``{group_name: display_label}`` for legend entries.  Missing keys fall
        back to the group_name itself.
    bins : int
        Number of histogram bins (shared x-range across groups per property).
    bw_method : str or float
        Bandwidth selector passed to ``scipy.stats.gaussian_kde``.
    peak_prominence : float
        Fraction of KDE maximum used as minimum prominence for ``find_peaks``.
    figsize_per : tuple
        ``(width, height)`` in inches per subplot panel.
    dpi : int
        Figure resolution.
    suptitle : str
        Figure-level title.
    ncols : int or None
        Subplot grid columns.  ``None`` places all panels in a single row.
    fontsize : float
        Base font size; tick labels use ``fontsize-2``, legend ``fontsize-2``,
        peak annotations ``fontsize-3``, suptitle ``fontsize+1``.
    show_hist : bool
        Draw histogram bars behind the KDE curves.  Default ``True``.
    show_peaks : bool
        Draw vertical dashed lines and value annotations at KDE peaks.
        Default ``True``.
    show_legend : bool
        Draw a per-group legend on each subplot.  Default ``True``.
    x_margin : float
        Fractional padding added to both sides of the x-axis so that tick
        labels are never clipped at the axis boundary.  Default ``0.03``.
    do_tight_layout : bool
        Apply a tight layout to the figure before returning.  Set to
        ``False`` when the caller needs to adjust the figure (e.g. to add a
        colorbar) before finalising the layout.  Default ``True``.

    Returns
    -------
    fig, axes : Figure and 2-D axes array (shape ``(nrows, ncols_used)``).

    Raises
    ------
    ValueError
        If *data* contains no properties.
    """
    from scipy.stats import gaussian_kde
    from scipy.signal import find_peaks

    _DEFAULT_PALETTE = [
        '#4878CF', '#D65F5F', '#59A14F', '#888888',
        '#F28E2B', '#76B7B2', '#E15759', '#B07AA1',
    ]

    prop_labels  = prop_labels  or {}
    group_labels = group_labels or {}
    # Work on a copy: default colours are filled in below and must not leak
    # into the caller's dict.
    group_colors = dict(group_colors or {})

    prop_names = list(data)
    n_props    = len(prop_names)
    if n_props == 0:
        raise ValueError('data must contain at least one property')

    # Groups in first-seen order across all properties; any group without an
    # explicit colour gets the next palette entry.
    all_groups = list(dict.fromkeys(g for gd in data.values() for g in gd))
    for i, g in enumerate(all_groups):
        group_colors.setdefault(g, _DEFAULT_PALETTE[i % len(_DEFAULT_PALETTE)])

    _ncols = n_props if ncols is None else max(1, min(ncols, n_props))
    _nrows = int(np.ceil(n_props / _ncols))
    fig, axes = plt.subplots(
        _nrows, _ncols,
        figsize=(_ncols * figsize_per[0], _nrows * figsize_per[1]),
        dpi=dpi, squeeze=False,
    )

    # Hide the unused trailing cells of the grid.
    for spare in range(n_props, _nrows * _ncols):
        axes[spare // _ncols, spare % _ncols].set_visible(False)

    for idx, pname in enumerate(prop_names):
        ax     = axes[idx // _ncols, idx % _ncols]
        groups = data[pname]

        # Drop non-finite values BEFORE the size check, so a group such as
        # [1.0, nan] is skipped instead of reaching gaussian_kde with a
        # single sample.
        arrays = {}
        for g, v in groups.items():
            a = np.asarray(v, dtype=float).ravel()
            a = a[np.isfinite(a)]
            if a.size > 1:
                arrays[g] = a

        if not arrays:
            ax.set_visible(False)
            continue

        combined  = np.concatenate(list(arrays.values()))
        vmin, vmax = combined.min(), combined.max()
        if vmin == vmax:
            # No spread at all: neither bins nor a KDE can be built.
            ax.set_visible(False)
            continue

        rng       = vmax - vmin
        pad       = x_margin * rng
        bin_edges = np.linspace(vmin, vmax, bins + 1)
        bin_w     = bin_edges[1] - bin_edges[0]
        xs        = np.linspace(vmin, vmax, 600)

        for grp, vals in arrays.items():
            colour = group_colors.get(grp, '#333333')

            if show_hist:
                counts, _ = np.histogram(vals, bins=bin_edges, density=True)
                ax.bar(bin_edges[:-1], counts, width=bin_w,
                       color=colour, alpha=0.28, edgecolor='none', align='edge')

            # gaussian_kde fails on constant data; such a group keeps its
            # histogram and legend entry but has no curve or peaks.
            if vals.max() > vals.min():
                kde = gaussian_kde(vals, bw_method=bw_method)
                ys  = kde(xs)
                ax.plot(xs, ys, color=colour, linewidth=1.8)

                if show_peaks:
                    peak_idx, _ = find_peaks(
                        ys, prominence=peak_prominence * ys.max())
                    for pi in peak_idx:
                        ax.axvline(xs[pi], color=colour, linewidth=0.8,
                                   linestyle='--', alpha=0.7)
                        ax.text(xs[pi], ys[pi] * 1.03, f'{xs[pi]:.3g}',
                                fontsize=fontsize - 3, color=colour,
                                ha='center', va='bottom', rotation=90)

            if show_legend:
                mn, mx = vals.min(), vals.max()
                mu, sd = vals.mean(), vals.std()
                disp = group_labels.get(grp, grp)
                lbl  = (f'{disp} (n={len(vals)})\n'
                        f'  µ={mu:.3g}  σ={sd:.3g}  [{mn:.3g}, {mx:.3g}]')
                # Empty proxy line: contributes only a legend entry.
                ax.plot([], [], color=colour, linewidth=2.5, label=lbl)

        xlabel = prop_labels.get(pname, pname)
        ax.set_xlabel(xlabel, fontsize=fontsize)
        ax.set_ylabel('Density', fontsize=fontsize)
        ax.set_title(xlabel, fontsize=fontsize)
        ax.set_xlim(vmin - pad, vmax + pad)
        if show_legend:
            ax.legend(fontsize=fontsize - 2, loc='upper right', framealpha=0.85,
                      handlelength=1.2)
        ax.tick_params(labelsize=fontsize - 2)

    fig.suptitle(suptitle, fontsize=fontsize + 1, y=1.02)
    if do_tight_layout:
        # Lay out this figure specifically, not pyplot's current figure.
        fig.tight_layout()
    return fig, axes




[docs]
def plot_repr_rank(
        repr_rank_ng: dict,
        figsize=None,
        dpi: int = 100,
        fontsize_annot: float = 8.0,
        fontsize_tick: float = 9.0,
        fontsize_title: float = 9.0,
        fontsize_suptitle: float = 11.0,
) -> None:
    """
    Vertically stacked heatmaps showing the per-property rank of every
    MC time slice under each representativeness metric (ratio, Wasserstein,
    energy distance, KS statistic, Anderson–Darling statistic).

    One panel is drawn for each of those metrics that is present in
    ``repr_rank_ng`` (up to five, always in the order listed above); metrics
    missing from the dict are skipped.  The figure is shown with
    ``plt.show()`` and nothing is returned.

    Colour encodes rank within each column independently:
    green = best (rank 1), red = worst (rank N).  Cell text shows the raw
    numeric score.  Rows are ordered best-to-worst by the aggregate score
    (inherited from the DataFrame sort order in ``repr_rank_ng``).

    Ranking rule per column:
    - ratio, property columns  : rank by ``|value − 1|`` ascending
      (closest to 1.0 = best)
    - ratio, aggregate column  : rank by value ascending (lowest = best)
    - wasserstein / energy / ks / ad : rank by value ascending
      (lowest = best)

    Parameters
    ----------
    repr_rank_ng : dict
        ``{metric: df}`` with any of the keys ``'ratio'``, ``'wasserstein'``,
        ``'energy'``, ``'ks'``, ``'ad'`` — as stored in
        ``repgen2d.repr_rank_ng`` after calling ``find_repr_mcgs_props``.
    figsize : tuple or None
        Override default figure size.  Default auto-computes from data shape
        and the number of panels.
    dpi : int
        Figure resolution.
    fontsize_annot : float
        Font size for the numeric value printed in each cell.
    fontsize_tick : float
        Font size for axis tick labels (slice keys on y-axis, column names
        on x-axis).
    fontsize_title : float
        Font size for each panel title.
    fontsize_suptitle : float
        Font size for the overall figure title.

    Raises
    ------
    KeyError
        If ``repr_rank_ng`` contains none of the supported metrics.
    """
    known_metrics = ('ratio', 'wasserstein', 'energy', 'ks', 'ad')
    titles = {
        'ratio':       'Ratio  (mean offset)\n1.0 = perfect  |  green = closest to 1.0',
        'wasserstein': 'Wasserstein  (shape distance)\n0 = identical  |  green = smallest',
        'energy':      'Energy  (shape distance)\n0 = identical  |  green = smallest',
        'ks':          'KS statistic  (max CDF gap)\n0 = identical  |  green = smallest',
        'ad':          'Anderson–Darling  (tail-sensitive CDF)\n0 = identical  |  green = smallest',
    }
    fmt = {'ratio': '{:.3f}', 'wasserstein': '{:.4f}', 'energy': '{:.4f}',
           'ks': '{:.4f}', 'ad': '{:.4f}'}

    # Plot only what is available instead of failing on the first absent key.
    metrics = [m for m in known_metrics if m in repr_rank_ng]
    if not metrics:
        raise KeyError(
            f'repr_rank_ng contains none of the metrics {known_metrics}')
    n_panels = len(metrics)

    # Any available table serves to size the figure; 'wasserstein' is
    # preferred to keep the previous default sizing.
    size_key = 'wasserstein' if 'wasserstein' in repr_rank_ng else metrics[0]
    n_slices, n_cols = repr_rank_ng[size_key].shape
    if figsize is None:
        # Reduces to max(20, n_slices * 0.65 * 5) when all five panels exist.
        figsize = (max(10, n_cols * 1.8),
                   max(4 * n_panels, n_slices * 0.65 * n_panels))

    # squeeze=False keeps `axes` 2-D even when only one panel is drawn.
    fig, axes = plt.subplots(n_panels, 1, figsize=figsize, dpi=dpi,
                             squeeze=False)

    for ax, metric in zip(axes[:, 0], metrics):
        df   = repr_rank_ng[metric]
        vals = df.values.astype(float)
        cols = list(df.columns)
        rows = [str(k) for k in df.index]
        nr, nc = vals.shape

        # Rank every column independently (0 = best).
        rank_mat = np.empty_like(vals)
        for j, col in enumerate(cols):
            col_vals = vals[:, j]
            if metric == 'ratio' and col != 'aggregate':
                # Ratios are best when closest to 1.0.
                order = np.argsort(np.abs(col_vals - 1.0))
            else:
                order = np.argsort(col_vals)
            ranks = np.empty(nr, dtype=float)
            ranks[order] = np.arange(nr)
            rank_mat[:, j] = ranks

        norm_rank = rank_mat / max(nr - 1, 1)   # 0 = best, 1 = worst

        ax.imshow(norm_rank, cmap='RdYlGn_r', vmin=0, vmax=1,
                  aspect='auto', interpolation='nearest')
        for i in range(nr):
            for j in range(nc):
                ax.text(j, i, fmt[metric].format(vals[i, j]),
                        ha='center', va='center',
                        fontsize=fontsize_annot, color='black')

        ax.set_xticks(range(nc))
        ax.set_xticklabels(cols, rotation=30, ha='right', fontsize=fontsize_tick)
        ax.set_yticks(range(nr))
        ax.set_yticklabels(rows, fontsize=fontsize_tick)
        ax.set_ylabel('MC time slice  (top = best aggregate)',
                      fontsize=fontsize_tick)
        ax.set_title(titles[metric], fontsize=fontsize_title, pad=8)
        # White separator before the last column — presumably the
        # 'aggregate' column; confirm against the DataFrame layout.
        ax.axvline(nc - 1.5, color='white', linewidth=2)

    fig.suptitle('MC–EBSD representativeness ranking',
                 fontsize=fontsize_suptitle, y=1.01)
    fig.tight_layout()
    plt.show()




[docs]
def plot_normalized_prop_distributions(
        ebsd_data: dict,
        mc_data: dict,
        props: list,
        scores: dict | None = None,
        prop_labels: dict | None = None,
        bins: int = 40,
        bw_method='scott',
        figsize_per: tuple = (5, 4),
        dpi: int = 100,
        ncols: int | None = None,
        fontsize: float = 9.0,
        show_hist: bool = True,
        show_peaks: bool = True,
        legend_loc: str = 'upper right',
        legend_ncol: int = 1,
        legend_fontsize: float | None = None,
) -> None:
    """
    Overlaid normalised property distributions for EBSD (merged) and MC slices.

    Each distribution is normalised by its own mean before plotting, matching
    the normalisation used in ``find_repr_mcgs_props``.  All curves are therefore
    centred near 1.0 on the x-axis and are directly shape-comparable.

    Wasserstein and energy distances are annotated in each subplot legend when
    ``scores`` is provided.  The figure is shown with ``plt.show()`` and
    nothing is returned.

    Parameters
    ----------
    ebsd_data : dict
        ``{prop: array}`` of EBSD-merged property values, each already divided
        by its own mean.
    mc_data : dict
        ``{slice_key: {prop: array}}`` of MC property values, each already
        divided by its own mean.
    props : list of str
        Ordered list of property names to plot.
    scores : dict or None
        ``{slice_key: {prop: {'wasserstein': v, 'energy': v}}}`` extracted from
        ``repr_rank_ng``.  When supplied, each MC curve's legend entry is
        annotated with ``W=...  E=...`` for the per-property distance; a
        missing distance is shown as ``nan``.  Slices that have no curve in a
        panel are not annotated there.
    prop_labels : dict or None
        ``{prop: display_label}``.  Defaults to ``f'{prop}  (mean normalized)'``.
    bins, bw_method, figsize_per, dpi, ncols, fontsize, show_hist, show_peaks
        Forwarded to :func:`plot_grouped_distributions`.
    legend_loc : str
        Legend location string passed to ``ax.legend(loc=...)``.
        Examples: ``'upper right'``, ``'upper left'``, ``'lower right'``,
        ``'center left'``, ``'best'``.  Default ``'upper right'``.
    legend_ncol : int
        Number of columns in the legend.  Values > 1 split entries side-by-side,
        reducing legend height and — when entries are uniform in width — the
        overall legend footprint.  Default ``1`` (single column).
    legend_fontsize : float or None
        Font size for legend text.  Reducing this is the most direct way to
        shrink the legend box since box width is driven by label text length.
        Defaults to ``fontsize - 2`` when None.
    """
    _MC_PALETTE = [
        '#4878CF', '#D65F5F', '#59A14F', '#F28E2B',
        '#76B7B2', '#E15759', '#B07AA1', '#FF9DA7',
    ]

    if prop_labels is None:
        prop_labels = {p: f'{p}  (mean normalized)' for p in props}

    # EBSD reference in near-black; MC slices cycle through the palette.
    group_colors = {'EBSD (merged)': '#222222'}
    for i, k in enumerate(mc_data):
        group_colors[f'MC  t={k}'] = _MC_PALETTE[i % len(_MC_PALETTE)]

    # Re-shape to the {prop: {group: array}} layout the generic plotter takes.
    data = {}
    for p in props:
        groups = {'EBSD (merged)': ebsd_data[p]}
        for k, mc_props in mc_data.items():
            groups[f'MC  t={k}'] = mc_props[p]
        data[p] = groups

    # Always defer layout so we can post-process legends uniformly.
    fig, axes = plot_grouped_distributions(
        data,
        prop_labels=prop_labels,
        group_colors=group_colors,
        bins=bins, bw_method=bw_method,
        figsize_per=figsize_per, dpi=dpi, ncols=ncols, fontsize=fontsize,
        show_hist=show_hist, show_peaks=show_peaks,
        suptitle='Normalised property distributions — EBSD (merged) vs MC slices',
        do_tight_layout=False,
    )

    legend_size = (legend_fontsize if legend_fontsize is not None
                   else fontsize - 2)

    # Attach the scores to the matching MC legend entry and re-apply the
    # legend with the user-controlled style.
    for idx, p in enumerate(props):
        ax = axes.flat[idx]
        handles, labels = ax.get_legend_handles_labels()
        if not handles:
            # Nothing was plotted in this panel (it has been hidden).
            continue

        if scores is not None:
            for k in mc_data:
                if k in scores and p in scores[k]:
                    sc = scores[k][p]
                    w = sc.get('wasserstein', float('nan'))
                    e = sc.get('energy', float('nan'))
                    # Legend labels produced by plot_grouped_distributions
                    # begin with '<group name> (n=...'.  Appending to that
                    # label keeps each score next to its own coloured curve
                    # instead of in a separate entry with an invisible
                    # handle and no slice key.
                    prefix = f'MC  t={k} (n='
                    for handle, text in zip(handles, labels):
                        if text.startswith(prefix):
                            handle.set_label(
                                f'{text}\n  → W={w:.4f}  E={e:.4f}')
                            break

        ax.legend(fontsize=legend_size, loc=legend_loc, framealpha=0.85,
                  ncol=legend_ncol)

    fig.tight_layout()
    plt.show()




[docs]
def plot_qq_comparison(
        ebsd_data: dict,
        mc_data: dict,
        props: list,
        prop_labels: dict | None = None,
        figsize_per: tuple = (4, 4),
        dpi: int = 100,
        ncols: int | None = None,
        fontsize: float = 9.0,
) -> None:
    """
    Draw Q-Q panels comparing EBSD and MC grain property distributions.

    For every property, 300 evenly spaced percentile levels from 0 to 100
    are evaluated on the EBSD sample and on each MC slice.  The EBSD
    quantiles go on the x-axis and the MC quantiles on the y-axis, one curve
    per MC slice.  A dashed black diagonal spanning the full range of all
    plotted quantiles marks y = x.  The figure is shown with ``plt.show()``;
    nothing is returned.

    How to read a panel
    -------------------
    - Curve on the diagonal: both samples have the same value at that
      quantile.
    - Curve **above** the diagonal: the MC value exceeds the EBSD value at
      that quantile.
    - Curve **below** the diagonal: the MC value is smaller than the EBSD
      value at that quantile.
    - Departures near the **lower-left** concern the small-value end of the
      distributions; departures near the **upper-right** concern the
      large-value end.

    Parameters
    ----------
    ebsd_data : dict
        ``{prop: array}`` of EBSD values.  The function does not normalise;
        callers are expected to pass data already divided by its own mean
        (the default labels and the figure title say "mean normalized").
    mc_data : dict
        ``{slice_key: {prop: array}}`` of MC values, prepared the same way.
        Each ``slice_key`` appears in the legend as ``MC  t=<slice_key>``.
    props : list of str
        Properties to plot, one subplot each, filled row by row.
    prop_labels : dict or None
        ``{prop: display_label}``.  When None, every property gets
        ``f'{prop}  (mean normalized)'``.
    figsize_per : tuple
        ``(width, height)`` of a single subplot in inches.
    dpi : int
        Figure resolution.
    ncols : int or None
        Number of grid columns, clamped to ``[1, len(props)]``.  ``None``
        puts all panels in one row.
    fontsize : float
        Base font size; tick labels use ``fontsize - 1``, legends
        ``fontsize - 2`` and the figure title ``fontsize + 1``.
    """
    palette = (
        '#4878CF', '#D65F5F', '#59A14F', '#F28E2B',
        '#76B7B2', '#E15759', '#B07AA1', '#FF9DA7',
    )
    n_colours = len(palette)

    if prop_labels is None:
        prop_labels = {name: f'{name}  (mean normalized)' for name in props}

    # Grid geometry: a single row unless the caller limits the column count.
    n_panels = len(props)
    if ncols is None:
        grid_cols = n_panels
    else:
        grid_cols = max(1, min(ncols, n_panels))
    grid_rows = int(np.ceil(n_panels / grid_cols))

    fig, axes = plt.subplots(
        nrows=grid_rows,
        ncols=grid_cols,
        figsize=(grid_cols * figsize_per[0], grid_rows * figsize_per[1]),
        dpi=dpi,
        squeeze=False,
    )
    # Row-major flattening keeps the same panel order as (idx // cols, idx % cols).
    flat_axes = axes.ravel()

    levels = np.linspace(0, 100, 300)

    for panel, name in zip(flat_axes, props):
        ref_quantiles = np.percentile(ebsd_data[name], levels)

        # Every plotted quantile is pooled so the diagonal covers all curves.
        pooled = [*ref_quantiles]
        for slot, (slice_key, slice_props) in enumerate(mc_data.items()):
            sim_quantiles = np.percentile(slice_props[name], levels)
            pooled.extend(sim_quantiles)
            panel.plot(
                ref_quantiles, sim_quantiles,
                color=palette[slot % n_colours],
                lw=1.5,
                label=f'MC  t={slice_key}',
            )

        lo = min(pooled)
        hi = max(pooled)
        panel.plot([lo, hi], [lo, hi], 'k--', lw=1.0, label='perfect match')

        shown_name = prop_labels[name]
        panel.set_xlabel(f'EBSD  {shown_name}', fontsize=fontsize)
        panel.set_ylabel(f'MC  {shown_name}', fontsize=fontsize)
        panel.set_title(shown_name, fontsize=fontsize)
        panel.tick_params(labelsize=fontsize - 1)
        panel.legend(fontsize=fontsize - 2, framealpha=0.8)

    # Grid cells beyond the last property are hidden rather than left empty.
    for unused in flat_axes[n_panels:]:
        unused.set_visible(False)

    fig.suptitle(
        'Q-Q plots — EBSD (merged) vs MC slices  (mean-normalised)',
        fontsize=fontsize + 1,
        y=1.01,
    )
    plt.tight_layout()
    plt.show()




[docs]
def plot_ebsd_tvf(
        tvf_result: dict,
        figsize: tuple = (7, 4),
        dpi: int = 100,
        fontsize: float = 9.0,
        title: str = 'EBSD grain-role area fractions',
) -> None:
    """
    Show EBSD area fractions per grain role as a horizontal bar chart.

    One bar is drawn for each of four grain-role categories, listed top to
    bottom in this order:

    - **Pure parents** — matrix grains; never a twin of any grain.
    - **Primary twins** — first-generation twins whose parent is a pure
      parent.
    - **Secondary twins** — twins whose parent is itself an intermediate
      (twin-of-a-twin, 2nd generation).
    - **Intermediate twins** — grains that are simultaneously a twin of one
      grain and a parent of another (twin chains).

    Each bar is annotated with its value to four decimals, and the overall
    twin area fraction is printed in a boxed note in the lower-right corner
    of the axes.  The figure is shown with ``plt.show()``; nothing is
    returned.

    Parameters
    ----------
    tvf_result : dict
        Output of ``repgen2d.compute_ebsd_tvf``.  The keys read here are
        ``'pure_parent_frac'``, ``'primary_twin_frac'``,
        ``'secondary_twin_frac'``, ``'intermediate_frac'`` and
        ``'overall_twin_frac'``.
    figsize : tuple
        Figure size ``(width, height)`` in inches.
    dpi : int
        Figure resolution.
    fontsize : float
        Base font size; bar annotations use ``fontsize - 1`` and the title
        ``fontsize + 1``.
    title : str
        Axes title.
    """
    # (display label, key in tvf_result, bar colour), in plotting order.
    role_table = (
        ('Pure parents',       'pure_parent_frac',    '#555555'),
        ('Primary twins',      'primary_twin_frac',   '#4878CF'),
        ('Secondary twins',    'secondary_twin_frac', '#F28E2B'),
        ('Intermediate twins', 'intermediate_frac',   '#59A14F'),
    )
    labels = [row[0] for row in role_table]
    values = [tvf_result[row[1]] for row in role_table]
    colors = [row[2] for row in role_table]

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    bars = ax.barh(labels, values, color=colors, edgecolor='white', height=0.5)

    # Numeric annotation just right of each bar end, centred on the bar.
    for bar, frac in zip(bars, values):
        y_mid = bar.get_y() + bar.get_height() / 2
        ax.text(frac + 0.002, y_mid, f'{frac:.4f}',
                va='center', ha='left', fontsize=fontsize - 1)

    ax.set_xlabel('Area fraction', fontsize=fontsize)
    ax.tick_params(labelsize=fontsize)
    ax.set_title(title, fontsize=fontsize + 1)

    # 25 % headroom keeps the annotations inside the axes; fall back to a
    # unit range when no bar has positive length.
    if max(values) > 0:
        x_upper = max(values) * 1.25
    else:
        x_upper = 1
    ax.set_xlim(0, x_upper)

    # barh stacks bottom-up; inverting puts the first category at the top.
    ax.invert_yaxis()

    overall = tvf_result['overall_twin_frac']
    note_box = dict(boxstyle='round,pad=0.3', facecolor='#f5f5f5',
                    edgecolor='#cccccc')
    ax.text(0.98, 0.04, f'Overall TVF = {overall:.4f}',
            transform=ax.transAxes, ha='right', va='bottom',
            fontsize=fontsize, color='#222222',
            bbox=note_box)

    plt.tight_layout()
    plt.show()