"""
vizDistr.py — Distribution visualisation for UPXO grain structure analyses.
Provides the DistrViz class for plotting scalar grain property distributions
(area, perimeter, aspect ratio, …) and angular misorientation distributions
(MDF). Designed to complement ebsdviz.plot_mdf — use DistrViz.plot_mdf when
peaks are not yet computed; use ebsdviz.plot_mdf for fully annotated MDF with
peak labels and KDE from the peaks dict.
Typical usage
-------------
Grain size:
dv = DistrViz(areas, label='Grain area', units='µm²')
fig, ax = dv.plot_hist(bins=40, show_kde=True, step_size=rdr.step_size)
plt.show()
dv.print_stats()
MDF (lightweight, no peaks dict required):
dv = DistrViz.from_mdf(mdf)
fig, ax = dv.plot_mdf(mdf)
plt.show()
Multiple properties:
fig, axes = DistrViz.multi(
{'Grain area': areas, 'Aspect ratio': ar, 'Perimeter': perim},
units_dict={'Grain area': 'µm²', 'Aspect ratio': '', 'Perimeter': 'µm'},
step_size=rdr.step_size,
)
plt.show()
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats as sp_stats
# Physical units for common grain morphological properties.
# Imported by EBSDReader.see_distr and repgen2d.see_distr to avoid duplication.
PROP_UNITS = {
    'area': 'µm²',
    # Linear dimensions all share the same length unit.
    **dict.fromkeys(
        ('perimeter', 'eq_diameter', 'major_axis_length', 'minor_axis_length'),
        'µm',
    ),
    # Dimensionless shape descriptors carry an empty unit string.
    **dict.fromkeys(('aspect_ratio', 'eccentricity', 'solidity'), ''),
    'npixels': 'px',
}

# CSL reference angles for cubic symmetry (Σ label → disorientation angle °)
_CSL_ANGLES = dict(
    S3=60.00,
    S5=36.87,
    S7=38.21,
    S9=38.94,
    S11=50.48,
    S13a=22.62,
    S13b=27.80,
)
class DistrViz:
    """
    Distribution visualiser for scalar grain properties and MDF data.

    Parameters
    ----------
    data : array-like
        Values of any shape; flattened to 1-D. NaN/Inf are stripped
        automatically.
    label : str
        Property name — used in axis labels and titles.
    units : str
        Unit string (e.g. 'µm²', '°'). Appended to x-label when non-empty.

    Attributes
    ----------
    data : numpy.ndarray
        1-D float array holding only the finite input values.
    label, units : str
        Stored unchanged from the constructor arguments.
    """

    def __init__(self, data, label='value', units=''):
        """Flatten *data* to a float array and keep only its finite entries."""
        arr = np.asarray(data, dtype=float).ravel()
        self.data = arr[np.isfinite(arr)]
        self.label = label
        self.units = units

    # ── Alternate constructors ─────────────────────────────────────────────

    @classmethod
    def from_mdf(cls, mdf):
        """Build from an mdf dict (output of compute_mdf_from_quats).

        Only ``mdf['miso_deg']`` is read; label and units are fixed to
        'Misorientation angle' and '°'.
        """
        return cls(mdf['miso_deg'], label='Misorientation angle', units='°')

    # ── Statistics ─────────────────────────────────────────────────────────

    @property
    def stats(self):
        """
        Dict of descriptive statistics computed from self.data.

        Keys: 'n', 'min', 'max', 'mean', 'median', 'std' (population,
        ddof=0), 'skew', 'kurt' (scipy defaults), 'p10', 'p90'. The dict is
        recomputed on every access.

        When no finite data is held, 'n' is 0 and every other entry is NaN,
        so titles and printed summaries degrade gracefully instead of
        failing inside a NumPy reduction.
        """
        d = self.data
        if d.size == 0:
            nan = float('nan')
            return {
                'n': 0, 'min': nan, 'max': nan, 'mean': nan, 'median': nan,
                'std': nan, 'skew': nan, 'kurt': nan, 'p10': nan, 'p90': nan,
            }
        p10, p90 = np.percentile(d, [10, 90])
        return {
            'n': len(d),
            'min': float(d.min()),
            'max': float(d.max()),
            'mean': float(d.mean()),
            'median': float(np.median(d)),
            'std': float(d.std()),
            'skew': float(sp_stats.skew(d)),
            'kurt': float(sp_stats.kurtosis(d)),
            'p10': float(p10),
            'p90': float(p90),
        }

    def print_stats(self):
        """Print a compact statistics summary to stdout."""
        s = self.stats
        u = f' ({self.units})' if self.units else ''
        print(f"{self.label}{u} [n={s['n']}]")
        print(f" min={s['min']:.3f} max={s['max']:.3f} "
              f"mean={s['mean']:.3f} median={s['median']:.3f}")
        print(f" std={s['std']:.3f} skew={s['skew']:.3f} "
              f"kurt={s['kurt']:.3f}")
        print(f" P10={s['p10']:.3f} P90={s['p90']:.3f}")

    # ── Internal helpers ───────────────────────────────────────────────────

    def _xlabel(self, step_size=None):
        """Return the x-axis label: the label plus ``(units, step=… µm)``.

        The parenthesised part is omitted when there are neither units nor
        a step size.
        """
        details = [self.units] if self.units else []
        if step_size is not None:
            details.append(f'step={step_size} µm')
        if not details:
            return self.label
        return f"{self.label} ({', '.join(details)})"

    def _stat_title(self):
        """Return the panel title summarising n, mean and std."""
        s = self.stats
        return (f'{self.label} distribution '
                f'(n={s["n"]}, mean={s["mean"]:.2f}, std={s["std"]:.2f})')

    def _draw_stat_lines(self, ax):
        """Draw labelled vertical mean and median lines on *ax*.

        Nothing is drawn when there is no data (the statistics are NaN).
        """
        s = self.stats
        if s['n'] == 0:
            return
        ax.axvline(s['mean'], color='k', ls='--', lw=1.2,
                   label=f'mean = {s["mean"]:.2f}')
        ax.axvline(s['median'], color='darkorange', ls=':', lw=1.2,
                   label=f'median = {s["median"]:.2f}')

    def _kde_curve(self, bw_method=None, n_points=400):
        """
        Evaluate a Gaussian KDE of the data on an even grid over its range.

        Returns
        -------
        (x, y) arrays, or ``None`` when the data holds fewer than two values
        or has zero spread — inputs for which a KDE cannot be estimated.
        """
        d = self.data
        if d.size < 2:
            return None
        lo, hi = d.min(), d.max()
        if not hi > lo:
            return None
        kde = sp_stats.gaussian_kde(d, bw_method=bw_method)
        x = np.linspace(lo, hi, n_points)
        return x, kde(x)

    @staticmethod
    def _get_axes(ax, figsize):
        """Return ``(fig, ax, own_fig)``; a new figure is made if *ax* is None."""
        if ax is None:
            fig, ax = plt.subplots(figsize=figsize)
            return fig, ax, True
        return ax.figure, ax, False

    # ── Unified dispatcher ─────────────────────────────────────────────────

    def plot(self, vis='hist', bins=40, show_kde=True, show_stats=True,
             color='steelblue', figsize=(7, 4), log_scale=False,
             step_size=None, bw_method='scott', fill=True, ax=None):
        """
        Unified plot dispatcher — routes to plot_hist, plot_kde, or
        plot_hist_kde based on *vis*.

        Parameters
        ----------
        vis : str
            ``'hist'``, ``'kde'``, or ``'hist_kde'``.
        bins : int
            Histogram bin count (used by ``'hist'`` and ``'hist_kde'``).
        show_kde : bool
            KDE overlay on histogram (``'hist'`` only).
        show_stats : bool
            Annotate mean / median lines.
        color : str
        figsize : tuple
        log_scale : bool
            Log x-axis (``'hist'`` only).
        step_size : float or None
            Appended to x-label when provided.
        bw_method : str or float
            KDE bandwidth selector (``'kde'`` only).
        fill : bool
            Fill KDE area (``'kde'`` only).
        ax : Axes or None

        Returns
        -------
        fig, ax

        Raises
        ------
        ValueError
            If *vis* is not one of the three supported names.
        """
        if vis == 'hist':
            return self.plot_hist(bins=bins, show_kde=show_kde,
                                  show_stats=show_stats, color=color,
                                  figsize=figsize, log_scale=log_scale,
                                  step_size=step_size, ax=ax)
        if vis == 'kde':
            return self.plot_kde(bw_method=bw_method, fill=fill,
                                 color=color, show_stats=show_stats,
                                 figsize=figsize, step_size=step_size, ax=ax)
        if vis == 'hist_kde':
            return self.plot_hist_kde(bins=bins, color=color,
                                      show_stats=show_stats, figsize=figsize,
                                      step_size=step_size, ax=ax)
        # !r already adds quotes; wrapping it in more quotes gave ''value''.
        raise ValueError(
            f"vis must be 'hist', 'kde', or 'hist_kde'; got {vis!r}"
        )

    # ── Scalar distribution plots ──────────────────────────────────────────

    def plot_hist(self, bins=40, show_kde=True, show_stats=True,
                  color='steelblue', figsize=(7, 4), log_scale=False,
                  step_size=None, ax=None):
        """
        Histogram with optional KDE overlay and mean/median annotations.

        Parameters
        ----------
        bins : int
        show_kde : bool
            KDE curve scaled to match histogram counts. Skipped silently
            when the data has fewer than two values or zero spread.
        show_stats : bool
            Draw vertical mean and median lines (and the legend).
        color : str
        figsize : tuple
            Used only when *ax* is None.
        log_scale : bool
            Log x-axis. Bins are still computed on a linear scale; only the
            axis scale changes.
        step_size : float or None
            EBSD step size — appended to x-label when provided.
        ax : Axes or None
            Existing axes to draw on; a new figure is created when None.

        Returns
        -------
        fig, ax
        """
        fig, ax, own_fig = self._get_axes(ax, figsize)
        _, edges, _ = ax.hist(self.data, bins=bins,
                              color=color, edgecolor='k',
                              alpha=0.75, label='histogram')
        if show_kde:
            curve = self._kde_curve()
            if curve is not None:
                x, density = curve
                # density × n × bin width converts the PDF to expected counts.
                bin_width = edges[1] - edges[0]
                ax.plot(x, density * len(self.data) * bin_width,
                        color='crimson', lw=1.8, label='KDE')
        if show_stats:
            self._draw_stat_lines(ax)
            ax.legend(fontsize=8, framealpha=0.7)
        ax.set_xlabel(self._xlabel(step_size))
        ax.set_ylabel('Count')
        ax.set_title(self._stat_title())
        if log_scale:
            ax.set_xscale('log')
        if own_fig:
            fig.tight_layout()
        return fig, ax

    def plot_kde(self, bw_method='scott', fill=True, color='steelblue',
                 show_stats=True, figsize=(7, 4), step_size=None, ax=None):
        """
        Pure KDE plot (probability density).

        Parameters
        ----------
        bw_method : str or float
            Bandwidth selector passed to scipy.stats.gaussian_kde.
        fill : bool
            Fill area under the KDE curve.
        color, figsize, step_size, ax
            Standard plot options.
        show_stats : bool
            Draw vertical mean and median lines (and the legend).

        Returns
        -------
        fig, ax

        Raises
        ------
        ValueError
            If the data has fewer than two values or zero spread. The check
            runs before any figure is created.
        """
        curve = self._kde_curve(bw_method=bw_method)
        if curve is None:
            raise ValueError(
                'KDE requires at least two finite values that are not all '
                f'equal; got n={len(self.data)}'
            )
        x, y = curve
        fig, ax, own_fig = self._get_axes(ax, figsize)
        if fill:
            ax.fill_between(x, y, alpha=0.3, color=color)
        ax.plot(x, y, color=color, lw=2)
        if show_stats:
            self._draw_stat_lines(ax)
            ax.legend(fontsize=8, framealpha=0.7)
        ax.set_xlabel(self._xlabel(step_size))
        ax.set_ylabel('Density')
        ax.set_title(self._stat_title())
        if own_fig:
            fig.tight_layout()
        return fig, ax

    def plot_hist_kde(self, bins=40, color='steelblue', show_stats=True,
                      figsize=(7, 4), step_size=None, ax=None):
        """
        Density-normalised histogram with KDE overlay.

        The KDE overlay is skipped when the data has fewer than two values
        or zero spread; the histogram is still drawn.

        Returns
        -------
        fig, ax
        """
        fig, ax, own_fig = self._get_axes(ax, figsize)
        ax.hist(self.data, bins=bins, density=True,
                color=color, edgecolor='k', alpha=0.5, label='histogram')
        curve = self._kde_curve()
        if curve is not None:
            ax.plot(curve[0], curve[1], color='crimson', lw=2, label='KDE')
        if show_stats:
            self._draw_stat_lines(ax)
            ax.legend(fontsize=8, framealpha=0.7)
        ax.set_xlabel(self._xlabel(step_size))
        ax.set_ylabel('Density')
        ax.set_title(self._stat_title())
        if own_fig:
            fig.tight_layout()
        return fig, ax

    # ── MDF plot ───────────────────────────────────────────────────────────

    def plot_mdf(self, mdf, show_csl=True, show_stats=True,
                 angle_max=65.0, figsize=(8, 4), ax=None):
        """
        Bar-chart MDF from a pre-computed mdf dict with optional CSL markers.

        Lighter alternative to ebsdviz.plot_mdf — does not require the peaks
        dict. Use ebsdviz.plot_mdf when peak labels and KDE are needed.
        Everything drawn comes from *mdf*; ``self.data`` is not used.

        Parameters
        ----------
        mdf : dict
            Output of compute_mdf_from_quats. Keys read here:
            'hist_bin_centers', 'hist_density', 'hist_bin_edges', 'n_pairs',
            and — only when *show_stats* is true — 'mean_angle', 'std_angle'.
        show_csl : bool
            Draw dashed vertical lines at common cubic CSL angles.
        show_stats : bool
            Annotate mean ± std in the legend.
        angle_max : float
            X-axis upper limit (degrees). CSL markers beyond it are omitted.
        figsize : tuple
        ax : Axes or None

        Returns
        -------
        fig, ax
        """
        fig, ax, own_fig = self._get_axes(ax, figsize)
        # Bar width comes from the first bin, i.e. uniform bins are assumed.
        bin_width = float(mdf['hist_bin_edges'][1] - mdf['hist_bin_edges'][0])
        ax.bar(mdf['hist_bin_centers'], mdf['hist_density'],
               width=bin_width, color='steelblue', edgecolor='k',
               linewidth=0.4, alpha=0.85)
        if show_csl:
            ymax = float(np.max(mdf['hist_density']))
            for lbl, angle in _CSL_ANGLES.items():
                if angle > angle_max:
                    continue
                ax.axvline(angle, color='firebrick',
                           lw=0.9, ls='--', alpha=0.75)
                ax.text(angle + 0.3, ymax * 0.93, lbl,
                        color='firebrick', fontsize=7,
                        va='top', rotation=90)
        if show_stats:
            mean_a = mdf['mean_angle']
            std_a = mdf['std_angle']
            ax.axvline(mean_a, color='k', ls='--', lw=1.2,
                       label=f'mean = {mean_a:.1f}° (σ = {std_a:.1f}°)')
            ax.legend(fontsize=8, framealpha=0.7)
        ax.set_xlabel('Misorientation angle (°)')
        ax.set_ylabel('Probability density (°⁻¹)')
        ax.set_title(f'Grain-boundary MDF '
                     f'(n={mdf["n_pairs"]} pairs, cubic symmetry)')
        ax.set_xlim(0, angle_max)
        if own_fig:
            fig.tight_layout()
        return fig, ax

    # ── Multi-property grid ────────────────────────────────────────────────

    @classmethod
    def multi(cls, data_dict, units_dict=None, step_size=None,
              bins=40, show_kde=True, show_stats=True,
              ncolumns=2, figsize_per=(5, 3.5), color='steelblue',
              log_scale=False):
        """
        Plot distributions for multiple grain properties in a subplot grid.

        Each property is drawn with :meth:`plot_hist`; unused grid cells are
        hidden.

        Parameters
        ----------
        data_dict : dict
            {label: array-like} of grain properties to plot.
        units_dict : dict or None
            {label: units_str}. Missing keys default to no units.
        step_size : float or None
            Passed to each subplot for x-label annotation.
        bins : int
        show_kde : bool
        show_stats : bool
        ncolumns : int
        figsize_per : tuple
            (width, height) per panel in inches.
        color : str
        log_scale : bool

        Returns
        -------
        fig, axes (axes is a flat ndarray)

        Raises
        ------
        ValueError
            If *data_dict* is empty or *ncolumns* is smaller than 1.
        """
        labels = list(data_dict)
        n = len(labels)
        if n == 0:
            raise ValueError('data_dict must contain at least one property')
        if ncolumns < 1:
            raise ValueError(f'ncolumns must be >= 1; got {ncolumns!r}')
        nrows = (n + ncolumns - 1) // ncolumns
        figsize = (figsize_per[0] * ncolumns, figsize_per[1] * nrows)
        # squeeze=False always yields a 2-D array, so flattening is uniform.
        fig, axes = plt.subplots(nrows, ncolumns, figsize=figsize,
                                 squeeze=False)
        axes_flat = axes.ravel()
        units_dict = units_dict or {}
        for ax, label in zip(axes_flat, labels):
            dv = cls(data_dict[label], label=label,
                     units=units_dict.get(label, ''))
            dv.plot_hist(bins=bins, show_kde=show_kde, show_stats=show_stats,
                         color=color, log_scale=log_scale,
                         step_size=step_size, ax=ax)
        for ax in axes_flat[n:]:
            ax.set_visible(False)
        fig.tight_layout()
        return fig, axes_flat
# ── Multi-group overlaid distribution plot ─────────────────────────────────────
def plot_grouped_distributions(
    data,
    prop_labels=None,
    group_colors=None,
    group_labels=None,
    bins=40,
    bw_method='scott',
    peak_prominence=0.01,
    figsize_per=(5, 4),
    dpi=110,
    suptitle='Property distributions by group',
    ncols=None,
    fontsize=9.0,
    show_hist=True,
    show_peaks=True,
    show_legend=True,
    x_margin=0.03,
    do_tight_layout=True,
):
    """
    Overlaid histogram + KDE + peak markers for multiple properties and groups.

    Generic plotting function — no knowledge of grain structures or UPXO data
    formats. Data must be pre-extracted into plain arrays before calling.

    Parameters
    ----------
    data : dict
        ``{prop_name: {group_name: array-like}}`` — one entry per property,
        each containing one array per group. Non-finite values are removed
        first; groups left with fewer than two values are silently skipped.
        A group whose values are all equal is shown in the histogram and
        legend but gets no KDE curve or peak markers.
    prop_labels : dict or None
        ``{prop_name: display_label}`` for axis / title text. Missing keys
        fall back to the prop_name itself.
    group_colors : dict or None
        ``{group_name: colour_string}``. Missing keys cycle through a default
        palette. The passed dict is not modified.
    group_labels : dict or None
        ``{group_name: display_label}`` for legend entries. Missing keys fall
        back to the group_name itself.
    bins : int
        Number of histogram bins (shared x-range across groups per property).
    bw_method : str or float
        Bandwidth selector passed to ``scipy.stats.gaussian_kde``.
    peak_prominence : float
        Fraction of KDE maximum used as minimum prominence for ``find_peaks``.
    figsize_per : tuple
        ``(width, height)`` in inches per subplot panel.
    dpi : int
        Figure resolution.
    suptitle : str
        Figure-level title.
    ncols : int or None
        Subplot grid columns. ``None`` places all panels in a single row.
    fontsize : float
        Base font size; tick labels use ``fontsize-2``, legend ``fontsize-2``,
        peak annotations ``fontsize-3``, suptitle ``fontsize+1``.
    show_hist : bool
        Draw histogram bars behind the KDE curves. Default ``True``.
    show_peaks : bool
        Draw vertical dashed lines and value annotations at KDE peaks.
        Default ``True``.
    show_legend : bool
        Draw a per-group legend on each subplot. Default ``True``.
    x_margin : float
        Fractional padding added to both sides of the x-axis so that tick
        labels are never clipped at the axis boundary. Default ``0.03``.
    do_tight_layout : bool
        Apply tight layout to the figure before returning. Set to ``False``
        when the caller needs to adjust the figure (e.g. to add a colorbar)
        before finalising the layout. Default ``True``.

    Returns
    -------
    fig, axes : Figure and 2-D axes array (shape ``(nrows, ncols_used)``).
        Panels with no usable group, or whose combined values are all
        equal, are hidden.

    Raises
    ------
    ValueError
        If *data* contains no properties.
    """
    if not data:
        raise ValueError('data must contain at least one property')

    from scipy.stats import gaussian_kde
    from scipy.signal import find_peaks

    default_palette = [
        '#4878CF', '#D65F5F', '#59A14F', '#888888',
        '#F28E2B', '#76B7B2', '#E15759', '#B07AA1',
    ]
    prop_labels = prop_labels or {}
    group_labels = group_labels or {}
    # Work on a copy: the palette defaults added below must not leak into
    # the dict owned by the caller.
    group_colors = dict(group_colors or {})

    prop_names = list(data)
    n_props = len(prop_names)

    # Unique group names in first-seen order get palette colours in turn.
    all_groups = dict.fromkeys(g for gd in data.values() for g in gd)
    for i, g in enumerate(all_groups):
        group_colors.setdefault(g, default_palette[i % len(default_palette)])

    n_cols = n_props if ncols is None else max(1, min(ncols, n_props))
    n_rows = int(np.ceil(n_props / n_cols))
    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(n_cols * figsize_per[0], n_rows * figsize_per[1]),
        dpi=dpi, squeeze=False,
    )
    for spare in range(n_props, n_rows * n_cols):
        axes[spare // n_cols, spare % n_cols].set_visible(False)

    for idx, pname in enumerate(prop_names):
        ax = axes[idx // n_cols, idx % n_cols]

        # Strip non-finite values BEFORE the size check, so that e.g.
        # [1.0, nan] is treated as a single-value group and skipped.
        arrays = {}
        for grp, raw in data[pname].items():
            vals = np.asarray(raw, dtype=float).ravel()
            vals = vals[np.isfinite(vals)]
            if vals.size > 1:
                arrays[grp] = vals
        if not arrays:
            ax.set_visible(False)
            continue

        combined = np.concatenate(list(arrays.values()))
        vmin, vmax = combined.min(), combined.max()
        if vmin == vmax:
            ax.set_visible(False)
            continue

        pad = x_margin * (vmax - vmin)
        bin_edges = np.linspace(vmin, vmax, bins + 1)
        bin_w = bin_edges[1] - bin_edges[0]
        xs = np.linspace(vmin, vmax, 600)

        for grp, vals in arrays.items():
            colour = group_colors.get(grp, '#333333')
            if show_hist:
                counts, _ = np.histogram(vals, bins=bin_edges, density=True)
                ax.bar(bin_edges[:-1], counts, width=bin_w,
                       color=colour, alpha=0.28, edgecolor='none',
                       align='edge')
            # A constant-valued group has no spread to estimate a KDE from.
            if vals.max() > vals.min():
                ys = gaussian_kde(vals, bw_method=bw_method)(xs)
                ax.plot(xs, ys, color=colour, linewidth=1.8)
                if show_peaks:
                    peak_idx, _ = find_peaks(
                        ys, prominence=peak_prominence * ys.max())
                    for pi in peak_idx:
                        ax.axvline(xs[pi], color=colour, linewidth=0.8,
                                   linestyle='--', alpha=0.7)
                        ax.text(xs[pi], ys[pi] * 1.03, f'{xs[pi]:.3g}',
                                fontsize=fontsize - 3, color=colour,
                                ha='center', va='bottom', rotation=90)
            if show_legend:
                mn, mx = vals.min(), vals.max()
                mu, sd = vals.mean(), vals.std()
                disp = group_labels.get(grp, grp)
                lbl = (f'{disp} (n={len(vals)})\n'
                       f' µ={mu:.3g} σ={sd:.3g} [{mn:.3g}, {mx:.3g}]')
                # Empty proxy line: carries only the legend entry.
                ax.plot([], [], color=colour, linewidth=2.5, label=lbl)

        xlabel = prop_labels.get(pname, pname)
        ax.set_xlabel(xlabel, fontsize=fontsize)
        ax.set_ylabel('Density', fontsize=fontsize)
        ax.set_title(xlabel, fontsize=fontsize)
        ax.set_xlim(vmin - pad, vmax + pad)
        if show_legend:
            ax.legend(fontsize=fontsize - 2, loc='upper right',
                      framealpha=0.85, handlelength=1.2)
        ax.tick_params(labelsize=fontsize - 2)

    fig.suptitle(suptitle, fontsize=fontsize + 1, y=1.02)
    if do_tight_layout:
        fig.tight_layout()
    return fig, axes
def plot_repr_rank(
    repr_rank_ng: dict,
    figsize=None,
    dpi: int = 100,
    fontsize_annot: float = 8.0,
    fontsize_tick: float = 9.0,
    fontsize_title: float = 9.0,
    fontsize_suptitle: float = 11.0,
) -> None:
    """
    Vertically stacked heatmaps (up to five) showing the per-property rank
    of every MC time slice under each representativeness metric (ratio,
    Wasserstein, energy distance, KS statistic, Anderson–Darling statistic).

    One panel is drawn for each metric present in ``repr_rank_ng``; metrics
    that are absent are skipped. The figure is shown with ``plt.show()``.

    Colour encodes rank within each column independently:
    green = best (rank 1), red = worst (rank N). Cell text shows the raw
    numeric score. Rows are drawn in DataFrame order; the y-axis label
    assumes that order is best-to-worst by the aggregate score.

    Ranking rule per column:
      - ratio, property columns : rank by ``|value − 1|`` ascending
        (closest to 1.0 = best)
      - ratio, ``'aggregate'`` column : rank by value ascending
      - wasserstein / energy / ks / ad : rank by value ascending
        (lowest = best)

    A white vertical line is drawn before the last column of each panel.

    Parameters
    ----------
    repr_rank_ng : dict
        ``{metric: df}`` with any of the keys ``'ratio'``, ``'wasserstein'``,
        ``'energy'``, ``'ks'``, ``'ad'`` — as stored in
        ``repgen2d.repr_rank_ng`` after calling ``find_repr_mcgs_props``.
        Each value must offer ``.values``, ``.columns``, ``.index`` and
        ``.shape`` (DataFrame-like).
    figsize : tuple or None
        Override default figure size. Default auto-computes from data shape.
    dpi : int
        Figure resolution.
    fontsize_annot : float
        Font size for the numeric value printed in each cell.
    fontsize_tick : float
        Font size for axis tick labels (slice keys on y-axis, column names
        on x-axis).
    fontsize_title : float
        Font size for each panel title.
    fontsize_suptitle : float
        Font size for the overall figure title.

    Raises
    ------
    ValueError
        If ``repr_rank_ng`` contains none of the known metrics.
    """
    titles = {
        'ratio': 'Ratio (mean offset)\n1.0 = perfect | green = closest to 1.0',
        'wasserstein': 'Wasserstein (shape distance)\n0 = identical | green = smallest',
        'energy': 'Energy (shape distance)\n0 = identical | green = smallest',
        'ks': 'KS statistic (max CDF gap)\n0 = identical | green = smallest',
        'ad': 'Anderson–Darling (tail-sensitive CDF)\n0 = identical | green = smallest',
    }
    fmt = {'ratio': '{:.3f}', 'wasserstein': '{:.4f}', 'energy': '{:.4f}',
           'ks': '{:.4f}', 'ad': '{:.4f}'}

    # Fixed display order, restricted to the metrics actually supplied.
    metrics = [m for m in ('ratio', 'wasserstein', 'energy', 'ks', 'ad')
               if m in repr_rank_ng]
    if not metrics:
        raise ValueError(
            "repr_rank_ng must contain at least one of 'ratio', "
            "'wasserstein', 'energy', 'ks', 'ad'"
        )
    n_panels = len(metrics)

    # Figure size is derived from one representative table.
    sample_key = 'wasserstein' if 'wasserstein' in repr_rank_ng else metrics[0]
    n_slices, n_cols = repr_rank_ng[sample_key].shape
    if figsize is None:
        figsize = (max(10, n_cols * 1.8),
                   max(4 * n_panels, n_slices * 0.65 * n_panels))

    # squeeze=False keeps a 2-D axes array even for a single panel.
    fig, axes = plt.subplots(n_panels, 1, figsize=figsize, dpi=dpi,
                             squeeze=False)
    for ax, metric in zip(axes[:, 0], metrics):
        df = repr_rank_ng[metric]
        vals = df.values.astype(float)
        cols = list(df.columns)
        rows = [str(k) for k in df.index]
        nr, nc = vals.shape

        rank_mat = np.empty_like(vals)
        for j, col in enumerate(cols):
            key = vals[:, j]
            if metric == 'ratio' and col != 'aggregate':
                key = np.abs(key - 1.0)
            # The row at sorted position r receives rank r (0 = best).
            rank_mat[np.argsort(key), j] = np.arange(nr)
        norm_rank = rank_mat / max(nr - 1, 1)  # 0 = best, 1 = worst

        ax.imshow(norm_rank, cmap='RdYlGn_r', vmin=0, vmax=1,
                  aspect='auto', interpolation='nearest')
        for i, j in np.ndindex(nr, nc):
            ax.text(j, i, fmt[metric].format(vals[i, j]),
                    ha='center', va='center',
                    fontsize=fontsize_annot, color='black')

        ax.set_xticks(range(nc))
        ax.set_xticklabels(cols, rotation=30, ha='right',
                           fontsize=fontsize_tick)
        ax.set_yticks(range(nr))
        ax.set_yticklabels(rows, fontsize=fontsize_tick)
        ax.set_ylabel('MC time slice (top = best aggregate)',
                      fontsize=fontsize_tick)
        ax.set_title(titles[metric], fontsize=fontsize_title, pad=8)
        ax.axvline(nc - 1.5, color='white', linewidth=2)

    fig.suptitle('MC–EBSD representativeness ranking',
                 fontsize=fontsize_suptitle, y=1.01)
    fig.tight_layout()
    plt.show()
def plot_normalized_prop_distributions(
    ebsd_data: dict,
    mc_data: dict,
    props: list,
    scores: dict | None = None,
    prop_labels: dict | None = None,
    bins: int = 40,
    bw_method='scott',
    figsize_per: tuple = (5, 4),
    dpi: int = 100,
    ncols: int | None = None,
    fontsize: float = 9.0,
    show_hist: bool = True,
    show_peaks: bool = True,
    legend_loc: str = 'upper right',
    legend_ncol: int = 1,
    legend_fontsize: float | None = None,
) -> None:
    """
    Overlay mean-normalised property distributions of EBSD (merged) and MC
    slices, one panel per property, and show the figure.

    The inputs are expected to be normalised already (each array divided by
    its own mean, as in ``find_repr_mcgs_props``); this function plots them
    as given. Curves are then centred near 1.0 and shape-comparable. When
    ``scores`` is given, Wasserstein and energy distances are appended to
    each panel's legend.

    Parameters
    ----------
    ebsd_data : dict
        ``{prop: array}`` of EBSD-merged property values, each already
        divided by its own mean.
    mc_data : dict
        ``{slice_key: {prop: array}}`` of MC property values, each already
        divided by its own mean.
    props : list of str
        Ordered list of property names to plot.
    scores : dict or None
        ``{slice_key: {prop: {'wasserstein': v, 'energy': v}}}`` extracted
        from ``repr_rank_ng``. For every slice/property pair found, one
        extra legend entry ``W=... E=...`` is added; a missing distance
        prints as NaN.
    prop_labels : dict or None
        ``{prop: display_label}``. Defaults to
        ``f'{prop} (mean normalized)'``.
    bins, bw_method, figsize_per, dpi, ncols, fontsize, show_hist, show_peaks
        Forwarded to :func:`plot_grouped_distributions`.
    legend_loc : str
        Legend location string passed to ``ax.legend(loc=...)``, e.g.
        ``'upper right'``, ``'upper left'``, ``'lower right'``,
        ``'center left'``, ``'best'``. Default ``'upper right'``.
    legend_ncol : int
        Number of legend columns. Default ``1``.
    legend_fontsize : float or None
        Legend text size; ``fontsize - 2`` when None.
    """
    palette = (
        '#4878CF', '#D65F5F', '#59A14F', '#F28E2B',
        '#76B7B2', '#E15759', '#B07AA1', '#FF9DA7',
    )
    ebsd_group = 'EBSD (merged)'

    if prop_labels is None:
        prop_labels = {name: f'{name} (mean normalized)' for name in props}

    # Slice position → palette colour, wrapping around when slices outnumber
    # the palette entries.
    def colour_at(position):
        return palette[position % len(palette)]

    group_colors = {ebsd_group: '#222222'}
    group_colors.update(
        (f'MC t={key}', colour_at(pos)) for pos, key in enumerate(mc_data)
    )

    # Per property: the EBSD array first, then one array per MC slice.
    data = {}
    for name in props:
        per_group = {ebsd_group: ebsd_data[name]}
        per_group.update(
            (f'MC t={key}', slice_props[name])
            for key, slice_props in mc_data.items()
        )
        data[name] = per_group

    # Layout is deferred so the legends can be rebuilt below first.
    fig, axes = plot_grouped_distributions(
        data,
        prop_labels=prop_labels,
        group_colors=group_colors,
        bins=bins,
        bw_method=bw_method,
        figsize_per=figsize_per,
        dpi=dpi,
        ncols=ncols,
        fontsize=fontsize,
        show_hist=show_hist,
        show_peaks=show_peaks,
        suptitle='Normalised property distributions — EBSD (merged) vs MC slices',
        do_tight_layout=False,
    )

    if legend_fontsize is None:
        legend_size = fontsize - 2
    else:
        legend_size = legend_fontsize

    # Add score entries, then redraw each legend with the requested style.
    for panel, name in enumerate(props):
        ax = axes.flat[panel]
        if scores is not None:
            for pos, key in enumerate(mc_data):
                if key not in scores or name not in scores[key]:
                    continue
                entry = scores[key][name]
                w = entry.get('wasserstein', float('nan'))
                e = entry.get('energy', float('nan'))
                # Zero-width proxy line: contributes only legend text.
                ax.plot([], [], color=colour_at(pos), lw=0,
                        label=f' → W={w:.4f} E={e:.4f}')
        ax.legend(fontsize=legend_size, loc=legend_loc, framealpha=0.85,
                  ncol=legend_ncol)

    plt.tight_layout()
    plt.show()
[docs]
def plot_qq_comparison(
ebsd_data: dict,
mc_data: dict,
props: list,
prop_labels: dict | None = None,
figsize_per: tuple = (4, 4),
dpi: int = 100,
ncols: int | None = None,
fontsize: float = 9.0,
) -> None:
"""
Quantile–Quantile (Q-Q) comparison of EBSD vs MC grain property distributions.
A Q-Q plot maps the quantiles of one distribution against the quantiles of
another at the same probability levels (0 % to 100 %). Both distributions
are normalised by their own mean before comparison, so the x- and y-axes
share the same dimensionless scale centred near 1.0.
Interpretation
--------------
- Points on the diagonal (y = x) — the two distributions have identical
shape at that quantile. Perfect agreement.
- Points **above** the diagonal — the MC distribution has *larger* values
than EBSD at that quantile (heavier upper tail or higher spread in MC).
- Points **below** the diagonal — the MC distribution has *smaller* values
than EBSD at that quantile.
- Deviations concentrated in the **lower-left** — fine/small grains differ.
- Deviations concentrated in the **upper-right** — large/coarse grains differ.
One subplot is drawn per property; each MC slice is a separate line.
The dashed black diagonal marks perfect distributional agreement.
Parameters
----------
ebsd_data : dict
``{prop: array}`` of EBSD-merged values, each normalised by own mean.
mc_data : dict
``{slice_key: {prop: array}}`` of MC values, each normalised by own mean.
props : list of str
Properties to plot.
prop_labels : dict or None
``{prop: display_label}``. Defaults to ``f'{prop} (mean normalized)'``.
figsize_per : tuple
``(width, height)`` per subplot in inches.
dpi : int
ncols : int or None
Subplot grid columns. ``None`` places all panels in a single row.
fontsize : float
"""
_MC_PALETTE = [
'#4878CF', '#D65F5F', '#59A14F', '#F28E2B',
'#76B7B2', '#E15759', '#B07AA1', '#FF9DA7',
]
if prop_labels is None:
prop_labels = {p: f'{p} (mean normalized)' for p in props}
n_props = len(props)
_ncols = n_props if ncols is None else max(1, min(ncols, n_props))
_nrows = int(np.ceil(n_props / _ncols))
fig, axes = plt.subplots(
_nrows, _ncols,
figsize=(_ncols * figsize_per[0], _nrows * figsize_per[1]),
dpi=dpi, squeeze=False,
)
q = np.linspace(0, 100, 300)
for idx, p in enumerate(props):
ax = axes[idx // _ncols, idx % _ncols]
ebsd_q = np.percentile(ebsd_data[p], q)
all_vals = list(ebsd_q)
for i, (k, mc_props) in enumerate(mc_data.items()):
mc_q = np.percentile(mc_props[p], q)
all_vals.extend(mc_q)
colour = _MC_PALETTE[i % len(_MC_PALETTE)]
ax.plot(ebsd_q, mc_q, color=colour, lw=1.5, label=f'MC t={k}')
vmin, vmax = min(all_vals), max(all_vals)
ax.plot([vmin, vmax], [vmin, vmax], 'k--', lw=1.0, label='perfect match')
ax.set_xlabel(f'EBSD {prop_labels[p]}', fontsize=fontsize)
ax.set_ylabel(f'MC {prop_labels[p]}', fontsize=fontsize)
ax.set_title(prop_labels[p], fontsize=fontsize)
ax.tick_params(labelsize=fontsize - 1)
ax.legend(fontsize=fontsize - 2, framealpha=0.8)
for spare in range(n_props, _nrows * _ncols):
axes[spare // _ncols, spare % _ncols].set_visible(False)
fig.suptitle('Q-Q plots — EBSD (merged) vs MC slices (mean-normalised)',
fontsize=fontsize + 1, y=1.01)
plt.tight_layout()
plt.show()
def plot_ebsd_tvf(
    tvf_result: dict,
    figsize: tuple = (7, 4),
    dpi: int = 100,
    fontsize: float = 9.0,
    title: str = 'EBSD grain-role area fractions',
) -> None:
    """
    Draw and show a horizontal bar chart of EBSD area fractions by grain role.

    One bar is drawn per grain-role category, listed top to bottom:

    - **Pure parents** — matrix grains; never a twin of any grain.
    - **Primary twins** — first-generation twins whose parent is a pure
      parent.
    - **Secondary twins** — twins whose parent is itself an intermediate
      (twin-of-a-twin, 2nd generation).
    - **Intermediate twins** — grains that are simultaneously a twin of one
      grain and a parent of another (twin chains).

    Each bar is annotated with its value to four decimals, and the overall
    twin area fraction (primary + secondary + intermediate) is shown in a
    boxed note at the lower right of the axes.

    Parameters
    ----------
    tvf_result : dict
        Output of ``repgen2d.compute_ebsd_tvf``. Must contain keys
        ``'pure_parent_frac'``, ``'primary_twin_frac'``,
        ``'secondary_twin_frac'``, ``'intermediate_frac'``,
        ``'overall_twin_frac'``.
    figsize : tuple
        Figure size ``(width, height)`` in inches.
    dpi : int
        Figure resolution.
    fontsize : float
        Base font size for labels and tick marks.
    title : str
        Figure title.
    """
    # (display name, key in tvf_result, bar colour) in drawing order.
    role_specs = (
        ('Pure parents', 'pure_parent_frac', '#555555'),
        ('Primary twins', 'primary_twin_frac', '#4878CF'),
        ('Secondary twins', 'secondary_twin_frac', '#F28E2B'),
        ('Intermediate twins', 'intermediate_frac', '#59A14F'),
    )
    labels, values, colors = [], [], []
    for display_name, key, colour in role_specs:
        labels.append(display_name)
        values.append(tvf_result[key])
        colors.append(colour)

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    bars = ax.barh(labels, values, color=colors, edgecolor='white', height=0.5)

    # Print each value just beyond the end of its bar.
    for rect, frac in zip(bars, values):
        y_mid = rect.get_y() + rect.get_height() / 2
        ax.text(frac + 0.002, y_mid, f'{frac:.4f}',
                va='center', ha='left', fontsize=fontsize - 1)

    ax.set_xlabel('Area fraction', fontsize=fontsize)
    ax.tick_params(labelsize=fontsize)
    ax.set_title(title, fontsize=fontsize + 1)

    # Leave 25 % headroom for the value labels; fall back to a unit-wide
    # axis when no bar is positive.
    largest = max(values)
    if largest > 0:
        x_upper = largest * 1.25
    else:
        x_upper = 1
    ax.set_xlim(0, x_upper)
    ax.invert_yaxis()  # first category at the top

    overall = tvf_result['overall_twin_frac']
    note_box = dict(boxstyle='round,pad=0.3', facecolor='#f5f5f5',
                    edgecolor='#cccccc')
    ax.text(0.98, 0.04, f'Overall TVF = {overall:.4f}',
            transform=ax.transAxes, ha='right', va='bottom',
            fontsize=fontsize, color='#222222', bbox=note_box)

    plt.tight_layout()
    plt.show()