Source code for upxo.viz.dataviz

from upxo._sup import dataTypeHandlers as dth
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats


def see_distr(self, gsdim=2, vis='hist', prop_data_format='dataframe',
              prop_df=None, prop_names=None, props=None, prop_units=None,
              probability_density=False, nbins_values=None,
              bw_adjust_values=None, alpha_values=None, color_values=None,
              edgecolor_values=None, binsize=30, alpha=0.7, color='blue',
              edgecolor='black', ncolumns=3, ylabel='count'):
    '''
    Plot distributions of multiple grain properties in a grid of subplots.

    Parameters
    ----------
    self : object
        Not used by this function; kept so existing call sites keep working.
    gsdim : int, optional
        Dimensionality of the grain structure data. Only 2 is handled; any
        other value makes the function return without plotting.
        Default is 2.
    vis : str, optional
        Visualization type: 'hist' for a histogram, 'kde' for a kernel
        density estimate, or 'hist_kde' for a density histogram with a KDE
        curve overlaid. Default is 'hist'.
    prop_data_format : str, optional
        Format of the property data source. 'dataframe' (aliases
        'pd.DataFrame', 'df', 'pd', 'pdf') reads from ``prop_df``; 'dict'
        reads from ``props``. Any other value makes the function return
        without plotting. Default is 'dataframe'.
    prop_df : pandas.DataFrame, optional
        Grain properties, one column per property, used when
        ``prop_data_format`` selects the dataframe source. If it is not a
        DataFrame the function returns without plotting. Default is None.
    prop_names : str or list/tuple/set of str, optional
        Property name(s) to plot. A single string is treated as a
        one-element list. Names that are not listed in
        ``dth.valid_region_properties.scikitimage_region_properties2d`` are
        silently dropped. Default is
        ['area', 'perimeter', 'orientation', 'solidity'].
    props : dict, optional
        Mapping of property name to its values, used when
        ``prop_data_format`` is 'dict'. Default maps each of the four
        default property names to an empty list.
    prop_units : dict, optional
        Mapping of property name to the unit shown in the x-axis label.
        Properties with no (or an empty) unit are labelled with the bare
        property name. Default is {'area': 'μm²', 'perimeter': 'μm',
        'orientation': 'degrees', 'solidity': ''}.
    probability_density : bool, optional
        For ``vis='hist'``: plot density instead of counts. The 'hist_kde'
        histogram is always drawn as a density so that it is comparable
        with the KDE curve. Default is False.
    nbins_values : dict, optional
        Per-property number of histogram bins. Properties not listed use
        ``binsize``. Default is None (no per-property overrides).
    bw_adjust_values : dict, optional
        Per-property ``bw_adjust`` factor passed to ``seaborn.kdeplot``.
        Properties not listed, or mapped to None, use 1.0, i.e. seaborn's
        default bandwidth unscaled. Default is None.
    alpha_values : dict, optional
        Per-property transparency. Properties not listed use ``alpha``.
        Default is None (no per-property overrides).
    color_values : dict, optional
        Per-property fill color. Properties not listed use ``color``.
        Default is None (no per-property overrides).
    edgecolor_values : dict, optional
        Per-property histogram edge color. Properties not listed use
        ``edgecolor``. Default is None (no per-property overrides).
    binsize : int, optional
        Number of bins for properties without an entry in
        ``nbins_values``. Default is 30.
    alpha : float, optional
        Transparency for properties without an entry in ``alpha_values``.
        Default is 0.7.
    color : str, optional
        Fill color for properties without an entry in ``color_values``.
        Default is 'blue'.
    edgecolor : str, optional
        Edge color for properties without an entry in
        ``edgecolor_values``. Default is 'black'.
    ncolumns : int, optional
        Number of columns in the subplot grid. Default is 3.
    ylabel : str, optional
        Label applied verbatim to every y-axis. Pass e.g. 'density' when
        plotting a KDE or a density histogram. Default is 'count'.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``prop_names`` is neither a string nor a list/tuple/set, or if
        ``vis`` is not one of 'hist', 'kde', 'hist_kde'.

    Notes
    -----
    Every call draws into a newly created figure and finishes with
    ``plt.show()``. Unsupported ``gsdim``, an unknown ``prop_data_format``,
    a data source of the wrong type, or an empty list of valid properties
    all result in a silent return without any figure being created.

    Usage
    -----
    from upxo.viz.dataviz import see_distr
    see_distr(None, prop_df=df, prop_names=['area', 'perimeter'],
              vis='hist_kde', ylabel='density')
    '''
    if gsdim != 2:
        return

    # Resolve None sentinels here instead of using mutable default
    # arguments, which would be shared between calls.
    if prop_names is None:
        prop_names = ['area', 'perimeter', 'orientation', 'solidity']
    if props is None:
        props = {'area': [], 'perimeter': [], 'orientation': [],
                 'solidity': []}
    if prop_units is None:
        prop_units = {'area': 'μm²', 'perimeter': 'μm',
                      'orientation': 'degrees', 'solidity': ''}
    # Empty override maps let the scalar fallbacks (binsize, alpha, color,
    # edgecolor) take effect for every property the caller did not list.
    nbins_values = {} if nbins_values is None else nbins_values
    bw_adjust_values = {} if bw_adjust_values is None else bw_adjust_values
    alpha_values = {} if alpha_values is None else alpha_values
    color_values = {} if color_values is None else color_values
    edgecolor_values = {} if edgecolor_values is None else edgecolor_values

    # Select the data source based on the declared format.
    if prop_data_format in ('dataframe', 'pd.DataFrame', 'df', 'pd', 'pdf'):
        if not isinstance(prop_df, pd.DataFrame):
            return
        data_source = prop_df
    elif prop_data_format == 'dict':
        if not isinstance(props, dict):
            return
        data_source = props
    else:
        return

    if isinstance(prop_names, str):
        prop_names = [prop_names]
    elif not isinstance(prop_names, (list, tuple, set)):
        raise ValueError("prop_names must be a string or an iterable of strings")

    # Keep only the property names known to the project's validity list.
    valid_names = dth.valid_region_properties.scikitimage_region_properties2d
    properties = [prop for prop in prop_names if prop in valid_names]
    if not properties:
        return

    if vis not in ('hist', 'kde', 'hist_kde'):
        # Previously an unknown value produced blank, labelled axes.
        raise ValueError("vis must be one of 'hist', 'kde' or 'hist_kde'")

    nbins = {prop: nbins_values.get(prop, binsize) for prop in properties}
    alpha_prop = {prop: alpha_values.get(prop, alpha) for prop in properties}
    color_prop = {prop: color_values.get(prop, color) for prop in properties}
    edgecolor_prop = {prop: edgecolor_values.get(prop, edgecolor)
                      for prop in properties}

    # A missing or None bandwidth factor means "do not rescale": 1.0 keeps
    # seaborn's default bandwidth.
    bw_adjust = {}
    for prop in properties:
        bw_val = bw_adjust_values.get(prop)
        bw_adjust[prop] = 1.0 if bw_val is None else bw_val

    # Ceiling division: enough rows to hold every subplot.
    nrows = (len(properties) + ncolumns - 1) // ncolumns

    # Always start from a fresh figure; reusing a fixed figure number would
    # stack new axes onto whatever an earlier call (or the caller) drew.
    fig = plt.figure()
    for position, prop in enumerate(properties, start=1):
        ax = fig.add_subplot(nrows, ncolumns, position)
        data = data_source[prop]

        if vis == 'hist':
            sns.histplot(data,
                         stat='density' if probability_density else 'count',
                         bins=nbins[prop],
                         alpha=alpha_prop[prop],
                         color=color_prop[prop],
                         edgecolor=edgecolor_prop[prop],
                         ax=ax)
        elif vis == 'kde':
            sns.kdeplot(data,
                        bw_adjust=bw_adjust[prop],
                        color=color_prop[prop],
                        fill=True,
                        alpha=alpha_prop[prop],
                        linewidth=2,
                        ax=ax)
        else:  # 'hist_kde'
            # The histogram is drawn as a density (and more transparent) so
            # the overlaid KDE curve shares its scale and stays visible.
            sns.histplot(data,
                         stat='density',
                         bins=nbins[prop],
                         alpha=alpha_prop[prop] * 0.6,
                         color=color_prop[prop],
                         edgecolor=edgecolor_prop[prop],
                         ax=ax)
            sns.kdeplot(data,
                        bw_adjust=bw_adjust[prop],
                        color='red',
                        linewidth=2,
                        ax=ax)

        # Avoid labels such as "solidity ()" for unit-less properties.
        unit = prop_units.get(prop, "")
        ax.set_xlabel(f'{prop} ({unit})' if unit else f'{prop}')
        ax.set_ylabel(ylabel)

    fig.tight_layout()
    plt.show()