Source code for orgmatt.plot.distributions

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2022-2023 Tanguy Fardet
# SPDX-License-Identifier: GPL-3.0-or-later
# orgmatt/plot/distributions.py

import logging

from typing import Optional, Union

import numpy as np
import seaborn as sns  # type: ignore

from matplotlib.axes import Axes  # type: ignore
from pandas import DataFrame

from .._utils import _log_message
from ..data import get_dataset


# Module-scoped logger: records carry this module's dotted name, so they can
# be filtered or attributed per module, and they still propagate to whatever
# handlers are configured on the root logger.
logger = logging.getLogger(__name__)


def plot_dataset(
    dataset: str = "excretions",
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    palette: str = "vlag",
    ax: Union[Axes, list[Axes], None] = None,
    split_axes: bool = True,
    show: bool = False,
    **kwargs
) -> list[Axes]:
    '''
    Plot the distribution of data in a dataset.

    Parameters
    ----------
    dataset : str, optional (default: "excretions")
        Name of the dataset, passed to :func:`get_dataset`.
    x : str, optional (default: deduced from dataset)
        Column to use as the x-axis.
    y : str, optional (default: deduced from dataset)
        Column to use as the y-axis.
    hue : str, optional (default: x)
        Column to use for the colors. Falls back to `x`, independently for
        each axis, if the column is missing or only contains NaN values.
    palette : str, optional (default: "vlag")
        Palette to use for the colors, see
        https://seaborn.pydata.org/tutorial/color_palettes.html
    ax : Axes or list of Axes, optional (default: axes of a new figure)
        The matplotlib axis (or axes) on which the plot will be drawn.
        One axis per unit is required if the axes end up being split.
    split_axes : bool, optional (default: True)
        Whether to create one axis for each unit in the "unit" column.
        Ignored if the dataset contains at most one unit.
    show : bool, optional (default: False)
        Whether to display the plot immediately.
    **kwargs
        ``figsize`` sets the size of the figure created when `ax` is None
        (it is ignored otherwise). Every other ``column=value`` entry
        restricts the data to the rows where `column` equals `value`.

    Returns
    -------
    axes : list
        A list of matplotlib axes containing the plots.

    Raises
    ------
    AssertionError
        If the number of axes does not match the number of units while the
        axes are split, or if `x` or `y` is not a column of the dataset.
    '''
    import matplotlib.pyplot as plt  # type: ignore

    df = get_dataset(dataset)

    # separate the figure size from the column filters without touching the
    # caller's dictionary
    filters = dict(kwargs)
    figsize = filters.pop("figsize", None)

    # check units for split axes; `unique` keeps the order of appearance so
    # the panel order is reproducible (a `set` of strings is not)
    has_units = "unit" in df
    units = tuple(df["unit"].unique()) if has_units else ("",)

    split_axes = split_axes and len(units) > 1

    if not split_axes and len(units) > 1:
        _log_message(
            logger, "WARN", "y-axis is displaying values in different "
            "units, consider setting `split_axes` to True.")

    if ax is None:
        if figsize is None:
            figsize = (6*len(units) + 0.4, 4.8)

        _, ax = plt.subplots(1, len(units) if split_axes else 1,
                             figsize=figsize, layout="constrained")

    # normalize to a list *before* checking its length: a bare Axes has no
    # len() and `plt.subplots` returns an array when there are several axes
    axes = [ax] if isinstance(ax, Axes) else list(ax)

    if split_axes:
        assert len(axes) == len(units), "One axis per unit required " \
            f"with `split_axes=True`, `ax` should be of length {len(units)}."

    for column, value in filters.items():
        df = df[df[column] == value]

    # get x and y
    x = x or _x[dataset]
    y = y or _y[dataset]

    xy_error = f"Valid entries for `x` and `y` are {list(df.keys())}."

    assert x in df, xy_error
    assert y in df, xy_error

    # pair each panel label with the rows it displays
    if split_axes:
        groups = [(u, df[df["unit"] == u]) for u in units]
    elif has_units:
        # single panel: show every remaining row, whatever its unit, and
        # label the axis with the units that are actually displayed
        shown = units if df.empty else df["unit"].unique()
        groups = [(", ".join(map(str, shown)), df)]
    else:
        # no "unit" column: nothing to filter on and no unit to display
        groups = [("", df)]

    num_x = []

    for axis, (u, dfu) in zip(axes, groups):
        # decide the hue per axis so that a fallback on one panel does not
        # override the requested hue on the following ones
        hue_is_usable = (
            hue is not None and hue in dfu and dfu[hue].notna().any())
        axis_hue = hue if hue_is_usable else x

        if dfu.empty:
            axis.text(0.5, 0.5,
                      f"No data in {u} with constraints\n{filters}.",
                      transform=axis.transAxes, ha="center", va="center")
            axis.set_xticks([])
            axis.set_yticks([])
        else:
            num_x.append(len(set(dfu[x])))

            plot_dataframe(dfu, x, y, axis_hue, ax=axis, palette=palette,
                           unit=u, show=False)

    # tilt the tick labels when there are many categories or many panels
    if any(n > 3 for n in num_x) or len(axes) >= 7:
        for axis in axes:
            plt.setp(axis.get_xticklabels(), rotation=45, ha='right')

    if show:
        plt.show()

    return axes
def plot_dataframe(
    df: DataFrame,
    x: str,
    y: str,
    hue: Optional[str] = None,
    palette: Optional[str] = None,
    unit: Optional[str] = None,
    draw_boxes: bool = True,
    draw_points: bool = True,
    ax: Optional[Axes] = None,
    show: bool = False
) -> Axes:
    '''
    Draw the content of a DataFrame as a boxplot overlaid with a stripplot
    showing the individual data points.

    Parameters
    ----------
    df : DataFrame
        The dataframe to plot.
    x : str
        Column to use as the x-axis.
    y : str
        Column to use as the y-axis.
    hue : str, optional (default: None)
        Column to use for the colors.
    palette : str, optional (default: None)
        Palette to use for the colors, see
        https://seaborn.pydata.org/tutorial/color_palettes.html
    unit : str, optional (default: None)
        Unit associated to the y-axis, appended to its label in parentheses.
    draw_boxes : bool, optional (default: True)
        Whether to draw the boxplot.
    draw_points : bool, optional (default: True)
        Whether to draw the stripplot with the individual data points.
    ax : Axes, optional (default: axis of a new figure)
        The matplotlib axis on which the plot will be drawn.
    show : bool, optional (default: False)
        Whether to display the plot immediately.

    Returns
    -------
    ax : Axes
        The matplotlib axis containing the plot.
    '''
    import matplotlib.pyplot as plt

    if ax is None:
        ax = plt.subplots()[1]

    # groups are only dodged (and a legend only makes sense) when the
    # colors encode something else than the x categories
    hue_differs = hue != x

    # arguments common to both seaborn calls
    shared = {
        "x": x,
        "y": y,
        "data": df,
        "ax": ax,
        "palette": palette,
        "hue": hue,
        "dodge": hue_differs,
    }

    if draw_boxes:
        sns.boxplot(showfliers=False, **shared)

    if draw_points:
        # the boxplot already provides the legend when it is drawn
        sns.stripplot(size=4, linewidth=1,
                      legend=(hue_differs and not draw_boxes), **shared)

    if unit:
        ax.set_ylabel(f"{ax.get_ylabel()} ({unit})")

    if not hue_differs:
        legend = ax.get_legend()

        if legend:
            legend.remove()

    if show:
        plt.show()

    return ax
# default (x, y) columns for each dataset
_default_columns: dict[str, tuple[str, str]] = {
    "body_composition": ("compound", "content"),
    "excretions": ("excreta", "amount"),
    "excretion_frequency": ("type", "frequency"),
    "excretion_content": ("excreta", "content"),
    "food": ("type", "value"),
    "nutrients_flows_body": ("flow", "amount"),
    "nutrients_flow_fractions": ("flow", "amount"),
    "nutrients_intake": ("compound", "intake"),
    "fertilizer_impact": ("compound", "impact"),
}

# map datasets to default x
_x: dict[str, str] = {
    name: columns[0] for name, columns in _default_columns.items()
}

# map datasets to default y
_y: dict[str, str] = {
    name: columns[1] for name, columns in _default_columns.items()
}