# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2022-2023 Tanguy Fardet
# SPDX-License-Identifier: GPL-3.0-or-later
# orgmatt/plot/distributions.py
import logging
from typing import Optional, Union
import numpy as np
import seaborn as sns # type: ignore
from matplotlib.axes import Axes # type: ignore
from pandas import DataFrame
from .._utils import _log_message
from ..data import get_dataset
# Module-scoped logger: records carry this module's dotted name (so they can
# be filtered per module) and still propagate to handlers set on the root.
logger = logging.getLogger(__name__)
[docs]
def plot_dataset(
    dataset: str = "excretions",
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    palette: str = "vlag",
    ax: Union[Axes, list[Axes], None] = None,
    split_axes: bool = True,
    show: bool = False,
    **kwargs
) -> list[Axes]:
    '''
    Plot the distribution of data in a dataset.

    Parameters
    ----------
    dataset : str, optional (default: "excretions")
        Name of the dataset, forwarded to :func:`get_dataset`; it also
        selects the default `x` and `y` columns.
    x : str, optional (default: deduced from dataset)
        Column to use as the x-axis.
    y : str, optional (default: deduced from dataset)
        Column to use as the y-axis.
    hue : str, optional (default: x)
        Column to use for color. On every axis where this column is missing
        or contains only missing values, `x` is used instead.
    palette : str, optional (default: "vlag")
        Palette to use for the colors, see
        https://seaborn.pydata.org/tutorial/color_palettes.html
    ax : Axes or list of Axes, optional (default: axes of a new figure)
        The matplotlib axis (or axes) on which the plot will be drawn.
        With `split_axes=True` and several units, one axis per unit is
        required.
    split_axes : bool, optional (default: True)
        Whether to create one axis for each unit in the "unit" column.
        Ignored if the dataset has at most one unit.
    show : bool, optional (default: False)
        Whether to display the plot immediately.
    **kwargs
        `figsize` sets the size of the figure created when `ax` is None.
        Every other entry ``column=value`` keeps only the rows of the
        dataset for which ``df[column] == value``.

    Returns
    -------
    axes : list
        A list of matplotlib axes containing the plots.

    Raises
    ------
    AssertionError
        If `x` or `y` is not a column of the dataset, or if the number of
        axes in `ax` does not match the number of units while splitting.
    KeyError
        If `x` or `y` is not given and `dataset` has no default for it.
    '''
    import matplotlib.pyplot as plt  # type: ignore

    df = get_dataset(dataset)

    # `figsize` is the only keyword that is not a column filter
    figsize = kwargs.pop("figsize", None)

    # check units for split axes; `unique` keeps the order of first
    # appearance so the panel order is the same on every run
    units = tuple(df["unit"].unique()) if "unit" in df else ("",)

    split_axes = bool(split_axes) and len(units) > 1

    if not split_axes and len(units) > 1:
        _log_message(
            logger, "WARN", "y-axis is displaying values in different "
            "units, consider setting `split_axes` to True.")

    if ax is None:
        if figsize is None:
            figsize = (6*len(units) + 0.4, 4.8)

        _, ax = plt.subplots(1, len(units) if split_axes else 1,
                             figsize=figsize, layout="constrained")

    # normalize to a flat list *before* validating so that a single Axes
    # gives a clear error message instead of failing on `len`
    if isinstance(ax, Axes):
        axes = [ax]
    elif isinstance(ax, np.ndarray):
        axes = list(ax.flat)
    else:
        axes = list(ax)

    if split_axes and len(axes) != len(units):
        # explicit raise: still enforced when Python runs with -O
        raise AssertionError(
            "One axis per unit required with `split_axes=True`, `ax` "
            f"should be of length {len(units)}.")

    # remaining keywords restrict the rows that are plotted
    for column, value in kwargs.items():
        df = df[df[column] == value]

    # get x and y
    x = x or _x[dataset]
    y = y or _y[dataset]

    if x not in df or y not in df:
        raise AssertionError(
            f"Valid entries for `x` and `y` are {list(df.keys())}.")

    # one (axis, data, unit) triple per panel
    if split_axes:
        panels = [(axis, df[df["unit"] == u], u)
                  for axis, u in zip(axes, units)]
    else:
        # single axis: draw everything that is left; only label the y-axis
        # with a unit if it is unambiguous
        shared_unit = units[0] if len(units) == 1 else None
        panels = [(axes[0], df, shared_unit)]

    num_x = []

    for axis, data, unit in panels:
        # resolve the hue per panel so that a fallback on one axis does not
        # override the user's choice on the following ones
        panel_hue = hue if (hue in data and data[hue].notna().any()) else x

        if data.empty:
            where = unit if unit is not None else ", ".join(map(str, units))

            axis.text(0.5, 0.5,
                      f"No data in {where} with constraints\n{kwargs}.",
                      transform=axis.transAxes, ha="center", va="center")
            axis.set_xticks([])
            axis.set_yticks([])
        else:
            num_x.append(len(set(data[x])))

            plot_dataframe(data, x, y, panel_hue, ax=axis, palette=palette,
                           unit=unit, show=False)

    # tilt the tick labels when they are likely to overlap
    if any(n > 3 for n in num_x) or len(axes) >= 7:
        for axis in axes:
            plt.setp(axis.get_xticklabels(), rotation=45, ha='right')

    if show:
        plt.show()

    return axes
[docs]
def plot_dataframe(
    df: DataFrame,
    x: str,
    y: str,
    hue: Optional[str] = None,
    palette: Optional[str] = None,
    unit: Optional[str] = None,
    draw_boxes: bool = True,
    draw_points: bool = True,
    ax: Optional[Axes] = None,
    show: bool = False
) -> Axes:
    '''
    Draw the content of a DataFrame as a boxplot overlaid with a stripplot
    (one marker per data point).

    Parameters
    ----------
    df : DataFrame
        The dataframe to plot.
    x : str
        Column to use as the x-axis.
    y : str
        Column to use as the y-axis.
    hue : str, optional (default: None)
        Column to use for color. Groups are dodged unless `hue` is `x`, in
        which case the legend is also removed.
    palette : str, optional (default: None)
        Palette to use for the colors, see
        https://seaborn.pydata.org/tutorial/color_palettes.html
    unit : str, optional (default: None)
        Unit associated to the y-axis, appended to its label in parentheses.
    draw_boxes : bool, optional (default: True)
        Whether to draw the boxplot (outliers are not shown).
    draw_points : bool, optional (default: True)
        Whether to draw the stripplot with the individual data points.
    ax : Axes, optional (default: axis of a new figure)
        The matplotlib axis on which the plot will be drawn.
    show : bool, optional (default: False)
        Whether to display the plot immediately.

    Returns
    -------
    ax : Axes
        The matplotlib axis containing the plot.
    '''
    import matplotlib.pyplot as plt

    if ax is None:
        ax = plt.subplots()[1]

    # a hue distinct from x adds a grouping level: dodge it and keep a legend
    separate_hue = hue != x

    # arguments common to both seaborn calls
    shared = dict(x=x, y=y, data=df, ax=ax, palette=palette, hue=hue,
                  dodge=separate_hue)

    if draw_boxes:
        sns.boxplot(showfliers=False, **shared)

    if draw_points:
        # the boxplot, when drawn, already provides the legend entries
        sns.stripplot(size=4, linewidth=1,
                      legend=(separate_hue and not draw_boxes), **shared)

    if unit:
        ax.set_ylabel(f"{ax.get_ylabel()} ({unit})")

    if not separate_hue:
        # the legend would only repeat the x tick labels
        legend = ax.get_legend()

        if legend:
            legend.remove()

    if show:
        plt.show()

    return ax
# Default plotting columns for each dataset: name -> (x column, y column).
# Keeping both in one table guarantees that `_x` and `_y` share their keys.
_default_columns: dict[str, tuple[str, str]] = {
    "body_composition": ("compound", "content"),
    "excretions": ("excreta", "amount"),
    "excretion_frequency": ("type", "frequency"),
    "excretion_content": ("excreta", "content"),
    "food": ("type", "value"),
    "nutrients_flows_body": ("flow", "amount"),
    "nutrients_flow_fractions": ("flow", "amount"),
    "nutrients_intake": ("compound", "intake"),
    "fertilizer_impact": ("compound", "impact"),
}

# map datasets to default x/y
_x: dict[str, str] = {
    name: columns[0] for name, columns in _default_columns.items()
}

_y: dict[str, str] = {
    name: columns[1] for name, columns in _default_columns.items()
}