Source code for orgmatt.data

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2022-2023 Tanguy Fardet
# SPDX-License-Identifier: GPL-3.0-or-later
# orgmatt/data/__init__.py

"""
This module imports all databases used in the package.

These databases contain consolidated data from the literature about the amount
of organic matter generated by various actors and the chemical composition of
these resources.

Functions
---------
"""

from os.path import abspath as _abspath
from os.path import dirname as _dirname
from os.path import join as _join

import pandas as _pd

from pandas import DataFrame


__all__ = [
    "data_information",
    "list_datasets",
    "get_dataset",
]



[docs]
def data_information(entry: str) -> str:
    '''
    Get additional information about the data and database entries:

    * compound name and details about its properties or definition
    * acronyms
    * impact types
    * units

    Parameters
    ----------
    entry : str
        Either a key of the internal definitions table or a string that
        can be parsed by the package's unit registry.

    Returns
    -------
    The stored definition if `entry` is a known key; otherwise the
    representation of the parsed unit followed by its dimensionality
    (e.g. ``... with dimension [length]/[time]**2``).

    Raises
    ------
    ValueError
        If `entry` is neither a known definition nor a parsable unit.
    '''
    definition = _definitions.get(entry)

    if definition is not None:
        return definition

    # imported lazily, only when the entry is not a stored definition
    from orgmatt.units import ureg

    try:
        # parse once and reuse the result for both the unit and dimension
        quantity = ureg(entry)
        unit, dim = quantity.u, quantity.dimensionality
    except Exception as err:
        # any parsing failure means the entry is unknown; keep the cause
        raise ValueError(f"Unknown entry '{entry}'.") from err

    return repr(unit) + " with dimension " + _format_dimensionality(dim)


def _format_dimensionality(dim) -> str:
    ''' Format a mapping {dimension name: exponent} as a readable product. '''
    parts = []

    for i, (name, power) in enumerate(dim.items()):
        if power > 0:
            operator = "*" if i > 0 else ""
        else:
            # a leading negative exponent needs an explicit numerator
            operator = "/" if i > 0 else "1/"

        # the sign is carried by the operator, so only print the magnitude
        magnitude = abs(power)
        exponent = f"**{magnitude}" if magnitude != 1 else ""

        parts.append(operator + name.replace("_", " ") + exponent)

    return "".join(parts) or "dimensionless"




[docs]
def list_datasets() -> list[str]:
    '''
    Return the names of the available datasets.

    The names are the keys of the internal dataset registry, in
    registration order, and are the valid arguments of `get_dataset`.
    '''
    return [*_name_to_df]




[docs]
def get_dataset(name: str) -> DataFrame:
    '''
    Return the dataset associated to `name` as a DataFrame.

    Parameters
    ----------
    name : str
        Name of the dataset, one of the entries returned by
        `list_datasets`.

    Returns
    -------
    The DataFrame registered under `name` (the stored object itself, not
    a copy).

    Raises
    ------
    ValueError
        If `name` is not the name of an available dataset.
    '''
    # explicit exception rather than `assert`, which is removed when Python
    # runs with -O and would leave callers with an uninformative KeyError
    try:
        return _name_to_df[name]
    except KeyError:
        raise ValueError(
            f"Unknown dataset '{name}'. "
            f"Valid datasets are {list(_name_to_df)}.") from None



# utilities

_data_dir = _abspath(_dirname(__file__))


def _read_table(filename: str) -> DataFrame:
    ''' Load the CSV file `filename` located in the module's directory. '''
    return _pd.read_csv(_join(_data_dir, filename))


# excreta data

# database regarding excretion volumes and masses (urine and feces)
excretions = _read_table("excretions.csv")

# database regarding excretion frequencies (urination and defecation)
excr_freq = _read_table("excretion_frequency.csv")

# database regarding excretions' content in various nutrients
excr_content = _read_table("excretion_content.csv")


# food data

# database on food-related numbers (generation, nutrient content...)
food = _read_table("food.csv")


# metabolism data

# database of the body nutrient composition
body_compo = _read_table("body_composition.csv")

# database regarding the flows of nutrient within the body
nutrients_flows = _read_table("nutrients_flows_body.csv")

# NOTE(review): presumably the fraction of the nutrient intake going to each
# flow (judging from the file name) -- confirm against the CSV content
nutrients_flow_fractions = _read_table("nutrient_intake_flow_fractions.csv")

# database regarding nutrients ingested from food
nutrients_intake = _read_table("nutrients_intake.csv")


# environmental impacts

# database on the impact of various fertilizers
fertilizer_impact = _read_table("fertilizer_impact.csv")


# registry of the datasets exposed through `list_datasets` and `get_dataset`
_name_to_df: dict[str, DataFrame] = {
    "body_composition": body_compo,
    "excretions": excretions,
    "excretion_frequency": excr_freq,
    "excretion_content": excr_content,
    "food": food,
    "nutrients_flows_body": nutrients_flows,
    "nutrients_flow_fractions": nutrients_flow_fractions,
    "nutrients_intake": nutrients_intake,
    "fertilizer_impact": fertilizer_impact,
}


# fill missing metadata in place: an unspecified number of individuals
# becomes 1 and an unspecified sex becomes "mixed"
for df in _name_to_df.values():
    if "individuals" in df.columns:
        df.loc[df["individuals"].isna(), "individuals"] = 1

    if "sex" in df.columns:
        df.loc[df["sex"].isna(), "sex"] = "mixed"


# database regarding the mass of nutrient excreted in urine and feces:
# rows of `nutrients_flows` whose `flow` entry starts with "urine" or "feces"
nutrients_excr = nutrients_flows.loc[
    nutrients_flows["flow"].str.match("urine|feces")
]


# Information about the compounds

# Lookup table used by `data_information`: maps an entry name (as a string)
# to a human-readable definition. Some values embed LaTeX between `$` signs.
_definitions: dict[str, str] = {
    # chemical compounds, keyed by acronym
    "AN": "Ammonium nitrate, $NH_4NO_3$",
    "CAN": "Calcium ammonium nitrate",
    "AS": "Ammonium sulfate, $(NH_4)_2SO_4$",
    "TSP": r"Triple Super Phosphate, $Ca(H_2PO_4)_2 \cdot H_2O$",
    "SSP": r"Single Super Phosphate, $Ca(H_2PO_4)_2 \cdot 2H_2O : CaSO_4$",
    "MOP": "Muriate of Potash or Potassium Chloride, KCl",
    # impact potentials
    "GWP": "Global Warming Potential (mass of CO2 equivalent to the "
           "greenhouse gas emitted in terms of their effect on global "
           "warming)",
    "EP": "Eutrophication Potential (mass of equivalent $PO_4^{3-}$)",
    "AP": "Acidification Potential (mass of equivalent sulfur dioxide)",
    # other acronyms
    "TOC": "Total Organic Carbon",
    "CNR": "Carbon/Nitrogen Ratio",
    # waste categories
    "BiodegradableWaste": "Any organic component, including kitchen and food "
                          "waste, garden residues, etc.",
    "FoodResidues": "Any food-related product, including kitchen and food "
                    "waste.",
    "KitchenWaste": "Organic matter that was thrown during the preparation of "
                    "the meal, e.g. peels.",
    "FoodWaste": "Edible matter that was not consumed, with the caveat that "
                 "the definition of 'edible' may vary.",
    # excreta
    "urine": "Fresh human urine.",
    "stored urine": "Human urine that has been stored for an extended period "
                    "(more than a week).",
    "feces": "Human fecal matter.",
    "feces (wet)": "Fresh human fecal matter, with its full water content.",
    "feces (dry)": "Dried human fecal matter.",
    # nutrient retention
    "retention": "Nutrients that are kept within the body (if positive) or "
                 "that leave the body, mainly from bone or muscle loss due to "
                 "aging mechanisms.",
    # age categories
    "infant": "Baby less than 1 year old.",
    "toddler": "Baby between 1 and 3 year old (included).",
    "kid": "An individual between 4 and 9 year old (included).",
    "teenager": "An individual between 10 and 19 year old (included).",
    "adult": "An individual between 20 and 64 year old (included).",
    "senior": "An individual aged 65 or older.",
}