# Source code for pylandstats.spatiotemporal

"""Spatio-temporal analysis."""
import functools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from . import landscape as pls_landscape
from . import multilandscape, zonal

__all__ = ["SpatioTemporalAnalysis", "SpatioTemporalBufferAnalysis"]


class SpatioTemporalAnalysis(multilandscape.MultiLandscape):
    """Analysis of a sequence of landscape snapshots labelled by date."""

    def __init__(self, landscapes, dates=None, neighborhood_rule=None, **landscape_kws):
        """
        Set up the analysis from a sequence of landscape snapshots.

        Parameters
        ----------
        landscapes : list-like
            The landscape snapshots, given either as `Landscape` instances or as
            strings/file-like/pathlib.Path objects, each of which is forwarded as the
            `landscape` argument of `Landscape.__init__`.
        dates : list-like, optional
            Ints or strings labelling the date of each snapshot of `landscapes` (used
            for DataFrame indices and plot labels). If not provided, the snapshots
            are labelled "t0", "t1", ... following their position in `landscapes`.
        neighborhood_rule : {'8', '4'}, optional
            Neighborhood rule to determine patch adjacencies, i.e: '8' (queen's
            case/Moore neighborhood) or '4' (rook's case/Von Neumann neighborhood).
            Ignored if the passed-in landscapes are `Landscape` instances. If no value
            is provided and the passed-in landscapes are file-like objects or paths, the
            default value set in `settings.DEFAULT_NEIGHBORHOOD_RULE` will be taken.
        landscape_kws : dict, optional
            Other keyword arguments to be passed to the instantiation of
            `pylandstats.Landscape` for each element of `landscapes`. Ignored if the
            elements of `landscapes` are already instances of `pylandstats.Landscape`.
        """
        # NOTE: `neighborhood_rule` is spelled out in the signature only so that it
        # is explicitly documented here. Being a named parameter, it can never end
        # up inside `landscape_kws`, so there is nothing to strip from that dict.
        if dates is None:
            # default labels: one positional tag per snapshot
            dates = [f"t{position}" for position in range(len(landscapes))]

        # the parent class receives the name of the label attribute ("dates")
        # followed by the label values themselves
        super().__init__(
            landscapes,
            "dates",
            dates,
            neighborhood_rule=neighborhood_rule,
            **landscape_kws
        )

    # The two methods below only delegate to the parent class. They are redefined
    # so that their docstrings can be specialised for a date-based index.
    def compute_class_metrics_df(  # noqa: D102
        self, metrics=None, classes=None, metrics_kws=None, fillna=None
    ):
        forwarded_kws = dict(
            metrics=metrics,
            classes=classes,
            metrics_kws=metrics_kws,
            fillna=fillna,
        )
        return super().compute_class_metrics_df(**forwarded_kws)

    # fill the shared docstring template with the date-specific index description
    compute_class_metrics_df.__doc__ = (
        multilandscape._compute_class_metrics_df_doc.format(
            index_descr="multi-indexed by the class and date",
            index_return="class, date (multi-index)",
        )
    )

    def compute_landscape_metrics_df(  # noqa: D102
        self, metrics=None, metrics_kws=None
    ):
        forwarded_kws = dict(metrics=metrics, metrics_kws=metrics_kws)
        return super().compute_landscape_metrics_df(**forwarded_kws)

    # fill the shared docstring template with the date-specific index description
    compute_landscape_metrics_df.__doc__ = (
        multilandscape._compute_landscape_metrics_df_doc.format(
            index_descr="indexed by the date", index_return="date (index)"
        )
    )

    # def plot_patch_metric(metric):
    #     # TODO: sns distplot?
    #     fig, ax = plt.subplots()
    #     ax.hist()


class SpatioTemporalBufferAnalysis(SpatioTemporalAnalysis):
    """Spatio-temporal buffer analysis around a feature of interest."""

    def __init__(
        self,
        landscapes,
        base_mask,
        buffer_dists,
        buffer_rings=False,
        base_mask_crs=None,
        landscape_crs=None,
        landscape_transform=None,
        dates=None,
        neighborhood_rule=None,
    ):
        """
        Initialize the spatio-temporal buffer analysis.

        Parameters
        ----------
        landscapes : list-like
            A list-like of `Landscape` instances or of strings/file-like/pathlib.Path
            objects so that each is passed as the `landscape` argument of
            `Landscape.__init__`.
        base_mask : shapely geometry or geopandas.GeoSeries
            Geometry that will serve as a base mask to buffer around.
        buffer_dists : list-like
            Buffer distances around `base_mask`, forwarded as-is to
            `zonal.BufferAnalysis`. One `SpatioTemporalAnalysis` instance is created
            (and stored in the `stas` attribute) for each resulting buffer zone.
        buffer_rings : bool, default False
            If `False`, each buffer zone will consist of the whole region that lies
            within the respective buffer distance around the base mask. If `True`,
            buffer zones will take the form of rings around the base mask.
        base_mask_crs : str, dict or pyproj.CRS, optional
            The coordinate reference system of the base mask. Required if the base mask
            is a shapely geometry or a geopandas GeoSeries without the `crs` attribute
            set.
        landscape_crs : str, dict or pyproj.CRS, optional
            The coordinate reference system of the landscapes. Required if the passed-in
            landscapes are `Landscape` instances, ignored if they are paths to raster
            datasets that already contain such information.
        landscape_transform : affine.Affine, optional
            Transformation from pixel coordinates to coordinate reference system.
            Required if the passed-in landscapes are `Landscape` instances, ignored if
            they are paths to raster datasets that already contain such information.
        dates : list-like, optional
            A list-like of ints or strings that label the date of each snapshot of
            `landscapes` (for DataFrame indices and plot labels).
        neighborhood_rule : {'8', '4'}, optional
            Neighborhood rule to determine patch adjacencies, i.e: '8' (queen's
            case/Moore neighborhood) or '4' (rook's case/Von Neumann neighborhood).
            Ignored if the passed-in landscapes are `Landscape` instances. If no value
            is provided and the passed-in landscapes are file-like objects or paths, the
            default value set in `settings.DEFAULT_NEIGHBORHOOD_RULE` will be taken.
        """
        super().__init__(landscapes, dates=dates, neighborhood_rule=neighborhood_rule)

        # the buffer masks are built from the first snapshot only and then applied
        # to every snapshot below
        # NOTE(review): this presumes that all the landscapes share the same extent
        # and resolution - confirm against `zonal.BufferAnalysis`
        ba = zonal.BufferAnalysis(
            landscapes[0],
            base_mask=base_mask,
            buffer_dists=buffer_dists,
            buffer_rings=buffer_rings,
            base_mask_crs=base_mask_crs,
            landscape_crs=landscape_crs,
            landscape_transform=landscape_transform,
        )
        # `BufferAnalysis.__init__` sets the `buffer_dists` attribute on the object
        # stored in `ba`, not on the current `SpatioTemporalBufferAnalysis`, so we
        # need to copy it over here
        self.buffer_dists = ba.buffer_dists

        # one `SpatioTemporalAnalysis` per buffer zone, where the pixels of each
        # snapshot that lie outside the zone are replaced by the nodata value
        self.stas = []
        for _, mask_arr in zip(ba.buffer_dists, ba.masks_arr):
            self.stas.append(
                SpatioTemporalAnalysis(
                    [
                        pls_landscape.Landscape(
                            np.where(
                                mask_arr,
                                landscape.landscape_arr,
                                landscape.nodata,
                            ).astype(landscape.landscape_arr.dtype),
                            res=(landscape.cell_width, landscape.cell_height),
                            nodata=landscape.nodata,
                            transform=landscape.transform,
                            neighborhood_rule=landscape.neighborhood_rule,
                        )
                        for landscape in self.landscapes
                    ],
                    dates=dates,
                )
            )

        # the `self.present_classes` attribute will have been set by the parent's
        # init (namely the `super` call at the top of this method), however some of
        # the classes may not actually be found in any of the buffer zones. We
        # therefore take the union of the classes found by the spatio-temporal
        # analysis instance of each buffer zone
        self.present_classes = functools.reduce(
            np.union1d, tuple(sta.present_classes for sta in self.stas)
        )

        # the dates are the same for all the `SpatioTemporalAnalysis` instances
        # stored in `self.stas`, so we take them from the first one and store them
        # as attribute of this `SpatioTemporalBufferAnalysis`.
        # ACHTUNG: we do it AFTER instantiating the `SpatioTemporalAnalysis`
        # instances of `self.stas` so that `SpatioTemporalAnalysis.__init__` deals
        # with the logic of what to do with the `dates` argument
        self.dates = self.stas[0].dates

    def compute_class_metrics_df(  # noqa: D102
        self, metrics=None, classes=None, metrics_kws=None, fillna=None
    ):
        if classes is None:
            classes = self.present_classes

        # get the columns to init the data frame
        if metrics is None:
            columns = pls_landscape.Landscape.CLASS_METRICS
        else:
            columns = metrics

        # IMPORTANT: since some classes might not be present for each date and/or
        # buffer distance, we init the MultiIndex manually to ensure that every
        # class is present in the resulting data frame. If some class does not
        # appear for some date/buffer distance, the corresponding row will be nan.
        # This is probably preferable to having a MultiIndex whose second level
        # (i.e., `class_val`) differs for each buffer distance.
        # Note that this approach is likely slower since for each of the
        # `buffer_dists` we have to iterate over every (class, date) pair.
        class_metrics_df = pd.DataFrame(
            index=pd.MultiIndex.from_product([self.buffer_dists, classes, self.dates]),
            columns=columns,
        )
        class_metrics_df.index.names = "buffer_dist", "class_val", "dates"
        class_metrics_df.columns.name = "metric"

        for buffer_dist, sta in zip(self.buffer_dists, self.stas):
            # get the class metrics data frame for the `SpatioTemporalAnalysis`
            # instance that corresponds to this `buffer_dist`
            df = sta.compute_class_metrics_df(
                metrics=metrics,
                classes=classes,
                metrics_kws=metrics_kws,
                fillna=fillna,
            )
            # put the metrics data frame of the `SpatioTemporalAnalysis` of this
            # `buffer_dist` into the global metrics data frame of the
            # `SpatioTemporalBufferAnalysis`
            for class_val, date in class_metrics_df.loc[buffer_dist].index:
                # use `class_metrics_df.loc` for the first level (i.e.,
                # `buffer_dist`) again (we have already used it in the iterator
                # above) to avoid `SettingWithCopyWarning`
                try:
                    class_metrics_df.loc[buffer_dist, class_val, date] = df.loc[
                        class_val, date
                    ]
                except KeyError:
                    # this means that `class_val` is not in `df`, therefore we do
                    # nothing and the corresponding row of `class_metrics_df`
                    # stays as nan
                    pass

        return class_metrics_df

    # fill the shared docstring template; the third index level is the date
    compute_class_metrics_df.__doc__ = (
        multilandscape._compute_class_metrics_df_doc.format(
            index_descr="multi-indexed by the buffer distance, class and date",
            index_return="buffer distance, class, date (multi-index)",
        )
    )

    def compute_landscape_metrics_df(  # noqa: D102
        self, metrics=None, metrics_kws=None
    ):
        # we create a dict where each key is a `buffer_dist`, and its value is the
        # corresponding metrics data frame of the `SpatioTemporalAnalysis` instance
        df_dict = {
            buffer_dist: sta.compute_landscape_metrics_df(
                metrics=metrics, metrics_kws=metrics_kws
            )
            for buffer_dist, sta in zip(self.buffer_dists, self.stas)
        }

        # we concatenate the data frames using their respective `buffer_dist` key
        # to create an extra index level (i.e., using the `keys` argument of
        # `pd.concat`)
        landscape_metrics_df = pd.concat(df_dict.values(), keys=df_dict.keys())
        # now we set the name of each index and column level
        landscape_metrics_df.index.names = "buffer_dist", "dates"
        landscape_metrics_df.columns.name = "metric"

        return landscape_metrics_df

    compute_landscape_metrics_df.__doc__ = (
        multilandscape._compute_landscape_metrics_df_doc.format(
            index_descr="multi-indexed by the buffer distance and date",
            index_return="buffer distance, date (multi-index)",
        )
    )

    def plot_metric(
        self,
        metric,
        class_val=None,
        ax=None,
        metric_legend=True,
        metric_label=None,
        buffer_dist_legend=True,
        fmt="--o",
        plot_kws=None,
        subplots_kws=None,
    ):
        """
        Plot the time series of the metric across the buffer zones.

        Parameters
        ----------
        metric : str
            A string indicating the name of the metric to plot.
        class_val : int, optional
            If provided, the metric will be plotted at the level of the corresponding
            class, otherwise it will be plotted at the landscape level.
        ax : axis object, optional
            Plot in given axis; if None creates a new figure.
        metric_legend : bool, default True
            Whether the metric label should be displayed within the plot (as label of
            the y-axis).
        metric_label : str, optional
            Label of the y-axis to be displayed if `metric_legend` is `True`. If the
            provided value is `None`, the label will be taken from the `settings`
            module.
        buffer_dist_legend : bool, default True
            Whether a legend linking each plotted line to a buffer distance should be
            displayed within the plot.
        fmt : str, default '--o'
            A format string for `matplotlib.pyplot.plot`.
        plot_kws : dict, default None
            Keyword arguments to be passed to `matplotlib.pyplot.plot`. Unless it
            contains a "label" key, each line is labelled with its buffer distance.
        subplots_kws : dict, default None
            Keyword arguments to be passed to `matplotlib.pyplot.subplots` only if no
            axis is given (through the `ax` argument).

        Returns
        -------
        ax : matplotlib.axes.Axes
            Returns the `Axes` object with the plot drawn onto it.
        """
        # TODO: refactor this method so that it uses `class_metrics_df` and
        # `landscape_metrics_df` properties?
        if ax is None:
            if subplots_kws is None:
                subplots_kws = {}
            _, ax = plt.subplots(**subplots_kws)

        if plot_kws is None:
            plot_kws = {}

        # a user-provided label takes precedence over the buffer distance labels
        label_with_buffer_dist = "label" not in plot_kws

        for buffer_dist, sta in zip(self.buffer_dists, self.stas):
            # fresh copy for each line to avoid alias/reference issues, i.e., neither
            # the caller's dict nor the one of another line is ever modified
            _plot_kws = plot_kws.copy()
            if label_with_buffer_dist:
                _plot_kws["label"] = buffer_dist
            ax = sta.plot_metric(
                metric,
                class_val=class_val,
                ax=ax,
                metric_legend=metric_legend,
                metric_label=metric_label,
                fmt=fmt,
                plot_kws=_plot_kws,
            )

        if buffer_dist_legend:
            ax.legend()

        return ax

    def plot_landscapes(
        self,
        cmap=None,
        legend=True,
        subplots_kws=None,
        show_kws=None,
        subplots_adjust_kws=None,
    ):
        """
        Plot each landscape snapshot in a dedicated matplotlib axis.

        The axes are laid out as a grid with one row per buffer distance and one
        column per date.

        Parameters
        ----------
        cmap : str or `~matplotlib.colors.Colormap`, optional
            A Colormap instance.
        legend : bool, optional
            If ``True``, display the legend of the land use/cover color codes.
        subplots_kws : dict, default None
            Keyword arguments to be passed to `matplotlib.pyplot.subplots`. If no
            "figsize" is given, the default figure size is scaled by the number of
            columns (width) and rows (height) of the grid. A "squeeze" key is
            overridden, since a two-dimensional grid of axes is always required.
        show_kws : dict, default None
            Keyword arguments to be passed to `rasterio.plot.show`.
        subplots_adjust_kws : dict, default None
            Keyword arguments to be passed to `matplotlib.pyplot.subplots_adjust`.

        Returns
        -------
        fig : matplotlib.figure.Figure
            The figure with its corresponding plots drawn into its axes.
        """
        # the dates are the same for all the `SpatioTemporalAnalysis` instances of
        # `self.stas`, so we take them from the first one
        dates = self.stas[0].dates
        # one row per buffer distance, one column per date
        num_rows, num_cols = len(self.buffer_dists), len(dates)

        # avoid alias/reference issues
        if subplots_kws is None:
            _subplots_kws = {}
        else:
            _subplots_kws = subplots_kws.copy()
        figsize = _subplots_kws.pop("figsize", None)
        if figsize is None:
            # the width grows with the columns and the height with the rows
            figwidth, figheight = plt.rcParams["figure.figsize"]
            figsize = (figwidth * num_cols, figheight * num_rows)
        # always get a 2-dimensional array of axes, otherwise the row/column
        # indexing below fails when there is a single buffer distance or date
        _subplots_kws["squeeze"] = False

        fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize, **_subplots_kws)

        if show_kws is None:
            show_kws = {}
        for row_axes, sta in zip(axes, self.stas):
            for ax, landscape in zip(row_axes, sta.landscapes):
                landscape.plot_landscape(cmap=cmap, ax=ax, legend=legend, **show_kws)

        # labels in first row and column only
        for date, ax in zip(dates, axes[0]):
            ax.set_title(date)

        for buffer_dist, ax in zip(self.buffer_dists, axes[:, 0]):
            ax.set_ylabel(buffer_dist)

        # adjust spacing between axes
        if subplots_adjust_kws is not None:
            fig.subplots_adjust(**subplots_adjust_kws)

        return fig