Source code for harmonia.algorithms.arrays

"""
Structured arrays (:mod:`~harmonia.algorithms.arrays`)
===========================================================================

Provide structured arrays for cosmological data.

.. autosummary::

    DataArray
    SphericalArray
    CartesianArray

|

"""
from collections.abc import Sequence
from itertools import product

import numpy as np

from .discretisation import DiscreteSpectrum

try:
    import cPickle as pickle
except ModuleNotFoundError:
    import pickle


class IndexingError(IndexError):
    """Exception raised for unsupported slicing or indexing in
    `__getitem__` methods.

    """

class DataArray:
    """Abstract data array with save and load methods.

    """

    def __getstate__(self):
        raise NotImplementedError

    def __setstate__(self, state):
        raise NotImplementedError

    def save(self, output_file, file_extension):
        """Save the structured array.

        Parameters
        ----------
        output_file : *str or* :class:`pathlib.Path`
            Output file path.
        file_extension : {'.pkl', '.npz'}
            Output file extension.

        """
        if file_extension == '.pkl':
            with open(output_file, 'wb') as output_data:
                pickle.dump(self, output_data, protocol=-1)
        elif file_extension == '.npz':
            np.savez(output_file, **self.__getstate__())
        else:
            raise IOError(
                "Unwritable output file. "
                "The file extension must be either .npz or .pkl."
            )

    @classmethod
    def load(cls, input_file):
        """Load the structured array from a .npz or .pkl file.

        Parameters
        ----------
        input_file : *str or* :class:`pathlib.Path`
            Input file path.

        """
        try:
            extension = input_file.suffix
        except AttributeError:
            extension = input_file.rpartition(".")[-1]

        if extension.endswith('npz'):
            state_data = np.load(input_file, allow_pickle=True)
            state = {}
            for attr in state_data.files:
                try:
                    state.update({attr: state_data[attr].item()})
                except ValueError:
                    state.update({attr: state_data[attr]})
            self = object.__new__(cls)
            self.__setstate__(state)
        elif extension.endswith('pkl'):
            with open(input_file, 'rb') as input_data:
                self = pickle.load(input_data)
        else:
            raise IOError(
                "Unreadable input file. "
                "The file extension must be either .npz or .pkl."
            )

        return self

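# Illustrative usage sketch (not part of the original module): a structured
# array subclass can be written to disk and read back with ``save``/``load``.
# The file name below is hypothetical; ``CartesianArray`` is defined further
# down in this module.
#
#     >>> cartesian_data = CartesianArray(orders=[0, 2],
#     ...                                 wavenumbers=[0.01, 0.02, 0.03])
#     >>> cartesian_data[:] = 0.  # fill the 'power' field before saving
#     >>> cartesian_data.save("multipoles.npz", ".npz")
#     >>> cartesian_data_reloaded = CartesianArray.load("multipoles.npz")
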
class SphericalArray(DataArray):
    r"""Structured array for spherically decomposed cosmological data.

    The array is initialised with a discrete spectrum of Fourier modes
    and consists of three fields: the 'index' field of
    :math:`(\ell, m_\ell, n_\ell)` triplets, the 'wavenumber' field of
    discrete :math:`k_{\ell n}`, and the 'coefficient' field of
    spherically decomposed data.

    Parameters
    ----------
    disc : :class:`~harmonia.algorithms.discretisation.DiscreteSpectrum`
        Discrete spectrum associated with the structured array.

    Attributes
    ----------
    array : :class:`numpy.ndarray`
        Structured NumPy array.
    size : int
        Total number of elements in the array.  This should equal the
        sum of ``disc.mode_counts``.
    attrs : dict
        Attributes of the structured array inherited from `disc`.

    See Also
    --------
    :class:`~harmonia.algorithms.discretisation.DiscreteSpectrum`

    """

    # Class-wide ``numpy.dtype`` for the structured array and any
    # collapsed array.
    _dtype = np.dtype({
        'names': ['index', 'wavenumber', 'coefficient', '_position'],
        'formats': ['(3,)i4', 'f8', 'c16', 'i8'],
    })
    _dtype_collapsed = np.dtype({
        'names': ['index', 'wavenumber', 'coefficient', '_position'],
        'formats': ['(2,)i4', 'f8', 'c16', 'i8'],
    })

    def __init__(self, disc):

        self.disc = disc
        self.size = sum(disc.mode_counts)
        self.attrs = {
            attr: getattr(disc, attr)
            for attr in ['degrees', 'depths', 'wavenumbers', 'mode_counts']
        }

        self.array, self._directory = self._initialise_array()

    def __str__(self):
        return f"{self.__class__.__name__}({str(self.disc)})"

    def __getitem__(self, key):
        """Get the 'coefficient' field value(s).

        The access key can be an integer, a slice expression, an index
        triplet or a string, e.g. ``[-1]``, ``[:]``, ``[(0, 0, 1)]`` or
        ``'degree_0'``.

        Parameters
        ----------
        key : int, slice, tuple(int, int, int) or str
            'coefficient' field access key.

        Returns
        -------
        complex
            'coefficient' field data entry.

        """
        position = self._find_position(key)

        return self.array['coefficient'][position]

    def __setitem__(self, key, value):
        """Set the 'coefficient' field value(s).

        Parameters
        ----------
        key : int, slice, tuple(int, int, int) or str
            'coefficient' field access key.
        value : complex
            'coefficient' field data entry.

        See Also
        --------
        :meth:`.SphericalArray.__getitem__`

        """
        position = self._find_position(key)

        self.array['coefficient'][position] = value

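    # Illustrative indexing sketch (not part of the original module),
    # assuming ``disc`` is a ``DiscreteSpectrum`` instance constructed
    # elsewhere and that its spectrum contains degree 0:
    #
    #     >>> spherical_data = SphericalArray(disc)
    #     >>> spherical_data[(0, 0, 1)] = 1. + 0.j  # by (degree, order, depth)
    #     >>> spherical_data[-1]                    # last entry in natural order
    #     >>> spherical_data['degree_0']            # all coefficients of degree 0
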
    def __getstate__(self):

        state = self.__dict__
        state.update({'disc': self.disc.__getstate__()})

        return state

    def __setstate__(self, state):

        for attr, value in state.items():
            if attr == 'disc':
                self.disc = DiscreteSpectrum._from_state(value)
            else:
                setattr(self, attr, value)

        # NOTE: For backward compatibility; may be removed in the future.
        if '_position' not in self.array.dtype.names:
            self.array = np.lib.recfunctions.append_fields(
                self.array, '_position', list(range(self.size))
            )

    @classmethod
    def _from_state(cls, state):
        # Internal classmethod.
        self = object.__new__(cls)
        self.__setstate__(state)

        return self

    def vectorise(self, pivot, collapse=None):
        r"""Return a data vector from the 'coefficient' field.

        Vectorisation is performed by *pivoting* in one of the following
        orders of precedence:

        * 'natural': ordered by :math:`(\ell, m, n)`;
        * 'spectral': ordered by :math:`(k_{\ell n}, m)`.

        Subarrays of equivalent :math:`(\ell, n)` may be further
        collapsed over the spherical order :math:`m` by simple averaging
        or averaging in quadrature.

        Parameters
        ----------
        pivot : {'natural', 'spectral'}
            Pivot order for vectorisation.
        collapse : {None, 'mean', 'quadrature'}, optional
            If not `None` (default), subarrays are collapsed over
            equivalent spherical orders :math:`m` by averaging ('mean')
            or averaging in quadrature ('quadrature').

        Returns
        -------
        vectorised_data : :class:`numpy.ndarray`
            Vectorised coefficient data.

        """
        if collapse is None:
            array = self.array
        else:
            # Initialise the collapsed array.
            doublet_list = self._gen_index_list(
                self.disc.degrees, self.disc.depths, reduction=True
            )
            array = np.empty(len(doublet_list), dtype=self._dtype_collapsed)
            array['index'] = doublet_list
            # Record natural-order positions so that the collapsed array
            # can also be sorted by the '_position' field.
            array['_position'] = list(range(len(doublet_list)))

            for pos, ind in enumerate(array['index']):
                # Extract the subarray matching the degree and depth of
                # the index doublet being considered.
                selector = np.all(
                    self.array['index'][:, [0, 2]] == ind, axis=-1
                )
                subarray = self.array['coefficient'][selector]

                # Collapse the extracted subarray.
                if collapse.lower() == 'mean':
                    collapsed_subarray = np.mean(subarray)
                elif collapse.lower() == 'quadrature':
                    collapsed_subarray = np.mean(np.abs(subarray) ** 2)
                else:
                    raise ValueError(
                        f"Unknown `collapse` option: {collapse}."
                    )

                array['coefficient'][pos] = collapsed_subarray
                array['wavenumber'][pos] = self.disc.wavenumbers[tuple(ind)]

        # Sort the array by the pivot order.  It is unsafe to sort by the
        # 'index' field, which is multi-dimensional; the private field
        # '_position' was initialised for this purpose.
        if pivot == 'natural':
            sort_order = ['_position', 'wavenumber']
        elif pivot == 'spectral':
            sort_order = ['wavenumber', '_position']
        else:
            raise ValueError(f"Unknown `pivot` option: {pivot}.")

        vectorised_data = np.sort(array, order=sort_order)['coefficient']

        return vectorised_data

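    # Illustrative vectorisation sketch (not part of the original module),
    # continuing from a filled ``spherical_data`` instance as above:
    #
    #     >>> spherical_data.vectorise('natural')
    #     >>> spherical_data.vectorise('spectral')
    #     >>> spherical_data.vectorise('spectral', collapse='quadrature')
    #
    # The first two return one complex coefficient per (degree, order, depth)
    # triplet, ordered by natural position or by wavenumber respectively; the
    # third first averages |coefficient|^2 over the spherical order m within
    # each (degree, depth) doublet.
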
    def _initialise_array(self):

        # Initialise the structured array in the natural order.
        array = np.empty(self.size, dtype=self._dtype)

        # Generate the list of index triplets and the index directory
        # mapping triplets to array positions.
        triplet_list = self._gen_index_list(
            self.disc.degrees, self.disc.depths
        )
        triplet_directory = self._gen_index_directory(triplet_list)

        # Initialise index triplets and the corresponding wavenumbers.
        array['index'] = triplet_list
        array['wavenumber'] = [
            self.disc.wavenumbers[ell, n] for ell, _, n in triplet_list
        ]
        array['_position'] = list(range(self.size))

        return array, triplet_directory

    def _find_position(self, key):

        # If accessed by integer, reinterpret negative values as reverse
        # indexing.
        if isinstance(key, int):
            position = key if key >= 0 else key % self.size
            if position >= self.size:
                raise IndexingError(
                    f"Index {position} out of bounds for key: {key}."
                )
            return position

        # Access by slice.
        if isinstance(key, slice):
            return key

        # Access by a sequence of (degree, order, depth); any list is
        # converted to a tuple.
        if isinstance(key, Sequence) and not isinstance(key, str):
            position = self._directory[tuple(key)]
            return position

        # Access by a string of the form 'degree_<ell>'.
        if isinstance(key, str):
            degree = int(key.split('_')[-1])
            degree_idx = self.disc.degrees.index(degree)
            start = sum(self.disc.mode_counts[:degree_idx])
            stop = sum(self.disc.mode_counts[:(degree_idx + 1)])
            position = slice(start, stop)
            return position

        raise TypeError(f"Invalid type for key: {key}.")

    @staticmethod
    def _gen_index_list(degrees, depths, reduction=False):

        if reduction:
            index_list = [
                (ell, n + 1)
                for ell, nmax in zip(degrees, depths)
                for n in range(nmax)
            ]
        else:
            index_list = [
                (ell, m, n + 1)
                for ell, nmax in zip(degrees, depths)
                for m in range(- ell, ell + 1)
                for n in range(nmax)
            ]

        return index_list

    @staticmethod
    def _gen_index_directory(index_list):

        # The directory is a mapping from an index tuple to an array
        # position.
        index_directory = {
            tuple(tup): pos for pos, tup in enumerate(index_list)
        }

        return index_directory

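# Illustrative note (not part of the original module): for a spectrum with
# ``degrees == [0, 1]`` and ``depths == [1, 2]``, the natural-order triplets
# generated by ``SphericalArray._gen_index_list`` are
#
#     (0, 0, 1),
#     (1, -1, 1), (1, -1, 2), (1, 0, 1), (1, 0, 2), (1, 1, 1), (1, 1, 2),
#
# i.e. ordered by degree, then by spherical order from -ell to ell, then by
# depth starting from 1.
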
class CartesianArray(DataArray):
    r"""Structured array for Cartesian decomposition of cosmological data.

    The array is initialised with three fields: the 'order' field of the
    Legendre multipoles, the 'wavenumber' field of :math:`k`-bin centres
    and the 'power' field for power spectrum multipole measurements.

    Parameters
    ----------
    orders : list of int
        Orders of the power spectrum multipoles.
    wavenumbers : float, array_like
        Wavenumbers of the multipole data.
    mode_counts : list of int or None, optional
        Mode counts in wavenumber bins (default is `None`).
    shot_noise : float or None, optional
        Shot noise level (default is `None`).

    Attributes
    ----------
    array : :class:`numpy.ndarray`
        Structured NumPy array.
    size : int
        Total number of elements in the array.  This should equal the
        product of the numbers of wavenumbers and multipoles.
    attrs : dict
        Initialisation parameters as attributes.

    """

    # Class-wide ``numpy.dtype`` for the structured array.
    _dtype = np.dtype({
        'names': ['order', 'wavenumber', 'power'],
        'formats': ['i4', 'f8', 'f8'],
    })

    def __init__(self, orders, wavenumbers, mode_counts=None,
                 shot_noise=None):

        self.size = np.size(orders) * np.size(wavenumbers)

        orders = np.sort(orders).tolist()

        # Sort wavenumbers and, if provided, the corresponding mode counts
        # with the same ordering.
        sort_order = np.argsort(wavenumbers)
        wavenumbers = np.array(wavenumbers)[sort_order]
        if mode_counts is not None:
            mode_counts = np.array(mode_counts)[sort_order]

        self.attrs = dict(
            orders=orders,
            wavenumbers=wavenumbers,
            mode_counts=mode_counts,
            shot_noise=shot_noise,
        )

        self.array, self._directory = self._initialise_array()

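    # Illustrative construction sketch (not part of the original module);
    # the numerical values are hypothetical:
    #
    #     >>> cartesian_data = CartesianArray(
    #     ...     orders=[0, 2], wavenumbers=[0.01, 0.02, 0.03],
    #     ...     mode_counts=[6, 24, 54], shot_noise=300.
    #     ... )
    #     >>> cartesian_data.size
    #     6
    #
    # The 'power' field is left uninitialised until it is assigned through
    # ``__setitem__``.
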
    def __getitem__(self, key):
        """Access the 'power' field.

        The access key can be an integer positional index, a slice, a
        tuple of (order, wavenumber) or a string, e.g. ``[-1]``, ``[:]``,
        ``[(0, 0.04)]`` or ``'power_0'``.

        Parameters
        ----------
        key : int, slice, tuple(int, float) or str
            'power' field access key.

        Returns
        -------
        float
            'power' field data entry.

        """
        position = self._find_position(key)

        return self.array['power'][position]

    def __setitem__(self, key, value):
        """Set the 'power' field value(s).

        Parameters
        ----------
        key : int, slice, tuple(int, float) or str
            'power' field access key.
        value : float
            'power' field data entry.

        See Also
        --------
        :meth:`.CartesianArray.__getitem__`

        """
        position = self._find_position(key)

        self.array['power'][position] = value

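    # Illustrative indexing sketch (not part of the original module),
    # continuing from the ``cartesian_data`` instance sketched above:
    #
    #     >>> cartesian_data[(0, 0.01)] = 1.2e4                   # by (order, wavenumber)
    #     >>> cartesian_data['power_0'] = [1.2e4, 9.8e3, 7.5e3]   # whole monopole
    #     >>> cartesian_data[-1]                                  # last entry in storage order
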
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        for attr, value in state.items():
            setattr(self, attr, value)

    def vectorise(self, pivot):
        r"""Return a data vector from the 'power' field.

        Vectorisation is performed by *pivoting* in one of the following
        orders of precedence:

        * 'order': ordered by the multipole order :math:`\ell`;
        * 'wavenumber': ordered by the wavenumber :math:`k`.

        Parameters
        ----------
        pivot : {'order', 'wavenumber'}
            Pivot order for vectorisation.

        Returns
        -------
        vectorised_data : :class:`numpy.ndarray`
            Vectorised power spectrum data.

        """
        if pivot == 'order':
            sort_order = ['order', 'wavenumber']
        elif pivot == 'wavenumber':
            sort_order = ['wavenumber', 'order']
        else:
            raise ValueError(f"Unknown `pivot` option: {pivot}.")

        return np.sort(self.array, order=sort_order)['power']

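    # Illustrative vectorisation sketch (not part of the original module):
    #
    #     >>> cartesian_data.vectorise('order')       # [P_0(k_1), ..., P_0(k_n), P_2(k_1), ...]
    #     >>> cartesian_data.vectorise('wavenumber')  # [P_0(k_1), P_2(k_1), P_0(k_2), ...]
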
    def _initialise_array(self):

        # Initialise the structured array ordered by multipole order.
        array = np.empty(self.size, dtype=self._dtype)
        array['order'] = np.repeat(
            self.attrs['orders'], len(self.attrs['wavenumbers'])
        )
        array['wavenumber'] = np.tile(
            self.attrs['wavenumbers'], len(self.attrs['orders'])
        )

        # Generate the dictionary mapping (order, wavenumber) tuples to
        # array positions.
        directory = self._gen_directory(
            self.attrs['orders'], self.attrs['wavenumbers']
        )

        return array, directory

    def _find_position(self, key):

        # If accessed by integer, reinterpret negative values as reverse
        # indexing.
        if isinstance(key, int):
            position = key if key >= 0 else key % self.size
            if position >= self.size:
                raise IndexingError(
                    f"Index {position} out of bounds for key: {key}."
                )
            return position

        # Access by slice.
        if isinstance(key, slice):
            return key

        # Access by a sequence of (order, wavenumber).
        if isinstance(key, Sequence) and not isinstance(key, str):
            position = self._directory[tuple(key)]
            return position

        # Access by a string of the form 'power_<ell>'.
        if isinstance(key, str):
            order = int(key.split('_')[-1])
            order_idx = self.attrs['orders'].index(order)
            length = len(self.attrs['wavenumbers'])
            position = slice(order_idx * length, (order_idx + 1) * length)
            return position

        raise TypeError(f"Invalid type for key: {key}.")

    @staticmethod
    def _gen_directory(orders, wavenumbers):

        directory = {
            tuple(key): pos
            for pos, key in enumerate(product(orders, wavenumbers))
        }

        return directory

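# Illustrative sketch (not part of the original module): the pickle-based
# round trip mirrors the .npz one and preserves the full instance including
# ``attrs``.  The file name is hypothetical and ``disc`` is assumed to be a
# ``DiscreteSpectrum`` constructed elsewhere.
#
#     >>> spherical_data = SphericalArray(disc)
#     >>> spherical_data.save("spherical_coefficients.pkl", ".pkl")
#     >>> spherical_data_reloaded = SphericalArray.load("spherical_coefficients.pkl")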