Source code for tvb.analyzers.pca

# -*- coding: utf-8 -*-
#
#
# TheVirtualBrain-Scientific Package. This package holds all simulators, and
# analysers necessary to run brain-simulations. You can use it stand alone or
# in conjunction with TheVirtualBrain-Framework Package. See content of the
# documentation-folder for more details. See also http://www.thevirtualbrain.org
#
# (c) 2012-2023, Baycrest Centre for Geriatric Care ("Baycrest") and others
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program.  If not, see <http://www.gnu.org/licenses/>.
#
#
#   CITATION:
# When using The Virtual Brain for scientific publications, please cite it as explained here:
# https://www.thevirtualbrain.org/tvb/zwei/neuroscience-publications
#
#

"""
Perform Principal Component Analysis (PCA) on a TimeSeries datatype and return
a PrincipalComponents datatype.

.. moduleauthor:: Stuart A. Knock <Stuart@tvb.invalid>

"""

import numpy
import tvb.datatypes.mode_decompositions as mode_decompositions
from tvb.basic.logger.builder import get_logger
from tvb.basic.neotraits.api import HasTraits, Attr, narray_describe

log = get_logger(__name__)


def _compute_weights_and_fractions(data):
    """
    The code for this function has been taken and adapted from Matplotlib 2.1.0
    Aug 2019
    """
    n, m = data.shape
    if n < m:
        raise RuntimeError('we assume data in input is organized with '
                            'numrows>numcols')

    mu = data.mean(axis=0)
    sigma = data.std(axis=0)

    a = (data - mu) / sigma
    U, s, Vh = numpy.linalg.svd(a, full_matrices=False)
    Wt = Vh
    s = s ** 2

    vars = s / len(s)
    fracs = vars / vars.sum()

    return fracs, Wt

"""
Return principal component weights and the fraction of the variance that 
they explain. 
    
PCA takes time-points as observations and nodes as variables.
    
NOTE: The TimeSeries must be longer(more time-points) than the number of
        nodes -- Mostly a problem for TimeSeriesSurface datatypes, which, if 
        sampled at 1024Hz, would need to be greater than 16 seconds long.
"""


# # TODO: Maybe should support first N components or neccessary components to
# #      explain X% of the variance. NOTE: For default surface the weights
# #      matrix has a size ~ 2GB * modes * vars...
[docs]def compute_pca(time_series):
    """
    # type: (TimeSeries)  -> PrincipalComponents
    Compute the temporal covariance between nodes in the time_series.

    Parameters
    __________
    time_series : TimeSeries
    The timeseries to which the PCA is to be applied.
    """

    ts_shape = time_series.data.shape

    # Need more measurements than variables
    if ts_shape[0] < ts_shape[2]:
        msg = "PCA requires a longer timeseries (tpts > number of nodes)."
        log.error(msg)
        raise Exception(msg)

    # (nodes, nodes, state-variables, modes)
    weights_shape = (ts_shape[2], ts_shape[2], ts_shape[1], ts_shape[3])
    log.info("weights shape will be: %s" % str(weights_shape))

    fractions_shape = (ts_shape[2], ts_shape[1], ts_shape[3])
    log.info("fractions shape will be: %s" % str(fractions_shape))

    weights = numpy.zeros(weights_shape)
    fractions = numpy.zeros(fractions_shape)

    # One inter-node temporal covariance matrix for each state-var & mode.
    for mode in range(ts_shape[3]):
        for var in range(ts_shape[1]):
            data = time_series.data[:, var, :, mode]

            fracts, w = _compute_weights_and_fractions(data)
            fractions[:, var, mode] = fracts
            weights[:, :, var, mode] = w

    log.debug("fractions")
    log.debug(narray_describe(fractions))
    log.debug("weights")
    log.debug(narray_describe(weights))

    pca_result = mode_decompositions.PrincipalComponents(
        source=time_series,
        fractions=fractions,
        weights=weights,
        norm_source=numpy.array([]),
        component_time_series=numpy.array([]),
        normalised_component_time_series=numpy.array([]))

    return pca_result