# -*- coding: utf-8 -*-
#
#
# TheVirtualBrain-Framework Package. This package holds all Data Management, and
# Web-UI helpful to run brain-simulations. To use it, you also need to download
# TheVirtualBrain-Scientific Package (for simulators). See content of the
# documentation-folder for more details. See also http://www.thevirtualbrain.org
#
# (c) 2012-2023, Baycrest Centre for Geriatric Care ("Baycrest") and others
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see <http://www.gnu.org/licenses/>.
#
#
# CITATION:
# When using The Virtual Brain for scientific publications, please cite it as explained here:
# https://www.thevirtualbrain.org/tvb/zwei/neuroscience-publications
#
#
from datetime import datetime
import importlib
import typing
import uuid
import numpy
import scipy.sparse
from tvb.basic.logger.builder import get_logger
from tvb.basic.neotraits.api import HasTraits, TupleEnum, Attr, List, NArray, Range, EnumAttr, Final, TVBEnum
from tvb.basic.neotraits.ex import TraitFinalAttributeError
from tvb.core.entities.generic_attributes import GenericAttributes
from tvb.core.neotraits.h5 import EquationScalar, SparseMatrix, ReferenceList
from tvb.core.neotraits.h5 import Uuid, Scalar, Accessor, DataSet, Reference, JsonFinal, Json, JsonRange, Enum
from tvb.core.neotraits.view_model import DataTypeGidAttr
from tvb.core.utils import string2date, date2string
from tvb.datatypes.equations import Equation, EquationsEnum
from tvb.storage.h5.file.exceptions import MissingDataSetException
from tvb.storage.storage_interface import StorageInterface
# Module-level logger, named after this module via TVB's logger builder.
LOGGER = get_logger(__name__)
class H5File(object):
    """
    A H5 based file format.
    This class implements reading and writing to a *specific* h5 based file format.
    A subclass of this defines a new file format.

    Scalar/dataset/reference fields are exposed as ``Accessor`` descriptor
    instances stored on ``self.__dict__``; ``iter_accessors`` discovers them by
    introspection, which is how the generic ``store``/``load_into`` work.
    """
    # Metadata key under which the fully qualified name of the writing
    # H5File subclass is persisted (see get_class_path / h5_class_from_file).
    KEY_WRITTEN_BY = 'written_by'
    # Flipped to True in __init__ when the underlying file was not a valid
    # TVB file yet (i.e. we are creating it now).
    is_new_file = False

    def __init__(self, path):
        # type: (str) -> None
        """
        Open (or create) the H5 file at ``path`` and declare the common
        header accessors shared by all TVB H5 formats.
        """
        self.path = path
        self.storage_manager = StorageInterface.get_storage_manager(self.path)
        # would be nice to have an opened state for the chunked api instead of the close_file=False

        # common scalar headers
        self.gid = Uuid(HasTraits.gid, self)
        self.written_by = Scalar(Attr(str), self, name=self.KEY_WRITTEN_BY)
        self.create_date = Scalar(Attr(str), self, name='create_date')
        self.type = Scalar(Attr(str), self, name='type')

        # Generic attributes descriptors (mirror GenericAttributes fields)
        self.generic_attributes = GenericAttributes()
        self.invalid = Scalar(Attr(bool), self, name='invalid')
        self.is_nan = Scalar(Attr(bool), self, name='is_nan')
        self.subject = Scalar(Attr(str), self, name='subject')
        self.state = Scalar(Attr(str), self, name='state')
        self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
        self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
        self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
        self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
        self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
        self.operation_tag = Scalar(Attr(str, required=False), self, name='operation_tag')
        self.parent_burst = Uuid(Attr(uuid.UUID, required=False), self, name='parent_burst')
        self.visible = Scalar(Attr(bool), self, name='visible')
        self.metadata_cache = None
        # Keep a list with datasets for which we should write metadata before closing the file
        self.expandable_datasets = []

        # A file without the TVB metadata is considered new: stamp it with
        # the writing class so h5_class_from_file can re-open it later.
        if not self.storage_manager.is_valid_tvb_file():
            self.written_by.store(self.get_class_path())
            self.is_new_file = True

    @classmethod
    def file_name_base(cls):
        """Return the base file name for this format: class name minus the 'H5' suffix."""
        return cls.__name__.replace("H5", "")

    def read_subtype_attr(self):
        """Hook for subclasses that persist a subtype discriminator; base returns None."""
        return None

    def get_class_path(self):
        """Return the fully qualified name (module.ClassName) of this H5File class."""
        return self.__class__.__module__ + '.' + self.__class__.__name__

    def iter_accessors(self):
        # type: () -> typing.Generator[Accessor]
        """Yield every Accessor descriptor declared on this instance."""
        for accessor in self.__dict__.values():
            if isinstance(accessor, Accessor):
                yield accessor

    def iter_datasets(self):
        """Yield only the DataSet accessors declared on this instance."""
        for dataset in self.__dict__.values():
            if isinstance(dataset, DataSet):
                yield dataset

    def __enter__(self):
        # Context-manager support: `with H5File(path) as f: ...`
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Flush pending metadata for expandable datasets, then close the storage file."""
        for dataset in self.expandable_datasets:
            self.storage_manager.set_metadata(dataset.meta.to_dict(), dataset.field_name)
        self.storage_manager.close_file()

    def store(self, datatype, scalars_only=False, store_references=True):
        # type: (HasTraits, bool, bool) -> None
        """
        Persist the traited attributes of ``datatype`` through this file's accessors.

        :param datatype: the HasTraits instance to serialize
        :param scalars_only: when True, only Scalar accessors are written
        :param store_references: when False, Reference accessors are skipped
        """
        for accessor in self.iter_accessors():
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attribute that does not seem to belong to a traited type
                # accessor is an independent Accessor
                continue
            if scalars_only and not isinstance(accessor, Scalar):
                continue
            if not store_references and isinstance(accessor, Reference):
                continue
            accessor.store(getattr(datatype, f_name))

    def load_into(self, datatype):
        # type: (HasTraits) -> None
        """
        Populate ``datatype`` from this file. References are NOT loaded
        recursively; optional data missing from the file loads as None.
        """
        for accessor in self.iter_accessors():
            if isinstance(accessor, (Reference, ReferenceList)):
                # we do not load references recursively
                continue
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attribute that does not seem to belong to a traited type
                continue

            # handle optional data, that will be missing from the h5 files
            try:
                value = accessor.load()
            except MissingDataSetException:
                if accessor.trait_attribute.required:
                    raise
                else:
                    value = None

            if isinstance(accessor, JsonFinal):
                # Final dicts cannot be rebound; update the existing dict in place.
                current_attr = getattr(datatype, f_name)
                for k, v in current_attr.items():
                    current_attr[k] = value[k]
            else:
                try:
                    setattr(datatype, f_name, value)
                except TraitFinalAttributeError:
                    # Re-assigning a Final attribute is only tolerated when the
                    # stored value equals what the datatype already holds.
                    if getattr(datatype, f_name) != value:
                        raise
                    else:
                        LOGGER.info(
                            'Cannot overwrite Final attribute: {} on {}, but it already has the expected value'.format(
                                f_name, type(datatype).__name__))

    def store_generic_attributes(self, generic_attributes, create=True):
        # type: (GenericAttributes, bool) -> None
        """
        Write the GenericAttributes header fields to the file.

        :param create: when True, also stamp the creation date (skip on update)
        """
        # write_metadata creation time, serializer class name, etc
        if create:
            self.create_date.store(date2string(datetime.now()))

        self.generic_attributes.fill_from(generic_attributes)
        self.invalid.store(self.generic_attributes.invalid)
        self.is_nan.store(self.generic_attributes.is_nan)
        self.subject.store(self.generic_attributes.subject)
        self.state.store(self.generic_attributes.state)
        self.user_tag_1.store(self.generic_attributes.user_tag_1)
        self.user_tag_2.store(self.generic_attributes.user_tag_2)
        self.user_tag_3.store(self.generic_attributes.user_tag_3)
        self.user_tag_4.store(self.generic_attributes.user_tag_4)
        self.user_tag_5.store(self.generic_attributes.user_tag_5)
        self.operation_tag.store(self.generic_attributes.operation_tag)
        self.visible.store(self.generic_attributes.visible)
        # parent_burst is stored as a UUID, from its hex string form
        if self.generic_attributes.parent_burst is not None:
            self.parent_burst.store(uuid.UUID(self.generic_attributes.parent_burst))

    def load_generic_attributes(self):
        # type: () -> GenericAttributes
        """
        Read the GenericAttributes header fields from the file and return the
        populated GenericAttributes instance. Optional fields (operation_tag,
        parent_burst) load as None when absent.
        """
        self.generic_attributes.invalid = self.invalid.load()
        self.generic_attributes.is_nan = self.is_nan.load()
        self.generic_attributes.subject = self.subject.load()
        self.generic_attributes.state = self.state.load()
        self.generic_attributes.user_tag_1 = self.user_tag_1.load()
        self.generic_attributes.user_tag_2 = self.user_tag_2.load()
        self.generic_attributes.user_tag_3 = self.user_tag_3.load()
        self.generic_attributes.user_tag_4 = self.user_tag_4.load()
        self.generic_attributes.user_tag_5 = self.user_tag_5.load()
        self.generic_attributes.visible = self.visible.load()
        self.generic_attributes.create_date = string2date(str(self.create_date.load())) or None
        try:
            self.generic_attributes.operation_tag = self.operation_tag.load()
        except MissingDataSetException:
            self.generic_attributes.operation_tag = None
        try:
            burst = self.parent_burst.load()
            # stored as uuid.UUID; exposed as a hex string on GenericAttributes
            self.generic_attributes.parent_burst = burst.hex if burst is not None else None
        except MissingDataSetException:
            self.generic_attributes.parent_burst = None
        return self.generic_attributes

    def gather_references(self, datatype_cls=None):
        """
        Collect (trait_attribute, gid(s)) pairs for all Reference and
        ReferenceList accessors in this file.

        :param datatype_cls: when given, prefer the attribute declared on that
            class over the accessor's own trait_attribute (if the name matches)
        """
        ret = []
        for accessor in self.iter_accessors():
            trait_attribute = None
            if datatype_cls:
                if hasattr(datatype_cls, accessor.field_name):
                    trait_attribute = getattr(datatype_cls, accessor.field_name)
            if not trait_attribute:
                trait_attribute = accessor.trait_attribute
            if isinstance(accessor, Reference):
                ret.append((trait_attribute, accessor.load()))
            if isinstance(accessor, ReferenceList):
                hex_gids = accessor.load()
                gids = [uuid.UUID(hex_gid) for hex_gid in hex_gids]
                ret.append((trait_attribute, gids))
        return ret

    def determine_datatype_from_file(self):
        """Import and return the datatype class named by this file's 'type' header."""
        config_type = self.type.load()
        package, cls_name = config_type.rsplit('.', 1)
        module = importlib.import_module(package)
        datatype_cls = getattr(module, cls_name)
        return datatype_cls

    @staticmethod
    def determine_type(path):
        # type: (str) -> typing.Type[HasTraits]
        """
        Read the 'type' metadata of the file at ``path`` and import the
        corresponding HasTraits subclass; fall back to HasTraits when absent.
        """
        # NOTE(review): get_metadata_param is not defined in this chunk of the
        # file — presumably declared elsewhere on H5File; verify.
        type_class_fqn = H5File.get_metadata_param(path, 'type')
        if type_class_fqn is None:
            return HasTraits
        package, cls_name = type_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls

    @staticmethod
    def h5_class_from_file(path):
        # type: (str) -> typing.Type[H5File]
        """Resolve the H5File subclass recorded in the 'written_by' metadata of ``path``."""
        h5file_class_fqn = H5File.get_metadata_param(path, H5File.KEY_WRITTEN_BY)
        if h5file_class_fqn is None:
            # NOTE(review): this returns an H5File *instance*, although the type
            # comment promises a class and from_file() will call the result with
            # cls(path). Returning the class H5File looks intended — confirm.
            return H5File(path)
        package, cls_name = h5file_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls

    @staticmethod
    def from_file(path):
        # type: (str) -> H5File
        """Open ``path`` with the H5File subclass that originally wrote it."""
        cls = H5File.h5_class_from_file(path)
        return cls(path)

    def __repr__(self):
        return '<{}("{}")>'.format(type(self).__name__, self.path)
class ViewModelH5(H5File):
    """
    H5 file format for persisting ViewModel instances.

    Unlike plain H5File subclasses, the accessors here are not declared by
    hand: they are generated from the ViewModel class' declarative attributes
    (see _generate_accessors).
    """
    # TODO it will be good to be able to just call with H5File.from_file(h5_path) as f for ViewModelH5 also

    def __init__(self, path, view_model):
        # view_model may be an instance or a class; only its type is kept
        super(ViewModelH5, self).__init__(path)
        self.view_model = type(view_model)
        attrs = self.view_model.declarative_attrs
        self._generate_accessors(attrs)

    def _generate_accessors(self, view_model_fields):
        """
        Create one accessor per declarative attribute of the view model and
        attach it to this file under the attribute's field name.

        The branch ORDER below is significant: more specific checks
        (DataTypeGidAttr, NArray, List) come before the generic Attr handling,
        and within it Range/Final are tested before HasTraits/TVBEnum.
        """
        for attr_name in view_model_fields:
            attr = getattr(self.view_model, attr_name)
            if not issubclass(type(attr), Attr):
                raise ValueError('expected a Attr, got a {}'.format(type(attr)))

            if isinstance(attr, DataTypeGidAttr):
                # reference to an existing datatype, stored as its gid
                ref = Uuid(attr, self)
            elif isinstance(attr, NArray):
                ref = DataSet(attr, self)
            elif isinstance(attr, List):
                if issubclass(attr.element_type, HasTraits):
                    ref = ReferenceList(attr, self)
                else:
                    ref = Json(attr, self)
            elif issubclass(type(attr), Attr):
                if attr.field_type is scipy.sparse.spmatrix:
                    ref = SparseMatrix(attr, self)
                elif attr.field_type is numpy.random.RandomState:
                    # random state is intentionally not persisted
                    continue
                elif attr.field_type is uuid.UUID:
                    ref = Uuid(attr, self)
                elif issubclass(attr.field_type, (Equation, EquationsEnum)):
                    ref = EquationScalar(attr, self)
                elif attr.field_type is Range:
                    ref = JsonRange(attr, self)
                elif isinstance(attr, Final):
                    if attr.field_type == dict:
                        ref = JsonFinal(attr, self)
                    elif attr.field_type == list:
                        ref = Json(attr, self)
                    else:
                        ref = Scalar(attr, self)
                elif issubclass(attr.field_type, (HasTraits, TupleEnum)):
                    # nested view-model objects are stored as references
                    ref = Reference(attr, self)
                elif issubclass(attr.field_type, TVBEnum):
                    ref = Enum(attr, self)
                else:
                    ref = Scalar(attr, self)
            else:
                ref = Accessor(attr, self)
            setattr(self, attr.field_name, ref)

    def gather_datatypes_references(self):
        """
        Mind that ViewModelH5 stores references towards ViewModel objects (eg. Coupling) as Reference attributes, and
        references towards existent Datatypes (eg. Connectivity) as Uuid.
        Thus, the method gather_references will return only references towards other ViewModels, and we need this
        method to gather also datatypes references.
        """
        ret = []
        for accessor in self.iter_accessors():
            # Uuid accessors that are not References point at stored datatypes;
            # bookkeeping uuids (gid, parent_burst, ...) are excluded.
            if isinstance(accessor, Uuid) and not isinstance(accessor, Reference):
                if accessor.field_name in ('gid', 'parent_burst', 'operation_group_gid'):
                    continue
                ret.append((accessor.trait_attribute, accessor.load()))
        return ret