Source code for tvb.adapters.uploaders.tumor_dataset_importer

# -*- coding: utf-8 -*-
#
#
# TheVirtualBrain-Framework Package. This package holds all Data Management, and
# Web-UI helpful to run brain-simulations. To use it, you also need to download
# TheVirtualBrain-Scientific Package (for simulators). See content of the
# documentation-folder for more details. See also http://www.thevirtualbrain.org
#
# (c) 2012-2023, Baycrest Centre for Geriatric Care ("Baycrest") and others
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program.  If not, see <http://www.gnu.org/licenses/>.
#
#
#   CITATION:
# When using The Virtual Brain for scientific publications, please cite it as explained here:
# https://www.thevirtualbrain.org/tvb/zwei/neuroscience-publications
#
#


"""
Import Brain Tumor dataset

.. moduleauthor:: Bogdan Valean <bogdan.valean@codemart.ro>
.. moduleauthor:: Robert Vincze <robert.vincze@codemart.ro>
"""

import csv
import os
import uuid
import numpy as np
import json
from tvb.adapters.datatypes.db.graph import CorrelationCoefficientsIndex
from tvb.adapters.datatypes.db.time_series import TimeSeriesIndex
from tvb.adapters.datatypes.h5.graph_h5 import CorrelationCoefficientsH5
from tvb.adapters.datatypes.h5.time_series_h5 import TimeSeriesRegionH5
from tvb.adapters.uploaders.csv_connectivity_importer import CSVDelimiterOptionsEnum
from tvb.adapters.uploaders.zip_connectivity_importer import ZIPConnectivityImporter, ZIPConnectivityImporterModel
from tvb.config.algorithm_categories import DEFAULTDATASTATE_RAW_DATA
from tvb.core.adapters.abcadapter import ABCAdapter
from tvb.core.adapters.abcuploader import ABCUploader, ABCUploaderForm
from tvb.core.entities.generic_attributes import GenericAttributes
from tvb.core.entities.storage import dao
from tvb.core.neocom import h5
from tvb.core.neotraits.forms import TraitUploadField
from tvb.core.neotraits.view_model import Str, ViewModel
from tvb.datatypes.graph import CorrelationCoefficients
from tvb.datatypes.time_series import TimeSeriesRegion, TimeSeries

WARNING_MSG = "File {} does not exist."


[docs]class TumorDatasetImporterModel(ViewModel):
    data_file = Str(
        label='Tumor Dataset (BIDS + zip)'
    )


[docs]class TumorDatasetImporterForm(ABCUploaderForm):

    def __init__(self):
        super(TumorDatasetImporterForm, self).__init__()
        self.data_file = TraitUploadField(TumorDatasetImporterModel.data_file, '.zip', 'data_file')

[docs]    @staticmethod
    def get_upload_information():
        return {
            'data_file': '.zip'
        }

[docs]    @staticmethod
    def get_view_model():
        return TumorDatasetImporterModel


[docs]class TumorDatasetImporter(ABCAdapter):
    _ui_name = "Tumor Dataset"
    _ui_description = "Download manually Tumor Dataset from the EBRAINS KG and import it into TVB."

    MAXIMUM_DOWNLOAD_RETRIES = 3
    SLEEP_TIME = 3

    CONN_ZIP_FILE = "SC.zip"
    FC_MAT_FILE = "FC.mat"
    FC_DATASET_NAME = "FC_cc_DK68"
    TIME_SERIES_CSV_FILE = "HRF.csv"

[docs]    def get_form_class(self):
        return TumorDatasetImporterForm

[docs]    def get_output(self):
        return []

[docs]    def get_required_disk_size(self, view_model):
        return -1

[docs]    def get_required_memory_size(self, view_model):
        return -1

    def __import_tumor_connectivity(self, conn_folder, patient, user_tag):

        connectivity_zip = os.path.join(conn_folder, self.CONN_ZIP_FILE)
        if not os.path.exists(connectivity_zip):
            self.log.warning(WARNING_MSG.format(connectivity_zip))
            return

        import_conn_adapter = self.build_adapter_from_class(ZIPConnectivityImporter)
        operation = dao.get_operation_by_id(self.operation_id)
        import_conn_adapter.extract_operation_data(operation)
        import_conn_model = ZIPConnectivityImporterModel()
        import_conn_model.uploaded = connectivity_zip
        import_conn_model.data_subject = patient
        import_conn_model.generic_attributes.user_tag_1 = user_tag

        connectivity_index = import_conn_adapter.launch(import_conn_model)

        self.generic_attributes.subject = patient
        self.generic_attributes.user_tag_1 = user_tag
        self._capture_operation_results([connectivity_index])
        connectivity_index.fk_from_operation = self.operation_id
        dao.store_entity(connectivity_index)

        return connectivity_index.gid

    def __import_time_series_csv_datatype(self, hrf_folder, connectivity_gid, patient, user_tag):

        path = os.path.join(hrf_folder, self.TIME_SERIES_CSV_FILE)
        if not os.path.exists(path):
            self.log.warning(WARNING_MSG.format(path))
            return

        with open(path) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=CSVDelimiterOptionsEnum.COMMA.value)
            ts = list(csv_reader)

        ts_data = np.array(ts, dtype=np.float64).reshape((len(ts), 1, len(ts[0]), 1))
        ts_time = np.random.rand(ts_data.shape[0], )

        project = dao.get_project_by_id(self.current_project_id)

        ts_gid = uuid.uuid4()
        h5_path = "TimeSeries_{}.h5".format(ts_gid.hex)
        operation_folder = self.storage_interface.get_project_folder(project.name, str(self.operation_id))
        h5_path = os.path.join(operation_folder, h5_path)

        conn = h5.load_from_gid(connectivity_gid)
        ts = TimeSeriesRegion()
        ts.data = ts_data
        ts.time = ts_time
        ts.gid = ts_gid
        ts.connectivity = conn
        generic_attributes = GenericAttributes()
        generic_attributes.user_tag_1 = user_tag
        generic_attributes.state = DEFAULTDATASTATE_RAW_DATA

        with TimeSeriesRegionH5(h5_path) as ts_h5:
            ts_h5.store(ts)
            ts_h5.nr_dimensions.store(4)
            ts_h5.subject.store(patient)
            ts_h5.store_generic_attributes(generic_attributes)

        ts_index = TimeSeriesIndex()
        ts_index.gid = ts_gid.hex
        ts_index.fk_from_operation = self.operation_id
        ts_index.time_series_type = "TimeSeriesRegion"
        ts_index.data_length_1d = ts_data.shape[0]
        ts_index.data_length_2d = ts_data.shape[1]
        ts_index.data_length_3d = ts_data.shape[2]
        ts_index.data_length_4d = ts_data.shape[3]
        ts_index.data_ndim = len(ts_data.shape)
        ts_index.sample_period_unit = 'ms'
        ts_index.sample_period = TimeSeries.sample_period.default
        ts_index.sample_rate = 1024.0
        ts_index.subject = patient
        ts_index.state = DEFAULTDATASTATE_RAW_DATA
        ts_index.labels_ordering = json.dumps(list(TimeSeries.labels_ordering.default))
        ts_index.labels_dimensions = json.dumps(TimeSeries.labels_dimensions.default)
        ts_index.visible = False  # we don't want to show these TimeSeries because they are dummy
        dao.store_entity(ts_index)

        return ts_gid

    def __import_pearson_coefficients_datatype(self, fc_folder, patient, user_tag, ts_gid):

        path = os.path.join(fc_folder, self.FC_MAT_FILE)
        if not os.path.exists(path):
            self.log.warning(WARNING_MSG.format(path))
            return

        result = ABCUploader.read_matlab_data(path, self.FC_DATASET_NAME)
        result = result.reshape((result.shape[0], result.shape[1], 1, 1))

        project = dao.get_project_by_id(self.current_project_id)

        pearson_gid = uuid.uuid4()
        h5_path = "CorrelationCoefficients_{}.h5".format(pearson_gid.hex)
        operation_folder = self.storage_interface.get_project_folder(project.name, str(self.operation_id))
        h5_path = os.path.join(operation_folder, h5_path)

        generic_attributes = GenericAttributes()
        generic_attributes.user_tag_1 = user_tag
        generic_attributes.state = DEFAULTDATASTATE_RAW_DATA

        with CorrelationCoefficientsH5(h5_path) as pearson_correlation_h5:
            pearson_correlation_h5.array_data.store(result)
            pearson_correlation_h5.gid.store(pearson_gid)
            pearson_correlation_h5.source.store(ts_gid)
            pearson_correlation_h5.labels_ordering.store(CorrelationCoefficients.labels_ordering.default)
            pearson_correlation_h5.subject.store(patient)
            pearson_correlation_h5.store_generic_attributes(generic_attributes)

        pearson_correlation_index = CorrelationCoefficientsIndex()
        pearson_correlation_index.gid = pearson_gid.hex
        pearson_correlation_index.fk_from_operation = self.operation_id
        pearson_correlation_index.subject = patient
        pearson_correlation_index.state = DEFAULTDATASTATE_RAW_DATA
        pearson_correlation_index.ndim = 4
        pearson_correlation_index.fk_source_gid = ts_gid.hex  # we need a random gid here to store the index
        pearson_correlation_index.has_valid_time_series = False
        dao.store_entity(pearson_correlation_index)

    def __import_from_folder(self, datatype_folder, patient, user_tag):
        conn_gid = self.__import_tumor_connectivity(datatype_folder, patient, user_tag)
        # The Time Series are invisible in the UI and are imported
        # just so we can link them with the Pearson Coefficients
        ts_gid = self.__import_time_series_csv_datatype(datatype_folder, conn_gid, patient, user_tag)
        self.__import_pearson_coefficients_datatype(datatype_folder, patient, user_tag, ts_gid)

[docs]    def launch(self, view_model):
        # type: (TumorDatasetImporterModel) -> []
        """
        Download the Tumor Dataset and then import its data
        (currently only the connectivities and pearson coefficients (FC) are imported).
        """
        structure = self.storage_interface.unpack_zip(view_model.data_file, self.get_storage_path())
        subject_folders = {}
        for name in structure:
            if os.path.isdir(name):
                user_tag = os.path.split(name)[1]
                if user_tag.startswith("sub-"):
                    subject_folders[user_tag] = name

        for patient, patient_path in subject_folders.items():
            root_folder_imported = False
            for user_tag in os.listdir(patient_path):
                datatype_folder = os.path.join(patient_path, user_tag)
                if os.path.isdir(datatype_folder):
                    self.__import_from_folder(datatype_folder, patient, user_tag)
                elif not root_folder_imported:
                    root_folder_imported = True
                    self.__import_from_folder(patient_path, patient, "")

        self.log.debug("Importing Tumor Dataset has been successfully completed!")