Source code for dispel.data.measures

"""A module containing models for measures."""
from typing import Any, Callable, ClassVar, Dict, List, Optional, Type, Union, cast

import numpy as np
import pandas as pd

from dispel.data.flags import FlagMixIn
from dispel.data.raw import MissingColumnError
from dispel.data.validators import RangeValidator
from dispel.data.values import AbbreviatedValue as AV
from dispel.data.values import (
    DefinitionId,
    Value,
    ValueDefinition,
    ValueDefinitionPrototype,
    ValueSet,
)


class MeasureValue(FlagMixIn, Value):
    """A measure value.

    Combines :class:`~dispel.data.values.Value` with
    :class:`~dispel.data.flags.FlagMixIn`.
    """

    def __repr__(self):
        head = f"<MeasureValue ({self.definition}): {self.value} "
        return head + f"({self.flag_count_repr})>"

    @staticmethod
    def _to_string(value):
        """Return ``str(value)``, or an empty string if ``value`` is None."""
        if value is None:
            return ""
        return str(value)

    def to_dict(self, stringify: bool = False) -> Dict[str, Optional[Any]]:
        """Summarize the measure value as a dictionary.

        Parameters
        ----------
        stringify
            If ``True``, the measure value and the range bounds are converted
            to strings (absent bounds become empty strings). If ``False``,
            they are reported unchanged.

        Returns
        -------
        Dict[str, Optional[Any]]
            A dictionary with the keys ``measure_id``, ``measure_name``,
            ``measure_value``, ``measure_unit``, ``measure_type``,
            ``measure_min`` and ``measure_max``.
        """
        definition = self.definition

        # Bounds are only reported when the definition uses a range validator.
        lower = upper = None
        validator = definition.validator
        if isinstance(validator, RangeValidator):
            lower, upper = validator.lower_bound, validator.upper_bound

        value = self.value
        if stringify:
            value = str(value)
            lower = self._to_string(lower)
            upper = self._to_string(upper)

        return {
            "measure_id": str(self.id),
            "measure_name": definition.name,
            "measure_value": value,
            "measure_unit": definition.unit,
            "measure_type": definition.data_type,
            "measure_min": lower,
            "measure_max": upper,
        }
def _join_not_none(separator, values): return separator.join(map(str, filter(lambda x: x is not None, values)))
class MeasureId(DefinitionId):
    """The definition of a measure id for a task.

    The id is assembled from the lower-cased abbreviations of the task name,
    the modalities (joined with ``_``), the measure name and the aggregation,
    in that order, joined with ``-``. Parts that are not provided are omitted.

    Parameters
    ----------
    task_name
        The name and abbreviation of the task. Note that if no abbreviation is
        provided the name is used directly in the id.
    measure_name
        The name of the measure and its abbreviation.
    modalities
        The modalities and their abbreviations under which the measure is
        constituted.
    aggregation
        A method that was used to aggregate a sequence of the underlying
        measure, e.g., for the measure ``mean response time`` it would be
        ``mean``.

    Notes
    -----
    The abbreviations of values are passed using
    :class:`~dispel.data.values.AbbreviatedValue`. To generate the actual id
    the `.abbr` accessor is used. If one passes only strings, the class
    actually wraps those into ``AbbreviatedValue`` instances.

    Examples
    --------
    >>> from dispel.data.values import AbbreviatedValue as AV
    >>> from dispel.data.measures import MeasureId
    >>> MeasureId(
    ...     task_name=AV('Cognitive Processing Speed', 'CPS'),
    ...     measure_name=AV('reaction time', 'rt'),
    ...     modalities=[AV('digit-to-digit', 'dtd')],
    ...     aggregation='mean'
    ... )
    cps-dtd-rt-mean
    """

    def __init__(
        self,
        task_name: Union[str, AV],
        measure_name: Union[str, AV],
        modalities: Optional[List[Union[str, AV]]] = None,
        aggregation: Optional[Union[str, AV]] = None,
    ):
        self.task_name = AV.wrap(task_name)
        self.measure_name = AV.wrap(measure_name)
        self.modalities = (
            [AV.wrap(modality) for modality in modalities] if modalities else None
        )
        self.aggregation = None
        if aggregation:
            self.aggregation = AV.wrap(aggregation)

        # Build the individual id components; optional ones stay ``None`` and
        # are dropped when joining.
        task_part = self.task_name.abbr.lower()
        modality_part = None
        if self.modalities:
            modality_part = "_".join(
                modality.abbr.lower() for modality in self.modalities
            )
        measure_part = self.measure_name.abbr.lower()
        aggregation_part = None
        if self.aggregation:
            aggregation_part = self.aggregation.abbr.lower()

        super().__init__(
            _join_not_none(
                "-", [task_part, modality_part, measure_part, aggregation_part]
            )
        )

    @classmethod
    def from_str(cls, value: str) -> DefinitionId:
        """See :meth:`dispel.data.values.DefinitionId.from_str`.

        Parameters
        ----------
        value
            The string from which the definition id is to be constructed.

        Raises
        ------
        NotImplementedError
            Always raised. This method is not implemented since there is no
            unambiguous parsing of task ids.
        """
        raise NotImplementedError("Not unambiguous parsing of ids possible")
class MeasureValueDefinition(ValueDefinition):
    """The definition of measures from tasks.

    The definition id is a :class:`MeasureId` built from the task name,
    measure name, modalities and aggregation. The definition name is composed
    of the upper-cased task abbreviation, the modalities, the aggregation and
    the measure name, separated by spaces; parts not provided are omitted.

    Parameters
    ----------
    task_name
        The full name of the task and its abbreviation, e.g.,
        ``Cognitive Processing Speed test`` and ``CPS`` passed using
        :class:`~dispel.data.values.AbbreviatedValue`.
    measure_name
        The name of the measure, e.g. ``reaction time`` and its abbreviation
        passed using :class:`~dispel.data.values.AbbreviatedValue`. Note that
        aggregation methods are specified in ``aggregation`` and should not be
        directly part of the measure name.
    unit
        See :class:`~dispel.data.values.ValueDefinition`.
    description
        See :class:`~dispel.data.values.ValueDefinition`.
    data_type
        See :class:`~dispel.data.values.ValueDefinition`.
    validator
        See :class:`~dispel.data.values.ValueDefinition`.
    modalities
        The modalities of the tasks, i.e. if there is more than one variant of
        the task. An example would be the ``digit-to-digit`` and
        ``symbol-to-digit`` or ``predefined key 1``, ``predefined key 2`` and
        ``random key`` variants of the CPS test. Abbreviations of the
        modalities can be passed using
        :class:`~dispel.data.values.AbbreviatedValue`.
    aggregation
        If the measure is the result of an aggregation, the method that was
        used to aggregate. E.g. for ``mean response time`` it would be
        ``mean``. Abbreviations are passed using
        :class:`~dispel.data.values.AbbreviatedValue`.
    precision
        See :class:`~dispel.data.values.ValueDefinition`.

    Examples
    --------
    >>> from dispel.data.values import AbbreviatedValue as AV
    >>> from dispel.data.measures import MeasureValueDefinition
    >>> from dispel.data.validators import RangeValidator
    >>> MeasureValueDefinition(
    ...     task_name = AV('Cognitive Processing Speed test', 'CPS'),
    ...     measure_name = AV('response time', 'rt'),
    ...     unit = 's',
    ...     description = 'The mean time to respond to a presented stimulus',
    ...     data_type = 'float64',
    ...     validator = RangeValidator(lower_bound=0),
    ...     modalities = [
    ...         AV('digit-to-digit', 'dtd'),
    ...         AV('predefined key 1', 'key1')
    ...     ],
    ...     aggregation = 'mean'
    ... )
    <MeasureValueDefinition: cps-dtd_key1-rt-mean (CPS digit-to-digit ...>
    """

    def __init__(
        self,
        task_name: Union[str, AV],
        measure_name: Union[str, AV],
        unit: Optional[str] = None,
        description: Optional[str] = None,
        data_type: Optional[str] = None,
        validator: Optional[Callable[[Any], None]] = None,
        modalities: Optional[List[Union[str, AV]]] = None,
        aggregation: Optional[Union[str, AV]] = None,
        precision: Optional[int] = None,
    ):
        self.task_name = AV.wrap(task_name)
        self.measure_name = AV.wrap(measure_name)
        self.modalities = (
            [AV.wrap(modality) for modality in modalities] if modalities else None
        )
        self.aggregation = None
        if aggregation:
            self.aggregation = AV.wrap(aggregation)

        measure_id = MeasureId(
            task_name=self.task_name,
            measure_name=self.measure_name,
            modalities=self.modalities,
            aggregation=aggregation,
        )

        # Human-readable name: "<TASK ABBR> <modalities> <aggregation> <measure>".
        modality_names = None
        if self.modalities:
            modality_names = " ".join(str(modality) for modality in self.modalities)
        name_parts = [
            self.task_name.abbr.upper(),
            modality_names,
            self.aggregation or None,
            self.measure_name,
        ]
        full_name = _join_not_none(" ", name_parts)

        super().__init__(
            id_=measure_id,
            name=full_name,
            unit=unit,
            description=description,
            data_type=data_type,
            validator=validator,
            precision=precision,
        )
class MeasureValueDefinitionPrototype(ValueDefinitionPrototype):
    """A task measure value definition prototype.

    This is a convenience class that populates the ``cls`` argument with the
    :class:`~dispel.data.measures.MeasureValueDefinition` class unless the
    caller passes ``cls`` explicitly.
    """

    def __init__(self, **kwargs: Any):
        # Honor an explicitly provided ``cls``; otherwise fall back to the
        # measure value definition class.
        if "cls" in kwargs:
            definition_cls = kwargs.pop("cls")
        else:
            definition_cls = MeasureValueDefinition
        super().__init__(cls=definition_cls, **kwargs)
def row_to_definition(row: pd.Series) -> ValueDefinition:
    """Convert a pandas series to a value definition.

    Parameters
    ----------
    row
        A pandas series containing definition information. The fields
        ``measure_id``, ``measure_name``, ``measure_unit`` and
        ``measure_type`` are required. If both ``measure_min`` and
        ``measure_max`` are present, a range validator is created from them.

    Returns
    -------
    ValueDefinition
        The corresponding value definition.

    Raises
    ------
    MissingColumnError
        If required fields are missing from the pandas' series.
    """
    available = set(row.index)

    expected_columns = {"measure_id", "measure_name", "measure_unit", "measure_type"}
    if not expected_columns.issubset(available):
        raise MissingColumnError(expected_columns - available)

    validator = None
    if {"measure_min", "measure_max"} <= available:
        # ``np.float64`` is used instead of the ``np.float_`` alias, which was
        # removed in NumPy 2.0; both refer to the same double-precision type.
        validator = RangeValidator(
            lower_bound=cast(Optional[float], np.float64(row.measure_min)),
            upper_bound=cast(Optional[float], np.float64(row.measure_max)),
        )

    return ValueDefinition(
        id_=row.measure_id,
        name=row.measure_name,
        unit=row.measure_unit,
        data_type=row.measure_type,
        validator=validator,
    )
def row_to_value(row: pd.Series) -> MeasureValue:
    """Convert a pandas series to a measure value.

    The value stored under ``measure_value`` is cast to the data type named
    in ``measure_type`` before it is wrapped.

    Parameters
    ----------
    row
        A pandas series containing definition information.

    Returns
    -------
    MeasureValue
        The corresponding measure value.

    Raises
    ------
    MissingColumnError
        If ``measure_value`` field is missing from the pandas' series.
    """
    if "measure_value" not in row.index:
        raise MissingColumnError("measure_value")

    definition = row_to_definition(row)

    # Round-trip through a one-element array to apply the dtype conversion.
    raw = np.array([row["measure_value"]])
    typed_value = raw.astype(row["measure_type"])[0]

    return MeasureValue(definition, typed_value)
class MeasureSet(ValueSet):
    """A collection of measures."""

    VALUE_CLS: ClassVar[Type[Value]] = MeasureValue

    @classmethod
    def from_data_frame(cls, data: pd.DataFrame) -> "MeasureSet":
        """Create a MeasureSet from a data frame.

        Parameters
        ----------
        data
            A data frame containing information about measures. Each row is
            converted with :func:`row_to_value`.

        Returns
        -------
        MeasureSet
            A measure set derived from the provided data frame.
        """
        measure_values = data.apply(row_to_value, axis=1)
        return cls(measure_values.to_list())

    def to_list(self, stringify: bool = False) -> List[Dict[str, Optional[Any]]]:
        """Convert measure set to a list of measure dictionaries.

        Parameters
        ----------
        stringify
            Passed on to the ``to_dict`` method of each measure value.

        Returns
        -------
        List[Dict[str, Optional[Any]]]
            A list with one dictionary per measure value in the set.
        """
        records = []
        for measure in self.values():
            typed_measure = cast(self.VALUE_CLS, measure)  # type: ignore
            records.append(typed_measure.to_dict(stringify))
        return records