Source code for dispel.data.values

"""A module to handle values and their definitions."""
import inspect
from enum import Enum
from functools import total_ordering
from typing import (
    Any,
    Callable,
    ClassVar,
    Dict,
    ItemsView,
    Iterable,
    KeysView,
    List,
    Optional,
    Set,
    Type,
    Union,
    ValuesView,
)

from dispel.data.validators import ValidationException


[docs] @total_ordering class AbbreviatedValue: """An abbreviated value. Examples -------- This class allows to consistently handle abbreviated terms. Assuming you have a name of an assessment, e.g. `Cognitive Processing Speed` test and the respective abbreviation would be `CPS`, then you can create an abbreviated value like this: >>> from dispel.data.values import AbbreviatedValue as AV >>> value = AV('Cognitive Processing Speed test', 'CPS') >>> value Cognitive Processing Speed test (CPS) While this seems like a lot of overhead, it comes in handy when describing value definitions or higher-level abstractions, such as measure definitions. Parameters ---------- value The full description of the value abbr The abbreviated form of the value Attributes ---------- value The full description of the value """
[docs] def __init__(self, value: str, abbr: Optional[str] = None): self.value = value self._abbr = abbr
@property def abbr(self): """Get the abbreviated form of the value.""" return self._abbr or self.value def __str__(self): return self.value def __repr__(self): if self._abbr: return f"{self.value} ({self._abbr})" return self.value def __hash__(self): return hash((self.value, self._abbr)) def __eq__(self, other): if isinstance(other, str): return self._abbr is None and self.value == other if isinstance(other, AbbreviatedValue): return self.value == other.value and self.abbr == other.abbr return False def __lt__(self, other): if not isinstance(other, AbbreviatedValue): raise ValueError(f"Unsupported type in comparison: {type(other)}") if self.value == other.value: return self.abbr < other.abbr return self.value < other.value
[docs] def format(self, *args, **kwargs): """Format an abbreviated value.""" return AbbreviatedValue( self.value.format(*args, **kwargs), self._abbr.format(*args, **kwargs) if self._abbr else None, )
[docs] @classmethod def wrap(cls, value): """Wrap a value into an abbreviated value. This is a small helper class to conveniently wrap values into an abbreviated value, if they are not already one. Parameters ---------- value The value to be wrapped Returns ------- AbbreviatedValue The passed ``value`` if it is an instance of :class:`AbbreviatedValue`. If a string is passed, then the string is passed as ``value`` argument to the constructor. Raises ------ ValueError If the passed value is neither a string nor an instance of :class:`AbbreviatedValue`. """ if isinstance(value, cls): return value if isinstance(value, str): return cls(value) raise ValueError(f"Can only wrap string values. Got: {type(value)}")
[docs] class DefinitionId: """The definition of a measure id. This class provides the basic functionality around ids used to reference columns and definitions. Other structured ids inherit from this class. Parameters ---------- id_ The identifier of the definition. """
[docs] def __init__(self, id_: str): self._id = id_ # pylint: disable=C0103
@property def id(self) -> str: """Get the identifier.""" return self._id def __str__(self): return self.id __repr__ = __str__ def __eq__(self, other): if isinstance(other, str): return self.id == other if isinstance(other, DefinitionId): return self.id == other.id return False def __hash__(self): return hash(self.id)
[docs] @classmethod def from_str(cls, value: str) -> "DefinitionId": """Create a class instance from string.""" return cls(value)
DefinitionIdType = Union[DefinitionId, str]
[docs] class ValueDefinition: """The definition of a value. Parameters ---------- id_ The identifier of the value definition name The human-readable name of the values unit The unit of the value description A more elaborate description of the values and how they were produced data_type The numpy data type of the value in question validator A function that ensures values comply with the definition. The module :mod:`~dispel.data.validators` contains validators for common scenarios that can be used here. precision The number of significance for the values expected under definition. If set, the value will be rounded to the set number of digits. """
[docs] def __init__( self, id_: DefinitionIdType, name: str, unit: Optional[str] = None, description: Optional[str] = None, data_type: Optional[str] = None, validator: Optional[Callable[[Any], None]] = None, precision: Optional[int] = None, ): if isinstance(id_, str): id_ = DefinitionId.from_str(id_) self.id = id_ # pylint: disable=C0103 self.name = name self.unit = unit self.description = description self.data_type = data_type # Verify that the validator is Callable if validator and not callable(validator): raise TypeError(f"The {id_} measure validator is not Callable.") self.validator = validator self.precision = precision
def __repr__(self): unit_extra = f", {self.unit}" if self.unit else "" return f"<{self.__class__.__name__}: {self.id} " f"({self.name}{unit_extra})>" def __hash__(self): # TODO: make properties read-only return hash( ( self.id, self.name, self.unit, self.description, self.validator, self.data_type, ) ) def __eq__(self, other): if isinstance(other, ValueDefinition): return hash(self) == hash(other) return False @classmethod def _get_parameters(cls) -> Set[str]: params = set(inspect.signature(cls.__init__).parameters.keys()) params.remove("self") return params def _to_dict(self) -> Dict[str, Any]: """Turn instance into dict with values from constructor.""" def _getattr(name): if name == "id_": return self.id return getattr(self, name) return {name: _getattr(name) for name in self._get_parameters()}
[docs] def derive(self, **kwargs) -> "ValueDefinition": """Derive a value definition with updated properties. Parameters ---------- kwargs Keyword arguments to be set/updated in the derived definition. Returns ------- ValueDefinition A new definition with updated parameters. Raises ------ ValueError If one of the provided arguments is not a parameter of the constructor. """ diff = set(kwargs.keys()).difference(self._get_parameters()) if diff: raise ValueError( f"The following parameters are unknown to the constructor: " f'{", ".join(sorted(diff))}' ) new_kwargs = self._to_dict() new_kwargs.update(kwargs) return self.__class__(**new_kwargs)
[docs] class ValueDefinitionPrototype: """The prototype of a :class:`ValueDefinition`. Measure processing often leads to various related measures. To ease the creation of such, the :class:`ValueDefinitionPrototype` allows to specify prototypic measures that can be used to derive actual definitions. Parameters ---------- cls The class to be used when creating concrete instances with :meth:`create_definition`. By default, the class used is :class:ValueDefinition`. kwargs All named parameters passed to the constructor will be passed to the measure definition class constructor. The parameter ``cls`` is reserved to pass a different measure definition class. Placeholders to be filled upon creation are specified with curly brackets, i.e., ``a {placeholder} value`` is populated when calling ``prototype.create_definition(placeholder='special')``. Examples -------- Assuming we want to create a measure for different time windows one can create the following prototype: >>> from dispel.data.values import ValueDefinitionPrototype >>> prototype = ValueDefinitionPrototype( ... id_='measure-{lower}-{upper}', ... name='measure from {lower} to {upper}', ... unit='s' ... ) >>> prototype.create_definition(lower=5, upper=6) <ValueDefinition: measure-5-6 (measure from 5 to 6, s)> >>> prototype.create_definition(lower=1, upper=5) <ValueDefinition: measure-1-5 (measure from 1 to 5, s)> """
[docs] def __init__(self, **kwargs): self._cls = kwargs.pop("cls", ValueDefinition) self._kwargs = kwargs
[docs] def create_definition(self, **values: Any) -> ValueDefinition: """ Create a definition from this prototype. Parameters ---------- values The arguments and placeholders to be populated. All named arguments will be used to both provide additional named arguments to the measure definition class specified with ``cls`` during construction upon creation (the class is inspected for named parameters) and placeholders provided during construction of the prototype. Returns ------- ValueDefinition The value definition created from the value definition prototype. Examples -------- An example is given above to populate placeholders. This is also possible with arguments required by the definition class: >>> from dispel.data.values import ValueDefinitionPrototype >>> prototype = ValueDefinitionPrototype(unit='s') >>> prototype.create_definition(id_='foo', name='bar') <ValueDefinition: foo (bar, s)> >>> prototype.create_definition(id_='baz', name='bam') <ValueDefinition: baz (bam, s)> Raises ------ ValueError If a placeholder is missing from kwargs. """ def _can_format(value): return isinstance(value, (str, AbbreviatedValue)) try: kwargs = { k: v.format(**values) if _can_format(v) else v for k, v in self._kwargs.items() } except KeyError as error: raise ValueError(f"Missing placeholder: {error}") from error # inspect class for additional arguments to be passed signature = inspect.signature(self._cls.__init__) for param in signature.parameters: if param != "self" and param not in kwargs and param in values: kwargs[param] = values[param] return self._cls(**kwargs)
[docs] def create_definitions( self, items: Iterable[Dict[str, Any]] ) -> List[ValueDefinition]: """Create multiple definitions. This method provides a convenient way to specify multiple definitions at the same time for an iterable of dictionaries that are passed to :meth:`create_definition`. Parameters ---------- items An iterable of dictionaries passed to :meth:`create_definition`. Returns ------- List[ValueDefinition] A list of the created value definitions. """ return [self.create_definition(**values) for values in items]
[docs] def derive(self, **kwargs) -> "ValueDefinitionPrototype": """Derive a prototype with updated properties. Parameters ---------- kwargs Keyword arguments to be set/updated in the derived prototype. Returns ------- ValueDefinitionPrototype A new prototype with updated parameters. """ assert "cls" not in kwargs, "Class is set by derived class" new_kwargs = self._kwargs.copy() new_kwargs.update(kwargs) return self.__class__(cls=self._cls, **new_kwargs)
[docs] class Value: """A value with definition and actual value. Parameters ---------- definition The definition of the value. value The actual value. If `definition.precision` is set, then the value will be rounded to the number of significant digits. The pre-rounded value is stored in `raw_value`. """
[docs] def __init__(self, definition: ValueDefinition, value: Any): if not isinstance(definition, ValueDefinition): raise ValueError("Definition must be an instance of ValueDefinition") self.definition = definition # store original raw value before precision rounding self.raw_value = value if definition.precision is not None: value = round(value, ndigits=definition.precision) self.value = value # validate value if validator is present if self.definition.validator: try: self.definition.validator(self.value) except ValidationException as exc: raise ValueError( f"Provided value is not valid for {self.definition}: {exc}" ) from exc
@property def id(self) -> DefinitionId: """Get the identifier from the definition of the value.""" return self.definition.id def __repr__(self): return f"<{self.__class__.__name__} ({self.definition}): {self.value}>" def __hash__(self): return hash((self.definition, self.value)) def __eq__(self, other): if isinstance(other, self.__class__): return hash(self) == hash(other) return False
[docs] class ValueSet: r"""A collection of multiple values. Parameters ---------- values The values of the value set. This can be a list of :class:`Value`\ s or a list of any value. definitions An optional list of definitions describing the passed values through the parameter ``values``. Notes ----- The constructor passes both ``values`` and ``definitions`` to :meth:`ValueSet.set_values`. For details on how to specify values of the :class:`ValueSet` please have a look there. """ VALUE_CLS: ClassVar[Type[Value]] = Value
[docs] def __init__( self, values: Optional[List[Any]] = None, definitions: Optional[List[ValueDefinition]] = None, ): self._values: Dict[DefinitionId, Value] = {} if values: self.set_values(values, definitions)
[docs] def set( self, value: Any, definition: Optional[ValueDefinition] = None, overwrite: bool = False, ): """Set a value in the value set. Parameters ---------- value The value to be set. If the value is not an instance of :class:`Value` one needs to also provide a ``definition``. definition An optional definition of the passed value should the value not be an instance of :class:`Value`. overwrite By default values in the :class:`ValueSet` are not overwritten. If you want to update an already set value you will need to set ``overwrite = True``. Raises ------ ValueError If ``value`` is not a :class:`Value` and no definition is passed. ValueError If ``value``'s id is already present in the Value Set, and the ``overwrite`` argument is set to ``False``. """ if not isinstance(value, self.VALUE_CLS): if definition is None or not isinstance(definition, ValueDefinition): raise ValueError( "Value must be either a Value or a definition needs to be passed" ) value = self.VALUE_CLS(definition, value) if not overwrite and value.id in self._values: raise ValueError( f"Value with id already present: {value.id}. Set overwrite = True to " f"overwrite values." ) self._values[value.id] = value
[docs] def set_values( self, values: List[Any], definitions: Optional[List[ValueDefinition]] = None, overwrite: bool = True, ): """Set multiple values in the value set. Parameters ---------- values The values to be set. If the values are not an instance of :class:`Value` the optional parameter ``definitions`` needs to be provided with a list of :class:`ValueDefinition` describing each value in ``values``. definitions An optional list of definitions for values passed via ``values``. Both ``values`` and ``definitions`` need to be of equal length. overwrite The overwrite-behavior. See :meth:`ValueSet.set` for details. Raises ------ ValueError If ``values`` and ``definitions`` are not of equal length. """ if definitions: if len(values) != len(definitions): raise ValueError("Values and definitions need to be of equal length") values = [self.VALUE_CLS(d, v) for v, d in zip(values, definitions)] for value in values: self.set(value, overwrite=overwrite)
[docs] def has_value(self, id_: Union[DefinitionIdType, ValueDefinition]) -> bool: """Test if the set has a specific value. Parameters ---------- id_ The id or definition for which to lookup if a value is present Returns ------- bool ``True`` if the value set contains a value for the provided ``id_``. Otherwise, ``False``. Raises ------ TypeError If the id is neither a ``str``, :class:`DefinitionId` nor a :class:`ValueDefinition`. """ if isinstance(id_, str): return DefinitionId.from_str(id_) in self._values if isinstance(id_, DefinitionId): return id_ in self._values if isinstance(id_, ValueDefinition): return id_.id in self._values raise TypeError( "Id must be one of str, DefinitionId, ValueDefinition. " f"Got {type(id_)}" )
def __contains__(self, item: Union[DefinitionIdType, ValueDefinition]) -> bool: """Test if the set has a specific value. This is a convenience method for :meth:`ValueSet.has_value`. Parameters ---------- item The item whose existence in the value set is to be tested. Returns ------- ``True`` if the item exists in the value set. ``False`` otherwise. """ return self.has_value(item)
[docs] def get(self, id_: Union[DefinitionIdType, ValueDefinition]) -> Value: """Get a value for an id. Parameters ---------- id_ The id or definition for which to obtain the value. Returns ------- Value The respective :class:`Value` matching the provided ``id_``. Raises ------ KeyError If the provided ``id_`` is not present in the set. """ if isinstance(id_, str): return self.get(DefinitionId.from_str(id_)) if isinstance(id_, ValueDefinition): return self.get(id_.id) if not self.has_value(id_): raise KeyError( f"No value with id {id_} in set: " f"{list(self._values.keys())}" ) return self._values[id_]
def __getitem__(self, key: Union[DefinitionIdType, ValueDefinition]) -> Value: """Get a value for an id. This is a convenience wrapper around :meth:`ValueSet.get`. Parameters ---------- key The id or definition for which to retrieve the value. Returns ------- Value The value matching the passed id or definition. """ return self.get(key)
[docs] def get_raw_value(self, id_: Union[DefinitionIdType, ValueDefinition]) -> Any: """Get the raw value for an id. This is a convenience method to not have to call ``value_set.get(id).value``. Parameters ---------- id_ The id or definition for which to retrieve the raw value. Returns ------- Any The raw value of the :class:`Value` matching the passed id or definition. """ return self.get(id_).value
[docs] def get_definition( self, id_: Union[DefinitionIdType, ValueDefinition] ) -> ValueDefinition: """Get the definition of a value by its id. Parameters ---------- id_ The id for which to obtain the definition Returns ------- ValueDefinition The definition belonging to the passed id. """ return self.get(id_).definition
[docs] def values(self) -> ValuesView[Value]: """Get all values of the set. Returns ------- Iterable[Value] An iterable of all values within the set. """ return self._values.values()
[docs] def ids(self) -> KeysView[DefinitionId]: """Get all ids of the set. Returns ------- Iterable[DefinitionId] An iterable of all definition ids from all values of the set. """ return self._values.keys()
[docs] def definitions(self) -> Iterable[ValueDefinition]: """Get all definitions of the set. Returns ------- Iterable[ValueDefinition] An iterable of all value definitions from all values of the set. """ return (v.definition for v in self.values())
def __len__(self) -> int: return len(self._values) def __iter__(self): return iter(self._values) @property def empty(self) -> bool: """Get whether the value set is empty.""" return len(self) == 0 @staticmethod def _assert_add_type(other): if not isinstance(other, ValueSet): raise TypeError("Can only add values from ValueSet")
[docs] def items(self) -> ItemsView[DefinitionId, Value]: """Get an items view of all values.""" return self._values.items()
def _combine(self, other, overwrite): self._assert_add_type(other) res = self.__class__() res.set_values(list(self.values())) res.set_values(list(other.values()), overwrite=overwrite) return res def __add__(self, other): return self._combine(other, False) def __iadd__(self, other): self._assert_add_type(other) self.set_values(list(other.values())) return self def __or__(self, other: "ValueSet") -> "ValueSet": return self._combine(other, True) def __ior__(self, other: "ValueSet") -> "ValueSet": self._assert_add_type(other) self.set_values(list(other.values()), overwrite=True) return self def __eq__(self, other): if isinstance(other, ValueSet): return set(self.values()) == set(other.values()) raise TypeError("Can only compare ValueSets")
[docs] @total_ordering class AVEnum(Enum): """A base class for abbreviated value enumerations. When extracting measures from tasks they are often done for specific modalities. This base class allows to do this in a convenient fashion to address modalities both from a processing and representation form. The enumeration is ordered. Examples -------- Assuming you have a task that has two modalities, e.g. the *Cognitive Processing Speed* test has two forms: *symbol-to-digit* and *digit-to-digit*. The respective modalities class would look like: .. doctest:: enum >>> from dispel.data.values import AVEnum >>> class CPSTypeModality(AVEnum): ... SYMBOL_TO_DIGIT = ('symbol-to-digit', 'std') ... DIGIT_TO_DIGIT = ('digit-to-digit', 'dtd') ... >>> CPSTypeModality.SYMBOL_TO_DIGIT <CPSTypeModality.SYMBOL_TO_DIGIT: symbol-to-digit (std) [1]> The constants can be used directly in :class:`~pandas.Series` as well as can be converted to an integer representation: .. doctest:: enum >>> int(CPSTypeModality.SYMBOL_TO_DIGIT) 1 In order to conventiently pass the constants to modalities of :class:`~dispel.data.measures.MeasureValueDefinition` and :class:`~dispel.data.measures.MeasureValueDefinitionPrototype` a property is exposed that contains the :class:`~dispel.data.values.AbbreviatedValue`: .. doctest:: enum >>> CPSTypeModality.SYMBOL_TO_DIGIT.av symbol-to-digit (std) as well as for convenience the abbreviated value too: .. doctest:: enum >>> CPSTypeModality.SYMBOL_TO_DIGIT.abbr 'std' Since the enumeration is odered, one can also perform comparisons between them: .. doctest:: enum >>> CPSTypeModality.DIGIT_TO_DIGIT < CPSTypeModality.SYMBOL_TO_DIGIT False The member can also be retrieved from the abbreviation: .. doctest:: enum >>> CPSTypeModality.from_abbr('std') <CPSTypeModality.SYMBOL_TO_DIGIT: symbol-to-digit (std) [1]> As well as from the variable name (case-insensitive): .. doctest:: enum >>> CPSTypeModality.from_variable('symbol_to_digit') <CPSTypeModality.SYMBOL_TO_DIGIT: symbol-to-digit (std) [1]> """ def __new__(cls, *_args, **_kwargs): # noqa: D102 value = len(cls.__members__) + 1 obj = object.__new__(cls) obj._value_ = value return obj
[docs] def __init__(self, value, abbr=None): self.av = ( value if isinstance(value, AbbreviatedValue) else AbbreviatedValue(value, abbr) )
def __repr__(self): return ( f"<{self.__class__.__name__}.{self.name}: {repr(self.av)} " f"[{self.value}]>" ) def __str__(self): return str(self.av) def __int__(self): return self.value def __lt__(self, other): return self.value < other.value # pylint: disable=W0143 @property def abbr(self): """Get the abbreviated value.""" return self.av.abbr @property def variable(self): """Get the modality variable name.""" return str(self.name).lower()
[docs] @classmethod def from_abbr(cls, value: str): """Get the corresponding member from the abbreviated value.""" for member in cls: if member.abbr == value: return member raise KeyError(f"Unknown abbreviation: {value}")
[docs] @classmethod def from_variable(cls, value: str): """Get the corresponding member from the variable name.""" return getattr(cls, value.upper())